Optimise TableSyntax (#472)
* Optimise TableSyntax
* Update a comment
* Update tables.unit
* Set growable false for columns
* Simplify neverMatch
* Update gfm_stats.txt
* Optimise TableSyntax
* Update a comment
* Set growable false for columns
* Simplify neverMatch
* Optimise performance
* More optimisation
* Fix some requests
* Add a comment
* Rename align to alignment
* Fix a request
diff --git a/lib/src/block_parser.dart b/lib/src/block_parser.dart
index beaa46a..9289ae1 100644
--- a/lib/src/block_parser.dart
+++ b/lib/src/block_parser.dart
@@ -91,6 +91,10 @@
_pos++;
}
+ void retreat() {
+ _pos--;
+ }
+
bool get isDone => _pos >= lines.length;
/// Gets whether or not the current line matches the given pattern.
@@ -107,11 +111,28 @@
List<Node> parseLines() {
final blocks = <Node>[];
+
+ // If `_pos` does not change across a call to `parse()`, never try to
+ // parse the line at `_pos` with the same syntax again.
+ // For example, `TableSyntax` might not advance `_pos` in its `parse`
+ // method because the header row does not match the delimiter row in the
+ // number of cells, so the table-like structure is not recognized.
+ BlockSyntax? neverMatch;
+
while (!isDone) {
for (final syntax in blockSyntaxes) {
+ if (neverMatch == syntax) {
+ continue;
+ }
+
if (syntax.canParse(this)) {
+ final positionBefore = _pos;
final block = syntax.parse(this);
- if (block != null) blocks.add(block);
+ if (block != null) {
+ blocks.add(block);
+ }
+ neverMatch = _pos != positionBefore ? null : syntax;
+
break;
}
}
diff --git a/lib/src/block_syntaxes/table_syntax.dart b/lib/src/block_syntaxes/table_syntax.dart
index a63540f..fbd63c9 100644
--- a/lib/src/block_syntaxes/table_syntax.dart
+++ b/lib/src/block_syntaxes/table_syntax.dart
@@ -36,6 +36,7 @@
final columnCount = alignments.length;
final headRow = _parseRow(parser, alignments, 'th');
if (headRow.children!.length != columnCount) {
+ parser.retreat();
return null;
}
final head = Element('thead', [headRow]);
@@ -50,7 +51,7 @@
if (children != null) {
while (children.length < columnCount) {
// Insert synthetic empty cells.
- children.add(Element.empty('td'));
+ children.add(Element('td', const []));
}
while (children.length > columnCount) {
children.removeLast();
@@ -71,29 +72,42 @@
}
List<String?> _parseAlignments(String line) {
- final startIndex = _walkPastOpeningPipe(line);
+ final columns = <String?>[];
+ // Set to `true` on reaching the first non-whitespace character that is
+ // not a leading pipe character.
+ var started = false;
+ var hitDash = false;
+ String? alignment;
- var endIndex = line.length - 1;
- while (endIndex > 0) {
- final ch = line.codeUnitAt(endIndex);
- if (ch == $pipe) {
- endIndex--;
- break;
+ for (var i = 0; i < line.length; i++) {
+ final char = line.codeUnitAt(i);
+ if (char == $space || char == $tab || (!started && char == $pipe)) {
+ continue;
}
- if (ch != $space && ch != $tab) {
- break;
+ started = true;
+
+ if (char == $colon) {
+ if (hitDash) {
+ alignment = alignment == 'left' ? 'center' : 'right';
+ } else {
+ alignment = 'left';
+ }
}
- endIndex--;
+
+ if (char == $pipe) {
+ columns.add(alignment);
+ hitDash = false;
+ alignment = null;
+ } else {
+ hitDash = true;
+ }
}
- // Optimization: We walk [line] too many times. One lap should do it.
- return line.substring(startIndex, endIndex + 1).split('|').map((column) {
- column = column.trim();
- if (column.startsWith(':') && column.endsWith(':')) return 'center';
- if (column.startsWith(':')) return 'left';
- if (column.endsWith(':')) return 'right';
- return null;
- }).toList(growable: false);
+ if (hitDash) {
+ columns.add(alignment);
+ }
+
+ return columns;
}
/// Parses a table row at the current line into a table row element, with
@@ -166,7 +180,7 @@
for (var i = 0; i < row.length && i < alignments.length; i++) {
if (alignments[i] == null) continue;
- row[i].attributes['style'] = 'text-align: ${alignments[i]};';
+ row[i].attributes['align'] = '${alignments[i]}';
}
return Element('tr', row);
diff --git a/lib/src/html_renderer.dart b/lib/src/html_renderer.dart
index a6abfcd..8b7dedc 100644
--- a/lib/src/html_renderer.dart
+++ b/lib/src/html_renderer.dart
@@ -77,7 +77,12 @@
'main',
'nav',
'section',
- 'table'
+ 'table',
+ 'thead',
+ 'tbody',
+ 'th',
+ 'tr',
+ 'td',
];
/// Translates a parsed AST to HTML.
diff --git a/test/extensions/tables.unit b/test/extensions/tables.unit
index 9516fb3..2f076df 100644
--- a/test/extensions/tables.unit
+++ b/test/extensions/tables.unit
@@ -4,7 +4,20 @@
body | cells
<<<
-<table><thead><tr><th>head</th><th>cells</th></tr></thead><tbody><tr><td>body</td><td>cells</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>head</th>
+<th>cells</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>body</td>
+<td>cells</td>
+</tr>
+</tbody>
+</table>
>>> multiple rows
head | cells
-----|------
@@ -12,40 +25,124 @@
more | cells
<<<
-<table><thead><tr><th>head</th><th>cells</th></tr></thead><tbody><tr><td>body</td><td>cells</td></tr><tr><td>more</td><td>cells</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>head</th>
+<th>cells</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>body</td>
+<td>cells</td>
+</tr>
+<tr>
+<td>more</td>
+<td>cells</td>
+</tr>
+</tbody>
+</table>
>>> rows wrapped in pipes
| head | cells |
|------|-------|
| body | cells |
<<<
-<table><thead><tr><th>head</th><th>cells</th></tr></thead><tbody><tr><td>body</td><td>cells</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>head</th>
+<th>cells</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>body</td>
+<td>cells</td>
+</tr>
+</tbody>
+</table>
>>> rows wrapped in pipes, whitespace alignment row
| head | cells |
| -- | --- |
| body | cells |
<<<
-<table><thead><tr><th>head</th><th>cells</th></tr></thead><tbody><tr><td>body</td><td>cells</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>head</th>
+<th>cells</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>body</td>
+<td>cells</td>
+</tr>
+</tbody>
+</table>
>>> rows wrapped in pipes, tabs in whitespace
| head | cells |
| -- | --- |
| body | cells |
<<<
-<table><thead><tr><th>head</th><th>cells</th></tr></thead><tbody><tr><td>body</td><td>cells</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>head</th>
+<th>cells</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>body</td>
+<td>cells</td>
+</tr>
+</tbody>
+</table>
>>> cells with inline syntax
head `code` | _cells_
------------|--------
*text* | <span>text</span>
<<<
-<table><thead><tr><th>head <code>code</code></th><th><em>cells</em></th></tr></thead><tbody><tr><td><em>text</em></td><td><span>text</span></td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>head <code>code</code></th>
+<th><em>cells</em></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>text</em></td>
+<td><span>text</span></td>
+</tr>
+</tbody>
+</table>
>>> cells are parsed before inline syntax
header | _foo | bar_
-------|------------|---
text | text
<<<
-<table><thead><tr><th>header</th><th>_foo</th><th>bar_</th></tr></thead><tbody><tr><td>text</td><td>text</td><td /></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>header</th>
+<th>_foo</th>
+<th>bar_</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>text</td>
+<td>text</td>
+<td></td>
+</tr>
+</tbody>
+</table>
>>> cells contain reference links
header | header
-------|--------
@@ -53,20 +150,64 @@
[here]: http://url
<<<
-<table><thead><tr><th>header</th><th>header</th></tr></thead><tbody><tr><td>text</td><td><a href="http://url">link</a></td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>header</th>
+<th>header</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>text</td>
+<td><a href="http://url">link</a></td>
+</tr>
+</tbody>
+</table>
>>> one column tables
head
-----|
body
<<<
-<table><thead><tr><th>head</th></tr></thead><tbody><tr><td>body</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>head</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>body</td>
+</tr>
+</tbody>
+</table>
>>> varying cells per row
head | foo | bar
-----|-----|-----
body
row with | two cells
<<<
-<table><thead><tr><th>head</th><th>foo</th><th>bar</th></tr></thead><tbody><tr><td>body</td><td /><td /></tr><tr><td>row with</td><td>two cells</td><td /></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>head</th>
+<th>foo</th>
+<th>bar</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>body</td>
+<td></td>
+<td></td>
+</tr>
+<tr>
+<td>row with</td>
+<td>two cells</td>
+<td></td>
+</tr>
+</tbody>
+</table>
>>> left, center, and right alignment
head | cells | here
:----|:-----:|----:
@@ -74,7 +215,27 @@
too | many | cells | here
<<<
-<table><thead><tr><th style="text-align: left;">head</th><th style="text-align: center;">cells</th><th style="text-align: right;">here</th></tr></thead><tbody><tr><td style="text-align: left;">body</td><td style="text-align: center;">cells</td><td style="text-align: right;">here</td></tr><tr><td style="text-align: left;">too</td><td style="text-align: center;">many</td><td style="text-align: right;">cells</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th align="left">head</th>
+<th align="center">cells</th>
+<th align="right">here</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">body</td>
+<td align="center">cells</td>
+<td align="right">here</td>
+</tr>
+<tr>
+<td align="left">too</td>
+<td align="center">many</td>
+<td align="right">cells</td>
+</tr>
+</tbody>
+</table>
>>> left, center, and right alignment, with whitespace
head | cells | here
:-- | :---: | ---:
@@ -82,7 +243,27 @@
too | many | cells | here
<<<
-<table><thead><tr><th style="text-align: left;">head</th><th style="text-align: center;">cells</th><th style="text-align: right;">here</th></tr></thead><tbody><tr><td style="text-align: left;">body</td><td style="text-align: center;">cells</td><td style="text-align: right;">here</td></tr><tr><td style="text-align: left;">too</td><td style="text-align: center;">many</td><td style="text-align: right;">cells</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th align="left">head</th>
+<th align="center">cells</th>
+<th align="right">here</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">body</td>
+<td align="center">cells</td>
+<td align="right">here</td>
+</tr>
+<tr>
+<td align="left">too</td>
+<td align="center">many</td>
+<td align="right">cells</td>
+</tr>
+</tbody>
+</table>
>>> escape pipe
| Name | Character |
| --- | --- |
@@ -90,17 +271,60 @@
| Pipe | \| |
<<<
-<table><thead><tr><th>Name</th><th>Character</th></tr></thead><tbody><tr><td>Backtick</td><td>`</td></tr><tr><td>Pipe</td><td>|</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Character</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Backtick</td>
+<td>`</td>
+</tr>
+<tr>
+<td>Pipe</td>
+<td>|</td>
+</tr>
+</tbody>
+</table>
>>> escape pipe, preserve trailing whitespace
| Name | Character |
| --- | --- |
| Pipe | \| abcdef |
<<<
-<table><thead><tr><th>Name</th><th>Character</th></tr></thead><tbody><tr><td>Pipe</td><td>| abcdef</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Character</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Pipe</td>
+<td>| abcdef</td>
+</tr>
+</tbody>
+</table>
>>> trailing whitespace after final pipe
| Name | Character |
| --- | --- |
| Pipe | abcdef |
<<<
-<table><thead><tr><th>Name</th><th>Character</th></tr></thead><tbody><tr><td>Pipe</td><td>abcdef</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Character</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Pipe</td>
+<td>abcdef</td>
+</tr>
+</tbody>
+</table>
diff --git a/test/gfm/tables_extension.unit b/test/gfm/tables_extension.unit
index cd84708..625fca8 100644
--- a/test/gfm/tables_extension.unit
+++ b/test/gfm/tables_extension.unit
@@ -3,27 +3,80 @@
| --- | --- |
| baz | bim |
<<<
-<table><thead><tr><th>foo</th><th>bar</th></tr></thead><tbody><tr><td>baz</td><td>bim</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>foo</th>
+<th>bar</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>baz</td>
+<td>bim</td>
+</tr>
+</tbody>
+</table>
>>> Tables (extension) - 199
| abc | defghi |
:-: | -----------:
bar | baz
<<<
-<table><thead><tr><th style="text-align: center;">abc</th><th style="text-align: right;">defghi</th></tr></thead><tbody><tr><td style="text-align: center;">bar</td><td style="text-align: right;">baz</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th align="center">abc</th>
+<th align="right">defghi</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="center">bar</td>
+<td align="right">baz</td>
+</tr>
+</tbody>
+</table>
>>> Tables (extension) - 200
| f\|oo |
| ------ |
| b `\|` az |
| b **\|** im |
<<<
-<table><thead><tr><th>f|oo</th></tr></thead><tbody><tr><td>b <code>|</code> az</td></tr><tr><td>b <strong>|</strong> im</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>f|oo</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>b <code>|</code> az</td>
+</tr>
+<tr>
+<td>b <strong>|</strong> im</td>
+</tr>
+</tbody>
+</table>
>>> Tables (extension) - 201
| abc | def |
| --- | --- |
| bar | baz |
> bar
<<<
-<table><thead><tr><th>abc</th><th>def</th></tr></thead><tbody><tr><td>bar</td><td>baz</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>bar</td>
+<td>baz</td>
+</tr>
+</tbody>
+</table>
<blockquote>
<p>bar</p>
</blockquote>
@@ -35,14 +88,32 @@
bar
<<<
-<table><thead><tr><th>abc</th><th>def</th></tr></thead><tbody><tr><td>bar</td><td>baz</td></tr><tr><td>bar</td><td /></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>bar</td>
+<td>baz</td>
+</tr>
+<tr>
+<td>bar</td>
+<td></td>
+</tr>
+</tbody>
+</table>
<p>bar</p>
>>> Tables (extension) - 203
| abc | def |
| --- |
| bar |
<<<
-<p>| --- |
+<p>| abc | def |
+| --- |
| bar |</p>
>>> Tables (extension) - 204
| abc | def |
@@ -50,9 +121,33 @@
| bar |
| bar | baz | boo |
<<<
-<table><thead><tr><th>abc</th><th>def</th></tr></thead><tbody><tr><td>bar</td><td /></tr><tr><td>bar</td><td>baz</td></tr></tbody></table>
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>bar</td>
+<td></td>
+</tr>
+<tr>
+<td>bar</td>
+<td>baz</td>
+</tr>
+</tbody>
+</table>
>>> Tables (extension) - 205
| abc | def |
| --- | --- |
<<<
-<table><thead><tr><th>abc</th><th>def</th></tr></thead></table>
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+</table>
diff --git a/tool/gfm_stats.json b/tool/gfm_stats.json
index 294d348..9627e27 100644
--- a/tool/gfm_stats.json
+++ b/tool/gfm_stats.json
@@ -682,14 +682,14 @@
"492": "strict"
},
"Tables (extension)": {
- "198": "loose",
- "199": "fail",
- "200": "loose",
- "201": "loose",
- "202": "loose",
- "203": "loose",
- "204": "loose",
- "205": "loose"
+ "198": "strict",
+ "199": "strict",
+ "200": "strict",
+ "201": "strict",
+ "202": "strict",
+ "203": "strict",
+ "204": "strict",
+ "205": "strict"
},
"Tabs": {
"1": "strict",
diff --git a/tool/gfm_stats.txt b/tool/gfm_stats.txt
index 7976ba7..8204a44 100644
--- a/tool/gfm_stats.txt
+++ b/tool/gfm_stats.txt
@@ -24,9 +24,9 @@
26 of 27 – 96.3% Setext headings
2 of 2 – 100.0% Soft line breaks
2 of 2 – 100.0% Strikethrough (extension)
- 7 of 8 – 87.5% Tables (extension)
+ 8 of 8 – 100.0% Tables (extension)
11 of 11 – 100.0% Tabs
3 of 3 – 100.0% Textual content
19 of 19 – 100.0% Thematic breaks
- 633 of 671 – 94.3% TOTAL
- 561 of 633 – 88.6% TOTAL Strict
+ 634 of 671 – 94.5% TOTAL
+ 569 of 634 – 89.7% TOTAL Strict