Improve tab support, and horizontal rules (#194)
Improve tab support, and horizontal rules
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96f5b64..4ab93a9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,10 @@
## 1.1.2-dev
* Overhaul support for emphasis (`*foo*` and `_foo_`) and strong emphasis
- (`**foo**` and `__foo__`). This raises the compliance with the CommonMark
- specs to 89%, and the compliance with the GFM specs to 86%.
+ (`**foo**` and `__foo__`), dramatically improving CommonMark compliance.
+* Improve support for tab characters and horizontal rules.
+* The above fixes raise compliance with the CommonMark specs to 90%, and
+ compliance with the GFM specs to 87%.
## 1.1.1
diff --git a/lib/src/block_parser.dart b/lib/src/block_parser.dart
index 1b09329..f630a92 100644
--- a/lib/src/block_parser.dart
+++ b/lib/src/block_parser.dart
@@ -22,7 +22,7 @@
final _blockquotePattern = new RegExp(r'^[ ]{0,3}>[ ]?(.*)$');
/// A line indented four spaces. Used for code blocks and lists.
-final _indentPattern = new RegExp(r'^(?: |\t)(.*)$');
+final _indentPattern = new RegExp(r'^(?: | {0,3}\t)(.*)$');
/// Fenced code block.
final _codePattern = new RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');
@@ -30,7 +30,7 @@
/// Three or more hyphens, asterisks or underscores by themselves. Note that
/// a line like `----` is valid as both HR and SETEXT. In case of a tie,
/// SETEXT should win.
-final _hrPattern = new RegExp(r'^ {0,3}([-*_]) *\1 *\1(?:\1| )*$');
+final _hrPattern = new RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$');
/// A line starting with one of these markers: `-`, `*`, `+`. May have up to
/// three leading spaces before the marker and any number of spaces or tabs
@@ -580,6 +580,8 @@
_olPattern
];
+ static final _whitespaceRe = new RegExp('[ \t]*');
+
Node parse(BlockParser parser) {
var items = <ListItem>[];
var childLines = <String>[];
@@ -604,6 +606,8 @@
int startNumber;
while (!parser.isDone) {
+ var leadingSpace = _whitespaceRe.matchAsPrefix(parser.current).group(0);
+ var leadingExpandedTabLength = _expandedTabLength(leadingSpace);
if (tryMatch(_emptyPattern)) {
if (_emptyPattern.firstMatch(parser.next ?? '') != null) {
// Two blank lines ends a list.
@@ -611,10 +615,15 @@
}
// Add a blank line to the current list item.
childLines.add('');
- } else if (indent != null && parser.current.startsWith(indent)) {
+ } else if (indent != null && indent.length <= leadingExpandedTabLength) {
// Strip off indent and add to current item.
- var line = parser.current.replaceFirst(indent, '');
+ var line = parser.current
+ .replaceFirst(leadingSpace, ' ' * leadingExpandedTabLength)
+ .replaceFirst(indent, '');
childLines.add(line);
+ } else if (tryMatch(_hrPattern)) {
+ // Horizontal rule takes precedence over a new list item.
+ break;
} else if (tryMatch(_ulPattern) || tryMatch(_olPattern)) {
var precedingWhitespace = match[1];
var digits = match[2] ?? '';
@@ -735,6 +744,14 @@
}
return anyEmpty;
}
+
+ static int _expandedTabLength(String input) {
+ var length = 0;
+ for (var char in input.codeUnits) {
+ length += char == 0x9 ? 4 - (length % 4) : 1;
+ }
+ return length;
+ }
}
/// Parses unordered lists.
diff --git a/tool/common_mark_stats.json b/tool/common_mark_stats.json
index 83b6f50..1a78497 100644
--- a/tool/common_mark_stats.json
+++ b/tool/common_mark_stats.json
@@ -638,16 +638,16 @@
},
"Tabs": {
"1": "strict",
- "2": "fail",
+ "2": "strict",
"3": "strict",
- "4": "fail",
- "5": "fail",
+ "4": "loose",
+ "5": "loose",
"6": "loose",
"7": "loose",
"8": "strict",
- "9": "fail",
+ "9": "loose",
"10": "strict",
- "11": "fail"
+ "11": "strict"
},
"Textual content": {
"622": "strict",
@@ -672,7 +672,7 @@
"27": "loose",
"28": "strict",
"29": "strict",
- "30": "fail",
+ "30": "loose",
"31": "loose"
}
}
diff --git a/tool/common_mark_stats.txt b/tool/common_mark_stats.txt
index f6edbe5..9d03b4e 100644
--- a/tool/common_mark_stats.txt
+++ b/tool/common_mark_stats.txt
@@ -21,7 +21,7 @@
15 of 21 – 71.4% Raw HTML
25 of 26 – 96.2% Setext headings
2 of 2 – 100.0% Soft line breaks
- 6 of 11 – 54.5% Tabs
+ 11 of 11 – 100.0% Tabs
3 of 3 – 100.0% Textual content
- 18 of 19 – 94.7% Thematic breaks
- 555 of 624 – 88.9% TOTAL
+ 19 of 19 – 100.0% Thematic breaks
+ 561 of 624 – 89.9% TOTAL
diff --git a/tool/gfm_stats.json b/tool/gfm_stats.json
index 5d32124..bc6195f 100644
--- a/tool/gfm_stats.json
+++ b/tool/gfm_stats.json
@@ -668,16 +668,16 @@
},
"Tabs": {
"1": "strict",
- "2": "fail",
+ "2": "strict",
"3": "strict",
- "4": "fail",
- "5": "fail",
+ "4": "loose",
+ "5": "loose",
"6": "loose",
"7": "loose",
"8": "strict",
- "9": "fail",
+ "9": "loose",
"10": "strict",
- "11": "fail"
+ "11": "strict"
},
"Textual content": {
"646": "strict",
@@ -702,7 +702,7 @@
"27": "loose",
"28": "strict",
"29": "strict",
- "30": "fail",
+ "30": "loose",
"31": "loose"
}
}
diff --git a/tool/gfm_stats.txt b/tool/gfm_stats.txt
index 08bfcef..d585e0c 100644
--- a/tool/gfm_stats.txt
+++ b/tool/gfm_stats.txt
@@ -25,7 +25,7 @@
2 of 2 – 100.0% Soft line breaks
1 of 3 – 33.3% Strikethrough (extension)
2 of 7 – 28.6% Tables (extension)
- 6 of 11 – 54.5% Tabs
+ 11 of 11 – 100.0% Tabs
3 of 3 – 100.0% Textual content
- 18 of 19 – 94.7% Thematic breaks
- 558 of 646 – 86.4% TOTAL
+ 19 of 19 – 100.0% Thematic breaks
+ 564 of 646 – 87.3% TOTAL