Add startCharacter, reducing parsing time by another 17%; bump to 2.1.2 (#276)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7c4ab46..97740d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,13 @@
-## 2.1.2-dev
+## 2.1.2
* Drop support for Dart 2.0.0 through 2.1.0.
* Recognize Unicode ellipsis (…) and other Unicode punctuation as punctuation
when parsing potential emphasis.
* Reduce time to parse a large HTML-block-free Markdown document (such as that
- in #271) by about half.
+ in #271) by more than half.
+* Add a new optional parameter for InlineSyntax(), `startCharacter`, with which
+ a subclass can specify a single character to check for before matching with
+ the more expensive regular expression.
## 2.1.1
diff --git a/lib/src/inline_parser.dart b/lib/src/inline_parser.dart
index 0b1a25e..ed4513f 100644
--- a/lib/src/inline_parser.dart
+++ b/lib/src/inline_parser.dart
@@ -22,9 +22,9 @@
// Allow any punctuation to be escaped.
EscapeSyntax(),
// "*" surrounded by spaces is left alone.
- TextSyntax(r' \* '),
+ TextSyntax(r' \* ', startCharacter: $space),
// "_" surrounded by spaces is left alone.
- TextSyntax(r' _ '),
+ TextSyntax(r' _ ', startCharacter: $space),
// Parse "**strong**" and "*emphasis*" tags.
TagSyntax(r'\*+', requiresDelimiterRun: true),
// Parse "__strong__" and "_emphasis_" tags.
@@ -37,13 +37,13 @@
List<InlineSyntax>.unmodifiable(<InlineSyntax>[
// Leave already-encoded HTML entities alone. Ensures we don't turn
// "&amp;" into "&amp;amp;"
- TextSyntax(r'&[#a-zA-Z0-9]*;'),
+ TextSyntax(r'&[#a-zA-Z0-9]*;', startCharacter: $ampersand),
// Encode "&".
- TextSyntax(r'&', sub: '&'),
+ TextSyntax(r'&', sub: '&', startCharacter: $ampersand),
// Encode "<".
- TextSyntax(r'<', sub: '<'),
+ TextSyntax(r'<', sub: '<', startCharacter: $lt),
// Encode ">".
- TextSyntax(r'>', sub: '>'),
+ TextSyntax(r'>', sub: '>', startCharacter: $gt),
// We will add the LinkSyntax once we know about the specific link resolver.
]);
@@ -164,7 +164,17 @@
abstract class InlineSyntax {
final RegExp pattern;
- InlineSyntax(String pattern) : pattern = RegExp(pattern, multiLine: true);
+ /// The first character of [pattern], to be used as an efficient first check
+ /// that this syntax matches the current parser position.
+ final int _startCharacter;
+
+ /// Create a new [InlineSyntax] which matches text on [pattern].
+ ///
+ /// If [startCharacter] is passed, it is used as a pre-matching check which
+ /// is faster than matching against [pattern].
+ InlineSyntax(String pattern, {int startCharacter})
+ : pattern = RegExp(pattern, multiLine: true),
+ _startCharacter = startCharacter;
/// Tries to match at the parser's current position.
///
@@ -173,6 +183,14 @@
bool tryMatch(InlineParser parser, [int startMatchPos]) {
if (startMatchPos == null) startMatchPos = parser.pos;
+ // Before matching with the regular expression [pattern], which can be
+ // expensive on some platforms, check if even the first character matches
+ // this syntax.
+ if (_startCharacter != null &&
+ parser.source.codeUnitAt(startMatchPos) != _startCharacter) {
+ return false;
+ }
+
final startMatch = pattern.matchAsPrefix(parser.source, startMatchPos);
if (startMatch == null) return false;
@@ -205,9 +223,14 @@
class TextSyntax extends InlineSyntax {
final String substitute;
- TextSyntax(String pattern, {String sub})
+ /// Create a new [TextSyntax] which matches text on [pattern].
+ ///
+ /// If [sub] is passed, it is used as a simple replacement for [pattern]. If
+ /// [startCharacter] is passed, it is used as a pre-matching check which is
+ /// faster than matching against [pattern].
+ TextSyntax(String pattern, {String sub, int startCharacter})
: substitute = sub,
- super(pattern);
+ super(pattern, startCharacter: startCharacter);
/// Adds a [Text] node to [parser] and returns `true` if there is a
/// [substitute], as long as the preceding character (if any) is not a `/`.
@@ -262,7 +285,9 @@
/// TODO(srawlins): improve accuracy while ensuring performance, once
/// Markdown benchmarking is more mature.
class InlineHtmlSyntax extends TextSyntax {
- InlineHtmlSyntax() : super(r'<[/!?]?[A-Za-z][A-Za-z0-9-]*(?:\s[^>]*)?>');
+ InlineHtmlSyntax()
+ : super(r'<[/!?]?[A-Za-z][A-Za-z0-9-]*(?:\s[^>]*)?>',
+ startCharacter: $lt);
}
/// Matches autolinks like `<foo@bar.example.com>`.
@@ -273,7 +298,7 @@
r'''[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}'''
r'''[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*''';
- EmailAutolinkSyntax() : super('<($_email)>');
+ EmailAutolinkSyntax() : super('<($_email)>', startCharacter: $lt);
bool onMatch(InlineParser parser, Match match) {
var url = match[1];
@@ -551,9 +576,17 @@
/// [emphasis delimiters]: http://spec.commonmark.org/0.28/#can-open-emphasis
final bool requiresDelimiterRun;
- TagSyntax(String pattern, {String end, this.requiresDelimiterRun = false})
+ /// Create a new [TagSyntax] which matches text on [pattern].
+ ///
+ /// If [end] is passed, it is used as the pattern which denotes the end of
+ /// matching text. Otherwise, [pattern] is used. If [requiresDelimiterRun] is
+ /// passed, this syntax parses according to the same nesting rules as
+ /// emphasis delimiters. If [startCharacter] is passed, it is used as a
+ /// pre-matching check which is faster than matching against [pattern].
+ TagSyntax(String pattern,
+ {String end, this.requiresDelimiterRun = false, int startCharacter})
: endPattern = RegExp((end != null) ? end : pattern, multiLine: true),
- super(pattern);
+ super(pattern, startCharacter: startCharacter);
bool onMatch(InlineParser parser, Match match) {
var runLength = match.group(0).length;
@@ -638,9 +671,12 @@
final Resolver linkResolver;
- LinkSyntax({Resolver linkResolver, String pattern = r'\['})
+ LinkSyntax(
+ {Resolver linkResolver,
+ String pattern = r'\[',
+ int startCharacter = $lbracket})
: this.linkResolver = (linkResolver ?? (String _, [String __]) => null),
- super(pattern, end: r'\]');
+ super(pattern, end: r'\]', startCharacter: startCharacter);
// The pending [TagState]s, all together, are "active" or "inactive" based on
// whether a link element has just been parsed.
@@ -1053,7 +1089,10 @@
/// `![alternate text][label]`.
class ImageSyntax extends LinkSyntax {
ImageSyntax({Resolver linkResolver})
- : super(linkResolver: linkResolver, pattern: r'!\[');
+ : super(
+ linkResolver: linkResolver,
+ pattern: r'!\[',
+ startCharacter: $exclamation);
Node _createNode(TagState state, String destination, String title) {
var element = Element.empty('img');
diff --git a/lib/src/version.dart b/lib/src/version.dart
index 7a59bfe..5e5a029 100644
--- a/lib/src/version.dart
+++ b/lib/src/version.dart
@@ -1,2 +1,2 @@
// Generated code. Do not modify.
-const packageVersion = '2.1.2-dev';
+const packageVersion = '2.1.2';
diff --git a/pubspec.yaml b/pubspec.yaml
index 396387a..39bef73 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: markdown
-version: 2.1.2-dev
+version: 2.1.2
description: A library for converting markdown to HTML.
author: Dart Team <misc@dartlang.org>