Overhaul links (#202) Overhaul link parsing, dramatically improving CommonMark compliance.

commit: edd15a170e3ca36189dd1fc42b3b0fbe5d10ed60 [log] [tgz]
author: Sam Rawlins <sam.rawlins@gmail.com> Fri Mar 23 12:26:38 2018 -0700
committer: GitHub <noreply@github.com> Fri Mar 23 12:26:38 2018 -0700
tree: a09a0cce1a762a2569a7bd5831bc617a768a6518
parent: 1b408e55095dec9f3eca30cae4c1d746ecd43dc5 [diff]
diff --git a/lib/src/ast.dart b/lib/src/ast.dart
index 9e4869e..4d7af15 100644
--- a/lib/src/ast.dart
+++ b/lib/src/ast.dart

@@ -2,7 +2,7 @@
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
 
-typedef Node Resolver(String name);
+typedef Node Resolver(String name, [String title]);
 
 /// Base class for any AST item.
 ///

diff --git a/lib/src/block_parser.dart b/lib/src/block_parser.dart
index e41770a..e4239df 100644
--- a/lib/src/block_parser.dart
+++ b/lib/src/block_parser.dart

@@ -32,6 +32,9 @@
 /// SETEXT should win.
 final _hrPattern = new RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$');
 
+/// One or more whitespace, for compressing.
+final _oneOrMoreWhitespacePattern = new RegExp('[ \n\r\t]+');
+
 /// A line starting with one of these markers: `-`, `*`, `+`. May have up to
 /// three leading spaces before the marker and any number of spaces or tabs
 /// after.
@@ -1021,8 +1024,9 @@
       title = title.substring(1, title.length - 1);
     }
 
-    // References are case-insensitive.
-    label = label.toLowerCase().trim();
+    // References are case-insensitive, and internal whitespace is compressed.
+    label =
+        label.toLowerCase().trim().replaceAll(_oneOrMoreWhitespacePattern, ' ');
 
     parser.document.linkReferences
         .putIfAbsent(label, () => new LinkReference(label, destination, title));

diff --git a/lib/src/extension_set.dart b/lib/src/extension_set.dart
index d6bec74..12be92b 100644
--- a/lib/src/extension_set.dart
+++ b/lib/src/extension_set.dart

@@ -7,7 +7,8 @@
 /// For example, the [gitHub] set of syntax extensions allows users to output
 /// HTML from their Markdown in a similar fashion to GitHub's parsing.
 class ExtensionSet {
-  /// The [none] extension set renders Markdown similar to [Markdown.pl].
+  /// The [ExtensionSet.none] extension set renders Markdown similar to
+  /// [Markdown.pl].
   ///
   /// However, this set does not render _exactly_ the same as Markdown.pl;
   /// rather it is more-or-less the CommonMark standard of Markdown, without

diff --git a/lib/src/inline_parser.dart b/lib/src/inline_parser.dart
index 12d8fee..4242d0b 100644
--- a/lib/src/inline_parser.dart
+++ b/lib/src/inline_parser.dart

@@ -2,6 +2,8 @@
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
 
+import 'package:charcode/charcode.dart';
+
 import 'ast.dart';
 import 'document.dart';
 import 'emojis.dart';
@@ -87,29 +89,14 @@
     _stack.add(new TagState(0, 0, null, null));
 
     while (!isDone) {
-      var matched = false;
-
-      // See if any of the current tags on the stack match. We don't allow tags
-      // of the same kind to nest, so this takes priority over other possible
-      // matches.
-      for (var i = _stack.length - 1; i > 0; i--) {
-        if (_stack[i].tryMatch(this)) {
-          matched = true;
-          break;
-        }
-      }
-
-      if (matched) continue;
+      // See if any of the current tags on the stack match.  This takes
+      // priority over other possible matches.
+      if (_stack.reversed
+          .any((state) => state.syntax != null && state.tryMatch(this)))
+        continue;
 
       // See if the current text matches any defined markdown syntax.
-      for (var syntax in syntaxes) {
-        if (syntax.tryMatch(this)) {
-          matched = true;
-          break;
-        }
-      }
-
-      if (matched) continue;
+      if (syntaxes.any((syntax) => syntax.tryMatch(this))) continue;
 
       // If we got here, it's just text.
       advanceBy(1);
@@ -119,6 +106,8 @@
     return _stack[0].close(this, null);
   }
 
+  int charAt(int index) => source.codeUnitAt(index);
+
   void writeText() {
     writeTextRange(start, pos);
     start = pos;
@@ -139,10 +128,14 @@
     }
   }
 
+  /// Add [node] to the last [TagState] on the stack.
   void addNode(Node node) {
     _stack.last.children.add(node);
   }
 
+  /// Push [state] onto the stack of [TagState]s.
+  void openTag(TagState state) => _stack.add(state);
+
   bool get isDone => pos == source.length;
 
   void advanceBy(int length) {
@@ -229,7 +222,7 @@
 }
 
 /// Leave inline HTML tags alone, from
-/// [CommonMark 0.22](http://spec.commonmark.org/0.22/#raw-html).
+/// [CommonMark 0.28](http://spec.commonmark.org/0.28/#raw-html).
 ///
 /// This is not actually a good definition (nor CommonMark's) of an HTML tag,
 /// but it is fast. It will leave text like `<a href='hi">` alone, which is
@@ -238,7 +231,7 @@
 /// TODO(srawlins): improve accuracy while ensuring performance, once
 /// Markdown benchmarking is more mature.
 class InlineHtmlSyntax extends TextSyntax {
-  InlineHtmlSyntax() : super(r'<[/!?]?[A-Za-z][A-Za-z0-9-]*(?: [^>]*)?>');
+  InlineHtmlSyntax() : super(r'<[/!?]?[A-Za-z][A-Za-z0-9-]*(?:\s[^>]*)?>');
 }
 
 /// Matches autolinks like `<foo@bar.example.com>`.
@@ -402,7 +395,7 @@
   // TODO(srawlins): Unicode whitespace
   static final String whitespace = ' \t\r\n';
 
-  final String char;
+  final int char;
   final int length;
   final bool isLeftFlanking;
   final bool isRightFlanking;
@@ -463,7 +456,7 @@
     }
 
     return new _DelimiterRun._(
-        char: parser.source.substring(runStart, runStart + 1),
+        char: parser.charAt(runStart),
         length: runEnd - runStart + 1,
         isLeftFlanking: leftFlanking,
         isRightFlanking: rightFlanking,
@@ -478,18 +471,23 @@
   // Whether a delimiter in this run can open emphasis or strong emphasis.
   bool get canOpen =>
       isLeftFlanking &&
-      (char == '*' || !isRightFlanking || isPrecededByPunctuation);
+      (char == $asterisk || !isRightFlanking || isPrecededByPunctuation);
 
   // Whether a delimiter in this run can close emphasis or strong emphasis.
   bool get canClose =>
       isRightFlanking &&
-      (char == '*' || !isLeftFlanking || isFollowedByPunctuation);
+      (char == $asterisk || !isLeftFlanking || isFollowedByPunctuation);
 }
 
 /// Matches syntax that has a pair of tags and becomes an element, like `*` for
 /// `<em>`. Allows nested tags.
 class TagSyntax extends InlineSyntax {
   final RegExp endPattern;
+
+  /// Whether this is parsed according to the same nesting rules as [emphasis
+  /// delimiters][].
+  ///
+  /// [emphasis delimiters]: http://spec.commonmark.org/0.28/#can-open-emphasis
   final bool requiresDelimiterRun;
 
   TagSyntax(String pattern, {String end, this.requiresDelimiterRun: false})
@@ -500,10 +498,15 @@
     var runLength = match.group(0).length;
     var matchStart = parser.pos;
     var matchEnd = parser.pos + runLength - 1;
+    if (!requiresDelimiterRun) {
+      parser.openTag(new TagState(parser.pos, matchEnd + 1, this, null));
+      return true;
+    }
+
     var delimiterRun = _DelimiterRun.tryParse(parser, matchStart, matchEnd);
     if (delimiterRun != null && delimiterRun.canOpen) {
-      parser._stack
-          .add(new TagState(parser.pos, matchEnd + 1, this, delimiterRun));
+      parser
+          .openTag(new TagState(parser.pos, matchEnd + 1, this, delimiterRun));
       return true;
     } else {
       parser.advanceBy(runLength);
@@ -517,9 +520,6 @@
     var matchEnd = parser.pos + runLength - 1;
     var openingRunLength = state.endPos - state.startPos;
     var delimiterRun = _DelimiterRun.tryParse(parser, matchStart, matchEnd);
-    if (!delimiterRun.isRightFlanking) {
-      return false;
-    }
 
     if (openingRunLength == 1 && runLength == 1) {
       parser.addNode(new Element('em', state.children));
@@ -528,7 +528,7 @@
       parser.pos = parser.pos - (runLength - 1);
       parser.start = parser.pos;
     } else if (openingRunLength > 1 && runLength == 1) {
-      parser._stack.add(
+      parser.openTag(
           new TagState(state.startPos, state.endPos - 1, this, delimiterRun));
       parser.addNode(new Element('em', state.children));
     } else if (openingRunLength == 2 && runLength == 2) {
@@ -538,11 +538,11 @@
       parser.pos = parser.pos - (runLength - 2);
       parser.start = parser.pos;
     } else if (openingRunLength > 2 && runLength == 2) {
-      parser._stack.add(
+      parser.openTag(
           new TagState(state.startPos, state.endPos - 2, this, delimiterRun));
       parser.addNode(new Element('strong', state.children));
     } else if (openingRunLength > 2 && runLength > 2) {
-      parser._stack.add(
+      parser.openTag(
           new TagState(state.startPos, state.endPos - 2, this, delimiterRun));
       parser.addNode(new Element('strong', state.children));
       parser.pos = parser.pos - (runLength - 2);
@@ -572,146 +572,454 @@
   }
 }
 
-/// Matches inline links like `[blah][id]` and `[blah](url)`.
+/// Matches links like `[blah][label]` and `[blah](url)`.
 class LinkSyntax extends TagSyntax {
+  static final _entirelyWhitespacePattern = new RegExp(r'^\s*$');
+
   final Resolver linkResolver;
 
-  /// The regex for the end of a link.
-  ///
-  /// This handles both reference-style and inline-style links as well as
-  /// optional titles for inline links. To make that a bit more palatable, this
-  /// breaks it into pieces.
-  static String get _linkPattern {
-    var refLink = r'\[([^\]]*)\]'; // `[id]` reflink id.
-    var title = r'(?:\s*"([^"]+?)"\s*|)'; // Optional title in quotes.
-    var inlineLink = '\\((\\S*?)$title\\)'; // `(url "title")` link.
-    return '\](?:($refLink|$inlineLink)|)';
+  LinkSyntax({Resolver linkResolver, String pattern: r'\['})
+      : this.linkResolver = (linkResolver ?? (String _, [String __]) => null),
+        super(pattern, end: r'\]');
 
-    // The groups matched by this are:
-    // 1: Will be non-empty if it's either a ref or inline link. Will be empty
-    //    if it's just a bare pair of square brackets with nothing after them.
-    // 2: Contains the id inside [] for a reference-style link.
-    // 3: Contains the URL for an inline link.
-    // 4: Contains the title, if present, for an inline link.
-  }
+  // The pending [TagState]s, all together, are "active" or "inactive" based on
+  // whether a link element has just been parsed.
+  //
+  // Links cannot be nested, so we must "deactivate" any pending ones. For
+  // example, take the following text:
+  //
+  //     Text [link and [more](links)](links).
+  //
+  // Once we have parsed `Text [`, there is one (pending) link in the state
+  // stack.  It is, by default, active. Once we parse the next possible link,
+  // `[more](links)`, as a real link, we must deactivate the pending links
+  // (just the one, in this case).
+  var _pendingStatesAreActive = true;
 
-  LinkSyntax({this.linkResolver, String pattern: r'\['})
-      : super(pattern, end: _linkPattern);
+  bool onMatch(InlineParser parser, Match match) {
+    var matched = super.onMatch(parser, match);
+    if (!matched) return false;
 
-  Node createNode(InlineParser parser, Match match, TagState state) {
-    if (match[1] == null) {
-      // Try for a shortcut reference link, like `[foo]`.
-      var element = _createElement(parser, match, state);
-      if (element != null) return element;
+    _pendingStatesAreActive = true;
 
-      // If we didn't match refLink or inlineLink, and it's not a _shortcut_
-      // reflink, then it means it isn't a normal Markdown link at all. Instead,
-      // we allow users of the library to specify a special resolver function
-      // ([linkResolver]) that may choose to handle this. Otherwise, it's just
-      // treated as plain text.
-      if (linkResolver == null) return null;
-
-      // Treat the contents as unparsed text even if they happen to match. This
-      // way, we can handle things like [LINK_WITH_UNDERSCORES] as a link and
-      // not get confused by the emphasis.
-      var textToResolve = parser.source.substring(state.endPos, parser.pos);
-
-      // See if we have a resolver that will generate a link for us.
-      return linkResolver(textToResolve);
-    } else {
-      return _createElement(parser, match, state);
-    }
-  }
-
-  /// Given that [match] has matched both a title and URL, creates an `<a>`
-  /// [Element] for it.
-  Element _createElement(InlineParser parser, Match match, TagState state) {
-    var link = getLink(parser, match, state);
-    if (link == null) return null;
-
-    var element = new Element('a', state.children);
-
-    element.attributes["href"] = escapeHtml(link.destination);
-    if (link.title != null) {
-      element.attributes['title'] = escapeHtml(link.title);
-    }
-
-    return element;
-  }
-
-  /// Get the Link represented by [match].
-  ///
-  /// This method can return null, if the link is a reference link, and has no
-  /// accompanying link reference definition.
-  ///
-  /// Temporarily, this is returning [LinkReference]s, for convenience, which
-  /// is an improper use of [LinkReference]s. This should change before this
-  /// package is released.
-  LinkReference getLink(InlineParser parser, Match match, TagState state) {
-    if (match[3] != null) {
-      // Inline link like [foo](url).
-      var url = match[3];
-      var title = match[4];
-
-      // For whatever reason, Markdown allows angle-bracketed URLs here.
-      if (url.startsWith('<') && url.endsWith('>')) {
-        url = url.substring(1, url.length - 1);
-      }
-
-      return new LinkReference(null, url, title);
-    } else {
-      String label;
-      String _contents() {
-        var offset = pattern.pattern.length - 1;
-        return parser.source.substring(state.startPos + offset, parser.pos);
-      }
-
-      // Reference link like [foo][bar].
-      if (match[1] == null) {
-        // There are no reference brackets ("shortcut reference link"), so infer
-        // the label from the contents.
-        label = _contents();
-      } else if (match[2] == '') {
-        // The label is empty ("[]") so infer it from the contents.
-        label = _contents();
-      } else {
-        label = match[2];
-      }
-
-      // References are case-insensitive.
-      label = label.toLowerCase();
-      return parser.document.linkReferences[label];
-    }
+    return true;
   }
 
   bool onMatchEnd(InlineParser parser, Match match, TagState state) {
-    var node = createNode(parser, match, state);
-    if (node == null) return false;
+    if (!_pendingStatesAreActive) return false;
 
-    parser.addNode(node);
+    var text = parser.source.substring(state.endPos, parser.pos);
+    // The current character is the `]` that closed the link text. Examine the
+    // next character, to determine what type of link we might have (a '('
+    // means a possible inline link; otherwise a possible reference link).
+    if (parser.pos + 1 >= parser.source.length) {
+      // In this case, the Markdown document may have ended with a shortcut
+      // reference link.
+
+      return _tryAddReferenceLink(parser, state, text);
+    }
+    // Peek at the next character; don't advance, so as to avoid later stepping
+    // backward.
+    var char = parser.charAt(parser.pos + 1);
+
+    if (char == $lparen) {
+      // Maybe an inline link, like `[text](destination)`.
+      parser.advanceBy(1);
+      var leftParenIndex = parser.pos;
+      var inlineLink = _parseInlineLink(parser);
+      if (inlineLink != null)
+        return _tryAddInlineLink(parser, state, inlineLink);
+
+      // Reset the parser position.
+      parser.pos = leftParenIndex;
+
+      // At this point, we've matched `[...](`, but that `(` did not pan out to
+      // be an inline link. We must now check if `[...]` is simply a shortcut
+      // reference link.
+      parser.advanceBy(-1);
+      return _tryAddReferenceLink(parser, state, text);
+    }
+
+    if (char == $lbracket) {
+      parser.advanceBy(1);
+      // At this point, we've matched `[...][`. Maybe a *full* reference link,
+      // like `[foo][bar]` or a *collapsed* reference link, like `[foo][]`.
+      if (parser.pos + 1 < parser.source.length &&
+          parser.charAt(parser.pos + 1) == $rbracket) {
+        // That opening `[` is not actually part of the link. Maybe a
+        // *shortcut* reference link (followed by a `[`).
+        parser.advanceBy(1);
+        return _tryAddReferenceLink(parser, state, text);
+      }
+      var label = _parseReferenceLinkLabel(parser);
+      if (label != null) return _tryAddReferenceLink(parser, state, label);
+      return false;
+    }
+
+    // The link text (inside `[...]`) was not followed by an opening `(` nor
+    // an opening `[`. Perhaps just a simple shortcut reference link (`[...]`).
+
+    return _tryAddReferenceLink(parser, state, text);
+  }
+
+  /// Resolve a possible reference link.
+  ///
+  /// Uses [linkReferences], [linkResolver], and [_createNode] to try to
+  /// resolve [label] and [state] into a [Node]. If [label] is defined in
+  /// [linkReferences] or can be resolved by [linkResolver], returns a [Node]
+  /// that links to the resolved URL.
+  ///
+  /// Otherwise, returns `null`.
+  ///
+  /// [label] does not need to be normalized.
+  Node _resolveReferenceLink(
+      String label, TagState state, Map<String, LinkReference> linkReferences) {
+    var normalizedLabel = label.toLowerCase();
+    var linkReference = linkReferences[normalizedLabel];
+    if (linkReference != null) {
+      return _createNode(state, linkReference.destination, linkReference.title);
+    } else {
+      // This link has no reference definition. But we allow users of the
+      // library to specify a custom resolver function ([linkResolver]) that
+      // may choose to handle this. Otherwise, it's just treated as plain
+      // text.
+
+      // Normally, label text does not get parsed as inline Markdown. However,
+      // for the benefit of the link resolver, we need to at least escape
+      // brackets, so that, e.g. a link resolver can receive `[\[\]]` as `[]`.
+      return linkResolver(label
+          .replaceAll(r'\\', r'\')
+          .replaceAll(r'\[', '[')
+          .replaceAll(r'\]', ']'));
+    }
+  }
+
+  /// Create the node represented by a Markdown link.
+  Node _createNode(TagState state, String destination, String title) {
+    var element = new Element('a', state.children);
+    element.attributes['href'] = escapeAttribute(destination);
+    if (title != null && title.isNotEmpty) {
+      element.attributes['title'] = escapeAttribute(title);
+    }
+    return element;
+  }
+
+  // Add a reference link node to [parser]'s AST.
+  //
+  // Returns whether the link was added successfully.
+  bool _tryAddReferenceLink(InlineParser parser, TagState state, String label) {
+    var element =
+        _resolveReferenceLink(label, state, parser.document.linkReferences);
+    if (element == null) {
+      return false;
+    }
+    parser.addNode(element);
+    parser.start = parser.pos;
+    _pendingStatesAreActive = false;
     return true;
   }
+
+  // Add an inline link node to [parser]'s AST.
+  //
+  // Returns whether the link was added successfully.
+  bool _tryAddInlineLink(InlineParser parser, TagState state, InlineLink link) {
+    var element = _createNode(state, link.destination, link.title);
+    if (element == null) return false;
+    parser.addNode(element);
+    parser.start = parser.pos;
+    _pendingStatesAreActive = false;
+    return true;
+  }
+
+  /// Parse a reference link label at the current position.
+  ///
+  /// Specifically, [parser.pos] is expected to be pointing at the `[` which
+  /// opens the link label.
+  ///
+  /// Returns the label if it could be parsed, or `null` if not.
+  String _parseReferenceLinkLabel(InlineParser parser) {
+    // Walk past the opening `[`.
+    parser.advanceBy(1);
+    if (parser.isDone) return null;
+
+    var buffer = new StringBuffer();
+    while (true) {
+      var char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        var next = parser.charAt(parser.pos);
+        if (next != $backslash && next != $rbracket) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == $rbracket) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null;
+      // TODO(srawlins): only check 999 characters, for performance reasons?
+    }
+
+    var label = buffer.toString();
+
+    // A link label must contain at least one non-whitespace character.
+    if (_entirelyWhitespacePattern.hasMatch(label)) return null;
+
+    return label;
+  }
+
+  /// Parse an inline [InlineLink] at the current position.
+  ///
+  /// At this point, we have parsed a link's (or image's) opening `[`, and then
+  /// a matching closing `]`, and [parser.pos] is pointing at an opening `(`.
+  /// This method will then attempt to parse a link destination wrapped in `<>`,
+  /// such as `(<http://url>)`, or a bare link destination, such as
+  /// `(http://url)`, or a link destination with a title, such as
+  /// `(http://url "title")`.
+  ///
+  /// Returns the [InlineLink] if one was parsed, or `null` if not.
+  InlineLink _parseInlineLink(InlineParser parser) {
+    // Start walking to the character just after the opening `(`.
+    parser.advanceBy(1);
+
+    _moveThroughWhitespace(parser);
+    if (parser.isDone) return null; // EOF. Not a link.
+
+    if (parser.charAt(parser.pos) == $lt) {
+      // Maybe a `<...>`-enclosed link destination.
+      return _parseInlineBracketedLink(parser);
+    } else {
+      return _parseInlineBareDestinationLink(parser);
+    }
+  }
+
+  /// Parse an inline link with a bracketed destination (a destination wrapped
+  /// in `<...>`). The current position of the parser must be the first
+  /// character of the destination.
+  InlineLink _parseInlineBracketedLink(InlineParser parser) {
+    parser.advanceBy(1);
+
+    var buffer = new StringBuffer();
+    while (true) {
+      var char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        var next = parser.charAt(parser.pos);
+        if (char == $space || char == $lf || char == $cr || char == $ff) {
+          // Not a link (no whitespace allowed within `<...>`).
+          return null;
+        }
+        // TODO: Follow the backslash spec better here.
+        // http://spec.commonmark.org/0.28/#backslash-escapes
+        if (next != $backslash && next != $gt) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == $space || char == $lf || char == $cr || char == $ff) {
+        // Not a link (no whitespace allowed within `<...>`).
+        return null;
+      } else if (char == $gt) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null;
+    }
+    var destination = buffer.toString();
+
+    parser.advanceBy(1);
+    var char = parser.charAt(parser.pos);
+    if (char == $space || char == $lf || char == $cr || char == $ff) {
+      var title = _parseTitle(parser);
+      if (title == null && parser.charAt(parser.pos) != $rparen) {
+        // This looked like an inline link, until we found this $space
+        // followed by mystery characters; no longer a link.
+        return null;
+      }
+      return new InlineLink(destination, title: title);
+    } else if (char == $rparen) {
+      return new InlineLink(destination);
+    } else {
+      // We parsed something like `[foo](<url>X`. Not a link.
+      return null;
+    }
+  }
+
+  /// Parse an inline link with a "bare" destination (a destination _not_
+  /// wrapped in `<...>`). The current position of the parser must be the first
+  /// character of the destination.
+  InlineLink _parseInlineBareDestinationLink(InlineParser parser) {
+    // According to
+    // [CommonMark](http://spec.commonmark.org/0.28/#link-destination):
+    //
+    // > A link destination consists of [...] a nonempty sequence of
+    // > characters [...], and includes parentheses only if (a) they are
+    // > backslash-escaped or (b) they are part of a balanced pair of
+    // > unescaped parentheses.
+    //
+    // We need to count the open parens. We start with 1 for the paren that
+    // opened the destination.
+    var parenCount = 1;
+    var buffer = new StringBuffer();
+
+    while (true) {
+      var char = parser.charAt(parser.pos);
+      switch (char) {
+        case $backslash:
+          parser.advanceBy(1);
+          if (parser.isDone) return null; // EOF. Not a link.
+          var next = parser.charAt(parser.pos);
+          // Parentheses may be escaped.
+          //
+          // http://spec.commonmark.org/0.28/#example-467
+          if (next != $backslash && next != $lparen && next != $rparen) {
+            buffer.writeCharCode(char);
+          }
+          buffer.writeCharCode(next);
+          break;
+
+        case $space:
+        case $lf:
+        case $cr:
+        case $ff:
+          var destination = buffer.toString();
+          var title = _parseTitle(parser);
+          if (title == null && parser.charAt(parser.pos) != $rparen) {
+            // This looked like an inline link, until we found this $space
+            // followed by mystery characters; no longer a link.
+            return null;
+          }
+          // [_parseTitle] made sure the title was followed by a closing `)`
+          // (but it's up to the code here to examine the balance of
+          // parentheses).
+          parenCount--;
+          if (parenCount == 0) {
+            return new InlineLink(destination, title: title);
+          }
+          break;
+
+        case $lparen:
+          parenCount++;
+          buffer.writeCharCode(char);
+          break;
+
+        case $rparen:
+          parenCount--;
+          if (parenCount == 0) {
+            var destination = buffer.toString();
+            return new InlineLink(destination);
+          }
+          buffer.writeCharCode(char);
+          break;
+
+        default:
+          buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null; // EOF. Not a link.
+    }
+  }
+
+  // Walk the parser forward through any whitespace.
+  void _moveThroughWhitespace(InlineParser parser) {
+    while (true) {
+      var char = parser.charAt(parser.pos);
+      if (char != $space &&
+          char != $tab &&
+          char != $lf &&
+          char != $vt &&
+          char != $cr &&
+          char != $ff) {
+        return;
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return;
+    }
+  }
+
+  // Parse a link title in [parser] at its current position. The parser's
+  // current position should be a whitespace character that followed a link
+  // destination.
+  String _parseTitle(InlineParser parser) {
+    _moveThroughWhitespace(parser);
+    if (parser.isDone) return null;
+
+    // The whitespace should be followed by a title delimiter.
+    var delimiter = parser.charAt(parser.pos);
+    if (delimiter != $apostrophe &&
+        delimiter != $quote &&
+        delimiter != $lparen) {
+      return null;
+    }
+
+    var closeDelimiter = delimiter == $lparen ? $rparen : delimiter;
+    parser.advanceBy(1);
+
+    // Now we look for an un-escaped closing delimiter.
+    var buffer = new StringBuffer();
+    while (true) {
+      var char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        var next = parser.charAt(parser.pos);
+        if (next != $backslash && next != closeDelimiter) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == closeDelimiter) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null;
+    }
+    var title = buffer.toString();
+
+    // Advance past the closing delimiter.
+    parser.advanceBy(1);
+    if (parser.isDone) return null;
+    _moveThroughWhitespace(parser);
+    if (parser.isDone) return null;
+    if (parser.charAt(parser.pos) != $rparen) return null;
+    return title;
+  }
 }
 
 /// Matches images like `![alternate text](url "optional title")` and
-/// `![alternate text][url reference]`.
+/// `![alternate text][label]`.
 class ImageSyntax extends LinkSyntax {
   ImageSyntax({Resolver linkResolver})
       : super(linkResolver: linkResolver, pattern: r'!\[');
 
-  /// Creates an <img> element from the given complete [match].
-  Element _createElement(InlineParser parser, Match match, TagState state) {
-    var link = getLink(parser, match, state);
-    if (link == null) return null;
-    var image = new Element.empty("img");
-    image.attributes["src"] = escapeHtml(link.destination);
-    image.attributes["alt"] = state?.textContent ?? '';
-
-    if (link.title != null) {
-      image.attributes["title"] = escapeHtml(link.title);
+  Node _createNode(TagState state, String destination, String title) {
+    var element = new Element.empty('img');
+    element.attributes['src'] = escapeHtml(destination);
+    element.attributes['alt'] = state?.textContent ?? '';
+    if (title != null && title.isNotEmpty) {
+      element.attributes['title'] = escapeAttribute(title);
     }
+    return element;
+  }
 
-    return image;
+  // Add a reference image node to [parser]'s AST.
+  //
+  // Resolves [label] via [_resolveReferenceLink]. Unlike the [LinkSyntax]
+  // version, this does not deactivate the pending link states.
+  //
+  // Returns whether the image was added successfully.
+  bool _tryAddReferenceLink(InlineParser parser, TagState state, String label) {
+    var element =
+        _resolveReferenceLink(label, state, parser.document.linkReferences);
+    if (element == null) {
+      return false;
+    }
+    parser.addNode(element);
+    parser.start = parser.pos;
+    return true;
   }
 }
 
@@ -732,7 +1040,7 @@
   CodeSyntax() : super(_pattern);
 
   bool tryMatch(InlineParser parser, [int startMatchPos]) {
-    if (parser.pos > 0 && parser.source[parser.pos - 1] == '`') {
+    if (parser.pos > 0 && parser.charAt(parser.pos - 1) == $backquote) {
       // Not really a match! We can't just sneak past one backtick to try the
       // next character. An example of this situation would be:
       //
@@ -814,6 +1122,7 @@
       return true;
     }
 
+    // TODO: Move this logic into TagSyntax.
     var runLength = endMatch.group(0).length;
     var openingRunLength = endPos - startPos;
     var closingMatchStart = parser.pos;
@@ -884,3 +1193,10 @@
   String get textContent =>
       children.map((Node child) => child.textContent).join('');
 }
+
+class InlineLink {
+  final String destination;
+  final String title;
+
+  InlineLink(this.destination, {this.title});
+}

diff --git a/lib/src/util.dart b/lib/src/util.dart
index f670389..7e0df9c 100644
--- a/lib/src/util.dart
+++ b/lib/src/util.dart

@@ -1,4 +1,71 @@
 import 'dart:convert';
 
+import 'package:charcode/charcode.dart';
+
 String escapeHtml(String html) =>
     const HtmlEscape(HtmlEscapeMode.ELEMENT).convert(html);
+
+/// Escapes the contents of [value], so that it may be used as an HTML
+/// attribute.
+///
+/// Backslash escapes are resolved while copying; based on
+/// http://spec.commonmark.org/0.28/#backslash-escapes:
+///
+/// * `\"` is written as `&quot;`.
+/// * A backslash before any other ASCII punctuation character is dropped and
+///   the character itself is written as-is.
+/// * A backslash before any other character is written as `%5C`, followed by
+///   that character.
+/// * A backslash at the very end of [value] is written unchanged.
+/// * A `"` that is not preceded by a backslash is written as `%22`.
+///
+/// Every other code unit is copied through unchanged.
+String escapeAttribute(String value) {
+  var result = new StringBuffer();
+  // `value.length` already is the number of UTF-16 code units, so there is no
+  // need to build a `value.codeUnits` view on every iteration.
+  var length = value.length;
+  for (var i = 0; i < length; i++) {
+    var ch = value.codeUnitAt(i);
+    if (ch == $backslash) {
+      // Step onto the character that the backslash applies to.
+      i++;
+      if (i == length) {
+        // Trailing backslash with nothing after it: keep it literally.
+        result.writeCharCode(ch);
+        break;
+      }
+      ch = value.codeUnitAt(i);
+      switch (ch) {
+        case $quote:
+          result.write('&quot;');
+          break;
+        case $exclamation:
+        case $hash:
+        case $dollar:
+        case $percent:
+        case $ampersand:
+        case $apostrophe:
+        case $lparen:
+        case $rparen:
+        case $asterisk:
+        case $plus:
+        case $comma:
+        case $dash:
+        case $dot:
+        case $slash:
+        case $colon:
+        case $semicolon:
+        case $lt:
+        case $equal:
+        case $gt:
+        case $question:
+        case $at:
+        case $lbracket:
+        case $backslash:
+        case $rbracket:
+        case $caret:
+        case $underscore:
+        case $backquote:
+        case $lbrace:
+        case $bar:
+        case $rbrace:
+        case $tilde:
+          // Escapable punctuation: drop the backslash, keep the character.
+          result.writeCharCode(ch);
+          break;
+        default:
+          // Not an escapable character: keep the backslash, percent-encoded.
+          result.write('%5C');
+          result.writeCharCode(ch);
+      }
+    } else if (ch == $quote) {
+      result.write('%22');
+    } else {
+      result.writeCharCode(ch);
+    }
+  }
+  return result.toString();
+}

diff --git a/pubspec.yaml b/pubspec.yaml
index 2bcabaf..05da83a 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml

@@ -12,6 +12,7 @@
 
 dependencies:
   args: '^1.0.0'
+  charcode: '^1.1.0'
 
 dev_dependencies:
   collection: '^1.2.0'

diff --git a/test/markdown_test.dart b/test/markdown_test.dart
index 9860d8e..bab44d2 100644
--- a/test/markdown_test.dart
+++ b/test/markdown_test.dart

@@ -25,7 +25,7 @@
       inlineSyntaxes: [new InlineHtmlSyntax()]);
 
   group('Resolver', () {
-    Node nyanResolver(String text) => new Text('~=[,,_${text}_,,]:3');
+    Node nyanResolver(String text, [_]) => new Text('~=[,,_${text}_,,]:3');
     validateCore(
         'simple link resolver',
         '''
@@ -55,6 +55,26 @@
 <p>resolve ~=[,,_*star* _underline__,,]:3 thing</p>
 ''',
         linkResolver: nyanResolver);
+
+    validateCore(
+        'link resolver uses un-normalized link label',
+        '''
+resolve [TH  IS] thing
+''',
+        '''
+<p>resolve ~=[,,_TH  IS_,,]:3 thing</p>
+''',
+        linkResolver: nyanResolver);
+
+    validateCore(
+        'can resolve brackets',
+        r'''
+resolve [\[\]] thing
+''',
+        '''
+<p>resolve ~=[,,_[]_,,]:3 thing</p>
+''',
+        linkResolver: nyanResolver);
   });
 
   group('Custom inline syntax', () {
@@ -69,7 +89,7 @@
 
     validateCore('dart custom links', 'links [are<foo>] awesome',
         '<p>links <a>are&lt;foo></a> awesome</p>\n',
-        linkResolver: (text) =>
+        linkResolver: (String text, [_]) =>
             new Element.text('a', text.replaceAll('<', '&lt;')));
 
     // TODO(amouravski): need more tests here for custom syntaxes, as some

diff --git a/test/original/inline_links.unit b/test/original/inline_links.unit
index dd0a65d..e62cddd 100644
--- a/test/original/inline_links.unit
+++ b/test/original/inline_links.unit

@@ -38,6 +38,11 @@
 
 <<<
 <p>links <a href="http://example.com">are</a> awesome</p>
+>>> URL wrapped in angle brackets with a title; https://github.com/commonmark/CommonMark/issues/521
+links [are](<http://example.com> "title") awesome
+
+<<<
+<p>links <a href="http://example.com" title="title">are</a> awesome</p>
 >>> multi-line link
 links [are
 awesome](<http://example.com>).
@@ -61,3 +66,13 @@
 
 <<<
 <p>links [are <em>awesome</em>]</p>
+>>> links with escaped parens
+[a](\(yes-a-link)
+[a](\(yes-a-link\))
+[a](\\(not-a-link\))
+[a](\\(yes-a-link\)))
+<<<
+<p><a href="(yes-a-link">a</a>
+<a href="(yes-a-link)">a</a>
+[a](\(not-a-link))
+<a href="(yes-a-link))">a</a></p>

diff --git a/tool/common_mark_stats.json b/tool/common_mark_stats.json
index 1a78497..4916da7 100644
--- a/tool/common_mark_stats.json
+++ b/tool/common_mark_stats.json

@@ -51,8 +51,8 @@
   "296": "strict",
   "297": "strict",
   "298": "strict",
-  "299": "fail",
-  "300": "fail",
+  "299": "strict",
+  "300": "strict",
   "301": "fail"
  },
  "Blank lines": {
@@ -414,22 +414,22 @@
   "463": "strict",
   "464": "loose",
   "465": "strict",
-  "466": "fail",
-  "467": "fail",
-  "468": "fail",
-  "469": "fail",
-  "470": "fail",
-  "471": "fail",
+  "466": "strict",
+  "467": "strict",
+  "468": "strict",
+  "469": "strict",
+  "470": "strict",
+  "471": "strict",
   "472": "strict",
-  "473": "fail",
+  "473": "strict",
   "474": "fail",
-  "475": "fail",
-  "476": "fail",
+  "475": "strict",
+  "476": "strict",
   "477": "fail",
   "478": "fail",
   "479": "loose",
   "480": "fail",
-  "481": "fail",
+  "481": "strict",
   "482": "strict",
   "483": "fail",
   "484": "strict",
@@ -437,8 +437,8 @@
   "486": "strict",
   "487": "strict",
   "488": "loose",
-  "489": "fail",
-  "490": "fail",
+  "489": "strict",
+  "490": "strict",
   "491": "fail",
   "492": "fail",
   "493": "strict",
@@ -451,8 +451,8 @@
   "500": "strict",
   "501": "strict",
   "502": "loose",
-  "503": "loose",
-  "504": "fail",
+  "503": "strict",
+  "504": "strict",
   "505": "fail",
   "506": "strict",
   "507": "strict",
@@ -460,7 +460,7 @@
   "509": "strict",
   "510": "strict",
   "511": "strict",
-  "512": "fail",
+  "512": "strict",
   "513": "strict",
   "514": "strict",
   "515": "strict",
@@ -488,9 +488,9 @@
   "537": "strict",
   "538": "strict",
   "539": "strict",
-  "540": "loose",
+  "540": "strict",
   "541": "strict",
-  "542": "loose"
+  "542": "strict"
  },
  "List items": {
   "216": "loose",
@@ -584,7 +584,7 @@
  "Raw HTML": {
   "584": "strict",
   "585": "fail",
-  "586": "fail",
+  "586": "strict",
   "587": "strict",
   "588": "strict",
   "589": "loose",

diff --git a/tool/common_mark_stats.txt b/tool/common_mark_stats.txt
index 9d03b4e..3692c47 100644
--- a/tool/common_mark_stats.txt
+++ b/tool/common_mark_stats.txt

@@ -1,6 +1,6 @@
   17 of   18 –  94.4%  ATX headings
   19 of   19 – 100.0%  Autolinks
-   9 of   13 –  69.2%  Backslash escapes
+  11 of   13 –  84.6%  Backslash escapes
    1 of    1 – 100.0%  Blank lines
   22 of   25 –  88.0%  Block quotes
   16 of   17 –  94.1%  Code spans
@@ -13,15 +13,15 @@
   11 of   12 –  91.7%  Indented code blocks
    1 of    1 – 100.0%  Inlines
   21 of   23 –  91.3%  Link reference definitions
-  60 of   84 –  71.4%  Links
+  75 of   84 –  89.3%  Links
   44 of   48 –  91.7%  List items
   18 of   24 –  75.0%  Lists
    8 of    8 – 100.0%  Paragraphs
    1 of    1 – 100.0%  Precedence
-  15 of   21 –  71.4%  Raw HTML
+  16 of   21 –  76.2%  Raw HTML
   25 of   26 –  96.2%  Setext headings
    2 of    2 – 100.0%  Soft line breaks
   11 of   11 – 100.0%  Tabs
    3 of    3 – 100.0%  Textual content
   19 of   19 – 100.0%  Thematic breaks
- 561 of  624 –  89.9%  TOTAL
+ 579 of  624 –  92.8%  TOTAL

diff --git a/tool/gfm_stats.json b/tool/gfm_stats.json
index 88d8819..2fae2fb 100644
--- a/tool/gfm_stats.json
+++ b/tool/gfm_stats.json

@@ -64,8 +64,8 @@
   "306": "strict",
   "307": "strict",
   "308": "strict",
-  "309": "fail",
-  "310": "fail",
+  "309": "strict",
+  "310": "strict",
   "311": "fail"
  },
  "Blank lines": {
@@ -430,22 +430,22 @@
   "476": "strict",
   "477": "loose",
   "478": "strict",
-  "479": "fail",
-  "480": "fail",
-  "481": "fail",
-  "482": "fail",
-  "483": "fail",
-  "484": "fail",
+  "479": "strict",
+  "480": "strict",
+  "481": "strict",
+  "482": "strict",
+  "483": "strict",
+  "484": "strict",
   "485": "strict",
-  "486": "fail",
+  "486": "strict",
   "487": "fail",
-  "488": "fail",
-  "489": "fail",
+  "488": "strict",
+  "489": "strict",
   "490": "fail",
   "491": "fail",
   "492": "loose",
   "493": "fail",
-  "494": "fail",
+  "494": "strict",
   "495": "strict",
   "496": "fail",
   "497": "strict",
@@ -453,8 +453,8 @@
   "499": "strict",
   "500": "strict",
   "501": "loose",
-  "502": "fail",
-  "503": "fail",
+  "502": "strict",
+  "503": "strict",
   "504": "fail",
   "505": "fail",
   "506": "strict",
@@ -467,8 +467,8 @@
   "513": "strict",
   "514": "strict",
   "515": "loose",
-  "516": "loose",
-  "517": "fail",
+  "516": "strict",
+  "517": "strict",
   "518": "fail",
   "519": "strict",
   "520": "strict",
@@ -476,7 +476,7 @@
   "522": "strict",
   "523": "strict",
   "524": "strict",
-  "525": "fail",
+  "525": "strict",
   "526": "strict",
   "527": "strict",
   "528": "strict",
@@ -504,9 +504,9 @@
   "550": "strict",
   "551": "strict",
   "552": "strict",
-  "553": "loose",
+  "553": "strict",
   "554": "strict",
-  "555": "loose"
+  "555": "strict"
  },
  "List items": {
   "224": "loose",
@@ -600,7 +600,7 @@
  "Raw HTML": {
   "608": "strict",
   "609": "fail",
-  "610": "fail",
+  "610": "strict",
   "611": "strict",
   "612": "strict",
   "613": "loose",

diff --git a/tool/gfm_stats.txt b/tool/gfm_stats.txt
index 11b8b83..848e96f 100644
--- a/tool/gfm_stats.txt
+++ b/tool/gfm_stats.txt

@@ -1,7 +1,7 @@
   17 of   18 –  94.4%  ATX headings
   17 of   19 –  89.5%  Autolinks
    8 of   11 –  72.7%  Autolinks (extension)
-   9 of   13 –  69.2%  Backslash escapes
+  11 of   13 –  84.6%  Backslash escapes
    1 of    1 – 100.0%  Blank lines
   22 of   25 –  88.0%  Block quotes
   16 of   17 –  94.1%  Code spans
@@ -15,12 +15,12 @@
   11 of   12 –  91.7%  Indented code blocks
    1 of    1 – 100.0%  Inlines
   21 of   23 –  91.3%  Link reference definitions
-  60 of   84 –  71.4%  Links
+  75 of   84 –  89.3%  Links
   44 of   48 –  91.7%  List items
   18 of   24 –  75.0%  Lists
    8 of    8 – 100.0%  Paragraphs
    1 of    1 – 100.0%  Precedence
-  15 of   21 –  71.4%  Raw HTML
+  16 of   21 –  76.2%  Raw HTML
   25 of   26 –  96.2%  Setext headings
    2 of    2 – 100.0%  Soft line breaks
    3 of    3 – 100.0%  Strikethrough (extension)
@@ -28,4 +28,4 @@
   11 of   11 – 100.0%  Tabs
    3 of    3 – 100.0%  Textual content
   19 of   19 – 100.0%  Thematic breaks
- 577 of  647 –  89.2%  TOTAL
+ 595 of  647 –  92.0%  TOTAL
commit	edd15a170e3ca36189dd1fc42b3b0fbe5d10ed60	[log] [tgz]
author	Sam Rawlins <sam.rawlins@gmail.com>	Fri Mar 23 12:26:38 2018 -0700
committer	GitHub <noreply@github.com>	Fri Mar 23 12:26:38 2018 -0700
tree	a09a0cce1a762a2569a7bd5831bc617a768a6518
parent	1b408e55095dec9f3eca30cae4c1d746ecd43dc5 [diff]