Split block_parser.dart and inline_parser.dart (#422)
diff --git a/lib/markdown.dart b/lib/markdown.dart
index 3af6c6c..205ba38 100644
--- a/lib/markdown.dart
+++ b/lib/markdown.dart
@@ -38,10 +38,47 @@
export 'src/ast.dart';
export 'src/block_parser.dart';
+export 'src/block_syntaxes/block_html_syntax.dart';
+export 'src/block_syntaxes/block_syntax.dart';
+export 'src/block_syntaxes/block_tag_block_html_syntax.dart';
+export 'src/block_syntaxes/blockquote_syntax.dart';
+export 'src/block_syntaxes/code_block_syntax.dart';
+export 'src/block_syntaxes/dummy_block_syntax.dart';
+export 'src/block_syntaxes/empty_block_syntax.dart';
+export 'src/block_syntaxes/fenced_blockquote_syntax.dart';
+export 'src/block_syntaxes/fenced_code_block_syntax.dart';
+export 'src/block_syntaxes/header_syntax.dart';
+export 'src/block_syntaxes/header_with_id_syntax.dart';
+export 'src/block_syntaxes/horizontal_rule_syntax.dart';
+export 'src/block_syntaxes/list_syntax.dart';
+export 'src/block_syntaxes/long_block_html_syntax.dart';
+export 'src/block_syntaxes/ordered_list_syntax.dart';
+export 'src/block_syntaxes/other_tag_block_html_syntax.dart';
+export 'src/block_syntaxes/paragraph_syntax.dart';
+export 'src/block_syntaxes/setext_header_syntax.dart';
+export 'src/block_syntaxes/setext_header_with_id_syntax.dart';
+export 'src/block_syntaxes/table_syntax.dart';
+export 'src/block_syntaxes/unordered_list_syntax.dart';
export 'src/document.dart';
export 'src/emojis.dart';
export 'src/extension_set.dart';
export 'src/html_renderer.dart';
export 'src/inline_parser.dart';
+export 'src/inline_syntaxes/autolink_extension_syntax.dart';
+export 'src/inline_syntaxes/autolink_syntax.dart';
+export 'src/inline_syntaxes/code_syntax.dart';
+export 'src/inline_syntaxes/delimiter_syntax.dart';
+export 'src/inline_syntaxes/email_autolink_syntax.dart';
+export 'src/inline_syntaxes/emoji_syntax.dart';
+export 'src/inline_syntaxes/emphasis_syntax.dart';
+export 'src/inline_syntaxes/escape_syntax.dart';
+export 'src/inline_syntaxes/image_syntax.dart';
+export 'src/inline_syntaxes/inline_html_syntax.dart';
+export 'src/inline_syntaxes/inline_syntax.dart';
+export 'src/inline_syntaxes/line_break_syntax.dart';
+export 'src/inline_syntaxes/link_syntax.dart';
+export 'src/inline_syntaxes/strikethrough_syntax.dart';
+export 'src/inline_syntaxes/tag_syntax.dart';
+export 'src/inline_syntaxes/text_syntax.dart';
const version = packageVersion;
diff --git a/lib/src/block_parser.dart b/lib/src/block_parser.dart
index ab05de9..cb88d8e 100644
--- a/lib/src/block_parser.dart
+++ b/lib/src/block_parser.dart
@@ -2,62 +2,22 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
-import 'package:charcode/charcode.dart';
-
import 'ast.dart';
+import 'block_syntaxes/block_syntax.dart';
+import 'block_syntaxes/block_tag_block_html_syntax.dart';
+import 'block_syntaxes/blockquote_syntax.dart';
+import 'block_syntaxes/code_block_syntax.dart';
+import 'block_syntaxes/dummy_block_syntax.dart';
+import 'block_syntaxes/empty_block_syntax.dart';
+import 'block_syntaxes/header_syntax.dart';
+import 'block_syntaxes/horizontal_rule_syntax.dart';
+import 'block_syntaxes/long_block_html_syntax.dart';
+import 'block_syntaxes/ordered_list_syntax.dart';
+import 'block_syntaxes/other_tag_block_html_syntax.dart';
+import 'block_syntaxes/paragraph_syntax.dart';
+import 'block_syntaxes/setext_header_syntax.dart';
+import 'block_syntaxes/unordered_list_syntax.dart';
import 'document.dart';
-import 'util.dart';
-
-/// The line contains only whitespace or is empty.
-final _emptyPattern = RegExp(r'^(?:[ \t]*)$');
-
-/// A series of `=` or `-` (on the next line) define setext-style headers.
-///
-/// Group 1 is the run of underline characters.
-final _setextPattern = RegExp(r'^[ ]{0,3}(=+|-+)\s*$');
-
-/// Leading (and trailing) `#` define atx-style headers.
-///
-/// Starts with 1-6 unescaped `#` characters which must not be followed by a
-/// non-space character. Line may end with any number of `#` characters.
-/// Group 1 is the leading `#` run and group 2 is the header text.
-final _headerPattern = RegExp(r'^ {0,3}(#{1,6})[ \x09\x0b\x0c](.*?)#*$');
-
-/// The line starts with `>` with one optional space after.
-///
-/// Group 1 is the rest of the line.
-final _blockquotePattern = RegExp(r'^[ ]{0,3}>[ ]?(.*)$');
-
-/// A line indented four spaces. Used for code blocks and lists.
-///
-/// A tab preceded by up to three spaces counts as the same indentation.
-/// Group 1 is the line with that indentation removed.
-final _indentPattern = RegExp(r'^(?:    | {0,3}\t)(.*)$');
-
-/// Fenced code block.
-///
-/// Group 1 is the fence (three or more backticks or tildes) and group 2 is
-/// whatever follows it on the line.
-final _codeFencePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');
-
-/// Fenced blockquotes.
-final _blockquoteFencePattern = RegExp(r'^>{3}\s*$');
-
-/// Three or more hyphens, asterisks or underscores by themselves. Note that
-/// a line like `----` is valid as both HR and SETEXT. In case of a tie,
-/// SETEXT should win.
-final _hrPattern = RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$');
-
-/// A line starting with one of these markers: `-`, `*`, `+`. May have up to
-/// three leading spaces before the marker and any number of spaces or tabs
-/// after.
-///
-/// Contains an empty dummy group at [2] so that the group indices line up with
-/// [_olPattern], where [2] captures the digits that begin the list marker.
-final _ulPattern = RegExp(r'^([ ]{0,3})()([*+-])(([ \t])([ \t]*)(.*))?$');
-
-/// A line starting with a number like `123.`. May have up to three leading
-/// spaces before the marker and any number of spaces or tabs after.
-final _olPattern =
-    RegExp(r'^([ ]{0,3})(\d{1,9})([\.)])(([ \t])([ \t]*)(.*))?$');
-
-/// A line of hyphens separated by at least one pipe.
-final _tablePattern = RegExp(
-    r'^[ ]{0,3}\|?([ \t]*:?\-+:?[ \t]*\|)+([ \t]|[ \t]*:?\-+:?[ \t]*)?$');
-
-/// A pattern which should never be used. It just satisfies non-nullability of
-/// pattern fields.
-final _dummyPattern = RegExp('');
/// Maintains the internal state needed to parse a series of lines into blocks
/// of Markdown suitable for further inline parsing.
@@ -170,1146 +130,3 @@
return blocks;
}
}
-
-/// Base class for everything that can recognize and parse one kind of block.
-abstract class BlockSyntax {
-  const BlockSyntax();
-
-  /// The regular expression that identifies the start of this block, if any.
-  RegExp get pattern;
-
-  /// Whether this syntax is allowed to end the block that precedes it.
-  ///
-  /// Consulted by [isAtBlockEnd]; `true` unless a subclass overrides it.
-  bool canEndBlock(BlockParser parser) => true;
-
-  /// Whether the current line of [parser] matches [pattern].
-  bool canParse(BlockParser parser) => pattern.hasMatch(parser.current);
-
-  /// Parses the block starting at the current line of [parser].
-  Node? parse(BlockParser parser);
-
-  /// Consumes consecutive lines that match [pattern] and returns the first
-  /// capture group of each one.
-  List<String?> parseChildLines(BlockParser parser) {
-    final captured = <String?>[];
-
-    while (!parser.isDone) {
-      final lineMatch = pattern.firstMatch(parser.current);
-      // The first non-matching line ends the block and is left unconsumed.
-      if (lineMatch == null) return captured;
-      captured.add(lineMatch[1]);
-      parser.advance();
-    }
-
-    return captured;
-  }
-
-  /// Whether the current line of [parser] should end the previous block.
-  ///
-  /// That is the case once the input is exhausted, or when some registered
-  /// syntax both matches the line and is allowed to end a block.
-  static bool isAtBlockEnd(BlockParser parser) =>
-      parser.isDone ||
-      parser.blockSyntaxes.any(
-        (syntax) => syntax.canParse(parser) && syntax.canEndBlock(parser),
-      );
-
-  /// Generates a valid HTML anchor from the inner text of [element].
-  ///
-  /// The text of the first child is lower-cased and trimmed, every character
-  /// outside `a-z`, `0-9`, space, `_` and `-` is dropped, and each remaining
-  /// whitespace character becomes a `-`.
-  static String generateAnchorHash(Element element) {
-    final innerText = element.children!.first.textContent;
-    final normalized = innerText.toLowerCase().trim();
-    final stripped = normalized.replaceAll(RegExp('[^a-z0-9 _-]'), '');
-    return stripped.replaceAll(RegExp(r'\s'), '-');
-  }
-}
-
-/// Swallows blank lines without producing any output node.
-class EmptyBlockSyntax extends BlockSyntax {
-  const EmptyBlockSyntax();
-
-  @override
-  RegExp get pattern => _emptyPattern;
-
-  /// Records on [parser] that a blank line was seen, skips the line, and
-  /// returns `null` so that nothing is emitted.
-  @override
-  Node? parse(BlockParser parser) {
-    parser
-      ..encounteredBlankLine = true
-      ..advance();
-    return null;
-  }
-}
-
-/// Parses setext-style headers: text lines followed by an underline made of
-/// `=` (rendered as `h1`) or `-` (rendered as `h2`).
-class SetextHeaderSyntax extends BlockSyntax {
-  const SetextHeaderSyntax();
-
-  @override
-  RegExp get pattern => _dummyPattern;
-
-  /// Looks ahead from the current line for an underline, giving up as soon as
-  /// a line is reached that could not be paragraph text.
-  @override
-  bool canParse(BlockParser parser) {
-    if (!_interperableAsParagraph(parser.current)) return false;
-
-    for (var offset = 1;; offset++) {
-      final upcoming = parser.peek(offset);
-      // Ran out of lines without ever reaching an underline.
-      if (upcoming == null) return false;
-      if (_setextPattern.hasMatch(upcoming)) return true;
-      // The header text has to stay paragraph-like all the way down.
-      if (!_interperableAsParagraph(upcoming)) return false;
-    }
-  }
-
-  /// Collects the text lines up to the underline and wraps them in a header
-  /// element whose level is chosen by the underline character.
-  @override
-  Node parse(BlockParser parser) {
-    final textLines = <String>[];
-    String? headerTag;
-
-    while (!parser.isDone) {
-      final underline = _setextPattern.firstMatch(parser.current);
-      if (underline != null) {
-        // The underline itself is consumed but never becomes content.
-        headerTag = underline[1]![0] == '=' ? 'h1' : 'h2';
-        parser.advance();
-        break;
-      }
-      textLines.add(parser.current);
-      parser.advance();
-    }
-
-    final contents = UnparsedContent(textLines.join('\n').trimRight());
-    return Element(headerTag!, [contents]);
-  }
-
-  /// Whether [line] matches none of the patterns that start another kind of
-  /// block, and so may be read as paragraph text.
-  bool _interperableAsParagraph(String line) {
-    final blockStarters = [
-      _indentPattern,
-      _codeFencePattern,
-      _headerPattern,
-      _blockquotePattern,
-      _hrPattern,
-      _ulPattern,
-      _olPattern,
-      _emptyPattern,
-    ];
-    return !blockStarters.any((starter) => starter.hasMatch(line));
-  }
-}
-
-/// A [SetextHeaderSyntax] that also assigns each parsed header a generated
-/// ID derived from its text.
-class SetextHeaderWithIdSyntax extends SetextHeaderSyntax {
-  const SetextHeaderWithIdSyntax();
-
-  /// Parses the header as the superclass does, then sets its `generatedId`
-  /// from [BlockSyntax.generateAnchorHash].
-  @override
-  Node parse(BlockParser parser) {
-    final header = super.parse(parser) as Element;
-    return header..generatedId = BlockSyntax.generateAnchorHash(header);
-  }
-}
-
-/// Parses atx-style headers: `## Header ##`.
-class HeaderSyntax extends BlockSyntax {
-  const HeaderSyntax();
-
-  @override
-  RegExp get pattern => _headerPattern;
-
-  /// Consumes the current line and returns an `h1`..`h6` element; the level is
-  /// the number of leading `#` characters.
-  @override
-  Node parse(BlockParser parser) {
-    final headerMatch = pattern.firstMatch(parser.current)!;
-    parser.advance();
-
-    final hashes = headerMatch[1]!;
-    final text = headerMatch[2]!.trim();
-    return Element('h${hashes.length}', [UnparsedContent(text)]);
-  }
-}
-
-/// A [HeaderSyntax] that also assigns each parsed header a generated ID
-/// derived from its text.
-class HeaderWithIdSyntax extends HeaderSyntax {
-  const HeaderWithIdSyntax();
-
-  /// Parses the header as the superclass does, then sets its `generatedId`
-  /// from [BlockSyntax.generateAnchorHash].
-  @override
-  Node parse(BlockParser parser) {
-    final header = super.parse(parser) as Element;
-    return header..generatedId = BlockSyntax.generateAnchorHash(header);
-  }
-}
-
-/// Parses lines fenced by `>>>` into blockquotes.
-class FencedBlockquoteSyntax extends BlockSyntax {
-  const FencedBlockquoteSyntax();
-
-  @override
-  RegExp get pattern => _blockquoteFencePattern;
-
-  /// Returns the lines between the opening fence and the closing fence (or the
-  /// end of input), consuming both fences.
-  @override
-  List<String> parseChildLines(BlockParser parser) {
-    // Step over the opening fence.
-    parser.advance();
-
-    final quoted = <String>[];
-    while (!parser.isDone) {
-      final line = parser.current;
-      parser.advance();
-      // The closing fence is consumed but not kept.
-      if (pattern.hasMatch(line)) break;
-      quoted.add(line);
-    }
-    return quoted;
-  }
-
-  /// Parses the fenced lines as their own sequence of blocks and wraps the
-  /// result in a `blockquote` element.
-  @override
-  Node? parse(BlockParser parser) {
-    final nested = BlockParser(parseChildLines(parser), parser.document);
-    return Element('blockquote', nested.parseLines());
-  }
-}
-
-/// Parses email-style blockquotes: `> quote`.
-class BlockquoteSyntax extends BlockSyntax {
-  const BlockquoteSyntax();
-
-  @override
-  RegExp get pattern => _blockquotePattern;
-
-  /// Gathers the lines of the blockquote with the leading `>` stripped,
-  /// together with any paragraph continuation lines that follow them.
-  @override
-  List<String> parseChildLines(BlockParser parser) {
-    final quoted = <String>[];
-    // Whether the most recent `>` line held indented content.
-    var lastQuotedLineIndented = false;
-
-    while (!parser.isDone) {
-      final quoteMatch = pattern.firstMatch(parser.current);
-
-      if (quoteMatch == null) {
-        // A line without `>` is kept only as paragraph continuation: content
-        // that no syntax other than Paragraph claims. Indented code cannot
-        // interrupt a paragraph, so a line claimed by CodeBlockSyntax also
-        // counts, unless the previous quoted line was itself indented.
-        final claimant =
-            parser.blockSyntaxes.firstWhere((s) => s.canParse(parser));
-        final isContinuation = claimant is ParagraphSyntax ||
-            (!lastQuotedLineIndented && claimant is CodeBlockSyntax);
-        if (!isContinuation) break;
-        quoted.add(parser.current);
-      } else {
-        final stripped = quoteMatch[1]!;
-        quoted.add(stripped);
-        lastQuotedLineIndented = _indentPattern.hasMatch(stripped);
-      }
-
-      parser.advance();
-    }
-
-    return quoted;
-  }
-
-  /// Parses the gathered lines as their own sequence of blocks and wraps the
-  /// result in a `blockquote` element.
-  @override
-  Node parse(BlockParser parser) {
-    final nested = BlockParser(parseChildLines(parser), parser.document);
-    return Element('blockquote', nested.parseLines());
-  }
-}
-
-/// Parses preformatted code blocks that are indented four spaces.
-class CodeBlockSyntax extends BlockSyntax {
-  const CodeBlockSyntax();
-
-  @override
-  RegExp get pattern => _indentPattern;
-
-  @override
-  bool canEndBlock(BlockParser parser) => false;
-
-  /// Returns the indented lines with their indentation removed.
-  ///
-  /// A single blank line sitting between two indented lines is kept as an
-  /// empty string so that both halves stay in one code block.
-  @override
-  List<String?> parseChildLines(BlockParser parser) {
-    final codeLines = <String?>[];
-
-    while (!parser.isDone) {
-      final indented = pattern.firstMatch(parser.current);
-      if (indented != null) {
-        codeLines.add(indented[1]);
-        parser.advance();
-        continue;
-      }
-
-      // Not indented: only carry on if this is a blank line that is directly
-      // followed by more indented code.
-      final following = parser.next;
-      final followingMatch =
-          following == null ? null : pattern.firstMatch(following);
-      if (followingMatch == null || parser.current.trim() != '') break;
-
-      codeLines
-        ..add('')
-        ..add(followingMatch[1]);
-      parser
-        ..advance()
-        ..advance();
-    }
-
-    return codeLines;
-  }
-
-  /// Builds a `pre` element holding a `code` element with the block's text,
-  /// HTML-escaped when the document asks for it.
-  @override
-  Node parse(BlockParser parser) {
-    // The Markdown tests expect a trailing newline, hence the extra entry.
-    final codeLines = parseChildLines(parser)..add('');
-
-    final joined = codeLines.join('\n');
-    final content = parser.document.encodeHtml ? escapeHtml(joined) : joined;
-
-    return Element('pre', [Element.text('code', content)]);
-  }
-}
-
-/// Parses preformatted code blocks between two ~~~ or ``` sequences.
-///
-/// See the CommonMark spec: https://spec.commonmark.org/0.29/#fenced-code-blocks
-class FencedCodeBlockSyntax extends BlockSyntax {
-  const FencedCodeBlockSyntax();
-
-  @override
-  RegExp get pattern => _codeFencePattern;
-
-  /// Whether the current line opens a fenced code block.
-  ///
-  /// From the CommonMark spec:
-  ///
-  /// > If the info string comes after a backtick fence, it may not contain
-  /// > any backtick characters.
-  @override
-  bool canParse(BlockParser parser) {
-    final fenceMatch = pattern.firstMatch(parser.current);
-    if (fenceMatch == null) return false;
-
-    final fence = fenceMatch.group(1)!;
-    // Tilde fences place no restriction on the info string.
-    if (fence.codeUnitAt(0) != $backquote) return true;
-    return !fenceMatch.group(2)!.codeUnits.contains($backquote);
-  }
-
-  /// Returns the lines after the opening fence, up to a fence line whose
-  /// marker starts with [endBlock] (or the end of input).
-  ///
-  /// Both fences are consumed. A missing [endBlock] is treated as the empty
-  /// string, so any fence line closes the block.
-  @override
-  List<String> parseChildLines(BlockParser parser, [String? endBlock]) {
-    final closingPrefix = endBlock ?? '';
-
-    // Step over the opening fence.
-    parser.advance();
-
-    final codeLines = <String>[];
-    while (!parser.isDone) {
-      final line = parser.current;
-      final fenceMatch = pattern.firstMatch(line);
-      parser.advance();
-      if (fenceMatch != null && fenceMatch[1]!.startsWith(closingPrefix)) {
-        break;
-      }
-      codeLines.add(line);
-    }
-
-    return codeLines;
-  }
-
-  /// Builds a `pre` element holding a `code` element; when the opening fence
-  /// carries an info string, its first word becomes a `language-…` class.
-  @override
-  Node parse(BlockParser parser) {
-    final opening = pattern.firstMatch(parser.current)!;
-    final fence = opening.group(1);
-    final rawInfo = opening.group(2)!;
-    final encodeHtml = parser.document.encodeHtml;
-
-    // The Markdown tests expect a trailing newline, hence the extra entry.
-    final codeLines = parseChildLines(parser, fence)..add('');
-    final joined = codeLines.join('\n');
-    final code = Element.text('code', encodeHtml ? escapeHtml(joined) : joined);
-
-    // The info string is trimmed and only its first word is used; see
-    // http://spec.commonmark.org/0.22/#example-100
-    var language = rawInfo.trim();
-    if (language.isNotEmpty) {
-      final firstSpace = language.indexOf(' ');
-      if (firstSpace >= 0) {
-        language = language.substring(0, firstSpace);
-      }
-      if (encodeHtml) {
-        language = escapeHtmlAttribute(language);
-      }
-      code.attributes['class'] = 'language-$language';
-    }
-
-    return Element('pre', [code]);
-  }
-}
-
-/// Parses horizontal rules like `---`, `_ _ _`, `* * *`, etc.
-class HorizontalRuleSyntax extends BlockSyntax {
-  const HorizontalRuleSyntax();
-
-  @override
-  RegExp get pattern => _hrPattern;
-
-  /// Consumes the rule line and returns an empty `hr` element.
-  @override
-  Node parse(BlockParser parser) {
-    final rule = Element.empty('hr');
-    parser.advance();
-    return rule;
-  }
-}
-
-/// Base class for syntaxes that parse inline HTML at the block level.
-///
-/// Compared with other Markdown implementations:
-///
-/// 1. This one is way way WAY simpler.
-/// 2. Essentially no HTML parsing or validation is done. We're a Markdown
-///    parser, not an HTML parser!
-abstract class BlockHtmlSyntax extends BlockSyntax {
-  const BlockHtmlSyntax();
-
-  @override
-  bool canEndBlock(BlockParser parser) => true;
-}
-
-/// Parses HTML blocks that begin with one of a fixed set of block-level tags.
-class BlockTagBlockHtmlSyntax extends BlockHtmlSyntax {
-  const BlockTagBlockHtmlSyntax();
-
-  static final _pattern = RegExp(
-      '^ {0,3}</?(?:address|article|aside|base|basefont|blockquote|body|'
-      'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|'
-      'figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|'
-      'iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|'
-      'option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|'
-      'title|tr|track|ul)'
-      r'(?:\s|>|/>|$)');
-
-  /// A cheap pre-check used before [_pattern].
-  ///
-  /// [_pattern] is very expensive, even on paragraphs of Markdown with no
-  /// HTML. Since HTML block tags occur very rarely in typical Markdown, a line
-  /// is first tested for a leading `<` and only then against [_pattern].
-  static final _openBracketPattern = RegExp('^ {0,3}<');
-
-  @override
-  RegExp get pattern => _pattern;
-
-  /// Whether the current line starts with `<` (after at most three spaces)
-  /// and also matches [pattern].
-  @override
-  bool canParse(BlockParser parser) =>
-      _openBracketPattern.hasMatch(parser.current) && super.canParse(parser);
-
-  /// Consumes lines up to the next blank line (or the end of input) and
-  /// returns them, unparsed, as a single [Text] node.
-  @override
-  Node parse(BlockParser parser) {
-    final rawLines = <String>[];
-
-    for (;
-        !parser.isDone && !parser.matches(_emptyPattern);
-        parser.advance()) {
-      rawLines.add(parser.current);
-    }
-
-    return Text(rawLines.join('\n').trimRight());
-  }
-}
-
-/// Parses HTML blocks that consist of a single line holding one tag of any
-/// name.
-///
-/// Unlike its superclass, this syntax never ends the preceding block.
-class OtherTagBlockHtmlSyntax extends BlockTagBlockHtmlSyntax {
-  // Really hacky way to detect "other" HTML. This matches:
-  //
-  // * any opening spaces
-  // * open bracket and maybe a slash ("<" or "</")
-  // * some word characters
-  // * either:
-  //   * a close bracket, or
-  //   * whitespace followed by not-brackets followed by a close bracket
-  // * possible whitespace and the end of the line.
-  //
-  // Compiled once and shared: [pattern] is read each time the inherited
-  // `canParse` gets past its open-bracket pre-check, so constructing a new
-  // RegExp on every access would be repeated, avoidable work.
-  static final _otherTagPattern = RegExp(r'^ {0,3}</?\w+(?:>|\s+[^>]*>)\s*$');
-
-  const OtherTagBlockHtmlSyntax();
-
-  @override
-  bool canEndBlock(BlockParser parser) => false;
-
-  @override
-  RegExp get pattern => _otherTagPattern;
-}
-
-/// A BlockHtmlSyntax that has a specific `endPattern`.
-///
-/// In practice this means that the syntax dominates; it is allowed to eat
-/// many lines, including blank lines, before matching its `endPattern`.
-class LongBlockHtmlSyntax extends BlockHtmlSyntax {
-  /// Compiles [pattern] and [endPattern] into the regular expressions that
-  /// open and close the block.
-  LongBlockHtmlSyntax(String pattern, String endPattern)
-      : pattern = RegExp(pattern),
-        _endPattern = RegExp(endPattern);
-
-  @override
-  final RegExp pattern;
-
-  /// Marks the last line of the block.
-  final RegExp _endPattern;
-
-  /// Consumes lines up to and including the first one matching the end
-  /// pattern (or up to the end of input) and returns them as one [Text] node.
-  @override
-  Node parse(BlockParser parser) {
-    final rawLines = <String>[];
-
-    while (!parser.isDone) {
-      rawLines.add(parser.current);
-      final reachedEnd = parser.matches(_endPattern);
-      if (reachedEnd) break;
-      parser.advance();
-    }
-
-    // Step past the line the loop stopped on.
-    parser.advance();
-    return Text(rawLines.join('\n').trimRight());
-  }
-}
-
-/// The raw lines gathered for one list item before they are parsed.
-class ListItem {
-  ListItem(this.lines);
-
-  /// The lines that make up this item.
-  final List<String> lines;
-
-  /// Starts out `false`.
-  bool forceBlock = false;
-}
-
-/// Base class for both ordered and unordered lists.
-///
-/// Subclasses supply [pattern] and [listTag]; the line-gathering and the
-/// tight/loose handling in [parse] are shared.
-abstract class ListSyntax extends BlockSyntax {
- @override
- bool canEndBlock(BlockParser parser) {
- // An empty list cannot interrupt a paragraph. See
- // https://spec.commonmark.org/0.29/#example-255.
- // The current line is matched against [pattern] again here so that the
- // text following the list marker can be inspected.
- final match = pattern.firstMatch(parser.current)!;
- // The seventh group, in both [_olPattern] and [_ulPattern] is the text
- // after the delimiter.
- return match[7]?.isNotEmpty ?? false;
- }
-
- /// The tag name of the list element returned by [parse].
- String get listTag;
-
- const ListSyntax();
-
- /// A list of patterns that can start a valid block within a list item.
- static final blocksInList = [
- _blockquotePattern,
- _headerPattern,
- _hrPattern,
- _indentPattern,
- _ulPattern,
- _olPattern
- ];
-
- /// Matches the (possibly empty) run of spaces and tabs a line starts with.
- static final _whitespaceRe = RegExp('[ \t]*');
-
- /// Gathers the lines of each list item, parses every item as its own
- /// sequence of blocks, and returns the resulting [listTag] element.
- ///
- /// For a tight list the top-level `p` elements of each item are replaced by
- /// their children. An `ol` whose first number is not 1 gets a `start`
- /// attribute.
- @override
- Node parse(BlockParser parser) {
- final items = <ListItem>[];
- var childLines = <String>[];
-
- // Stores the lines gathered so far as a finished item; does nothing when
- // no lines have been gathered.
- void endItem() {
- if (childLines.isNotEmpty) {
- items.add(ListItem(childLines));
- childLines = <String>[];
- }
- }
-
- // Result of the most recent [tryMatch] call; the branches below read the
- // capture groups from it.
- late Match? match;
- bool tryMatch(RegExp pattern) {
- match = pattern.firstMatch(parser.current);
- return match != null;
- }
-
- // Marker character of the list being parsed; a different one starts a new
- // list.
- String? listMarker;
- // Indentation that continuation lines of the current item must reach.
- String? indent;
- // In case the first number in an ordered list is not 1, use it as the
- // "start".
- int? startNumber;
-
- while (!parser.isDone) {
- final leadingSpace =
- _whitespaceRe.matchAsPrefix(parser.current)!.group(0)!;
- final leadingExpandedTabLength = _expandedTabLength(leadingSpace);
- if (tryMatch(_emptyPattern)) {
- if (_emptyPattern.hasMatch(parser.next ?? '')) {
- // Two blank lines ends a list.
- break;
- }
- // Add a blank line to the current list item.
- childLines.add('');
- } else if (indent != null && indent.length <= leadingExpandedTabLength) {
- // Strip off indent and add to current item.
- final line = parser.current
- .replaceFirst(leadingSpace, ' ' * leadingExpandedTabLength)
- .replaceFirst(indent, '');
- childLines.add(line);
- } else if (tryMatch(_hrPattern)) {
- // Horizontal rule takes precedence to a new list item.
- break;
- } else if (tryMatch(_ulPattern) || tryMatch(_olPattern)) {
- // Groups: [1] leading spaces, [2] digits (empty for bullets), [3] the
- // marker, [5] first whitespace after it, [6] further whitespace, [7]
- // the item text.
- final precedingWhitespace = match![1]!;
- final digits = match![2] ?? '';
- if (startNumber == null && digits.isNotEmpty) {
- startNumber = int.parse(digits);
- }
- final marker = match![3]!;
- final firstWhitespace = match![5] ?? '';
- final restWhitespace = match![6] ?? '';
- final content = match![7] ?? '';
- final isBlank = content.isEmpty;
- if (listMarker != null && listMarker != marker) {
- // Changing the bullet or ordered list delimiter starts a new list.
- break;
- }
- listMarker = marker;
- final markerAsSpaces = ' ' * (digits.length + marker.length);
- if (isBlank) {
- // See http://spec.commonmark.org/0.28/#list-items under "3. Item
- // starting with a blank line."
- //
- // If the list item starts with a blank line, the final piece of the
- // indentation is just a single space.
- indent = '$precedingWhitespace$markerAsSpaces ';
- } else if (restWhitespace.length >= 4) {
- // See http://spec.commonmark.org/0.28/#list-items under "2. Item
- // starting with indented code."
- //
- // If the list item starts with indented code, we need to _not_ count
- // any indentation past the required whitespace character.
- indent = precedingWhitespace + markerAsSpaces + firstWhitespace;
- } else {
- indent = precedingWhitespace +
- markerAsSpaces +
- firstWhitespace +
- restWhitespace;
- }
- // End the current list item and start a new one.
- endItem();
- childLines.add(restWhitespace + content);
- } else if (BlockSyntax.isAtBlockEnd(parser)) {
- // Done with the list.
- break;
- } else {
- // If the previous item is a blank line, this means we're done with the
- // list and are starting a new top-level paragraph.
- if ((childLines.isNotEmpty) && (childLines.last == '')) {
- parser.encounteredBlankLine = true;
- break;
- }
-
- // Anything else is paragraph continuation text.
- childLines.add(parser.current);
- }
- parser.advance();
- }
-
- endItem();
- final itemNodes = <Element>[];
-
- items.forEach(_removeLeadingEmptyLine);
- final anyEmptyLines = _removeTrailingEmptyLines(items);
- var anyEmptyLinesBetweenBlocks = false;
-
- // Each item is parsed by its own BlockParser; a blank line seen inside any
- // of them makes the whole list loose.
- for (final item in items) {
- final itemParser = BlockParser(item.lines, parser.document);
- final children = itemParser.parseLines();
- itemNodes.add(Element('li', children));
- anyEmptyLinesBetweenBlocks =
- anyEmptyLinesBetweenBlocks || itemParser.encounteredBlankLine;
- }
-
- // Must strip paragraph tags if the list is "tight".
- // http://spec.commonmark.org/0.28/#lists
- final listIsTight = !anyEmptyLines && !anyEmptyLinesBetweenBlocks;
-
- if (listIsTight) {
- // We must post-process the list items, converting any top-level paragraph
- // elements to just text elements.
- for (final item in itemNodes) {
- final children = item.children;
- if (children != null) {
- for (var i = 0; i < children.length; i++) {
- final child = children[i];
- if (child is Element && child.tag == 'p') {
- // Replace the paragraph, in place, with its own children.
- children.removeAt(i);
- children.insertAll(i, child.children!);
- }
- }
- }
- }
- }
-
- if (listTag == 'ol' && startNumber != 1) {
- return Element(listTag, itemNodes)..attributes['start'] = '$startNumber';
- } else {
- return Element(listTag, itemNodes);
- }
- }
-
- /// Removes the first line of [item] when that line is blank.
- void _removeLeadingEmptyLine(ListItem item) {
- if (item.lines.isNotEmpty && _emptyPattern.hasMatch(item.lines.first)) {
- item.lines.removeAt(0);
- }
- }
-
- /// Removes any trailing empty lines and notes whether any items are separated
- /// by such lines.
- ///
- /// Items holding exactly one line are left untouched. Blank lines trailing
- /// the last item are removed without affecting the result.
- bool _removeTrailingEmptyLines(List<ListItem> items) {
- var anyEmpty = false;
- for (var i = 0; i < items.length; i++) {
- if (items[i].lines.length == 1) continue;
- while (items[i].lines.isNotEmpty &&
- _emptyPattern.hasMatch(items[i].lines.last)) {
- if (i < items.length - 1) {
- anyEmpty = true;
- }
- items[i].lines.removeLast();
- }
- }
- return anyEmpty;
- }
-
- /// Returns the width of [input] when every tab (0x9) advances to the next
- /// multiple of four and every other code unit counts as one.
- static int _expandedTabLength(String input) {
- var length = 0;
- for (final char in input.codeUnits) {
- length += char == 0x9 ? 4 - (length % 4) : 1;
- }
- return length;
- }
-}
-
-/// Parses unordered lists into `ul` elements.
-class UnorderedListSyntax extends ListSyntax {
-  const UnorderedListSyntax();
-
-  @override
-  String get listTag => 'ul';
-
-  @override
-  RegExp get pattern => _ulPattern;
-}
-
-/// Parses ordered lists into `ol` elements.
-class OrderedListSyntax extends ListSyntax {
-  const OrderedListSyntax();
-
-  @override
-  String get listTag => 'ol';
-
-  @override
-  RegExp get pattern => _olPattern;
-}
-
-/// Parses tables.
-///
-/// A table is recognized by its second line, the divider of hyphens and pipes
-/// that separates the head row from the body rows.
-class TableSyntax extends BlockSyntax {
-  @override
-  bool canEndBlock(BlockParser parser) => false;
-
-  @override
-  RegExp get pattern => _dummyPattern;
-
-  const TableSyntax();
-
-  @override
-  bool canParse(BlockParser parser) {
-    // Note: matches *next* line, not the current one. We're looking for the
-    // bar separating the head row from the body rows.
-    return parser.matchesNext(_tablePattern);
-  }
-
-  /// Parses a table into its three parts:
-  ///
-  /// * a head row of head cells (`<th>` cells)
-  /// * a divider of hyphens and pipes (not rendered)
-  /// * many body rows of body cells (`<td>` cells)
-  ///
-  /// Returns `null` when the head row does not have as many cells as the
-  /// divider has columns. Body rows are padded with empty cells or truncated
-  /// to that column count.
-  @override
-  Node? parse(BlockParser parser) {
-    final alignments = _parseAlignments(parser.next!);
-    final columnCount = alignments.length;
-    final headRow = _parseRow(parser, alignments, 'th');
-    if (headRow.children!.length != columnCount) {
-      // NOTE(review): the head line has already been consumed by [_parseRow]
-      // at this point — confirm the caller copes with that.
-      return null;
-    }
-    final head = Element('thead', [headRow]);
-
-    // Advance past the divider of hyphens.
-    parser.advance();
-
-    final rows = <Element>[];
-    while (!parser.isDone && !BlockSyntax.isAtBlockEnd(parser)) {
-      final row = _parseRow(parser, alignments, 'td');
-      final cells = row.children!;
-      while (cells.length < columnCount) {
-        // Insert synthetic empty cells.
-        cells.add(Element.empty('td'));
-      }
-      while (cells.length > columnCount) {
-        cells.removeLast();
-      }
-      rows.add(row);
-    }
-
-    if (rows.isEmpty) {
-      return Element('table', [head]);
-    }
-    return Element('table', [head, Element('tbody', rows)]);
-  }
-
-  /// Reads the divider [line] and returns one alignment per column:
-  /// `'left'`, `'right'`, `'center'`, or `null` when no colon is present.
-  List<String?> _parseAlignments(String line) {
-    final startIndex = _walkPastOpeningPipe(line);
-
-    // Walk back over trailing whitespace and at most one closing pipe.
-    var endIndex = line.length - 1;
-    while (endIndex > 0) {
-      final ch = line.codeUnitAt(endIndex);
-      if (ch == $pipe) {
-        endIndex--;
-        break;
-      }
-      if (ch != $space && ch != $tab) {
-        break;
-      }
-      endIndex--;
-    }
-
-    // Optimization: We walk [line] too many times. One lap should do it.
-    return line.substring(startIndex, endIndex + 1).split('|').map((column) {
-      column = column.trim();
-      if (column.startsWith(':') && column.endsWith(':')) return 'center';
-      if (column.startsWith(':')) return 'left';
-      if (column.endsWith(':')) return 'right';
-      return null;
-    }).toList();
-  }
-
-  /// Parses a table row at the current line into a table row element, with
-  /// parsed table cells.
-  ///
-  /// [alignments] is used to annotate an alignment on each cell, and
-  /// [cellType] is used to declare either "td" or "th" cells. The line is
-  /// consumed from [parser].
-  Element _parseRow(
-    BlockParser parser,
-    List<String?> alignments,
-    String cellType,
-  ) {
-    final line = parser.current;
-    final cells = <String>[];
-    var index = _walkPastOpeningPipe(line);
-    final cellBuffer = StringBuffer();
-
-    while (true) {
-      if (index >= line.length) {
-        // This row ended without a trailing pipe, which is fine.
-        cells.add(cellBuffer.toString().trimRight());
-        cellBuffer.clear();
-        break;
-      }
-      final ch = line.codeUnitAt(index);
-      if (ch == $backslash) {
-        if (index == line.length - 1) {
-          // A table row ending in a backslash is not well-specified, but it
-          // looks like GitHub just allows the character as part of the text of
-          // the last cell.
-          cellBuffer.writeCharCode(ch);
-          cells.add(cellBuffer.toString().trimRight());
-          cellBuffer.clear();
-          break;
-        }
-        final escaped = line.codeUnitAt(index + 1);
-        if (escaped == $pipe) {
-          // GitHub Flavored Markdown has a strange bit here; the pipe is to be
-          // escaped before any other inline processing. One consequence, for
-          // example, is that "| `\|` |" should be parsed as a cell with a code
-          // element with text "|", rather than "\|". Most parsers are not
-          // compliant with this corner, but this is what is specified, and what
-          // GitHub does in practice.
-          cellBuffer.writeCharCode(escaped);
-        } else {
-          // The [InlineParser] will handle the escaping.
-          cellBuffer.writeCharCode(ch);
-          cellBuffer.writeCharCode(escaped);
-        }
-        index += 2;
-      } else if (ch == $pipe) {
-        cells.add(cellBuffer.toString().trimRight());
-        cellBuffer.clear();
-        // Walk forward past any whitespace which leads the next cell.
-        index++;
-        index = _walkPastWhitespace(line, index);
-        if (index >= line.length) {
-          // This row ended with a trailing pipe.
-          break;
-        }
-      } else {
-        cellBuffer.writeCharCode(ch);
-        index++;
-      }
-    }
-    parser.advance();
-    final row = [
-      for (final cell in cells) Element(cellType, [UnparsedContent(cell)])
-    ];
-
-    // Cells beyond the last divider column get no alignment.
-    for (var i = 0; i < row.length && i < alignments.length; i++) {
-      if (alignments[i] == null) continue;
-      row[i].attributes['style'] = 'text-align: ${alignments[i]};';
-    }
-
-    return Element('tr', row);
-  }
-
-  /// Walks past whitespace in [line] starting at [index].
-  ///
-  /// Returns the index of the first non-whitespace character, or the length
-  /// of [line] when only spaces and tabs remain.
-  int _walkPastWhitespace(String line, int index) {
-    while (index < line.length) {
-      final ch = line.codeUnitAt(index);
-      if (ch != $space && ch != $tab) {
-        break;
-      }
-      index++;
-    }
-    return index;
-  }
-
-  /// Walks past the opening pipe (and any whitespace that surrounds it) in
-  /// [line].
-  ///
-  /// Returns the index of the first non-whitespace character after the pipe.
-  /// If no opening pipe is found, this just returns the index of the first
-  /// non-whitespace character.
-  int _walkPastOpeningPipe(String line) {
-    var index = _walkPastWhitespace(line, 0);
-    if (index < line.length && line.codeUnitAt(index) == $pipe) {
-      index = _walkPastWhitespace(line, index + 1);
-    }
-    return index;
-  }
-}
-
-/// Parses paragraphs of regular text.
-class ParagraphSyntax extends BlockSyntax {
- static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\[');
-
- static final _whitespacePattern = RegExp(r'^\s*$');
-
- @override
- RegExp get pattern => _dummyPattern;
-
- @override
- bool canEndBlock(BlockParser parser) => false;
-
- const ParagraphSyntax();
-
- @override
- bool canParse(BlockParser parser) => true;
-
- @override
- Node parse(BlockParser parser) {
- final childLines = <String>[];
-
- // Eat until we hit something that ends a paragraph.
- while (!BlockSyntax.isAtBlockEnd(parser)) {
- childLines.add(parser.current);
- parser.advance();
- }
-
- final paragraphLines = _extractReflinkDefinitions(parser, childLines);
- if (paragraphLines == null) {
- // Paragraph consisted solely of reference link definitions.
- return Text('');
- } else {
- final contents = UnparsedContent(paragraphLines.join('\n').trimRight());
- return Element('p', [contents]);
- }
- }
-
- /// Extract reference link definitions from the front of the paragraph, and
- /// return the remaining paragraph lines.
- List<String>? _extractReflinkDefinitions(
- BlockParser parser,
- List<String> lines,
- ) {
- bool lineStartsReflinkDefinition(int i) =>
- lines[i].startsWith(_reflinkDefinitionStart);
-
- var i = 0;
- loopOverDefinitions:
- while (true) {
- // Check for reflink definitions.
- if (!lineStartsReflinkDefinition(i)) {
- // It's paragraph content from here on out.
- break;
- }
- var contents = lines[i];
- var j = i + 1;
- while (j < lines.length) {
- // Check to see if the _next_ line might start a new reflink definition.
- // Even if it turns out not to be, but it started with a '[', then it
- // is not a part of _this_ possible reflink definition.
- if (lineStartsReflinkDefinition(j)) {
- // Try to parse [contents] as a reflink definition.
- if (_parseReflinkDefinition(parser, contents)) {
- // Loop again, starting at the next possible reflink definition.
- i = j;
- continue loopOverDefinitions;
- } else {
- // Could not parse [contents] as a reflink definition.
- break;
- }
- } else {
- contents = '$contents\n${lines[j]}';
- j++;
- }
- }
- // End of the block.
- if (_parseReflinkDefinition(parser, contents)) {
- i = j;
- break;
- }
-
- // It may be that there is a reflink definition starting at [i], but it
- // does not extend all the way to [j], such as:
- //
- // [link]: url // line i
- // "title"
- // garbage
- // [link2]: url // line j
- //
- // In this case, [i, i+1] is a reflink definition, and the rest is
- // paragraph content.
- while (j >= i) {
- // This isn't the most efficient loop, what with this big ole'
- // Iterable allocation (`getRange`) followed by a big 'ole String
- // allocation, but we
- // must walk backwards, checking each range.
- contents = lines.getRange(i, j).join('\n');
- if (_parseReflinkDefinition(parser, contents)) {
- // That is the last reflink definition. The rest is paragraph
- // content.
- i = j;
- break;
- }
- j--;
- }
- // The ending was not a reflink definition at all. Just paragraph
- // content.
-
- break;
- }
-
- if (i == lines.length) {
- // No paragraph content.
- return null;
- } else {
- // Ends with paragraph content.
- return lines.sublist(i);
- }
- }
-
- // Parse [contents] as a reference link definition.
- //
- // Also adds the reference link definition to the document.
- //
- // Returns whether [contents] could be parsed as a reference link definition.
- bool _parseReflinkDefinition(BlockParser parser, String contents) {
- final pattern = RegExp(
- // Leading indentation.
- '''^[ ]{0,3}'''
- // Reference id in brackets, and URL.
- r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*'''
- // Title in double or single quotes, or parens.
- r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''',
- multiLine: true,
- );
- final match = pattern.firstMatch(contents);
- if (match == null) {
- // Not a reference link definition.
- return false;
- }
- if (match.match.length < contents.length) {
- // Trailing text. No good.
- return false;
- }
-
- var label = match[1]!;
- final destination = match[2] ?? match[3]!;
- var title = match[4];
-
- // The label must contain at least one non-whitespace character.
- if (_whitespacePattern.hasMatch(label)) {
- return false;
- }
-
- if (title == '') {
- // No title.
- title = null;
- } else {
- // Remove "", '', or ().
- title = title!.substring(1, title.length - 1);
- }
-
- // References are case-insensitive, and internal whitespace is compressed.
- label = normalizeLinkLabel(label);
-
- parser.document.linkReferences
- .putIfAbsent(label, () => LinkReference(label, destination, title));
- return true;
- }
-}
-
-/// Walks the parser forward through the lines does not match any [BlockSyntax].
-///
-/// Returns a [UnparsedContent] with the unmatched lines as `textContent`.
-class DummyBlockSyntax extends BlockSyntax {
- const DummyBlockSyntax();
-
- @override
- RegExp get pattern => _dummyPattern;
-
- @override
- bool canEndBlock(BlockParser parser) => false;
-
- @override
- bool canParse(BlockParser parser) => true;
-
- @override
- Node parse(BlockParser parser) {
- final childLines = <String>[];
-
- while (!BlockSyntax.isAtBlockEnd(parser)) {
- childLines.add(parser.current);
- parser.advance();
- }
-
- return UnparsedContent(childLines.join('\n'));
- }
-}
diff --git a/lib/src/block_syntaxes/block_html_syntax.dart b/lib/src/block_syntaxes/block_html_syntax.dart
new file mode 100644
index 0000000..122c940
--- /dev/null
+++ b/lib/src/block_syntaxes/block_html_syntax.dart
@@ -0,0 +1,19 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../block_parser.dart';
+import 'block_syntax.dart';
+
+/// Base class for syntaxes that pick up raw HTML at the block level.
+///
+/// This differs from other Markdown implementations in that it is far simpler:
+/// essentially no HTML parsing or validation is done. This is a Markdown
+/// parser, not an HTML parser.
+abstract class BlockHtmlSyntax extends BlockSyntax {
+  const BlockHtmlSyntax();
+
+  /// Returns `true` unconditionally, so a line recognized by this syntax may
+  /// end the block that precedes it.
+  @override
+  bool canEndBlock(BlockParser parser) => true;
+}
diff --git a/lib/src/block_syntaxes/block_syntax.dart b/lib/src/block_syntaxes/block_syntax.dart
new file mode 100644
index 0000000..f33195d
--- /dev/null
+++ b/lib/src/block_syntaxes/block_syntax.dart
@@ -0,0 +1,50 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+
+/// Base class for everything that recognizes and parses one kind of block.
+abstract class BlockSyntax {
+  const BlockSyntax();
+
+  /// Gets the regex used to identify the beginning of this block, if any.
+  RegExp get pattern;
+
+  /// Whether a line recognized by this syntax may end the preceding block.
+  ///
+  /// Returns `true` unless overridden.
+  bool canEndBlock(BlockParser parser) => true;
+
+  /// Whether [parser]'s current line matches [pattern].
+  bool canParse(BlockParser parser) => pattern.hasMatch(parser.current);
+
+  /// Parses the block found at [parser]'s current line.
+  ///
+  /// The result is nullable; an implementation may return `null`.
+  Node? parse(BlockParser parser);
+
+  /// Consumes consecutive lines that match [pattern] and returns the first
+  /// capture group of each of them.
+  List<String?> parseChildLines(BlockParser parser) {
+    final captured = <String?>[];
+
+    // Stop at the end of the input or at the first line that does not match.
+    while (!parser.isDone) {
+      final lineMatch = pattern.firstMatch(parser.current);
+      if (lineMatch == null) {
+        return captured;
+      }
+      captured.add(lineMatch.group(1));
+      parser.advance();
+    }
+
+    return captured;
+  }
+
+  /// Gets whether or not [parser]'s current line should end the previous block.
+  ///
+  /// That is the case when the input is exhausted, or when some syntax known
+  /// to [parser] can both parse the current line and end a block.
+  static bool isAtBlockEnd(BlockParser parser) {
+    if (parser.isDone) return true;
+    for (final syntax in parser.blockSyntaxes) {
+      if (syntax.canParse(parser) && syntax.canEndBlock(parser)) return true;
+    }
+    return false;
+  }
+
+  /// Generates a valid HTML anchor from the inner text of [element].
+  ///
+  /// The text of the first child is lower-cased and trimmed, every character
+  /// other than `a-z`, `0-9`, space, `_` and `-` is dropped, and each
+  /// remaining whitespace character is replaced by `-`.
+  static String generateAnchorHash(Element element) {
+    final text = element.children!.first.textContent.toLowerCase().trim();
+    final stripped = text.replaceAll(RegExp('[^a-z0-9 _-]'), '');
+    return stripped.replaceAll(RegExp(r'\s'), '-');
+  }
+}
diff --git a/lib/src/block_syntaxes/block_tag_block_html_syntax.dart b/lib/src/block_syntaxes/block_tag_block_html_syntax.dart
new file mode 100644
index 0000000..e017696
--- /dev/null
+++ b/lib/src/block_syntaxes/block_tag_block_html_syntax.dart
@@ -0,0 +1,49 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_html_syntax.dart';
+
+/// Recognizes an HTML block whose first line starts with an opening or closing
+/// tag from a fixed list of tag names, and passes its lines through as text.
+class BlockTagBlockHtmlSyntax extends BlockHtmlSyntax {
+  /// Cheap pre-filter for [_pattern].
+  ///
+  /// The [_pattern] regular expression is very expensive, even on paragraphs
+  /// of Markdown with no HTML. This one is used first as a basic check that
+  /// the input might possibly be an HTML block tag, which occur very rarely in
+  /// typical Markdown.
+  static final _openBracketPattern = RegExp('^ {0,3}<');
+
+  /// Matches a line that begins with one of the listed tags.
+  static final _pattern = RegExp(
+      '^ {0,3}</?(?:address|article|aside|base|basefont|blockquote|body|'
+      'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|'
+      'figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|'
+      'iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|'
+      'option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|'
+      'title|tr|track|ul)'
+      r'(?:\s|>|/>|$)');
+
+  const BlockTagBlockHtmlSyntax();
+
+  @override
+  RegExp get pattern => _pattern;
+
+  /// Runs the cheap [_openBracketPattern] check before the full [pattern].
+  @override
+  bool canParse(BlockParser parser) =>
+      _openBracketPattern.hasMatch(parser.current) && super.canParse(parser);
+
+  /// Collects every line up to, but not including, the next line that matches
+  /// [emptyPattern], and returns them joined and right-trimmed as a [Text].
+  @override
+  Node parse(BlockParser parser) {
+    final htmlLines = <String>[];
+
+    // Eat until we hit a blank line or run out of input.
+    for (; !parser.isDone; parser.advance()) {
+      if (parser.matches(emptyPattern)) break;
+      htmlLines.add(parser.current);
+    }
+
+    return Text(htmlLines.join('\n').trimRight());
+  }
+}
diff --git a/lib/src/block_syntaxes/blockquote_syntax.dart b/lib/src/block_syntaxes/blockquote_syntax.dart
new file mode 100644
index 0000000..7d16e55
--- /dev/null
+++ b/lib/src/block_syntaxes/blockquote_syntax.dart
@@ -0,0 +1,63 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+import 'code_block_syntax.dart';
+import 'paragraph_syntax.dart';
+
+/// Parses email-style blockquotes: `> quote`.
+class BlockquoteSyntax extends BlockSyntax {
+  const BlockquoteSyntax();
+
+  @override
+  RegExp get pattern => blockquotePattern;
+
+  /// Grabs all of the lines that form the blockquote, stripping off the ">".
+  ///
+  /// Lines that do not match [pattern] are kept too, unchanged, as long as
+  /// they count as paragraph continuation text.
+  @override
+  List<String> parseChildLines(BlockParser parser) {
+    final quoted = <String>[];
+
+    // Whether the most recent line that matched [pattern] also matched
+    // [indentPattern] once its marker was stripped.
+    var lastQuotedLineWasIndented = false;
+
+    while (!parser.isDone) {
+      final quoteMatch = pattern.firstMatch(parser.current);
+      if (quoteMatch == null) {
+        // A paragraph continuation is OK. This is content that cannot be
+        // parsed as any other syntax except Paragraph, and it doesn't match
+        // the bar in a Setext header.
+        // Because indented code blocks cannot interrupt paragraphs, a line
+        // matched by CodeBlockSyntax is also paragraph continuation text.
+        final firstSyntax = parser.blockSyntaxes
+            .firstWhere((syntax) => syntax.canParse(parser));
+        final isContinuation = firstSyntax is ParagraphSyntax ||
+            (firstSyntax is CodeBlockSyntax && !lastQuotedLineWasIndented);
+        if (!isContinuation) break;
+        quoted.add(parser.current);
+      } else {
+        final stripped = quoteMatch[1]!;
+        quoted.add(stripped);
+        lastQuotedLineWasIndented = indentPattern.hasMatch(stripped);
+      }
+      parser.advance();
+    }
+
+    return quoted;
+  }
+
+  /// Builds a `blockquote` element whose children come from parsing the
+  /// collected lines with a nested [BlockParser].
+  @override
+  Node parse(BlockParser parser) {
+    // Recursively parse the contents of the blockquote.
+    final nested = BlockParser(parseChildLines(parser), parser.document);
+    return Element('blockquote', nested.parseLines());
+  }
+}
diff --git a/lib/src/block_syntaxes/code_block_syntax.dart b/lib/src/block_syntaxes/code_block_syntax.dart
new file mode 100644
index 0000000..f37baed
--- /dev/null
+++ b/lib/src/block_syntaxes/code_block_syntax.dart
@@ -0,0 +1,62 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import '../util.dart';
+import 'block_syntax.dart';
+
+/// Parses preformatted code blocks that are indented four spaces.
+class CodeBlockSyntax extends BlockSyntax {
+  const CodeBlockSyntax();
+
+  @override
+  RegExp get pattern => indentPattern;
+
+  /// Returns `false` unconditionally: this syntax never ends the preceding
+  /// block.
+  @override
+  bool canEndBlock(BlockParser parser) => false;
+
+  /// Collects the first capture group of every line matching [pattern].
+  ///
+  /// A single blank line is bridged (kept as `''`) when the line after it
+  /// matches [pattern] again.
+  @override
+  List<String?> parseChildLines(BlockParser parser) {
+    final codeLines = <String?>[];
+
+    while (!parser.isDone) {
+      final currentMatch = pattern.firstMatch(parser.current);
+      if (currentMatch != null) {
+        codeLines.add(currentMatch[1]);
+        parser.advance();
+        continue;
+      }
+
+      // If there's a codeblock, then a newline, then a codeblock, keep the
+      // code blocks together.
+      final upcoming = parser.next;
+      final upcomingMatch =
+          upcoming == null ? null : pattern.firstMatch(upcoming);
+      if (upcomingMatch == null || parser.current.trim() != '') break;
+
+      codeLines
+        ..add('')
+        ..add(upcomingMatch[1]);
+      // Skip both the blank line and the code line that follows it.
+      parser
+        ..advance()
+        ..advance();
+    }
+
+    return codeLines;
+  }
+
+  /// Wraps the collected lines in `<pre><code>`, HTML-escaping the content
+  /// when the document's `encodeHtml` flag is set.
+  @override
+  Node parse(BlockParser parser) {
+    // The Markdown tests expect a trailing newline; the extra empty entry
+    // produces it when the lines are joined.
+    final joined = [...parseChildLines(parser), ''].join('\n');
+    final content = parser.document.encodeHtml ? escapeHtml(joined) : joined;
+
+    return Element('pre', [Element.text('code', content)]);
+  }
+}
diff --git a/lib/src/block_syntaxes/dummy_block_syntax.dart b/lib/src/block_syntaxes/dummy_block_syntax.dart
new file mode 100644
index 0000000..46ac98e
--- /dev/null
+++ b/lib/src/block_syntaxes/dummy_block_syntax.dart
@@ -0,0 +1,36 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Walks the parser forward through the lines that do not match any
+/// [BlockSyntax].
+///
+/// Returns an [UnparsedContent] with the unmatched lines as `textContent`.
+class DummyBlockSyntax extends BlockSyntax {
+  const DummyBlockSyntax();
+
+  @override
+  RegExp get pattern => dummyPattern;
+
+  /// Returns `false` unconditionally: this syntax never ends the preceding
+  /// block.
+  @override
+  bool canEndBlock(BlockParser parser) => false;
+
+  /// Returns `true` unconditionally: any line is accepted.
+  @override
+  bool canParse(BlockParser parser) => true;
+
+  /// Consumes lines until [BlockSyntax.isAtBlockEnd] reports a block end and
+  /// returns them, joined with newlines, as [UnparsedContent].
+  @override
+  Node parse(BlockParser parser) {
+    final unmatched = <String>[];
+
+    for (; !BlockSyntax.isAtBlockEnd(parser); parser.advance()) {
+      unmatched.add(parser.current);
+    }
+
+    return UnparsedContent(unmatched.join('\n'));
+  }
+}
diff --git a/lib/src/block_syntaxes/empty_block_syntax.dart b/lib/src/block_syntaxes/empty_block_syntax.dart
new file mode 100644
index 0000000..54cc865
--- /dev/null
+++ b/lib/src/block_syntaxes/empty_block_syntax.dart
@@ -0,0 +1,24 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Consumes a line that matches [emptyPattern] without emitting a node.
+class EmptyBlockSyntax extends BlockSyntax {
+  const EmptyBlockSyntax();
+
+  @override
+  RegExp get pattern => emptyPattern;
+
+  /// Flags on [parser] that a blank line was encountered, moves past the
+  /// line, and returns `null`.
+  @override
+  Node? parse(BlockParser parser) {
+    parser
+      ..encounteredBlankLine = true
+      ..advance();
+
+    // Don't actually emit anything.
+    return null;
+  }
+}
diff --git a/lib/src/block_syntaxes/fenced_blockquote_syntax.dart b/lib/src/block_syntaxes/fenced_blockquote_syntax.dart
new file mode 100644
index 0000000..d4a3592
--- /dev/null
+++ b/lib/src/block_syntaxes/fenced_blockquote_syntax.dart
@@ -0,0 +1,44 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Parses lines fenced by `>>>` to blockquotes.
+class FencedBlockquoteSyntax extends BlockSyntax {
+  const FencedBlockquoteSyntax();
+
+  @override
+  RegExp get pattern => blockquoteFencePattern;
+
+  /// Returns the lines between the opening fence and the next line matching
+  /// [pattern], or the end of the input if no such line follows.
+  ///
+  /// Both fence lines are consumed but not included in the result.
+  @override
+  List<String> parseChildLines(BlockParser parser) {
+    final fenced = <String>[];
+
+    // Skip the opening fence.
+    parser.advance();
+
+    while (!parser.isDone) {
+      final line = parser.current;
+      // Every line is consumed, whether it is content or the closing fence.
+      parser.advance();
+      if (pattern.hasMatch(line)) break;
+      fenced.add(line);
+    }
+
+    return fenced;
+  }
+
+  /// Builds a `blockquote` element whose children come from parsing the
+  /// fenced lines with a nested [BlockParser].
+  @override
+  Node? parse(BlockParser parser) {
+    // Recursively parse the contents of the blockquote.
+    final nested = BlockParser(parseChildLines(parser), parser.document);
+    return Element('blockquote', nested.parseLines());
+  }
+}
diff --git a/lib/src/block_syntaxes/fenced_code_block_syntax.dart b/lib/src/block_syntaxes/fenced_code_block_syntax.dart
new file mode 100644
index 0000000..cc80cdc
--- /dev/null
+++ b/lib/src/block_syntaxes/fenced_code_block_syntax.dart
@@ -0,0 +1,95 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import '../util.dart';
+import 'block_syntax.dart';
+
+/// Parses preformatted code blocks between two ~~~ or ``` sequences.
+///
+/// See the CommonMark spec: https://spec.commonmark.org/0.29/#fenced-code-blocks
+class FencedCodeBlockSyntax extends BlockSyntax {
+  const FencedCodeBlockSyntax();
+
+  @override
+  RegExp get pattern => codeFencePattern;
+
+  /// Whether [parser]'s current line opens a fenced code block.
+  ///
+  /// Besides matching [pattern], a fence made of backticks must not be
+  /// followed by an info string that itself contains a backtick.
+  @override
+  bool canParse(BlockParser parser) {
+    final fenceMatch = pattern.firstMatch(parser.current);
+    if (fenceMatch == null) return false;
+
+    final fence = fenceMatch[1]!;
+    if (fence.codeUnitAt(0) != $backquote) return true;
+
+    // From the CommonMark spec:
+    //
+    // > If the info string comes after a backtick fence, it may not contain
+    // > any backtick characters.
+    final info = fenceMatch[2]!;
+    return !info.codeUnits.contains($backquote);
+  }
+
+  /// Returns the lines after the opening fence, up to a line that matches
+  /// [pattern] and whose first capture group starts with [endBlock].
+  ///
+  /// The opening line and the closing line are consumed but not returned. A
+  /// missing [endBlock] is treated as the empty string.
+  @override
+  List<String> parseChildLines(BlockParser parser, [String? endBlock]) {
+    final closingFence = endBlock ?? '';
+    final codeLines = <String>[];
+
+    // Skip the opening fence.
+    parser.advance();
+
+    while (!parser.isDone) {
+      final line = parser.current;
+      final fenceMatch = pattern.firstMatch(line);
+      // Every line is consumed, whether it is content or the closing fence.
+      parser.advance();
+      if (fenceMatch != null && fenceMatch[1]!.startsWith(closingFence)) break;
+      codeLines.add(line);
+    }
+
+    return codeLines;
+  }
+
+  /// Builds `<pre><code>` from the fenced lines.
+  ///
+  /// When the info string is not blank, its first word is stored on the
+  /// `code` element as the class `language-<word>`.
+  @override
+  Node parse(BlockParser parser) {
+    // Get the syntax identifier, if there is one.
+    final opening = pattern.firstMatch(parser.current)!;
+    final fence = opening.group(1);
+    final rawInfo = opening.group(2)!;
+
+    // The Markdown tests expect a trailing newline; the extra empty entry
+    // produces it when the lines are joined.
+    final joined = (parseChildLines(parser, fence)..add('')).join('\n');
+    final code = Element.text(
+      'code',
+      parser.document.encodeHtml ? escapeHtml(joined) : joined,
+    );
+
+    // the info-string should be trimmed
+    // http://spec.commonmark.org/0.22/#example-100
+    final info = rawInfo.trim();
+    if (info.isNotEmpty) {
+      // only use the first word in the syntax
+      // http://spec.commonmark.org/0.22/#example-100
+      final firstSpace = info.indexOf(' ');
+      var language = firstSpace < 0 ? info : info.substring(0, firstSpace);
+      if (parser.document.encodeHtml) {
+        language = escapeHtmlAttribute(language);
+      }
+      code.attributes['class'] = 'language-$language';
+    }
+
+    return Element('pre', [code]);
+  }
+}
diff --git a/lib/src/block_syntaxes/header_syntax.dart b/lib/src/block_syntaxes/header_syntax.dart
new file mode 100644
index 0000000..7d2b94e
--- /dev/null
+++ b/lib/src/block_syntaxes/header_syntax.dart
@@ -0,0 +1,25 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Parses atx-style headers: `## Header ##`.
+class HeaderSyntax extends BlockSyntax {
+  const HeaderSyntax();
+
+  @override
+  RegExp get pattern => headerPattern;
+
+  /// Consumes the current line and returns an `h<level>` element.
+  ///
+  /// The level is the length of [pattern]'s first capture group; the second
+  /// group, trimmed, becomes the element's unparsed content.
+  @override
+  Node parse(BlockParser parser) {
+    final headerMatch = pattern.firstMatch(parser.current)!;
+    parser.advance();
+
+    final levelMarker = headerMatch.group(1)!;
+    final title = headerMatch.group(2)!.trim();
+    return Element('h${levelMarker.length}', [UnparsedContent(title)]);
+  }
+}
diff --git a/lib/src/block_syntaxes/header_with_id_syntax.dart b/lib/src/block_syntaxes/header_with_id_syntax.dart
new file mode 100644
index 0000000..15ad231
--- /dev/null
+++ b/lib/src/block_syntaxes/header_with_id_syntax.dart
@@ -0,0 +1,20 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import 'block_syntax.dart';
+import 'header_syntax.dart';
+
+/// Parses atx-style headers, and adds generated IDs to the generated elements.
+class HeaderWithIdSyntax extends HeaderSyntax {
+  const HeaderWithIdSyntax();
+
+  /// Parses the header with the superclass, then sets its `generatedId` to
+  /// the anchor computed by [BlockSyntax.generateAnchorHash].
+  @override
+  Node parse(BlockParser parser) {
+    final header = super.parse(parser) as Element;
+    return header..generatedId = BlockSyntax.generateAnchorHash(header);
+  }
+}
diff --git a/lib/src/block_syntaxes/horizontal_rule_syntax.dart b/lib/src/block_syntaxes/horizontal_rule_syntax.dart
new file mode 100644
index 0000000..12e7839
--- /dev/null
+++ b/lib/src/block_syntaxes/horizontal_rule_syntax.dart
@@ -0,0 +1,22 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Parses horizontal rules like `---`, `_ _ _`, `* * *`, etc.
+class HorizontalRuleSyntax extends BlockSyntax {
+  const HorizontalRuleSyntax();
+
+  @override
+  RegExp get pattern => hrPattern;
+
+  /// Consumes the rule's line and returns an empty `hr` element.
+  @override
+  Node parse(BlockParser parser) {
+    final rule = Element.empty('hr');
+    parser.advance();
+    return rule;
+  }
+}
diff --git a/lib/src/block_syntaxes/list_syntax.dart b/lib/src/block_syntaxes/list_syntax.dart
new file mode 100644
index 0000000..06b1cf9
--- /dev/null
+++ b/lib/src/block_syntaxes/list_syntax.dart
@@ -0,0 +1,222 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// The raw lines collected for one list item before they are parsed.
+class ListItem {
+  /// The lines that make up this item.
+  final List<String> lines;
+
+  // NOTE(review): nothing visible in this file reads or writes this flag;
+  // confirm whether it is still needed.
+  bool forceBlock = false;
+
+  ListItem(this.lines);
+}
+
+/// Base class for both ordered and unordered lists.
+///
+/// Subclasses supply [pattern] and [listTag]; collecting the lines of each
+/// item and turning them into `li` elements is shared in [parse].
+abstract class ListSyntax extends BlockSyntax {
+ /// Whether the list item on [parser]'s current line has text after its
+ /// marker. An item without such text may not end the preceding block.
+ @override
+ bool canEndBlock(BlockParser parser) {
+ // An empty list cannot interrupt a paragraph. See
+ // https://spec.commonmark.org/0.29/#example-255.
+ // Deciding this requires looking at the current line, which is why the
+ // [parser] is inspected here.
+ // NOTE(review): the `!` assumes the current line matches [pattern] —
+ // presumably callers check [canParse] first; confirm.
+ final match = pattern.firstMatch(parser.current)!;
+ // The seventh group, in both [olPattern] and [ulPattern] is the text
+ // after the delimiter.
+ return match[7]?.isNotEmpty ?? false;
+ }
+
+ /// The tag name of the element that [parse] returns.
+ String get listTag;
+
+ const ListSyntax();
+
+ /// A list of patterns that can start a valid block within a list item.
+ static final blocksInList = [
+ blockquotePattern,
+ headerPattern,
+ hrPattern,
+ indentPattern,
+ ulPattern,
+ olPattern
+ ];
+
+ // Matches the (possibly empty) run of spaces and tabs that a line starts
+ // with; it is applied with `matchAsPrefix`.
+ static final _whitespaceRe = RegExp('[ \t]*');
+
+ /// Parses a whole list starting at [parser]'s current line.
+ ///
+ /// First the lines belonging to each item are gathered, then every item is
+ /// parsed with its own [BlockParser] into an `li` element. If no blank lines
+ /// were seen between items or between blocks inside an item, top-level `p`
+ /// elements inside the items are replaced by their children. For an `ol`
+ /// whose first number is not 1, a `start` attribute is added.
+ @override
+ Node parse(BlockParser parser) {
+ final items = <ListItem>[];
+ var childLines = <String>[];
+
+ // Closes the item being collected, if it has any lines, and starts a
+ // fresh line buffer.
+ void endItem() {
+ if (childLines.isNotEmpty) {
+ items.add(ListItem(childLines));
+ childLines = <String>[];
+ }
+ }
+
+ // Result of the most recent [tryMatch] call; the branches below read its
+ // groups right after a successful call.
+ late Match? match;
+ bool tryMatch(RegExp pattern) {
+ match = pattern.firstMatch(parser.current);
+ return match != null;
+ }
+
+ // Marker of the list being parsed; a different marker ends the list.
+ String? listMarker;
+ // Indentation that continuation lines of the current item must have.
+ String? indent;
+ // In case the first number in an ordered list is not 1, use it as the
+ // "start".
+ int? startNumber;
+
+ while (!parser.isDone) {
+ final leadingSpace =
+ _whitespaceRe.matchAsPrefix(parser.current)!.group(0)!;
+ final leadingExpandedTabLength = _expandedTabLength(leadingSpace);
+ if (tryMatch(emptyPattern)) {
+ if (emptyPattern.hasMatch(parser.next ?? '')) {
+ // Two blank lines ends a list.
+ break;
+ }
+ // Add a blank line to the current list item.
+ childLines.add('');
+ } else if (indent != null && indent.length <= leadingExpandedTabLength) {
+ // Strip off indent and add to current item.
+ // Tabs in the leading whitespace are expanded to spaces first so that
+ // the space-only [indent] can be removed from the front.
+ final line = parser.current
+ .replaceFirst(leadingSpace, ' ' * leadingExpandedTabLength)
+ .replaceFirst(indent, '');
+ childLines.add(line);
+ } else if (tryMatch(hrPattern)) {
+ // Horizontal rule takes precedence to a new list item.
+ break;
+ } else if (tryMatch(ulPattern) || tryMatch(olPattern)) {
+ final precedingWhitespace = match![1]!;
+ final digits = match![2] ?? '';
+ if (startNumber == null && digits.isNotEmpty) {
+ startNumber = int.parse(digits);
+ }
+ final marker = match![3]!;
+ final firstWhitespace = match![5] ?? '';
+ final restWhitespace = match![6] ?? '';
+ final content = match![7] ?? '';
+ final isBlank = content.isEmpty;
+ if (listMarker != null && listMarker != marker) {
+ // Changing the bullet or ordered list delimiter starts a new list.
+ break;
+ }
+ listMarker = marker;
+ final markerAsSpaces = ' ' * (digits.length + marker.length);
+ if (isBlank) {
+ // See http://spec.commonmark.org/0.28/#list-items under "3. Item
+ // starting with a blank line."
+ //
+ // If the list item starts with a blank line, the final piece of the
+ // indentation is just a single space.
+ indent = '$precedingWhitespace$markerAsSpaces ';
+ } else if (restWhitespace.length >= 4) {
+ // See http://spec.commonmark.org/0.28/#list-items under "2. Item
+ // starting with indented code."
+ //
+ // If the list item starts with indented code, we need to _not_ count
+ // any indentation past the required whitespace character.
+ indent = precedingWhitespace + markerAsSpaces + firstWhitespace;
+ } else {
+ indent = precedingWhitespace +
+ markerAsSpaces +
+ firstWhitespace +
+ restWhitespace;
+ }
+ // End the current list item and start a new one.
+ endItem();
+ childLines.add(restWhitespace + content);
+ } else if (BlockSyntax.isAtBlockEnd(parser)) {
+ // Done with the list.
+ break;
+ } else {
+ // If the previous item is a blank line, this means we're done with the
+ // list and are starting a new top-level paragraph.
+ if ((childLines.isNotEmpty) && (childLines.last == '')) {
+ parser.encounteredBlankLine = true;
+ break;
+ }
+
+ // Anything else is paragraph continuation text.
+ childLines.add(parser.current);
+ }
+ parser.advance();
+ }
+
+ // Flush the item that was still being collected when the loop ended.
+ endItem();
+ final itemNodes = <Element>[];
+
+ items.forEach(_removeLeadingEmptyLine);
+ final anyEmptyLines = _removeTrailingEmptyLines(items);
+ var anyEmptyLinesBetweenBlocks = false;
+
+ // Parse every item's lines on their own and note whether any of the nested
+ // parsers came across a blank line.
+ for (final item in items) {
+ final itemParser = BlockParser(item.lines, parser.document);
+ final children = itemParser.parseLines();
+ itemNodes.add(Element('li', children));
+ anyEmptyLinesBetweenBlocks =
+ anyEmptyLinesBetweenBlocks || itemParser.encounteredBlankLine;
+ }
+
+ // Must strip paragraph tags if the list is "tight".
+ // http://spec.commonmark.org/0.28/#lists
+ final listIsTight = !anyEmptyLines && !anyEmptyLinesBetweenBlocks;
+
+ if (listIsTight) {
+ // We must post-process the list items, converting any top-level paragraph
+ // elements to just text elements.
+ for (final item in itemNodes) {
+ final children = item.children;
+ if (children != null) {
+ for (var i = 0; i < children.length; i++) {
+ final child = children[i];
+ if (child is Element && child.tag == 'p') {
+ // Replace the paragraph, in place, with its own children.
+ children.removeAt(i);
+ children.insertAll(i, child.children!);
+ }
+ }
+ }
+ }
+ }
+
+ // Only an `ol` that does not begin at 1 gets an explicit `start`.
+ if (listTag == 'ol' && startNumber != 1) {
+ return Element(listTag, itemNodes)..attributes['start'] = '$startNumber';
+ } else {
+ return Element(listTag, itemNodes);
+ }
+ }
+
+ /// Removes the first line of [item] if that line matches [emptyPattern].
+ void _removeLeadingEmptyLine(ListItem item) {
+ if (item.lines.isNotEmpty && emptyPattern.hasMatch(item.lines.first)) {
+ item.lines.removeAt(0);
+ }
+ }
+
+ /// Removes any trailing empty lines and notes whether any items are separated
+ /// by such lines.
+ ///
+ /// Returns `true` if an empty line was removed from the end of any item
+ /// other than the last one.
+ bool _removeTrailingEmptyLines(List<ListItem> items) {
+ var anyEmpty = false;
+ for (var i = 0; i < items.length; i++) {
+ // Items that consist of exactly one line are left untouched.
+ if (items[i].lines.length == 1) continue;
+ while (items[i].lines.isNotEmpty &&
+ emptyPattern.hasMatch(items[i].lines.last)) {
+ if (i < items.length - 1) {
+ anyEmpty = true;
+ }
+ items[i].lines.removeLast();
+ }
+ }
+ return anyEmpty;
+ }
+
+ /// Returns the length of [input] when each tab (code unit `0x9`) advances to
+ /// the next multiple of four and every other code unit counts as one.
+ static int _expandedTabLength(String input) {
+ var length = 0;
+ for (final char in input.codeUnits) {
+ length += char == 0x9 ? 4 - (length % 4) : 1;
+ }
+ return length;
+ }
+}
diff --git a/lib/src/block_syntaxes/long_block_html_syntax.dart b/lib/src/block_syntaxes/long_block_html_syntax.dart
new file mode 100644
index 0000000..8332bce
--- /dev/null
+++ b/lib/src/block_syntaxes/long_block_html_syntax.dart
@@ -0,0 +1,35 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import 'block_html_syntax.dart';
+
+/// A BlockHtmlSyntax that has a specific `endPattern`.
+///
+/// In practice this means that the syntax dominates; it is allowed to eat
+/// many lines, including blank lines, before matching its `endPattern`.
+class LongBlockHtmlSyntax extends BlockHtmlSyntax {
+  /// The regex that a line must match for the block to end.
+  final RegExp _endPattern;
+
+  @override
+  final RegExp pattern;
+
+  /// Creates a syntax from the regex sources [pattern] and [endPattern].
+  LongBlockHtmlSyntax(String pattern, String endPattern)
+      : pattern = RegExp(pattern),
+        _endPattern = RegExp(endPattern);
+
+  /// Collects lines up to and including the first one that matches the end
+  /// pattern, or up to the end of the input, and returns them joined and
+  /// right-trimmed as a [Text].
+  @override
+  Node parse(BlockParser parser) {
+    final htmlLines = <String>[];
+
+    // Eat until we hit [_endPattern].
+    var sawEnd = false;
+    while (!sawEnd && !parser.isDone) {
+      htmlLines.add(parser.current);
+      sawEnd = parser.matches(_endPattern);
+      if (!sawEnd) parser.advance();
+    }
+
+    // Step past the line the loop stopped on. This call is also made when the
+    // input ran out before the end pattern was seen.
+    parser.advance();
+    return Text(htmlLines.join('\n').trimRight());
+  }
+}
diff --git a/lib/src/block_syntaxes/ordered_list_syntax.dart b/lib/src/block_syntaxes/ordered_list_syntax.dart
new file mode 100644
index 0000000..61570a3
--- /dev/null
+++ b/lib/src/block_syntaxes/ordered_list_syntax.dart
@@ -0,0 +1,17 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../patterns.dart';
+import 'list_syntax.dart';
+
+/// Parses ordered lists.
+class OrderedListSyntax extends ListSyntax {
+  const OrderedListSyntax();
+
+  /// The tag name of the list element: `ol`.
+  @override
+  String get listTag => 'ol';
+
+  /// Lines are recognized with [olPattern].
+  @override
+  RegExp get pattern => olPattern;
+}
diff --git a/lib/src/block_syntaxes/other_tag_block_html_syntax.dart b/lib/src/block_syntaxes/other_tag_block_html_syntax.dart
new file mode 100644
index 0000000..edb5bfb
--- /dev/null
+++ b/lib/src/block_syntaxes/other_tag_block_html_syntax.dart
@@ -0,0 +1,25 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../block_parser.dart';
+import 'block_tag_block_html_syntax.dart';
+
+/// Detects "other" block-level HTML: a line that holds nothing but a single
+/// opening or closing tag.
+class OtherTagBlockHtmlSyntax extends BlockTagBlockHtmlSyntax {
+  // Really hacky way to detect "other" HTML. This matches:
+  //
+  // * up to three leading spaces
+  // * open bracket and maybe a slash ("<" or "</")
+  // * some word characters
+  // * either:
+  //   * a close bracket, or
+  //   * whitespace followed by not-brackets followed by a close bracket
+  // * possible whitespace and the end of the line.
+  //
+  // Held in a static so the expression is compiled once rather than on every
+  // read of [pattern].
+  static final _pattern = RegExp(r'^ {0,3}</?\w+(?:>|\s+[^>]*>)\s*$');
+
+  const OtherTagBlockHtmlSyntax();
+
+  /// Always `false`: this syntax never reports itself as able to end a block.
+  @override
+  bool canEndBlock(BlockParser parser) => false;
+
+  /// The single-tag line pattern described above.
+  @override
+  RegExp get pattern => _pattern;
+}
diff --git a/lib/src/block_syntaxes/paragraph_syntax.dart b/lib/src/block_syntaxes/paragraph_syntax.dart
new file mode 100644
index 0000000..aaf4de7
--- /dev/null
+++ b/lib/src/block_syntaxes/paragraph_syntax.dart
@@ -0,0 +1,181 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../document.dart';
+import '../patterns.dart';
+import '../util.dart';
+import 'block_syntax.dart';
+
+/// Parses paragraphs of regular text.
+///
+/// This syntax accepts any line ([canParse] is always `true`). Link reference
+/// definitions found at the front of the collected lines are recorded on the
+/// document and removed from the paragraph.
+class ParagraphSyntax extends BlockSyntax {
+  /// Matches the start of a line that might open a link reference definition:
+  /// up to three spaces followed by `[`.
+  static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\[');
+
+  /// Matches a string that is empty or made up solely of whitespace.
+  static final _whitespacePattern = RegExp(r'^\s*$');
+
+  /// Matches one complete link reference definition.
+  ///
+  /// Compiled once here rather than on every call to
+  /// [_parseReflinkDefinition], which runs repeatedly while the extent of a
+  /// definition is being searched for.
+  static final _reflinkDefinitionPattern = RegExp(
+    // Leading indentation.
+    '''^[ ]{0,3}'''
+    // Reference id in brackets, and URL.
+    r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*'''
+    // Title in double or single quotes, or parens.
+    r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''',
+    multiLine: true,
+  );
+
+  @override
+  RegExp get pattern => dummyPattern;
+
+  @override
+  bool canEndBlock(BlockParser parser) => false;
+
+  const ParagraphSyntax();
+
+  @override
+  bool canParse(BlockParser parser) => true;
+
+  /// Collects lines up to the next block end and turns them into a `p`
+  /// element.
+  ///
+  /// Returns an empty [Text] node instead when the lines consisted solely of
+  /// link reference definitions.
+  @override
+  Node parse(BlockParser parser) {
+    final childLines = <String>[];
+
+    // Eat until we hit something that ends a paragraph.
+    while (!BlockSyntax.isAtBlockEnd(parser)) {
+      childLines.add(parser.current);
+      parser.advance();
+    }
+
+    final paragraphLines = _extractReflinkDefinitions(parser, childLines);
+    if (paragraphLines == null) {
+      // Paragraph consisted solely of reference link definitions.
+      return Text('');
+    } else {
+      final contents = UnparsedContent(paragraphLines.join('\n').trimRight());
+      return Element('p', [contents]);
+    }
+  }
+
+  /// Extracts reference link definitions from the front of the paragraph, and
+  /// returns the remaining paragraph lines.
+  ///
+  /// Every definition found is recorded through [_parseReflinkDefinition].
+  /// Returns `null` when no paragraph content is left over.
+  List<String>? _extractReflinkDefinitions(
+    BlockParser parser,
+    List<String> lines,
+  ) {
+    bool lineStartsReflinkDefinition(int i) =>
+        lines[i].startsWith(_reflinkDefinitionStart);
+
+    // [i] is the index of the first line not yet claimed by a definition.
+    var i = 0;
+    loopOverDefinitions:
+    while (true) {
+      // Check for reflink definitions.
+      if (!lineStartsReflinkDefinition(i)) {
+        // It's paragraph content from here on out.
+        break;
+      }
+      var contents = lines[i];
+      var j = i + 1;
+      while (j < lines.length) {
+        // Check to see if the _next_ line might start a new reflink definition.
+        // Even if it turns out not to be, but it started with a '[', then it
+        // is not a part of _this_ possible reflink definition.
+        if (lineStartsReflinkDefinition(j)) {
+          // Try to parse [contents] as a reflink definition.
+          if (_parseReflinkDefinition(parser, contents)) {
+            // Loop again, starting at the next possible reflink definition.
+            i = j;
+            continue loopOverDefinitions;
+          } else {
+            // Could not parse [contents] as a reflink definition.
+            break;
+          }
+        } else {
+          contents = '$contents\n${lines[j]}';
+          j++;
+        }
+      }
+      // End of the block.
+      if (_parseReflinkDefinition(parser, contents)) {
+        i = j;
+        break;
+      }
+
+      // It may be that there is a reflink definition starting at [i], but it
+      // does not extend all the way to [j], such as:
+      //
+      //     [link]: url     // line i
+      //     "title"
+      //     garbage
+      //     [link2]: url   // line j
+      //
+      // In this case, [i, i+1] is a reflink definition, and the rest is
+      // paragraph content.
+      while (j >= i) {
+        // This isn't the most efficient loop, what with the Iterable
+        // allocation (`getRange`) followed by a String allocation on every
+        // step, but we must walk backwards, checking each range.
+        contents = lines.getRange(i, j).join('\n');
+        if (_parseReflinkDefinition(parser, contents)) {
+          // That is the last reflink definition. The rest is paragraph
+          // content.
+          i = j;
+          break;
+        }
+        j--;
+      }
+      // If the walk found nothing, the lines from [i] on were not a reflink
+      // definition at all. Just paragraph content.
+
+      break;
+    }
+
+    if (i == lines.length) {
+      // No paragraph content.
+      return null;
+    } else {
+      // Ends with paragraph content.
+      return lines.sublist(i);
+    }
+  }
+
+  /// Parses [contents] as a reference link definition.
+  ///
+  /// On success the definition is added to the document's link references
+  /// under its normalized label, unless that label is already present, in
+  /// which case the existing entry is kept.
+  ///
+  /// Returns whether [contents] could be parsed as a reference link
+  /// definition.
+  bool _parseReflinkDefinition(BlockParser parser, String contents) {
+    final match = _reflinkDefinitionPattern.firstMatch(contents);
+    if (match == null) {
+      // Not a reference link definition.
+      return false;
+    }
+    if (match.match.length < contents.length) {
+      // The definition must span all of [contents]; extra text is no good.
+      return false;
+    }
+
+    final rawLabel = match[1]!;
+    final destination = match[2] ?? match[3]!;
+    final rawTitle = match[4];
+
+    // The label must contain at least one non-whitespace character.
+    if (_whitespacePattern.hasMatch(rawLabel)) {
+      return false;
+    }
+
+    // An empty title group means there is no title. Otherwise remove the
+    // surrounding "", '', or ().
+    final title = (rawTitle == null || rawTitle.isEmpty)
+        ? null
+        : rawTitle.substring(1, rawTitle.length - 1);
+
+    // References are case-insensitive, and internal whitespace is compressed.
+    final label = normalizeLinkLabel(rawLabel);
+
+    parser.document.linkReferences
+        .putIfAbsent(label, () => LinkReference(label, destination, title));
+    return true;
+  }
+}
diff --git a/lib/src/block_syntaxes/setext_header_syntax.dart b/lib/src/block_syntaxes/setext_header_syntax.dart
new file mode 100644
index 0000000..552e983
--- /dev/null
+++ b/lib/src/block_syntaxes/setext_header_syntax.dart
@@ -0,0 +1,71 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Parses setext-style headers: one or more lines of text followed by an
+/// underline matching [setextPattern].
+class SetextHeaderSyntax extends BlockSyntax {
+  @override
+  RegExp get pattern => dummyPattern;
+
+  const SetextHeaderSyntax();
+
+  /// Looks ahead from the current line for an underline.
+  ///
+  /// Returns `true` only if the current line and every line before the
+  /// underline could be read as paragraph text.
+  @override
+  bool canParse(BlockParser parser) {
+    if (!_interpretableAsParagraph(parser.current)) return false;
+
+    for (var offset = 1;; offset++) {
+      final ahead = parser.peek(offset);
+      // Ran out of lines without ever reaching an underline.
+      if (ahead == null) return false;
+      if (setextPattern.hasMatch(ahead)) return true;
+      // Anything that is not paragraph-like text rules the header out.
+      if (!_interpretableAsParagraph(ahead)) return false;
+    }
+  }
+
+  /// Consumes the text lines and the underline, producing an `h1` element for
+  /// an `=` underline and an `h2` element otherwise.
+  @override
+  Node parse(BlockParser parser) {
+    final textLines = <String>[];
+    String? headingTag;
+
+    while (headingTag == null && !parser.isDone) {
+      final line = parser.current;
+      final underline = setextPattern.firstMatch(line);
+      if (underline != null) {
+        // The underline: its first character decides the heading level.
+        headingTag = (underline[1]![0] == '=') ? 'h1' : 'h2';
+      } else {
+        // More text.
+        textLines.add(line);
+      }
+      parser.advance();
+    }
+
+    final joined = textLines.join('\n');
+    return Element(headingTag!, [UnparsedContent(joined.trimRight())]);
+  }
+
+  /// Whether [line] matches none of the patterns for indented code, code
+  /// fences, headers, blockquotes, horizontal rules, list items or empty
+  /// lines.
+  bool _interpretableAsParagraph(String line) =>
+      !indentPattern.hasMatch(line) &&
+      !codeFencePattern.hasMatch(line) &&
+      !headerPattern.hasMatch(line) &&
+      !blockquotePattern.hasMatch(line) &&
+      !hrPattern.hasMatch(line) &&
+      !ulPattern.hasMatch(line) &&
+      !olPattern.hasMatch(line) &&
+      !emptyPattern.hasMatch(line);
+}
diff --git a/lib/src/block_syntaxes/setext_header_with_id_syntax.dart b/lib/src/block_syntaxes/setext_header_with_id_syntax.dart
new file mode 100644
index 0000000..fffe992
--- /dev/null
+++ b/lib/src/block_syntaxes/setext_header_with_id_syntax.dart
@@ -0,0 +1,21 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import 'block_syntax.dart';
+import 'setext_header_syntax.dart';
+
+/// A [SetextHeaderSyntax] that also stamps each header element it produces
+/// with a generated ID.
+class SetextHeaderWithIdSyntax extends SetextHeaderSyntax {
+  const SetextHeaderWithIdSyntax();
+
+  /// Parses the header as the superclass does, then sets its `generatedId`
+  /// to the value of [BlockSyntax.generateAnchorHash] for that element.
+  @override
+  Node parse(BlockParser parser) {
+    final header = super.parse(parser) as Element;
+    return header..generatedId = BlockSyntax.generateAnchorHash(header);
+  }
+}
diff --git a/lib/src/block_syntaxes/table_syntax.dart b/lib/src/block_syntaxes/table_syntax.dart
new file mode 100644
index 0000000..cf0307a
--- /dev/null
+++ b/lib/src/block_syntaxes/table_syntax.dart
@@ -0,0 +1,212 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Parses tables.
+///
+/// A table is recognised by its second line: the divider row of hyphens and
+/// pipes matched by [tablePattern].
+class TableSyntax extends BlockSyntax {
+  @override
+  bool canEndBlock(BlockParser parser) => false;
+
+  @override
+  RegExp get pattern => dummyPattern;
+
+  const TableSyntax();
+
+  @override
+  bool canParse(BlockParser parser) {
+    // Note: matches *next* line, not the current one. We're looking for the
+    // bar separating the head row from the body rows.
+    return parser.matchesNext(tablePattern);
+  }
+
+  /// Parses a table into its three parts:
+  ///
+  /// * a head row of head cells (`<th>` cells)
+  /// * a divider of hyphens and pipes (not rendered)
+  /// * many body rows of body cells (`<td>` cells)
+  ///
+  /// Returns `null` when the head row does not have exactly one cell per
+  /// column of the divider. Body rows are padded with empty cells or
+  /// truncated so that each ends up with exactly that many cells. The `tbody`
+  /// element is left out when there are no body rows.
+  @override
+  Node? parse(BlockParser parser) {
+    final alignments = _parseAlignments(parser.next!);
+    final columnCount = alignments.length;
+    final headRow = _parseRow(parser, alignments, 'th');
+    if (headRow.children!.length != columnCount) {
+      // NOTE(review): [_parseRow] has already advanced the parser past the
+      // head line here; confirm that callers cope with that line having been
+      // consumed when no table is produced.
+      return null;
+    }
+    final head = Element('thead', [headRow]);
+
+    // Advance past the divider of hyphens.
+    parser.advance();
+
+    final rows = <Element>[];
+    while (!parser.isDone && !BlockSyntax.isAtBlockEnd(parser)) {
+      final row = _parseRow(parser, alignments, 'td');
+      final children = row.children;
+      if (children != null) {
+        while (children.length < columnCount) {
+          // Insert synthetic empty cells.
+          children.add(Element.empty('td'));
+        }
+        while (children.length > columnCount) {
+          // Drop cells beyond the declared number of columns.
+          children.removeLast();
+        }
+      }
+      rows.add(row);
+    }
+    if (rows.isEmpty) {
+      return Element('table', [head]);
+    } else {
+      final body = Element('tbody', rows);
+
+      return Element('table', [head, body]);
+    }
+  }
+
+  /// Reads the column alignments from the divider row [line].
+  ///
+  /// Returns one entry per column: `'center'`, `'left'` or `'right'` according
+  /// to where colons appear in the column, or `null` when there is none.
+  List<String?> _parseAlignments(String line) {
+    final startIndex = _walkPastOpeningPipe(line);
+
+    // Walk back from the end over trailing spaces and tabs, and over at most
+    // one closing pipe.
+    var endIndex = line.length - 1;
+    while (endIndex > 0) {
+      final ch = line.codeUnitAt(endIndex);
+      if (ch == $pipe) {
+        endIndex--;
+        break;
+      }
+      if (ch != $space && ch != $tab) {
+        break;
+      }
+      endIndex--;
+    }
+
+    // TODO: this walks [line] more often than needed; one lap should do it.
+    return line.substring(startIndex, endIndex + 1).split('|').map((column) {
+      column = column.trim();
+      if (column.startsWith(':') && column.endsWith(':')) return 'center';
+      if (column.startsWith(':')) return 'left';
+      if (column.endsWith(':')) return 'right';
+      return null;
+    }).toList();
+  }
+
+  /// Parses a table row at the current line into a table row element, with
+  /// parsed table cells, and advances [parser] past that line.
+  ///
+  /// [alignments] is used to annotate an alignment on each cell, and
+  /// [cellType] is used to declare either "td" or "th" cells.
+  Element _parseRow(
+    BlockParser parser,
+    List<String?> alignments,
+    String cellType,
+  ) {
+    final line = parser.current;
+    final cells = <String>[];
+    var index = _walkPastOpeningPipe(line);
+    final cellBuffer = StringBuffer();
+
+    while (true) {
+      if (index >= line.length) {
+        // This row ended without a trailing pipe, which is fine.
+        cells.add(cellBuffer.toString().trimRight());
+        cellBuffer.clear();
+        break;
+      }
+      final ch = line.codeUnitAt(index);
+      if (ch == $backslash) {
+        if (index == line.length - 1) {
+          // A table row ending in a backslash is not well-specified, but it
+          // looks like GitHub just allows the character as part of the text of
+          // the last cell.
+          cellBuffer.writeCharCode(ch);
+          cells.add(cellBuffer.toString().trimRight());
+          cellBuffer.clear();
+          break;
+        }
+        final escaped = line.codeUnitAt(index + 1);
+        if (escaped == $pipe) {
+          // GitHub Flavored Markdown has a strange bit here; the pipe is to be
+          // escaped before any other inline processing. One consequence, for
+          // example, is that "| `\|` |" should be parsed as a cell with a code
+          // element with text "|", rather than "\|". Most parsers are not
+          // compliant with this corner, but this is what is specified, and what
+          // GitHub does in practice.
+          cellBuffer.writeCharCode(escaped);
+        } else {
+          // The [InlineParser] will handle the escaping.
+          cellBuffer.writeCharCode(ch);
+          cellBuffer.writeCharCode(escaped);
+        }
+        index += 2;
+      } else if (ch == $pipe) {
+        cells.add(cellBuffer.toString().trimRight());
+        cellBuffer.clear();
+        // Walk forward past any whitespace which leads the next cell.
+        index++;
+        index = _walkPastWhitespace(line, index);
+        if (index >= line.length) {
+          // This row ended with a trailing pipe.
+          break;
+        }
+      } else {
+        cellBuffer.writeCharCode(ch);
+        index++;
+      }
+    }
+    parser.advance();
+    final row = [
+      for (final cell in cells) Element(cellType, [UnparsedContent(cell)])
+    ];
+
+    // Only cells that have a matching, non-null alignment get a style.
+    for (var i = 0; i < row.length && i < alignments.length; i++) {
+      if (alignments[i] == null) continue;
+      row[i].attributes['style'] = 'text-align: ${alignments[i]};';
+    }
+
+    return Element('tr', row);
+  }
+
+  /// Walks past whitespace in [line] starting at [index].
+  ///
+  /// Returns the index of the first character that is neither a space nor a
+  /// tab, or `line.length` if there is none.
+  int _walkPastWhitespace(String line, int index) {
+    while (index < line.length) {
+      final ch = line.codeUnitAt(index);
+      if (ch != $space && ch != $tab) {
+        break;
+      }
+      index++;
+    }
+    return index;
+  }
+
+  /// Walks past the opening pipe (and any whitespace that surrounds it) in
+  /// [line].
+  ///
+  /// Returns the index of the first non-whitespace character after the pipe.
+  /// If no opening pipe is found, this just returns the index of the first
+  /// non-whitespace character.
+  int _walkPastOpeningPipe(String line) {
+    var index = 0;
+    while (index < line.length) {
+      final ch = line.codeUnitAt(index);
+      if (ch == $pipe) {
+        // Found the opening pipe: the first cell starts after it and after
+        // any whitespace that follows it.
+        return _walkPastWhitespace(line, index + 1);
+      }
+      if (ch != $space && ch != $tab) {
+        // No leading pipe.
+        break;
+      }
+      index++;
+    }
+    return index;
+  }
+}
diff --git a/lib/src/block_syntaxes/unordered_list_syntax.dart b/lib/src/block_syntaxes/unordered_list_syntax.dart
new file mode 100644
index 0000000..6b1ae10
--- /dev/null
+++ b/lib/src/block_syntaxes/unordered_list_syntax.dart
@@ -0,0 +1,17 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../patterns.dart';
+import 'list_syntax.dart';
+
+/// The [ListSyntax] for unordered lists: it matches on [ulPattern] and
+/// reports `ul` as its list tag.
+class UnorderedListSyntax extends ListSyntax {
+  const UnorderedListSyntax();
+
+  /// The tag name for this kind of list.
+  @override
+  String get listTag {
+    return 'ul';
+  }
+
+  /// The shared unordered-list pattern, [ulPattern].
+  @override
+  RegExp get pattern {
+    return ulPattern;
+  }
+}
diff --git a/lib/src/document.dart b/lib/src/document.dart
index 5ee75ed..1428dcd 100644
--- a/lib/src/document.dart
+++ b/lib/src/document.dart
@@ -4,8 +4,10 @@
import 'ast.dart';
import 'block_parser.dart';
+import 'block_syntaxes/block_syntax.dart';
import 'extension_set.dart';
import 'inline_parser.dart';
+import 'inline_syntaxes/inline_syntax.dart';
/// Maintains the context needed to parse a Markdown document.
class Document {
diff --git a/lib/src/extension_set.dart b/lib/src/extension_set.dart
index 9c8b823..c0e0b13 100644
--- a/lib/src/extension_set.dart
+++ b/lib/src/extension_set.dart
@@ -1,5 +1,13 @@
-import 'block_parser.dart';
-import 'inline_parser.dart';
+import 'block_syntaxes/block_syntax.dart';
+import 'block_syntaxes/fenced_code_block_syntax.dart';
+import 'block_syntaxes/header_with_id_syntax.dart';
+import 'block_syntaxes/setext_header_with_id_syntax.dart';
+import 'block_syntaxes/table_syntax.dart';
+import 'inline_syntaxes/autolink_extension_syntax.dart';
+import 'inline_syntaxes/emoji_syntax.dart';
+import 'inline_syntaxes/inline_html_syntax.dart';
+import 'inline_syntaxes/inline_syntax.dart';
+import 'inline_syntaxes/strikethrough_syntax.dart';
/// ExtensionSets provide a simple grouping mechanism for common Markdown
/// flavors.
diff --git a/lib/src/html_renderer.dart b/lib/src/html_renderer.dart
index 5e74b06..f317707 100644
--- a/lib/src/html_renderer.dart
+++ b/lib/src/html_renderer.dart
@@ -5,10 +5,10 @@
import 'dart:convert';
import 'ast.dart';
-import 'block_parser.dart';
+import 'block_syntaxes/block_syntax.dart';
import 'document.dart';
import 'extension_set.dart';
-import 'inline_parser.dart';
+import 'inline_syntaxes/inline_syntax.dart';
/// Converts the given string of Markdown to HTML.
String markdownToHtml(
diff --git a/lib/src/inline_parser.dart b/lib/src/inline_parser.dart
index d41d050..e731776 100644
--- a/lib/src/inline_parser.dart
+++ b/lib/src/inline_parser.dart
@@ -6,8 +6,17 @@
import 'ast.dart';
import 'document.dart';
-import 'emojis.dart';
-import 'util.dart';
+import 'inline_syntaxes/autolink_syntax.dart';
+import 'inline_syntaxes/code_syntax.dart';
+import 'inline_syntaxes/delimiter_syntax.dart';
+import 'inline_syntaxes/email_autolink_syntax.dart';
+import 'inline_syntaxes/emphasis_syntax.dart';
+import 'inline_syntaxes/escape_syntax.dart';
+import 'inline_syntaxes/image_syntax.dart';
+import 'inline_syntaxes/inline_syntax.dart';
+import 'inline_syntaxes/line_break_syntax.dart';
+import 'inline_syntaxes/link_syntax.dart';
+import 'inline_syntaxes/text_syntax.dart';
/// Maintains the internal state needed to parse inline span elements in
/// Markdown.
@@ -91,7 +100,7 @@
syntaxes.addAll(_defaultSyntaxes);
}
- if (_encodeHtml) {
+ if (encodeHtml) {
syntaxes.addAll(_htmlSyntaxes);
}
}
@@ -338,7 +347,7 @@
}
/// Push [delimiter] onto the stack of [Delimiter]s.
- void _pushDelimiter(Delimiter delimiter) => _delimiterStack.add(delimiter);
+ void pushDelimiter(Delimiter delimiter) => _delimiterStack.add(delimiter);
bool get isDone => pos == source.length;
@@ -351,1211 +360,5 @@
start = pos;
}
- bool get _encodeHtml => document.encodeHtml;
-}
-
-/// Represents one kind of Markdown tag that can be parsed.
-abstract class InlineSyntax {
- final RegExp pattern;
-
- /// The first character of [pattern], to be used as an efficient first check
- /// that this syntax matches the current parser position.
- final int? _startCharacter;
-
- /// Create a new [InlineSyntax] which matches text on [pattern].
- ///
- /// If [startCharacter] is passed, it is used as a pre-matching check which
- /// is faster than matching against [pattern].
- ///
- /// If [caseSensitive] is disabled, then case is ignored when matching
- /// the [pattern].
- InlineSyntax(String pattern, {int? startCharacter, bool caseSensitive = true})
- : pattern =
- RegExp(pattern, multiLine: true, caseSensitive: caseSensitive),
- _startCharacter = startCharacter;
-
- /// Tries to match at the parser's current position.
- ///
- /// The parser's position can be overriden with [startMatchPos].
- /// Returns whether or not the pattern successfully matched.
- bool tryMatch(InlineParser parser, [int? startMatchPos]) {
- startMatchPos ??= parser.pos;
-
- // Before matching with the regular expression [pattern], which can be
- // expensive on some platforms, check if even the first character matches
- // this syntax.
- if (_startCharacter != null &&
- parser.source.codeUnitAt(startMatchPos) != _startCharacter) {
- return false;
- }
-
- final startMatch = pattern.matchAsPrefix(parser.source, startMatchPos);
- if (startMatch == null) return false;
-
- // Write any existing plain text up to this point.
- parser.writeText();
-
- if (onMatch(parser, startMatch)) parser.consume(startMatch.match.length);
- return true;
- }
-
- /// Processes [match], adding nodes to [parser] and possibly advancing
- /// [parser].
- ///
- /// Returns whether the caller should advance [parser] by `match[0].length`.
- bool onMatch(InlineParser parser, Match match);
-}
-
-/// Represents a hard line break.
-///
-/// A hard break is a newline preceded by a backslash or by two or more
-/// spaces; a single trailing space does not count.
-class LineBreakSyntax extends InlineSyntax {
-  LineBreakSyntax() : super(r'(?:\\|  +)\n');
-
-  /// Create a void <br> element.
-  @override
-  bool onMatch(InlineParser parser, Match match) {
-    parser.addNode(Element.empty('br'));
-    return true;
-  }
-}
-
-/// Matches stuff that should just be passed through as straight text.
-class TextSyntax extends InlineSyntax {
- final String substitute;
-
- /// Create a new [TextSyntax] which matches text on [pattern].
- ///
- /// If [sub] is passed, it is used as a simple replacement for [pattern]. If
- /// [startCharacter] is passed, it is used as a pre-matching check which is
- /// faster than matching against [pattern].
- TextSyntax(String pattern, {String sub = '', int? startCharacter})
- : substitute = sub,
- super(pattern, startCharacter: startCharacter);
-
- /// Adds a [Text] node to [parser] and returns `true` if there is a
- /// [substitute], as long as the preceding character (if any) is not a `/`.
- ///
- /// Otherwise, the parser is advanced by the length of [match] and `false` is
- /// returned.
- @override
- bool onMatch(InlineParser parser, Match match) {
- if (substitute.isEmpty ||
- (match.start > 0 &&
- match.input.substring(match.start - 1, match.start) == '/')) {
- // Just use the original matched text.
- parser.advanceBy(match.match.length);
- return false;
- }
-
- // Insert the substitution.
- parser.addNode(Text(substitute));
- return true;
- }
-}
-
-/// Escape punctuation preceded by a backslash.
-///
-/// The escaped character is emitted as text without its backslash. When the
-/// parser encodes HTML, `"`, `<` and `>` are emitted as entities instead.
-class EscapeSyntax extends InlineSyntax {
-  EscapeSyntax() : super(r'''\\[!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]''');
-
-  @override
-  bool onMatch(InlineParser parser, Match match) {
-    final chars = match.match;
-    // Index 0 is the backslash; index 1 is the escaped character.
-    final char = chars.codeUnitAt(1);
-    // Insert the substitution. Why these three characters are replaced with
-    // their equivalent HTML entity referenced appears to be missing from the
-    // CommonMark spec, but is very present in all of the examples.
-    // https://talk.commonmark.org/t/entity-ification-of-quotes-and-brackets-missing-from-spec/3207
-    if (parser._encodeHtml) {
-      if (char == $double_quote) {
-        parser.addNode(Text('&quot;'));
-      } else if (char == $lt) {
-        parser.addNode(Text('&lt;'));
-      } else if (char == $gt) {
-        parser.addNode(Text('&gt;'));
-      } else {
-        parser.addNode(Text(chars[1]));
-      }
-    } else {
-      parser.addNode(Text(chars[1]));
-    }
-    return true;
-  }
-}
-
-/// Leave inline HTML tags alone, from
-/// [CommonMark 0.28](http://spec.commonmark.org/0.28/#raw-html).
-///
-/// This is not actually a good definition (nor CommonMark's) of an HTML tag,
-/// but it is fast. It will leave text like `<a href='hi">` alone, which is
-/// incorrect.
-///
-/// TODO(srawlins): improve accuracy while ensuring performance, once
-/// Markdown benchmarking is more mature.
-class InlineHtmlSyntax extends TextSyntax {
- InlineHtmlSyntax()
- : super(
- r'<[/!?]?[A-Za-z][A-Za-z0-9-]*(?:\s[^>]*)?>',
- startCharacter: $lt,
- );
-}
-
-/// Matches autolinks like `<foo@bar.example.com>`.
-///
-/// See <http://spec.commonmark.org/0.28/#email-address>.
-class EmailAutolinkSyntax extends InlineSyntax {
- static final _email =
- r'''[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}'''
- r'''[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*''';
-
- EmailAutolinkSyntax() : super('<($_email)>', startCharacter: $lt);
-
- @override
- bool onMatch(InlineParser parser, Match match) {
- final url = match[1]!;
- final text = parser._encodeHtml ? escapeHtml(url) : url;
- final anchor = Element.text('a', text);
- anchor.attributes['href'] = Uri.encodeFull('mailto:$url');
- parser.addNode(anchor);
-
- return true;
- }
-}
-
-/// Matches autolinks like `<http://foo.com>`.
-class AutolinkSyntax extends InlineSyntax {
- AutolinkSyntax() : super(r'<(([a-zA-Z][a-zA-Z\-\+\.]+):(?://)?[^\s>]*)>');
-
- @override
- bool onMatch(InlineParser parser, Match match) {
- final url = match[1]!;
- final text = parser._encodeHtml ? escapeHtml(url) : url;
- final anchor = Element.text('a', text);
- anchor.attributes['href'] = Uri.encodeFull(url);
- parser.addNode(anchor);
-
- return true;
- }
-}
-
-/// Matches autolinks like `http://foo.com`.
-class AutolinkExtensionSyntax extends InlineSyntax {
- /// Broken up parts of the autolink regex for reusability and readability
-
- // Autolinks can only come at the beginning of a line, after whitespace, or
- // any of the delimiting characters *, _, ~, and (.
- static const start = r'(?:^|[\s*_~(>])';
-
- // An extended url autolink will be recognized when one of the schemes
- // http://, https://, or ftp://, followed by a valid domain
- static const scheme = r'(?:(?:https?|ftp):\/\/|www\.)';
-
- // A valid domain consists of alphanumeric characters, underscores (_),
- // hyphens (-) and periods (.). There must be at least one period, and no
- // underscores may be present in the last two segments of the domain.
- static const domainPart = r'\w\-';
- static const domain = '[$domainPart][$domainPart.]+';
-
- // A valid domain consists of alphanumeric characters, underscores (_),
- // hyphens (-) and periods (.).
- static const path = r'[^\s<]*';
-
- // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not
- // be considered part of the autolink
- static const truncatingPunctuationPositive = '[?!.,:*_~]';
-
- static final regExpTrailingPunc = RegExp('$truncatingPunctuationPositive*\$');
- static final regExpEndsWithColon = RegExp(r'\&[a-zA-Z0-9]+;$');
- static final regExpWhiteSpace = RegExp(r'\s');
-
- AutolinkExtensionSyntax() : super('$start(($scheme)($domain)($path))');
-
- @override
- bool tryMatch(InlineParser parser, [int? startMatchPos]) {
- return super.tryMatch(parser, parser.pos > 0 ? parser.pos - 1 : 0);
- }
-
- @override
- bool onMatch(InlineParser parser, Match match) {
- var url = match[1]!;
- var href = url;
- var matchLength = url.length;
-
- if (url[0] == '>' || url.startsWith(regExpWhiteSpace)) {
- url = url.substring(1, url.length - 1);
- href = href.substring(1, href.length - 1);
- parser.pos++;
- matchLength--;
- }
-
- // Prevent accidental standard autolink matches
- if (url.endsWith('>') && parser.source[parser.pos - 1] == '<') {
- return false;
- }
-
- // When an autolink ends in ), we scan the entire autolink for the total
- // number of parentheses. If there is a greater number of closing
- // parentheses than opening ones, we don’t consider the last character
- // part of the autolink, in order to facilitate including an autolink
- // inside a parenthesis:
- // https://github.github.com/gfm/#example-600
- if (url.endsWith(')')) {
- final opening = _countChars(url, '(');
- final closing = _countChars(url, ')');
-
- if (closing > opening) {
- url = url.substring(0, url.length - 1);
- href = href.substring(0, href.length - 1);
- matchLength--;
- }
- }
-
- // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will
- // not be considered part of the autolink, though they may be included
- // in the interior of the link:
- // https://github.github.com/gfm/#example-599
- final trailingPunc = regExpTrailingPunc.firstMatch(url);
- if (trailingPunc != null) {
- final trailingLength = trailingPunc.match.length;
- url = url.substring(0, url.length - trailingLength);
- href = href.substring(0, href.length - trailingLength);
- matchLength -= trailingLength;
- }
-
- // If an autolink ends in a semicolon (;), we check to see if it appears
- // to resemble an
- // [entity reference](https://github.github.com/gfm/#entity-references);
- // if the preceding text is & followed by one or more alphanumeric
- // characters. If so, it is excluded from the autolink:
- // https://github.github.com/gfm/#example-602
- if (url.endsWith(';')) {
- final entityRef = regExpEndsWithColon.firstMatch(url);
- if (entityRef != null) {
- // Strip out HTML entity reference
- final entityRefLength = entityRef.match.length;
- url = url.substring(0, url.length - entityRefLength);
- href = href.substring(0, href.length - entityRefLength);
- matchLength -= entityRefLength;
- }
- }
-
- // The scheme http will be inserted automatically
- if (!href.startsWith('http://') &&
- !href.startsWith('https://') &&
- !href.startsWith('ftp://')) {
- href = 'http://$href';
- }
-
- final text = parser._encodeHtml ? escapeHtml(url) : url;
- final anchor = Element.text('a', text);
- anchor.attributes['href'] = Uri.encodeFull(href);
- parser.addNode(anchor);
-
- parser.consume(matchLength);
- return false;
- }
-
- int _countChars(String input, String char) {
- var count = 0;
-
- for (var i = 0; i < input.length; i++) {
- if (input[i] == char) count++;
- }
-
- return count;
- }
-}
-
-/// A delimiter indicating the possible "open" or possible "close" of a tag for
-/// a [DelimiterSyntax].
-abstract class Delimiter {
-  /// The [Text] node representing the plain text representing this delimiter.
-  abstract Text node;
-
-  /// The type of delimiter.
-  ///
-  /// For the two-character image delimiter, `![`, this is `!`.
-  int get char;
-
-  /// The number of delimiter characters.
-  int get length;
-
-  /// Whether the delimiter is active.
-  ///
-  /// Links cannot be nested, so we must "deactivate" any pending ones. For
-  /// example, take the following text:
-  ///
-  ///     Text [link and [more](links)](links).
-  ///
-  /// Once we have parsed `Text [`, there is one (pending) link in the state
-  /// stack. It is, by default, active. Once we parse the next possible link,
-  /// `[more](links)`, as a real link, we must deactivate the pending links
-  /// (just the one, in this case).
-  abstract bool isActive;
-
-  /// Whether this delimiter can open emphasis or strong emphasis.
-  bool get canOpen;
-
-  /// Whether this delimiter can close emphasis or strong emphasis.
-  bool get canClose;
-
-  /// The syntax which uses this delimiter to parse a tag.
-  DelimiterSyntax get syntax;
-}
-
-/// A simple delimiter implements the [Delimiter] interface with basic fields,
-/// and does not have the concept of "left-flanking" or "right-flanking".
-class SimpleDelimiter implements Delimiter {
- @override
- Text node;
-
- @override
- final int char;
-
- @override
- final int length;
-
- @override
- bool isActive;
-
- @override
- final bool canOpen;
-
- @override
- final bool canClose;
-
- @override
- final DelimiterSyntax syntax;
-
- final int endPos;
-
- SimpleDelimiter({
- required this.node,
- required this.char,
- required this.length,
- required this.canOpen,
- required this.canClose,
- required this.syntax,
- required this.endPos,
- }) : isActive = true;
-}
-
-/// An implementation of [Delimiter] which uses concepts of "left-flanking" and
-/// "right-flanking" to determine the values of [canOpen] and [canClose].
-///
-/// This is primarily used when parsing emphasis and strong emphasis, but can
-/// also be used by other extensions of [DelimiterSyntax].
-class DelimiterRun implements Delimiter {
- /// According to
- /// [CommonMark](https://spec.commonmark.org/0.29/#punctuation-character):
- ///
- /// > A punctuation character is an ASCII punctuation character or anything in
- /// > the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or
- /// > `Ps`.
- // This RegExp is inspired by
- // https://github.com/commonmark/commonmark.js/blob/1f7d09099c20d7861a674674a5a88733f55ff729/lib/inlines.js#L39.
- // I don't know if there is any way to simplify it or maintain it.
- static final RegExp punctuation = RegExp('['
- r'''!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~'''
- r'\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE'
- r'\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E'
- r'\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E'
- r'\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14'
- r'\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB'
- r'\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736'
- r'\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F'
- r'\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E'
- r'\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051'
- r'\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A'
- r'\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC'
- r'\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42'
- r'\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE'
- r'\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF'
- r'\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF'
- r'\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19'
- r'\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03'
- r'\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F'
- r'\uFF5B\uFF5D\uFF5F-\uFF65'
- ']');
-
- // TODO(srawlins): Unicode whitespace
- static final String whitespace = ' \t\r\n';
-
- @override
- Text node;
-
- @override
- final int char;
-
- @override
- int get length => node.text.length;
-
- @override
- bool isActive;
-
- @override
- final DelimiterSyntax syntax;
-
- final bool allowIntraWord;
-
- @override
- final bool canOpen;
-
- @override
- final bool canClose;
-
- final List<DelimiterTag> tags;
-
- DelimiterRun._({
- required this.node,
- required this.char,
- required this.syntax,
- required this.tags,
- required bool isLeftFlanking,
- required bool isRightFlanking,
- required bool isPrecededByPunctuation,
- required bool isFollowedByPunctuation,
- required this.allowIntraWord,
- }) : canOpen = isLeftFlanking &&
- (!isRightFlanking || allowIntraWord || isPrecededByPunctuation),
- canClose = isRightFlanking &&
- (!isLeftFlanking || allowIntraWord || isFollowedByPunctuation),
- isActive = true;
-
- /// Tries to parse a delimiter run from [runStart] (inclusive) to [runEnd]
- /// (exclusive).
- static DelimiterRun? tryParse(
- InlineParser parser,
- int runStart,
- int runEnd, {
- required DelimiterSyntax syntax,
- required List<DelimiterTag> tags,
- required Text node,
- bool allowIntraWord = false,
- }) {
- bool leftFlanking,
- rightFlanking,
- precededByPunctuation,
- followedByPunctuation;
- String preceding, following;
- if (runStart == 0) {
- rightFlanking = false;
- preceding = '\n';
- } else {
- preceding = parser.source.substring(runStart - 1, runStart);
- }
- precededByPunctuation = punctuation.hasMatch(preceding);
-
- if (runEnd == parser.source.length) {
- leftFlanking = false;
- following = '\n';
- } else {
- following = parser.source.substring(runEnd, runEnd + 1);
- }
- followedByPunctuation = punctuation.hasMatch(following);
-
- // http://spec.commonmark.org/0.30/#left-flanking-delimiter-run
- if (whitespace.contains(following)) {
- leftFlanking = false;
- } else {
- leftFlanking = !followedByPunctuation ||
- whitespace.contains(preceding) ||
- precededByPunctuation;
- }
-
- // http://spec.commonmark.org/0.30/#right-flanking-delimiter-run
- if (whitespace.contains(preceding)) {
- rightFlanking = false;
- } else {
- rightFlanking = !precededByPunctuation ||
- whitespace.contains(following) ||
- followedByPunctuation;
- }
-
- if (!leftFlanking && !rightFlanking) {
- // Could not parse a delimiter run.
- return null;
- }
-
- tags.sort((a, b) => a.indicatorLength.compareTo(b.indicatorLength));
-
- return DelimiterRun._(
- node: node,
- char: parser.charAt(runStart),
- syntax: syntax,
- tags: tags,
- isLeftFlanking: leftFlanking,
- isRightFlanking: rightFlanking,
- isPrecededByPunctuation: precededByPunctuation,
- isFollowedByPunctuation: followedByPunctuation,
- allowIntraWord: allowIntraWord,
- );
- }
-
- @override
- String toString() => '<char: $char, length: $length, canOpen: $canOpen, '
- 'canClose: $canClose>';
-}
-
-class DelimiterTag {
- DelimiterTag(this.tag, this.indicatorLength);
-
- // Tag name of the HTML element.
- final String tag;
-
- final int indicatorLength;
-}
-
-/// Matches syntax that has a pair of tags and becomes an element, like `*` for
-/// `<em>`. Allows nested tags.
-class DelimiterSyntax extends InlineSyntax {
- /// Whether this is parsed according to the same nesting rules as [emphasis
- /// delimiters][].
- ///
- /// [emphasis delimiters]: http://spec.commonmark.org/0.28/#can-open-emphasis
- final bool requiresDelimiterRun;
-
- /// Whether to allow intra-word delimiter runs. CommonMark emphasis and
- /// strong emphasis does not allow this, but GitHub-Flavored Markdown allows
- /// it on strikethrough.
- final bool allowIntraWord;
-
- final List<DelimiterTag>? tags;
-
- /// Creates a new [DelimiterSyntax] which matches text on [pattern].
- ///
- /// The [pattern] is used to find the matching text. If [requiresDelimiterRun]
- /// is passed, this syntax parses according to the same nesting rules as
- /// emphasis delimiters. If [startCharacter] is passed, it is used as a
- /// pre-matching check which is faster than matching against [pattern].
- DelimiterSyntax(
- String pattern, {
- this.requiresDelimiterRun = false,
- int? startCharacter,
- this.allowIntraWord = false,
- this.tags,
- }) : super(pattern, startCharacter: startCharacter);
-
- @override
- bool onMatch(InlineParser parser, Match match) {
- final runLength = match.group(0)!.length;
- final matchStart = parser.pos;
- final matchEnd = parser.pos + runLength;
- final text = Text(parser.source.substring(matchStart, matchEnd));
- if (!requiresDelimiterRun) {
- parser._pushDelimiter(SimpleDelimiter(
- node: text,
- length: runLength,
- char: parser.source.codeUnitAt(matchStart),
- canOpen: true,
- canClose: false,
- syntax: this,
- endPos: matchEnd,
- ));
- parser.addNode(text);
- return true;
- }
-
- final delimiterRun = DelimiterRun.tryParse(
- parser,
- matchStart,
- matchEnd,
- syntax: this,
- node: text,
- allowIntraWord: allowIntraWord,
- tags: tags ?? [],
- );
- if (delimiterRun != null) {
- parser._pushDelimiter(delimiterRun);
- parser.addNode(text);
- return true;
- } else {
- parser.advanceBy(runLength);
- return false;
- }
- }
-
- /// Attempts to close this tag at the current position.
- ///
- /// If a tag cannot be closed at the current position (for example, if a link
- /// reference cannot be found for a link tag's label), then `null` is
- /// returned.
- ///
- /// If a tag can be closed at the current position, then this method calls
- /// [getChildren], in which [parser] parses any nested text into child nodes.
- /// The returned [Node] incorpororates these child nodes.
- Node? close(
- InlineParser parser,
- Delimiter opener,
- Delimiter closer, {
- required String tag,
- required List<Node> Function() getChildren,
- }) {
- return Element(tag, getChildren());
- }
-}
-
-class EmphasisSyntax extends DelimiterSyntax {
- /// Parses `__strong__` and `_emphasis_`.
- EmphasisSyntax.underscore()
- : super('_+', requiresDelimiterRun: true, tags: _tags);
-
- /// Parses `**strong**` and `*emphasis*`.
- EmphasisSyntax.asterisk()
- : super(
- r'\*+',
- requiresDelimiterRun: true,
- allowIntraWord: true,
- tags: _tags,
- );
-
- static final _tags = [DelimiterTag('em', 1), DelimiterTag('strong', 2)];
-}
-
-/// Matches strikethrough syntax according to the GFM spec.
-class StrikethroughSyntax extends DelimiterSyntax {
- StrikethroughSyntax()
- : super(
- '~+',
- requiresDelimiterRun: true,
- allowIntraWord: true,
- tags: [DelimiterTag('del', 2)],
- );
-}
-
-@Deprecated('Use DelimiterSyntax instead')
-class TagSyntax extends DelimiterSyntax {
- TagSyntax(String pattern, {bool requiresDelimiterRun = false})
- : super(pattern, requiresDelimiterRun: requiresDelimiterRun);
-}
-
-/// Matches links like `[blah][label]` and `[blah](url)`.
-class LinkSyntax extends DelimiterSyntax {
- static final _entirelyWhitespacePattern = RegExp(r'^\s*$');
-
- final Resolver linkResolver;
-
- LinkSyntax({
- Resolver? linkResolver,
- String pattern = r'\[',
- int startCharacter = $lbracket,
- }) : linkResolver = (linkResolver ?? ((String _, [String? __]) => null)),
- super(pattern, startCharacter: startCharacter);
-
- @override
- Node? close(
- InlineParser parser,
- covariant SimpleDelimiter opener,
- Delimiter? closer, {
- String? tag,
- required List<Node> Function() getChildren,
- }) {
- final text = parser.source.substring(opener.endPos, parser.pos);
- // The current character is the `]` that closed the link text. Examine the
- // next character, to determine what type of link we might have (a '('
- // means a possible inline link; otherwise a possible reference link).
- if (parser.pos + 1 >= parser.source.length) {
- // The `]` is at the end of the document, but this may still be a valid
- // shortcut reference link.
- return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
- }
-
- // Peek at the next character; don't advance, so as to avoid later stepping
- // backward.
- final char = parser.charAt(parser.pos + 1);
-
- if (char == $lparen) {
- // Maybe an inline link, like `[text](destination)`.
- parser.advanceBy(1);
- final leftParenIndex = parser.pos;
- final inlineLink = _parseInlineLink(parser);
- if (inlineLink != null) {
- return _tryCreateInlineLink(
- parser,
- inlineLink,
- getChildren: getChildren,
- );
- }
- // At this point, we've matched `[...](`, but that `(` did not pan out to
- // be an inline link. We must now check if `[...]` is simply a shortcut
- // reference link.
-
- // Reset the parser position.
- parser.pos = leftParenIndex;
- parser.advanceBy(-1);
- return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
- }
-
- if (char == $lbracket) {
- parser.advanceBy(1);
- // At this point, we've matched `[...][`. Maybe a *full* reference link,
- // like `[foo][bar]` or a *collapsed* reference link, like `[foo][]`.
- if (parser.pos + 1 < parser.source.length &&
- parser.charAt(parser.pos + 1) == $rbracket) {
- // That opening `[` is not actually part of the link. Maybe a
- // *shortcut* reference link (followed by a `[`).
- parser.advanceBy(1);
- return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
- }
- final label = _parseReferenceLinkLabel(parser);
- if (label != null) {
- return _tryCreateReferenceLink(parser, label, getChildren: getChildren);
- }
- return null;
- }
-
- // The link text (inside `[...]`) was not followed with a opening `(` nor
- // an opening `[`. Perhaps just a simple shortcut reference link (`[...]`).
- return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
- }
-
- /// Resolve a possible reference link.
- ///
- /// Uses [linkReferences], [linkResolver], and [_createNode] to try to
- /// resolve [label] into a [Node]. If [label] is defined in
- /// [linkReferences] or can be resolved by [linkResolver], returns a [Node]
- /// that links to the resolved URL.
- ///
- /// Otherwise, returns `null`.
- ///
- /// [label] does not need to be normalized.
- Node? _resolveReferenceLink(
- String label,
- Map<String, LinkReference> linkReferences, {
- required List<Node> Function() getChildren,
- }) {
- final linkReference = linkReferences[normalizeLinkLabel(label)];
- if (linkReference != null) {
- return _createNode(
- linkReference.destination,
- linkReference.title,
- getChildren: getChildren,
- );
- } else {
- // This link has no reference definition. But we allow users of the
- // library to specify a custom resolver function ([linkResolver]) that
- // may choose to handle this. Otherwise, it's just treated as plain
- // text.
-
- // Normally, label text does not get parsed as inline Markdown. However,
- // for the benefit of the link resolver, we need to at least escape
- // brackets, so that, e.g. a link resolver can receive `[\[\]]` as `[]`.
- final resolved = linkResolver(label
- .replaceAll(r'\\', r'\')
- .replaceAll(r'\[', '[')
- .replaceAll(r'\]', ']'));
- if (resolved != null) {
- getChildren();
- }
- return resolved;
- }
- }
-
- /// Create the node represented by a Markdown link.
- Node _createNode(
- String destination,
- String? title, {
- required List<Node> Function() getChildren,
- }) {
- final children = getChildren();
- final element = Element('a', children);
- element.attributes['href'] = escapeAttribute(destination);
- if (title != null && title.isNotEmpty) {
- element.attributes['title'] = escapeAttribute(title);
- }
- return element;
- }
-
- /// Tries to create a reference link node.
- ///
- /// Returns the link if it was successfully created, `null` otherwise.
- Node? _tryCreateReferenceLink(
- InlineParser parser,
- String label, {
- required List<Node> Function() getChildren,
- }) {
- return _resolveReferenceLink(
- label,
- parser.document.linkReferences,
- getChildren: getChildren,
- );
- }
-
- // Tries to create an inline link node.
- //
- /// Returns the link if it was successfully created, `null` otherwise.
- Node _tryCreateInlineLink(
- InlineParser parser,
- InlineLink link, {
- required List<Node> Function() getChildren,
- }) {
- return _createNode(link.destination, link.title, getChildren: getChildren);
- }
-
- /// Parse a reference link label at the current position.
- ///
- /// Specifically, [parser.pos] is expected to be pointing at the `[` which
- /// opens the link label.
- ///
- /// Returns the label if it could be parsed, or `null` if not.
- String? _parseReferenceLinkLabel(InlineParser parser) {
- // Walk past the opening `[`.
- parser.advanceBy(1);
- if (parser.isDone) return null;
-
- final buffer = StringBuffer();
- while (true) {
- final char = parser.charAt(parser.pos);
- if (char == $backslash) {
- parser.advanceBy(1);
- final next = parser.charAt(parser.pos);
- if (next != $backslash && next != $rbracket) {
- buffer.writeCharCode(char);
- }
- buffer.writeCharCode(next);
- } else if (char == $lbracket) {
- return null;
- } else if (char == $rbracket) {
- break;
- } else {
- buffer.writeCharCode(char);
- }
- parser.advanceBy(1);
- if (parser.isDone) return null;
- // TODO(srawlins): only check 999 characters, for performance reasons?
- }
-
- final label = buffer.toString();
-
- // A link label must contain at least one non-whitespace character.
- if (_entirelyWhitespacePattern.hasMatch(label)) return null;
-
- return label;
- }
-
- /// Parse an inline [InlineLink] at the current position.
- ///
- /// At this point, we have parsed a link's (or image's) opening `[`, and then
- /// a matching closing `]`, and [parser.pos] is pointing at an opening `(`.
- /// This method will then attempt to parse a link destination wrapped in `<>`,
- /// such as `(<http://url>)`, or a bare link destination, such as
- /// `(http://url)`, or a link destination with a title, such as
- /// `(http://url "title")`.
- ///
- /// Returns the [InlineLink] if one was parsed, or `null` if not.
- InlineLink? _parseInlineLink(InlineParser parser) {
- // Start walking to the character just after the opening `(`.
- parser.advanceBy(1);
-
- _moveThroughWhitespace(parser);
- if (parser.isDone) return null; // EOF. Not a link.
-
- if (parser.charAt(parser.pos) == $lt) {
- // Maybe a `<...>`-enclosed link destination.
- return _parseInlineBracketedLink(parser);
- } else {
- return _parseInlineBareDestinationLink(parser);
- }
- }
-
- /// Parse an inline link with a bracketed destination (a destination wrapped
- /// in `<...>`). The current position of the parser must be the first
- /// character of the destination.
- ///
- /// Returns the link if it was successfully created, `null` otherwise.
- InlineLink? _parseInlineBracketedLink(InlineParser parser) {
- parser.advanceBy(1);
-
- final buffer = StringBuffer();
- while (true) {
- final char = parser.charAt(parser.pos);
- if (char == $backslash) {
- parser.advanceBy(1);
- final next = parser.charAt(parser.pos);
- // TODO: Follow the backslash spec better here.
- // http://spec.commonmark.org/0.29/#backslash-escapes
- if (next != $backslash && next != $gt) {
- buffer.writeCharCode(char);
- }
- buffer.writeCharCode(next);
- } else if (char == $lf || char == $cr || char == $ff) {
- // Not a link (no line breaks allowed within `<...>`).
- return null;
- } else if (char == $space) {
- buffer.write('%20');
- } else if (char == $gt) {
- break;
- } else {
- buffer.writeCharCode(char);
- }
- parser.advanceBy(1);
- if (parser.isDone) return null;
- }
- final destination = buffer.toString();
-
- parser.advanceBy(1);
- final char = parser.charAt(parser.pos);
- if (char == $space || char == $lf || char == $cr || char == $ff) {
- final title = _parseTitle(parser);
- if (title == null &&
- (parser.isDone || parser.charAt(parser.pos) != $rparen)) {
- // This looked like an inline link, until we found this $space
- // followed by mystery characters; no longer a link.
- return null;
- }
- return InlineLink(destination, title: title);
- } else if (char == $rparen) {
- return InlineLink(destination);
- } else {
- // We parsed something like `[foo](<url>X`. Not a link.
- return null;
- }
- }
-
- /// Parse an inline link with a "bare" destination (a destination _not_
- /// wrapped in `<...>`). The current position of the parser must be the first
- /// character of the destination.
- ///
- /// Returns the link if it was successfully created, `null` otherwise.
- InlineLink? _parseInlineBareDestinationLink(InlineParser parser) {
- // According to
- // [CommonMark](http://spec.commonmark.org/0.28/#link-destination):
- //
- // > A link destination consists of [...] a nonempty sequence of
- // > characters [...], and includes parentheses only if (a) they are
- // > backslash-escaped or (b) they are part of a balanced pair of
- // > unescaped parentheses.
- //
- // We need to count the open parens. We start with 1 for the paren that
- // opened the destination.
- var parenCount = 1;
- final buffer = StringBuffer();
-
- while (true) {
- final char = parser.charAt(parser.pos);
- switch (char) {
- case $backslash:
- parser.advanceBy(1);
- if (parser.isDone) return null; // EOF. Not a link.
- final next = parser.charAt(parser.pos);
- // Parentheses may be escaped.
- //
- // http://spec.commonmark.org/0.28/#example-467
- if (next != $backslash && next != $lparen && next != $rparen) {
- buffer.writeCharCode(char);
- }
- buffer.writeCharCode(next);
- break;
-
- case $space:
- case $lf:
- case $cr:
- case $ff:
- final destination = buffer.toString();
- final title = _parseTitle(parser);
- if (title == null &&
- (parser.isDone || parser.charAt(parser.pos) != $rparen)) {
- // This looked like an inline link, until we found this $space
- // followed by mystery characters; no longer a link.
- return null;
- }
- // [_parseTitle] made sure the title was follwed by a closing `)`
- // (but it's up to the code here to examine the balance of
- // parentheses).
- parenCount--;
- if (parenCount == 0) {
- return InlineLink(destination, title: title);
- }
- break;
-
- case $lparen:
- parenCount++;
- buffer.writeCharCode(char);
- break;
-
- case $rparen:
- parenCount--;
- if (parenCount == 0) {
- final destination = buffer.toString();
- return InlineLink(destination);
- }
- buffer.writeCharCode(char);
- break;
-
- default:
- buffer.writeCharCode(char);
- }
- parser.advanceBy(1);
- if (parser.isDone) return null; // EOF. Not a link.
- }
- }
-
- // Walk the parser forward through any whitespace.
- void _moveThroughWhitespace(InlineParser parser) {
- while (!parser.isDone) {
- final char = parser.charAt(parser.pos);
- if (char != $space &&
- char != $tab &&
- char != $lf &&
- char != $vt &&
- char != $cr &&
- char != $ff) {
- return;
- }
- parser.advanceBy(1);
- }
- }
-
- /// Parses a link title in [parser] at it's current position. The parser's
- /// current position should be a whitespace character that followed a link
- /// destination.
- ///
- /// Returns the title if it was successfully parsed, `null` otherwise.
- String? _parseTitle(InlineParser parser) {
- _moveThroughWhitespace(parser);
- if (parser.isDone) return null;
-
- // The whitespace should be followed by a title delimiter.
- final delimiter = parser.charAt(parser.pos);
- if (delimiter != $apostrophe &&
- delimiter != $quote &&
- delimiter != $lparen) {
- return null;
- }
-
- final closeDelimiter = delimiter == $lparen ? $rparen : delimiter;
- parser.advanceBy(1);
-
- // Now we look for an un-escaped closing delimiter.
- final buffer = StringBuffer();
- while (true) {
- final char = parser.charAt(parser.pos);
- if (char == $backslash) {
- parser.advanceBy(1);
- final next = parser.charAt(parser.pos);
- if (next != $backslash && next != closeDelimiter) {
- buffer.writeCharCode(char);
- }
- buffer.writeCharCode(next);
- } else if (char == closeDelimiter) {
- break;
- } else {
- buffer.writeCharCode(char);
- }
- parser.advanceBy(1);
- if (parser.isDone) return null;
- }
- final title = buffer.toString();
-
- // Advance past the closing delimiter.
- parser.advanceBy(1);
- if (parser.isDone) return null;
- _moveThroughWhitespace(parser);
- if (parser.isDone) return null;
- if (parser.charAt(parser.pos) != $rparen) return null;
- return title;
- }
-}
-
-/// Matches images like `` and
-/// `![alternate text][label]`.
-class ImageSyntax extends LinkSyntax {
- ImageSyntax({Resolver? linkResolver})
- : super(
- linkResolver: linkResolver,
- pattern: r'!\[',
- startCharacter: $exclamation,
- );
-
- @override
- Element _createNode(
- String destination,
- String? title, {
- required List<Node> Function() getChildren,
- }) {
- final element = Element.empty('img');
- final children = getChildren();
- element.attributes['src'] = destination;
- element.attributes['alt'] = children.map((node) => node.textContent).join();
- if (title != null && title.isNotEmpty) {
- element.attributes['title'] =
- escapeAttribute(title.replaceAll('&', '&'));
- }
- return element;
- }
-}
-
-/// Matches backtick-enclosed inline code blocks.
-class CodeSyntax extends InlineSyntax {
- // This pattern matches:
- //
- // * a string of backticks (not followed by any more), followed by
- // * a non-greedy string of anything, including newlines, ending with anything
- // except a backtick, followed by
- // * a string of backticks the same length as the first, not followed by any
- // more.
- //
- // This conforms to the delimiters of inline code, both in Markdown.pl, and
- // CommonMark.
- static final String _pattern = r'(`+(?!`))((?:.|\n)*?[^`])\1(?!`)';
-
- CodeSyntax() : super(_pattern);
-
- @override
- bool tryMatch(InlineParser parser, [int? startMatchPos]) {
- if (parser.pos > 0 && parser.charAt(parser.pos - 1) == $backquote) {
- // Not really a match! We can't just sneak past one backtick to try the
- // next character. An example of this situation would be:
- //
- // before ``` and `` after.
- // ^--parser.pos
- return false;
- }
-
- final match = pattern.matchAsPrefix(parser.source, parser.pos);
- if (match == null) {
- return false;
- }
- parser.writeText();
- if (onMatch(parser, match)) parser.consume(match.match.length);
- return true;
- }
-
- @override
- bool onMatch(InlineParser parser, Match match) {
- var code = match[2]!.trim().replaceAll('\n', ' ');
- if (parser._encodeHtml) code = escapeHtml(code);
- parser.addNode(Element.text('code', code));
-
- return true;
- }
-}
-
-/// Matches GitHub Markdown emoji syntax like `:smile:`.
-///
-/// There is no formal specification of GitHub's support for this colon-based
-/// emoji support, so this syntax is based on the results of Markdown-enabled
-/// text fields at github.com.
-class EmojiSyntax extends InlineSyntax {
- // Emoji "aliases" are mostly limited to lower-case letters, numbers, and
- // underscores, but GitHub also supports `:+1:` and `:-1:`.
- EmojiSyntax() : super(':([a-z0-9_+-]+):');
-
- @override
- bool onMatch(InlineParser parser, Match match) {
- final alias = match[1]!;
- final emoji = emojis[alias];
- if (emoji == null) {
- parser.advanceBy(1);
- return false;
- }
- parser.addNode(Text(emoji));
-
- return true;
- }
-}
-
-class InlineLink {
- final String destination;
- final String? title;
-
- InlineLink(this.destination, {this.title});
+ bool get encodeHtml => document.encodeHtml; // Gates HTML escaping.
}
diff --git a/lib/src/inline_syntaxes/autolink_extension_syntax.dart b/lib/src/inline_syntaxes/autolink_extension_syntax.dart
new file mode 100644
index 0000000..ff2bc59
--- /dev/null
+++ b/lib/src/inline_syntaxes/autolink_extension_syntax.dart
@@ -0,0 +1,136 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+ /// Matches extended autolinks like `http://foo.com` or `www.foo.com`.
+ class AutolinkExtensionSyntax extends InlineSyntax {
+ // Broken-up parts of the autolink regex, for reusability and readability.
+
+ // Autolinks can only come at the beginning of a line, after whitespace, or
+ // after any of the delimiting characters *, _, ~, (, and >.
+ static const start = r'(?:^|[\s*_~(>])';
+
+ // An extended URL autolink is recognized when one of the schemes http://,
+ // https://, or ftp://, or the prefix www., is followed by a valid domain.
+ static const scheme = r'(?:(?:https?|ftp):\/\/|www\.)';
+
+ // A valid domain consists of alphanumeric characters, underscores (_),
+ // hyphens (-) and periods (.). NOTE(review): the pattern below only checks
+ // characters; it requires no period and allows underscores in any segment.
+ static const domainPart = r'\w\-';
+ static const domain = '[$domainPart][$domainPart.]+';
+
+ // A path is any run of characters (possibly empty) that contains neither
+ // whitespace nor `<`.
+ static const path = r'[^\s<]*';
+
+ // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not
+ // be considered part of the autolink.
+ static const truncatingPunctuationPositive = '[?!.,:*_~]';
+
+ static final regExpTrailingPunc = RegExp('$truncatingPunctuationPositive*\$');
+ static final regExpEndsWithColon = RegExp(r'\&[a-zA-Z0-9]+;$');
+ static final regExpWhiteSpace = RegExp(r'\s');
+
+ AutolinkExtensionSyntax() : super('$start(($scheme)($domain)($path))');
+
+ @override
+ bool tryMatch(InlineParser parser, [int? startMatchPos]) {
+ return super.tryMatch(parser, parser.pos > 0 ? parser.pos - 1 : 0);
+ }
+
+ @override
+ bool onMatch(InlineParser parser, Match match) {
+ var url = match[1]!;
+ var href = url;
+ var matchLength = url.length;
+
+ if (url[0] == '>' || url.startsWith(regExpWhiteSpace)) {
+ url = url.substring(1, url.length - 1);
+ href = href.substring(1, href.length - 1);
+ parser.pos++;
+ matchLength--;
+ }
+
+ // Prevent accidental matches of standard autolinks, such as `<http://x>`.
+ if (url.endsWith('>') && parser.source[parser.pos - 1] == '<') {
+ return false;
+ }
+
+ // When an autolink ends in ), we scan the entire autolink for the total
+ // number of parentheses. If there is a greater number of closing
+ // parentheses than opening ones, we don't consider the last character
+ // part of the autolink, in order to facilitate including an autolink
+ // inside a parenthesis:
+ // https://github.github.com/gfm/#example-600
+ if (url.endsWith(')')) {
+ final opening = _countChars(url, '(');
+ final closing = _countChars(url, ')');
+
+ if (closing > opening) {
+ url = url.substring(0, url.length - 1);
+ href = href.substring(0, href.length - 1);
+ matchLength--;
+ }
+ }
+
+ // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will
+ // not be considered part of the autolink, though they may be included
+ // in the interior of the link:
+ // https://github.github.com/gfm/#example-599
+ final trailingPunc = regExpTrailingPunc.firstMatch(url);
+ if (trailingPunc != null) {
+ final trailingLength = trailingPunc.match.length;
+ url = url.substring(0, url.length - trailingLength);
+ href = href.substring(0, href.length - trailingLength);
+ matchLength -= trailingLength;
+ }
+
+ // If an autolink ends in a semicolon (;), we check whether it appears to
+ // resemble an
+ // [entity reference](https://github.github.com/gfm/#entity-references),
+ // that is, whether the preceding text is & followed by one or more
+ // alphanumeric characters. If so, it is excluded from the autolink:
+ // https://github.github.com/gfm/#example-602
+ if (url.endsWith(';')) {
+ final entityRef = regExpEndsWithColon.firstMatch(url);
+ if (entityRef != null) {
+ // Strip out the trailing HTML entity reference.
+ final entityRefLength = entityRef.match.length;
+ url = url.substring(0, url.length - entityRefLength);
+ href = href.substring(0, href.length - entityRefLength);
+ matchLength -= entityRefLength;
+ }
+ }
+
+ // Prepend the http scheme when the link does not already carry one.
+ if (!href.startsWith('http://') &&
+ !href.startsWith('https://') &&
+ !href.startsWith('ftp://')) {
+ href = 'http://$href';
+ }
+
+ final text = parser.encodeHtml ? escapeHtml(url) : url;
+ final anchor = Element.text('a', text);
+ anchor.attributes['href'] = Uri.encodeFull(href);
+ parser.addNode(anchor);
+
+ parser.consume(matchLength);
+ return false;
+ }
+
+ int _countChars(String input, String char) {
+ var count = 0;
+
+ for (var i = 0; i < input.length; i++) {
+ if (input[i] == char) count++;
+ }
+
+ return count;
+ }
+ }
diff --git a/lib/src/inline_syntaxes/autolink_syntax.dart b/lib/src/inline_syntaxes/autolink_syntax.dart
new file mode 100644
index 0000000..05eb41c
--- /dev/null
+++ b/lib/src/inline_syntaxes/autolink_syntax.dart
@@ -0,0 +1,24 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+ /// Matches angle-bracketed autolinks like `<http://foo.com>`.
+ class AutolinkSyntax extends InlineSyntax {
+ AutolinkSyntax() : super(r'<(([a-zA-Z][a-zA-Z\-\+\.]+):(?://)?[^\s>]*)>');
+
+ @override
+ bool onMatch(InlineParser parser, Match match) {
+ final url = match[1]!;
+ final text = parser.encodeHtml ? escapeHtml(url) : url;
+ final anchor = Element.text('a', text);
+ anchor.attributes['href'] = Uri.encodeFull(url);
+ parser.addNode(anchor);
+
+ return true;
+ }
+ }
diff --git a/lib/src/inline_syntaxes/code_syntax.dart b/lib/src/inline_syntaxes/code_syntax.dart
new file mode 100644
index 0000000..8df4cd9
--- /dev/null
+++ b/lib/src/inline_syntaxes/code_syntax.dart
@@ -0,0 +1,56 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+/// Matches backtick-enclosed inline code blocks.
+class CodeSyntax extends InlineSyntax {
+  // The pattern is made of three consecutive parts:
+  //
+  // 1. an opening run of backticks that is not followed by another backtick;
+  // 2. the shortest possible content (newlines included) whose last character
+  //    is not a backtick;
+  // 3. a closing run of backticks of the same length as the opening run, again
+  //    not followed by another backtick.
+  //
+  // This conforms to the delimiters of inline code, both in Markdown.pl, and
+  // CommonMark.
+  static final String _pattern = r'(`+(?!`))((?:.|\n)*?[^`])\1(?!`)';
+
+  CodeSyntax() : super(_pattern);
+
+  /// Tries to match an inline code span at the parser's current position.
+  ///
+  /// [startMatchPos] is not consulted; matching always starts at `parser.pos`.
+  @override
+  bool tryMatch(InlineParser parser, [int? startMatchPos]) {
+    final position = parser.pos;
+
+    // A backtick directly before the current position means we are in the
+    // middle of a longer backtick run; we can't just sneak past one backtick
+    // to try the next character. For example:
+    //
+    //     before ``` and `` after.
+    //             ^--parser.pos
+    final insideBacktickRun =
+        position > 0 && parser.charAt(position - 1) == $backquote;
+    if (insideBacktickRun) return false;
+
+    final found = pattern.matchAsPrefix(parser.source, position);
+    if (found == null) return false;
+
+    // Write any existing plain text up to this point.
+    parser.writeText();
+    final shouldConsume = onMatch(parser, found);
+    if (shouldConsume) parser.consume(found.match.length);
+    return true;
+  }
+
+  /// Adds a `<code>` element holding the trimmed content of the code span,
+  /// with every newline replaced by a space.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final raw = match[2]!.trim().replaceAll('\n', ' ');
+    final code = parser.encodeHtml ? escapeHtml(raw) : raw;
+    parser.addNode(Element.text('code', code));
+
+    return true;
+  }
+}
diff --git a/lib/src/inline_syntaxes/delimiter_syntax.dart b/lib/src/inline_syntaxes/delimiter_syntax.dart
new file mode 100644
index 0000000..7dbad63
--- /dev/null
+++ b/lib/src/inline_syntaxes/delimiter_syntax.dart
@@ -0,0 +1,336 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import 'inline_syntax.dart';
+
+/// Matches syntax that has a pair of tags and becomes an element, like `*` for
+/// `<em>`. Allows nested tags.
+class DelimiterSyntax extends InlineSyntax {
+  /// Whether this is parsed according to the same nesting rules as [emphasis
+  /// delimiters][].
+  ///
+  /// [emphasis delimiters]: http://spec.commonmark.org/0.28/#can-open-emphasis
+  final bool requiresDelimiterRun;
+
+  /// Whether to allow intra-word delimiter runs. CommonMark emphasis and
+  /// strong emphasis does not allow this, but GitHub-Flavored Markdown allows
+  /// it on strikethrough.
+  final bool allowIntraWord;
+
+  /// The tags this syntax can produce, or `null` if none were given.
+  ///
+  /// Handed to [DelimiterRun.tryParse]; an empty list is passed when `null`.
+  final List<DelimiterTag>? tags;
+
+  /// Creates a new [DelimiterSyntax] which matches text on [pattern].
+  ///
+  /// The [pattern] is used to find the matching text. If [requiresDelimiterRun]
+  /// is passed, this syntax parses according to the same nesting rules as
+  /// emphasis delimiters. If [startCharacter] is passed, it is used as a
+  /// pre-matching check which is faster than matching against [pattern].
+  DelimiterSyntax(
+    String pattern, {
+    this.requiresDelimiterRun = false,
+    int? startCharacter,
+    this.allowIntraWord = false,
+    this.tags,
+  }) : super(pattern, startCharacter: startCharacter);
+
+  /// Turns the matched delimiter characters into a [Text] node and a pending
+  /// [Delimiter].
+  ///
+  /// Returns `false`, after advancing [parser] past the match, when
+  /// [requiresDelimiterRun] is set and the match is not a valid delimiter run.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final runLength = match.group(0)!.length;
+    final runStart = parser.pos;
+    final runEnd = runStart + runLength;
+    final text = Text(parser.source.substring(runStart, runEnd));
+
+    final Delimiter? delimiter = requiresDelimiterRun
+        ? DelimiterRun.tryParse(
+            parser,
+            runStart,
+            runEnd,
+            syntax: this,
+            node: text,
+            allowIntraWord: allowIntraWord,
+            tags: tags ?? [],
+          )
+        : SimpleDelimiter(
+            node: text,
+            length: runLength,
+            char: parser.source.codeUnitAt(runStart),
+            canOpen: true,
+            canClose: false,
+            syntax: this,
+            endPos: runEnd,
+          );
+
+    if (delimiter == null) {
+      // Not a delimiter run: step over the characters ourselves and tell the
+      // caller not to consume them again.
+      parser.advanceBy(runLength);
+      return false;
+    }
+
+    parser.pushDelimiter(delimiter);
+    parser.addNode(text);
+    return true;
+  }
+
+  /// Attempts to close this tag at the current position.
+  ///
+  /// If a tag cannot be closed at the current position (for example, if a link
+  /// reference cannot be found for a link tag's label), then `null` is
+  /// returned.
+  ///
+  /// If a tag can be closed at the current position, then this method calls
+  /// [getChildren], in which [parser] parses any nested text into child nodes.
+  /// The returned [Node] incorporates these child nodes.
+  Node? close(
+    InlineParser parser,
+    Delimiter opener,
+    Delimiter closer, {
+    required String tag,
+    required List<Node> Function() getChildren,
+  }) =>
+      Element(tag, getChildren());
+}
+
+/// Pairs an HTML tag name with the number of delimiter characters that
+/// indicate it (for example `em` with 1 and `strong` with 2).
+class DelimiterTag {
+  /// Tag name of the HTML element.
+  final String tag;
+
+  /// The delimiter length associated with [tag].
+  final int indicatorLength;
+
+  DelimiterTag(this.tag, this.indicatorLength);
+}
+
+/// A delimiter indicating the possible "open" or possible "close" of a tag for
+/// a [DelimiterSyntax].
+abstract class Delimiter {
+  /// The [Text] node representing the plain text representing this delimiter.
+  abstract Text node;
+
+  /// The type of delimiter.
+  ///
+  /// For the two-character image delimiter, `![`, this is `!`.
+  int get char;
+
+  /// The number of delimiters.
+  int get length;
+
+  /// Whether the delimiter is active.
+  ///
+  /// Links cannot be nested, so we must "deactivate" any pending ones. For
+  /// example, take the following text:
+  ///
+  ///     Text [link and [more](links)](links).
+  ///
+  /// Once we have parsed `Text [`, there is one (pending) link in the state
+  /// stack. It is, by default, active. Once we parse the next possible link,
+  /// `[more](links)`, as a real link, we must deactivate the pending links
+  /// (just the one, in this case).
+  abstract bool isActive;
+
+  /// Whether this delimiter can open emphasis or strong emphasis.
+  bool get canOpen;
+
+  /// Whether this delimiter can close emphasis or strong emphasis.
+  bool get canClose;
+
+  /// The syntax which uses this delimiter to parse a tag.
+  DelimiterSyntax get syntax;
+}
+
+/// A simple delimiter implements the [Delimiter] interface with basic fields,
+/// and does not have the concept of "left-flanking" or "right-flanking".
+class SimpleDelimiter implements Delimiter {
+  /// Creates a delimiter from explicitly supplied values.
+  ///
+  /// A new delimiter always starts out active.
+  SimpleDelimiter({
+    required this.node,
+    required this.char,
+    required this.length,
+    required this.canOpen,
+    required this.canClose,
+    required this.syntax,
+    required this.endPos,
+  }) : isActive = true;
+
+  @override
+  Text node;
+
+  @override
+  final int char;
+
+  @override
+  final int length;
+
+  @override
+  bool isActive;
+
+  @override
+  final bool canOpen;
+
+  @override
+  final bool canClose;
+
+  @override
+  final DelimiterSyntax syntax;
+
+  /// The source position immediately after this delimiter's characters.
+  final int endPos;
+}
+
+/// An implementation of [Delimiter] which uses concepts of "left-flanking" and
+/// "right-flanking" to determine the values of [canOpen] and [canClose].
+///
+/// This is primarily used when parsing emphasis and strong emphasis, but can
+/// also be used by other extensions of [DelimiterSyntax].
+class DelimiterRun implements Delimiter {
+ /// According to
+ /// [CommonMark](https://spec.commonmark.org/0.29/#punctuation-character):
+ ///
+ /// > A punctuation character is an ASCII punctuation character or anything in
+ /// > the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or
+ /// > `Ps`.
+ // This RegExp is inspired by
+ // https://github.com/commonmark/commonmark.js/blob/1f7d09099c20d7861a674674a5a88733f55ff729/lib/inlines.js#L39.
+ // I don't know if there is any way to simplify it or maintain it.
+ static final RegExp punctuation = RegExp('['
+ r'''!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~'''
+ r'\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE'
+ r'\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E'
+ r'\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E'
+ r'\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14'
+ r'\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB'
+ r'\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736'
+ r'\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F'
+ r'\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E'
+ r'\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051'
+ r'\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A'
+ r'\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC'
+ r'\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42'
+ r'\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE'
+ r'\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF'
+ r'\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF'
+ r'\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19'
+ r'\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03'
+ r'\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F'
+ r'\uFF5B\uFF5D\uFF5F-\uFF65'
+ ']');
+
+ /// The characters treated as whitespace when deciding whether a run is
+ /// left- or right-flanking.
+ // TODO(srawlins): Unicode whitespace
+ static final String whitespace = ' \t\r\n';
+
+ @override
+ Text node;
+
+ @override
+ final int char;
+
+ /// The current length of the text held by [node].
+ @override
+ int get length => node.text.length;
+
+ @override
+ bool isActive;
+
+ @override
+ final DelimiterSyntax syntax;
+
+ /// Whether a run that is both left- and right-flanking may still open and
+ /// close (see the constructor's computation of [canOpen] and [canClose]).
+ final bool allowIntraWord;
+
+ @override
+ final bool canOpen;
+
+ @override
+ final bool canClose;
+
+ /// The tags this run may produce, as passed to [tryParse].
+ final List<DelimiterTag> tags;
+
+ // A left-flanking run can open unless it is also right-flanking; in that
+ // case it can only open when intra-word runs are allowed or the run is
+ // preceded by punctuation. Closing is the mirror image of that rule.
+ DelimiterRun._({
+ required this.node,
+ required this.char,
+ required this.syntax,
+ required this.tags,
+ required bool isLeftFlanking,
+ required bool isRightFlanking,
+ required bool isPrecededByPunctuation,
+ required bool isFollowedByPunctuation,
+ required this.allowIntraWord,
+ }) : canOpen = isLeftFlanking &&
+ (!isRightFlanking || allowIntraWord || isPrecededByPunctuation),
+ canClose = isRightFlanking &&
+ (!isLeftFlanking || allowIntraWord || isFollowedByPunctuation),
+ isActive = true;
+
+ /// Tries to parse a delimiter run from [runStart] (inclusive) to [runEnd]
+ /// (exclusive).
+ ///
+ /// Returns `null` when the run is neither left-flanking nor right-flanking.
+ /// Note that [tags] is sorted in place by ascending
+ /// [DelimiterTag.indicatorLength], so the list passed by the caller is
+ /// modified.
+ static DelimiterRun? tryParse(
+ InlineParser parser,
+ int runStart,
+ int runEnd, {
+ required DelimiterSyntax syntax,
+ required List<DelimiterTag> tags,
+ required Text node,
+ bool allowIntraWord = false,
+ }) {
+ bool leftFlanking,
+ rightFlanking,
+ precededByPunctuation,
+ followedByPunctuation;
+ String preceding, following;
+ // The start and the end of the source are treated as if the run were
+ // adjacent to a line ending.
+ if (runStart == 0) {
+ // NOTE(review): this value is always reassigned by the right-flanking
+ // check below.
+ rightFlanking = false;
+ preceding = '\n';
+ } else {
+ preceding = parser.source.substring(runStart - 1, runStart);
+ }
+ precededByPunctuation = punctuation.hasMatch(preceding);
+
+ if (runEnd == parser.source.length) {
+ // NOTE(review): this value is always reassigned by the left-flanking
+ // check below.
+ leftFlanking = false;
+ following = '\n';
+ } else {
+ following = parser.source.substring(runEnd, runEnd + 1);
+ }
+ followedByPunctuation = punctuation.hasMatch(following);
+
+ // http://spec.commonmark.org/0.30/#left-flanking-delimiter-run
+ if (whitespace.contains(following)) {
+ leftFlanking = false;
+ } else {
+ leftFlanking = !followedByPunctuation ||
+ whitespace.contains(preceding) ||
+ precededByPunctuation;
+ }
+
+ // http://spec.commonmark.org/0.30/#right-flanking-delimiter-run
+ if (whitespace.contains(preceding)) {
+ rightFlanking = false;
+ } else {
+ rightFlanking = !precededByPunctuation ||
+ whitespace.contains(following) ||
+ followedByPunctuation;
+ }
+
+ if (!leftFlanking && !rightFlanking) {
+ // Could not parse a delimiter run.
+ return null;
+ }
+
+ // Order the tags from the shortest indicator to the longest. This sorts
+ // the caller's list in place.
+ tags.sort((a, b) => a.indicatorLength.compareTo(b.indicatorLength));
+
+ return DelimiterRun._(
+ node: node,
+ char: parser.charAt(runStart),
+ syntax: syntax,
+ tags: tags,
+ isLeftFlanking: leftFlanking,
+ isRightFlanking: rightFlanking,
+ isPrecededByPunctuation: precededByPunctuation,
+ isFollowedByPunctuation: followedByPunctuation,
+ allowIntraWord: allowIntraWord,
+ );
+ }
+
+ @override
+ String toString() => '<char: $char, length: $length, canOpen: $canOpen, '
+ 'canClose: $canClose>';
+}
diff --git a/lib/src/inline_syntaxes/email_autolink_syntax.dart b/lib/src/inline_syntaxes/email_autolink_syntax.dart
new file mode 100644
index 0000000..28d8c3d
--- /dev/null
+++ b/lib/src/inline_syntaxes/email_autolink_syntax.dart
@@ -0,0 +1,32 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+/// Matches autolinks like `<foo@bar.example.com>`.
+///
+/// See <http://spec.commonmark.org/0.28/#email-address>.
+class EmailAutolinkSyntax extends InlineSyntax {
+  // Pattern for an email address, without the surrounding angle brackets.
+  static final _email =
+      r'''[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}'''
+      r'''[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*''';
+
+  EmailAutolinkSyntax() : super('<($_email)>', startCharacter: $lt);
+
+  /// Adds an `<a>` element whose text is the matched address and whose `href`
+  /// is the address prefixed with `mailto:`.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final address = match[1]!;
+    final label = parser.encodeHtml ? escapeHtml(address) : address;
+    parser.addNode(
+      Element.text('a', label)
+        ..attributes['href'] = Uri.encodeFull('mailto:$address'),
+    );
+
+    return true;
+  }
+}
diff --git a/lib/src/inline_syntaxes/emoji_syntax.dart b/lib/src/inline_syntaxes/emoji_syntax.dart
new file mode 100644
index 0000000..a068c6b
--- /dev/null
+++ b/lib/src/inline_syntaxes/emoji_syntax.dart
@@ -0,0 +1,32 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../emojis.dart';
+import '../inline_parser.dart';
+import 'inline_syntax.dart';
+
+/// Matches GitHub Markdown emoji syntax like `:smile:`.
+///
+/// There is no formal specification of GitHub's support for this colon-based
+/// emoji support, so this syntax is based on the results of Markdown-enabled
+/// text fields at github.com.
+class EmojiSyntax extends InlineSyntax {
+  // Emoji "aliases" are mostly limited to lower-case letters, numbers, and
+  // underscores, but GitHub also supports `:+1:` and `:-1:`.
+  EmojiSyntax() : super(':([a-z0-9_+-]+):');
+
+  /// Replaces a known alias with its emoji.
+  ///
+  /// For an alias missing from [emojis], advances [parser] by one character
+  /// and returns `false` so the caller does not consume the whole match.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final emoji = emojis[match[1]!];
+    if (emoji != null) {
+      parser.addNode(Text(emoji));
+      return true;
+    }
+
+    parser.advanceBy(1);
+    return false;
+  }
+}
diff --git a/lib/src/inline_syntaxes/emphasis_syntax.dart b/lib/src/inline_syntaxes/emphasis_syntax.dart
new file mode 100644
index 0000000..9a70b17
--- /dev/null
+++ b/lib/src/inline_syntaxes/emphasis_syntax.dart
@@ -0,0 +1,22 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'delimiter_syntax.dart';
+
+/// Parses emphasis and strong emphasis written with `_` or `*` delimiters.
+///
+/// Both variants require a delimiter run; only the asterisk variant allows
+/// intra-word delimiter runs.
+class EmphasisSyntax extends DelimiterSyntax {
+  // One delimiter character indicates `em`, two indicate `strong`.
+  static final _tags = [DelimiterTag('em', 1), DelimiterTag('strong', 2)];
+
+  /// Parses `__strong__` and `_emphasis_`.
+  EmphasisSyntax.underscore()
+      : super(
+          '_+',
+          requiresDelimiterRun: true,
+          tags: _tags,
+        );
+
+  /// Parses `**strong**` and `*emphasis*`.
+  EmphasisSyntax.asterisk()
+      : super(
+          r'\*+',
+          requiresDelimiterRun: true,
+          allowIntraWord: true,
+          tags: _tags,
+        );
+}
diff --git a/lib/src/inline_syntaxes/escape_syntax.dart b/lib/src/inline_syntaxes/escape_syntax.dart
new file mode 100644
index 0000000..30d655f
--- /dev/null
+++ b/lib/src/inline_syntaxes/escape_syntax.dart
@@ -0,0 +1,39 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+/// Escape punctuation preceded by a backslash.
+class EscapeSyntax extends InlineSyntax {
+  EscapeSyntax() : super(r'''\\[!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]''');
+
+  /// Replaces the matched backslash escape with the escaped character.
+  ///
+  /// When the parser encodes HTML, the characters `"`, `<` and `>` are
+  /// emitted as the entities `&quot;`, `&lt;` and `&gt;` instead.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final chars = match.match;
+    // Index 0 is the backslash; index 1 is the escaped punctuation character.
+    final char = chars.codeUnitAt(1);
+
+    // Insert the substitution. Why these three characters are replaced with
+    // their equivalent HTML entity reference appears to be missing from the
+    // CommonMark spec, but is very present in all of the examples.
+    // https://talk.commonmark.org/t/entity-ification-of-quotes-and-brackets-missing-from-spec/3207
+    final String replacement;
+    if (!parser.encodeHtml) {
+      replacement = chars[1];
+    } else if (char == $double_quote) {
+      replacement = '&quot;';
+    } else if (char == $lt) {
+      replacement = '&lt;';
+    } else if (char == $gt) {
+      replacement = '&gt;';
+    } else {
+      replacement = chars[1];
+    }
+
+    parser.addNode(Text(replacement));
+    return true;
+  }
+}
diff --git a/lib/src/inline_syntaxes/image_syntax.dart b/lib/src/inline_syntaxes/image_syntax.dart
new file mode 100644
index 0000000..72fa071
--- /dev/null
+++ b/lib/src/inline_syntaxes/image_syntax.dart
@@ -0,0 +1,37 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+
+import '../ast.dart';
+import '../util.dart';
+import 'link_syntax.dart';
+
+/// Matches images like `![alternate text](url "optional title")` and
+/// `![alternate text][label]`.
+class ImageSyntax extends LinkSyntax {
+  ImageSyntax({Resolver? linkResolver})
+      : super(
+          linkResolver: linkResolver,
+          pattern: r'!\[',
+          startCharacter: $exclamation,
+        );
+
+  /// Creates the `<img>` element for an image.
+  ///
+  /// [destination] becomes the `src` attribute, and the concatenated text
+  /// content of the nodes returned by [getChildren] becomes the `alt`
+  /// attribute. A non-empty [title] becomes the `title` attribute.
+  @override
+  Element createNode(
+    String destination,
+    String? title, {
+    required List<Node> Function() getChildren,
+  }) {
+    final element = Element.empty('img');
+    final children = getChildren();
+    element.attributes['src'] = destination;
+    element.attributes['alt'] = children.map((node) => node.textContent).join();
+    if (title != null && title.isNotEmpty) {
+      // Turn ampersands into entities before escaping the attribute value.
+      element.attributes['title'] =
+          escapeAttribute(title.replaceAll('&', '&amp;'));
+    }
+    return element;
+  }
+}
diff --git a/lib/src/inline_syntaxes/inline_html_syntax.dart b/lib/src/inline_syntaxes/inline_html_syntax.dart
new file mode 100644
index 0000000..31a62f7
--- /dev/null
+++ b/lib/src/inline_syntaxes/inline_html_syntax.dart
@@ -0,0 +1,24 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+
+import '../../markdown.dart';
+
+/// Leave inline HTML tags alone, from
+/// [CommonMark 0.28](http://spec.commonmark.org/0.28/#raw-html).
+///
+/// The pattern accepts `<`, an optional `/`, `!` or `?`, a name that starts
+/// with an ASCII letter and continues with letters, digits or hyphens, then
+/// optionally a whitespace character followed by anything other than `>`, and
+/// finally the closing `>`.
+///
+/// This is not actually a good definition (nor CommonMark's) of an HTML tag,
+/// but it is fast. It will leave text like `<a href='hi">` alone, which is
+/// incorrect.
+///
+/// TODO(srawlins): improve accuracy while ensuring performance, once
+/// Markdown benchmarking is more mature.
+class InlineHtmlSyntax extends TextSyntax {
+ InlineHtmlSyntax()
+ : super(
+ r'<[/!?]?[A-Za-z][A-Za-z0-9-]*(?:\s[^>]*)?>',
+ startCharacter: $lt,
+ );
+}
diff --git a/lib/src/inline_syntaxes/inline_syntax.dart b/lib/src/inline_syntaxes/inline_syntax.dart
new file mode 100644
index 0000000..997d03a
--- /dev/null
+++ b/lib/src/inline_syntaxes/inline_syntax.dart
@@ -0,0 +1,58 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../inline_parser.dart';
+import '../util.dart';
+
+/// Represents one kind of Markdown tag that can be parsed.
+abstract class InlineSyntax {
+  /// The regular expression that recognizes this syntax.
+  final RegExp pattern;
+
+  /// The first character of [pattern], to be used as an efficient first check
+  /// that this syntax matches the current parser position.
+  final int? _startCharacter;
+
+  /// Create a new [InlineSyntax] which matches text on [pattern].
+  ///
+  /// If [startCharacter] is passed, it is used as a pre-matching check which
+  /// is faster than matching against [pattern].
+  ///
+  /// If [caseSensitive] is disabled, then case is ignored when matching
+  /// the [pattern].
+  InlineSyntax(String pattern, {int? startCharacter, bool caseSensitive = true})
+      : pattern =
+            RegExp(pattern, multiLine: true, caseSensitive: caseSensitive),
+        _startCharacter = startCharacter;
+
+  /// Tries to match at the parser's current position.
+  ///
+  /// The parser's position can be overridden with [startMatchPos].
+  /// Returns whether or not the pattern successfully matched.
+  bool tryMatch(InlineParser parser, [int? startMatchPos]) {
+    final position = startMatchPos ?? parser.pos;
+
+    // Matching with the regular expression [pattern] can be expensive on some
+    // platforms, so first rule out positions whose first character cannot
+    // start this syntax.
+    final expectedFirst = _startCharacter;
+    if (expectedFirst != null &&
+        parser.source.codeUnitAt(position) != expectedFirst) {
+      return false;
+    }
+
+    final found = pattern.matchAsPrefix(parser.source, position);
+    if (found == null) return false;
+
+    // Write any existing plain text up to this point.
+    parser.writeText();
+
+    final shouldConsume = onMatch(parser, found);
+    if (shouldConsume) parser.consume(found.match.length);
+    return true;
+  }
+
+  /// Processes [match], adding nodes to [parser] and possibly advancing
+  /// [parser].
+  ///
+  /// Returns whether the caller should advance [parser] by `match[0].length`.
+  bool onMatch(InlineParser parser, Match match);
+}
diff --git a/lib/src/inline_syntaxes/line_break_syntax.dart b/lib/src/inline_syntaxes/line_break_syntax.dart
new file mode 100644
index 0000000..0a5fb01
--- /dev/null
+++ b/lib/src/inline_syntaxes/line_break_syntax.dart
@@ -0,0 +1,19 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import 'inline_syntax.dart';
+
+/// Represents a hard line break.
+///
+/// A hard line break is a line ending that is preceded either by a backslash
+/// or by two or more spaces.
+class LineBreakSyntax extends InlineSyntax {
+  // The pattern contains two literal spaces followed by `+`, i.e. "two or more
+  // spaces"; a single trailing space must not produce a `<br>`.
+  LineBreakSyntax() : super(r'(?:\\|  +)\n');
+
+  /// Create a void <br> element.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    parser.addNode(Element.empty('br'));
+    return true;
+  }
+}
diff --git a/lib/src/inline_syntaxes/link_syntax.dart b/lib/src/inline_syntaxes/link_syntax.dart
new file mode 100644
index 0000000..1769be7
--- /dev/null
+++ b/lib/src/inline_syntaxes/link_syntax.dart
@@ -0,0 +1,446 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+
+import '../ast.dart';
+import '../document.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'delimiter_syntax.dart';
+
+/// Matches links like `[blah][label]` and `[blah](url)`.
+class LinkSyntax extends DelimiterSyntax {
+ // Matches a string that is empty or consists only of whitespace; used to
+ // reject link labels with no visible content.
+ static final _entirelyWhitespacePattern = RegExp(r'^\s*$');
+
+ /// Called with a link label that has no matching link reference definition,
+ /// to give users of the library a chance to resolve it to a node.
+ final Resolver linkResolver;
+
+ /// Creates a link syntax.
+ ///
+ /// When [linkResolver] is omitted, a resolver that always returns `null` is
+ /// used. [pattern] and [startCharacter] default to the opening `[`.
+ LinkSyntax({
+ Resolver? linkResolver,
+ String pattern = r'\[',
+ int startCharacter = $lbracket,
+ }) : linkResolver = (linkResolver ?? ((String _, [String? __]) => null)),
+ super(pattern, startCharacter: startCharacter);
+
+ /// Tries to close the link opened by [opener] at the parser's current
+ /// position, which is the `]` that ends the link text.
+ ///
+ /// Depending on the character after the `]`, this attempts an inline link
+ /// (`[text](destination)`), a full or collapsed reference link
+ /// (`[text][label]`, `[text][]`), or a shortcut reference link (`[text]`).
+ /// [closer] and [tag] are not read by this implementation.
+ ///
+ /// Returns the created node, or `null` if no link could be created.
+ @override
+ Node? close(
+ InlineParser parser,
+ covariant SimpleDelimiter opener,
+ Delimiter? closer, {
+ String? tag,
+ required List<Node> Function() getChildren,
+ }) {
+ // The link text: everything between the opening delimiter and the `]`.
+ final text = parser.source.substring(opener.endPos, parser.pos);
+ // The current character is the `]` that closed the link text. Examine the
+ // next character, to determine what type of link we might have (a '('
+ // means a possible inline link; otherwise a possible reference link).
+ if (parser.pos + 1 >= parser.source.length) {
+ // The `]` is at the end of the document, but this may still be a valid
+ // shortcut reference link.
+ return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
+ }
+
+ // Peek at the next character; don't advance, so as to avoid later stepping
+ // backward.
+ final char = parser.charAt(parser.pos + 1);
+
+ if (char == $lparen) {
+ // Maybe an inline link, like `[text](destination)`.
+ parser.advanceBy(1);
+ final leftParenIndex = parser.pos;
+ final inlineLink = _parseInlineLink(parser);
+ if (inlineLink != null) {
+ return _tryCreateInlineLink(
+ parser,
+ inlineLink,
+ getChildren: getChildren,
+ );
+ }
+ // At this point, we've matched `[...](`, but that `(` did not pan out to
+ // be an inline link. We must now check if `[...]` is simply a shortcut
+ // reference link.
+
+ // Reset the parser position.
+ parser.pos = leftParenIndex;
+ parser.advanceBy(-1);
+ return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
+ }
+
+ if (char == $lbracket) {
+ parser.advanceBy(1);
+ // At this point, we've matched `[...][`. Maybe a *full* reference link,
+ // like `[foo][bar]` or a *collapsed* reference link, like `[foo][]`.
+ if (parser.pos + 1 < parser.source.length &&
+ parser.charAt(parser.pos + 1) == $rbracket) {
+ // We've matched `[...][]`: a *collapsed* reference link, whose label is
+ // the link text itself. Step onto the final `]`.
+ parser.advanceBy(1);
+ return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
+ }
+ final label = _parseReferenceLinkLabel(parser);
+ if (label != null) {
+ return _tryCreateReferenceLink(parser, label, getChildren: getChildren);
+ }
+ return null;
+ }
+
+ // The link text (inside `[...]`) was not followed with a opening `(` nor
+ // an opening `[`. Perhaps just a simple shortcut reference link (`[...]`).
+ return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
+ }
+
+  /// Resolves a possible reference link.
+  ///
+  /// Looks up the normalized [label] in [linkReferences] and, when a
+  /// definition exists, builds the node with [createNode]. Otherwise the label
+  /// is offered to [linkResolver], whose result (possibly `null`) is returned.
+  ///
+  /// [label] does not need to be normalized.
+  Node? _resolveReferenceLink(
+    String label,
+    Map<String, LinkReference> linkReferences, {
+    required List<Node> Function() getChildren,
+  }) {
+    final reference = linkReferences[normalizeLinkLabel(label)];
+    if (reference != null) {
+      return createNode(
+        reference.destination,
+        reference.title,
+        getChildren: getChildren,
+      );
+    }
+
+    // This link has no reference definition. But we allow users of the
+    // library to specify a custom resolver function ([linkResolver]) that
+    // may choose to handle this. Otherwise, it's just treated as plain text.
+    //
+    // Normally, label text does not get parsed as inline Markdown. However,
+    // for the benefit of the link resolver, we need to at least escape
+    // brackets, so that, e.g. a link resolver can receive `[\[\]]` as `[]`.
+    final unescapedLabel = label
+        .replaceAll(r'\\', r'\')
+        .replaceAll(r'\[', '[')
+        .replaceAll(r'\]', ']');
+    final resolved = linkResolver(unescapedLabel);
+
+    // The children are still requested when the resolver produced a node,
+    // even though the result is discarded here.
+    if (resolved != null) getChildren();
+
+    return resolved;
+  }
+
+  /// Creates the `<a>` element represented by a Markdown link.
+  ///
+  /// The nodes returned by [getChildren] become the element's children,
+  /// [destination] becomes the escaped `href` attribute, and a non-empty
+  /// [title] becomes the escaped `title` attribute.
+  Node createNode(
+    String destination,
+    String? title, {
+    required List<Node> Function() getChildren,
+  }) {
+    final element = Element('a', getChildren())
+      ..attributes['href'] = escapeAttribute(destination);
+    if (title == null || title.isEmpty) return element;
+
+    element.attributes['title'] = escapeAttribute(title);
+    return element;
+  }
+
+  /// Tries to create a reference link node for [label], using the link
+  /// references of the document being parsed.
+  ///
+  /// Returns the link if it was successfully created, `null` otherwise.
+  Node? _tryCreateReferenceLink(
+    InlineParser parser,
+    String label, {
+    required List<Node> Function() getChildren,
+  }) =>
+      _resolveReferenceLink(
+        label,
+        parser.document.linkReferences,
+        getChildren: getChildren,
+      );
+
+  /// Creates an inline link node from an already parsed [link].
+  ///
+  /// Delegates to [createNode] with the link's destination and title; [parser]
+  /// is not read.
+  Node _tryCreateInlineLink(
+    InlineParser parser,
+    InlineLink link, {
+    required List<Node> Function() getChildren,
+  }) =>
+      createNode(link.destination, link.title, getChildren: getChildren);
+
+  /// Parse a reference link label at the current position.
+  ///
+  /// Specifically, [parser.pos] is expected to be pointing at the `[` which
+  /// opens the link label. On success the parser is left pointing at the
+  /// closing `]`.
+  ///
+  /// Returns the label if it could be parsed, or `null` if not (the input
+  /// ended before a closing `]`, a nested `[` was found, or the label contains
+  /// only whitespace).
+  String? _parseReferenceLinkLabel(InlineParser parser) {
+    // Walk past the opening `[`.
+    parser.advanceBy(1);
+    if (parser.isDone) return null;
+
+    final buffer = StringBuffer();
+    while (true) {
+      final char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        // A trailing backslash at the end of the input: not a label. Without
+        // this check the read below would run past the end of the source.
+        if (parser.isDone) return null;
+        final next = parser.charAt(parser.pos);
+        // Only `\\` and `\]` are unescaped; any other backslash is kept.
+        if (next != $backslash && next != $rbracket) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == $lbracket) {
+        // An unescaped `[` is not allowed inside a label.
+        return null;
+      } else if (char == $rbracket) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null;
+      // TODO(srawlins): only check 999 characters, for performance reasons?
+    }
+
+    final label = buffer.toString();
+
+    // A link label must contain at least one non-whitespace character.
+    if (_entirelyWhitespacePattern.hasMatch(label)) return null;
+
+    return label;
+  }
+
+  /// Parse an inline [InlineLink] at the current position.
+  ///
+  /// At this point, we have parsed a link's (or image's) opening `[`, and then
+  /// a matching closing `]`, and [parser.pos] is pointing at an opening `(`.
+  /// This method will then attempt to parse a link destination wrapped in `<>`,
+  /// such as `(<http://url>)`, or a bare link destination, such as
+  /// `(http://url)`, or a link destination with a title, such as
+  /// `(http://url "title")`.
+  ///
+  /// Returns the [InlineLink] if one was parsed, or `null` if not.
+  InlineLink? _parseInlineLink(InlineParser parser) {
+    // Step onto the character just after the opening `(`, then skip any
+    // whitespace that precedes the destination.
+    parser.advanceBy(1);
+    _moveThroughWhitespace(parser);
+
+    // EOF. Not a link.
+    if (parser.isDone) return null;
+
+    // A leading `<` may start a `<...>`-enclosed link destination.
+    final startsWithAngleBracket = parser.charAt(parser.pos) == $lt;
+    return startsWithAngleBracket
+        ? _parseInlineBracketedLink(parser)
+        : _parseInlineBareDestinationLink(parser);
+  }
+
+  /// Parse an inline link with a bracketed destination (a destination wrapped
+  /// in `<...>`). The current position of the parser must be the `<` that
+  /// opens the destination.
+  ///
+  /// Returns the link if it was successfully parsed, `null` otherwise
+  /// (including when the input ends before the link is complete).
+  InlineLink? _parseInlineBracketedLink(InlineParser parser) {
+    // Walk past the opening `<`.
+    parser.advanceBy(1);
+    if (parser.isDone) return null; // EOF right after `<`. Not a link.
+
+    final buffer = StringBuffer();
+    while (true) {
+      final char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        if (parser.isDone) return null; // EOF after a backslash. Not a link.
+        final next = parser.charAt(parser.pos);
+        // TODO: Follow the backslash spec better here.
+        // http://spec.commonmark.org/0.29/#backslash-escapes
+        if (next != $backslash && next != $gt) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == $lf || char == $cr || char == $ff) {
+        // Not a link (no line breaks allowed within `<...>`).
+        return null;
+      } else if (char == $space) {
+        buffer.write('%20');
+      } else if (char == $gt) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null;
+    }
+    final destination = buffer.toString();
+
+    // Walk past the closing `>`.
+    parser.advanceBy(1);
+    // EOF right after `>`: the closing `)` is missing, so this is not a link.
+    if (parser.isDone) return null;
+
+    final char = parser.charAt(parser.pos);
+    if (char == $space || char == $lf || char == $cr || char == $ff) {
+      final title = _parseTitle(parser);
+      if (title == null &&
+          (parser.isDone || parser.charAt(parser.pos) != $rparen)) {
+        // This looked like an inline link, until we found this $space
+        // followed by mystery characters; no longer a link.
+        return null;
+      }
+      return InlineLink(destination, title: title);
+    } else if (char == $rparen) {
+      return InlineLink(destination);
+    } else {
+      // We parsed something like `[foo](<url>X`. Not a link.
+      return null;
+    }
+  }
+
+ /// Parses an inline link with a "bare" destination (a destination _not_
+ /// wrapped in `<...>`). The current position of the parser must be the first
+ /// character of the destination, and the input must not already be done.
+ ///
+ /// Returns the link (parser left on the closing `)`), or `null` if not a link.
+ InlineLink? _parseInlineBareDestinationLink(InlineParser parser) {
+ // According to
+ // [CommonMark](http://spec.commonmark.org/0.28/#link-destination):
+ //
+ // > A link destination consists of [...] a nonempty sequence of
+ // > characters [...], and includes parentheses only if (a) they are
+ // > backslash-escaped or (b) they are part of a balanced pair of
+ // > unescaped parentheses.
+ //
+ // We need to count the open parens. We start with 1 for the paren that
+ // opened the destination.
+ var parenCount = 1;
+ final buffer = StringBuffer();
+
+ while (true) {
+ final char = parser.charAt(parser.pos);
+ switch (char) {
+ case $backslash:
+ parser.advanceBy(1);
+ if (parser.isDone) return null; // EOF. Not a link.
+ final next = parser.charAt(parser.pos);
+ // Parentheses may be escaped; the backslash is dropped only when it
+ // escapes `\`, `(` or `)`, and is kept literally otherwise.
+ // http://spec.commonmark.org/0.28/#example-467
+ if (next != $backslash && next != $lparen && next != $rparen) {
+ buffer.writeCharCode(char);
+ }
+ buffer.writeCharCode(next);
+ break;
+
+ case $space:
+ case $lf:
+ case $cr:
+ case $ff:
+ final destination = buffer.toString();
+ final title = _parseTitle(parser);
+ if (title == null &&
+ (parser.isDone || parser.charAt(parser.pos) != $rparen)) {
+ // This looked like an inline link, until we found this $space
+ // followed by mystery characters; no longer a link.
+ return null;
+ }
+ // [_parseTitle] made sure the title was followed by a closing `)`
+ // (but it's up to the code here to examine the balance of
+ // parentheses).
+ parenCount--;
+ if (parenCount == 0) {
+ return InlineLink(destination, title: title);
+ }
+ break;
+
+ case $lparen:
+ parenCount++;
+ buffer.writeCharCode(char);
+ break;
+
+ case $rparen:
+ parenCount--;
+ if (parenCount == 0) {
+ final destination = buffer.toString();
+ return InlineLink(destination);
+ }
+ buffer.writeCharCode(char);
+ break;
+
+ default:
+ buffer.writeCharCode(char);
+ }
+ parser.advanceBy(1);
+ if (parser.isDone) return null; // EOF. Not a link.
+ }
+ }
+
+  /// Advances [parser] past any run of whitespace (space, tab, line feed,
+  /// vertical tab, carriage return, or form feed), stopping at the first
+  /// other character or at the end of the input.
+  void _moveThroughWhitespace(InlineParser parser) {
+    while (!parser.isDone) {
+      switch (parser.charAt(parser.pos)) {
+        case $space:
+        case $tab:
+        case $lf:
+        case $vt:
+        case $cr:
+        case $ff:
+          // Still whitespace: consume it and look at the next character.
+          parser.advanceBy(1);
+          break;
+        default:
+          // First non-whitespace character; leave the parser on it.
+          return;
+      }
+    }
+  }
+
+  /// Parses a link title in [parser] at its current position.
+  ///
+  /// The parser's current position should be a whitespace character that
+  /// followed a link destination. Leading whitespace is skipped, then a title
+  /// delimited by `'...'`, `"..."`, or `(...)` is read. A backslash is dropped
+  /// only when it escapes another backslash or the closing delimiter; any
+  /// other backslash is kept literally.
+  ///
+  /// Returns the title if it was successfully parsed and is followed (after
+  /// optional whitespace) by a closing `)`, in which case the parser is left
+  /// on that `)`. Returns `null` otherwise, including when the input ends
+  /// inside or right after the title.
+  String? _parseTitle(InlineParser parser) {
+    _moveThroughWhitespace(parser);
+    if (parser.isDone) return null;
+
+    // The whitespace should be followed by a title delimiter.
+    final delimiter = parser.charAt(parser.pos);
+    if (delimiter != $apostrophe &&
+        delimiter != $quote &&
+        delimiter != $lparen) {
+      return null;
+    }
+
+    final closeDelimiter = delimiter == $lparen ? $rparen : delimiter;
+    parser.advanceBy(1);
+    if (parser.isDone) return null; // EOF right after the opening delimiter.
+
+    // Now we look for an un-escaped closing delimiter.
+    final buffer = StringBuffer();
+    while (true) {
+      final char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        // A trailing backslash at EOF has nothing to escape. Not a title.
+        if (parser.isDone) return null;
+        final next = parser.charAt(parser.pos);
+        if (next != $backslash && next != closeDelimiter) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == closeDelimiter) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null; // EOF before the closing delimiter.
+    }
+    final title = buffer.toString();
+
+    // Advance past the closing delimiter.
+    parser.advanceBy(1);
+    if (parser.isDone) return null;
+    _moveThroughWhitespace(parser);
+    if (parser.isDone) return null;
+    // The title only counts if the link's closing `)` comes next.
+    if (parser.charAt(parser.pos) != $rparen) return null;
+    return title;
+  }
+}
+
+/// The result of parsing an inline link: a destination plus an optional
+/// title.
+class InlineLink {
+  /// Creates a link to [destination], optionally carrying a [title].
+  InlineLink(this.destination, {this.title});
+
+  /// The link destination.
+  final String destination;
+
+  /// The link title, or `null` when the link has none.
+  final String? title;
+}
diff --git a/lib/src/inline_syntaxes/strikethrough_syntax.dart b/lib/src/inline_syntaxes/strikethrough_syntax.dart
new file mode 100644
index 0000000..9b4dc7b
--- /dev/null
+++ b/lib/src/inline_syntaxes/strikethrough_syntax.dart
@@ -0,0 +1,16 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'delimiter_syntax.dart';
+
+/// Matches strikethrough syntax according to the GFM spec.
+///
+/// Registers the pattern `~+` (a run of one or more tildes) with a `del` tag;
+/// delimiter runs are required and intra-word matches are allowed.
+class StrikethroughSyntax extends DelimiterSyntax {
+ StrikethroughSyntax()
+ : super(
+ '~+',
+ requiresDelimiterRun: true,
+ allowIntraWord: true,
+ // NOTE(review): the `2` is presumably the delimiter length that maps to
+ // `<del>` (i.e. `~~`) — confirm against [DelimiterTag].
+ tags: [DelimiterTag('del', 2)],
+ );
+}
diff --git a/lib/src/inline_syntaxes/tag_syntax.dart b/lib/src/inline_syntaxes/tag_syntax.dart
new file mode 100644
index 0000000..d284282
--- /dev/null
+++ b/lib/src/inline_syntaxes/tag_syntax.dart
@@ -0,0 +1,11 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'delimiter_syntax.dart';
+
+/// A deprecated [DelimiterSyntax] subclass that forwards its `pattern` and
+/// `requiresDelimiterRun` arguments unchanged to the superclass constructor.
+@Deprecated('Use DelimiterSyntax instead')
+class TagSyntax extends DelimiterSyntax {
+ TagSyntax(String pattern, {bool requiresDelimiterRun = false})
+ : super(pattern, requiresDelimiterRun: requiresDelimiterRun);
+}
diff --git a/lib/src/inline_syntaxes/text_syntax.dart b/lib/src/inline_syntaxes/text_syntax.dart
new file mode 100644
index 0000000..9ab02bb
--- /dev/null
+++ b/lib/src/inline_syntaxes/text_syntax.dart
@@ -0,0 +1,42 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+/// An [InlineSyntax] for text that is either passed through untouched or
+/// swapped for a fixed replacement string.
+class TextSyntax extends InlineSyntax {
+  /// The text emitted in place of a match; empty when no replacement is
+  /// wanted.
+  final String substitute;
+
+  /// Creates a [TextSyntax] that matches text on [pattern].
+  ///
+  /// When [sub] is given, it becomes the replacement for whatever [pattern]
+  /// matched. When [startCharacter] is given, it is handed to the superclass
+  /// as a cheap pre-check that runs before [pattern] itself.
+  TextSyntax(String pattern, {String sub = '', int? startCharacter})
+      : substitute = sub,
+        super(pattern, startCharacter: startCharacter);
+
+  /// Handles a [match] of this syntax.
+  ///
+  /// If [substitute] is non-empty and the character just before the match
+  /// (if any) is not a `/`, a [Text] node holding [substitute] is added to
+  /// [parser] and `true` is returned.
+  ///
+  /// In every other case the parser is advanced by the length of the matched
+  /// text and `false` is returned.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final matchStart = match.start;
+    final followsSlash = matchStart > 0 && match.input[matchStart - 1] == '/';
+
+    if (substitute.isNotEmpty && !followsSlash) {
+      // Insert the substitution.
+      parser.addNode(Text(substitute));
+      return true;
+    }
+
+    // Keep the original matched text: just move past it.
+    parser.advanceBy(match.match.length);
+    return false;
+  }
+}
diff --git a/lib/src/patterns.dart b/lib/src/patterns.dart
new file mode 100644
index 0000000..4ddc184
--- /dev/null
+++ b/lib/src/patterns.dart
@@ -0,0 +1,53 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// The line is empty or contains only spaces and tabs.
+final emptyPattern = RegExp(r'^(?:[ \t]*)$');
+
+/// A series of `=` or `-` (on the next line) define setext-style headers.
+///
+/// Up to three leading spaces are allowed; the run of `=` or `-` is captured
+/// in group 1 and may be followed by trailing whitespace.
+final setextPattern = RegExp(r'^[ ]{0,3}(=+|-+)\s*$');
+
+/// Leading (and trailing) `#` define atx-style headers.
+///
+/// Allows up to three leading spaces, then 1-6 `#` characters (group 1) which
+/// must be followed by a space, tab, vertical tab, or form feed. The header
+/// text is captured in group 2. The line may end with any number of `#`
+/// characters.
+final headerPattern = RegExp(r'^ {0,3}(#{1,6})[ \x09\x0b\x0c](.*?)#*$');
+
+/// The line starts with `>` (after up to three spaces) with one optional
+/// space after. The rest of the line is captured in group 1.
+final blockquotePattern = RegExp(r'^[ ]{0,3}>[ ]?(.*)$');
+
+/// A line indented four spaces, or up to three spaces followed by a tab.
+/// Used for code blocks and lists.
+///
+/// The text after the indentation is captured in group 1.
+final indentPattern = RegExp(r'^(?:    | {0,3}\t)(.*)$');
+
+/// Fenced code block.
+///
+/// Allows up to three leading spaces, then a fence of at least three
+/// backticks or at least three tildes (group 1). The rest of the line is
+/// captured in group 2.
+final codeFencePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');
+
+/// Fenced blockquotes: a line of exactly three `>` characters, optionally
+/// followed by whitespace.
+final blockquoteFencePattern = RegExp(r'^>{3}\s*$');
+
+/// Three or more hyphens, asterisks or underscores by themselves (optionally
+/// separated by spaces or tabs, after up to three leading spaces). Note that
+/// a line like `----` is valid as both HR and SETEXT. In case of a tie,
+/// SETEXT should win.
+final hrPattern = RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$');
+
+/// A line starting with one of these markers: `-`, `*`, `+`. May have up to
+/// three leading spaces before the marker and any number of spaces or tabs
+/// after.
+///
+/// Contains a dummy (always empty) group at [2], so that the groups in
+/// [ulPattern] and [olPattern] match up; in [olPattern], [2] is the number
+/// that begins the list marker.
+final ulPattern = RegExp(r'^([ ]{0,3})()([*+-])(([ \t])([ \t]*)(.*))?$');
+
+/// A line starting with a number like `123.` or `123)`. The number has one to
+/// nine digits. May have up to three leading spaces before the marker and any
+/// number of spaces or tabs after.
+final olPattern = RegExp(r'^([ ]{0,3})(\d{1,9})([\.)])(([ \t])([ \t]*)(.*))?$');
+
+/// A line of hyphens separated by at least one pipe.
+///
+/// Allows up to three leading spaces and an optional leading pipe. Each run
+/// of hyphens may have an optional `:` on either side and may be padded with
+/// spaces or tabs.
+final tablePattern = RegExp(
+ r'^[ ]{0,3}\|?([ \t]*:?\-+:?[ \t]*\|)+([ \t]|[ \t]*:?\-+:?[ \t]*)?$');
+
+/// A pattern which should never be used. It just satisfies non-nullability of
+/// pattern fields.
+final dummyPattern = RegExp('');
diff --git a/tool/update_emojis.dart b/tool/update_emojis.dart
index 864d07a..76b82e2 100644
--- a/tool/update_emojis.dart
+++ b/tool/update_emojis.dart
@@ -1,3 +1,7 @@
+// Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
import 'dart:async';
import 'dart:convert';
import 'dart:io';