diff --git a/CHANGELOG.md b/CHANGELOG.md index e22c442..46ea57b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md
@@ -1,9 +1,34 @@ -## 5.0.0-dev +## 6.0.0-dev + +* **Breaking change**: The `TagSyntax` is _deprecated_. +* Add new syntax `DelimiterSyntax`. +* **Breaking change**: `StrikethroughSyntax` now extends `DelimiterSyntax` + instead of `TagSyntax`. +* **Breaking change**: `LinkSyntax` now extends `DelimiterSyntax` + instead of `TagSyntax`. +* Add two new emphasis syntaxes `EmphasisSyntax.underscore` and + `EmphasisSyntax.asterisk`. + +## 5.0.0 * Breaking change: Change the type of `parseInline`'s parameter from `String?` to `String`. * Fix table-rendering bug when table rows have trailing whitespace. [#368](https://github.com/dart-lang/markdown/issues/368). +* Do not allow reference link labels to contain left brackets. Thanks + @chenzhiguang. + [#335](https://github.com/dart-lang/markdown/issues/335). +* Treat lines matching a code block syntax as continuations of paragraphs, + inside blockquotes. Thanks @chenzhiguang. + [#358](https://github.com/dart-lang/markdown/issues/358). +* Add a syntax for GitLab-flavored fenced blockquotes. GitLab-flavored Markdown + will be evaluated into an ExtensionSet, in a future release. Thanks + @chenzhiguang. + [#359](https://github.com/dart-lang/markdown/issues/359). +* Add `bool withDefaultInlineSyntaxes` and `bool withDefaultBlockSyntaxes` + parameters to `markdownToHtml` and `Document` to support the case of + specifying exactly the list of desired syntaxes. Thanks @chenzhiguang. + [#393](https://github.com/dart-lang/markdown/issues/393). ## 4.0.1
diff --git a/README.md b/README.md index 66bb887..cf6a000 100644 --- a/README.md +++ b/README.md
@@ -1,4 +1,5 @@ [](https://github.com/dart-lang/markdown/actions?query=workflow%3A"Dart+CI"+branch%3Amaster) +[](https://pub.dev/packages/markdown) A portable Markdown library written in Dart. It can parse Markdown into HTML on both the client and server.
diff --git a/analysis_options.yaml b/analysis_options.yaml index 2f0f043..92ca266 100644 --- a/analysis_options.yaml +++ b/analysis_options.yaml
@@ -31,3 +31,8 @@ - package_api_docs - test_types_in_equals - throw_in_finally + - prefer_final_locals + - use_if_null_to_convert_nulls_to_bools + - use_raw_strings + - unnecessary_raw_strings + - prefer_interpolation_to_compose_strings
diff --git a/benchmark/benchmark.dart b/benchmark/benchmark.dart index f3e934c..4c4d61f 100644 --- a/benchmark/benchmark.dart +++ b/benchmark/benchmark.dart
@@ -19,7 +19,7 @@ // Run the benchmark several times. This ensures the VM is warmed up and lets // us see how much variance there is. for (var i = 0; i <= numTrials; i++) { - var start = DateTime.now(); + final start = DateTime.now(); // For a single benchmark, convert the source multiple times. late String result; @@ -27,7 +27,7 @@ result = markdownToHtml(source); } - var elapsed = + final elapsed = DateTime.now().difference(start).inMilliseconds / runsPerTrial; // Keep track of the best run so far. @@ -51,7 +51,7 @@ } String _loadFile(String name) { - var path = p.join(p.dirname(p.fromUri(Platform.script)), name); + final path = p.join(p.dirname(p.fromUri(Platform.script)), name); return File(path).readAsStringSync(); }
diff --git a/bin/markdown.dart b/bin/markdown.dart index fd4eee2..76926c6 100644 --- a/bin/markdown.dart +++ b/bin/markdown.dart
@@ -16,20 +16,22 @@ }; Future<void> main(List<String> args) async { - var parser = ArgParser() + final parser = ArgParser() ..addFlag('help', negatable: false, help: 'Print help text and exit') ..addFlag('version', negatable: false, help: 'Print version and exit') - ..addOption('extension-set', - allowed: ['none', 'CommonMark', 'GitHubFlavored', 'GitHubWeb'], - defaultsTo: 'CommonMark', - help: 'Specify a set of extensions', - allowedHelp: { - 'none': 'No extensions; similar to Markdown.pl', - 'CommonMark': 'Parse like CommonMark Markdown (default)', - 'GitHubFlavored': 'Parse like GitHub Flavored Markdown', - 'GitHubWeb': 'Parse like GitHub\'s Markdown-enabled web input fields', - }); - var results = parser.parse(args); + ..addOption( + 'extension-set', + allowed: ['none', 'CommonMark', 'GitHubFlavored', 'GitHubWeb'], + defaultsTo: 'CommonMark', + help: 'Specify a set of extensions', + allowedHelp: { + 'none': 'No extensions; similar to Markdown.pl', + 'CommonMark': 'Parse like CommonMark Markdown (default)', + 'GitHubFlavored': 'Parse like GitHub Flavored Markdown', + 'GitHubWeb': 'Parse like GitHub\'s Markdown-enabled web input fields', + }, + ); + final results = parser.parse(args); if (results['help'] as bool) { printUsage(parser); @@ -41,7 +43,7 @@ return; } - var extensionSet = extensionSets[results['extension-set']]; + final extensionSet = extensionSets[results['extension-set']]; if (results.rest.length > 1) { printUsage(parser); @@ -51,13 +53,13 @@ if (results.rest.length == 1) { // Read argument as a file path. - var input = File(results.rest.first).readAsStringSync(); + final input = File(results.rest.first).readAsStringSync(); print(markdownToHtml(input, extensionSet: extensionSet)); return; } // Read from stdin. - var buffer = StringBuffer(); + final buffer = StringBuffer(); String? line; while ((line = stdin.readLineSync()) != null) { buffer.writeln(line);
diff --git a/example/app.dart b/example/app.dart index ace4fa3..0b06b6b 100644 --- a/example/app.dart +++ b/example/app.dart
@@ -14,7 +14,7 @@ final nullSanitizer = NullTreeSanitizer(); const typing = Duration(milliseconds: 150); -final introText = r'''Markdown is the **best**! +final introText = '''Markdown is the **best**! * It has lists. * It has [links](https://dart.dev). @@ -36,7 +36,7 @@ versionSpan.text = 'v${md.version}'; markdownInput.onKeyUp.listen(_renderMarkdown); - var savedMarkdown = window.localStorage['markdown']; + final savedMarkdown = window.localStorage['markdown']; if (savedMarkdown != null && savedMarkdown.isNotEmpty && @@ -60,12 +60,14 @@ } void _renderMarkdown([Event? event]) { - var markdown = markdownInput.value!; + final markdown = markdownInput.value!; - htmlDiv.setInnerHtml(md.markdownToHtml(markdown, extensionSet: extensionSet), - treeSanitizer: nullSanitizer); + htmlDiv.setInnerHtml( + md.markdownToHtml(markdown, extensionSet: extensionSet), + treeSanitizer: nullSanitizer, + ); - for (var block in htmlDiv.querySelectorAll('pre code')) { + for (final block in htmlDiv.querySelectorAll('pre code')) { try { highlightElement(block); } catch (e) { @@ -100,7 +102,7 @@ } void _switchFlavor(Event e) { - var target = e.currentTarget as HtmlElement; + final target = e.currentTarget as HtmlElement; if (!target.attributes.containsKey('checked')) { if (basicRadio != target) { basicRadio.attributes.remove('checked');
diff --git a/example/index.html b/example/index.html index ba9d207..97515c6 100644 --- a/example/index.html +++ b/example/index.html
@@ -4,9 +4,10 @@ <link rel="stylesheet" href="style.css"> <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto+Mono|Roboto"> <link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons+Extended"> - <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.0.1/styles/default.min.css"> - <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.0.1/highlight.min.js"></script> - <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.0.1/languages/dart.min.js"></script> + <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.5.1/styles/default.min.css" integrity="sha512-hasIneQUHlh06VNBe7f6ZcHmeRTLIaQWFd43YriJ0UND19bvYRauxthDg8E4eVNPm9bRUhr5JGeqH7FRFXQu5g==" crossorigin="anonymous" referrerpolicy="no-referrer" /> + <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/github-markdown-css/5.1.0/github-markdown-light.min.css" integrity="sha512-zb2pp+R+czM7GAemdSUQt6jFmr3qCo6ikvBgVU6F5GvwEDR0C2sefFiPEJ9QUpmAKdD5EqDUdNRtbOYnbF/eyQ==" crossorigin="anonymous" referrerpolicy="no-referrer" /> + <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.5.1/highlight.min.js" integrity="sha512-yUUc0qWm2rhM7X0EFe82LNnv2moqArj5nro/w1bi05A09hRVeIZbN6jlMoyu0+4I/Bu4Ck/85JQIU82T82M28w==" crossorigin="anonymous" referrerpolicy="no-referrer"></script> + <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.5.1/languages/dart.min.js" integrity="sha512-14QR6tzX5xTNeMJKXzSK+xCquDvtNEr1jM5NlKy/149BBY50Kv70qqxHtzo6zClbtc1gIG7G0CGWXuMgPIMt0g==" crossorigin="anonymous" referrerpolicy="no-referrer"></script> <script defer src="app.dart.js"></script> <title>Dart Markdown Live Editor</title> </head> @@ -37,7 +38,7 @@ <div class="toolbar"> <h2>HTML</h2> </div> - <div id="html"></div> + <div id="html" class="markdown-body"></div> </div> </div> <footer>
diff --git a/lib/markdown.dart b/lib/markdown.dart index 3af6c6c..205ba38 100644 --- a/lib/markdown.dart +++ b/lib/markdown.dart
@@ -38,10 +38,47 @@ export 'src/ast.dart'; export 'src/block_parser.dart'; +export 'src/block_syntaxes/block_html_syntax.dart'; +export 'src/block_syntaxes/block_syntax.dart'; +export 'src/block_syntaxes/block_tag_block_html_syntax.dart'; +export 'src/block_syntaxes/blockquote_syntax.dart'; +export 'src/block_syntaxes/code_block_syntax.dart'; +export 'src/block_syntaxes/dummy_block_syntax.dart'; +export 'src/block_syntaxes/empty_block_syntax.dart'; +export 'src/block_syntaxes/fenced_blockquote_syntax.dart'; +export 'src/block_syntaxes/fenced_code_block_syntax.dart'; +export 'src/block_syntaxes/header_syntax.dart'; +export 'src/block_syntaxes/header_with_id_syntax.dart'; +export 'src/block_syntaxes/horizontal_rule_syntax.dart'; +export 'src/block_syntaxes/list_syntax.dart'; +export 'src/block_syntaxes/long_block_html_syntax.dart'; +export 'src/block_syntaxes/ordered_list_syntax.dart'; +export 'src/block_syntaxes/other_tag_block_html_syntax.dart'; +export 'src/block_syntaxes/paragraph_syntax.dart'; +export 'src/block_syntaxes/setext_header_syntax.dart'; +export 'src/block_syntaxes/setext_header_with_id_syntax.dart'; +export 'src/block_syntaxes/table_syntax.dart'; +export 'src/block_syntaxes/unordered_list_syntax.dart'; export 'src/document.dart'; export 'src/emojis.dart'; export 'src/extension_set.dart'; export 'src/html_renderer.dart'; export 'src/inline_parser.dart'; +export 'src/inline_syntaxes/autolink_extension_syntax.dart'; +export 'src/inline_syntaxes/autolink_syntax.dart'; +export 'src/inline_syntaxes/code_syntax.dart'; +export 'src/inline_syntaxes/delimiter_syntax.dart'; +export 'src/inline_syntaxes/email_autolink_syntax.dart'; +export 'src/inline_syntaxes/emoji_syntax.dart'; +export 'src/inline_syntaxes/emphasis_syntax.dart'; +export 'src/inline_syntaxes/escape_syntax.dart'; +export 'src/inline_syntaxes/image_syntax.dart'; +export 'src/inline_syntaxes/inline_html_syntax.dart'; +export 'src/inline_syntaxes/inline_syntax.dart'; +export 
'src/inline_syntaxes/line_break_syntax.dart'; +export 'src/inline_syntaxes/link_syntax.dart'; +export 'src/inline_syntaxes/strikethrough_syntax.dart'; +export 'src/inline_syntaxes/tag_syntax.dart'; +export 'src/inline_syntaxes/text_syntax.dart'; const version = packageVersion;
diff --git a/lib/src/block_parser.dart b/lib/src/block_parser.dart index 4b11b3d..cb88d8e 100644 --- a/lib/src/block_parser.dart +++ b/lib/src/block_parser.dart
@@ -2,62 +2,22 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. -import 'package:charcode/charcode.dart'; - import 'ast.dart'; +import 'block_syntaxes/block_syntax.dart'; +import 'block_syntaxes/block_tag_block_html_syntax.dart'; +import 'block_syntaxes/blockquote_syntax.dart'; +import 'block_syntaxes/code_block_syntax.dart'; +import 'block_syntaxes/dummy_block_syntax.dart'; +import 'block_syntaxes/empty_block_syntax.dart'; +import 'block_syntaxes/header_syntax.dart'; +import 'block_syntaxes/horizontal_rule_syntax.dart'; +import 'block_syntaxes/long_block_html_syntax.dart'; +import 'block_syntaxes/ordered_list_syntax.dart'; +import 'block_syntaxes/other_tag_block_html_syntax.dart'; +import 'block_syntaxes/paragraph_syntax.dart'; +import 'block_syntaxes/setext_header_syntax.dart'; +import 'block_syntaxes/unordered_list_syntax.dart'; import 'document.dart'; -import 'util.dart'; - -/// The line contains only whitespace or is empty. -final _emptyPattern = RegExp(r'^(?:[ \t]*)$'); - -/// A series of `=` or `-` (on the next line) define setext-style headers. -final _setextPattern = RegExp(r'^[ ]{0,3}(=+|-+)\s*$'); - -/// Leading (and trailing) `#` define atx-style headers. -/// -/// Starts with 1-6 unescaped `#` characters which must not be followed by a -/// non-space character. Line may end with any number of `#` characters,. -final _headerPattern = RegExp(r'^ {0,3}(#{1,6})[ \x09\x0b\x0c](.*?)#*$'); - -/// The line starts with `>` with one optional space after. -final _blockquotePattern = RegExp(r'^[ ]{0,3}>[ ]?(.*)$'); - -/// A line indented four spaces. Used for code blocks and lists. -final _indentPattern = RegExp(r'^(?: | {0,3}\t)(.*)$'); - -/// Fenced code block. -final _codeFencePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$'); - -/// Fenced blockquotes. 
-final _blockquoteFencePattern = RegExp(r'^>{3}\s*$'); - -/// Three or more hyphens, asterisks or underscores by themselves. Note that -/// a line like `----` is valid as both HR and SETEXT. In case of a tie, -/// SETEXT should win. -final _hrPattern = RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$'); - -/// A line starting with one of these markers: `-`, `*`, `+`. May have up to -/// three leading spaces before the marker and any number of spaces or tabs -/// after. -/// -/// Contains a dummy group at [2], so that the groups in [_ulPattern] and -/// [_olPattern] match up; in both, [2] is the length of the number that begins -/// the list marker. -final _ulPattern = RegExp(r'^([ ]{0,3})()([*+-])(([ \t])([ \t]*)(.*))?$'); - -/// A line starting with a number like `123.`. May have up to three leading -/// spaces before the marker and any number of spaces or tabs after. -final _olPattern = - RegExp(r'^([ ]{0,3})(\d{1,9})([\.)])(([ \t])([ \t]*)(.*))?$'); - -/// A line of hyphens separated by at least one pipe. -final _tablePattern = RegExp( - r'^[ ]{0,3}\|?([ \t]*:?\-+:?[ \t]*\|)+([ \t]|[ \t]*:?\-+:?[ \t]*)?$'); - -/// A pattern which should never be used. It just satisfies non-nullability of -/// pattern fields. -final _dummyPattern = RegExp(''); /// Maintains the internal state needed to parse a series of lines into blocks /// of Markdown suitable for further inline parsing. 
@@ -88,9 +48,9 @@ LongBlockHtmlSyntax(r'^ {0,3}<script(?:\s|>|$)', '</script>'), LongBlockHtmlSyntax(r'^ {0,3}<style(?:\s|>|$)', '</style>'), LongBlockHtmlSyntax('^ {0,3}<!--', '-->'), - LongBlockHtmlSyntax('^ {0,3}<\\?', '\\?>'), + LongBlockHtmlSyntax(r'^ {0,3}<\?', r'\?>'), LongBlockHtmlSyntax('^ {0,3}<![A-Z]', '>'), - LongBlockHtmlSyntax('^ {0,3}<!\\[CDATA\\[', '\\]\\]>'), + LongBlockHtmlSyntax(r'^ {0,3}<!\[CDATA\[', r'\]\]>'), const OtherTagBlockHtmlSyntax(), const SetextHeaderSyntax(), const HeaderSyntax(), @@ -156,11 +116,11 @@ } List<Node> parseLines() { - var blocks = <Node>[]; + final blocks = <Node>[]; while (!isDone) { - for (var syntax in blockSyntaxes) { + for (final syntax in blockSyntaxes) { if (syntax.canParse(this)) { - var block = syntax.parse(this); + final block = syntax.parse(this); if (block != null) blocks.add(block); break; } @@ -170,1139 +130,3 @@ return blocks; } } - -abstract class BlockSyntax { - const BlockSyntax(); - - /// Gets the regex used to identify the beginning of this block, if any. - RegExp get pattern; - - bool canEndBlock(BlockParser parser) => true; - - bool canParse(BlockParser parser) { - return pattern.hasMatch(parser.current); - } - - Node? parse(BlockParser parser); - - List<String?> parseChildLines(BlockParser parser) { - // Grab all of the lines that form the block element. - var childLines = <String?>[]; - - while (!parser.isDone) { - var match = pattern.firstMatch(parser.current); - if (match == null) break; - childLines.add(match[1]); - parser.advance(); - } - - return childLines; - } - - /// Gets whether or not [parser]'s current line should end the previous block. - static bool isAtBlockEnd(BlockParser parser) { - if (parser.isDone) return true; - return parser.blockSyntaxes - .any((s) => s.canParse(parser) && s.canEndBlock(parser)); - } - - /// Generates a valid HTML anchor from the inner text of [element]. 
- static String generateAnchorHash(Element element) => - element.children!.first.textContent - .toLowerCase() - .trim() - .replaceAll(RegExp(r'[^a-z0-9 _-]'), '') - .replaceAll(RegExp(r'\s'), '-'); -} - -class EmptyBlockSyntax extends BlockSyntax { - @override - RegExp get pattern => _emptyPattern; - - const EmptyBlockSyntax(); - - @override - Node? parse(BlockParser parser) { - parser.encounteredBlankLine = true; - parser.advance(); - - // Don't actually emit anything. - return null; - } -} - -/// Parses setext-style headers. -class SetextHeaderSyntax extends BlockSyntax { - @override - RegExp get pattern => _dummyPattern; - - const SetextHeaderSyntax(); - - @override - bool canParse(BlockParser parser) { - if (!_interperableAsParagraph(parser.current)) return false; - var i = 1; - while (true) { - var nextLine = parser.peek(i); - if (nextLine == null) { - // We never reached an underline. - return false; - } - if (_setextPattern.hasMatch(nextLine)) { - return true; - } - // Ensure that we're still in something like paragraph text. - if (!_interperableAsParagraph(nextLine)) { - return false; - } - i++; - } - } - - @override - Node parse(BlockParser parser) { - var lines = <String>[]; - String? tag; - while (!parser.isDone) { - var match = _setextPattern.firstMatch(parser.current); - if (match == null) { - // More text. - lines.add(parser.current); - parser.advance(); - continue; - } else { - // The underline. - tag = (match[1]![0] == '=') ? 
'h1' : 'h2'; - parser.advance(); - break; - } - } - - var contents = UnparsedContent(lines.join('\n').trimRight()); - - return Element(tag!, [contents]); - } - - bool _interperableAsParagraph(String line) => - !(_indentPattern.hasMatch(line) || - _codeFencePattern.hasMatch(line) || - _headerPattern.hasMatch(line) || - _blockquotePattern.hasMatch(line) || - _hrPattern.hasMatch(line) || - _ulPattern.hasMatch(line) || - _olPattern.hasMatch(line) || - _emptyPattern.hasMatch(line)); -} - -/// Parses setext-style headers, and adds generated IDs to the generated -/// elements. -class SetextHeaderWithIdSyntax extends SetextHeaderSyntax { - const SetextHeaderWithIdSyntax(); - - @override - Node parse(BlockParser parser) { - var element = super.parse(parser) as Element; - element.generatedId = BlockSyntax.generateAnchorHash(element); - return element; - } -} - -/// Parses atx-style headers: `## Header ##`. -class HeaderSyntax extends BlockSyntax { - @override - RegExp get pattern => _headerPattern; - - const HeaderSyntax(); - - @override - Node parse(BlockParser parser) { - var match = pattern.firstMatch(parser.current)!; - parser.advance(); - var level = match[1]!.length; - var contents = UnparsedContent(match[2]!.trim()); - return Element('h$level', [contents]); - } -} - -/// Parses atx-style headers, and adds generated IDs to the generated elements. 
-class HeaderWithIdSyntax extends HeaderSyntax { - const HeaderWithIdSyntax(); - - @override - Node parse(BlockParser parser) { - var element = super.parse(parser) as Element; - element.generatedId = BlockSyntax.generateAnchorHash(element); - return element; - } -} - -/// Parses lines fenced by `>>>` to blockquotes -class FencedBlockquoteSyntax extends BlockSyntax { - const FencedBlockquoteSyntax(); - - @override - RegExp get pattern => _blockquoteFencePattern; - - @override - List<String> parseChildLines(BlockParser parser) { - final childLines = <String>[]; - parser.advance(); - - while (!parser.isDone) { - final match = pattern.hasMatch(parser.current); - if (!match) { - childLines.add(parser.current); - parser.advance(); - } else { - parser.advance(); - break; - } - } - - return childLines; - } - - @override - Node? parse(BlockParser parser) { - final childLines = parseChildLines(parser); - - // Recursively parse the contents of the blockquote. - final children = BlockParser(childLines, parser.document).parseLines(); - return Element('blockquote', children); - } -} - -/// Parses email-style blockquotes: `> quote`. -class BlockquoteSyntax extends BlockSyntax { - @override - RegExp get pattern => _blockquotePattern; - - const BlockquoteSyntax(); - - @override - List<String> parseChildLines(BlockParser parser) { - // Grab all of the lines that form the blockquote, stripping off the ">". - var childLines = <String>[]; - - bool encounteredCodeBlock = false; - while (!parser.isDone) { - var match = pattern.firstMatch(parser.current); - if (match != null) { - final line = match[1]!; - childLines.add(line); - encounteredCodeBlock = _indentPattern.hasMatch(line); - parser.advance(); - continue; - } - - // A paragraph continuation is OK. This is content that cannot be parsed - // as any other syntax except Paragraph, and it doesn't match the bar in - // a Setext header. 
- // Because indented code blocks cannot interrupt paragraphs, a line - // matched CodeBlockSyntax is also paragraph continuation text. - final otherMatched = - parser.blockSyntaxes.firstWhere((s) => s.canParse(parser)); - if (otherMatched is ParagraphSyntax || - (!encounteredCodeBlock && otherMatched is CodeBlockSyntax)) { - childLines.add(parser.current); - parser.advance(); - } else { - break; - } - } - - return childLines; - } - - @override - Node parse(BlockParser parser) { - var childLines = parseChildLines(parser); - - // Recursively parse the contents of the blockquote. - var children = BlockParser(childLines, parser.document).parseLines(); - - return Element('blockquote', children); - } -} - -/// Parses preformatted code blocks that are indented four spaces. -class CodeBlockSyntax extends BlockSyntax { - @override - RegExp get pattern => _indentPattern; - - @override - bool canEndBlock(BlockParser parser) => false; - - const CodeBlockSyntax(); - - @override - List<String?> parseChildLines(BlockParser parser) { - var childLines = <String?>[]; - - while (!parser.isDone) { - var match = pattern.firstMatch(parser.current); - if (match != null) { - childLines.add(match[1]); - parser.advance(); - } else { - // If there's a codeblock, then a newline, then a codeblock, keep the - // code blocks together. - var nextMatch = - parser.next != null ? pattern.firstMatch(parser.next!) : null; - if (parser.current.trim() == '' && nextMatch != null) { - childLines.add(''); - childLines.add(nextMatch[1]); - parser.advance(); - parser.advance(); - } else { - break; - } - } - } - return childLines; - } - - @override - Node parse(BlockParser parser) { - var childLines = parseChildLines(parser); - - // The Markdown tests expect a trailing newline. 
- childLines.add(''); - - var content = childLines.join('\n'); - if (parser.document.encodeHtml) { - content = escapeHtml(content); - } - - return Element('pre', [Element.text('code', content)]); - } -} - -/// Parses preformatted code blocks between two ~~~ or ``` sequences. -/// -/// See the CommonMark spec: https://spec.commonmark.org/0.29/#fenced-code-blocks -class FencedCodeBlockSyntax extends BlockSyntax { - @override - RegExp get pattern => _codeFencePattern; - - const FencedCodeBlockSyntax(); - - @override - bool canParse(BlockParser parser) { - final match = pattern.firstMatch(parser.current); - if (match == null) return false; - final codeFence = match.group(1)!; - final infoString = match.group(2); - // From the CommonMark spec: - // - // > If the info string comes after a backtick fence, it may not contain - // > any backtick characters. - return (codeFence.codeUnitAt(0) != $backquote || - !infoString!.codeUnits.contains($backquote)); - } - - @override - List<String> parseChildLines(BlockParser parser, [String? endBlock]) { - endBlock ??= ''; - - var childLines = <String>[]; - parser.advance(); - - while (!parser.isDone) { - var match = pattern.firstMatch(parser.current); - if (match == null || !match[1]!.startsWith(endBlock)) { - childLines.add(parser.current); - parser.advance(); - } else { - parser.advance(); - break; - } - } - - return childLines; - } - - @override - Node parse(BlockParser parser) { - // Get the syntax identifier, if there is one. - var match = pattern.firstMatch(parser.current)!; - var endBlock = match.group(1); - var infoString = match.group(2)!; - - var childLines = parseChildLines(parser, endBlock); - - // The Markdown tests expect a trailing newline. 
- childLines.add(''); - - var text = childLines.join('\n'); - if (parser.document.encodeHtml) { - text = escapeHtml(text); - } - var code = Element.text('code', text); - - // the info-string should be trimmed - // http://spec.commonmark.org/0.22/#example-100 - infoString = infoString.trim(); - if (infoString.isNotEmpty) { - // only use the first word in the syntax - // http://spec.commonmark.org/0.22/#example-100 - var firstSpace = infoString.indexOf(' '); - if (firstSpace >= 0) { - infoString = infoString.substring(0, firstSpace); - } - if (parser.document.encodeHtml) { - infoString = escapeHtmlAttribute(infoString); - } - code.attributes['class'] = 'language-$infoString'; - } - - var element = Element('pre', [code]); - - return element; - } -} - -/// Parses horizontal rules like `---`, `_ _ _`, `* * *`, etc. -class HorizontalRuleSyntax extends BlockSyntax { - @override - RegExp get pattern => _hrPattern; - - const HorizontalRuleSyntax(); - - @override - Node parse(BlockParser parser) { - parser.advance(); - return Element.empty('hr'); - } -} - -/// Parses inline HTML at the block level. This differs from other Markdown -/// implementations in several ways: -/// -/// 1. This one is way way WAY simpler. -/// 2. Essentially no HTML parsing or validation is done. We're a Markdown -/// parser, not an HTML parser! 
-abstract class BlockHtmlSyntax extends BlockSyntax { - @override - bool canEndBlock(BlockParser parser) => true; - - const BlockHtmlSyntax(); -} - -class BlockTagBlockHtmlSyntax extends BlockHtmlSyntax { - static final _pattern = RegExp( - r'^ {0,3}</?(?:address|article|aside|base|basefont|blockquote|body|' - r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|' - r'figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|' - r'iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|' - r'option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|' - r'title|tr|track|ul)' - r'(?:\s|>|/>|$)'); - - /// The [_pattern] regular expression above is very expensive, even on - /// paragraphs of Markdown with no HTML. This regular expression can be used - /// first as a basic check that the input might possibly be an HTML block - /// tag, which occur very rarely in typical Markdown. - static final _openBracketPattern = RegExp(r'^ {0,3}<'); - - @override - RegExp get pattern => _pattern; - - const BlockTagBlockHtmlSyntax(); - - @override - bool canParse(BlockParser parser) { - if (!_openBracketPattern.hasMatch(parser.current)) return false; - return super.canParse(parser); - } - - @override - Node parse(BlockParser parser) { - var childLines = <String>[]; - - // Eat until we hit a blank line. - while (!parser.isDone && !parser.matches(_emptyPattern)) { - childLines.add(parser.current); - parser.advance(); - } - - return Text(childLines.join('\n').trimRight()); - } -} - -class OtherTagBlockHtmlSyntax extends BlockTagBlockHtmlSyntax { - @override - bool canEndBlock(BlockParser parser) => false; - - // Really hacky way to detect "other" HTML. 
This matches: - // - // * any opening spaces - // * open bracket and maybe a slash ("<" or "</") - // * some word characters - // * either: - // * a close bracket, or - // * whitespace followed by not-brackets followed by a close bracket - // * possible whitespace and the end of the line. - @override - RegExp get pattern => RegExp(r'^ {0,3}</?\w+(?:>|\s+[^>]*>)\s*$'); - - const OtherTagBlockHtmlSyntax(); -} - -/// A BlockHtmlSyntax that has a specific `endPattern`. -/// -/// In practice this means that the syntax dominates; it is allowed to eat -/// many lines, including blank lines, before matching its `endPattern`. -class LongBlockHtmlSyntax extends BlockHtmlSyntax { - @override - final RegExp pattern; - final RegExp _endPattern; - - LongBlockHtmlSyntax(String pattern, String endPattern) - : pattern = RegExp(pattern), - _endPattern = RegExp(endPattern); - - @override - Node parse(BlockParser parser) { - var childLines = <String>[]; - // Eat until we hit [endPattern]. - while (!parser.isDone) { - childLines.add(parser.current); - if (parser.matches(_endPattern)) break; - parser.advance(); - } - - parser.advance(); - return Text(childLines.join('\n').trimRight()); - } -} - -class ListItem { - bool forceBlock = false; - final List<String> lines; - - ListItem(this.lines); -} - -/// Base class for both ordered and unordered lists. -abstract class ListSyntax extends BlockSyntax { - @override - bool canEndBlock(BlockParser parser) { - // An empty list cannot interrupt a paragraph. See - // https://spec.commonmark.org/0.29/#example-255. - // Ideally, [BlockSyntax.canEndBlock] should be changed to be a method - // which accepts a [BlockParser], but this would be a breaking change, - // so we're going with this temporarily. - var match = pattern.firstMatch(parser.current)!; - // The seventh group, in both [_olPattern] and [_ulPattern] is the text - // after the delimiter. - return match[7]?.isNotEmpty ?? 
false; - } - - String get listTag; - - const ListSyntax(); - - /// A list of patterns that can start a valid block within a list item. - static final blocksInList = [ - _blockquotePattern, - _headerPattern, - _hrPattern, - _indentPattern, - _ulPattern, - _olPattern - ]; - - static final _whitespaceRe = RegExp('[ \t]*'); - - @override - Node parse(BlockParser parser) { - var items = <ListItem>[]; - var childLines = <String>[]; - - void endItem() { - if (childLines.isNotEmpty) { - items.add(ListItem(childLines)); - childLines = <String>[]; - } - } - - late Match? match; - bool tryMatch(RegExp pattern) { - match = pattern.firstMatch(parser.current); - return match != null; - } - - String? listMarker; - String? indent; - // In case the first number in an ordered list is not 1, use it as the - // "start". - int? startNumber; - - while (!parser.isDone) { - var leadingSpace = _whitespaceRe.matchAsPrefix(parser.current)!.group(0)!; - var leadingExpandedTabLength = _expandedTabLength(leadingSpace); - if (tryMatch(_emptyPattern)) { - if (_emptyPattern.hasMatch(parser.next ?? '')) { - // Two blank lines ends a list. - break; - } - // Add a blank line to the current list item. - childLines.add(''); - } else if (indent != null && indent.length <= leadingExpandedTabLength) { - // Strip off indent and add to current item. - var line = parser.current - .replaceFirst(leadingSpace, ' ' * leadingExpandedTabLength) - .replaceFirst(indent, ''); - childLines.add(line); - } else if (tryMatch(_hrPattern)) { - // Horizontal rule takes precedence to a new list item. - break; - } else if (tryMatch(_ulPattern) || tryMatch(_olPattern)) { - var precedingWhitespace = match![1]!; - var digits = match![2] ?? ''; - if (startNumber == null && digits.isNotEmpty) { - startNumber = int.parse(digits); - } - var marker = match![3]!; - var firstWhitespace = match![5] ?? ''; - var restWhitespace = match![6] ?? ''; - var content = match![7] ?? 
''; - var isBlank = content.isEmpty; - if (listMarker != null && listMarker != marker) { - // Changing the bullet or ordered list delimiter starts a new list. - break; - } - listMarker = marker; - var markerAsSpaces = ' ' * (digits.length + marker.length); - if (isBlank) { - // See http://spec.commonmark.org/0.28/#list-items under "3. Item - // starting with a blank line." - // - // If the list item starts with a blank line, the final piece of the - // indentation is just a single space. - indent = precedingWhitespace + markerAsSpaces + ' '; - } else if (restWhitespace.length >= 4) { - // See http://spec.commonmark.org/0.28/#list-items under "2. Item - // starting with indented code." - // - // If the list item starts with indented code, we need to _not_ count - // any indentation past the required whitespace character. - indent = precedingWhitespace + markerAsSpaces + firstWhitespace; - } else { - indent = precedingWhitespace + - markerAsSpaces + - firstWhitespace + - restWhitespace; - } - // End the current list item and start a new one. - endItem(); - childLines.add(restWhitespace + content); - } else if (BlockSyntax.isAtBlockEnd(parser)) { - // Done with the list. - break; - } else { - // If the previous item is a blank line, this means we're done with the - // list and are starting a new top-level paragraph. - if ((childLines.isNotEmpty) && (childLines.last == '')) { - parser.encounteredBlankLine = true; - break; - } - - // Anything else is paragraph continuation text. 
- childLines.add(parser.current); - } - parser.advance(); - } - - endItem(); - var itemNodes = <Element>[]; - - items.forEach(_removeLeadingEmptyLine); - var anyEmptyLines = _removeTrailingEmptyLines(items); - var anyEmptyLinesBetweenBlocks = false; - - for (var item in items) { - var itemParser = BlockParser(item.lines, parser.document); - var children = itemParser.parseLines(); - itemNodes.add(Element('li', children)); - anyEmptyLinesBetweenBlocks = - anyEmptyLinesBetweenBlocks || itemParser.encounteredBlankLine; - } - - // Must strip paragraph tags if the list is "tight". - // http://spec.commonmark.org/0.28/#lists - var listIsTight = !anyEmptyLines && !anyEmptyLinesBetweenBlocks; - - if (listIsTight) { - // We must post-process the list items, converting any top-level paragraph - // elements to just text elements. - for (var item in itemNodes) { - var children = item.children; - if (children != null) { - for (var i = 0; i < children.length; i++) { - var child = children[i]; - if (child is Element && child.tag == 'p') { - children.removeAt(i); - children.insertAll(i, child.children!); - } - } - } - } - } - - if (listTag == 'ol' && startNumber != 1) { - return Element(listTag, itemNodes)..attributes['start'] = '$startNumber'; - } else { - return Element(listTag, itemNodes); - } - } - - void _removeLeadingEmptyLine(ListItem item) { - if (item.lines.isNotEmpty && _emptyPattern.hasMatch(item.lines.first)) { - item.lines.removeAt(0); - } - } - - /// Removes any trailing empty lines and notes whether any items are separated - /// by such lines. 
- bool _removeTrailingEmptyLines(List<ListItem> items) { - var anyEmpty = false; - for (var i = 0; i < items.length; i++) { - if (items[i].lines.length == 1) continue; - while (items[i].lines.isNotEmpty && - _emptyPattern.hasMatch(items[i].lines.last)) { - if (i < items.length - 1) { - anyEmpty = true; - } - items[i].lines.removeLast(); - } - } - return anyEmpty; - } - - static int _expandedTabLength(String input) { - var length = 0; - for (var char in input.codeUnits) { - length += char == 0x9 ? 4 - (length % 4) : 1; - } - return length; - } -} - -/// Parses unordered lists. -class UnorderedListSyntax extends ListSyntax { - @override - RegExp get pattern => _ulPattern; - - @override - String get listTag => 'ul'; - - const UnorderedListSyntax(); -} - -/// Parses ordered lists. -class OrderedListSyntax extends ListSyntax { - @override - RegExp get pattern => _olPattern; - - @override - String get listTag => 'ol'; - - const OrderedListSyntax(); -} - -/// Parses tables. -class TableSyntax extends BlockSyntax { - @override - bool canEndBlock(BlockParser parser) => false; - - @override - RegExp get pattern => _dummyPattern; - - const TableSyntax(); - - @override - bool canParse(BlockParser parser) { - // Note: matches *next* line, not the current one. We're looking for the - // bar separating the head row from the body rows. - return parser.matchesNext(_tablePattern); - } - - /// Parses a table into its three parts: - /// - /// * a head row of head cells (`<th>` cells) - /// * a divider of hyphens and pipes (not rendered) - /// * many body rows of body cells (`<td>` cells) - @override - Node? parse(BlockParser parser) { - var alignments = _parseAlignments(parser.next!); - var columnCount = alignments.length; - var headRow = _parseRow(parser, alignments, 'th'); - if (headRow.children!.length != columnCount) { - return null; - } - var head = Element('thead', [headRow]); - - // Advance past the divider of hyphens. 
- parser.advance(); - - var rows = <Element>[]; - while (!parser.isDone && !BlockSyntax.isAtBlockEnd(parser)) { - var row = _parseRow(parser, alignments, 'td'); - var children = row.children; - if (children != null) { - while (children.length < columnCount) { - // Insert synthetic empty cells. - children.add(Element.empty('td')); - } - while (children.length > columnCount) { - children.removeLast(); - } - } - while (row.children!.length > columnCount) { - row.children!.removeLast(); - } - rows.add(row); - } - if (rows.isEmpty) { - return Element('table', [head]); - } else { - var body = Element('tbody', rows); - - return Element('table', [head, body]); - } - } - - List<String?> _parseAlignments(String line) { - var startIndex = _walkPastOpeningPipe(line); - - var endIndex = line.length - 1; - while (endIndex > 0) { - var ch = line.codeUnitAt(endIndex); - if (ch == $pipe) { - endIndex--; - break; - } - if (ch != $space && ch != $tab) { - break; - } - endIndex--; - } - - // Optimization: We walk [line] too many times. One lap should do it. - return line.substring(startIndex, endIndex + 1).split('|').map((column) { - column = column.trim(); - if (column.startsWith(':') && column.endsWith(':')) return 'center'; - if (column.startsWith(':')) return 'left'; - if (column.endsWith(':')) return 'right'; - return null; - }).toList(); - } - - /// Parses a table row at the current line into a table row element, with - /// parsed table cells. - /// - /// [alignments] is used to annotate an alignment on each cell, and - /// [cellType] is used to declare either "td" or "th" cells. - Element _parseRow( - BlockParser parser, List<String?> alignments, String cellType) { - var line = parser.current; - var cells = <String>[]; - var index = _walkPastOpeningPipe(line); - var cellBuffer = StringBuffer(); - - while (true) { - if (index >= line.length) { - // This row ended without a trailing pipe, which is fine. 
- cells.add(cellBuffer.toString().trimRight()); - cellBuffer.clear(); - break; - } - var ch = line.codeUnitAt(index); - if (ch == $backslash) { - if (index == line.length - 1) { - // A table row ending in a backslash is not well-specified, but it - // looks like GitHub just allows the character as part of the text of - // the last cell. - cellBuffer.writeCharCode(ch); - cells.add(cellBuffer.toString().trimRight()); - cellBuffer.clear(); - break; - } - var escaped = line.codeUnitAt(index + 1); - if (escaped == $pipe) { - // GitHub Flavored Markdown has a strange bit here; the pipe is to be - // escaped before any other inline processing. One consequence, for - // example, is that "| `\|` |" should be parsed as a cell with a code - // element with text "|", rather than "\|". Most parsers are not - // compliant with this corner, but this is what is specified, and what - // GitHub does in practice. - cellBuffer.writeCharCode(escaped); - } else { - // The [InlineParser] will handle the escaping. - cellBuffer.writeCharCode(ch); - cellBuffer.writeCharCode(escaped); - } - index += 2; - } else if (ch == $pipe) { - cells.add(cellBuffer.toString().trimRight()); - cellBuffer.clear(); - // Walk forward past any whitespace which leads the next cell. - index++; - index = _walkPastWhitespace(line, index); - if (index >= line.length) { - // This row ended with a trailing pipe. - break; - } - } else { - cellBuffer.writeCharCode(ch); - index++; - } - } - parser.advance(); - var row = [ - for (var cell in cells) Element(cellType, [UnparsedContent(cell)]) - ]; - - for (var i = 0; i < row.length && i < alignments.length; i++) { - if (alignments[i] == null) continue; - row[i].attributes['style'] = 'text-align: ${alignments[i]};'; - } - - return Element('tr', row); - } - - /// Walks past whitespace in [line] starting at [index]. - /// - /// Returns the index of the first non-whitespace character. 
- int _walkPastWhitespace(String line, int index) { - while (index < line.length) { - var ch = line.codeUnitAt(index); - if (ch != $space && ch != $tab) { - break; - } - index++; - } - return index; - } - - /// Walks past the opening pipe (and any whitespace that surrounds it) in - /// [line]. - /// - /// Returns the index of the first non-whitespace character after the pipe. - /// If no opening pipe is found, this just returns the index of the first - /// non-whitespace character. - int _walkPastOpeningPipe(String line) { - var index = 0; - while (index < line.length) { - var ch = line.codeUnitAt(index); - if (ch == $pipe) { - index++; - index = _walkPastWhitespace(line, index); - } - if (ch != $space && ch != $tab) { - // No leading pipe. - break; - } - index++; - } - return index; - } -} - -/// Parses paragraphs of regular text. -class ParagraphSyntax extends BlockSyntax { - static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\['); - - static final _whitespacePattern = RegExp(r'^\s*$'); - - @override - RegExp get pattern => _dummyPattern; - - @override - bool canEndBlock(BlockParser parser) => false; - - const ParagraphSyntax(); - - @override - bool canParse(BlockParser parser) => true; - - @override - Node parse(BlockParser parser) { - var childLines = <String>[]; - - // Eat until we hit something that ends a paragraph. - while (!BlockSyntax.isAtBlockEnd(parser)) { - childLines.add(parser.current); - parser.advance(); - } - - var paragraphLines = _extractReflinkDefinitions(parser, childLines); - if (paragraphLines == null) { - // Paragraph consisted solely of reference link definitions. - return Text(''); - } else { - var contents = UnparsedContent(paragraphLines.join('\n').trimRight()); - return Element('p', [contents]); - } - } - - /// Extract reference link definitions from the front of the paragraph, and - /// return the remaining paragraph lines. - List<String>? 
_extractReflinkDefinitions( - BlockParser parser, List<String> lines) { - bool lineStartsReflinkDefinition(int i) => - lines[i].startsWith(_reflinkDefinitionStart); - - var i = 0; - loopOverDefinitions: - while (true) { - // Check for reflink definitions. - if (!lineStartsReflinkDefinition(i)) { - // It's paragraph content from here on out. - break; - } - var contents = lines[i]; - var j = i + 1; - while (j < lines.length) { - // Check to see if the _next_ line might start a new reflink definition. - // Even if it turns out not to be, but it started with a '[', then it - // is not a part of _this_ possible reflink definition. - if (lineStartsReflinkDefinition(j)) { - // Try to parse [contents] as a reflink definition. - if (_parseReflinkDefinition(parser, contents)) { - // Loop again, starting at the next possible reflink definition. - i = j; - continue loopOverDefinitions; - } else { - // Could not parse [contents] as a reflink definition. - break; - } - } else { - contents = contents + '\n' + lines[j]; - j++; - } - } - // End of the block. - if (_parseReflinkDefinition(parser, contents)) { - i = j; - break; - } - - // It may be that there is a reflink definition starting at [i], but it - // does not extend all the way to [j], such as: - // - // [link]: url // line i - // "title" - // garbage - // [link2]: url // line j - // - // In this case, [i, i+1] is a reflink definition, and the rest is - // paragraph content. - while (j >= i) { - // This isn't the most efficient loop, what with this big ole' - // Iterable allocation (`getRange`) followed by a big 'ole String - // allocation, but we - // must walk backwards, checking each range. - contents = lines.getRange(i, j).join('\n'); - if (_parseReflinkDefinition(parser, contents)) { - // That is the last reflink definition. The rest is paragraph - // content. - i = j; - break; - } - j--; - } - // The ending was not a reflink definition at all. Just paragraph - // content. 
- - break; - } - - if (i == lines.length) { - // No paragraph content. - return null; - } else { - // Ends with paragraph content. - return lines.sublist(i); - } - } - - // Parse [contents] as a reference link definition. - // - // Also adds the reference link definition to the document. - // - // Returns whether [contents] could be parsed as a reference link definition. - bool _parseReflinkDefinition(BlockParser parser, String contents) { - var pattern = RegExp( - // Leading indentation. - r'''^[ ]{0,3}''' - // Reference id in brackets, and URL. - r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*''' - // Title in double or single quotes, or parens. - r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''', - multiLine: true); - var match = pattern.firstMatch(contents); - if (match == null) { - // Not a reference link definition. - return false; - } - if (match.match.length < contents.length) { - // Trailing text. No good. - return false; - } - - var label = match[1]!; - var destination = match[2] ?? match[3]!; - var title = match[4]; - - // The label must contain at least one non-whitespace character. - if (_whitespacePattern.hasMatch(label)) { - return false; - } - - if (title == '') { - // No title. - title = null; - } else { - // Remove "", '', or (). - title = title!.substring(1, title.length - 1); - } - - // References are case-insensitive, and internal whitespace is compressed. - label = normalizeLinkLabel(label); - - parser.document.linkReferences - .putIfAbsent(label, () => LinkReference(label, destination, title)); - return true; - } -} - -/// Walks the parser forward through the lines does not match any [BlockSyntax]. -/// -/// Returns a [UnparsedContent] with the unmatched lines as `textContent`. 
-class DummyBlockSyntax extends BlockSyntax { - const DummyBlockSyntax(); - - @override - RegExp get pattern => _dummyPattern; - - @override - bool canEndBlock(BlockParser parser) => false; - - @override - bool canParse(BlockParser parser) => true; - - @override - Node parse(BlockParser parser) { - final childLines = <String>[]; - - while (!BlockSyntax.isAtBlockEnd(parser)) { - childLines.add(parser.current); - parser.advance(); - } - - return UnparsedContent(childLines.join('\n')); - } -}
diff --git a/lib/src/block_syntaxes/block_html_syntax.dart b/lib/src/block_syntaxes/block_html_syntax.dart new file mode 100644 index 0000000..122c940 --- /dev/null +++ b/lib/src/block_syntaxes/block_html_syntax.dart
@@ -0,0 +1,23 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../block_parser.dart';
+import 'block_syntax.dart';
+
+/// Base class for syntaxes that handle HTML at the block level.
+///
+/// Compared with other Markdown implementations this is deliberately minimal:
+///
+/// 1. It is much, much simpler.
+/// 2. Essentially no HTML parsing or validation is performed; this is a
+///    Markdown parser, not an HTML parser.
+abstract class BlockHtmlSyntax extends BlockSyntax {
+  const BlockHtmlSyntax();
+
+  /// Always answers `true`, whatever the state of [parser].
+  @override
+  bool canEndBlock(BlockParser parser) {
+    return true;
+  }
+}
diff --git a/lib/src/block_syntaxes/block_syntax.dart b/lib/src/block_syntaxes/block_syntax.dart new file mode 100644 index 0000000..f33195d --- /dev/null +++ b/lib/src/block_syntaxes/block_syntax.dart
@@ -0,0 +1,87 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+
+/// Base class for every block-level syntax.
+///
+/// A syntax recognizes the start of a block through [pattern] and turns the
+/// lines it consumes into a [Node] in [parse].
+abstract class BlockSyntax {
+  const BlockSyntax();
+
+  /// Characters that are dropped when building an anchor: anything other than
+  /// lowercase ASCII letters, digits, space, underscore and hyphen.
+  static final _anchorDisallowedChars = RegExp('[^a-z0-9 _-]');
+
+  /// Whitespace that is replaced with a hyphen when building an anchor.
+  static final _anchorWhitespace = RegExp(r'\s');
+
+  /// Gets the regex used to identify the beginning of this block, if any.
+  RegExp get pattern;
+
+  /// Whether this syntax is allowed to end the block that precedes the
+  /// current line of [parser]; consulted by [isAtBlockEnd].
+  ///
+  /// Defaults to `true`.
+  bool canEndBlock(BlockParser parser) => true;
+
+  /// Whether the current line of [parser] starts a block of this syntax.
+  ///
+  /// The default implementation tests the line against [pattern].
+  bool canParse(BlockParser parser) {
+    return pattern.hasMatch(parser.current);
+  }
+
+  /// Parses the block starting at the current line of [parser].
+  ///
+  /// May return `null` when the syntax produces no node.
+  Node? parse(BlockParser parser);
+
+  /// Collects the first capture group of [pattern] for each consecutive line
+  /// that matches it, advancing [parser] past those lines.
+  ///
+  /// Stops at the first line that does not match or when [parser] is done.
+  List<String?> parseChildLines(BlockParser parser) {
+    // Grab all of the lines that form the block element.
+    final childLines = <String?>[];
+
+    while (!parser.isDone) {
+      final match = pattern.firstMatch(parser.current);
+      if (match == null) break;
+      childLines.add(match[1]);
+      parser.advance();
+    }
+
+    return childLines;
+  }
+
+  /// Gets whether or not [parser]'s current line should end the previous block.
+  ///
+  /// That is the case when [parser] is done, or when any of its block
+  /// syntaxes can both parse the current line and end a block.
+  static bool isAtBlockEnd(BlockParser parser) {
+    if (parser.isDone) return true;
+    return parser.blockSyntaxes
+        .any((s) => s.canParse(parser) && s.canEndBlock(parser));
+  }
+
+  /// Generates a valid HTML anchor from the inner text of [element].
+  ///
+  /// The text of the first child is lowercased and trimmed, disallowed
+  /// characters are removed, and remaining whitespace becomes `-`. Returns an
+  /// empty string when [element] has no children.
+  static String generateAnchorHash(Element element) {
+    final children = element.children;
+    // `children!.first` would throw on a childless element; there is simply
+    // no text to derive an anchor from in that case.
+    if (children == null || children.isEmpty) return '';
+    return children.first.textContent
+        .toLowerCase()
+        .trim()
+        .replaceAll(_anchorDisallowedChars, '')
+        .replaceAll(_anchorWhitespace, '-');
+  }
+}
diff --git a/lib/src/block_syntaxes/block_tag_block_html_syntax.dart b/lib/src/block_syntaxes/block_tag_block_html_syntax.dart new file mode 100644 index 0000000..e017696 --- /dev/null +++ b/lib/src/block_syntaxes/block_tag_block_html_syntax.dart
@@ -0,0 +1,65 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_html_syntax.dart';
+
+/// Parses an HTML block that starts with one of the block-level tag names
+/// listed in the CommonMark spec, for example `<div>` or `</table>`.
+///
+/// The block runs up to, but not including, the next blank line and is
+/// emitted as a raw [Text] node.
+class BlockTagBlockHtmlSyntax extends BlockHtmlSyntax {
+  /// Matches an opening or closing block-level tag after at most three
+  /// spaces of indentation.
+  ///
+  /// All six heading levels (`h1` to `h6`) are listed, as required by
+  /// https://spec.commonmark.org/0.29/#html-blocks (start condition 6).
+  static final _pattern = RegExp(
+      '^ {0,3}</?(?:address|article|aside|base|basefont|blockquote|body|'
+      'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|'
+      'figcaption|figure|footer|form|frame|frameset|h1|h2|h3|h4|h5|h6|head|'
+      'header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|'
+      'noframes|ol|optgroup|option|p|param|section|source|summary|table|'
+      'tbody|td|tfoot|th|thead|title|tr|track|ul)'
+      r'(?:\s|>|/>|$)');
+
+  /// The [_pattern] regular expression above is very expensive, even on
+  /// paragraphs of Markdown with no HTML. This regular expression can be used
+  /// first as a basic check that the input might possibly be an HTML block
+  /// tag, which occur very rarely in typical Markdown.
+  static final _openBracketPattern = RegExp('^ {0,3}<');
+
+  @override
+  RegExp get pattern => _pattern;
+
+  const BlockTagBlockHtmlSyntax();
+
+  /// Whether the current line starts a block-level HTML tag.
+  ///
+  /// Runs the cheap [_openBracketPattern] test first and only falls back to
+  /// the full [pattern] when the line could possibly open a tag.
+  @override
+  bool canParse(BlockParser parser) {
+    if (!_openBracketPattern.hasMatch(parser.current)) return false;
+    return super.canParse(parser);
+  }
+
+  /// Consumes lines until a blank line or the end of input and returns them,
+  /// joined by newlines with trailing whitespace removed, as a [Text] node.
+  @override
+  Node parse(BlockParser parser) {
+    final childLines = <String>[];
+
+    // Eat until we hit a blank line.
+    while (!parser.isDone && !parser.matches(emptyPattern)) {
+      childLines.add(parser.current);
+      parser.advance();
+    }
+
+    return Text(childLines.join('\n').trimRight());
+  }
+}
diff --git a/lib/src/block_syntaxes/blockquote_syntax.dart b/lib/src/block_syntaxes/blockquote_syntax.dart new file mode 100644 index 0000000..7d16e55 --- /dev/null +++ b/lib/src/block_syntaxes/blockquote_syntax.dart
@@ -0,0 +1,59 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+import 'code_block_syntax.dart';
+import 'paragraph_syntax.dart';
+
+/// Parses email-style blockquotes, where every quoted line starts with `>`.
+class BlockquoteSyntax extends BlockSyntax {
+  const BlockquoteSyntax();
+
+  @override
+  RegExp get pattern => blockquotePattern;
+
+  /// Gathers the lines that make up the blockquote, with the leading `>`
+  /// stripped from the lines that carry one.
+  @override
+  List<String> parseChildLines(BlockParser parser) {
+    final quoted = <String>[];
+    // Whether the most recent `>`-prefixed line matched [indentPattern].
+    var lastQuotedLineIsIndented = false;
+
+    while (!parser.isDone) {
+      final quoteMatch = pattern.firstMatch(parser.current);
+      if (quoteMatch == null) {
+        // A paragraph continuation is OK. This is content that cannot be
+        // parsed as any other syntax except Paragraph, and it doesn't match
+        // the bar in a Setext header.
+        // Because indented code blocks cannot interrupt paragraphs, a line
+        // that matches CodeBlockSyntax is also paragraph continuation text.
+        final claimant =
+            parser.blockSyntaxes.firstWhere((s) => s.canParse(parser));
+        final isContinuation = claimant is ParagraphSyntax ||
+            (claimant is CodeBlockSyntax && !lastQuotedLineIsIndented);
+        if (!isContinuation) break;
+        quoted.add(parser.current);
+      } else {
+        final stripped = quoteMatch[1]!;
+        quoted.add(stripped);
+        lastQuotedLineIsIndented = indentPattern.hasMatch(stripped);
+      }
+      parser.advance();
+    }
+
+    return quoted;
+  }
+
+  /// Parses the gathered lines with a nested [BlockParser] and wraps the
+  /// resulting nodes in a `blockquote` element.
+  @override
+  Node parse(BlockParser parser) {
+    final innerParser = BlockParser(parseChildLines(parser), parser.document);
+    return Element('blockquote', innerParser.parseLines());
+  }
+}
diff --git a/lib/src/block_syntaxes/code_block_syntax.dart b/lib/src/block_syntaxes/code_block_syntax.dart new file mode 100644 index 0000000..f37baed --- /dev/null +++ b/lib/src/block_syntaxes/code_block_syntax.dart
@@ -0,0 +1,68 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import '../util.dart';
+import 'block_syntax.dart';
+
+/// Parses preformatted code blocks whose lines are indented by four spaces.
+class CodeBlockSyntax extends BlockSyntax {
+  const CodeBlockSyntax();
+
+  @override
+  RegExp get pattern => indentPattern;
+
+  @override
+  bool canEndBlock(BlockParser parser) {
+    return false;
+  }
+
+  /// Gathers the text captured from each line that matches [pattern].
+  ///
+  /// A single blank line sitting between two matching lines is kept, so both
+  /// runs end up in the same code block.
+  @override
+  List<String?> parseChildLines(BlockParser parser) {
+    final codeLines = <String?>[];
+
+    while (!parser.isDone) {
+      final indented = pattern.firstMatch(parser.current);
+      if (indented != null) {
+        codeLines.add(indented[1]);
+        parser.advance();
+        continue;
+      }
+
+      // Not an indented line: carry on only when it is blank and the line
+      // after it is indented again.
+      final following = parser.next;
+      final resumed = following == null ? null : pattern.firstMatch(following);
+      if (resumed == null || parser.current.trim().isNotEmpty) break;
+
+      codeLines
+        ..add('')
+        ..add(resumed[1]);
+      parser
+        ..advance()
+        ..advance();
+    }
+
+    return codeLines;
+  }
+
+  /// Builds a `pre` element wrapping a `code` element that holds the gathered
+  /// lines, HTML-escaped when the document asks for it.
+  @override
+  Node parse(BlockParser parser) {
+    // The Markdown tests expect a trailing newline.
+    final codeLines = parseChildLines(parser)..add('');
+
+    final joined = codeLines.join('\n');
+    final content = parser.document.encodeHtml ? escapeHtml(joined) : joined;
+
+    return Element('pre', [Element.text('code', content)]);
+  }
+}
diff --git a/lib/src/block_syntaxes/dummy_block_syntax.dart b/lib/src/block_syntaxes/dummy_block_syntax.dart new file mode 100644 index 0000000..46ac98e --- /dev/null +++ b/lib/src/block_syntaxes/dummy_block_syntax.dart
@@ -0,0 +1,40 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Syntax whose [canParse] accepts every line.
+///
+/// Walks the parser forward through the lines that no block-ending syntax
+/// claims and returns them as a single [UnparsedContent].
+class DummyBlockSyntax extends BlockSyntax {
+  const DummyBlockSyntax();
+
+  @override
+  RegExp get pattern => dummyPattern;
+
+  @override
+  bool canParse(BlockParser parser) {
+    return true;
+  }
+
+  @override
+  bool canEndBlock(BlockParser parser) {
+    return false;
+  }
+
+  /// Consumes lines until [BlockSyntax.isAtBlockEnd] reports the end of the
+  /// block, then joins them with newlines.
+  @override
+  Node parse(BlockParser parser) {
+    final unmatched = <String>[];
+    for (; !BlockSyntax.isAtBlockEnd(parser); parser.advance()) {
+      unmatched.add(parser.current);
+    }
+    return UnparsedContent(unmatched.join('\n'));
+  }
+}
diff --git a/lib/src/block_syntaxes/empty_block_syntax.dart b/lib/src/block_syntaxes/empty_block_syntax.dart new file mode 100644 index 0000000..54cc865 --- /dev/null +++ b/lib/src/block_syntaxes/empty_block_syntax.dart
@@ -0,0 +1,26 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Consumes a line that matches [emptyPattern] without producing a node.
+class EmptyBlockSyntax extends BlockSyntax {
+  const EmptyBlockSyntax();
+
+  @override
+  RegExp get pattern => emptyPattern;
+
+  /// Flags on [parser] that a blank line was seen, skips the line, and
+  /// returns `null` so that nothing is emitted for it.
+  @override
+  Node? parse(BlockParser parser) {
+    parser
+      ..encounteredBlankLine = true
+      ..advance();
+    return null;
+  }
+}
diff --git a/lib/src/block_syntaxes/fenced_blockquote_syntax.dart b/lib/src/block_syntaxes/fenced_blockquote_syntax.dart new file mode 100644 index 0000000..d4a3592 --- /dev/null +++ b/lib/src/block_syntaxes/fenced_blockquote_syntax.dart
@@ -0,0 +1,43 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Parses the lines enclosed by a pair of `>>>` fences into a blockquote.
+class FencedBlockquoteSyntax extends BlockSyntax {
+  const FencedBlockquoteSyntax();
+
+  @override
+  RegExp get pattern => blockquoteFencePattern;
+
+  /// Returns the lines between the fence on the current line and the next
+  /// line matching [pattern], or the end of input when there is none.
+  ///
+  /// Both fence lines are consumed but not returned.
+  @override
+  List<String> parseChildLines(BlockParser parser) {
+    // Step over the opening fence.
+    parser.advance();
+
+    final enclosed = <String>[];
+    while (!parser.isDone) {
+      final line = parser.current;
+      parser.advance();
+      if (pattern.hasMatch(line)) break;
+      enclosed.add(line);
+    }
+    return enclosed;
+  }
+
+  /// Parses the enclosed lines with a nested [BlockParser] and wraps the
+  /// resulting nodes in a `blockquote` element.
+  @override
+  Node? parse(BlockParser parser) {
+    final innerParser = BlockParser(parseChildLines(parser), parser.document);
+    return Element('blockquote', innerParser.parseLines());
+  }
+}
diff --git a/lib/src/block_syntaxes/fenced_code_block_syntax.dart b/lib/src/block_syntaxes/fenced_code_block_syntax.dart new file mode 100644 index 0000000..a64e031 --- /dev/null +++ b/lib/src/block_syntaxes/fenced_code_block_syntax.dart
@@ -0,0 +1,98 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../charcode.dart';
+import '../patterns.dart';
+import '../util.dart';
+import 'block_syntax.dart';
+
+/// Parses preformatted code blocks between two ~~~ or ``` sequences.
+///
+/// See the CommonMark spec: https://spec.commonmark.org/0.29/#fenced-code-blocks
+class FencedCodeBlockSyntax extends BlockSyntax {
+  const FencedCodeBlockSyntax();
+
+  @override
+  RegExp get pattern => codeFencePattern;
+
+  /// Whether the current line opens a fenced code block.
+  ///
+  /// From the CommonMark spec:
+  ///
+  /// > If the info string comes after a backtick fence, it may not contain
+  /// > any backtick characters.
+  @override
+  bool canParse(BlockParser parser) {
+    final opening = pattern.firstMatch(parser.current);
+    if (opening == null) return false;
+
+    final fence = opening.group(1)!;
+    if (fence.codeUnitAt(0) != $backquote) return true;
+
+    final infoString = opening.group(2)!;
+    return !infoString.codeUnits.contains($backquote);
+  }
+
+  /// Returns the lines between the opening fence on the current line and the
+  /// closing fence, or the end of input when the block is never closed.
+  ///
+  /// A line closes the block when it matches [pattern] and its fence starts
+  /// with [endBlock]; a `null` [endBlock] is treated as the empty string.
+  @override
+  List<String> parseChildLines(BlockParser parser, [String? endBlock]) {
+    final closingPrefix = endBlock ?? '';
+
+    // Step over the opening fence.
+    parser.advance();
+
+    final body = <String>[];
+    while (!parser.isDone) {
+      final line = parser.current;
+      final candidate = pattern.firstMatch(line);
+      final closesBlock =
+          candidate != null && candidate[1]!.startsWith(closingPrefix);
+      parser.advance();
+      if (closesBlock) break;
+      body.add(line);
+    }
+    return body;
+  }
+
+  /// Builds a `pre` element wrapping a `code` element with the block's text.
+  ///
+  /// When the info string is not empty, its first word is added to the
+  /// `code` element as a `language-<word>` class.
+  @override
+  Node parse(BlockParser parser) {
+    final opening = pattern.firstMatch(parser.current)!;
+    final fence = opening.group(1);
+    // The info string should be trimmed:
+    // http://spec.commonmark.org/0.22/#example-100
+    final infoString = opening.group(2)!.trim();
+
+    // The Markdown tests expect a trailing newline.
+    final body = parseChildLines(parser, fence)..add('');
+    final joined = body.join('\n');
+    final code = Element.text(
+      'code',
+      parser.document.encodeHtml ? escapeHtml(joined) : joined,
+    );
+
+    if (infoString.isNotEmpty) {
+      // Only the first word of the info string names the language:
+      // http://spec.commonmark.org/0.22/#example-100
+      final firstSpace = infoString.indexOf(' ');
+      final language =
+          firstSpace < 0 ? infoString : infoString.substring(0, firstSpace);
+      final encoded = parser.document.encodeHtml
+          ? escapeHtmlAttribute(language)
+          : language;
+      code.attributes['class'] = 'language-$encoded';
+    }
+
+    return Element('pre', [code]);
+  }
+}
diff --git a/lib/src/block_syntaxes/header_syntax.dart b/lib/src/block_syntaxes/header_syntax.dart new file mode 100644 index 0000000..7d2b94e --- /dev/null +++ b/lib/src/block_syntaxes/header_syntax.dart
@@ -0,0 +1,29 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Parses atx-style headers: `## Header ##`.
+class HeaderSyntax extends BlockSyntax {
+  const HeaderSyntax();
+
+  @override
+  RegExp get pattern => headerPattern;
+
+  /// Turns the current line into an `h<level>` element, where the level is
+  /// the length of the first capture group of [pattern] and the content is
+  /// the trimmed second group.
+  @override
+  Node parse(BlockParser parser) {
+    final header = pattern.firstMatch(parser.current)!;
+    parser.advance();
+
+    final tag = 'h${header[1]!.length}';
+    final text = header[2]!.trim();
+    return Element(tag, [UnparsedContent(text)]);
+  }
+}
diff --git a/lib/src/block_syntaxes/header_with_id_syntax.dart b/lib/src/block_syntaxes/header_with_id_syntax.dart new file mode 100644 index 0000000..15ad231 --- /dev/null +++ b/lib/src/block_syntaxes/header_with_id_syntax.dart
@@ -0,0 +1,22 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import 'block_syntax.dart';
+import 'header_syntax.dart';
+
+/// Parses atx-style headers like [HeaderSyntax], and additionally stores a
+/// generated ID on each resulting element.
+class HeaderWithIdSyntax extends HeaderSyntax {
+  const HeaderWithIdSyntax();
+
+  /// Parses the header through [HeaderSyntax.parse], then sets its
+  /// `generatedId` to the anchor built by [BlockSyntax.generateAnchorHash].
+  @override
+  Node parse(BlockParser parser) {
+    final header = super.parse(parser) as Element;
+    return header..generatedId = BlockSyntax.generateAnchorHash(header);
+  }
+}
diff --git a/lib/src/block_syntaxes/horizontal_rule_syntax.dart b/lib/src/block_syntaxes/horizontal_rule_syntax.dart new file mode 100644 index 0000000..12e7839 --- /dev/null +++ b/lib/src/block_syntaxes/horizontal_rule_syntax.dart
@@ -0,0 +1,24 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../block_parser.dart';
+import '../patterns.dart';
+import 'block_syntax.dart';
+
+/// Parses horizontal rules such as `---`, `_ _ _` and `* * *`.
+class HorizontalRuleSyntax extends BlockSyntax {
+  const HorizontalRuleSyntax();
+
+  @override
+  RegExp get pattern => hrPattern;
+
+  /// Skips the rule line and returns an empty `hr` element.
+  @override
+  Node parse(BlockParser parser) {
+    final rule = Element.empty('hr');
+    parser.advance();
+    return rule;
+  }
+}
diff --git a/lib/src/block_syntaxes/list_syntax.dart b/lib/src/block_syntaxes/list_syntax.dart new file mode 100644 index 0000000..06b1cf9 --- /dev/null +++ b/lib/src/block_syntaxes/list_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../block_parser.dart';
import '../patterns.dart';
import 'block_syntax.dart';

/// The source lines collected for a single list item, with the item's
/// indentation already stripped.
class ListItem {
  ListItem(this.lines);

  bool forceBlock = false;

  /// The lines that make up this item; they are re-parsed as blocks by
  /// [ListSyntax.parse].
  final List<String> lines;
}

/// Base class for both ordered and unordered lists.
abstract class ListSyntax extends BlockSyntax {
  /// Whether the list item on the current line may interrupt a paragraph.
  ///
  /// An empty list item cannot interrupt a paragraph. See
  /// https://spec.commonmark.org/0.29/#example-255.
  @override
  bool canEndBlock(BlockParser parser) {
    final match = pattern.firstMatch(parser.current)!;
    // The seventh group, in both [olPattern] and [ulPattern] is the text
    // after the delimiter.
    return match[7]?.isNotEmpty ?? false;
  }

  /// The HTML tag of the list element produced by [parse].
  String get listTag;

  const ListSyntax();

  /// A list of patterns that can start a valid block within a list item.
  static final blocksInList = [
    blockquotePattern,
    headerPattern,
    hrPattern,
    indentPattern,
    ulPattern,
    olPattern
  ];

  static final _whitespaceRe = RegExp('[ \t]*');

  /// Parses consecutive list items starting at the current line.
  ///
  /// Lines are first grouped into [ListItem]s; each item is then parsed with
  /// its own [BlockParser]. If the list is "tight", top-level paragraphs in
  /// each item are replaced by their children. An `ol` whose first number is
  /// not 1 receives a `start` attribute.
  @override
  Node parse(BlockParser parser) {
    final items = <ListItem>[];
    var childLines = <String>[];

    // Flushes the lines gathered so far into a new [ListItem].
    void endItem() {
      if (childLines.isNotEmpty) {
        items.add(ListItem(childLines));
        childLines = <String>[];
      }
    }

    // The result of the most recent [tryMatch] call.
    Match? match;
    bool tryMatch(RegExp pattern) {
      match = pattern.firstMatch(parser.current);
      return match != null;
    }

    String? listMarker;
    String? indent;
    // In case the first number in an ordered list is not 1, use it as the
    // "start".
    int? startNumber;

    while (!parser.isDone) {
      final leadingSpace =
          _whitespaceRe.matchAsPrefix(parser.current)!.group(0)!;
      final leadingExpandedTabLength = _expandedTabLength(leadingSpace);
      if (tryMatch(emptyPattern)) {
        if (emptyPattern.hasMatch(parser.next ?? '')) {
          // Two blank lines ends a list.
          break;
        }
        // Add a blank line to the current list item.
        childLines.add('');
      } else if (indent != null && indent.length <= leadingExpandedTabLength) {
        // Strip off indent and add to current item.
        final line = parser.current
            .replaceFirst(leadingSpace, ' ' * leadingExpandedTabLength)
            .replaceFirst(indent, '');
        childLines.add(line);
      } else if (tryMatch(hrPattern)) {
        // Horizontal rule takes precedence to a new list item.
        break;
      } else if (tryMatch(ulPattern) || tryMatch(olPattern)) {
        // [match] is written inside a closure, so it is never promoted; take
        // one non-null snapshot instead of asserting on every group access.
        final itemMatch = match!;
        final precedingWhitespace = itemMatch[1]!;
        final digits = itemMatch[2] ?? '';
        if (startNumber == null && digits.isNotEmpty) {
          startNumber = int.parse(digits);
        }
        final marker = itemMatch[3]!;
        final firstWhitespace = itemMatch[5] ?? '';
        final restWhitespace = itemMatch[6] ?? '';
        final content = itemMatch[7] ?? '';
        final isBlank = content.isEmpty;
        if (listMarker != null && listMarker != marker) {
          // Changing the bullet or ordered list delimiter starts a new list.
          break;
        }
        listMarker = marker;
        final markerAsSpaces = ' ' * (digits.length + marker.length);
        if (isBlank) {
          // See http://spec.commonmark.org/0.28/#list-items under "3. Item
          // starting with a blank line."
          //
          // If the list item starts with a blank line, the final piece of the
          // indentation is just a single space.
          indent = '$precedingWhitespace$markerAsSpaces ';
        } else if (restWhitespace.length >= 4) {
          // See http://spec.commonmark.org/0.28/#list-items under "2. Item
          // starting with indented code."
          //
          // If the list item starts with indented code, we need to _not_ count
          // any indentation past the required whitespace character.
          indent = precedingWhitespace + markerAsSpaces + firstWhitespace;
        } else {
          indent = precedingWhitespace +
              markerAsSpaces +
              firstWhitespace +
              restWhitespace;
        }
        // End the current list item and start a new one.
        endItem();
        childLines.add(restWhitespace + content);
      } else if (BlockSyntax.isAtBlockEnd(parser)) {
        // Done with the list.
        break;
      } else {
        // If the previous item is a blank line, this means we're done with the
        // list and are starting a new top-level paragraph.
        if ((childLines.isNotEmpty) && (childLines.last == '')) {
          parser.encounteredBlankLine = true;
          break;
        }

        // Anything else is paragraph continuation text.
        childLines.add(parser.current);
      }
      parser.advance();
    }

    endItem();
    final itemNodes = <Element>[];

    items.forEach(_removeLeadingEmptyLine);
    final anyEmptyLines = _removeTrailingEmptyLines(items);
    var anyEmptyLinesBetweenBlocks = false;

    for (final item in items) {
      final itemParser = BlockParser(item.lines, parser.document);
      final children = itemParser.parseLines();
      itemNodes.add(Element('li', children));
      anyEmptyLinesBetweenBlocks =
          anyEmptyLinesBetweenBlocks || itemParser.encounteredBlankLine;
    }

    // Must strip paragraph tags if the list is "tight".
    // http://spec.commonmark.org/0.28/#lists
    final listIsTight = !anyEmptyLines && !anyEmptyLinesBetweenBlocks;

    if (listIsTight) {
      // We must post-process the list items, converting any top-level paragraph
      // elements to just text elements.
      for (final item in itemNodes) {
        final children = item.children;
        if (children != null) {
          for (var i = 0; i < children.length; i++) {
            final child = children[i];
            if (child is Element && child.tag == 'p') {
              children.removeAt(i);
              children.insertAll(i, child.children!);
            }
          }
        }
      }
    }

    if (listTag == 'ol' && startNumber != 1) {
      return Element(listTag, itemNodes)..attributes['start'] = '$startNumber';
    } else {
      return Element(listTag, itemNodes);
    }
  }

  /// Drops the first line of [item] when that line is empty.
  void _removeLeadingEmptyLine(ListItem item) {
    if (item.lines.isNotEmpty && emptyPattern.hasMatch(item.lines.first)) {
      item.lines.removeAt(0);
    }
  }

  /// Removes any trailing empty lines and notes whether any items are separated
  /// by such lines.
  bool _removeTrailingEmptyLines(List<ListItem> items) {
    var anyEmpty = false;
    for (var i = 0; i < items.length; i++) {
      if (items[i].lines.length == 1) continue;
      while (items[i].lines.isNotEmpty &&
          emptyPattern.hasMatch(items[i].lines.last)) {
        // Trailing blank lines of the last item do not separate items.
        if (i < items.length - 1) {
          anyEmpty = true;
        }
        items[i].lines.removeLast();
      }
    }
    return anyEmpty;
  }

  /// Returns the width of [input] when each tab advances to the next multiple
  /// of four columns and every other code unit counts as one column.
  static int _expandedTabLength(String input) {
    var length = 0;
    for (final char in input.codeUnits) {
      length += char == 0x9 ? 4 - (length % 4) : 1;
    }
    return length;
  }
}
diff --git a/lib/src/block_syntaxes/long_block_html_syntax.dart b/lib/src/block_syntaxes/long_block_html_syntax.dart new file mode 100644 index 0000000..8332bce --- /dev/null +++ b/lib/src/block_syntaxes/long_block_html_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../block_parser.dart';
import 'block_html_syntax.dart';

/// A [BlockHtmlSyntax] delimited by an explicit end pattern.
///
/// Once started, this syntax keeps consuming lines, blank ones included,
/// until a line matches the end pattern or the input runs out.
class LongBlockHtmlSyntax extends BlockHtmlSyntax {
  @override
  final RegExp pattern;
  final RegExp _endPattern;

  /// Compiles [pattern] and [endPattern] into the start and end expressions.
  LongBlockHtmlSyntax(String pattern, String endPattern)
      : pattern = RegExp(pattern),
        _endPattern = RegExp(endPattern);

  /// Collects lines up to and including the first one matching the end
  /// pattern, and returns them as a single right-trimmed [Text] node.
  @override
  Node parse(BlockParser parser) {
    final collected = <String>[];
    var reachedEnd = false;
    while (!reachedEnd && !parser.isDone) {
      collected.add(parser.current);
      reachedEnd = parser.matches(_endPattern);
      if (!reachedEnd) parser.advance();
    }

    // Step past the last line that was collected.
    parser.advance();
    return Text(collected.join('\n').trimRight());
  }
}
diff --git a/lib/src/block_syntaxes/ordered_list_syntax.dart b/lib/src/block_syntaxes/ordered_list_syntax.dart new file mode 100644 index 0000000..61570a3 --- /dev/null +++ b/lib/src/block_syntaxes/ordered_list_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../patterns.dart';
import 'list_syntax.dart';

/// A [ListSyntax] for ordered lists, rendered as `ol` elements.
class OrderedListSyntax extends ListSyntax {
  const OrderedListSyntax();

  /// The tag of the list element produced by this syntax.
  @override
  String get listTag => 'ol';

  /// Lines matching [olPattern] start an ordered list item.
  @override
  RegExp get pattern => olPattern;
}
diff --git a/lib/src/block_syntaxes/other_tag_block_html_syntax.dart b/lib/src/block_syntaxes/other_tag_block_html_syntax.dart new file mode 100644 index 0000000..edb5bfb --- /dev/null +++ b/lib/src/block_syntaxes/other_tag_block_html_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../block_parser.dart';
import 'block_tag_block_html_syntax.dart';

/// Matches a line that consists of a single "other" HTML tag.
class OtherTagBlockHtmlSyntax extends BlockTagBlockHtmlSyntax {
  // Really hacky way to detect "other" HTML. This matches:
  //
  // * any opening spaces
  // * open bracket and maybe a slash ("<" or "</")
  // * some word characters
  // * either:
  //   * a close bracket, or
  //   * whitespace followed by not-brackets followed by a close bracket
  // * possible whitespace and the end of the line.
  //
  // Compiled once and shared, rather than rebuilt on every [pattern] access.
  static final _pattern = RegExp(r'^ {0,3}</?\w+(?:>|\s+[^>]*>)\s*$');

  /// This syntax never interrupts another block.
  @override
  bool canEndBlock(BlockParser parser) => false;

  /// The expression a line must match for this syntax to apply.
  @override
  RegExp get pattern => _pattern;

  const OtherTagBlockHtmlSyntax();
}
diff --git a/lib/src/block_syntaxes/paragraph_syntax.dart b/lib/src/block_syntaxes/paragraph_syntax.dart new file mode 100644 index 0000000..aaf4de7 --- /dev/null +++ b/lib/src/block_syntaxes/paragraph_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../block_parser.dart';
import '../document.dart';
import '../patterns.dart';
import '../util.dart';
import 'block_syntax.dart';

/// Parses paragraphs of regular text.
class ParagraphSyntax extends BlockSyntax {
  static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\[');

  static final _whitespacePattern = RegExp(r'^\s*$');

  /// Matches a complete reference link definition.
  ///
  /// Compiled once here because [_parseReflinkDefinition] may be called many
  /// times for a single paragraph.
  static final _reflinkDefinitionPattern = RegExp(
    // Leading indentation.
    '''^[ ]{0,3}'''
    // Reference id in brackets, and URL.
    r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*'''
    // Title in double or single quotes, or parens.
    r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''',
    multiLine: true,
  );

  @override
  RegExp get pattern => dummyPattern;

  /// A paragraph never interrupts another block.
  @override
  bool canEndBlock(BlockParser parser) => false;

  const ParagraphSyntax();

  /// Any line can be treated as paragraph text.
  @override
  bool canParse(BlockParser parser) => true;

  /// Consumes lines until [BlockSyntax.isAtBlockEnd] reports the end of the
  /// block.
  ///
  /// Reference link definitions at the front of the paragraph are registered
  /// on the document and removed. Returns an empty [Text] when nothing else
  /// remains, otherwise a `p` element holding the remaining unparsed content.
  @override
  Node parse(BlockParser parser) {
    final childLines = <String>[];

    // Eat until we hit something that ends a paragraph.
    while (!BlockSyntax.isAtBlockEnd(parser)) {
      childLines.add(parser.current);
      parser.advance();
    }

    final paragraphLines = _extractReflinkDefinitions(parser, childLines);
    if (paragraphLines == null) {
      // Paragraph consisted solely of reference link definitions.
      return Text('');
    } else {
      final contents = UnparsedContent(paragraphLines.join('\n').trimRight());
      return Element('p', [contents]);
    }
  }

  /// Extract reference link definitions from the front of the paragraph, and
  /// return the remaining paragraph lines.
  ///
  /// Returns `null` when every line was consumed by definitions.
  List<String>? _extractReflinkDefinitions(
    BlockParser parser,
    List<String> lines,
  ) {
    bool lineStartsReflinkDefinition(int i) =>
        lines[i].startsWith(_reflinkDefinitionStart);

    var i = 0;
    loopOverDefinitions:
    while (true) {
      // Check for reflink definitions.
      if (!lineStartsReflinkDefinition(i)) {
        // It's paragraph content from here on out.
        break;
      }
      var contents = lines[i];
      var j = i + 1;
      while (j < lines.length) {
        // Check to see if the _next_ line might start a new reflink definition.
        // Even if it turns out not to be, but it started with a '[', then it
        // is not a part of _this_ possible reflink definition.
        if (lineStartsReflinkDefinition(j)) {
          // Try to parse [contents] as a reflink definition.
          if (_parseReflinkDefinition(parser, contents)) {
            // Loop again, starting at the next possible reflink definition.
            i = j;
            continue loopOverDefinitions;
          } else {
            // Could not parse [contents] as a reflink definition.
            break;
          }
        } else {
          contents = '$contents\n${lines[j]}';
          j++;
        }
      }
      // End of the block.
      if (_parseReflinkDefinition(parser, contents)) {
        i = j;
        break;
      }

      // It may be that there is a reflink definition starting at [i], but it
      // does not extend all the way to [j], such as:
      //
      //     [link]: url     // line i
      //     "title"
      //     garbage
      //     [link2]: url   // line j
      //
      // In this case, [i, i+1] is a reflink definition, and the rest is
      // paragraph content.
      while (j >= i) {
        // This isn't the most efficient loop, what with this big ole'
        // Iterable allocation (`getRange`) followed by a big 'ole String
        // allocation, but we
        // must walk backwards, checking each range.
        contents = lines.getRange(i, j).join('\n');
        if (_parseReflinkDefinition(parser, contents)) {
          // That is the last reflink definition. The rest is paragraph
          // content.
          i = j;
          break;
        }
        j--;
      }
      // The ending was not a reflink definition at all. Just paragraph
      // content.

      break;
    }

    if (i == lines.length) {
      // No paragraph content.
      return null;
    } else {
      // Ends with paragraph content.
      return lines.sublist(i);
    }
  }

  /// Parse [contents] as a reference link definition.
  ///
  /// Also adds the reference link definition to the document, unless a
  /// reference with the same normalized label is already registered.
  ///
  /// Returns whether [contents] could be parsed as a reference link
  /// definition.
  bool _parseReflinkDefinition(BlockParser parser, String contents) {
    final match = _reflinkDefinitionPattern.firstMatch(contents);
    if (match == null) {
      // Not a reference link definition.
      return false;
    }
    if (match.match.length < contents.length) {
      // Trailing text. No good.
      return false;
    }

    var label = match[1]!;
    final destination = match[2] ?? match[3]!;
    var title = match[4];

    // The label must contain at least one non-whitespace character.
    if (_whitespacePattern.hasMatch(label)) {
      return false;
    }

    if (title == '') {
      // No title.
      title = null;
    } else {
      // Remove "", '', or ().
      title = title!.substring(1, title.length - 1);
    }

    // References are case-insensitive, and internal whitespace is compressed.
    label = normalizeLinkLabel(label);

    parser.document.linkReferences
        .putIfAbsent(label, () => LinkReference(label, destination, title));
    return true;
  }
}
diff --git a/lib/src/block_syntaxes/setext_header_syntax.dart b/lib/src/block_syntaxes/setext_header_syntax.dart new file mode 100644 index 0000000..552e983 --- /dev/null +++ b/lib/src/block_syntaxes/setext_header_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../block_parser.dart';
import '../patterns.dart';
import 'block_syntax.dart';

/// Recognizes setext-style headers: text lines followed by an underline.
class SetextHeaderSyntax extends BlockSyntax {
  const SetextHeaderSyntax();

  @override
  RegExp get pattern => dummyPattern;

  /// Whether the current line starts a run of paragraph-like lines that is
  /// terminated by a line matching [setextPattern].
  @override
  bool canParse(BlockParser parser) {
    if (!_interpretableAsParagraph(parser.current)) return false;

    for (var offset = 1;; offset++) {
      final upcoming = parser.peek(offset);
      // Ran out of lines before finding an underline.
      if (upcoming == null) return false;
      if (setextPattern.hasMatch(upcoming)) return true;
      // Something other than paragraph text got in the way.
      if (!_interpretableAsParagraph(upcoming)) return false;
    }
  }

  /// Gathers text lines up to the underline and wraps them in an `h1` (for an
  /// underline whose first group starts with `=`) or `h2` element.
  @override
  Node parse(BlockParser parser) {
    final textLines = <String>[];
    String? tag;

    while (tag == null && !parser.isDone) {
      final underline = setextPattern.firstMatch(parser.current);
      if (underline != null) {
        tag = underline[1]![0] == '=' ? 'h1' : 'h2';
      } else {
        textLines.add(parser.current);
      }
      parser.advance();
    }

    final body = textLines.join('\n').trimRight();
    return Element(tag!, [UnparsedContent(body)]);
  }

  /// Whether [line] matches none of the patterns that would start a different
  /// kind of block (or an empty line).
  bool _interpretableAsParagraph(String line) =>
      !indentPattern.hasMatch(line) &&
      !codeFencePattern.hasMatch(line) &&
      !headerPattern.hasMatch(line) &&
      !blockquotePattern.hasMatch(line) &&
      !hrPattern.hasMatch(line) &&
      !ulPattern.hasMatch(line) &&
      !olPattern.hasMatch(line) &&
      !emptyPattern.hasMatch(line);
}
diff --git a/lib/src/block_syntaxes/setext_header_with_id_syntax.dart b/lib/src/block_syntaxes/setext_header_with_id_syntax.dart new file mode 100644 index 0000000..fffe992 --- /dev/null +++ b/lib/src/block_syntaxes/setext_header_with_id_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../block_parser.dart';
import 'block_syntax.dart';
import 'setext_header_syntax.dart';

/// A setext-style header syntax whose resulting elements also carry a
/// generated ID.
class SetextHeaderWithIdSyntax extends SetextHeaderSyntax {
  const SetextHeaderWithIdSyntax();

  /// Delegates to [SetextHeaderSyntax.parse], then stores the value returned
  /// by [BlockSyntax.generateAnchorHash] as the element's generated ID.
  @override
  Node parse(BlockParser parser) {
    final header = super.parse(parser) as Element;
    return header..generatedId = BlockSyntax.generateAnchorHash(header);
  }
}
diff --git a/lib/src/block_syntaxes/table_syntax.dart b/lib/src/block_syntaxes/table_syntax.dart new file mode 100644 index 0000000..2cbd4dd --- /dev/null +++ b/lib/src/block_syntaxes/table_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../block_parser.dart';
import '../charcode.dart';
import '../patterns.dart';
import 'block_syntax.dart';

/// Parses tables.
class TableSyntax extends BlockSyntax {
  /// A table never interrupts another block.
  @override
  bool canEndBlock(BlockParser parser) => false;

  @override
  RegExp get pattern => dummyPattern;

  const TableSyntax();

  @override
  bool canParse(BlockParser parser) {
    // Note: matches *next* line, not the current one. We're looking for the
    // bar separating the head row from the body rows.
    return parser.matchesNext(tablePattern);
  }

  /// Parses a table into its three parts:
  ///
  /// * a head row of head cells (`<th>` cells)
  /// * a divider of hyphens and pipes (not rendered)
  /// * many body rows of body cells (`<td>` cells)
  ///
  /// Returns `null` when the head row's cell count differs from the number of
  /// columns declared by the divider line. Body rows are padded with empty
  /// cells or truncated so that each has exactly that many cells.
  @override
  Node? parse(BlockParser parser) {
    final alignments = _parseAlignments(parser.next!);
    final columnCount = alignments.length;
    final headRow = _parseRow(parser, alignments, 'th');
    if (headRow.children!.length != columnCount) {
      return null;
    }
    final head = Element('thead', [headRow]);

    // Advance past the divider of hyphens.
    parser.advance();

    final rows = <Element>[];
    while (!parser.isDone && !BlockSyntax.isAtBlockEnd(parser)) {
      final row = _parseRow(parser, alignments, 'td');
      final children = row.children;
      if (children != null) {
        while (children.length < columnCount) {
          // Insert synthetic empty cells.
          children.add(Element.empty('td'));
        }
        while (children.length > columnCount) {
          // Drop cells beyond the declared column count.
          children.removeLast();
        }
      }
      rows.add(row);
    }
    if (rows.isEmpty) {
      return Element('table', [head]);
    } else {
      final body = Element('tbody', rows);

      return Element('table', [head, body]);
    }
  }

  /// Reads the per-column alignment from the divider [line].
  ///
  /// Each entry is `'center'`, `'left'`, `'right'`, or `null`, depending on
  /// where colons appear in the trimmed column text.
  List<String?> _parseAlignments(String line) {
    final startIndex = _walkPastOpeningPipe(line);

    // Walk backwards past trailing whitespace and at most one closing pipe.
    var endIndex = line.length - 1;
    while (endIndex > 0) {
      final ch = line.codeUnitAt(endIndex);
      if (ch == $pipe) {
        endIndex--;
        break;
      }
      if (ch != $space && ch != $tab) {
        break;
      }
      endIndex--;
    }

    // Optimization: We walk [line] too many times. One lap should do it.
    return line.substring(startIndex, endIndex + 1).split('|').map((column) {
      column = column.trim();
      if (column.startsWith(':') && column.endsWith(':')) return 'center';
      if (column.startsWith(':')) return 'left';
      if (column.endsWith(':')) return 'right';
      return null;
    }).toList();
  }

  /// Parses a table row at the current line into a table row element, with
  /// parsed table cells.
  ///
  /// [alignments] is used to annotate an alignment on each cell, and
  /// [cellType] is used to declare either "td" or "th" cells.
  Element _parseRow(
    BlockParser parser,
    List<String?> alignments,
    String cellType,
  ) {
    final line = parser.current;
    final cells = <String>[];
    var index = _walkPastOpeningPipe(line);
    final cellBuffer = StringBuffer();

    while (true) {
      if (index >= line.length) {
        // This row ended without a trailing pipe, which is fine.
        cells.add(cellBuffer.toString().trimRight());
        cellBuffer.clear();
        break;
      }
      final ch = line.codeUnitAt(index);
      if (ch == $backslash) {
        if (index == line.length - 1) {
          // A table row ending in a backslash is not well-specified, but it
          // looks like GitHub just allows the character as part of the text of
          // the last cell.
          cellBuffer.writeCharCode(ch);
          cells.add(cellBuffer.toString().trimRight());
          cellBuffer.clear();
          break;
        }
        final escaped = line.codeUnitAt(index + 1);
        if (escaped == $pipe) {
          // GitHub Flavored Markdown has a strange bit here; the pipe is to be
          // escaped before any other inline processing. One consequence, for
          // example, is that "| `\|` |" should be parsed as a cell with a code
          // element with text "|", rather than "\|". Most parsers are not
          // compliant with this corner, but this is what is specified, and what
          // GitHub does in practice.
          cellBuffer.writeCharCode(escaped);
        } else {
          // The [InlineParser] will handle the escaping.
          cellBuffer.writeCharCode(ch);
          cellBuffer.writeCharCode(escaped);
        }
        index += 2;
      } else if (ch == $pipe) {
        cells.add(cellBuffer.toString().trimRight());
        cellBuffer.clear();
        // Walk forward past any whitespace which leads the next cell.
        index++;
        index = _walkPastWhitespace(line, index);
        if (index >= line.length) {
          // This row ended with a trailing pipe.
          break;
        }
      } else {
        cellBuffer.writeCharCode(ch);
        index++;
      }
    }
    parser.advance();
    final row = [
      for (final cell in cells) Element(cellType, [UnparsedContent(cell)])
    ];

    for (var i = 0; i < row.length && i < alignments.length; i++) {
      if (alignments[i] == null) continue;
      row[i].attributes['style'] = 'text-align: ${alignments[i]};';
    }

    return Element('tr', row);
  }

  /// Walks past whitespace in [line] starting at [index].
  ///
  /// Returns the index of the first non-whitespace character.
  int _walkPastWhitespace(String line, int index) {
    while (index < line.length) {
      final ch = line.codeUnitAt(index);
      if (ch != $space && ch != $tab) {
        break;
      }
      index++;
    }
    return index;
  }

  /// Walks past the opening pipe (and any whitespace that surrounds it) in
  /// [line].
  ///
  /// Returns the index of the first non-whitespace character after the pipe.
  /// If no opening pipe is found, this just returns the index of the first
  /// non-whitespace character.
  int _walkPastOpeningPipe(String line) {
    var index = 0;
    while (index < line.length) {
      final ch = line.codeUnitAt(index);
      if (ch == $pipe) {
        index++;
        index = _walkPastWhitespace(line, index);
      }
      // A pipe also lands here, so the loop stops right after handling it.
      if (ch != $space && ch != $tab) {
        // No leading pipe.
        break;
      }
      index++;
    }
    return index;
  }
}
diff --git a/lib/src/block_syntaxes/unordered_list_syntax.dart b/lib/src/block_syntaxes/unordered_list_syntax.dart new file mode 100644 index 0000000..6b1ae10 --- /dev/null +++ b/lib/src/block_syntaxes/unordered_list_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../patterns.dart';
import 'list_syntax.dart';

/// A [ListSyntax] for unordered lists, rendered as `ul` elements.
class UnorderedListSyntax extends ListSyntax {
  const UnorderedListSyntax();

  /// The tag of the list element produced by this syntax.
  @override
  String get listTag => 'ul';

  /// Lines matching [ulPattern] start an unordered list item.
  @override
  RegExp get pattern => ulPattern;
}
diff --git a/lib/src/charcode.dart b/lib/src/charcode.dart new file mode 100644 index 0000000..693fcc5 --- /dev/null +++ b/lib/src/charcode.dart
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source is governed by a
// BSD-style license that can be found in the LICENSE file

// Named constants for ASCII code units, each prefixed with `$`.

/// "Horizontal Tab" control character, common name.
const int $tab = 0x09;

/// "Line feed" control character.
const int $lf = 0x0A;

/// "Vertical Tab" control character.
const int $vt = 0x0B;

/// "Form feed" control character.
const int $ff = 0x0C;

/// "Carriage return" control character.
const int $cr = 0x0D;

/// Space character.
const int $space = 0x20;

/// Character `!`.
const int $exclamation = 0x21;

/// Character `"`.
///
/// Same value as [$double_quote].
const int $quote = 0x22;

/// Character `"`.
///
/// Same value as [$quote].
const int $double_quote = 0x22; // ignore: constant_identifier_names

/// Character `#`.
const int $hash = 0x23;

/// Character `$`.
const int $dollar = 0x24;

/// Character `%`.
const int $percent = 0x25;

/// Character `&`.
const int $ampersand = 0x26;

/// Character `'`.
const int $apostrophe = 0x27;

/// Character `(`.
const int $lparen = 0x28;

/// Character `)`.
const int $rparen = 0x29;

/// Character `*`.
const int $asterisk = 0x2A;

/// Character `+`.
const int $plus = 0x2B;

/// Character `,`.
const int $comma = 0x2C;

/// Character `-`.
const int $dash = 0x2D;

/// Character `.`.
const int $dot = 0x2E;

/// Character `/`.
const int $slash = 0x2F;

/// Character `:`.
const int $colon = 0x3A;

/// Character `;`.
const int $semicolon = 0x3B;

/// Character `<`.
const int $lt = 0x3C;

/// Character `=`.
const int $equal = 0x3D;

/// Character `>`.
const int $gt = 0x3E;

/// Character `?`.
const int $question = 0x3F;

/// Character `@`.
const int $at = 0x40;

/// Character `[`.
const int $lbracket = 0x5B;

/// Character `\`.
const int $backslash = 0x5C;

/// Character `]`.
const int $rbracket = 0x5D;

/// Character `^`.
const int $caret = 0x5E;

/// Character `_`.
const int $underscore = 0x5F;

/// Character `` ` ``.
const int $backquote = 0x60;

/// Character `{`.
const int $lbrace = 0x7B;

/// Character `|`.
///
/// Same value as [$bar].
const int $pipe = 0x7C;

/// Character `|`.
///
/// Same value as [$pipe].
const int $bar = 0x7C;

/// Character `}`.
const int $rbrace = 0x7D;

/// Character `~`.
const int $tilde = 0x7E;
diff --git a/lib/src/document.dart b/lib/src/document.dart index 374f955..1428dcd 100644 --- a/lib/src/document.dart +++ b/lib/src/document.dart
@@ -4,8 +4,10 @@ import 'ast.dart'; import 'block_parser.dart'; +import 'block_syntaxes/block_syntax.dart'; import 'extension_set.dart'; import 'inline_parser.dart'; +import 'inline_syntaxes/inline_syntax.dart'; /// Maintains the context needed to parse a Markdown document. class Document { @@ -61,7 +63,7 @@ /// Parses the given [lines] of Markdown to a series of AST nodes. List<Node> parseLines(List<String> lines) { - var nodes = BlockParser(lines, this).parseLines(); + final nodes = BlockParser(lines, this).parseLines(); _parseInlineContent(nodes); return nodes; } @@ -71,9 +73,9 @@ void _parseInlineContent(List<Node> nodes) { for (var i = 0; i < nodes.length; i++) { - var node = nodes[i]; + final node = nodes[i]; if (node is UnparsedContent) { - var inlineNodes = parseInline(node.textContent); + final inlineNodes = parseInline(node.textContent); nodes.removeAt(i); nodes.insertAll(i, inlineNodes); i += inlineNodes.length - 1;
diff --git a/lib/src/extension_set.dart b/lib/src/extension_set.dart index 9c8b823..c0e0b13 100644 --- a/lib/src/extension_set.dart +++ b/lib/src/extension_set.dart
@@ -1,5 +1,13 @@ -import 'block_parser.dart'; -import 'inline_parser.dart'; +import 'block_syntaxes/block_syntax.dart'; +import 'block_syntaxes/fenced_code_block_syntax.dart'; +import 'block_syntaxes/header_with_id_syntax.dart'; +import 'block_syntaxes/setext_header_with_id_syntax.dart'; +import 'block_syntaxes/table_syntax.dart'; +import 'inline_syntaxes/autolink_extension_syntax.dart'; +import 'inline_syntaxes/emoji_syntax.dart'; +import 'inline_syntaxes/inline_html_syntax.dart'; +import 'inline_syntaxes/inline_syntax.dart'; +import 'inline_syntaxes/strikethrough_syntax.dart'; /// ExtensionSets provide a simple grouping mechanism for common Markdown /// flavors.
diff --git a/lib/src/html_renderer.dart b/lib/src/html_renderer.dart index 728d35f..f317707 100644 --- a/lib/src/html_renderer.dart +++ b/lib/src/html_renderer.dart
@@ -5,10 +5,10 @@ import 'dart:convert'; import 'ast.dart'; -import 'block_parser.dart'; +import 'block_syntaxes/block_syntax.dart'; import 'document.dart'; import 'extension_set.dart'; -import 'inline_parser.dart'; +import 'inline_syntaxes/inline_syntax.dart'; /// Converts the given string of Markdown to HTML. String markdownToHtml( @@ -23,7 +23,7 @@ bool withDefaultBlockSyntaxes = true, bool withDefaultInlineSyntaxes = true, }) { - var document = Document( + final document = Document( blockSyntaxes: blockSyntaxes, inlineSyntaxes: inlineSyntaxes, extensionSet: extensionSet, @@ -37,9 +37,11 @@ if (inlineOnly) return renderToHtml(document.parseInline(markdown)); // Replace windows line endings with unix line endings, and split. - var lines = markdown.replaceAll('\r\n', '\n').split('\n'); + final lines = markdown.replaceAll('\r\n', '\n').split('\n'); - return renderToHtml(document.parseLines(lines)) + '\n'; + final nodes = document.parseLines(lines); + + return '${renderToHtml(nodes)}\n'; } /// Renders [nodes] to HTML. @@ -103,7 +105,7 @@ void visitText(Text text) { var content = text.text; if (const ['br', 'p', 'li'].contains(_lastVisitedTag)) { - var lines = LineSplitter.split(content); + final lines = LineSplitter.split(content); content = content.contains('<pre>') ? lines.join('\n') : lines.map((line) => line.trimLeft()).join('\n'); @@ -125,11 +127,11 @@ buffer.write('<${element.tag}'); - for (var entry in element.attributes.entries) { + for (final entry in element.attributes.entries) { buffer.write(' ${entry.key}="${entry.value}"'); } - var generatedId = element.generatedId; + final generatedId = element.generatedId; // attach header anchor ids generated from text if (generatedId != null) {
diff --git a/lib/src/inline_parser.dart b/lib/src/inline_parser.dart index 88896e0..603d3da 100644 --- a/lib/src/inline_parser.dart +++ b/lib/src/inline_parser.dart
@@ -2,12 +2,20 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. -import 'package:charcode/charcode.dart'; - import 'ast.dart'; +import 'charcode.dart'; import 'document.dart'; -import 'emojis.dart'; -import 'util.dart'; +import 'inline_syntaxes/autolink_syntax.dart'; +import 'inline_syntaxes/code_syntax.dart'; +import 'inline_syntaxes/delimiter_syntax.dart'; +import 'inline_syntaxes/email_autolink_syntax.dart'; +import 'inline_syntaxes/emphasis_syntax.dart'; +import 'inline_syntaxes/escape_syntax.dart'; +import 'inline_syntaxes/image_syntax.dart'; +import 'inline_syntaxes/inline_syntax.dart'; +import 'inline_syntaxes/line_break_syntax.dart'; +import 'inline_syntaxes/link_syntax.dart'; +import 'inline_syntaxes/text_syntax.dart'; /// Maintains the internal state needed to parse inline span elements in /// Markdown. @@ -22,11 +30,11 @@ // "*" surrounded by spaces is left alone. TextSyntax(r' \* ', startCharacter: $space), // "_" surrounded by spaces is left alone. - TextSyntax(r' _ ', startCharacter: $space), + TextSyntax(' _ ', startCharacter: $space), // Parse "**strong**" and "*emphasis*" tags. - TagSyntax(r'\*+', requiresDelimiterRun: true), + EmphasisSyntax.asterisk(), // Parse "__strong__" and "_emphasis_" tags. - TagSyntax(r'_+', requiresDelimiterRun: true), + EmphasisSyntax.underscore(), CodeSyntax(), // We will add the LinkSyntax once we know about the specific link resolver. ]); @@ -35,13 +43,13 @@ List<InlineSyntax>.unmodifiable(<InlineSyntax>[ // Leave already-encoded HTML entities alone. Ensures we don't turn // "&amp;" into "&amp;amp;" - TextSyntax(r'&[#a-zA-Z0-9]*;', startCharacter: $ampersand), + TextSyntax('&[#a-zA-Z0-9]*;', startCharacter: $ampersand), // Encode "&". - TextSyntax(r'&', sub: '&amp;', startCharacter: $ampersand), + TextSyntax('&', sub: '&amp;', startCharacter: $ampersand), // Encode "<". 
- TextSyntax(r'<', sub: '&lt;', startCharacter: $lt), + TextSyntax('<', sub: '&lt;', startCharacter: $lt), // Encode ">". - TextSyntax(r'>', sub: '&gt;', startCharacter: $gt), + TextSyntax('>', sub: '&gt;', startCharacter: $gt), ]); /// The string of Markdown being parsed. @@ -59,7 +67,7 @@ int start = 0; /// The delimiter stack tracking possible opening delimiters and closing - /// delimiters for [TagSyntax] nodes. + /// delimiters for [DelimiterSyntax] nodes. final _delimiterStack = <Delimiter>[]; /// The tree of parsed HTML nodes. @@ -91,7 +99,7 @@ syntaxes.addAll(_defaultSyntaxes); } - if (_encodeHtml) { + if (encodeHtml) { syntaxes.addAll(_htmlSyntaxes); } } @@ -116,7 +124,7 @@ // Write any trailing text content to a Text node. writeText(); - _processEmphasis(-1); + _processDelimiterRun(-1); _combineAdjacentText(_tree); return _tree; } @@ -127,7 +135,7 @@ /// This is the "look for link or image" routine from the CommonMark spec: /// https://spec.commonmark.org/0.29/#-look-for-link-or-image-. void _linkOrImage() { - var index = _delimiterStack + final index = _delimiterStack .lastIndexWhere((d) => d.char == $lbracket || d.char == $exclamation); if (index == -1) { // Never found a possible open bracket. This is just a literal "]". 
@@ -136,7 +144,7 @@ start = pos; return; } - var delimiter = _delimiterStack[index] as SimpleDelimiter; + final delimiter = _delimiterStack[index] as SimpleDelimiter; if (!delimiter.isActive) { _delimiterStack.removeAt(index); addNode(Text(']')); @@ -144,21 +152,21 @@ start = pos; return; } - var syntax = delimiter.syntax; + final syntax = delimiter.syntax; if (syntax is LinkSyntax && syntaxes.any(((e) => e is LinkSyntax))) { - var nodeIndex = _tree.lastIndexWhere((n) => n == delimiter.node); - var linkNode = syntax.close(this, delimiter, null, getChildren: () { - _processEmphasis(index); + final nodeIndex = _tree.lastIndexWhere((n) => n == delimiter.node); + final linkNode = syntax.close(this, delimiter, null, getChildren: () { + _processDelimiterRun(index); // All of the nodes which lie past [index] are children of this // link/image. - var children = _tree.sublist(nodeIndex + 1, _tree.length); + final children = _tree.sublist(nodeIndex + 1, _tree.length); _tree.removeRange(nodeIndex + 1, _tree.length); return children; }); if (linkNode != null) { _delimiterStack.removeAt(index); if (delimiter.char == $lbracket) { - for (var d in _delimiterStack.sublist(0, index)) { + for (final d in _delimiterStack.sublist(0, index)) { if (d.char == $lbracket) d.isActive = false; } } @@ -187,51 +195,68 @@ } } - /// Processes emphasis (and other [TagSyntax] delimiters) from [bottomIndex] - /// and up. + /// Processes [DelimiterRun] type delimiters from [bottomIndex] and up. /// - /// This is the "process emphasis" routine according to the CommonMark spec: - /// https://spec.commonmark.org/0.29/#-process-emphasis-. - void _processEmphasis(int bottomIndex) { + /// This uses the same strategy as the "process emphasis" routine from the + /// CommonMark spec: https://spec.commonmark.org/0.30/#phase-2-inline-structure. 
+ void _processDelimiterRun(int bottomIndex) { var currentIndex = bottomIndex + 1; // Track the lowest index where we might find an open delimiter given a // closing delimiter length modulo 3. // Each key in this map is an open delimiter character. Each value is a // 3-element list. Each value in the list is the lowest index for the given // delimiter length modulo 3 (0, 1, 2). - var openersBottom = <int, List<int>>{}; + final openersBottom = <int, List<int>>{}; while (currentIndex < _delimiterStack.length) { - var closer = _delimiterStack[currentIndex]; - if (!closer.canClose) { - currentIndex++; - continue; - } - if (closer.char == $lbracket || closer.char == $exclamation) { + final closer = _delimiterStack[currentIndex]; + if (!closer.canClose || closer is! DelimiterRun) { currentIndex++; continue; } openersBottom.putIfAbsent(closer.char, () => List.filled(3, bottomIndex)); - var openersBottomPerCloserLength = openersBottom[closer.char]!; - var openerBottom = openersBottomPerCloserLength[closer.length % 3]; - var openerIndex = _delimiterStack.lastIndexWhere( + final openersBottomPerCloserLength = openersBottom[closer.char]!; + final openerBottom = openersBottomPerCloserLength[closer.length % 3]; + final openerIndex = _delimiterStack.lastIndexWhere( (d) => d.char == closer.char && d.canOpen && _canFormEmphasis(d, closer), currentIndex - 1); if (openerIndex > bottomIndex && openerIndex > openerBottom) { // Found an opener for [closer]. - var opener = _delimiterStack[openerIndex]; - var strong = opener.length >= 2 && closer.length >= 2; - var openerTextNode = opener.node; - var openerTextNodeIndex = _tree.indexOf(openerTextNode); - var closerTextNode = closer.node; + final opener = _delimiterStack[openerIndex]; + if (opener is! 
DelimiterRun) { + currentIndex++; + continue; + } + final matchedTagIndex = opener.tags.lastIndexWhere((e) => + opener.length >= e.indicatorLength && + closer.length >= e.indicatorLength); + if (matchedTagIndex == -1) { + currentIndex++; + continue; + } + final matchedTag = opener.tags[matchedTagIndex]; + final indicatorLength = matchedTag.indicatorLength; + final openerTextNode = opener.node; + final openerTextNodeIndex = _tree.indexOf(openerTextNode); + final closerTextNode = closer.node; var closerTextNodeIndex = _tree.indexOf(closerTextNode); - var node = opener.syntax.close(this, opener, closer, - getChildren: () => - _tree.sublist(openerTextNodeIndex + 1, closerTextNodeIndex)); + final node = opener.syntax.close( + this, + opener, + closer, + tag: matchedTag.tag, + getChildren: () => _tree.sublist( + openerTextNodeIndex + 1, + closerTextNodeIndex, + ), + ); // Replace all of the nodes between the opener and the closer (which // are now the new emphasis node's children) with the emphasis node. _tree.replaceRange( - openerTextNodeIndex + 1, closerTextNodeIndex, [node!]); + openerTextNodeIndex + 1, + closerTextNodeIndex, + [node!], + ); // Slide [closerTextNodeIndex] back accordingly. closerTextNodeIndex = openerTextNodeIndex + 2; @@ -241,29 +266,27 @@ // Remove delimiter characters, possibly removing nodes from the tree // and Delimiters from the delimiter stack. - if ((strong && openerTextNode.text.length == 2) || - (!strong && openerTextNode.text.length == 1)) { + if (opener.length == indicatorLength) { _tree.removeAt(openerTextNodeIndex); _delimiterStack.removeAt(openerIndex); // Slide [currentIndex] and [closerTextNodeIndex] back accordingly. currentIndex--; closerTextNodeIndex--; } else { - var newOpenerTextNode = - Text(openerTextNode.text.substring(strong ? 
2 : 1)); + final newOpenerTextNode = + Text(openerTextNode.text.substring(indicatorLength)); _tree[openerTextNodeIndex] = newOpenerTextNode; opener.node = newOpenerTextNode; } - if ((strong && closerTextNode.text.length == 2) || - (!strong && closerTextNode.text.length == 1)) { + if (closer.length == indicatorLength) { _tree.removeAt(closerTextNodeIndex); _delimiterStack.removeAt(currentIndex); // [currentIndex] has just moved to point at the next delimiter; // leave it. } else { - var newCloserTextNode = - Text(closerTextNode.text.substring(strong ? 2 : 1)); + final newCloserTextNode = + Text(closerTextNode.text.substring(indicatorLength)); _tree[closerTextNodeIndex] = newCloserTextNode; closer.node = newCloserTextNode; // [currentIndex] needs to be considered again; leave it. @@ -287,13 +310,13 @@ // correct output across newlines, where whitespace is sometimes compressed. void _combineAdjacentText(List<Node> nodes) { for (var i = 0; i < nodes.length - 1; i++) { - var node = nodes[i]; + final node = nodes[i]; if (node is Element && node.children != null) { _combineAdjacentText(node.children!); continue; } if (node is Text && nodes[i + 1] is Text) { - var buffer = + final buffer = StringBuffer('${node.textContent}${nodes[i + 1].textContent}'); var j = i + 2; while (j < nodes.length && nodes[j] is Text) { @@ -312,7 +335,7 @@ if (pos == start) { return; } - var text = source.substring(start, pos); + final text = source.substring(start, pos); _tree.add(Text(text)); start = pos; } @@ -323,7 +346,7 @@ } /// Push [delimiter] onto the stack of [Delimiter]s. - void _pushDelimiter(Delimiter delimiter) => _delimiterStack.add(delimiter); + void pushDelimiter(Delimiter delimiter) => _delimiterStack.add(delimiter); bool get isDone => pos == source.length; @@ -336,1126 +359,5 @@ start = pos; } - bool get _encodeHtml => document.encodeHtml; -} - -/// Represents one kind of Markdown tag that can be parsed. 
-abstract class InlineSyntax { - final RegExp pattern; - - /// The first character of [pattern], to be used as an efficient first check - /// that this syntax matches the current parser position. - final int? _startCharacter; - - /// Create a new [InlineSyntax] which matches text on [pattern]. - /// - /// If [startCharacter] is passed, it is used as a pre-matching check which - /// is faster than matching against [pattern]. - InlineSyntax(String pattern, {int? startCharacter}) - : pattern = RegExp(pattern, multiLine: true), - _startCharacter = startCharacter; - - /// Tries to match at the parser's current position. - /// - /// The parser's position can be overriden with [startMatchPos]. - /// Returns whether or not the pattern successfully matched. - bool tryMatch(InlineParser parser, [int? startMatchPos]) { - startMatchPos ??= parser.pos; - - // Before matching with the regular expression [pattern], which can be - // expensive on some platforms, check if even the first character matches - // this syntax. - if (_startCharacter != null && - parser.source.codeUnitAt(startMatchPos) != _startCharacter) { - return false; - } - - final startMatch = pattern.matchAsPrefix(parser.source, startMatchPos); - if (startMatch == null) return false; - - // Write any existing plain text up to this point. - parser.writeText(); - - if (onMatch(parser, startMatch)) parser.consume(startMatch.match.length); - return true; - } - - /// Processes [match], adding nodes to [parser] and possibly advancing - /// [parser]. - /// - /// Returns whether the caller should advance [parser] by `match[0].length`. - bool onMatch(InlineParser parser, Match match); -} - -/// Represents a hard line break. -class LineBreakSyntax extends InlineSyntax { - LineBreakSyntax() : super(r'(?:\\| +)\n'); - - /// Create a void <br> element. 
- @override - bool onMatch(InlineParser parser, Match match) { - parser.addNode(Element.empty('br')); - return true; - } -} - -/// Matches stuff that should just be passed through as straight text. -class TextSyntax extends InlineSyntax { - final String substitute; - - /// Create a new [TextSyntax] which matches text on [pattern]. - /// - /// If [sub] is passed, it is used as a simple replacement for [pattern]. If - /// [startCharacter] is passed, it is used as a pre-matching check which is - /// faster than matching against [pattern]. - TextSyntax(String pattern, {String sub = '', int? startCharacter}) - : substitute = sub, - super(pattern, startCharacter: startCharacter); - - /// Adds a [Text] node to [parser] and returns `true` if there is a - /// [substitute], as long as the preceding character (if any) is not a `/`. - /// - /// Otherwise, the parser is advanced by the length of [match] and `false` is - /// returned. - @override - bool onMatch(InlineParser parser, Match match) { - if (substitute.isEmpty || - (match.start > 0 && - match.input.substring(match.start - 1, match.start) == '/')) { - // Just use the original matched text. - parser.advanceBy(match.match.length); - return false; - } - - // Insert the substitution. - parser.addNode(Text(substitute)); - return true; - } -} - -/// Escape punctuation preceded by a backslash. -class EscapeSyntax extends InlineSyntax { - EscapeSyntax() : super(r'''\\[!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]'''); - - @override - bool onMatch(InlineParser parser, Match match) { - var chars = match.match; - var char = chars.codeUnitAt(1); - // Insert the substitution. Why these three charactes are replaced with - // their equivalent HTML entity referenced appears to be missing from the - // CommonMark spec, but is very present in all of the examples. 
- // https://talk.commonmark.org/t/entity-ification-of-quotes-and-brackets-missing-from-spec/3207 - if (parser._encodeHtml) { - if (char == $double_quote) { - parser.addNode(Text('&quot;')); - } else if (char == $lt) { - parser.addNode(Text('&lt;')); - } else if (char == $gt) { - parser.addNode(Text('&gt;')); - } else { - parser.addNode(Text(chars[1])); - } - } else { - parser.addNode(Text(chars[1])); - } - return true; - } -} - -/// Leave inline HTML tags alone, from -/// [CommonMark 0.28](http://spec.commonmark.org/0.28/#raw-html). -/// -/// This is not actually a good definition (nor CommonMark's) of an HTML tag, -/// but it is fast. It will leave text like `<a href='hi">` alone, which is -/// incorrect. -/// -/// TODO(srawlins): improve accuracy while ensuring performance, once -/// Markdown benchmarking is more mature. -class InlineHtmlSyntax extends TextSyntax { - InlineHtmlSyntax() - : super(r'<[/!?]?[A-Za-z][A-Za-z0-9-]*(?:\s[^>]*)?>', - startCharacter: $lt); -} - -/// Matches autolinks like `<foo@bar.example.com>`. -/// -/// See <http://spec.commonmark.org/0.28/#email-address>. -class EmailAutolinkSyntax extends InlineSyntax { - static final _email = - r'''[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}''' - r'''[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*'''; - - EmailAutolinkSyntax() : super('<($_email)>', startCharacter: $lt); - - @override - bool onMatch(InlineParser parser, Match match) { - var url = match[1]!; - var text = parser._encodeHtml ? escapeHtml(url) : url; - var anchor = Element.text('a', text); - anchor.attributes['href'] = Uri.encodeFull('mailto:$url'); - parser.addNode(anchor); - - return true; - } -} - -/// Matches autolinks like `<http://foo.com>`. 
-class AutolinkSyntax extends InlineSyntax { - AutolinkSyntax() : super(r'<(([a-zA-Z][a-zA-Z\-\+\.]+):(?://)?[^\s>]*)>'); - - @override - bool onMatch(InlineParser parser, Match match) { - var url = match[1]!; - var text = parser._encodeHtml ? 
escapeHtml(url) : url; - var anchor = Element.text('a', text); - anchor.attributes['href'] = Uri.encodeFull(url); - parser.addNode(anchor); - - return true; - } -} - -/// Matches autolinks like `http://foo.com`. -class AutolinkExtensionSyntax extends InlineSyntax { - /// Broken up parts of the autolink regex for reusability and readability - - // Autolinks can only come at the beginning of a line, after whitespace, or - // any of the delimiting characters *, _, ~, and (. - static const start = r'(?:^|[\s*_~(>])'; - - // An extended url autolink will be recognized when one of the schemes - // http://, https://, or ftp://, followed by a valid domain - static const scheme = r'(?:(?:https?|ftp):\/\/|www\.)'; - - // A valid domain consists of alphanumeric characters, underscores (_), - // hyphens (-) and periods (.). There must be at least one period, and no - // underscores may be present in the last two segments of the domain. - static const domainPart = r'\w\-'; - static const domain = '[$domainPart][$domainPart.]+'; - - // A valid domain consists of alphanumeric characters, underscores (_), - // hyphens (-) and periods (.). - static const path = r'[^\s<]*'; - - // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not - // be considered part of the autolink - static const truncatingPunctuationPositive = r'[?!.,:*_~]'; - - static final regExpTrailingPunc = RegExp('$truncatingPunctuationPositive*\$'); - static final regExpEndsWithColon = RegExp(r'\&[a-zA-Z0-9]+;$'); - static final regExpWhiteSpace = RegExp(r'\s'); - - AutolinkExtensionSyntax() : super('$start(($scheme)($domain)($path))'); - - @override - bool tryMatch(InlineParser parser, [int? startMatchPos]) { - return super.tryMatch(parser, parser.pos > 0 ? 
parser.pos - 1 : 0); - } - - @override - bool onMatch(InlineParser parser, Match match) { - var url = match[1]!; - var href = url; - var matchLength = url.length; - - if (url[0] == '>' || url.startsWith(regExpWhiteSpace)) { - url = url.substring(1, url.length - 1); - href = href.substring(1, href.length - 1); - parser.pos++; - matchLength--; - } - - // Prevent accidental standard autolink matches - if (url.endsWith('>') && parser.source[parser.pos - 1] == '<') { - return false; - } - - // When an autolink ends in ), we scan the entire autolink for the total - // number of parentheses. If there is a greater number of closing - // parentheses than opening ones, we don’t consider the last character - // part of the autolink, in order to facilitate including an autolink - // inside a parenthesis: - // https://github.github.com/gfm/#example-600 - if (url.endsWith(')')) { - final opening = _countChars(url, '('); - final closing = _countChars(url, ')'); - - if (closing > opening) { - url = url.substring(0, url.length - 1); - href = href.substring(0, href.length - 1); - matchLength--; - } - } - - // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will - // not be considered part of the autolink, though they may be included - // in the interior of the link: - // https://github.github.com/gfm/#example-599 - final trailingPunc = regExpTrailingPunc.firstMatch(url); - if (trailingPunc != null) { - var trailingLength = trailingPunc.match.length; - url = url.substring(0, url.length - trailingLength); - href = href.substring(0, href.length - trailingLength); - matchLength -= trailingLength; - } - - // If an autolink ends in a semicolon (;), we check to see if it appears - // to resemble an - // [entity reference](https://github.github.com/gfm/#entity-references); - // if the preceding text is & followed by one or more alphanumeric - // characters. 
If so, it is excluded from the autolink: - // https://github.github.com/gfm/#example-602 - if (url.endsWith(';')) { - final entityRef = regExpEndsWithColon.firstMatch(url); - if (entityRef != null) { - // Strip out HTML entity reference - var entityRefLength = entityRef.match.length; - url = url.substring(0, url.length - entityRefLength); - href = href.substring(0, href.length - entityRefLength); - matchLength -= entityRefLength; - } - } - - // The scheme http will be inserted automatically - if (!href.startsWith('http://') && - !href.startsWith('https://') && - !href.startsWith('ftp://')) { - href = 'http://$href'; - } - - final text = parser._encodeHtml ? escapeHtml(url) : url; - final anchor = Element.text('a', text); - anchor.attributes['href'] = Uri.encodeFull(href); - parser.addNode(anchor); - - parser.consume(matchLength); - return false; - } - - int _countChars(String input, String char) { - var count = 0; - - for (var i = 0; i < input.length; i++) { - if (input[i] == char) count++; - } - - return count; - } -} - -/// A delimiter indicating the possible "open" or possible "close" of a tag for -/// a [TagSyntax]. -abstract class Delimiter { - /// The [Text] node representing the plain text representing this delimiter. - abstract Text node; - - /// The type of delimiter. - /// - /// For the two-character image delimiter, `![`, this is `!`. - int get char; - - /// The number of delimiters. - int get length; - - /// Whether the delimiter is active. - /// - /// Links cannot be nested, so we must "deactivate" any pending ones. For - /// example, take the following text: - /// - /// Text [link and [more](links)](links). - /// - /// Once we have parsed `Text [`, there is one (pending) link in the state - /// stack. It is, by default, active. Once we parse the next possible link, - /// `[more](links)`, as a real link, we must deactive the pending links (just - /// the one, in this case). 
- abstract bool isActive; - - /// Whether this delimiter can open emphasis or strong emphasis. - bool get canOpen; - - /// Whether this delimiter can close emphasis or strong emphasis. - bool get canClose; - - /// The syntax which uses this delimiter to parse a tag. 
- TagSyntax get syntax; -} - -/// A simple delimiter implements the [Delimiter] interface with basic fields, -/// and does not have the concept of "left-flanking" or "right-flanking". -class SimpleDelimiter implements Delimiter { - @override - Text node; - - @override - final int char; - - @override - final int length; - - @override - bool isActive; - - @override - final bool canOpen; - - @override - final bool canClose; - - @override - final TagSyntax syntax; - - final int endPos; - - SimpleDelimiter( - {required this.node, - required this.char, - required this.length, - required this.canOpen, - required this.canClose, - required this.syntax, - required this.endPos}) - : isActive = true; -} - -/// An implementation of [Delimiter] which uses concepts of "left-flanking" and -/// "right-flanking" to determine the values of [canOpen] and [canClose]. -/// -/// This is primarily used when parsing emphasis and strong emphasis, but can -/// also be used by other extensions of [TagSyntax]. -class DelimiterRun implements Delimiter { - /// According to - /// [CommonMark](https://spec.commonmark.org/0.29/#punctuation-character): - /// - /// > A punctuation character is an ASCII punctuation character or anything in - /// > the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or - /// > `Ps`. - // This RegExp is inspired by - // https://github.com/commonmark/commonmark.js/blob/1f7d09099c20d7861a674674a5a88733f55ff729/lib/inlines.js#L39. - // I don't know if there is any way to simplify it or maintain it. 
- static final RegExp punctuation = RegExp(r'[' - r'''!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~''' - r'\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE' - r'\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E' - r'\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E' - r'\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14' - r'\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB' - r'\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736' - r'\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F' - r'\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E' - r'\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051' - r'\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A' - r'\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC' - r'\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42' - r'\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE' - r'\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF' - r'\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF' - r'\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19' - r'\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03' - r'\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F' - r'\uFF5B\uFF5D\uFF5F-\uFF65' - ']'); - - // TODO(srawlins): Unicode whitespace - static final String whitespace = ' \t\r\n'; - - @override - Text node; - - @override - final int char; - - @override - int get length => node.text.length; - - @override - bool isActive; - - @override - final TagSyntax syntax; - - final bool allowIntraWord; - - @override - final bool canOpen; - - @override - final bool canClose; - - DelimiterRun._({ - required this.node, - required this.char, - required this.syntax, - required bool isLeftFlanking, - required bool isRightFlanking, - required bool 
isPrecededByPunctuation, - required bool isFollowedByPunctuation, - required this.allowIntraWord, - }) : canOpen = isLeftFlanking && - (char == $asterisk || - !isRightFlanking || - allowIntraWord || - isPrecededByPunctuation), - canClose = isRightFlanking && - (char == $asterisk || - !isLeftFlanking || - allowIntraWord || - isFollowedByPunctuation), - isActive = true; - - /// Tries to parse a delimiter run from [runStart] (inclusive) to [runEnd] - /// (exclusive). - static DelimiterRun? tryParse(InlineParser parser, int runStart, int runEnd, - {required TagSyntax syntax, - required Text node, - bool allowIntraWord = false}) { - bool leftFlanking, - rightFlanking, - precededByPunctuation, - followedByPunctuation; - String preceding, following; - if (runStart == 0) { - rightFlanking = false; - preceding = '\n'; - } else { - preceding = parser.source.substring(runStart - 1, runStart); - } - precededByPunctuation = punctuation.hasMatch(preceding); - - if (runEnd == parser.source.length) { - leftFlanking = false; - following = '\n'; - } else { - following = parser.source.substring(runEnd, runEnd + 1); - } - followedByPunctuation = punctuation.hasMatch(following); - - // http://spec.commonmark.org/0.28/#left-flanking-delimiter-run - if (whitespace.contains(following)) { - leftFlanking = false; - } else { - leftFlanking = !followedByPunctuation || - whitespace.contains(preceding) || - precededByPunctuation || - allowIntraWord; - } - - // http://spec.commonmark.org/0.28/#right-flanking-delimiter-run - if (whitespace.contains(preceding)) { - rightFlanking = false; - } else { - rightFlanking = !precededByPunctuation || - whitespace.contains(following) || - followedByPunctuation || - allowIntraWord; - } - - if (!leftFlanking && !rightFlanking) { - // Could not parse a delimiter run. 
- return null; - } - - return DelimiterRun._( - node: node, - char: parser.charAt(runStart), - syntax: syntax, - isLeftFlanking: leftFlanking, - isRightFlanking: rightFlanking, - isPrecededByPunctuation: precededByPunctuation, - isFollowedByPunctuation: followedByPunctuation, - allowIntraWord: allowIntraWord, - ); - } - - @override - String toString() => '<char: $char, length: $length, canOpen: $canOpen, ' - 'canClose: $canClose>'; -} - -/// Matches syntax that has a pair of tags and becomes an element, like `*` for -/// `<em>`. Allows nested tags. -class TagSyntax extends InlineSyntax { - /// Whether this is parsed according to the same nesting rules as [emphasis - /// delimiters][]. - /// - /// [emphasis delimiters]: http://spec.commonmark.org/0.28/#can-open-emphasis - final bool requiresDelimiterRun; - - /// Whether to allow intra-word delimiter runs. CommonMark emphasis and - /// strong emphasis does not allow this, but GitHub-Flavored Markdown allows - /// it on strikethrough. - final bool allowIntraWord; - - /// Create a new [TagSyntax] which matches text on [pattern]. - /// - /// The [pattern] is used to find the matching text. If [requiresDelimiterRun] is - /// passed, this syntax parses according to the same nesting rules as - /// emphasis delimiters. If [startCharacter] is passed, it is used as a - /// pre-matching check which is faster than matching against [pattern]. - TagSyntax(String pattern, - {this.requiresDelimiterRun = false, - int? 
startCharacter, - this.allowIntraWord = false}) - : super(pattern, startCharacter: startCharacter); - - @override - bool onMatch(InlineParser parser, Match match) { - var runLength = match.group(0)!.length; - var matchStart = parser.pos; - var matchEnd = parser.pos + runLength; - var text = Text(parser.source.substring(matchStart, matchEnd)); - if (!requiresDelimiterRun) { - parser._pushDelimiter(SimpleDelimiter( - node: text, - length: runLength, - char: parser.source.codeUnitAt(matchStart), - canOpen: true, - canClose: false, - syntax: this, - endPos: matchEnd)); - parser.addNode(text); - return true; - } - - var delimiterRun = DelimiterRun.tryParse(parser, matchStart, matchEnd, - syntax: this, node: text, allowIntraWord: allowIntraWord); - if (delimiterRun != null) { - parser._pushDelimiter(delimiterRun); - parser.addNode(text); - return true; - } else { - parser.advanceBy(runLength); - return false; - } - } - - /// Attempts to close this tag at the current position. - /// - /// If a tag cannot be closed at the current position (for example, if a link - /// reference cannot be found for a link tag's label), then `null` is - /// returned. - /// - /// If a tag can be closed at the current position, then this method calls - /// [getChildren], in which [parser] parses any nested text into child nodes. - /// The returned [Node] incorpororates these child nodes. - Node? close(InlineParser parser, Delimiter opener, Delimiter closer, - {required List<Node> Function() getChildren}) { - var strong = opener.length >= 2 && closer.length >= 2; - return Element(strong ? 'strong' : 'em', getChildren()); - } -} - -/// Matches strikethrough syntax according to the GFM spec. 
-class StrikethroughSyntax extends TagSyntax { - StrikethroughSyntax() - : super('~+', requiresDelimiterRun: true, allowIntraWord: true); - - @override - Node close(InlineParser parser, Delimiter opener, Delimiter closer, - {required List<Node> Function() getChildren}) { - return Element('del', getChildren()); - } -} - -/// Matches links like `[blah][label]` and `[blah](url)`. -class LinkSyntax extends TagSyntax { - static final _entirelyWhitespacePattern = RegExp(r'^\s*$'); - - final Resolver linkResolver; - - LinkSyntax( - {Resolver? linkResolver, - String pattern = r'\[', - int startCharacter = $lbracket}) - : linkResolver = (linkResolver ?? ((String _, [String? __]) => null)), - super(pattern, startCharacter: startCharacter); - - @override - Node? close( - InlineParser parser, covariant SimpleDelimiter opener, Delimiter? closer, - {required List<Node> Function() getChildren}) { - var text = parser.source.substring(opener.endPos, parser.pos); - // The current character is the `]` that closed the link text. Examine the - // next character, to determine what type of link we might have (a '(' - // means a possible inline link; otherwise a possible reference link). - if (parser.pos + 1 >= parser.source.length) { - // The `]` is at the end of the document, but this may still be a valid - // shortcut reference link. - return _tryCreateReferenceLink(parser, text, getChildren: getChildren); - } - - // Peek at the next character; don't advance, so as to avoid later stepping - // backward. - var char = parser.charAt(parser.pos + 1); - - if (char == $lparen) { - // Maybe an inline link, like `[text](destination)`. - parser.advanceBy(1); - var leftParenIndex = parser.pos; - var inlineLink = _parseInlineLink(parser); - if (inlineLink != null) { - return _tryCreateInlineLink(parser, inlineLink, - getChildren: getChildren); - } - // At this point, we've matched `[...](`, but that `(` did not pan out to - // be an inline link. 
We must now check if `[...]` is simply a shortcut - // reference link. - - // Reset the parser position. - parser.pos = leftParenIndex; - parser.advanceBy(-1); - return _tryCreateReferenceLink(parser, text, getChildren: getChildren); - } - - if (char == $lbracket) { - parser.advanceBy(1); - // At this point, we've matched `[...][`. Maybe a *full* reference link, - // like `[foo][bar]` or a *collapsed* reference link, like `[foo][]`. - if (parser.pos + 1 < parser.source.length && - parser.charAt(parser.pos + 1) == $rbracket) { - // That opening `[` is not actually part of the link. Maybe a - // *shortcut* reference link (followed by a `[`). - parser.advanceBy(1); - return _tryCreateReferenceLink(parser, text, getChildren: getChildren); - } - var label = _parseReferenceLinkLabel(parser); - if (label != null) { - return _tryCreateReferenceLink(parser, label, getChildren: getChildren); - } - return null; - } - - // The link text (inside `[...]`) was not followed with a opening `(` nor - // an opening `[`. Perhaps just a simple shortcut reference link (`[...]`). - return _tryCreateReferenceLink(parser, text, getChildren: getChildren); - } - - /// Resolve a possible reference link. - /// - /// Uses [linkReferences], [linkResolver], and [_createNode] to try to - /// resolve [label] into a [Node]. If [label] is defined in - /// [linkReferences] or can be resolved by [linkResolver], returns a [Node] - /// that links to the resolved URL. - /// - /// Otherwise, returns `null`. - /// - /// [label] does not need to be normalized. - Node? _resolveReferenceLink( - String label, Map<String, LinkReference> linkReferences, - {required List<Node> Function() getChildren}) { - var linkReference = linkReferences[normalizeLinkLabel(label)]; - if (linkReference != null) { - return _createNode(linkReference.destination, linkReference.title, - getChildren: getChildren); - } else { - // This link has no reference definition. 
But we allow users of the - // library to specify a custom resolver function ([linkResolver]) that - // may choose to handle this. Otherwise, it's just treated as plain - // text. - - // Normally, label text does not get parsed as inline Markdown. However, - // for the benefit of the link resolver, we need to at least escape - // brackets, so that, e.g. a link resolver can receive `[\[\]]` as `[]`. - var resolved = linkResolver(label - .replaceAll(r'\\', r'\') - .replaceAll(r'\[', '[') - .replaceAll(r'\]', ']')); - if (resolved != null) { - getChildren(); - } - return resolved; - } - } - - /// Create the node represented by a Markdown link. - Node _createNode(String destination, String? title, - {required List<Node> Function() getChildren}) { - var children = getChildren(); - var element = Element('a', children); - element.attributes['href'] = escapeAttribute(destination); - if (title != null && title.isNotEmpty) { - element.attributes['title'] = escapeAttribute(title); - } - return element; - } - - /// Tries to create a reference link node. - /// - /// Returns the link if it was successfully created, `null` otherwise. - Node? _tryCreateReferenceLink(InlineParser parser, String label, - {required List<Node> Function() getChildren}) { - return _resolveReferenceLink(label, parser.document.linkReferences, - getChildren: getChildren); - } - - // Tries to create an inline link node. - // - /// Returns the link if it was successfully created, `null` otherwise. - Node _tryCreateInlineLink(InlineParser parser, InlineLink link, - {required List<Node> Function() getChildren}) { - return _createNode(link.destination, link.title, getChildren: getChildren); - } - - /// Parse a reference link label at the current position. - /// - /// Specifically, [parser.pos] is expected to be pointing at the `[` which - /// opens the link label. - /// - /// Returns the label if it could be parsed, or `null` if not. - String? 
_parseReferenceLinkLabel(InlineParser parser) { - // Walk past the opening `[`. - parser.advanceBy(1); - if (parser.isDone) return null; - - var buffer = StringBuffer(); - while (true) { - var char = parser.charAt(parser.pos); - if (char == $backslash) { - parser.advanceBy(1); - var next = parser.charAt(parser.pos); - if (next != $backslash && next != $rbracket) { - buffer.writeCharCode(char); - } - buffer.writeCharCode(next); - } else if (char == $lbracket) { - return null; - } else if (char == $rbracket) { - break; - } else { - buffer.writeCharCode(char); - } - parser.advanceBy(1); - if (parser.isDone) return null; - // TODO(srawlins): only check 999 characters, for performance reasons? - } - - var label = buffer.toString(); - - // A link label must contain at least one non-whitespace character. - if (_entirelyWhitespacePattern.hasMatch(label)) return null; - - return label; - } - - /// Parse an inline [InlineLink] at the current position. - /// - /// At this point, we have parsed a link's (or image's) opening `[`, and then - /// a matching closing `]`, and [parser.pos] is pointing at an opening `(`. - /// This method will then attempt to parse a link destination wrapped in `<>`, - /// such as `(<http://url>)`, or a bare link destination, such as - /// `(http://url)`, or a link destination with a title, such as - /// `(http://url "title")`. - /// - /// Returns the [InlineLink] if one was parsed, or `null` if not. - InlineLink? _parseInlineLink(InlineParser parser) { - // Start walking to the character just after the opening `(`. - parser.advanceBy(1); - - _moveThroughWhitespace(parser); - if (parser.isDone) return null; // EOF. Not a link. - - if (parser.charAt(parser.pos) == $lt) { - // Maybe a `<...>`-enclosed link destination. - return _parseInlineBracketedLink(parser); - } else { - return _parseInlineBareDestinationLink(parser); - } - } - - /// Parse an inline link with a bracketed destination (a destination wrapped - /// in `<...>`). 
The current position of the parser must be the first - /// character of the destination. - /// - /// Returns the link if it was successfully created, `null` otherwise. - InlineLink? _parseInlineBracketedLink(InlineParser parser) { - parser.advanceBy(1); - - var buffer = StringBuffer(); - while (true) { - var char = parser.charAt(parser.pos); - if (char == $backslash) { - parser.advanceBy(1); - var next = parser.charAt(parser.pos); - // TODO: Follow the backslash spec better here. - // http://spec.commonmark.org/0.29/#backslash-escapes - if (next != $backslash && next != $gt) { - buffer.writeCharCode(char); - } - buffer.writeCharCode(next); - } else if (char == $lf || char == $cr || char == $ff) { - // Not a link (no line breaks allowed within `<...>`). - return null; - } else if (char == $space) { - buffer.write('%20'); - } else if (char == $gt) { - break; - } else { - buffer.writeCharCode(char); - } - parser.advanceBy(1); - if (parser.isDone) return null; - } - var destination = buffer.toString(); - - parser.advanceBy(1); - var char = parser.charAt(parser.pos); - if (char == $space || char == $lf || char == $cr || char == $ff) { - var title = _parseTitle(parser); - if (title == null && parser.charAt(parser.pos) != $rparen) { - // This looked like an inline link, until we found this $space - // followed by mystery characters; no longer a link. - return null; - } - return InlineLink(destination, title: title); - } else if (char == $rparen) { - return InlineLink(destination); - } else { - // We parsed something like `[foo](<url>X`. Not a link. - return null; - } - } - - /// Parse an inline link with a "bare" destination (a destination _not_ - /// wrapped in `<...>`). The current position of the parser must be the first - /// character of the destination. - /// - /// Returns the link if it was successfully created, `null` otherwise. - InlineLink? 
_parseInlineBareDestinationLink(InlineParser parser) { - // According to - // [CommonMark](http://spec.commonmark.org/0.28/#link-destination): - // - // > A link destination consists of [...] a nonempty sequence of - // > characters [...], and includes parentheses only if (a) they are - // > backslash-escaped or (b) they are part of a balanced pair of - // > unescaped parentheses. - // - // We need to count the open parens. We start with 1 for the paren that - // opened the destination. - var parenCount = 1; - var buffer = StringBuffer(); - - while (true) { - var char = parser.charAt(parser.pos); - switch (char) { - case $backslash: - parser.advanceBy(1); - if (parser.isDone) return null; // EOF. Not a link. - var next = parser.charAt(parser.pos); - // Parentheses may be escaped. - // - // http://spec.commonmark.org/0.28/#example-467 - if (next != $backslash && next != $lparen && next != $rparen) { - buffer.writeCharCode(char); - } - buffer.writeCharCode(next); - break; - - case $space: - case $lf: - case $cr: - case $ff: - var destination = buffer.toString(); - var title = _parseTitle(parser); - if (title == null && - (parser.isDone || parser.charAt(parser.pos) != $rparen)) { - // This looked like an inline link, until we found this $space - // followed by mystery characters; no longer a link. - return null; - } - // [_parseTitle] made sure the title was follwed by a closing `)` - // (but it's up to the code here to examine the balance of - // parentheses). - parenCount--; - if (parenCount == 0) { - return InlineLink(destination, title: title); - } - break; - - case $lparen: - parenCount++; - buffer.writeCharCode(char); - break; - - case $rparen: - parenCount--; - if (parenCount == 0) { - var destination = buffer.toString(); - return InlineLink(destination); - } - buffer.writeCharCode(char); - break; - - default: - buffer.writeCharCode(char); - } - parser.advanceBy(1); - if (parser.isDone) return null; // EOF. Not a link. 
- } - } - - // Walk the parser forward through any whitespace. - void _moveThroughWhitespace(InlineParser parser) { - while (!parser.isDone) { - var char = parser.charAt(parser.pos); - if (char != $space && - char != $tab && - char != $lf && - char != $vt && - char != $cr && - char != $ff) { - return; - } - parser.advanceBy(1); - } - } - - /// Parses a link title in [parser] at it's current position. The parser's - /// current position should be a whitespace character that followed a link - /// destination. - /// - /// Returns the title if it was successfully parsed, `null` otherwise. - String? _parseTitle(InlineParser parser) { - _moveThroughWhitespace(parser); - if (parser.isDone) return null; - - // The whitespace should be followed by a title delimiter. - var delimiter = parser.charAt(parser.pos); - if (delimiter != $apostrophe && - delimiter != $quote && - delimiter != $lparen) { - return null; - } - - var closeDelimiter = delimiter == $lparen ? $rparen : delimiter; - parser.advanceBy(1); - - // Now we look for an un-escaped closing delimiter. - var buffer = StringBuffer(); - while (true) { - var char = parser.charAt(parser.pos); - if (char == $backslash) { - parser.advanceBy(1); - var next = parser.charAt(parser.pos); - if (next != $backslash && next != closeDelimiter) { - buffer.writeCharCode(char); - } - buffer.writeCharCode(next); - } else if (char == closeDelimiter) { - break; - } else { - buffer.writeCharCode(char); - } - parser.advanceBy(1); - if (parser.isDone) return null; - } - var title = buffer.toString(); - - // Advance past the closing delimiter. - parser.advanceBy(1); - if (parser.isDone) return null; - _moveThroughWhitespace(parser); - if (parser.isDone) return null; - if (parser.charAt(parser.pos) != $rparen) return null; - return title; - } -} - -/// Matches images like `` and -/// `![alternate text][label]`. -class ImageSyntax extends LinkSyntax { - ImageSyntax({Resolver? 
linkResolver}) - : super( - linkResolver: linkResolver, - pattern: r'!\[', - startCharacter: $exclamation); - - @override - Element _createNode(String destination, String? title, - {required List<Node> Function() getChildren}) { - var element = Element.empty('img'); - var children = getChildren(); - element.attributes['src'] = destination; - element.attributes['alt'] = children.map((node) => node.textContent).join(); - if (title != null && title.isNotEmpty) { - element.attributes['title'] = - escapeAttribute(title.replaceAll('&', '&')); - } - return element; - } -} - -/// Matches backtick-enclosed inline code blocks. -class CodeSyntax extends InlineSyntax { - // This pattern matches: - // - // * a string of backticks (not followed by any more), followed by - // * a non-greedy string of anything, including newlines, ending with anything - // except a backtick, followed by - // * a string of backticks the same length as the first, not followed by any - // more. - // - // This conforms to the delimiters of inline code, both in Markdown.pl, and - // CommonMark. - static final String _pattern = r'(`+(?!`))((?:.|\n)*?[^`])\1(?!`)'; - - CodeSyntax() : super(_pattern); - - @override - bool tryMatch(InlineParser parser, [int? startMatchPos]) { - if (parser.pos > 0 && parser.charAt(parser.pos - 1) == $backquote) { - // Not really a match! We can't just sneak past one backtick to try the - // next character. An example of this situation would be: - // - // before ``` and `` after. 
- // ^--parser.pos - return false; - } - - var match = pattern.matchAsPrefix(parser.source, parser.pos); - if (match == null) { - return false; - } - parser.writeText(); - if (onMatch(parser, match)) parser.consume(match.match.length); - return true; - } - - @override - bool onMatch(InlineParser parser, Match match) { - var code = match[2]!.trim().replaceAll('\n', ' '); - if (parser._encodeHtml) code = escapeHtml(code); - parser.addNode(Element.text('code', code)); - - return true; - } -} - -/// Matches GitHub Markdown emoji syntax like `:smile:`. -/// -/// There is no formal specification of GitHub's support for this colon-based -/// emoji support, so this syntax is based on the results of Markdown-enabled -/// text fields at github.com. -class EmojiSyntax extends InlineSyntax { - // Emoji "aliases" are mostly limited to lower-case letters, numbers, and - // underscores, but GitHub also supports `:+1:` and `:-1:`. - EmojiSyntax() : super(':([a-z0-9_+-]+):'); - - @override - bool onMatch(InlineParser parser, Match match) { - var alias = match[1]!; - var emoji = emojis[alias]; - if (emoji == null) { - parser.advanceBy(1); - return false; - } - parser.addNode(Text(emoji)); - - return true; - } -} - -class InlineLink { - final String destination; - final String? title; - - InlineLink(this.destination, {this.title}); + bool get encodeHtml => document.encodeHtml; }
diff --git a/lib/src/inline_syntaxes/autolink_extension_syntax.dart b/lib/src/inline_syntaxes/autolink_extension_syntax.dart new file mode 100644 index 0000000..ff2bc59 --- /dev/null +++ b/lib/src/inline_syntaxes/autolink_extension_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../inline_parser.dart';
import '../util.dart';
import 'inline_syntax.dart';

/// Matches autolinks like `http://foo.com`.
///
/// This is the "bare URL" autolink extension: the URL is not wrapped in
/// `<...>`. The matched text becomes an `<a>` element whose `href` is the URL
/// (prefixed with `http://` when no scheme is present).
class AutolinkExtensionSyntax extends InlineSyntax {
  // The autolink regex is broken up into parts for reusability and
  // readability.

  /// Matches what may precede an autolink.
  ///
  /// Autolinks can only come at the beginning of a line, after whitespace, or
  /// after any of the delimiting characters `*`, `_`, `~`, `(` and `>`.
  static const start = r'(?:^|[\s*_~(>])';

  /// Matches the start of an extended autolink: one of the schemes `http://`,
  /// `https://`, or `ftp://`, or the prefix `www.`.
  static const scheme = r'(?:(?:https?|ftp):\/\/|www\.)';

  /// The characters allowed in a domain segment: alphanumeric characters,
  /// underscores (`_`) and hyphens (`-`).
  static const domainPart = r'\w\-';

  /// Matches a domain: segment characters and periods (`.`).
  ///
  /// The GFM spec additionally requires at least one period and no
  /// underscores in the last two segments; this pattern does not enforce
  /// either rule.
  static const domain = '[$domainPart][$domainPart.]+';

  /// Matches the remainder of the URL after the domain: any run of characters
  /// other than whitespace and `<`.
  static const path = r'[^\s<]*';

  /// Trailing punctuation (specifically, `?`, `!`, `.`, `,`, `:`, `*`, `_`,
  /// and `~`) will not be considered part of the autolink.
  static const truncatingPunctuationPositive = '[?!.,:*_~]';

  /// Matches the (possibly empty) run of trailing punctuation at the end of a
  /// URL.
  static final regExpTrailingPunc = RegExp('$truncatingPunctuationPositive*\$');

  /// Matches a trailing HTML entity reference such as `&amp;`.
  ///
  /// Despite the name, the terminating character this matches is a semicolon
  /// (`;`); the name is kept for backward compatibility.
  static final regExpEndsWithColon = RegExp(r'\&[a-zA-Z0-9]+;$');

  /// Matches a single whitespace character.
  static final regExpWhiteSpace = RegExp(r'\s');

  AutolinkExtensionSyntax() : super('$start(($scheme)($domain)($path))');

  /// Tries to match starting one character *before* the parser position, so
  /// that [start] can inspect the character preceding the URL.
  ///
  /// At position 0 there is no preceding character and matching starts at 0.
  @override
  bool tryMatch(InlineParser parser, [int? startMatchPos]) {
    return super.tryMatch(parser, parser.pos > 0 ? parser.pos - 1 : 0);
  }

  /// Adds an `<a>` node for the matched URL and consumes the linked text.
  ///
  /// Always returns `false`: this method consumes exactly the characters that
  /// end up inside the link itself, which can be fewer than the full regex
  /// match once trailing characters have been trimmed.
  @override
  bool onMatch(InlineParser parser, Match match) {
    var url = match[1]!;
    var href = url;
    var matchLength = url.length;

    // NOTE(review): group 1 always begins with [scheme], so this branch looks
    // unreachable with the current pattern. It is kept unchanged; confirm
    // before removing.
    if (url[0] == '>' || url.startsWith(regExpWhiteSpace)) {
      url = url.substring(1, url.length - 1);
      href = href.substring(1, href.length - 1);
      parser.pos++;
      matchLength--;
    }

    // Prevent accidental standard autolink matches.
    //
    // The look-behind is guarded by `parser.pos > 0`: at the very start of the
    // input there is no preceding character, and `parser.source[-1]` would
    // throw a [RangeError].
    if (parser.pos > 0 &&
        url.endsWith('>') &&
        parser.source[parser.pos - 1] == '<') {
      return false;
    }

    // When an autolink ends in ), we scan the entire autolink for the total
    // number of parentheses. If there is a greater number of closing
    // parentheses than opening ones, we don't consider the last character
    // part of the autolink, in order to facilitate including an autolink
    // inside a parenthesis:
    // https://github.github.com/gfm/#example-600
    if (url.endsWith(')')) {
      final opening = _countChars(url, '(');
      final closing = _countChars(url, ')');

      if (closing > opening) {
        url = url.substring(0, url.length - 1);
        href = href.substring(0, href.length - 1);
        matchLength--;
      }
    }

    // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will
    // not be considered part of the autolink, though they may be included
    // in the interior of the link:
    // https://github.github.com/gfm/#example-599
    final trailingPunc = regExpTrailingPunc.firstMatch(url);
    if (trailingPunc != null) {
      final trailingLength = trailingPunc.match.length;
      url = url.substring(0, url.length - trailingLength);
      href = href.substring(0, href.length - trailingLength);
      matchLength -= trailingLength;
    }

    // If an autolink ends in a semicolon (;), we check to see if it appears
    // to resemble an
    // [entity reference](https://github.github.com/gfm/#entity-references);
    // if the preceding text is & followed by one or more alphanumeric
    // characters. If so, it is excluded from the autolink:
    // https://github.github.com/gfm/#example-602
    if (url.endsWith(';')) {
      final entityRef = regExpEndsWithColon.firstMatch(url);
      if (entityRef != null) {
        // Strip out HTML entity reference
        final entityRefLength = entityRef.match.length;
        url = url.substring(0, url.length - entityRefLength);
        href = href.substring(0, href.length - entityRefLength);
        matchLength -= entityRefLength;
      }
    }

    // The scheme http will be inserted automatically
    if (!href.startsWith('http://') &&
        !href.startsWith('https://') &&
        !href.startsWith('ftp://')) {
      href = 'http://$href';
    }

    final text = parser.encodeHtml ? escapeHtml(url) : url;
    final anchor = Element.text('a', text);
    anchor.attributes['href'] = Uri.encodeFull(href);
    parser.addNode(anchor);

    parser.consume(matchLength);
    return false;
  }

  /// Returns how many times the single-character string [char] occurs in
  /// [input].
  int _countChars(String input, String char) {
    var count = 0;

    for (var i = 0; i < input.length; i++) {
      if (input[i] == char) count++;
    }

    return count;
  }
}
diff --git a/lib/src/inline_syntaxes/autolink_syntax.dart b/lib/src/inline_syntaxes/autolink_syntax.dart new file mode 100644 index 0000000..05eb41c --- /dev/null +++ b/lib/src/inline_syntaxes/autolink_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../charcode.dart';
import '../inline_parser.dart';
import '../util.dart';
import 'inline_syntax.dart';

/// Matches autolinks like `<http://foo.com>`.
class AutolinkSyntax extends InlineSyntax {
  /// Creates the syntax.
  ///
  /// The pattern always begins with `<`, so `$lt` is passed as the start
  /// character. This mirrors `EmailAutolinkSyntax` and lets the cheap
  /// first-character check run before the regular expression, without
  /// changing which inputs match.
  AutolinkSyntax()
      : super(
          r'<(([a-zA-Z][a-zA-Z\-\+\.]+):(?://)?[^\s>]*)>',
          startCharacter: $lt,
        );

  /// Adds an `<a>` node whose text is the matched URL and whose `href` is the
  /// URL passed through [Uri.encodeFull].
  ///
  /// The link text is HTML-escaped only when the parser has `encodeHtml`
  /// enabled. Always returns `true`.
  @override
  bool onMatch(InlineParser parser, Match match) {
    // Group 1 is the URL without the surrounding angle brackets.
    final url = match[1]!;
    final text = parser.encodeHtml ? escapeHtml(url) : url;
    final anchor = Element.text('a', text);
    anchor.attributes['href'] = Uri.encodeFull(url);
    parser.addNode(anchor);

    return true;
  }
}
diff --git a/lib/src/inline_syntaxes/code_syntax.dart b/lib/src/inline_syntaxes/code_syntax.dart new file mode 100644 index 0000000..efc415d --- /dev/null +++ b/lib/src/inline_syntaxes/code_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../charcode.dart';
import '../inline_parser.dart';
import '../util.dart';
import 'inline_syntax.dart';

/// Matches backtick-enclosed inline code blocks.
class CodeSyntax extends InlineSyntax {
  // The pattern is made of three parts:
  //
  // 1. an opening run of backticks that is not followed by another backtick;
  // 2. a non-greedy body of any characters (newlines included) whose final
  //    character is not a backtick;
  // 3. a closing run of backticks of the same length as the opening run, again
  //    not followed by another backtick.
  //
  // These are the inline code delimiters of both Markdown.pl and CommonMark.
  static const _pattern = r'(`+(?!`))((?:.|\n)*?[^`])\1(?!`)';

  CodeSyntax() : super(_pattern);

  /// Tries to match a code span starting exactly at the parser position.
  ///
  /// [startMatchPos] is ignored. Returns `false` when the previous character
  /// is a backtick or when the pattern does not match.
  @override
  bool tryMatch(InlineParser parser, [int? startMatchPos]) {
    final position = parser.pos;

    // A position in the middle of a backtick run is not a real match: we
    // cannot sneak past one backtick and try again from the next one, as in
    //
    //     before ``` and `` after.
    //             ^--parser.pos
    final insideBacktickRun =
        position > 0 && parser.charAt(position - 1) == $backquote;
    if (insideBacktickRun) return false;

    final codeSpan = pattern.matchAsPrefix(parser.source, position);
    if (codeSpan == null) return false;

    parser.writeText();
    final handled = onMatch(parser, codeSpan);
    if (handled) {
      parser.consume(codeSpan.match.length);
    }
    return true;
  }

  /// Adds a `<code>` element holding the trimmed span body, with newlines
  /// turned into spaces and HTML escaped when the parser asks for it.
  @override
  bool onMatch(InlineParser parser, Match match) {
    final raw = match[2]!.trim().replaceAll('\n', ' ');
    final code = parser.encodeHtml ? escapeHtml(raw) : raw;
    parser.addNode(Element.text('code', code));
    return true;
  }
}
diff --git a/lib/src/inline_syntaxes/delimiter_syntax.dart b/lib/src/inline_syntaxes/delimiter_syntax.dart new file mode 100644 index 0000000..7dbad63 --- /dev/null +++ b/lib/src/inline_syntaxes/delimiter_syntax.dart
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import '../ast.dart';
import '../inline_parser.dart';
import 'inline_syntax.dart';

/// Matches syntax that has a pair of tags and becomes an element, like `*` for
/// `<em>`. Allows nested tags.
class DelimiterSyntax extends InlineSyntax {
  /// Whether this is parsed according to the same nesting rules as [emphasis
  /// delimiters][].
  ///
  /// [emphasis delimiters]: http://spec.commonmark.org/0.28/#can-open-emphasis
  final bool requiresDelimiterRun;

  /// Whether to allow intra-word delimiter runs. CommonMark emphasis and
  /// strong emphasis does not allow this, but GitHub-Flavored Markdown allows
  /// it on strikethrough.
  final bool allowIntraWord;

  /// The tags handed to every [DelimiterRun] this syntax creates.
  ///
  /// `null` is treated as an empty list.
  final List<DelimiterTag>? tags;

  /// Creates a new [DelimiterSyntax] which matches text on [pattern].
  ///
  /// The [pattern] is used to find the matching text. If [requiresDelimiterRun]
  /// is passed, this syntax parses according to the same nesting rules as
  /// emphasis delimiters. If [startCharacter] is passed, it is used as a
  /// pre-matching check which is faster than matching against [pattern].
  DelimiterSyntax(
    String pattern, {
    this.requiresDelimiterRun = false,
    int? startCharacter,
    this.allowIntraWord = false,
    this.tags,
  }) : super(pattern, startCharacter: startCharacter);

  /// Pushes a delimiter for the matched text onto the parser's delimiter
  /// stack and adds the matched text as a [Text] node.
  ///
  /// When [requiresDelimiterRun] is `false`, a [SimpleDelimiter] that can open
  /// but not close is always pushed. Otherwise a [DelimiterRun] is parsed; if
  /// the run is neither left- nor right-flanking, the parser is advanced past
  /// the run and `false` is returned.
  @override
  bool onMatch(InlineParser parser, Match match) {
    final runLength = match.group(0)!.length;
    final matchStart = parser.pos;
    final matchEnd = parser.pos + runLength;
    final text = Text(parser.source.substring(matchStart, matchEnd));
    if (!requiresDelimiterRun) {
      parser.pushDelimiter(SimpleDelimiter(
        node: text,
        length: runLength,
        char: parser.source.codeUnitAt(matchStart),
        canOpen: true,
        canClose: false,
        syntax: this,
        endPos: matchEnd,
      ));
      parser.addNode(text);
      return true;
    }

    final delimiterRun = DelimiterRun.tryParse(
      parser,
      matchStart,
      matchEnd,
      syntax: this,
      node: text,
      allowIntraWord: allowIntraWord,
      tags: tags ?? [],
    );
    if (delimiterRun != null) {
      parser.pushDelimiter(delimiterRun);
      parser.addNode(text);
      return true;
    } else {
      parser.advanceBy(runLength);
      return false;
    }
  }

  /// Attempts to close this tag at the current position.
  ///
  /// If a tag cannot be closed at the current position (for example, if a link
  /// reference cannot be found for a link tag's label), then `null` is
  /// returned.
  ///
  /// If a tag can be closed at the current position, then this method calls
  /// [getChildren], in which [parser] parses any nested text into child nodes.
  /// The returned [Node] incorporates these child nodes.
  ///
  /// This base implementation always succeeds and returns an [Element] named
  /// [tag].
  Node? close(
    InlineParser parser,
    Delimiter opener,
    Delimiter closer, {
    required String tag,
    required List<Node> Function() getChildren,
  }) {
    return Element(tag, getChildren());
  }
}

/// Pairs an HTML tag name with an indicator length.
class DelimiterTag {
  DelimiterTag(this.tag, this.indicatorLength);

  // Tag name of the HTML element.
  final String tag;

  // NOTE(review): presumably the number of delimiter characters that selects
  // this tag (for example 1 for `em` and 2 for `strong`) - confirm against the
  // inline parser. Within this file it is only used as a sort key.
  final int indicatorLength;
}

/// A delimiter indicating the possible "open" or possible "close" of a tag for
/// a [DelimiterSyntax].
abstract class Delimiter {
  /// The [Text] node representing the plain text representing this delimiter.
  abstract Text node;

  /// The type of delimiter.
  ///
  /// For the two-character image delimiter, `![`, this is `!`.
  int get char;

  /// The number of delimiters.
  int get length;

  /// Whether the delimiter is active.
  ///
  /// Links cannot be nested, so we must "deactivate" any pending ones. For
  /// example, take the following text:
  ///
  ///     Text [link and [more](links)](links).
  ///
  /// Once we have parsed `Text [`, there is one (pending) link in the state
  /// stack. It is, by default, active. Once we parse the next possible link,
  /// `[more](links)`, as a real link, we must deactivate the pending links
  /// (just the one, in this case).
  abstract bool isActive;

  /// Whether this delimiter can open emphasis or strong emphasis.
  bool get canOpen;

  /// Whether this delimiter can close emphasis or strong emphasis.
  bool get canClose;

  /// The syntax which uses this delimiter to parse a tag.
  DelimiterSyntax get syntax;
}

/// A simple delimiter implements the [Delimiter] interface with basic fields,
/// and does not have the concept of "left-flanking" or "right-flanking".
class SimpleDelimiter implements Delimiter {
  @override
  Text node;

  @override
  final int char;

  @override
  final int length;

  @override
  bool isActive;

  @override
  final bool canOpen;

  @override
  final bool canClose;

  @override
  final DelimiterSyntax syntax;

  /// The source offset just past the delimiter text.
  final int endPos;

  /// Creates a delimiter that starts out active.
  SimpleDelimiter({
    required this.node,
    required this.char,
    required this.length,
    required this.canOpen,
    required this.canClose,
    required this.syntax,
    required this.endPos,
  }) : isActive = true;
}

/// An implementation of [Delimiter] which uses concepts of "left-flanking" and
/// "right-flanking" to determine the values of [canOpen] and [canClose].
///
/// This is primarily used when parsing emphasis and strong emphasis, but can
/// also be used by other extensions of [DelimiterSyntax].
class DelimiterRun implements Delimiter {
  /// According to
  /// [CommonMark](https://spec.commonmark.org/0.29/#punctuation-character):
  ///
  /// > A punctuation character is an ASCII punctuation character or anything in
  /// > the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or
  /// > `Ps`.
  // This RegExp is inspired by
  // https://github.com/commonmark/commonmark.js/blob/1f7d09099c20d7861a674674a5a88733f55ff729/lib/inlines.js#L39.
  // I don't know if there is any way to simplify it or maintain it.
  static final RegExp punctuation = RegExp('['
      r'''!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~'''
      r'\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE'
      r'\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E'
      r'\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E'
      r'\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14'
      r'\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB'
      r'\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736'
      r'\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F'
      r'\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E'
      r'\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051'
      r'\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A'
      r'\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC'
      r'\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42'
      r'\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE'
      r'\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF'
      r'\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF'
      r'\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19'
      r'\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03'
      r'\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F'
      r'\uFF5B\uFF5D\uFF5F-\uFF65'
      ']');

  // TODO(srawlins): Unicode whitespace
  static final String whitespace = ' \t\r\n';

  @override
  Text node;

  @override
  final int char;

  /// The number of delimiter characters currently held by [node].
  @override
  int get length => node.text.length;

  @override
  bool isActive;

  @override
  final DelimiterSyntax syntax;

  /// Whether this run may open or close inside a word.
  final bool allowIntraWord;

  @override
  final bool canOpen;

  @override
  final bool canClose;

  /// The tags available to this run, ordered by ascending
  /// [DelimiterTag.indicatorLength].
  final List<DelimiterTag> tags;

  DelimiterRun._({
    required this.node,
    required this.char,
    required this.syntax,
    required this.tags,
    required bool isLeftFlanking,
    required bool isRightFlanking,
    required bool isPrecededByPunctuation,
    required bool isFollowedByPunctuation,
    required this.allowIntraWord,
  })  : canOpen = isLeftFlanking &&
            (!isRightFlanking || allowIntraWord || isPrecededByPunctuation),
        canClose = isRightFlanking &&
            (!isLeftFlanking || allowIntraWord || isFollowedByPunctuation),
        isActive = true;

  /// Tries to parse a delimiter run from [runStart] (inclusive) to [runEnd]
  /// (exclusive).
  ///
  /// Returns `null` when the run is neither left-flanking nor right-flanking.
  ///
  /// The caller's [tags] list is not modified; the returned run holds its own
  /// copy sorted by ascending [DelimiterTag.indicatorLength].
  static DelimiterRun? tryParse(
    InlineParser parser,
    int runStart,
    int runEnd, {
    required DelimiterSyntax syntax,
    required List<DelimiterTag> tags,
    required Text node,
    bool allowIntraWord = false,
  }) {
    // The start and the end of the input are treated like a line ending.
    final preceding = runStart == 0
        ? '\n'
        : parser.source.substring(runStart - 1, runStart);
    final following = runEnd == parser.source.length
        ? '\n'
        : parser.source.substring(runEnd, runEnd + 1);

    final precededByPunctuation = punctuation.hasMatch(preceding);
    final followedByPunctuation = punctuation.hasMatch(following);
    final precededByWhitespace = whitespace.contains(preceding);
    final followedByWhitespace = whitespace.contains(following);

    // http://spec.commonmark.org/0.30/#left-flanking-delimiter-run
    final leftFlanking = !followedByWhitespace &&
        (!followedByPunctuation ||
            precededByWhitespace ||
            precededByPunctuation);

    // http://spec.commonmark.org/0.30/#right-flanking-delimiter-run
    final rightFlanking = !precededByWhitespace &&
        (!precededByPunctuation ||
            followedByWhitespace ||
            followedByPunctuation);

    if (!leftFlanking && !rightFlanking) {
      // Could not parse a delimiter run.
      return null;
    }

    // Sort a copy: sorting in place would reorder a list owned by the caller
    // (typically shared by every run of a syntax) and would throw on an
    // unmodifiable list.
    final sortedTags = [...tags]
      ..sort((a, b) => a.indicatorLength.compareTo(b.indicatorLength));

    return DelimiterRun._(
      node: node,
      char: parser.charAt(runStart),
      syntax: syntax,
      tags: sortedTags,
      isLeftFlanking: leftFlanking,
      isRightFlanking: rightFlanking,
      isPrecededByPunctuation: precededByPunctuation,
      isFollowedByPunctuation: followedByPunctuation,
      allowIntraWord: allowIntraWord,
    );
  }

  @override
  String toString() => '<char: $char, length: $length, canOpen: $canOpen, '
      'canClose: $canClose>';
}
diff --git a/lib/src/inline_syntaxes/email_autolink_syntax.dart b/lib/src/inline_syntaxes/email_autolink_syntax.dart new file mode 100644 index 0000000..f5b5962 --- /dev/null +++ b/lib/src/inline_syntaxes/email_autolink_syntax.dart
@@ -0,0 +1,36 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../charcode.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+/// Matches autolinks like `<foo@bar.example.com>`.
+///
+/// See <http://spec.commonmark.org/0.28/#email-address>.
+class EmailAutolinkSyntax extends InlineSyntax {
+  static final _email =
+      r'''[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}'''
+      r'''[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*''';
+
+  EmailAutolinkSyntax() : super('<($_email)>', startCharacter: $lt);
+
+  /// Adds an `a` element whose `href` is the matched address prefixed with
+  /// `mailto:`. The link text is the address itself, HTML-escaped when the
+  /// parser's `encodeHtml` flag is set.
+  ///
+  /// Always returns `true`.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final address = match[1]!;
+    final label = parser.encodeHtml ? escapeHtml(address) : address;
+    parser.addNode(
+      Element.text('a', label)
+        ..attributes['href'] = Uri.encodeFull('mailto:$address'),
+    );
+    return true;
+  }
+}
diff --git a/lib/src/inline_syntaxes/emoji_syntax.dart b/lib/src/inline_syntaxes/emoji_syntax.dart new file mode 100644 index 0000000..a068c6b --- /dev/null +++ b/lib/src/inline_syntaxes/emoji_syntax.dart
@@ -0,0 +1,36 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../emojis.dart';
+import '../inline_parser.dart';
+import 'inline_syntax.dart';
+
+/// Matches GitHub Markdown emoji syntax like `:smile:`.
+///
+/// There is no formal specification of GitHub's colon-based emoji support, so
+/// this syntax is based on the results of Markdown-enabled text fields at
+/// github.com.
+class EmojiSyntax extends InlineSyntax {
+  // Emoji "aliases" are mostly limited to lower-case letters, numbers, and
+  // underscores, but GitHub also supports `:+1:` and `:-1:`.
+  EmojiSyntax() : super(':([a-z0-9_+-]+):');
+
+  /// Adds the emoji for the matched alias as a [Text] node and returns `true`.
+  ///
+  /// If the alias is not a key of [emojis], advances the parser by one
+  /// character and returns `false` instead.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final emoji = emojis[match[1]!];
+    if (emoji != null) {
+      parser.addNode(Text(emoji));
+      return true;
+    }
+
+    // Unknown alias: step forward by one character and report no consumption.
+    parser.advanceBy(1);
+    return false;
+  }
+}
diff --git a/lib/src/inline_syntaxes/emphasis_syntax.dart b/lib/src/inline_syntaxes/emphasis_syntax.dart new file mode 100644 index 0000000..9a70b17 --- /dev/null +++ b/lib/src/inline_syntaxes/emphasis_syntax.dart
@@ -0,0 +1,31 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'delimiter_syntax.dart';
+
+/// Parses emphasis and strong emphasis written with `_` or `*` delimiters.
+class EmphasisSyntax extends DelimiterSyntax {
+  /// Tags shared by both variants: `em` (indicator length 1) and `strong`
+  /// (indicator length 2).
+  static final _tags = [DelimiterTag('em', 1), DelimiterTag('strong', 2)];
+
+  /// Parses `__strong__` and `_emphasis_`.
+  EmphasisSyntax.underscore()
+      : super(
+          '_+',
+          requiresDelimiterRun: true,
+          tags: _tags,
+        );
+
+  /// Parses `**strong**` and `*emphasis*`.
+  ///
+  /// Unlike the underscore variant, this one passes `allowIntraWord: true`.
+  EmphasisSyntax.asterisk()
+      : super(
+          r'\*+',
+          requiresDelimiterRun: true,
+          allowIntraWord: true,
+          tags: _tags,
+        );
+}
diff --git a/lib/src/inline_syntaxes/escape_syntax.dart b/lib/src/inline_syntaxes/escape_syntax.dart new file mode 100644 index 0000000..48a6426 --- /dev/null +++ b/lib/src/inline_syntaxes/escape_syntax.dart
@@ -0,0 +1,40 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../charcode.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+/// Escape punctuation preceded by a backslash.
+class EscapeSyntax extends InlineSyntax {
+  EscapeSyntax() : super(r'''\\[!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]''');
+
+  /// Adds the escaped character, without its backslash, as a [Text] node.
+  ///
+  /// When the parser encodes HTML, `"`, `<` and `>` are emitted as the entity
+  /// references `&quot;`, `&lt;` and `&gt;` respectively.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final chars = match.match;
+    final char = chars.codeUnitAt(1);
+    // Why these three characters are replaced with their equivalent HTML
+    // entity reference appears to be missing from the CommonMark spec, but is
+    // very present in all of the examples.
+    // https://talk.commonmark.org/t/entity-ification-of-quotes-and-brackets-missing-from-spec/3207
+    var text = chars[1];
+    if (parser.encodeHtml) {
+      if (char == $double_quote) {
+        text = '&quot;';
+      } else if (char == $lt) {
+        text = '&lt;';
+      } else if (char == $gt) {
+        text = '&gt;';
+      }
+    }
+    parser.addNode(Text(text));
+    return true;
+  }
+}
diff --git a/lib/src/inline_syntaxes/image_syntax.dart b/lib/src/inline_syntaxes/image_syntax.dart new file mode 100644 index 0000000..9643228 --- /dev/null +++ b/lib/src/inline_syntaxes/image_syntax.dart
@@ -0,0 +1,41 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../charcode.dart';
+import '../util.dart';
+import 'link_syntax.dart';
+
+/// Matches images like `![alternate text](url "optional title")` and
+/// `![alternate text][label]`.
+class ImageSyntax extends LinkSyntax {
+  ImageSyntax({Resolver? linkResolver})
+      : super(
+          linkResolver: linkResolver,
+          pattern: r'!\[',
+          startCharacter: $exclamation,
+        );
+
+  /// Creates an empty `img` element for the image.
+  ///
+  /// [destination] becomes `src`, the concatenated text content of the nodes
+  /// returned by [getChildren] becomes `alt`, and a non-empty [title] becomes
+  /// `title` with `&` written as `&amp;` before attribute escaping.
+  @override
+  Element createNode(
+    String destination,
+    String? title, {
+    required List<Node> Function() getChildren,
+  }) {
+    final element = Element.empty('img');
+    final children = getChildren();
+    element.attributes['src'] = destination;
+    element.attributes['alt'] = children.map((node) => node.textContent).join();
+    if (title != null && title.isNotEmpty) {
+      element.attributes['title'] =
+          escapeAttribute(title.replaceAll('&', '&amp;'));
+    }
+    return element;
+  }
+}
diff --git a/lib/src/inline_syntaxes/inline_html_syntax.dart b/lib/src/inline_syntaxes/inline_html_syntax.dart new file mode 100644 index 0000000..0151ba0 --- /dev/null +++ b/lib/src/inline_syntaxes/inline_html_syntax.dart
@@ -0,0 +1,23 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../../markdown.dart';
+import '../charcode.dart';
+
+/// Leaves inline HTML tags alone, following
+/// [CommonMark 0.28](http://spec.commonmark.org/0.28/#raw-html).
+///
+/// This is not actually a good definition (nor CommonMark's) of an HTML tag,
+/// but it is fast. It will leave text like `<a href='hi">` alone, which is
+/// incorrect.
+///
+/// TODO(srawlins): improve accuracy while ensuring performance, once
+/// Markdown benchmarking is more mature.
+class InlineHtmlSyntax extends TextSyntax {
+  // `<`, an optional `/`, `!` or `?`, a tag name, optional whitespace-led
+  // text containing no `>`, and a closing `>`.
+  static const _tagPattern = r'<[/!?]?[A-Za-z][A-Za-z0-9-]*(?:\s[^>]*)?>';
+
+  InlineHtmlSyntax() : super(_tagPattern, startCharacter: $lt);
+}
diff --git a/lib/src/inline_syntaxes/inline_syntax.dart b/lib/src/inline_syntaxes/inline_syntax.dart new file mode 100644 index 0000000..997d03a --- /dev/null +++ b/lib/src/inline_syntaxes/inline_syntax.dart
@@ -0,0 +1,61 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../inline_parser.dart';
+import '../util.dart';
+
+/// Represents one kind of Markdown tag that can be parsed.
+abstract class InlineSyntax {
+  /// The regular expression that recognizes this syntax.
+  final RegExp pattern;
+
+  /// The first character of [pattern], to be used as an efficient first check
+  /// that this syntax matches the current parser position.
+  final int? _startCharacter;
+
+  /// Creates a new [InlineSyntax] which matches text on [pattern].
+  ///
+  /// If [startCharacter] is passed, it is used as a pre-matching check which
+  /// is faster than matching against [pattern].
+  ///
+  /// If [caseSensitive] is disabled, then case is ignored when matching
+  /// the [pattern].
+  InlineSyntax(String pattern, {int? startCharacter, bool caseSensitive = true})
+      : pattern =
+            RegExp(pattern, multiLine: true, caseSensitive: caseSensitive),
+        _startCharacter = startCharacter;
+
+  /// Tries to match at the parser's current position.
+  ///
+  /// The parser's position can be overridden with [startMatchPos].
+  /// Returns whether or not the pattern successfully matched.
+  bool tryMatch(InlineParser parser, [int? startMatchPos]) {
+    final position = startMatchPos ?? parser.pos;
+
+    // Matching with the regular expression [pattern] can be expensive on some
+    // platforms, so first check whether even the first character matches
+    // this syntax.
+    final expectedFirst = _startCharacter;
+    if (expectedFirst != null &&
+        parser.source.codeUnitAt(position) != expectedFirst) {
+      return false;
+    }
+
+    final prefixMatch = pattern.matchAsPrefix(parser.source, position);
+    if (prefixMatch == null) return false;
+
+    // Write any existing plain text up to this point.
+    parser.writeText();
+
+    final shouldConsume = onMatch(parser, prefixMatch);
+    if (shouldConsume) parser.consume(prefixMatch.match.length);
+    return true;
+  }
+
+  /// Processes [match], adding nodes to [parser] and possibly advancing
+  /// [parser].
+  ///
+  /// Returns whether the caller should advance [parser] by `match[0].length`.
+  bool onMatch(InlineParser parser, Match match);
+}
diff --git a/lib/src/inline_syntaxes/line_break_syntax.dart b/lib/src/inline_syntaxes/line_break_syntax.dart new file mode 100644 index 0000000..0a5fb01 --- /dev/null +++ b/lib/src/inline_syntaxes/line_break_syntax.dart
@@ -0,0 +1,25 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import 'inline_syntax.dart';
+
+/// Represents a hard line break.
+///
+/// Matches a backslash, or two or more spaces, immediately before a line
+/// ending, as CommonMark requires; a single trailing space is not a hard
+/// line break.
+class LineBreakSyntax extends InlineSyntax {
+  // The minimum of two spaces is spelled as the quantifier ` {2,}` so that the
+  // count stays explicit in the pattern.
+  LineBreakSyntax() : super(r'(?:\\| {2,})\n');
+
+  /// Creates a void `<br>` element.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    parser.addNode(Element.empty('br'));
+    return true;
+  }
+}
diff --git a/lib/src/inline_syntaxes/link_syntax.dart b/lib/src/inline_syntaxes/link_syntax.dart new file mode 100644 index 0000000..7522036 --- /dev/null +++ b/lib/src/inline_syntaxes/link_syntax.dart
@@ -0,0 +1,459 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../charcode.dart';
+import '../document.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'delimiter_syntax.dart';
+
+/// Matches links like `[blah][label]` and `[blah](url)`.
+class LinkSyntax extends DelimiterSyntax {
+  static final _entirelyWhitespacePattern = RegExp(r'^\s*$');
+
+  /// Resolves link labels that have no reference definition.
+  final Resolver linkResolver;
+
+  LinkSyntax({
+    Resolver? linkResolver,
+    String pattern = r'\[',
+    int startCharacter = $lbracket,
+  })  : linkResolver = (linkResolver ?? ((String _, [String? __]) => null)),
+        super(pattern, startCharacter: startCharacter);
+
+  /// Tries to turn the bracketed text that ends at the parser's current `]`
+  /// into a link node, treating it as an inline link when `(` follows, as a
+  /// full or collapsed reference link when `[` follows, and as a shortcut
+  /// reference link otherwise.
+  ///
+  /// Returns the link node, or `null` if no link could be created.
+  @override
+  Node? close(
+    InlineParser parser,
+    covariant SimpleDelimiter opener,
+    Delimiter? closer, {
+    String? tag,
+    required List<Node> Function() getChildren,
+  }) {
+    final text = parser.source.substring(opener.endPos, parser.pos);
+    // The current character is the `]` that closed the link text. Examine the
+    // next character, to determine what type of link we might have (a '('
+    // means a possible inline link; otherwise a possible reference link).
+    if (parser.pos + 1 >= parser.source.length) {
+      // The `]` is at the end of the document, but this may still be a valid
+      // shortcut reference link.
+      return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
+    }
+
+    // Peek at the next character; don't advance, so as to avoid later stepping
+    // backward.
+    final char = parser.charAt(parser.pos + 1);
+
+    if (char == $lparen) {
+      // Maybe an inline link, like `[text](destination)`.
+      parser.advanceBy(1);
+      final leftParenIndex = parser.pos;
+      final inlineLink = _parseInlineLink(parser);
+      if (inlineLink != null) {
+        return _tryCreateInlineLink(
+          parser,
+          inlineLink,
+          getChildren: getChildren,
+        );
+      }
+      // At this point, we've matched `[...](`, but that `(` did not pan out to
+      // be an inline link. We must now check if `[...]` is simply a shortcut
+      // reference link.
+
+      // Reset the parser position.
+      parser.pos = leftParenIndex;
+      parser.advanceBy(-1);
+      return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
+    }
+
+    if (char == $lbracket) {
+      parser.advanceBy(1);
+      // At this point, we've matched `[...][`. Maybe a *full* reference link,
+      // like `[foo][bar]` or a *collapsed* reference link, like `[foo][]`.
+      if (parser.pos + 1 < parser.source.length &&
+          parser.charAt(parser.pos + 1) == $rbracket) {
+        // That opening `[` is not actually part of the link. Maybe a
+        // *shortcut* reference link (followed by a `[`).
+        parser.advanceBy(1);
+        return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
+      }
+      final label = _parseReferenceLinkLabel(parser);
+      if (label != null) {
+        return _tryCreateReferenceLink(parser, label, getChildren: getChildren);
+      }
+      return null;
+    }
+
+    // The link text (inside `[...]`) was not followed with an opening `(` nor
+    // an opening `[`. Perhaps just a simple shortcut reference link (`[...]`).
+    return _tryCreateReferenceLink(parser, text, getChildren: getChildren);
+  }
+
+  /// Resolve a possible reference link.
+  ///
+  /// Uses [linkReferences], [linkResolver], and [createNode] to try to
+  /// resolve [label] into a [Node]. If [label] is defined in
+  /// [linkReferences] or can be resolved by [linkResolver], returns a [Node]
+  /// that links to the resolved URL.
+  ///
+  /// Otherwise, returns `null`.
+  ///
+  /// [label] does not need to be normalized.
+  Node? _resolveReferenceLink(
+    String label,
+    Map<String, LinkReference> linkReferences, {
+    required List<Node> Function() getChildren,
+  }) {
+    final linkReference = linkReferences[normalizeLinkLabel(label)];
+    if (linkReference != null) {
+      return createNode(
+        linkReference.destination,
+        linkReference.title,
+        getChildren: getChildren,
+      );
+    } else {
+      // This link has no reference definition. But we allow users of the
+      // library to specify a custom resolver function ([linkResolver]) that
+      // may choose to handle this. Otherwise, it's just treated as plain
+      // text.
+
+      // Normally, label text does not get parsed as inline Markdown. However,
+      // for the benefit of the link resolver, we need to at least escape
+      // brackets, so that, e.g. a link resolver can receive `[\[\]]` as `[]`.
+      final resolved = linkResolver(label
+          .replaceAll(r'\\', r'\')
+          .replaceAll(r'\[', '[')
+          .replaceAll(r'\]', ']'));
+      if (resolved != null) {
+        getChildren();
+      }
+      return resolved;
+    }
+  }
+
+  /// Create the node represented by a Markdown link.
+  Node createNode(
+    String destination,
+    String? title, {
+    required List<Node> Function() getChildren,
+  }) {
+    final children = getChildren();
+    final element = Element('a', children);
+    element.attributes['href'] = escapeAttribute(destination);
+    if (title != null && title.isNotEmpty) {
+      element.attributes['title'] = escapeAttribute(title);
+    }
+    return element;
+  }
+
+  /// Tries to create a reference link node.
+  ///
+  /// Returns the link if it was successfully created, `null` otherwise.
+  Node? _tryCreateReferenceLink(
+    InlineParser parser,
+    String label, {
+    required List<Node> Function() getChildren,
+  }) {
+    return _resolveReferenceLink(
+      label,
+      parser.document.linkReferences,
+      getChildren: getChildren,
+    );
+  }
+
+  /// Creates an inline link node from an already parsed [link].
+  ///
+  /// Returns the node built by [createNode].
+  Node _tryCreateInlineLink(
+    InlineParser parser,
+    InlineLink link, {
+    required List<Node> Function() getChildren,
+  }) {
+    return createNode(link.destination, link.title, getChildren: getChildren);
+  }
+
+  /// Parse a reference link label at the current position.
+  ///
+  /// Specifically, [parser.pos] is expected to be pointing at the `[` which
+  /// opens the link label.
+  ///
+  /// Returns the label if it could be parsed, or `null` if not.
+  String? _parseReferenceLinkLabel(InlineParser parser) {
+    // Walk past the opening `[`.
+    parser.advanceBy(1);
+    if (parser.isDone) return null;
+
+    final buffer = StringBuffer();
+    while (true) {
+      final char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        if (parser.isDone) return null; // Trailing backslash. Not a label.
+        final next = parser.charAt(parser.pos);
+        if (next != $backslash && next != $rbracket) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == $lbracket) {
+        return null;
+      } else if (char == $rbracket) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null;
+      // TODO(srawlins): only check 999 characters, for performance reasons?
+    }
+
+    final label = buffer.toString();
+
+    // A link label must contain at least one non-whitespace character.
+    if (_entirelyWhitespacePattern.hasMatch(label)) return null;
+
+    return label;
+  }
+
+  /// Parse an inline [InlineLink] at the current position.
+  ///
+  /// At this point, we have parsed a link's (or image's) opening `[`, and then
+  /// a matching closing `]`, and [parser.pos] is pointing at an opening `(`.
+  /// This method will then attempt to parse a link destination wrapped in `<>`,
+  /// such as `(<http://url>)`, or a bare link destination, such as
+  /// `(http://url)`, or a link destination with a title, such as
+  /// `(http://url "title")`.
+  ///
+  /// Returns the [InlineLink] if one was parsed, or `null` if not.
+  InlineLink? _parseInlineLink(InlineParser parser) {
+    // Start walking to the character just after the opening `(`.
+    parser.advanceBy(1);
+
+    _moveThroughWhitespace(parser);
+    if (parser.isDone) return null; // EOF. Not a link.
+
+    if (parser.charAt(parser.pos) == $lt) {
+      // Maybe a `<...>`-enclosed link destination.
+      return _parseInlineBracketedLink(parser);
+    } else {
+      return _parseInlineBareDestinationLink(parser);
+    }
+  }
+
+  /// Parse an inline link with a bracketed destination (a destination wrapped
+  /// in `<...>`). The current position of the parser must be the first
+  /// character of the destination.
+  ///
+  /// Returns the link if it was successfully created, `null` otherwise.
+  InlineLink? _parseInlineBracketedLink(InlineParser parser) {
+    parser.advanceBy(1);
+    if (parser.isDone) return null; // EOF right after `<`. Not a link.
+
+    final buffer = StringBuffer();
+    while (true) {
+      final char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        if (parser.isDone) return null; // Trailing backslash. Not a link.
+        final next = parser.charAt(parser.pos);
+        // TODO: Follow the backslash spec better here.
+        // http://spec.commonmark.org/0.29/#backslash-escapes
+        if (next != $backslash && next != $gt) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == $lf || char == $cr || char == $ff) {
+        // Not a link (no line breaks allowed within `<...>`).
+        return null;
+      } else if (char == $space) {
+        buffer.write('%20');
+      } else if (char == $gt) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null;
+    }
+    final destination = buffer.toString();
+
+    parser.advanceBy(1);
+    if (parser.isDone) return null; // EOF right after `>`. Not a link.
+    final char = parser.charAt(parser.pos);
+    if (char == $space || char == $lf || char == $cr || char == $ff) {
+      final title = _parseTitle(parser);
+      if (title == null &&
+          (parser.isDone || parser.charAt(parser.pos) != $rparen)) {
+        // This looked like an inline link, until we found this $space
+        // followed by mystery characters; no longer a link.
+        return null;
+      }
+      return InlineLink(destination, title: title);
+    } else if (char == $rparen) {
+      return InlineLink(destination);
+    } else {
+      // We parsed something like `[foo](<url>X`. Not a link.
+      return null;
+    }
+  }
+
+  /// Parse an inline link with a "bare" destination (a destination _not_
+  /// wrapped in `<...>`). The current position of the parser must be the first
+  /// character of the destination.
+  ///
+  /// Returns the link if it was successfully created, `null` otherwise.
+  InlineLink? _parseInlineBareDestinationLink(InlineParser parser) {
+    // According to
+    // [CommonMark](http://spec.commonmark.org/0.28/#link-destination):
+    //
+    // > A link destination consists of [...] a nonempty sequence of
+    // > characters [...], and includes parentheses only if (a) they are
+    // > backslash-escaped or (b) they are part of a balanced pair of
+    // > unescaped parentheses.
+    //
+    // We need to count the open parens. We start with 1 for the paren that
+    // opened the destination.
+    var parenCount = 1;
+    final buffer = StringBuffer();
+
+    while (true) {
+      final char = parser.charAt(parser.pos);
+      switch (char) {
+        case $backslash:
+          parser.advanceBy(1);
+          if (parser.isDone) return null; // EOF. Not a link.
+          final next = parser.charAt(parser.pos);
+          // Parentheses may be escaped.
+          //
+          // http://spec.commonmark.org/0.28/#example-467
+          if (next != $backslash && next != $lparen && next != $rparen) {
+            buffer.writeCharCode(char);
+          }
+          buffer.writeCharCode(next);
+          break;
+
+        case $space:
+        case $lf:
+        case $cr:
+        case $ff:
+          final destination = buffer.toString();
+          final title = _parseTitle(parser);
+          if (title == null &&
+              (parser.isDone || parser.charAt(parser.pos) != $rparen)) {
+            // This looked like an inline link, until we found this $space
+            // followed by mystery characters; no longer a link.
+            return null;
+          }
+          // [_parseTitle] made sure the title was followed by a closing `)`
+          // (but it's up to the code here to examine the balance of
+          // parentheses).
+          parenCount--;
+          if (parenCount == 0) {
+            return InlineLink(destination, title: title);
+          }
+          break;
+
+        case $lparen:
+          parenCount++;
+          buffer.writeCharCode(char);
+          break;
+
+        case $rparen:
+          parenCount--;
+          if (parenCount == 0) {
+            final destination = buffer.toString();
+            return InlineLink(destination);
+          }
+          buffer.writeCharCode(char);
+          break;
+
+        default:
+          buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null; // EOF. Not a link.
+    }
+  }
+
+  /// Walks the parser forward through any whitespace.
+  void _moveThroughWhitespace(InlineParser parser) {
+    while (!parser.isDone) {
+      final char = parser.charAt(parser.pos);
+      if (char != $space &&
+          char != $tab &&
+          char != $lf &&
+          char != $vt &&
+          char != $cr &&
+          char != $ff) {
+        return;
+      }
+      parser.advanceBy(1);
+    }
+  }
+
+  /// Parses a link title in [parser] at its current position. The parser's
+  /// current position should be a whitespace character that followed a link
+  /// destination.
+  ///
+  /// Returns the title if it was successfully parsed, `null` otherwise.
+  String? _parseTitle(InlineParser parser) {
+    _moveThroughWhitespace(parser);
+    if (parser.isDone) return null;
+
+    // The whitespace should be followed by a title delimiter.
+    final delimiter = parser.charAt(parser.pos);
+    if (delimiter != $apostrophe &&
+        delimiter != $quote &&
+        delimiter != $lparen) {
+      return null;
+    }
+
+    final closeDelimiter = delimiter == $lparen ? $rparen : delimiter;
+    parser.advanceBy(1);
+    if (parser.isDone) return null; // EOF right after the opening delimiter.
+
+    // Now we look for an un-escaped closing delimiter.
+    final buffer = StringBuffer();
+    while (true) {
+      final char = parser.charAt(parser.pos);
+      if (char == $backslash) {
+        parser.advanceBy(1);
+        if (parser.isDone) return null; // Trailing backslash. Not a title.
+        final next = parser.charAt(parser.pos);
+        if (next != $backslash && next != closeDelimiter) {
+          buffer.writeCharCode(char);
+        }
+        buffer.writeCharCode(next);
+      } else if (char == closeDelimiter) {
+        break;
+      } else {
+        buffer.writeCharCode(char);
+      }
+      parser.advanceBy(1);
+      if (parser.isDone) return null;
+    }
+    final title = buffer.toString();
+
+    // Advance past the closing delimiter.
+    parser.advanceBy(1);
+    if (parser.isDone) return null;
+    _moveThroughWhitespace(parser);
+    if (parser.isDone) return null;
+    if (parser.charAt(parser.pos) != $rparen) return null;
+    return title;
+  }
+}
+
+/// The destination and optional title parsed from an inline link.
+class InlineLink {
+  final String destination;
+  final String? title;
+
+  InlineLink(this.destination, {this.title});
+}
diff --git a/lib/src/inline_syntaxes/strikethrough_syntax.dart b/lib/src/inline_syntaxes/strikethrough_syntax.dart new file mode 100644 index 0000000..9b4dc7b --- /dev/null +++ b/lib/src/inline_syntaxes/strikethrough_syntax.dart
@@ -0,0 +1,19 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'delimiter_syntax.dart';
+
+/// Matches strikethrough syntax according to the GFM spec.
+///
+/// Registers a single `del` tag with an indicator length of 2 for runs of
+/// `~`.
+class StrikethroughSyntax extends DelimiterSyntax {
+  StrikethroughSyntax()
+      : super(
+          '~+',
+          tags: [DelimiterTag('del', 2)],
+          requiresDelimiterRun: true,
+          allowIntraWord: true,
+        );
+}
diff --git a/lib/src/inline_syntaxes/tag_syntax.dart b/lib/src/inline_syntaxes/tag_syntax.dart new file mode 100644 index 0000000..d284282 --- /dev/null +++ b/lib/src/inline_syntaxes/tag_syntax.dart
@@ -0,0 +1,15 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'delimiter_syntax.dart';
+
+/// A deprecated wrapper that only forwards its arguments to
+/// [DelimiterSyntax].
+@Deprecated('Use DelimiterSyntax instead')
+class TagSyntax extends DelimiterSyntax {
+  TagSyntax(
+    String pattern, {
+    bool requiresDelimiterRun = false,
+  }) : super(pattern, requiresDelimiterRun: requiresDelimiterRun);
+}
diff --git a/lib/src/inline_syntaxes/text_syntax.dart b/lib/src/inline_syntaxes/text_syntax.dart new file mode 100644 index 0000000..9ab02bb --- /dev/null +++ b/lib/src/inline_syntaxes/text_syntax.dart
@@ -0,0 +1,44 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import '../ast.dart';
+import '../inline_parser.dart';
+import '../util.dart';
+import 'inline_syntax.dart';
+
+/// Matches stuff that should just be passed through as straight text.
+class TextSyntax extends InlineSyntax {
+  /// The replacement text; when empty, the matched text is kept as it is.
+  final String substitute;
+
+  /// Creates a new [TextSyntax] which matches text on [pattern].
+  ///
+  /// If [sub] is passed, it is used as a simple replacement for [pattern]. If
+  /// [startCharacter] is passed, it is used as a pre-matching check which is
+  /// faster than matching against [pattern].
+  TextSyntax(String pattern, {String sub = '', int? startCharacter})
+      : substitute = sub,
+        super(pattern, startCharacter: startCharacter);
+
+  /// Adds a [Text] node holding [substitute] and returns `true`, provided
+  /// [substitute] is not empty and the match is not directly preceded by `/`.
+  ///
+  /// Otherwise, the parser is advanced by the length of [match] and `false` is
+  /// returned.
+  @override
+  bool onMatch(InlineParser parser, Match match) {
+    final start = match.start;
+    final followsSlash = start > 0 && match.input[start - 1] == '/';
+
+    if (substitute.isNotEmpty && !followsSlash) {
+      // Insert the substitution.
+      parser.addNode(Text(substitute));
+      return true;
+    }
+
+    // Just use the original matched text.
+    parser.advanceBy(match.match.length);
+    return false;
+  }
+}
diff --git a/lib/src/patterns.dart b/lib/src/patterns.dart new file mode 100644 index 0000000..4ddc184 --- /dev/null +++ b/lib/src/patterns.dart
@@ -0,0 +1,55 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// The line contains only whitespace or is empty.
+final emptyPattern = RegExp(r'^(?:[ \t]*)$');
+
+/// A series of `=` or `-` (on the next line) define setext-style headers.
+final setextPattern = RegExp(r'^[ ]{0,3}(=+|-+)\s*$');
+
+/// Leading (and trailing) `#` define atx-style headers.
+///
+/// Starts with 1-6 unescaped `#` characters which must not be followed by a
+/// non-space character. Line may end with any number of `#` characters.
+final headerPattern = RegExp(r'^ {0,3}(#{1,6})[ \x09\x0b\x0c](.*?)#*$');
+
+/// The line starts with `>` with one optional space after.
+final blockquotePattern = RegExp(r'^[ ]{0,3}>[ ]?(.*)$');
+
+/// A line indented four spaces. Used for code blocks and lists.
+///
+/// The four spaces are written as ` {4}` so the count stays explicit.
+final indentPattern = RegExp(r'^(?: {4}| {0,3}\t)(.*)$');
+
+/// Fenced code block.
+final codeFencePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');
+
+/// Fenced blockquotes.
+final blockquoteFencePattern = RegExp(r'^>{3}\s*$');
+
+/// Three or more hyphens, asterisks or underscores by themselves. Note that
+/// a line like `----` is valid as both HR and SETEXT. In case of a tie,
+/// SETEXT should win.
+final hrPattern = RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$');
+
+/// A line starting with one of these markers: `-`, `*`, `+`. May have up to
+/// three leading spaces before the marker and any number of spaces or tabs
+/// after.
+///
+/// Contains a dummy group at [2], so that the groups in [ulPattern] and
+/// [olPattern] match up; in both, [2] is the length of the number that begins
+/// the list marker.
+final ulPattern = RegExp(r'^([ ]{0,3})()([*+-])(([ \t])([ \t]*)(.*))?$');
+
+/// A line starting with a number like `123.`. May have up to three leading
+/// spaces before the marker and any number of spaces or tabs after.
+final olPattern = RegExp(r'^([ ]{0,3})(\d{1,9})([\.)])(([ \t])([ \t]*)(.*))?$');
+
+/// A line of hyphens separated by at least one pipe.
+final tablePattern = RegExp(
+    r'^[ ]{0,3}\|?([ \t]*:?\-+:?[ \t]*\|)+([ \t]|[ \t]*:?\-+:?[ \t]*)?$');
+
+/// A pattern which should never be used. It just satisfies non-nullability of
+/// pattern fields.
+final dummyPattern = RegExp('');
diff --git a/lib/src/util.dart b/lib/src/util.dart index 3a8a454..c514acf 100644 --- a/lib/src/util.dart +++ b/lib/src/util.dart
@@ -4,7 +4,7 @@ import 'dart:convert'; -import 'package:charcode/charcode.dart'; +import 'charcode.dart'; String escapeHtml(String html) => const HtmlEscape(HtmlEscapeMode.element).convert(html); @@ -17,7 +17,7 @@ /// /// Based on http://spec.commonmark.org/0.28/#backslash-escapes. String escapeAttribute(String value) { - var result = StringBuffer(); + final result = StringBuffer(); int ch; for (var i = 0; i < value.codeUnits.length; i++) { ch = value.codeUnitAt(i);
diff --git a/lib/src/version.dart b/lib/src/version.dart index 99761d4..92964be 100644 --- a/lib/src/version.dart +++ b/lib/src/version.dart
@@ -1,2 +1,2 @@ // Generated code. Do not modify. -const packageVersion = '5.0.0-dev'; +const packageVersion = '6.0.0-dev';
diff --git a/pubspec.yaml b/pubspec.yaml index 22a0cfc..db11ff4 100644 --- a/pubspec.yaml +++ b/pubspec.yaml
@@ -1,5 +1,5 @@ name: markdown -version: 5.0.0-dev +version: 6.0.0-dev description: A portable Markdown library written in Dart that can parse Markdown into HTML. @@ -13,7 +13,6 @@ dependencies: args: ^2.0.0 - charcode: ^1.2.0 meta: ^1.3.0 dev_dependencies:
diff --git a/test/blns.dart b/test/blns.dart index f059f6d..a649c3a 100644 --- a/test/blns.dart +++ b/test/blns.dart
@@ -2,9 +2,9 @@ // // This file was generated from big-list-of-naughty-strings's JSON file: // https://github.com/minimaxir/big-list-of-naughty-strings/raw/master/blns.json -// at 2021-11-14 15:03:54.726971 by the script, tool/update_blns.dart. +// at 2022-04-16 22:37:54.467197 by the script, tool/update_blns.dart. -// ignore_for_file: text_direction_code_point_in_literal +// ignore_for_file: text_direction_code_point_in_literal, use_raw_strings const blns = <String>[ '',
diff --git a/test/blns_test.dart b/test/blns_test.dart index 846ce8e..69790ef 100644 --- a/test/blns_test.dart +++ b/test/blns_test.dart
@@ -20,18 +20,18 @@ }); var index = 0; - for (var str in blns) { + for (final str in blns) { test('blns string $index', () { - var result = markdownToHtml(str); + final result = markdownToHtml(str); expect(result, const TypeMatcher<String>()); }); index++; } index = 0; - for (var str in blns) { + for (final str in blns) { test('blns string $index w/ gitHubWeb', () { - var result = markdownToHtml(str, extensionSet: ExtensionSet.gitHubWeb); + final result = markdownToHtml(str, extensionSet: ExtensionSet.gitHubWeb); expect(result, const TypeMatcher<String>()); }); index++;
diff --git a/test/document_test.dart b/test/document_test.dart index 4a4cd20..8a26900 100644 --- a/test/document_test.dart +++ b/test/document_test.dart
@@ -10,106 +10,117 @@ void main() { group('Document', () { test('encodeHtml prevents less than and ampersand escaping', () { - var document = Document(encodeHtml: false); - var result = document.parseInline('< &'); + final document = Document(encodeHtml: false); + final result = document.parseInline('< &'); expect(result, hasLength(1)); expect( - result[0], - const TypeMatcher<Text>() - .having((e) => e.text, 'text', equals('< &'))); + result[0], + const TypeMatcher<Text>().having((e) => e.text, 'text', equals('< &')), + ); }); group('with encodeHtml enabled', () { - var document = Document(encodeHtml: true); + final document = Document(encodeHtml: true); test('encodes HTML in an inline code snippet', () { - var result = document.parseInline('``<p>Hello <em>Markdown</em></p>``'); - var codeSnippet = result.single as Element; - expect(codeSnippet.textContent, - equals('&lt;p&gt;Hello &lt;em&gt;Markdown&lt;/em&gt;&lt;/p&gt;')); + final result = + document.parseInline('``<p>Hello <em>Markdown</em></p>``'); + final codeSnippet = result.single as Element; + expect( + codeSnippet.textContent, + equals('&lt;p&gt;Hello &lt;em&gt;Markdown&lt;/em&gt;&lt;/p&gt;'), + ); }); test('encodes HTML in a fenced code block', () { - var lines = '```\n<p>Hello <em>Markdown</em></p>\n```\n'.split('\n'); - var result = document.parseLines(lines); - var codeBlock = result.single as Element; - expect(codeBlock.textContent, - equals('&lt;p&gt;Hello &lt;em&gt;Markdown&lt;/em&gt;&lt;/p&gt;\n')); + final lines = '```\n<p>Hello <em>Markdown</em></p>\n```\n'.split('\n'); + final result = document.parseLines(lines); + final codeBlock = result.single as Element; + expect( + codeBlock.textContent, + equals('&lt;p&gt;Hello &lt;em&gt;Markdown&lt;/em&gt;&lt;/p&gt;\n'), + ); }); test('encodes HTML in an indented code block', () { - var lines = ' <p>Hello <em>Markdown</em></p>\n'.split('\n'); - var result = document.parseLines(lines); - var codeBlock = result.single as Element; - expect(codeBlock.textContent, - equals('&lt;p&gt;Hello &lt;em&gt;Markdown&lt;/em&gt;&lt;/p&gt;\n')); + final lines = ' <p>Hello 
<em>Markdown</em></p>\n'.split('\n'); + final result = document.parseLines(lines); + final codeBlock = result.single as Element; + expect( + codeBlock.textContent, + equals('&lt;p&gt;Hello &lt;em&gt;Markdown&lt;/em&gt;&lt;/p&gt;\n'), + ); }); test('encodeHtml spaces are preserved in text', () { // Example to get a <p> tag rendered before a text node. - var contents = 'Sample\n\n<pre>\n A\n B\n</pre>'; - var document = Document(encodeHtml: true); - var lines = LineSplitter.split(contents).toList(); - var nodes = BlockParser(lines, document).parseLines(); - var result = HtmlRenderer().render(nodes); + final contents = 'Sample\n\n<pre>\n A\n B\n</pre>'; + final document = Document(encodeHtml: true); + final lines = LineSplitter.split(contents).toList(); + final nodes = BlockParser(lines, document).parseLines(); + final result = HtmlRenderer().render(nodes); expect(result, '<p>\n</p><pre>\n A\n B\n</pre>'); }); test('encode double quotes, greater than, and less than when escaped', () { - var contents = r'\>\"\< Hello'; - var document = Document(encodeHtml: true); - var nodes = document.parseInline(contents); + final contents = r'\>\"\< Hello'; + final document = Document(encodeHtml: true); + final nodes = document.parseInline(contents); expect(nodes, hasLength(1)); expect( - nodes.single, - const TypeMatcher<Text>().having( - (e) => e.text, - 'text', - '&gt;&quot;&lt; Hello', - )); + nodes.single, + const TypeMatcher<Text>().having( + (e) => e.text, + 'text', + '&gt;&quot;&lt; Hello', + ), + ); }); }); group('with encodeHtml disabled', () { - var document = Document(encodeHtml: false); + final document = Document(encodeHtml: false); test('leaves HTML alone, in a code snippet', () { - var result = + final result = document.parseInline('```<p>Hello <em>Markdown</em></p>```'); - var codeSnippet = result.single as Element; + final codeSnippet = result.single as Element; expect( - codeSnippet.textContent, equals('<p>Hello <em>Markdown</em></p>')); + codeSnippet.textContent, + equals('<p>Hello <em>Markdown</em></p>'), + 
); }); test('leaves HTML alone, in a fenced code block', () { - var lines = '```\n<p>Hello <em>Markdown</em></p>\n```\n'.split('\n'); - var result = document.parseLines(lines); - var codeBlock = result.single as Element; + final lines = '```\n<p>Hello <em>Markdown</em></p>\n```\n'.split('\n'); + final result = document.parseLines(lines); + final codeBlock = result.single as Element; expect( - codeBlock.textContent, equals('<p>Hello <em>Markdown</em></p>\n')); + codeBlock.textContent, + equals('<p>Hello <em>Markdown</em></p>\n'), + ); }); test('leaves HTML alone, in an indented code block', () { - var lines = ' <p>Hello <em>Markdown</em></p>\n'.split('\n'); - var result = document.parseLines(lines); - var codeBlock = result.single as Element; + final lines = ' <p>Hello <em>Markdown</em></p>\n'.split('\n'); + final result = document.parseLines(lines); + final codeBlock = result.single as Element; expect( - codeBlock.textContent, equals('<p>Hello <em>Markdown</em></p>\n')); + codeBlock.textContent, + equals('<p>Hello <em>Markdown</em></p>\n'), + ); }); test('leave double quotes, greater than, and less than when escaped', () { - var contents = r'\>\"\< Hello'; - var document = Document(encodeHtml: false); - var nodes = document.parseInline(contents); + final contents = r'\>\"\< Hello'; + final document = Document(encodeHtml: false); + final nodes = document.parseInline(contents); expect(nodes, hasLength(1)); expect( - nodes.single, - const TypeMatcher<Text>().having( - (e) => e.text, - 'text', - '>"< Hello', - )); + nodes.single, + const TypeMatcher<Text>().having((e) => e.text, 'text', '>"< Hello'), + ); }); }); });
diff --git a/test/extensions/strikethrough.unit b/test/extensions/strikethrough.unit index 3599718..5222d91 100644 --- a/test/extensions/strikethrough.unit +++ b/test/extensions/strikethrough.unit
@@ -18,3 +18,7 @@ word ~~past ~~word <<< <p>word ~~past ~~word</p> +>>> mixed with emphasis and order changes +**~~first~~** ~~**second**~~ +<<< +<p><strong><del>first</del></strong> <del><strong>second</strong></del></p>
diff --git a/test/html_renderer_test.dart b/test/html_renderer_test.dart index d478fc7..cc32869 100644 --- a/test/html_renderer_test.dart +++ b/test/html_renderer_test.dart
@@ -26,7 +26,9 @@ withDefaultInlineSyntaxes: false, encodeHtml: false, blockSyntaxes: [const HorizontalRuleSyntax()], - inlineSyntaxes: [TagSyntax(r'\*+', requiresDelimiterRun: true)], + inlineSyntaxes: [ + EmphasisSyntax.asterisk(), + ], ); expect( @@ -77,4 +79,39 @@ ); }); }); + + group('test InlineSyntax caseSensitive parameter', () { + const text = 'one BREAK two'; + + test('with caseSensitive enabled', () { + final result = markdownToHtml( + text, + inlineOnly: true, + inlineSyntaxes: [_BreakSyntax(true)], + ); + + expect(result, equals('one BREAK two')); + }); + + test('with caseSensitive disabled', () { + final result = markdownToHtml( + text, + inlineOnly: true, + inlineSyntaxes: [_BreakSyntax(false)], + ); + + expect(result, equals('one <break /> two')); + }); + }); +} + +class _BreakSyntax extends InlineSyntax { + _BreakSyntax(bool caseSensitive) + : super('break', caseSensitive: caseSensitive); + + @override + bool onMatch(InlineParser parser, Match match) { + parser.addNode(Element.empty('break')); + return true; + } }
diff --git a/test/markdown_test.dart b/test/markdown_test.dart index 945b904..a9f6fde 100644 --- a/test/markdown_test.dart +++ b/test/markdown_test.dart
@@ -11,21 +11,40 @@ await testDirectory('original'); // Block syntax extensions - testFile('extensions/fenced_code_blocks.unit', - blockSyntaxes: [const FencedCodeBlockSyntax()]); - testFile('extensions/headers_with_ids.unit', - blockSyntaxes: [const HeaderWithIdSyntax()]); - testFile('extensions/setext_headers_with_ids.unit', - blockSyntaxes: [const SetextHeaderWithIdSyntax()]); - testFile('extensions/tables.unit', blockSyntaxes: [const TableSyntax()]); - testFile('extensions/fenced_blockquotes.unit', - blockSyntaxes: [const FencedBlockquoteSyntax()]); + testFile( + 'extensions/fenced_code_blocks.unit', + blockSyntaxes: [const FencedCodeBlockSyntax()], + ); + testFile( + 'extensions/headers_with_ids.unit', + blockSyntaxes: [const HeaderWithIdSyntax()], + ); + testFile( + 'extensions/setext_headers_with_ids.unit', + blockSyntaxes: [const SetextHeaderWithIdSyntax()], + ); + testFile( + 'extensions/tables.unit', + blockSyntaxes: [const TableSyntax()], + ); + testFile( + 'extensions/fenced_blockquotes.unit', + blockSyntaxes: [const FencedBlockquoteSyntax()], + ); // Inline syntax extensions - testFile('extensions/emojis.unit', inlineSyntaxes: [EmojiSyntax()]); - testFile('extensions/inline_html.unit', inlineSyntaxes: [InlineHtmlSyntax()]); - testFile('extensions/strikethrough.unit', - inlineSyntaxes: [StrikethroughSyntax()]); + testFile( + 'extensions/emojis.unit', + inlineSyntaxes: [EmojiSyntax()], + ); + testFile( + 'extensions/inline_html.unit', + inlineSyntaxes: [InlineHtmlSyntax()], + ); + testFile( + 'extensions/strikethrough.unit', + inlineSyntaxes: [StrikethroughSyntax()], + ); await testDirectory('common_mark'); await testDirectory('gfm', extensionSet: ExtensionSet.gitHubFlavored); @@ -106,7 +125,7 @@ validateCore( 'can choose to _not_ resolve something, like an empty link', - r''' + ''' resolve [[]] thing ''', ''' @@ -116,7 +135,7 @@ }); group('Custom inline syntax', () { - var nyanSyntax = <InlineSyntax>[TextSyntax('nyan', sub: '~=[,,_,,]:3')]; + final 
nyanSyntax = <InlineSyntax>[TextSyntax('nyan', sub: '~=[,,_,,]:3')]; validateCore( 'simple inline syntax', ''' @@ -125,10 +144,15 @@ ''', inlineSyntaxes: nyanSyntax); - validateCore('dart custom links', 'links [are<foo>] awesome', - '<p>links <a>are&lt;foo></a> awesome</p>\n', - linkResolver: (String text, [String? _]) => - Element.text('a', text.replaceAll('<', '&lt;'))); + validateCore( + 'dart custom links', + 'links [are<foo>] awesome', + '<p>links <a>are&lt;foo></a> awesome</p>\n', + linkResolver: (String text, [String? _]) => Element.text( + 'a', + text.replaceAll('<', '&lt;'), + ), + ); // TODO(amouravski): need more tests here for custom syntaxes, as some // things are not quite working properly. The regexps are sometime a little
diff --git a/test/original/inline_links.unit b/test/original/inline_links.unit index 4bfd85a..4d910c8 100644 --- a/test/original/inline_links.unit +++ b/test/original/inline_links.unit
@@ -80,3 +80,8 @@ [foo](link(1.png) (what?) <<< <p>[foo](link(1.png) (what?)</p> +>>> not an inline link: the title's ending quote is escaped +links [are](<http://example.com> "title\") awesome + +<<< +<p>links [are](<a href="http://example.com">http://example.com</a> "title") awesome</p> \ No newline at end of file
diff --git a/test/util.dart b/test/util.dart index 362649b..ad73c0c 100644 --- a/test/util.dart +++ b/test/util.dart
@@ -12,8 +12,8 @@ /// Runs tests defined in "*.unit" files inside directory [name]. Future<void> testDirectory(String name, {ExtensionSet? extensionSet}) async { - await for (var dataCase in dataCasesUnder(testDirectory: name)) { - var description = + await for (final dataCase in dataCasesUnder(testDirectory: name)) { + final description = '${dataCase.directory}/${dataCase.file}.unit ${dataCase.description}'; validateCore( description, @@ -24,22 +24,28 @@ } } -Future<String> get markdownPackageRoot async => - p.dirname(p.dirname((await Isolate.resolvePackageUri( - Uri.parse('package:markdown/markdown.dart')))! - .path)); +Future<String> get markdownPackageRoot async { + final packageUri = Uri.parse('package:markdown/markdown.dart'); + final isolateUri = await Isolate.resolvePackageUri(packageUri); + return p.dirname(p.dirname(isolateUri!.toFilePath())); +} void testFile( String file, { Iterable<BlockSyntax> blockSyntaxes = const [], Iterable<InlineSyntax> inlineSyntaxes = const [], }) async { - var directory = p.join(await markdownPackageRoot, 'test'); - for (var dataCase in dataCasesInFile(path: p.join(directory, file))) { - var description = + final directory = p.join(await markdownPackageRoot, 'test'); + for (final dataCase in dataCasesInFile(path: p.join(directory, file))) { + final description = '${dataCase.directory}/${dataCase.file}.unit ${dataCase.description}'; - validateCore(description, dataCase.input, dataCase.expectedOutput, - blockSyntaxes: blockSyntaxes, inlineSyntaxes: inlineSyntaxes); + validateCore( + description, + dataCase.input, + dataCase.expectedOutput, + blockSyntaxes: blockSyntaxes, + inlineSyntaxes: inlineSyntaxes, + ); } } @@ -55,13 +61,15 @@ bool inlineOnly = false, }) { test(description, () { - var result = markdownToHtml(markdown, - blockSyntaxes: blockSyntaxes, - inlineSyntaxes: inlineSyntaxes, - extensionSet: extensionSet, - linkResolver: linkResolver, - imageLinkResolver: imageLinkResolver, - inlineOnly: inlineOnly); + final result 
= markdownToHtml( + markdown, + blockSyntaxes: blockSyntaxes, + inlineSyntaxes: inlineSyntaxes, + extensionSet: extensionSet, + linkResolver: linkResolver, + imageLinkResolver: imageLinkResolver, + inlineOnly: inlineOnly, + ); markdownPrintOnFailure(markdown, html, result);
diff --git a/test/version_test.dart b/test/version_test.dart index b9c3878..14763d3 100644 --- a/test/version_test.dart +++ b/test/version_test.dart
@@ -12,24 +12,30 @@ void main() { test('check versions', () async { - var packageRoot = await markdownPackageRoot; - var binary = p.normalize(p.join(packageRoot, 'bin', 'markdown.dart')); - var dartBin = Platform.executable; - var result = Process.runSync(dartBin, [binary, '--version']); - expect(result.exitCode, 0, - reason: 'Exit code expected: 0; actual: ${result.exitCode}\n\n' - 'stdout: ${result.stdout}\n\n' - 'stderr: ${result.stderr}'); + final packageRoot = await markdownPackageRoot; + final binary = p.normalize(p.join(packageRoot, 'bin', 'markdown.dart')); + final dartBin = Platform.executable; + final result = Process.runSync(dartBin, [binary, '--version']); + expect( + result.exitCode, + 0, + reason: 'Exit code expected: 0; actual: ${result.exitCode}\n\n' + 'stdout: ${result.stdout}\n\n' + 'stderr: ${result.stderr}', + ); - var binVersion = (result.stdout as String).trim(); + final binVersion = (result.stdout as String).trim(); - var pubspecFile = p.normalize(p.join(packageRoot, 'pubspec.yaml')); + final pubspecFile = p.normalize(p.join(packageRoot, 'pubspec.yaml')); - var pubspecContent = + final pubspecContent = loadYaml(File(pubspecFile).readAsStringSync()) as YamlMap; - expect(binVersion, pubspecContent['version'], - reason: 'The version reported by bin/markdown.dart should match the ' - 'version in pubspec. Run `pub run build_runner build` to update.'); + expect( + binVersion, + pubspecContent['version'], + reason: 'The version reported by bin/markdown.dart should match the ' + 'version in pubspec. Run `pub run build_runner build` to update.', + ); }); }
diff --git a/tool/dartdoc_compare.dart b/tool/dartdoc_compare.dart index 7c24553..e0195ba 100644 --- a/tool/dartdoc_compare.dart +++ b/tool/dartdoc_compare.dart
@@ -20,13 +20,20 @@ ..addSeparator('Usage: dartdoc-compare.dart [OPTIONS] <dart-package>') ..addOption(_dartdocDir, help: 'Directory of the dartdoc package') ..addOption(_markdownBefore, help: "Markdown package 'before' ref") - ..addOption(_markdownAfter, - defaultsTo: 'HEAD', help: "Markdown package 'after' ref (or 'local')") - ..addFlag(_sdk, - defaultsTo: false, negatable: false, help: 'Is the package the SDK?') + ..addOption( + _markdownAfter, + defaultsTo: 'HEAD', + help: "Markdown package 'after' ref (or 'local')", + ) + ..addFlag( + _sdk, + defaultsTo: false, + negatable: false, + help: 'Is the package the SDK?', + ) ..addFlag(_help, abbr: 'h', hide: true); - var options = parser.parse(arguments); + final options = parser.parse(arguments); if (options[_help] as bool) { print(parser.usage); exitCode = 0; @@ -39,13 +46,14 @@ exitCode = 1; return; } - var comparer = DartdocCompare( - options[_dartdocDir] as String, - options[_markdownBefore] as String, - options[_markdownAfter] as String, - absolute(options[_dartdocDir] as String, 'bin/dartdoc.dart'), - absolute(options[_dartdocDir] as String, 'pubspec.yaml'), - options[_sdk] as bool); + final comparer = DartdocCompare( + options[_dartdocDir] as String, + options[_markdownBefore] as String, + options[_markdownAfter] as String, + absolute(options[_dartdocDir] as String, 'bin/dartdoc.dart'), + absolute(options[_dartdocDir] as String, 'pubspec.yaml'), + options[_sdk] as bool, + ); String? path; if (comparer.sdk) { @@ -72,20 +80,26 @@ final bool sdk; final String markdownPath = File(Platform.script.path).parent.parent.path; - DartdocCompare(this.dartdocDir, this.markdownBefore, this.markdownAfter, - this.dartdocBin, this.dartdocPubspecPath, this.sdk); + DartdocCompare( + this.dartdocDir, + this.markdownBefore, + this.markdownAfter, + this.dartdocBin, + this.dartdocPubspecPath, + this.sdk, + ); bool compare(String? package) { // Generate docs with Markdown "Before". 
- var outBefore = _runDartdoc(markdownBefore, package); + final outBefore = _runDartdoc(markdownBefore, package); // Generate docs with Markdown "After". - var outAfter = _runDartdoc(markdownAfter, package); + final outAfter = _runDartdoc(markdownAfter, package); // Compare outputs - var diffOptions = ['-r', '-B', outBefore, outAfter]; - var result = Process.runSync('diff', diffOptions, runInShell: true); - var nlines = '\n'.allMatches(result.stdout as String).length; + final diffOptions = ['-r', '-B', outBefore, outAfter]; + final result = Process.runSync('diff', diffOptions, runInShell: true); + final nlines = '\n'.allMatches(result.stdout as String).length; print('Diff lines: $nlines'); print('diff ${diffOptions.join(" ")}'); return result.exitCode == 0; @@ -96,7 +110,7 @@ print('Running dartdoc for $markdownRef...'); print('=========================================================='); _doInPath(dartdocDir, () { - var returnCode = _updateDartdocPubspec(markdownRef); + final returnCode = _updateDartdocPubspec(markdownRef); if (returnCode != 0) { throw Exception("Could not update dartdoc's pubspec!"); } @@ -105,20 +119,20 @@ if (!sdk) { _system('pub', ['upgrade']); } - var out = Directory.systemTemp + final out = Directory.systemTemp .createTempSync('dartdoc-compare-${markdownRef}__'); - var cmd = 'dart'; - var args = [dartdocBin, '--output=${out.path}']; + final cmd = 'dart'; + final args = [dartdocBin, '--output=${out.path}']; if (sdk) { args.add('--sdk-docs'); } print('Command: $cmd ${args.join(' ')}'); - var startTime = DateTime.now(); + final startTime = DateTime.now(); _system(cmd, args); - var endTime = DateTime.now(); - var duration = endTime.difference(startTime).inSeconds; + final endTime = DateTime.now(); + final duration = endTime.difference(startTime).inSeconds; print('dartdoc generation for $markdownRef took $duration seconds.'); print(''); @@ -151,7 +165,7 @@ } int _system(String cmd, List<String> args) { - var result = Process.runSync(cmd, args); + 
final result = Process.runSync(cmd, args); print(result.stdout); print(result.stderr); return result.exitCode; @@ -162,7 +176,7 @@ return f(); } - var former = Directory.current.path; + final former = Directory.current.path; Directory.current = path; try { return f();
diff --git a/tool/expected_output.dart b/tool/expected_output.dart index 8561c8e..30d8fe9 100644 --- a/tool/expected_output.dart +++ b/tool/expected_output.dart
@@ -8,15 +8,17 @@ import 'package:path/path.dart' as p; /// Parse and yield data cases (each a [DataCase]) from [path]. -Iterable<DataCase> dataCasesInFile( - {required String path, String? baseDir}) sync* { - var file = p.basename(path).replaceFirst(RegExp(r'\..+$'), ''); +Iterable<DataCase> dataCasesInFile({ + required String path, + String? baseDir, +}) sync* { + final file = p.basename(path).replaceFirst(RegExp(r'\..+$'), ''); baseDir ??= p.relative(p.dirname(path), from: p.dirname(p.dirname(path))); // Explicitly create a File, in case the entry is a Link. - var lines = File(path).readAsLinesSync(); + final lines = File(path).readAsLinesSync(); - var frontMatter = StringBuffer(); + final frontMatter = StringBuffer(); var i = 0; @@ -26,7 +28,7 @@ while (i < lines.length) { var description = lines[i++].replaceFirst(RegExp(r'>>>\s*'), '').trim(); - var skip = description.startsWith('skip:'); + final skip = description.startsWith('skip:'); if (description == '') { description = 'line ${i + 1}'; } else { @@ -35,22 +37,23 @@ var input = ''; while (!lines[i].startsWith('<<<')) { - input += lines[i++] + '\n'; + input += '${lines[i++]}\n'; } var expectedOutput = ''; while (++i < lines.length && !lines[i].startsWith('>>>')) { - expectedOutput += lines[i] + '\n'; + expectedOutput += '${lines[i]}\n'; } - var dataCase = DataCase( - directory: baseDir, - file: file, - front_matter: frontMatter.toString(), - description: description, - skip: skip, - input: input, - expectedOutput: expectedOutput); + final dataCase = DataCase( + directory: baseDir, + file: file, + front_matter: frontMatter.toString(), + description: description, + skip: skip, + input: input, + expectedOutput: expectedOutput, + ); yield dataCase; } } @@ -65,15 +68,15 @@ String extension = 'unit', bool recursive = true, }) { - var entries = + final entries = Directory(directory).listSync(recursive: recursive, followLinks: false); - var results = <DataCase>[]; - for (var entry in entries) { + final results = 
<DataCase>[]; + for (final entry in entries) { if (!entry.path.endsWith(extension)) { continue; } - var relativeDir = + final relativeDir = p.relative(p.dirname(entry.path), from: p.dirname(directory)); results.addAll(dataCasesInFile(path: entry.path, baseDir: relativeDir)); @@ -82,7 +85,7 @@ // The API makes no guarantees on order. This is just here for stability in // tests. results.sort((a, b) { - var compare = a.directory.compareTo(b.directory); + final compare = a.directory.compareTo(b.directory); if (compare != 0) return compare; return a.file.compareTo(b.file); @@ -107,7 +110,8 @@ /// import 'package:test/test.dart'; /// /// void main() { -/// for (var dataCase in dataCasesUnder(library: #my_package.test.this_test)) { +/// for (final dataCase +/// in dataCasesUnder(library: #my_package.test.this_test)) { /// // ... /// } /// } @@ -117,13 +121,16 @@ String extension = 'unit', bool recursive = true, }) async* { - var markdownLibRoot = p.dirname((await Isolate.resolvePackageUri( - Uri.parse('package:markdown/markdown.dart')))! - .path); - var directory = + final packageUri = Uri.parse('package:markdown/markdown.dart'); + final isolateUri = await Isolate.resolvePackageUri(packageUri); + final markdownLibRoot = p.dirname(isolateUri!.toFilePath()); + final directory = p.joinAll([p.dirname(markdownLibRoot), 'test', testDirectory]); - for (var dataCase in _dataCases( - directory: directory, extension: extension, recursive: recursive)) { + for (final dataCase in _dataCases( + directory: directory, + extension: extension, + recursive: recursive, + )) { yield dataCase; } }
diff --git a/tool/stats.dart b/tool/stats.dart index 1006f94..a212e6c 100644 --- a/tool/stats.dart +++ b/tool/stats.dart
@@ -14,27 +14,41 @@ import '../tool/expected_output.dart'; import 'stats_lib.dart'; -final _configs = - List<Config>.unmodifiable([Config.commonMarkConfig, Config.gfmConfig]); +final _configs = List<Config>.unmodifiable([ + Config.commonMarkConfig, + Config.gfmConfig, +]); Future<void> main(List<String> args) async { final parser = ArgParser() - ..addOption('section', - help: 'Restrict tests to one section, provided after the option.') - ..addFlag('raw', - defaultsTo: false, help: 'raw JSON format', negatable: false) - ..addFlag('update-files', - defaultsTo: false, - help: 'Update stats files in $toolDir', - negatable: false) - ..addFlag('verbose', - defaultsTo: false, - help: 'Print details for failures and errors.', - negatable: false) - ..addFlag('verbose-loose', - defaultsTo: false, - help: 'Print details for "loose" matches.', - negatable: false) + ..addOption( + 'section', + help: 'Restrict tests to one section, provided after the option.', + ) + ..addFlag( + 'raw', + defaultsTo: false, + help: 'raw JSON format', + negatable: false, + ) + ..addFlag( + 'update-files', + defaultsTo: false, + help: 'Update stats files in $toolDir', + negatable: false, + ) + ..addFlag( + 'verbose', + defaultsTo: false, + help: 'Print details for failures and errors.', + negatable: false, + ) + ..addFlag( + 'verbose-loose', + defaultsTo: false, + help: 'Print details for "loose" matches.', + negatable: false, + ) ..addOption('flavor', allowed: _configs.map((c) => c.prefix)) ..addFlag('help', defaultsTo: false, negatable: false); @@ -54,11 +68,11 @@ return; } - var specifiedSection = options['section'] as String?; - var raw = options['raw'] as bool; - var verbose = options['verbose'] as bool; - var verboseLooseMatch = options['verbose-loose'] as bool; - var updateFiles = options['update-files'] as bool; + final specifiedSection = options['section'] as String?; + final raw = options['raw'] as bool; + final verbose = options['verbose'] as bool; + final verboseLooseMatch = 
options['verbose-loose'] as bool; + final updateFiles = options['update-files'] as bool; if (updateFiles && (raw || verbose || (specifiedSection != null))) { stderr.writeln('The `update-files` flag must be used by itself'); @@ -75,13 +89,19 @@ final testPrefixes = testPrefix == null ? _configs.map((c) => c.prefix) : <String>[testPrefix]; - for (var testPrefix in testPrefixes) { - await _processConfig(testPrefix, raw, updateFiles, verbose, - specifiedSection, verboseLooseMatch); + for (final testPrefix in testPrefixes) { + await _processConfig( + testPrefix, + raw, + updateFiles, + verbose, + specifiedSection, + verboseLooseMatch, + ); } } -final _sectionNameReplace = RegExp('[ \\)\\(]+'); +final _sectionNameReplace = RegExp(r'[ \)\(]+'); String _unitOutput(Iterable<DataCase> cases) => cases.map((dataCase) => ''' >>> ${dataCase.front_matter} @@ -102,21 +122,25 @@ ) async { final config = _configs.singleWhere((c) => c.prefix == testPrefix); - var sections = loadCommonMarkSections(testPrefix); + final sections = loadCommonMarkSections(testPrefix); - var scores = SplayTreeMap<String, SplayTreeMap<int, CompareLevel>>( + final scores = SplayTreeMap<String, SplayTreeMap<int, CompareLevel>>( compareAsciiLowerCaseNatural); - for (var entry in sections.entries) { + for (final entry in sections.entries) { if (specifiedSection != null && entry.key != specifiedSection) { continue; } final units = <DataCase>[]; - for (var e in entry.value) { - final result = compareResult(config, e, - verboseFail: verbose, verboseLooseMatch: verboseLooseMatch); + for (final e in entry.value) { + final result = compareResult( + config, + e, + verboseFail: verbose, + verboseLooseMatch: verboseLooseMatch, + ); units.add(DataCase( front_matter: result.testCase.toString(), @@ -127,8 +151,10 @@ : result.result!, )); - var nestedMap = scores.putIfAbsent( - entry.key, () => SplayTreeMap<int, CompareLevel>()); + final nestedMap = scores.putIfAbsent( + entry.key, + () => SplayTreeMap<int, CompareLevel>(), 
+ ); nestedMap[e.example] = result.compareLevel; } @@ -170,9 +196,9 @@ } } if (obj is Map) { - var map = <String, Object?>{}; + final map = <String, Object?>{}; obj.forEach((k, v) { - var newKey = k.toString(); + final newKey = k.toString(); map[newKey] = v; }); return map; @@ -180,18 +206,21 @@ return obj; } -Future<void> _printRaw(String testPrefix, - Map<String, Map<int, CompareLevel>> scores, bool updateFiles) async { +Future<void> _printRaw( + String testPrefix, + Map<String, Map<int, CompareLevel>> scores, + bool updateFiles, +) async { IOSink sink; if (updateFiles) { - var file = getStatsFile(testPrefix); + final file = getStatsFile(testPrefix); print('Updating ${file.path}'); sink = file.openWrite(); } else { sink = stdout; } - var encoder = const JsonEncoder.withIndent(' ', _convert); + final encoder = const JsonEncoder.withIndent(' ', _convert); try { sink.writeln(encoder.convert(scores)); } on JsonUnsupportedObjectError catch (e) { @@ -210,34 +239,35 @@ '– ${(100 * value / total).toStringAsFixed(1).padLeft(5)}% $section'; Future<void> _printFriendly( - String testPrefix, - SplayTreeMap<String, SplayTreeMap<int, CompareLevel>> scores, - bool updateFiles) async { + String testPrefix, + SplayTreeMap<String, SplayTreeMap<int, CompareLevel>> scores, + bool updateFiles, +) async { var totalValid = 0; var totalStrict = 0; var totalExamples = 0; IOSink sink; if (updateFiles) { - var path = p.join(toolDir, '${testPrefix}_stats.txt'); + final path = p.join(toolDir, '${testPrefix}_stats.txt'); print('Updating $path'); - var file = File(path); + final file = File(path); sink = file.openWrite(); } else { sink = stdout; } scores.forEach((section, Map<int, CompareLevel> map) { - var total = map.values.length; + final total = map.values.length; totalExamples += total; - var sectionStrictCount = + final sectionStrictCount = map.values.where((val) => val == CompareLevel.strict).length; - var sectionLooseCount = + final sectionLooseCount = map.values.where((val) => val == 
CompareLevel.loose).length; - var sectionValidCount = sectionStrictCount + sectionLooseCount; + final sectionValidCount = sectionStrictCount + sectionLooseCount; totalStrict += sectionStrictCount; totalValid += sectionValidCount;
diff --git a/tool/stats_lib.dart b/tool/stats_lib.dart index 7439766..125b34e 100644 --- a/tool/stats_lib.dart +++ b/tool/stats_lib.dart
@@ -15,30 +15,33 @@ // Locate the "tool" directory. Use mirrors so that this works with the test // package, which loads this suite into an isolate. -String get toolDir => - p.dirname((reflect(loadCommonMarkSections) as ClosureMirror) - .function - .location! - .sourceUri - .path); +String get toolDir { + final path = (reflect(loadCommonMarkSections) as ClosureMirror) + .function + .location! + .sourceUri + .path; + + return p.dirname(path); +} File getStatsFile(String prefix) => File(p.join(toolDir, '${prefix}_stats.json')); Map<String, List<CommonMarkTestCase>> loadCommonMarkSections( String testPrefix) { - var testFile = File(p.join(toolDir, '${testPrefix}_tests.json')); - var testsJson = testFile.readAsStringSync(); + final testFile = File(p.join(toolDir, '${testPrefix}_tests.json')); + final testsJson = testFile.readAsStringSync(); - var testArray = jsonDecode(testsJson) as List; + final testArray = jsonDecode(testsJson) as List; - var sections = <String, List<CommonMarkTestCase>>{}; + final sections = <String, List<CommonMarkTestCase>>{}; - for (var exampleMap in testArray) { - var exampleTest = + for (final exampleMap in testArray) { + final exampleTest = CommonMarkTestCase.fromJson(exampleMap as Map<String, dynamic>); - var sectionList = + final sectionList = sections.putIfAbsent(exampleTest.section, () => <CommonMarkTestCase>[]); sectionList.add(exampleTest); @@ -48,10 +51,16 @@ } class Config { - static final Config commonMarkConfig = - Config._('common_mark', 'http://spec.commonmark.org/0.28/', null); + static final Config commonMarkConfig = Config._( + 'common_mark', + 'http://spec.commonmark.org/0.28/', + null, + ); static final Config gfmConfig = Config._( - 'gfm', 'https://github.github.com/gfm/', ExtensionSet.gitHubFlavored); + 'gfm', + 'https://github.github.com/gfm/', + ExtensionSet.gitHubFlavored, + ); final String prefix; final String baseUrl; @@ -68,17 +77,24 @@ final int startLine; final int endLine; - CommonMarkTestCase(this.example, 
this.section, this.startLine, this.endLine, - this.markdown, this.html); + CommonMarkTestCase( + this.example, + this.section, + this.startLine, + this.endLine, + this.markdown, + this.html, + ); factory CommonMarkTestCase.fromJson(Map<String, dynamic> json) { return CommonMarkTestCase( - json['example'] as int, - json['section'] as String /*!*/, - json['start_line'] as int, - json['end_line'] as int, - json['markdown'] as String /*!*/, - json['html'] as String); + json['example'] as int, + json['section'] as String /*!*/, + json['start_line'] as int, + json['end_line'] as int, + json['markdown'] as String /*!*/, + json['html'] as String, + ); } @override @@ -95,10 +111,13 @@ CompareResult(this.testCase, this.result, this.compareLevel); } -CompareResult compareResult(Config config, CommonMarkTestCase testCase, - {bool throwOnError = false, - bool verboseFail = false, - bool verboseLooseMatch = false}) { +CompareResult compareResult( + Config config, + CommonMarkTestCase testCase, { + bool throwOnError = false, + bool verboseFail = false, + bool verboseLooseMatch = false, +}) { String output; try { output = @@ -109,7 +128,11 @@ } if (verboseFail) { _printVerboseFailure( - config.baseUrl, 'ERROR', testCase, 'Thrown: $err\n$stackTrace'); + config.baseUrl, + 'ERROR', + testCase, + 'Thrown: $err\n$stackTrace', + ); } return CompareResult(testCase, null, CompareLevel.error); @@ -119,10 +142,10 @@ return CompareResult(testCase, output, CompareLevel.strict); } - var expectedParsed = parseFragment(testCase.html); - var actual = parseFragment(output); + final expectedParsed = parseFragment(testCase.html); + final actual = parseFragment(output); - var looseMatch = _compareHtml(expectedParsed.children, actual.children); + final looseMatch = _compareHtml(expectedParsed.children, actual.children); if (!looseMatch && verboseFail) { _printVerboseFailure(config.baseUrl, 'FAIL', testCase, output); @@ -133,14 +156,21 @@ } return CompareResult( - testCase, output, looseMatch ? 
CompareLevel.loose : CompareLevel.fail); + testCase, + output, + looseMatch ? CompareLevel.loose : CompareLevel.fail, + ); } String _indent(String s) => s.splitMapJoin('\n', onNonMatch: (n) => ' ${whitespaceColor(n)}'); -void _printVerboseFailure(String baseUrl, String message, - CommonMarkTestCase testCase, String actual) { +void _printVerboseFailure( + String baseUrl, + String message, + CommonMarkTestCase testCase, + String actual, +) { print('$message: $baseUrl#example-${testCase.example} ' '@ ${testCase.section}'); print('input:'); @@ -154,14 +184,16 @@ /// Compare two DOM trees for equality. bool _compareHtml( - List<Element> expectedElements, List<Element> actualElements) { + List<Element> expectedElements, + List<Element> actualElements, +) { if (expectedElements.length != actualElements.length) { return false; } for (var childNum = 0; childNum < expectedElements.length; childNum++) { - var expected = expectedElements[childNum]; - var actual = actualElements[childNum]; + final expected = expectedElements[childNum]; + final actual = actualElements[childNum]; if (expected.runtimeType != actual.runtimeType) { return false; @@ -175,15 +207,15 @@ return false; } - var expectedAttrKeys = expected.attributes.keys.toList(); + final expectedAttrKeys = expected.attributes.keys.toList(); expectedAttrKeys.sort(); - var actualAttrKeys = actual.attributes.keys.toList(); + final actualAttrKeys = actual.attributes.keys.toList(); actualAttrKeys.sort(); for (var attrNum = 0; attrNum < actualAttrKeys.length; attrNum++) { - var expectedAttrKey = expectedAttrKeys[attrNum]; - var actualAttrKey = actualAttrKeys[attrNum]; + final expectedAttrKey = expectedAttrKeys[attrNum]; + final actualAttrKey = actualAttrKeys[attrNum]; if (expectedAttrKey != actualAttrKey) { return false; @@ -195,7 +227,7 @@ } } - var childrenEqual = _compareHtml(expected.children, actual.children); + final childrenEqual = _compareHtml(expected.children, actual.children); if (!childrenEqual) { return false;
diff --git a/tool/update_blns.dart b/tool/update_blns.dart index 09c1959..b48b8a7 100644 --- a/tool/update_blns.dart +++ b/tool/update_blns.dart
@@ -7,32 +7,30 @@ final _blnsFilePath = 'test/blns.dart'; Future<void> main() async { - var client = HttpClient(); + final client = HttpClient(); List<String> json; try { - var request = await client.getUrl(Uri.parse(_blnsJsonRawUrl)); - var response = await request.close(); - json = (jsonDecode(await response - .cast<List<int>>() - .transform(utf8.decoder) - .join('')) as List) - .cast<String>(); + final request = await client.getUrl(Uri.parse(_blnsJsonRawUrl)); + final response = await request.close(); + final source = + await response.cast<List<int>>().transform(utf8.decoder).join(''); + json = (jsonDecode(source) as List).cast<String>(); } finally { client.close(); } - var blnsContent = StringBuffer(''' + final blnsContent = StringBuffer(''' // GENERATED FILE. DO NOT EDIT. // // This file was generated from big-list-of-naughty-strings's JSON file: // $_blnsJsonRawUrl // at ${DateTime.now()} by the script, tool/update_blns.dart. -// ignore_for_file: text_direction_code_point_in_literal +// ignore_for_file: text_direction_code_point_in_literal, use_raw_strings '''); blnsContent.writeln('const blns = <String>['); - for (var str in json) { - var escaped = str + for (final str in json) { + final escaped = str .replaceAll(r'\', r'\\') .replaceAll("'", r"\'") .replaceAll(r'$', r'\$');
diff --git a/tool/update_emojis.dart b/tool/update_emojis.dart index ebdf803..76b82e2 100644 --- a/tool/update_emojis.dart +++ b/tool/update_emojis.dart
@@ -1,3 +1,7 @@ +// Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + import 'dart:async'; import 'dart:convert'; import 'dart:io'; @@ -10,15 +14,15 @@ final _emojisFilePath = 'lib/src/emojis.dart'; Future<void> main() async { - var client = HttpClient(); - var request = await client.getUrl(Uri.parse(_emojisJsonRawUrl)); - var response = await request.close(); - var json = jsonDecode( + final client = HttpClient(); + final request = await client.getUrl(Uri.parse(_emojisJsonRawUrl)); + final response = await request.close(); + final json = jsonDecode( await response.cast<List<int>>().transform(utf8.decoder).join('')) .map((String alias, dynamic info) => MapEntry(alias, info.cast<String, dynamic>())) .cast<String, Map<String, dynamic>>(); - var emojisContent = StringBuffer(''' + final emojisContent = StringBuffer(''' // GENERATED FILE. DO NOT EDIT. // // This file was generated from emojilib's emoji data file: @@ -28,7 +32,7 @@ '''); emojisContent.writeln('const emojis = <String, String>{'); var emojiCount = 0; - var ignored = <String>[]; + final ignored = <String>[]; json.forEach((String alias, Map<String, dynamic> info) { if (info['char'] != null) { emojisContent.writeln(" '$alias': '${info['char']}',");