| // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| import 'ast.dart'; |
| import 'document.dart'; |
| import 'util.dart'; |
| |
/// Matches a blank line: one that is empty or holds only spaces and tabs.
final RegExp _emptyPattern = new RegExp(r'^(?:[ \t]*)$');

/// Matches the underline of a setext-style header.
///
/// The underline is a run of `=` or a run of `-`, indented by at most three
/// spaces and optionally followed by whitespace. Group 1 is the run itself.
final RegExp _setextPattern = new RegExp(r'^[ ]{0,3}(=+|-+)\s*$');

/// Matches an atx-style header line.
///
/// After at most three spaces of indentation the line opens with one to six
/// `#` characters, which must be followed by a space, tab, vertical tab or
/// form feed. Group 1 is the opening `#` run and group 2 is the header text;
/// a run of `#` characters at the very end of the line is excluded from
/// group 2.
final RegExp _headerPattern =
    new RegExp(r'^ {0,3}(#{1,6})[ \x09\x0b\x0c](.*?)#*$');

/// Matches a blockquote line: `>` after at most three spaces of indentation,
/// with one optional space following it. Group 1 is the quoted remainder.
final RegExp _blockquotePattern = new RegExp(r'^[ ]{0,3}>[ ]?(.*)$');
| |
/// Matches a line indented far enough to be an indented code block or list
/// continuation: four leading spaces, or at most three spaces followed by a
/// tab.
///
/// Group 1 is the line with that indentation removed. The first alternative
/// must be exactly four spaces; a shorter run would make ordinary text with a
/// single leading space look like code.
final RegExp _indentPattern = new RegExp(r'^(?:    | {0,3}\t)(.*)$');
| |
/// Matches the fence line of a fenced code block.
///
/// Group 1 is the fence (three or more backticks, or three or more tildes)
/// and group 2 is whatever follows it on the line.
final RegExp _codePattern = new RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');

/// Matches a horizontal rule: three or more hyphens, asterisks or underscores
/// (all the same character) on a line by themselves, optionally separated by
/// spaces or tabs.
///
/// A line such as `----` is also a valid setext underline; when both apply,
/// the setext interpretation should win.
final RegExp _hrPattern =
    new RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$');

/// Matches a run of one or more whitespace characters, used to compress
/// internal whitespace.
final RegExp _oneOrMoreWhitespacePattern = new RegExp('[ \n\r\t]+');

/// Matches the first line of an unordered list item: one of the markers `-`,
/// `*` or `+`, after at most three leading spaces.
///
/// The groups line up with those of [_olPattern]:
///
/// 1. the leading spaces,
/// 2. an always-empty placeholder (where [_olPattern] captures the digits),
/// 3. the marker character,
/// 4. everything after the marker,
/// 5. the first whitespace character after the marker,
/// 6. any further whitespace,
/// 7. the item's content.
final RegExp _ulPattern =
    new RegExp(r'^([ ]{0,3})()([*+-])(([ \t])([ \t]*)(.*))?$');

/// Matches the first line of an ordered list item: one to nine digits
/// followed by `.` or `)`, after at most three leading spaces.
///
/// Group 2 is the digits and group 3 the delimiter; the remaining groups are
/// the same as in [_ulPattern].
final RegExp _olPattern =
    new RegExp(r'^([ ]{0,3})(\d{1,9})([\.)])(([ \t])([ \t]*)(.*))?$');

/// Matches a table delimiter row: runs of hyphens, each optionally flanked by
/// `:`, separated by at least one pipe.
final RegExp _tablePattern =
    new RegExp(r'^[ ]{0,3}\|?( *:?\-+:? *\|)+( *:?\-+:? *)?$');
| |
/// Holds the state needed to turn a series of lines into block-level Markdown
/// nodes, which can then be handed on for inline parsing.
class BlockParser {
  /// The lines being parsed.
  final List<String> lines;

  /// The Markdown document this parser is parsing.
  final Document document;

  /// The enabled block syntaxes.
  ///
  /// Each of these is tried in turn against the current line, so their order
  /// is significant.
  final List<BlockSyntax> blockSyntaxes = [];

  /// Index into [lines] of the current line.
  int _pos = 0;

  /// Whether the parser has encountered a blank line between two block-level
  /// elements.
  bool encounteredBlankLine = false;

  /// The collection of built-in block parsers.
  final List<BlockSyntax> standardBlockSyntaxes = [
    const EmptyBlockSyntax(),
    const BlockTagBlockHtmlSyntax(),
    new LongBlockHtmlSyntax(r'^ {0,3}<pre(?:\s|>|$)', '</pre>'),
    new LongBlockHtmlSyntax(r'^ {0,3}<script(?:\s|>|$)', '</script>'),
    new LongBlockHtmlSyntax(r'^ {0,3}<style(?:\s|>|$)', '</style>'),
    new LongBlockHtmlSyntax('^ {0,3}<!--', '-->'),
    new LongBlockHtmlSyntax('^ {0,3}<\\?', '\\?>'),
    new LongBlockHtmlSyntax('^ {0,3}<![A-Z]', '>'),
    new LongBlockHtmlSyntax('^ {0,3}<!\\[CDATA\\[', '\\]\\]>'),
    const OtherTagBlockHtmlSyntax(),
    const SetextHeaderSyntax(),
    const HeaderSyntax(),
    const CodeBlockSyntax(),
    const BlockquoteSyntax(),
    const HorizontalRuleSyntax(),
    const UnorderedListSyntax(),
    const OrderedListSyntax(),
    const ParagraphSyntax()
  ];

  /// Creates a parser over [lines].
  ///
  /// The syntaxes supplied by [document] are registered ahead of the built-in
  /// ones, so they are tried first.
  BlockParser(this.lines, this.document) {
    blockSyntaxes
      ..addAll(document.blockSyntaxes)
      ..addAll(standardBlockSyntaxes);
  }

  /// The current line.
  String get current => lines[_pos];

  /// The line after the current one, or `null` if there is none.
  String get next => _pos < lines.length - 1 ? lines[_pos + 1] : null;

  /// Returns the line that is [linesAhead] lines past the current one, or
  /// `null` if that would read past the end.
  ///
  /// `peek(0)` is equivalent to [current] and `peek(1)` is equivalent to
  /// [next]. Throws an [ArgumentError] when [linesAhead] is negative.
  String peek(int linesAhead) {
    if (linesAhead < 0) {
      throw new ArgumentError(
          'Invalid linesAhead: $linesAhead; must be >= 0.');
    }
    var target = _pos + linesAhead;
    return target < lines.length ? lines[target] : null;
  }

  /// Moves on to the following line.
  void advance() {
    _pos += 1;
  }

  /// Whether every line has been consumed.
  bool get isDone => _pos >= lines.length;

  /// Whether the current line matches [regex]; always `false` once [isDone].
  bool matches(RegExp regex) => !isDone && regex.hasMatch(current);

  /// Whether the next line exists and matches [regex].
  bool matchesNext(RegExp regex) {
    var following = next;
    return following != null && regex.hasMatch(following);
  }

  /// Parses all remaining lines into block nodes.
  ///
  /// For each position the first syntax in [blockSyntaxes] whose `canParse`
  /// accepts it does the parsing; a `null` result is consumed without
  /// producing a node.
  List<Node> parseLines() {
    var blocks = <Node>[];
    while (!isDone) {
      for (var syntax in blockSyntaxes) {
        if (!syntax.canParse(this)) continue;
        var block = syntax.parse(this);
        if (block != null) blocks.add(block);
        break;
      }
    }
    return blocks;
  }
}
| |
/// Base class for everything that can recognise and parse one kind of
/// block-level construct.
abstract class BlockSyntax {
  const BlockSyntax();

  /// The regex used to identify the beginning of this block, if any.
  RegExp get pattern => null;

  /// Whether a line starting this kind of block may end the block before it.
  bool get canEndBlock => true;

  /// Whether this syntax applies at [parser]'s current line.
  ///
  /// The default implementation tests the current line against [pattern].
  bool canParse(BlockParser parser) => pattern.hasMatch(parser.current);

  /// Parses the block starting at [parser]'s current line.
  Node parse(BlockParser parser);

  /// Consumes consecutive lines matching [pattern] and returns the first
  /// capture group of each one.
  List<String> parseChildLines(BlockParser parser) {
    var collected = <String>[];

    while (!parser.isDone) {
      var lineMatch = pattern.firstMatch(parser.current);
      if (lineMatch == null) break;
      collected.add(lineMatch[1]);
      parser.advance();
    }

    return collected;
  }

  /// Whether [parser]'s current line should end the previous block.
  ///
  /// That is the case at the end of input, or when some enabled syntax both
  /// accepts the line and has [canEndBlock] set.
  static bool isAtBlockEnd(BlockParser parser) {
    if (parser.isDone) return true;
    for (var syntax in parser.blockSyntaxes) {
      if (syntax.canParse(parser) && syntax.canEndBlock) return true;
    }
    return false;
  }

  /// Generates a valid HTML anchor from the inner text of [element].
  ///
  /// The text of the element's first child is lower-cased and trimmed, any
  /// leading run of characters other than `a`-`z` is dropped, every character
  /// outside `a`-`z`, `0`-`9`, space, `_` and `-` is removed, and each
  /// remaining whitespace character becomes a `-`.
  static String generateAnchorHash(Element element) {
    var text = element.children.first.textContent.toLowerCase().trim();
    var startingWithLetter = text.replaceFirst(new RegExp(r'^[^a-z]+'), '');
    var allowedOnly =
        startingWithLetter.replaceAll(new RegExp(r'[^a-z0-9 _-]'), '');
    return allowedOnly.replaceAll(new RegExp(r'\s'), '-');
  }
}
| |
/// Consumes blank lines without producing any output.
class EmptyBlockSyntax extends BlockSyntax {
  const EmptyBlockSyntax();

  @override
  RegExp get pattern => _emptyPattern;

  /// Records on [parser] that a blank line was seen, skips the line, and
  /// returns `null` so that nothing is emitted.
  @override
  Node parse(BlockParser parser) {
    parser
      ..encounteredBlankLine = true
      ..advance();
    return null;
  }
}
| |
/// Parses setext-style headers: one or more lines of text followed by an
/// underline of `=` (for `h1`) or `-` (for `h2`).
class SetextHeaderSyntax extends BlockSyntax {
  const SetextHeaderSyntax();

  /// Whether a setext header starts at the current line.
  ///
  /// The current line and every line up to the underline must be readable as
  /// paragraph text; reaching the end of input, or any other kind of line,
  /// before an underline means there is no header here.
  @override
  bool canParse(BlockParser parser) {
    if (!_interperableAsParagraph(parser.current)) return false;
    for (var offset = 1;; offset++) {
      var candidate = parser.peek(offset);
      // Ran out of lines without ever reaching an underline.
      if (candidate == null) return false;
      if (_setextPattern.hasMatch(candidate)) return true;
      // Only paragraph-like text may sit between the start and the underline.
      if (!_interperableAsParagraph(candidate)) return false;
    }
  }

  /// Collects text lines up to the underline and wraps them in an `h1` or
  /// `h2` element, depending on the underline character.
  @override
  Node parse(BlockParser parser) {
    var textLines = <String>[];
    String tag;
    while (!parser.isDone) {
      var underline = _setextPattern.firstMatch(parser.current);
      if (underline != null) {
        tag = underline[1].startsWith('=') ? 'h1' : 'h2';
        parser.advance();
        break;
      }
      // Still header text.
      textLines.add(parser.current);
      parser.advance();
    }

    return new Element(tag, [new UnparsedContent(textLines.join('\n'))]);
  }

  /// Whether [line] matches none of the patterns that would start some other
  /// kind of block (or a blank line).
  bool _interperableAsParagraph(String line) {
    var otherBlockStarts = [
      _indentPattern,
      _codePattern,
      _headerPattern,
      _blockquotePattern,
      _hrPattern,
      _ulPattern,
      _olPattern,
      _emptyPattern
    ];
    return !otherBlockStarts.any((blockStart) => blockStart.hasMatch(line));
  }
}
| |
/// Parses setext-style headers exactly as [SetextHeaderSyntax] does, and then
/// gives each resulting element a generated ID.
class SetextHeaderWithIdSyntax extends SetextHeaderSyntax {
  const SetextHeaderWithIdSyntax();

  /// Parses the header and sets its `generatedId` from
  /// [BlockSyntax.generateAnchorHash].
  @override
  Node parse(BlockParser parser) {
    var header = super.parse(parser) as Element;
    var anchor = BlockSyntax.generateAnchorHash(header);
    header.generatedId = anchor;
    return header;
  }
}
| |
/// Parses atx-style headers: `## Header ##`.
class HeaderSyntax extends BlockSyntax {
  const HeaderSyntax();

  @override
  RegExp get pattern => _headerPattern;

  /// Consumes the current line and returns an `h1`..`h6` element.
  ///
  /// The level is the number of opening `#` characters, and the content is
  /// the trimmed header text.
  @override
  Node parse(BlockParser parser) {
    var headerMatch = pattern.firstMatch(parser.current);
    parser.advance();
    var tag = 'h${headerMatch[1].length}';
    var text = headerMatch[2].trim();
    return new Element(tag, [new UnparsedContent(text)]);
  }
}
| |
/// Parses atx-style headers exactly as [HeaderSyntax] does, and then gives
/// each resulting element a generated ID.
class HeaderWithIdSyntax extends HeaderSyntax {
  const HeaderWithIdSyntax();

  /// Parses the header and sets its `generatedId` from
  /// [BlockSyntax.generateAnchorHash].
  @override
  Node parse(BlockParser parser) {
    var header = super.parse(parser) as Element;
    var anchor = BlockSyntax.generateAnchorHash(header);
    header.generatedId = anchor;
    return header;
  }
}
| |
/// Parses email-style blockquotes: `> quote`.
class BlockquoteSyntax extends BlockSyntax {
  const BlockquoteSyntax();

  @override
  RegExp get pattern => _blockquotePattern;

  /// Gathers the lines that belong to the blockquote.
  ///
  /// Lines starting with `>` are added with that marker stripped. A line
  /// without the marker is still accepted, unchanged, as a paragraph
  /// continuation when the first enabled syntax able to parse it is
  /// [ParagraphSyntax]; any other line ends the quote.
  @override
  List<String> parseChildLines(BlockParser parser) {
    var quoteLines = <String>[];

    while (!parser.isDone) {
      var quoted = pattern.firstMatch(parser.current);
      if (quoted != null) {
        quoteLines.add(quoted[1]);
        parser.advance();
        continue;
      }

      // Without a ">" the line can only stay in the quote as lazy paragraph
      // continuation text.
      var firstApplicable =
          parser.blockSyntaxes.firstWhere((s) => s.canParse(parser));
      if (firstApplicable is! ParagraphSyntax) break;

      quoteLines.add(parser.current);
      parser.advance();
    }

    return quoteLines;
  }

  /// Parses the quoted lines recursively with a fresh [BlockParser] and wraps
  /// the result in a `blockquote` element.
  @override
  Node parse(BlockParser parser) {
    var quoteLines = parseChildLines(parser);
    var nested = new BlockParser(quoteLines, parser.document);
    return new Element('blockquote', nested.parseLines());
  }
}
| |
/// Parses preformatted code blocks that are indented four spaces.
class CodeBlockSyntax extends BlockSyntax {
  const CodeBlockSyntax();

  @override
  RegExp get pattern => _indentPattern;

  @override
  bool get canEndBlock => false;

  /// Gathers the indented lines, with their indentation stripped.
  ///
  /// A single whitespace-only line is kept (as an empty string) when the line
  /// right after it is indented again, so that two code runs separated by one
  /// blank line stay in the same block.
  @override
  List<String> parseChildLines(BlockParser parser) {
    var codeLines = <String>[];

    while (!parser.isDone) {
      var indented = pattern.firstMatch(parser.current);
      if (indented != null) {
        codeLines.add(indented[1]);
        parser.advance();
        continue;
      }

      // Not indented: only carry on if this is a blank line sitting between
      // two indented lines.
      var following = parser.next;
      var followingIndented =
          following == null ? null : pattern.firstMatch(following);
      if (followingIndented == null || parser.current.trim() != '') break;

      codeLines..add('')..add(followingIndented[1]);
      parser..advance()..advance();
    }

    return codeLines;
  }

  /// Returns a `pre` element wrapping a `code` element that holds the
  /// HTML-escaped code.
  @override
  Node parse(BlockParser parser) {
    // The Markdown tests expect a trailing newline, hence the extra empty
    // entry before joining.
    var codeLines = parseChildLines(parser)..add('');
    var escaped = escapeHtml(codeLines.join('\n'));
    return new Element('pre', [new Element.text('code', escaped)]);
  }
}
| |
/// Parses preformatted code blocks between two ~~~ or ``` sequences.
///
/// See [Pandoc's documentation](http://pandoc.org/README.html#fenced-code-blocks).
class FencedCodeBlockSyntax extends BlockSyntax {
  const FencedCodeBlockSyntax();

  @override
  RegExp get pattern => _codePattern;

  /// Skips the opening fence line and gathers every line up to the closing
  /// fence, which is consumed but not returned.
  ///
  /// A line closes the block when it matches [pattern] and its fence starts
  /// with [endBlock]; a missing [endBlock] is treated as the empty string. If
  /// no closing fence is found, all remaining lines are returned.
  @override
  List<String> parseChildLines(BlockParser parser, [String endBlock]) {
    endBlock ??= '';

    var codeLines = <String>[];
    parser.advance();

    while (!parser.isDone) {
      var fence = pattern.firstMatch(parser.current);
      var closesBlock = fence != null && fence[1].startsWith(endBlock);
      if (closesBlock) {
        parser.advance();
        break;
      }
      codeLines.add(parser.current);
      parser.advance();
    }

    return codeLines;
  }

  /// Returns a `pre` element wrapping a `code` element that holds the
  /// HTML-escaped code.
  ///
  /// When the opening fence carries an info string, its first word is set as
  /// a `language-` class on the `code` element.
  @override
  Node parse(BlockParser parser) {
    var opening = pattern.firstMatch(parser.current);
    var fence = opening.group(1);
    // The info string should be trimmed:
    // http://spec.commonmark.org/0.22/#example-100
    var infoString = opening.group(2).trim();

    // The Markdown tests expect a trailing newline, hence the extra empty
    // entry before joining.
    var codeLines = parseChildLines(parser, fence)..add('');
    var code = new Element.text('code', escapeHtml(codeLines.join('\n')));

    if (infoString.isNotEmpty) {
      // Only the first word of the info string names the syntax:
      // http://spec.commonmark.org/0.22/#example-100
      var language = infoString.split(' ').first;
      code.attributes['class'] = "language-$language";
    }

    return new Element('pre', [code]);
  }
}
| |
/// Parses horizontal rules like `---`, `_ _ _`, `* * *`, etc.
class HorizontalRuleSyntax extends BlockSyntax {
  const HorizontalRuleSyntax();

  @override
  RegExp get pattern => _hrPattern;

  /// Consumes the rule's line and returns an empty `hr` element.
  @override
  Node parse(BlockParser parser) {
    var rule = new Element.empty('hr');
    parser.advance();
    return rule;
  }
}
| |
/// Base class for syntaxes that handle inline HTML at the block level.
///
/// Compared with other Markdown implementations this is deliberately
/// simplistic:
///
/// 1. It is far, far simpler.
/// 2. Essentially no HTML parsing or validation takes place. This is a
///    Markdown parser, not an HTML parser!
abstract class BlockHtmlSyntax extends BlockSyntax {
  const BlockHtmlSyntax();

  @override
  bool get canEndBlock => true;
}
| |
/// Parses HTML that begins with an opening or closing tag from a fixed list
/// of block-level tag names, passing it through as raw text.
class BlockTagBlockHtmlSyntax extends BlockHtmlSyntax {
  static final _pattern = new RegExp(
      r'^ {0,3}</?(?:address|article|aside|base|basefont|blockquote|body|'
      r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|'
      r'figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|'
      r'iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|'
      r'option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|'
      'title|tr|track|ul)'
      r'(?:\s|>|/>|$)');

  const BlockTagBlockHtmlSyntax();

  @override
  RegExp get pattern => _pattern;

  /// Consumes lines up to (but not including) the next blank line and returns
  /// them, joined by newlines, as a single [Text] node.
  @override
  Node parse(BlockParser parser) {
    var htmlLines = <String>[];

    for (;
        !parser.isDone && !parser.matches(_emptyPattern);
        parser.advance()) {
      htmlLines.add(parser.current);
    }

    return new Text(htmlLines.join('\n'));
  }
}
| |
/// Parses a line holding a single HTML tag whose name is not one of the
/// block-level names handled by [BlockTagBlockHtmlSyntax].
///
/// Unlike its superclass, this syntax cannot end the block before it.
class OtherTagBlockHtmlSyntax extends BlockTagBlockHtmlSyntax {
  // Really hacky way to detect "other" HTML. This matches:
  //
  // * any opening spaces
  // * open bracket and maybe a slash ("<" or "</")
  // * some word characters
  // * either:
  //   * a close bracket, or
  //   * whitespace followed by not-brackets followed by a close bracket
  // * possible whitespace and the end of the line.
  //
  // Compiled once and shared: [pattern] is read for every line that is
  // tested, so building a new RegExp in the getter would recompile it each
  // time.
  static final _otherTagPattern =
      new RegExp(r'^ {0,3}</?\w+(?:>|\s+[^>]*>)\s*$');

  const OtherTagBlockHtmlSyntax();

  @override
  bool get canEndBlock => false;

  @override
  RegExp get pattern => _otherTagPattern;
}
| |
/// A [BlockHtmlSyntax] with a specific end pattern.
///
/// In practice this means that the syntax dominates: it is allowed to eat
/// many lines, blank ones included, before its end pattern matches.
class LongBlockHtmlSyntax extends BlockHtmlSyntax {
  /// Recognises the line that opens the block.
  @override
  final RegExp pattern;

  /// Recognises the line that closes the block.
  final RegExp _endPattern;

  /// Compiles [pattern] and [endPattern] into the start and end regexes.
  LongBlockHtmlSyntax(String pattern, String endPattern)
      : pattern = new RegExp(pattern),
        _endPattern = new RegExp(endPattern);

  /// Consumes lines through the first one matching the end pattern (or to the
  /// end of input) and returns them, joined by newlines, as a [Text] node.
  @override
  Node parse(BlockParser parser) {
    var htmlLines = <String>[];

    while (!parser.isDone) {
      htmlLines.add(parser.current);
      // The closing line belongs to the block; it is stepped over below.
      if (parser.matches(_endPattern)) break;
      parser.advance();
    }
    parser.advance();

    return new Text(htmlLines.join('\n'));
  }
}
| |
/// The raw lines making up one item of a list.
class ListItem {
  /// The item's lines.
  final List<String> lines;

  /// A flag that starts out `false`; nothing in this class changes it.
  bool forceBlock = false;

  ListItem(this.lines);
}
| |
/// Base class for both ordered and unordered lists.
abstract class ListSyntax extends BlockSyntax {
  const ListSyntax();

  @override
  bool get canEndBlock => true;

  /// The HTML tag of the list element, e.g. `ul` or `ol`.
  String get listTag;

  /// A list of patterns that can start a valid block within a list item.
  static final blocksInList = [
    _blockquotePattern,
    _headerPattern,
    _hrPattern,
    _indentPattern,
    _ulPattern,
    _olPattern
  ];

  /// Matches the (possibly empty) run of spaces and tabs opening a line.
  static final _whitespaceRe = new RegExp('[ \t]*');

  /// Parses a whole list, returning an element tagged [listTag] with one `li`
  /// child per item.
  ///
  /// Lines are first grouped into items, each item is then parsed recursively
  /// with its own [BlockParser], and finally, if the list is "tight", the
  /// top-level paragraphs inside the items are unwrapped.
  @override
  Node parse(BlockParser parser) {
    var items = <ListItem>[];
    var currentItemLines = <String>[];

    // Stores the lines gathered so far as one item, unless there are none.
    void finishItem() {
      if (currentItemLines.isEmpty) return;
      items.add(new ListItem(currentItemLines));
      currentItemLines = <String>[];
    }

    // Holds the result of the latest [tryMatch] call.
    Match match;
    bool tryMatch(RegExp pattern) {
      match = pattern.firstMatch(parser.current);
      return match != null;
    }

    String listMarker;
    String indent;
    // In case the first number in an ordered list is not 1, use it as the
    // "start".
    int startNumber;

    while (!parser.isDone) {
      var leadingSpace = _whitespaceRe.matchAsPrefix(parser.current).group(0);
      var leadingExpandedTabLength = _expandedTabLength(leadingSpace);
      if (tryMatch(_emptyPattern)) {
        // Two blank lines in a row end the list.
        if (_emptyPattern.firstMatch(parser.next ?? '') != null) break;
        // A single blank line stays with the current item.
        currentItemLines.add('');
      } else if (indent != null && indent.length <= leadingExpandedTabLength) {
        // Indented at least as far as the item's content: expand the leading
        // whitespace to spaces, strip the item's indent, and keep the line.
        var expanded = parser.current
            .replaceFirst(leadingSpace, ' ' * leadingExpandedTabLength);
        currentItemLines.add(expanded.replaceFirst(indent, ''));
      } else if (tryMatch(_hrPattern)) {
        // A horizontal rule takes precedence over a new list item.
        break;
      } else if (tryMatch(_ulPattern) || tryMatch(_olPattern)) {
        var precedingWhitespace = match[1];
        var digits = match[2] ?? '';
        if (startNumber == null && digits.isNotEmpty) {
          startNumber = int.parse(digits);
        }
        var marker = match[3];
        var firstWhitespace = match[5] ?? '';
        var restWhitespace = match[6] ?? '';
        var content = match[7] ?? '';

        // Changing the bullet or ordered list delimiter starts a new list.
        if (listMarker != null && listMarker != marker) break;
        listMarker = marker;

        var markerAsSpaces = ' ' * (digits.length + marker.length);
        var baseIndent = precedingWhitespace + markerAsSpaces;
        if (content.isEmpty) {
          // See http://spec.commonmark.org/0.28/#list-items under "3. Item
          // starting with a blank line."
          //
          // If the list item starts with a blank line, the final piece of the
          // indentation is just a single space.
          indent = baseIndent + ' ';
        } else if (restWhitespace.length >= 4) {
          // See http://spec.commonmark.org/0.28/#list-items under "2. Item
          // starting with indented code."
          //
          // If the list item starts with indented code, we need to _not_
          // count any indentation past the required whitespace character.
          indent = baseIndent + firstWhitespace;
        } else {
          indent = baseIndent + firstWhitespace + restWhitespace;
        }

        // Close the item in progress and open the new one.
        finishItem();
        currentItemLines.add(restWhitespace + content);
      } else if (BlockSyntax.isAtBlockEnd(parser)) {
        // Some other block starts here, so the list is over.
        break;
      } else {
        // Text straight after a blank line is not part of the list: it
        // begins a new top-level paragraph.
        if (currentItemLines.isNotEmpty && currentItemLines.last == '') {
          parser.encounteredBlankLine = true;
          break;
        }

        // Anything else is paragraph continuation text.
        currentItemLines.add(parser.current);
      }
      parser.advance();
    }

    finishItem();

    for (var item in items) {
      removeLeadingEmptyLine(item);
    }
    var anyEmptyLines = removeTrailingEmptyLines(items);
    var anyEmptyLinesBetweenBlocks = false;

    var itemNodes = <Element>[];
    for (var item in items) {
      var itemParser = new BlockParser(item.lines, parser.document);
      itemNodes.add(new Element('li', itemParser.parseLines()));
      if (itemParser.encounteredBlankLine) anyEmptyLinesBetweenBlocks = true;
    }

    // Must strip paragraph tags if the list is "tight".
    // http://spec.commonmark.org/0.28/#lists
    var listIsTight = !anyEmptyLines && !anyEmptyLinesBetweenBlocks;

    if (listIsTight) {
      // Replace every top-level paragraph in an item with the paragraph's own
      // children.
      for (var item in itemNodes) {
        for (var i = 0; i < item.children.length; i++) {
          var child = item.children[i];
          if (child is Element && child.tag == 'p') {
            item.children.removeAt(i);
            item.children.insertAll(i, child.children);
          }
        }
      }
    }

    var list = new Element(listTag, itemNodes);
    if (listTag == 'ol' && startNumber != 1) {
      list.attributes['start'] = '$startNumber';
    }
    return list;
  }

  /// Drops the first line of [item] if that line is blank.
  void removeLeadingEmptyLine(ListItem item) {
    var lines = item.lines;
    if (lines.isEmpty) return;
    if (_emptyPattern.hasMatch(lines.first)) lines.removeAt(0);
  }

  /// Removes any trailing empty lines and notes whether any items are
  /// separated by such lines.
  ///
  /// Items consisting of exactly one line are left untouched. Blank lines
  /// trailing the final item are removed but do not count as separating
  /// items.
  bool removeTrailingEmptyLines(List<ListItem> items) {
    var anyEmpty = false;
    for (var i = 0; i < items.length; i++) {
      var lines = items[i].lines;
      if (lines.length == 1) continue;
      var hasFollowingItem = i < items.length - 1;
      while (lines.isNotEmpty && _emptyPattern.hasMatch(lines.last)) {
        if (hasFollowingItem) anyEmpty = true;
        lines.removeLast();
      }
    }
    return anyEmpty;
  }

  /// Returns the width of [input] in columns, with each tab advancing to the
  /// next multiple of four and every other code unit counting as one.
  static int _expandedTabLength(String input) {
    var width = 0;
    for (var unit in input.codeUnits) {
      if (unit == 0x9) {
        width += 4 - (width % 4);
      } else {
        width += 1;
      }
    }
    return width;
  }
}
| |
/// Parses unordered lists into `ul` elements.
class UnorderedListSyntax extends ListSyntax {
  const UnorderedListSyntax();

  @override
  RegExp get pattern => _ulPattern;

  @override
  String get listTag => 'ul';
}
| |
/// Parses ordered lists into `ol` elements.
class OrderedListSyntax extends ListSyntax {
  const OrderedListSyntax();

  @override
  RegExp get pattern => _olPattern;

  @override
  String get listTag => 'ol';
}
| |
/// Parses tables: a head row, a delimiter row of hyphens and pipes, and any
/// number of body rows.
class TableSyntax extends BlockSyntax {
  static final _pipePattern = new RegExp(r'\s*\|\s*');
  static final _openingPipe = new RegExp(r'^\|\s*');
  static final _closingPipe = new RegExp(r'\s*\|$');

  @override
  bool get canEndBlock => false;

  const TableSyntax();

  /// Whether a table starts at the current line.
  ///
  /// That requires the *next* line to be the delimiter row separating the
  /// head row from the body rows, and the current line to split into exactly
  /// as many cells as that delimiter row has columns. Checking the cell count
  /// here, before anything is consumed, leaves a non-matching head line in
  /// place for the other syntaxes; rejecting it only inside [parse] would
  /// come after the line had been consumed, and it would be lost.
  @override
  bool canParse(BlockParser parser) {
    if (!parser.matchesNext(_tablePattern)) return false;
    var columnCount = parseAlignments(parser.next).length;
    return _splitCells(parser.current).length == columnCount;
  }

  /// Parses a table into its three parts:
  ///
  /// * a head row of head cells (`<th>` cells)
  /// * a divider of hyphens and pipes (not rendered)
  /// * many body rows of body cells (`<td>` cells)
  ///
  /// Body rows are padded with empty cells, or truncated, to the number of
  /// columns in the divider. Returns `null` if the head row does not have
  /// that number of cells; [canParse] rules this out, so it can only happen
  /// when [parse] is called directly.
  @override
  Node parse(BlockParser parser) {
    var alignments = parseAlignments(parser.next);
    var columnCount = alignments.length;
    var headRow = parseRow(parser, alignments, 'th');
    if (headRow.children.length != columnCount) {
      return null;
    }
    var head = new Element('thead', [headRow]);

    // Advance past the divider of hyphens.
    parser.advance();

    var rows = <Element>[];
    while (!parser.isDone && !BlockSyntax.isAtBlockEnd(parser)) {
      var row = parseRow(parser, alignments, 'td');
      while (row.children.length < columnCount) {
        // Insert synthetic empty cells.
        row.children.add(new Element.empty('td'));
      }
      while (row.children.length > columnCount) {
        row.children.removeLast();
      }
      rows.add(row);
    }
    if (rows.isEmpty) {
      return new Element('table', [head]);
    } else {
      var body = new Element('tbody', rows);

      return new Element('table', [head, body]);
    }
  }

  /// Returns the alignment of each column of the delimiter row [line]:
  /// `'center'`, `'left'`, `'right'`, or `null` when the column carries no
  /// colon.
  List<String> parseAlignments(String line) {
    line = line.replaceFirst(_openingPipe, '').replaceFirst(_closingPipe, '');
    return line.split('|').map((column) {
      column = column.trim();
      if (column.startsWith(':') && column.endsWith(':')) return 'center';
      if (column.startsWith(':')) return 'left';
      if (column.endsWith(':')) return 'right';
      return null;
    }).toList();
  }

  /// Consumes the current line and returns it as a `tr` element whose cells
  /// are [cellType] elements.
  ///
  /// Each cell whose column has a non-null entry in [alignments] gets a
  /// matching `text-align` style attribute.
  Element parseRow(
      BlockParser parser, List<String> alignments, String cellType) {
    var cells = _splitCells(parser.current);
    parser.advance();

    var row = <Element>[];
    for (var cell in cells) {
      var contents = new UnparsedContent(cell);
      row.add(new Element(cellType, [contents]));
    }

    for (var i = 0; i < row.length && i < alignments.length; i++) {
      if (alignments[i] == null) continue;
      row[i].attributes['style'] = 'text-align: ${alignments[i]};';
    }

    return new Element('tr', row);
  }

  /// Splits a table row [line] into the text of its cells, without touching
  /// any parser state.
  ///
  /// One leading and one trailing pipe are stripped first. A piece ending in
  /// a backslash marks an escaped pipe: the backslash is replaced by `|` and
  /// the piece is joined onto the following one. Such a piece with nothing
  /// after it yields no cell.
  List<String> _splitCells(String line) {
    var pieces = line
        .replaceFirst(_openingPipe, '')
        .replaceFirst(_closingPipe, '')
        .split(_pipePattern);

    var cells = <String>[];
    String pending;
    for (var piece in pieces) {
      if (pending != null) {
        piece = pending + piece;
        pending = null;
      }
      if (piece.endsWith('\\')) {
        pending = piece.substring(0, piece.length - 1) + '|';
        continue;
      }
      cells.add(piece);
    }
    return cells;
  }
}
| |
/// Parses paragraphs of regular text, extracting any reference link
/// definitions found at the front of the paragraph.
class ParagraphSyntax extends BlockSyntax {
  static final _reflinkDefinitionStart = new RegExp(r'[ ]{0,3}\[');

  static final _whitespacePattern = new RegExp(r'^\s*$');

  /// Matches a complete reference link definition.
  ///
  /// Group 1 is the label, group 2 or 3 the destination (with or without
  /// angle brackets), and group 4 the title including its delimiters, or the
  /// empty string when there is no title. Compiled once, because
  /// [_parseReflinkDefinition] runs repeatedly while a paragraph is scanned.
  static final _reflinkDefinitionPattern = new RegExp(
      // Leading indentation.
      r'''^[ ]{0,3}'''
      // Reference id in brackets, and URL.
      r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*'''
      // Title in double or single quotes, or parens.
      r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''',
      multiLine: true);

  @override
  bool get canEndBlock => false;

  const ParagraphSyntax();

  /// Always `true`: any line can be read as paragraph text.
  @override
  bool canParse(BlockParser parser) => true;

  /// Consumes lines until one would end the paragraph, and returns a `p`
  /// element for them.
  ///
  /// Reference link definitions at the front are registered on the document
  /// and removed; if nothing else remains, an empty [Text] node is returned
  /// instead.
  @override
  Node parse(BlockParser parser) {
    var childLines = <String>[];

    // Eat until we hit something that ends a paragraph.
    while (!BlockSyntax.isAtBlockEnd(parser)) {
      childLines.add(parser.current);
      parser.advance();
    }

    var paragraphLines = _extractReflinkDefinitions(parser, childLines);
    if (paragraphLines == null) {
      // Paragraph consisted solely of reference link definitions.
      return new Text('');
    } else {
      var contents = new UnparsedContent(paragraphLines.join('\n'));
      return new Element('p', [contents]);
    }
  }

  /// Extracts reference link definitions from the front of the paragraph and
  /// returns the remaining paragraph lines, or `null` if none remain.
  List<String> _extractReflinkDefinitions(
      BlockParser parser, List<String> lines) {
    bool lineStartsReflinkDefinition(int i) =>
        lines[i].startsWith(_reflinkDefinitionStart);

    var i = 0;
    loopOverDefinitions:
    while (true) {
      // Check for reflink definitions.
      if (!lineStartsReflinkDefinition(i)) {
        // It's paragraph content from here on out.
        break;
      }
      var contents = lines[i];
      var j = i + 1;
      while (j < lines.length) {
        // Check to see if the _next_ line might start a new reflink
        // definition. Even if it turns out not to be one, a line starting
        // with a '[' is not a part of _this_ possible reflink definition.
        if (lineStartsReflinkDefinition(j)) {
          // Try to parse [contents] as a reflink definition.
          if (_parseReflinkDefinition(parser, contents)) {
            // Loop again, starting at the next possible reflink definition.
            i = j;
            continue loopOverDefinitions;
          } else {
            // Could not parse [contents] as a reflink definition.
            break;
          }
        } else {
          contents = contents + '\n' + lines[j];
          j++;
        }
      }
      // End of the block.
      if (_parseReflinkDefinition(parser, contents)) {
        i = j;
        break;
      }

      // It may be that there is a reflink definition starting at [i], but it
      // does not extend all the way to [j], such as:
      //
      //     [link]: url     // line i
      //     "title"
      //     garbage
      //     [link2]: url   // line j
      //
      // In this case, [i, i+1] is a reflink definition, and the rest is
      // paragraph content.
      while (j >= i) {
        // This isn't the most efficient loop, what with the Iterable
        // allocation (`getRange`) followed by a String allocation on every
        // pass, but we must walk backwards, checking each range.
        contents = lines.getRange(i, j).join('\n');
        if (_parseReflinkDefinition(parser, contents)) {
          // That is the last reflink definition. The rest is paragraph
          // content.
          i = j;
          break;
        }
        j--;
      }
      // The ending was not a reflink definition at all. Just paragraph
      // content.

      break;
    }

    if (i == lines.length) {
      // No paragraph content.
      return null;
    } else {
      // Ends with paragraph content.
      return lines.sublist(i);
    }
  }

  /// Parses [contents] as a reference link definition.
  ///
  /// On success the definition is added to the document's link references,
  /// unless a reference with the same normalized label is already present.
  ///
  /// Returns whether [contents] could be parsed as a reference link
  /// definition.
  bool _parseReflinkDefinition(BlockParser parser, String contents) {
    var match = _reflinkDefinitionPattern.firstMatch(contents);
    if (match == null) {
      // Not a reference link definition.
      return false;
    }
    if (match[0].length < contents.length) {
      // The definition does not account for all of [contents]. No good.
      return false;
    }

    var label = match[1];
    var destination = match[2] ?? match[3];
    var title = match[4];

    // The label must contain at least one non-whitespace character.
    if (_whitespacePattern.hasMatch(label)) {
      return false;
    }

    if (title == '') {
      // No title.
      title = null;
    } else {
      // Remove "", '', or ().
      title = title.substring(1, title.length - 1);
    }

    // References are case-insensitive, and internal whitespace is compressed.
    label =
        label.toLowerCase().trim().replaceAll(_oneOrMoreWhitespacePattern, ' ');

    parser.document.linkReferences
        .putIfAbsent(label, () => new LinkReference(label, destination, title));
    return true;
  }
}