blob: ca6102f1b77584f948c0f11efda0e44569eec042 [file] [log] [blame]
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'package:charcode/charcode.dart';
import 'ast.dart';
import 'document.dart';
import 'util.dart';
/// The line contains only whitespace or is empty.
final _emptyPattern = RegExp(r'^(?:[ \t]*)$');
/// A series of `=` or `-` (on the next line) define setext-style headers.
final _setextPattern = RegExp(r'^[ ]{0,3}(=+|-+)\s*$');
/// Leading (and trailing) `#` define atx-style headers.
///
/// Starts with 1-6 unescaped `#` characters which must not be followed by a
/// non-space character. Line may end with any number of `#` characters,.
final _headerPattern = RegExp(r'^ {0,3}(#{1,6})[ \x09\x0b\x0c](.*?)#*$');
/// The line starts with `>` with one optional space after.
final _blockquotePattern = RegExp(r'^[ ]{0,3}>[ ]?(.*)$');
/// A line indented four spaces. Used for code blocks and lists.
final _indentPattern = RegExp(r'^(?: | {0,3}\t)(.*)$');
/// Fenced code block.
final _codeFencePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');
/// Three or more hyphens, asterisks or underscores by themselves. Note that
/// a line like `----` is valid as both HR and SETEXT. In case of a tie,
/// SETEXT should win.
final _hrPattern = RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$');
/// A line starting with one of these markers: `-`, `*`, `+`. May have up to
/// three leading spaces before the marker and any number of spaces or tabs
/// after.
///
/// Contains a dummy group at [2], so that the groups in [_ulPattern] and
/// [_olPattern] match up; in both, [2] is the length of the number that begins
/// the list marker.
final _ulPattern = RegExp(r'^([ ]{0,3})()([*+-])(([ \t])([ \t]*)(.*))?$');
/// A line starting with a number like `123.`. May have up to three leading
/// spaces before the marker and any number of spaces or tabs after.
final _olPattern =
RegExp(r'^([ ]{0,3})(\d{1,9})([\.)])(([ \t])([ \t]*)(.*))?$');
/// A line of hyphens separated by at least one pipe.
final _tablePattern = RegExp(r'^[ ]{0,3}\|?( *:?\-+:? *\|)+( *:?\-+:? *)?$');
/// Maintains the internal state needed to parse a series of lines into blocks
/// of Markdown suitable for further inline parsing.
class BlockParser {
final List<String> lines;
/// The Markdown document this parser is parsing.
final Document document;
/// The enabled block syntaxes.
///
/// To turn a series of lines into blocks, each of these will be tried in
/// turn. Order matters here.
final List<BlockSyntax> blockSyntaxes = [];
/// Index of the current line.
int _pos = 0;
/// Whether the parser has encountered a blank line between two block-level
/// elements.
bool encounteredBlankLine = false;
/// The collection of built-in block parsers.
final List<BlockSyntax> standardBlockSyntaxes = [
const EmptyBlockSyntax(),
const BlockTagBlockHtmlSyntax(),
LongBlockHtmlSyntax(r'^ {0,3}<pre(?:\s|>|$)', '</pre>'),
LongBlockHtmlSyntax(r'^ {0,3}<script(?:\s|>|$)', '</script>'),
LongBlockHtmlSyntax(r'^ {0,3}<style(?:\s|>|$)', '</style>'),
LongBlockHtmlSyntax('^ {0,3}<!--', '-->'),
LongBlockHtmlSyntax('^ {0,3}<\\?', '\\?>'),
LongBlockHtmlSyntax('^ {0,3}<![A-Z]', '>'),
LongBlockHtmlSyntax('^ {0,3}<!\\[CDATA\\[', '\\]\\]>'),
const OtherTagBlockHtmlSyntax(),
const SetextHeaderSyntax(),
const HeaderSyntax(),
const CodeBlockSyntax(),
const BlockquoteSyntax(),
const HorizontalRuleSyntax(),
const UnorderedListSyntax(),
const OrderedListSyntax(),
const ParagraphSyntax()
];
BlockParser(this.lines, this.document) {
blockSyntaxes.addAll(document.blockSyntaxes);
blockSyntaxes.addAll(standardBlockSyntaxes);
}
/// Gets the current line.
String get current => lines[_pos];
/// Gets the line after the current one or `null` if there is none.
String get next {
// Don't read past the end.
if (_pos >= lines.length - 1) return null;
return lines[_pos + 1];
}
/// Gets the line that is [linesAhead] lines ahead of the current one, or
/// `null` if there is none.
///
/// `peek(0)` is equivalent to [current].
///
/// `peek(1)` is equivalent to [next].
String peek(int linesAhead) {
if (linesAhead < 0) {
throw ArgumentError('Invalid linesAhead: $linesAhead; must be >= 0.');
}
// Don't read past the end.
if (_pos >= lines.length - linesAhead) return null;
return lines[_pos + linesAhead];
}
void advance() {
_pos++;
}
bool get isDone => _pos >= lines.length;
/// Gets whether or not the current line matches the given pattern.
bool matches(RegExp regex) {
if (isDone) return false;
return regex.hasMatch(current);
}
/// Gets whether or not the next line matches the given pattern.
bool matchesNext(RegExp regex) {
if (next == null) return false;
return regex.hasMatch(next);
}
List<Node> parseLines() {
var blocks = <Node>[];
while (!isDone) {
for (var syntax in blockSyntaxes) {
if (syntax.canParse(this)) {
var block = syntax.parse(this);
if (block != null) blocks.add(block);
break;
}
}
}
return blocks;
}
}
abstract class BlockSyntax {
const BlockSyntax();
/// Gets the regex used to identify the beginning of this block, if any.
RegExp get pattern => null;
bool get canEndBlock => true;
bool canParse(BlockParser parser) {
return pattern.hasMatch(parser.current);
}
Node parse(BlockParser parser);
List<String> parseChildLines(BlockParser parser) {
// Grab all of the lines that form the block element.
var childLines = <String>[];
while (!parser.isDone) {
var match = pattern.firstMatch(parser.current);
if (match == null) break;
childLines.add(match[1]);
parser.advance();
}
return childLines;
}
/// Gets whether or not [parser]'s current line should end the previous block.
static bool isAtBlockEnd(BlockParser parser) {
if (parser.isDone) return true;
return parser.blockSyntaxes.any((s) => s.canParse(parser) && s.canEndBlock);
}
/// Generates a valid HTML anchor from the inner text of [element].
static String generateAnchorHash(Element element) =>
element.children.first.textContent
.toLowerCase()
.trim()
.replaceAll(RegExp(r'[^a-z0-9 _-]'), '')
.replaceAll(RegExp(r'\s'), '-');
}
class EmptyBlockSyntax extends BlockSyntax {
@override
RegExp get pattern => _emptyPattern;
const EmptyBlockSyntax();
@override
Node parse(BlockParser parser) {
parser.encounteredBlankLine = true;
parser.advance();
// Don't actually emit anything.
return null;
}
}
/// Parses setext-style headers.
class SetextHeaderSyntax extends BlockSyntax {
const SetextHeaderSyntax();
@override
bool canParse(BlockParser parser) {
if (!_interperableAsParagraph(parser.current)) return false;
var i = 1;
while (true) {
var nextLine = parser.peek(i);
if (nextLine == null) {
// We never reached an underline.
return false;
}
if (_setextPattern.hasMatch(nextLine)) {
return true;
}
// Ensure that we're still in something like paragraph text.
if (!_interperableAsParagraph(nextLine)) {
return false;
}
i++;
}
}
@override
Node parse(BlockParser parser) {
var lines = <String>[];
String tag;
while (!parser.isDone) {
var match = _setextPattern.firstMatch(parser.current);
if (match == null) {
// More text.
lines.add(parser.current);
parser.advance();
continue;
} else {
// The underline.
tag = (match[1][0] == '=') ? 'h1' : 'h2';
parser.advance();
break;
}
}
var contents = UnparsedContent(lines.join('\n'));
return Element(tag, [contents]);
}
bool _interperableAsParagraph(String line) =>
!(_indentPattern.hasMatch(line) ||
_codeFencePattern.hasMatch(line) ||
_headerPattern.hasMatch(line) ||
_blockquotePattern.hasMatch(line) ||
_hrPattern.hasMatch(line) ||
_ulPattern.hasMatch(line) ||
_olPattern.hasMatch(line) ||
_emptyPattern.hasMatch(line));
}
/// Parses setext-style headers, and adds generated IDs to the generated
/// elements.
class SetextHeaderWithIdSyntax extends SetextHeaderSyntax {
const SetextHeaderWithIdSyntax();
@override
Node parse(BlockParser parser) {
var element = super.parse(parser) as Element;
element.generatedId = BlockSyntax.generateAnchorHash(element);
return element;
}
}
/// Parses atx-style headers: `## Header ##`.
class HeaderSyntax extends BlockSyntax {
@override
RegExp get pattern => _headerPattern;
const HeaderSyntax();
@override
Node parse(BlockParser parser) {
var match = pattern.firstMatch(parser.current);
parser.advance();
var level = match[1].length;
var contents = UnparsedContent(match[2].trim());
return Element('h$level', [contents]);
}
}
/// Parses atx-style headers, and adds generated IDs to the generated elements.
class HeaderWithIdSyntax extends HeaderSyntax {
const HeaderWithIdSyntax();
@override
Node parse(BlockParser parser) {
var element = super.parse(parser) as Element;
element.generatedId = BlockSyntax.generateAnchorHash(element);
return element;
}
}
/// Parses email-style blockquotes: `> quote`.
class BlockquoteSyntax extends BlockSyntax {
@override
RegExp get pattern => _blockquotePattern;
const BlockquoteSyntax();
@override
List<String> parseChildLines(BlockParser parser) {
// Grab all of the lines that form the blockquote, stripping off the ">".
var childLines = <String>[];
while (!parser.isDone) {
var match = pattern.firstMatch(parser.current);
if (match != null) {
childLines.add(match[1]);
parser.advance();
continue;
}
// A paragraph continuation is OK. This is content that cannot be parsed
// as any other syntax except Paragraph, and it doesn't match the bar in
// a Setext header.
if (parser.blockSyntaxes.firstWhere((s) => s.canParse(parser))
is ParagraphSyntax) {
childLines.add(parser.current);
parser.advance();
} else {
break;
}
}
return childLines;
}
@override
Node parse(BlockParser parser) {
var childLines = parseChildLines(parser);
// Recursively parse the contents of the blockquote.
var children = BlockParser(childLines, parser.document).parseLines();
return Element('blockquote', children);
}
}
/// Parses preformatted code blocks that are indented four spaces.
class CodeBlockSyntax extends BlockSyntax {
@override
RegExp get pattern => _indentPattern;
@override
bool get canEndBlock => false;
const CodeBlockSyntax();
@override
List<String> parseChildLines(BlockParser parser) {
var childLines = <String>[];
while (!parser.isDone) {
var match = pattern.firstMatch(parser.current);
if (match != null) {
childLines.add(match[1]);
parser.advance();
} else {
// If there's a codeblock, then a newline, then a codeblock, keep the
// code blocks together.
var nextMatch =
parser.next != null ? pattern.firstMatch(parser.next) : null;
if (parser.current.trim() == '' && nextMatch != null) {
childLines.add('');
childLines.add(nextMatch[1]);
parser.advance();
parser.advance();
} else {
break;
}
}
}
return childLines;
}
@override
Node parse(BlockParser parser) {
var childLines = parseChildLines(parser);
// The Markdown tests expect a trailing newline.
childLines.add('');
var content = childLines.join('\n');
if (parser.document.encodeHtml) {
content = escapeHtml(content);
}
return Element('pre', [Element.text('code', content)]);
}
}
/// Parses preformatted code blocks between two ~~~ or ``` sequences.
///
/// See the CommonMark spec: https://spec.commonmark.org/0.29/#fenced-code-blocks
class FencedCodeBlockSyntax extends BlockSyntax {
@override
RegExp get pattern => _codeFencePattern;
const FencedCodeBlockSyntax();
@override
bool canParse(BlockParser parser) {
final match = pattern.firstMatch(parser.current);
if (match == null) return false;
final codeFence = match.group(1);
final infoString = match.group(2);
// From the CommonMark spec:
//
// > If the info string comes after a backtick fence, it may not contain
// > any backtick characters.
return (codeFence.codeUnitAt(0) != $backquote ||
!infoString.codeUnits.contains($backquote));
}
@override
List<String> parseChildLines(BlockParser parser, [String endBlock]) {
endBlock ??= '';
var childLines = <String>[];
parser.advance();
while (!parser.isDone) {
var match = pattern.firstMatch(parser.current);
if (match == null || !match[1].startsWith(endBlock)) {
childLines.add(parser.current);
parser.advance();
} else {
parser.advance();
break;
}
}
return childLines;
}
@override
Node parse(BlockParser parser) {
// Get the syntax identifier, if there is one.
var match = pattern.firstMatch(parser.current);
var endBlock = match.group(1);
var infoString = match.group(2);
var childLines = parseChildLines(parser, endBlock);
// The Markdown tests expect a trailing newline.
childLines.add('');
var text = childLines.join('\n');
if (parser.document.encodeHtml) {
text = escapeHtml(text);
}
var code = Element.text('code', text);
// the info-string should be trimmed
// http://spec.commonmark.org/0.22/#example-100
infoString = infoString.trim();
if (infoString.isNotEmpty) {
// only use the first word in the syntax
// http://spec.commonmark.org/0.22/#example-100
var firstSpace = infoString.indexOf(' ');
if (firstSpace >= 0) {
infoString = infoString.substring(0, firstSpace);
}
if (parser.document.encodeHtml) {
infoString = escapeHtmlAttribute(infoString);
}
code.attributes['class'] = 'language-$infoString';
}
var element = Element('pre', [code]);
return element;
}
}
/// Parses horizontal rules like `---`, `_ _ _`, `* * *`, etc.
class HorizontalRuleSyntax extends BlockSyntax {
@override
RegExp get pattern => _hrPattern;
const HorizontalRuleSyntax();
@override
Node parse(BlockParser parser) {
parser.advance();
return Element.empty('hr');
}
}
/// Parses inline HTML at the block level. This differs from other Markdown
/// implementations in several ways:
///
/// 1. This one is way way WAY simpler.
/// 2. Essentially no HTML parsing or validation is done. We're a Markdown
/// parser, not an HTML parser!
abstract class BlockHtmlSyntax extends BlockSyntax {
@override
bool get canEndBlock => true;
const BlockHtmlSyntax();
}
class BlockTagBlockHtmlSyntax extends BlockHtmlSyntax {
static final _pattern = RegExp(
r'^ {0,3}</?(?:address|article|aside|base|basefont|blockquote|body|'
r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|'
r'figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|'
r'iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|'
r'option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|'
r'title|tr|track|ul)'
r'(?:\s|>|/>|$)');
/// The [_pattern] regular expression above is very expensive, even on
/// paragraphs of Markdown with no HTML. This regular expression can be used
/// first as a basic check that the input might possibly be an HTML block
/// tag, which occur very rarely in typical Markdown.
static final _openBracketPattern = RegExp(r'^ {0,3}<');
@override
RegExp get pattern => _pattern;
const BlockTagBlockHtmlSyntax();
@override
bool canParse(BlockParser parser) {
if (!_openBracketPattern.hasMatch(parser.current)) return false;
return super.canParse(parser);
}
@override
Node parse(BlockParser parser) {
var childLines = <String>[];
// Eat until we hit a blank line.
while (!parser.isDone && !parser.matches(_emptyPattern)) {
childLines.add(parser.current);
parser.advance();
}
return Text(childLines.join('\n'));
}
}
class OtherTagBlockHtmlSyntax extends BlockTagBlockHtmlSyntax {
@override
bool get canEndBlock => false;
// Really hacky way to detect "other" HTML. This matches:
//
// * any opening spaces
// * open bracket and maybe a slash ("<" or "</")
// * some word characters
// * either:
// * a close bracket, or
// * whitespace followed by not-brackets followed by a close bracket
// * possible whitespace and the end of the line.
@override
RegExp get pattern => RegExp(r'^ {0,3}</?\w+(?:>|\s+[^>]*>)\s*$');
const OtherTagBlockHtmlSyntax();
}
/// A BlockHtmlSyntax that has a specific `endPattern`.
///
/// In practice this means that the syntax dominates; it is allowed to eat
/// many lines, including blank lines, before matching its `endPattern`.
class LongBlockHtmlSyntax extends BlockHtmlSyntax {
@override
final RegExp pattern;
final RegExp _endPattern;
LongBlockHtmlSyntax(String pattern, String endPattern)
: pattern = RegExp(pattern),
_endPattern = RegExp(endPattern);
@override
Node parse(BlockParser parser) {
var childLines = <String>[];
// Eat until we hit [endPattern].
while (!parser.isDone) {
childLines.add(parser.current);
if (parser.matches(_endPattern)) break;
parser.advance();
}
parser.advance();
return Text(childLines.join('\n'));
}
}
class ListItem {
bool forceBlock = false;
final List<String> lines;
ListItem(this.lines);
}
/// Base class for both ordered and unordered lists.
abstract class ListSyntax extends BlockSyntax {
@override
bool get canEndBlock => true;
String get listTag;
const ListSyntax();
/// A list of patterns that can start a valid block within a list item.
static final blocksInList = [
_blockquotePattern,
_headerPattern,
_hrPattern,
_indentPattern,
_ulPattern,
_olPattern
];
static final _whitespaceRe = RegExp('[ \t]*');
@override
Node parse(BlockParser parser) {
var items = <ListItem>[];
var childLines = <String>[];
void endItem() {
if (childLines.isNotEmpty) {
items.add(ListItem(childLines));
childLines = <String>[];
}
}
Match match;
bool tryMatch(RegExp pattern) {
match = pattern.firstMatch(parser.current);
return match != null;
}
String listMarker;
String indent;
// In case the first number in an ordered list is not 1, use it as the
// "start".
int startNumber;
while (!parser.isDone) {
var leadingSpace = _whitespaceRe.matchAsPrefix(parser.current).group(0);
var leadingExpandedTabLength = _expandedTabLength(leadingSpace);
if (tryMatch(_emptyPattern)) {
if (_emptyPattern.hasMatch(parser.next ?? '')) {
// Two blank lines ends a list.
break;
}
// Add a blank line to the current list item.
childLines.add('');
} else if (indent != null && indent.length <= leadingExpandedTabLength) {
// Strip off indent and add to current item.
var line = parser.current
.replaceFirst(leadingSpace, ' ' * leadingExpandedTabLength)
.replaceFirst(indent, '');
childLines.add(line);
} else if (tryMatch(_hrPattern)) {
// Horizontal rule takes precedence to a new list item.
break;
} else if (tryMatch(_ulPattern) || tryMatch(_olPattern)) {
var precedingWhitespace = match[1];
var digits = match[2] ?? '';
if (startNumber == null && digits.isNotEmpty) {
startNumber = int.parse(digits);
}
var marker = match[3];
var firstWhitespace = match[5] ?? '';
var restWhitespace = match[6] ?? '';
var content = match[7] ?? '';
var isBlank = content.isEmpty;
if (listMarker != null && listMarker != marker) {
// Changing the bullet or ordered list delimiter starts a new list.
break;
}
listMarker = marker;
var markerAsSpaces = ' ' * (digits.length + marker.length);
if (isBlank) {
// See http://spec.commonmark.org/0.28/#list-items under "3. Item
// starting with a blank line."
//
// If the list item starts with a blank line, the final piece of the
// indentation is just a single space.
indent = precedingWhitespace + markerAsSpaces + ' ';
} else if (restWhitespace.length >= 4) {
// See http://spec.commonmark.org/0.28/#list-items under "2. Item
// starting with indented code."
//
// If the list item starts with indented code, we need to _not_ count
// any indentation past the required whitespace character.
indent = precedingWhitespace + markerAsSpaces + firstWhitespace;
} else {
indent = precedingWhitespace +
markerAsSpaces +
firstWhitespace +
restWhitespace;
}
// End the current list item and start a new one.
endItem();
childLines.add(restWhitespace + content);
} else if (BlockSyntax.isAtBlockEnd(parser)) {
// Done with the list.
break;
} else {
// If the previous item is a blank line, this means we're done with the
// list and are starting a new top-level paragraph.
if ((childLines.isNotEmpty) && (childLines.last == '')) {
parser.encounteredBlankLine = true;
break;
}
// Anything else is paragraph continuation text.
childLines.add(parser.current);
}
parser.advance();
}
endItem();
var itemNodes = <Element>[];
items.forEach(removeLeadingEmptyLine);
var anyEmptyLines = removeTrailingEmptyLines(items);
var anyEmptyLinesBetweenBlocks = false;
for (var item in items) {
var itemParser = BlockParser(item.lines, parser.document);
var children = itemParser.parseLines();
itemNodes.add(Element('li', children));
anyEmptyLinesBetweenBlocks =
anyEmptyLinesBetweenBlocks || itemParser.encounteredBlankLine;
}
// Must strip paragraph tags if the list is "tight".
// http://spec.commonmark.org/0.28/#lists
var listIsTight = !anyEmptyLines && !anyEmptyLinesBetweenBlocks;
if (listIsTight) {
// We must post-process the list items, converting any top-level paragraph
// elements to just text elements.
for (var item in itemNodes) {
for (var i = 0; i < item.children.length; i++) {
var child = item.children[i];
if (child is Element && child.tag == 'p') {
item.children.removeAt(i);
item.children.insertAll(i, child.children);
}
}
}
}
if (listTag == 'ol' && startNumber != 1) {
return Element(listTag, itemNodes)..attributes['start'] = '$startNumber';
} else {
return Element(listTag, itemNodes);
}
}
void removeLeadingEmptyLine(ListItem item) {
if (item.lines.isNotEmpty && _emptyPattern.hasMatch(item.lines.first)) {
item.lines.removeAt(0);
}
}
/// Removes any trailing empty lines and notes whether any items are separated
/// by such lines.
bool removeTrailingEmptyLines(List<ListItem> items) {
var anyEmpty = false;
for (var i = 0; i < items.length; i++) {
if (items[i].lines.length == 1) continue;
while (items[i].lines.isNotEmpty &&
_emptyPattern.hasMatch(items[i].lines.last)) {
if (i < items.length - 1) {
anyEmpty = true;
}
items[i].lines.removeLast();
}
}
return anyEmpty;
}
static int _expandedTabLength(String input) {
var length = 0;
for (var char in input.codeUnits) {
length += char == 0x9 ? 4 - (length % 4) : 1;
}
return length;
}
}
/// Parses unordered lists.
class UnorderedListSyntax extends ListSyntax {
@override
RegExp get pattern => _ulPattern;
@override
String get listTag => 'ul';
const UnorderedListSyntax();
}
/// Parses ordered lists.
class OrderedListSyntax extends ListSyntax {
@override
RegExp get pattern => _olPattern;
@override
String get listTag => 'ol';
const OrderedListSyntax();
}
/// Parses tables.
class TableSyntax extends BlockSyntax {
@override
bool get canEndBlock => false;
const TableSyntax();
@override
bool canParse(BlockParser parser) {
// Note: matches *next* line, not the current one. We're looking for the
// bar separating the head row from the body rows.
return parser.matchesNext(_tablePattern);
}
/// Parses a table into its three parts:
///
/// * a head row of head cells (`<th>` cells)
/// * a divider of hyphens and pipes (not rendered)
/// * many body rows of body cells (`<td>` cells)
@override
Node parse(BlockParser parser) {
var alignments = parseAlignments(parser.next);
var columnCount = alignments.length;
var headRow = parseRow(parser, alignments, 'th');
if (headRow.children.length != columnCount) {
return null;
}
var head = Element('thead', [headRow]);
// Advance past the divider of hyphens.
parser.advance();
var rows = <Element>[];
while (!parser.isDone && !BlockSyntax.isAtBlockEnd(parser)) {
var row = parseRow(parser, alignments, 'td');
while (row.children.length < columnCount) {
// Insert synthetic empty cells.
row.children.add(Element.empty('td'));
}
while (row.children.length > columnCount) {
row.children.removeLast();
}
rows.add(row);
}
if (rows.isEmpty) {
return Element('table', [head]);
} else {
var body = Element('tbody', rows);
return Element('table', [head, body]);
}
}
List<String> parseAlignments(String line) {
var startIndex = _walkPastOpeningPipe(line);
var endIndex = line.length - 1;
while (endIndex > 0) {
var ch = line.codeUnitAt(endIndex);
if (ch == $pipe) {
endIndex--;
break;
}
if (ch != $space && ch != $tab) {
break;
}
endIndex--;
}
// Optimization: We walk [line] too many times. One lap should do it.
return line.substring(startIndex, endIndex + 1).split('|').map((column) {
column = column.trim();
if (column.startsWith(':') && column.endsWith(':')) return 'center';
if (column.startsWith(':')) return 'left';
if (column.endsWith(':')) return 'right';
return null;
}).toList();
}
/// Parses a table row at the current line into a table row element, with
/// parsed table cells.
///
/// [alignments] is used to annotate an alignment on each cell, and
/// [cellType] is used to declare either "td" or "th" cells.
Element parseRow(
BlockParser parser, List<String> alignments, String cellType) {
var line = parser.current;
var cells = <String>[];
var index = _walkPastOpeningPipe(line);
var cellBuffer = StringBuffer();
while (true) {
if (index >= line.length) {
// This row ended without a trailing pipe, which is fine.
cells.add(cellBuffer.toString().trimRight());
cellBuffer.clear();
break;
}
var ch = line.codeUnitAt(index);
if (ch == $backslash) {
if (index == line.length - 1) {
// A table row ending in a backslash is not well-specified, but it
// looks like GitHub just allows the character as part of the text of
// the last cell.
cellBuffer.writeCharCode(ch);
cells.add(cellBuffer.toString().trimRight());
cellBuffer.clear();
break;
}
var escaped = line.codeUnitAt(index + 1);
if (escaped == $pipe) {
// GitHub Flavored Markdown has a strange bit here; the pipe is to be
// escaped before any other inline processing. One consequence, for
// example, is that "| `\|` |" should be parsed as a cell with a code
// element with text "|", rather than "\|". Most parsers are not
// compliant with this corner, but this is what is specified, and what
// GitHub does in practice.
cellBuffer.writeCharCode(escaped);
} else {
// The [InlineParser] will handle the escaping.
cellBuffer.writeCharCode(ch);
cellBuffer.writeCharCode(escaped);
}
index += 2;
} else if (ch == $pipe) {
cells.add(cellBuffer.toString().trimRight());
cellBuffer.clear();
// Walk forward past any whitespace which leads the next cell.
index++;
index = _walkPastWhitespace(line, index);
if (index >= line.length) {
// This row ended with a trailing pipe.
break;
}
} else {
cellBuffer.writeCharCode(ch);
index++;
}
}
parser.advance();
var row = [
for (var cell in cells) Element(cellType, [UnparsedContent(cell)])
];
for (var i = 0; i < row.length && i < alignments.length; i++) {
if (alignments[i] == null) continue;
row[i].attributes['style'] = 'text-align: ${alignments[i]};';
}
return Element('tr', row);
}
/// Walks past whitespace in [line] starting at [index].
///
/// Returns the index of the first non-whitespace character.
int _walkPastWhitespace(String line, int index) {
while (index < line.length) {
var ch = line.codeUnitAt(index);
if (ch != $space && ch != $tab) {
break;
}
index++;
}
return index;
}
/// Walks past the opening pipe (and any whitespace that surrounds it) in
/// [line].
///
/// Returns the index of the first non-whitespace character after the pipe.
/// If no opening pipe is found, this just returns the index of the first
/// non-whitespace character.
int _walkPastOpeningPipe(String line) {
var index = 0;
while (index < line.length) {
var ch = line.codeUnitAt(index);
if (ch == $pipe) {
index++;
index = _walkPastWhitespace(line, index);
}
if (ch != $space && ch != $tab) {
// No leading pipe.
break;
}
index++;
}
return index;
}
}
/// Parses paragraphs of regular text.
class ParagraphSyntax extends BlockSyntax {
static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\[');
static final _whitespacePattern = RegExp(r'^\s*$');
@override
bool get canEndBlock => false;
const ParagraphSyntax();
@override
bool canParse(BlockParser parser) => true;
@override
Node parse(BlockParser parser) {
var childLines = <String>[];
// Eat until we hit something that ends a paragraph.
while (!BlockSyntax.isAtBlockEnd(parser)) {
childLines.add(parser.current);
parser.advance();
}
var paragraphLines = _extractReflinkDefinitions(parser, childLines);
if (paragraphLines == null) {
// Paragraph consisted solely of reference link definitions.
return Text('');
} else {
var contents = UnparsedContent(paragraphLines.join('\n'));
return Element('p', [contents]);
}
}
/// Extract reference link definitions from the front of the paragraph, and
/// return the remaining paragraph lines.
List<String> _extractReflinkDefinitions(
BlockParser parser, List<String> lines) {
bool lineStartsReflinkDefinition(int i) =>
lines[i].startsWith(_reflinkDefinitionStart);
var i = 0;
loopOverDefinitions:
while (true) {
// Check for reflink definitions.
if (!lineStartsReflinkDefinition(i)) {
// It's paragraph content from here on out.
break;
}
var contents = lines[i];
var j = i + 1;
while (j < lines.length) {
// Check to see if the _next_ line might start a new reflink definition.
// Even if it turns out not to be, but it started with a '[', then it
// is not a part of _this_ possible reflink definition.
if (lineStartsReflinkDefinition(j)) {
// Try to parse [contents] as a reflink definition.
if (_parseReflinkDefinition(parser, contents)) {
// Loop again, starting at the next possible reflink definition.
i = j;
continue loopOverDefinitions;
} else {
// Could not parse [contents] as a reflink definition.
break;
}
} else {
contents = contents + '\n' + lines[j];
j++;
}
}
// End of the block.
if (_parseReflinkDefinition(parser, contents)) {
i = j;
break;
}
// It may be that there is a reflink definition starting at [i], but it
// does not extend all the way to [j], such as:
//
// [link]: url // line i
// "title"
// garbage
// [link2]: url // line j
//
// In this case, [i, i+1] is a reflink definition, and the rest is
// paragraph content.
while (j >= i) {
// This isn't the most efficient loop, what with this big ole'
// Iterable allocation (`getRange`) followed by a big 'ole String
// allocation, but we
// must walk backwards, checking each range.
contents = lines.getRange(i, j).join('\n');
if (_parseReflinkDefinition(parser, contents)) {
// That is the last reflink definition. The rest is paragraph
// content.
i = j;
break;
}
j--;
}
// The ending was not a reflink definition at all. Just paragraph
// content.
break;
}
if (i == lines.length) {
// No paragraph content.
return null;
} else {
// Ends with paragraph content.
return lines.sublist(i);
}
}
// Parse [contents] as a reference link definition.
//
// Also adds the reference link definition to the document.
//
// Returns whether [contents] could be parsed as a reference link definition.
bool _parseReflinkDefinition(BlockParser parser, String contents) {
var pattern = RegExp(
// Leading indentation.
r'''^[ ]{0,3}'''
// Reference id in brackets, and URL.
r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*'''
// Title in double or single quotes, or parens.
r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''',
multiLine: true);
var match = pattern.firstMatch(contents);
if (match == null) {
// Not a reference link definition.
return false;
}
if (match[0].length < contents.length) {
// Trailing text. No good.
return false;
}
var label = match[1];
var destination = match[2] ?? match[3];
var title = match[4];
// The label must contain at least one non-whitespace character.
if (_whitespacePattern.hasMatch(label)) {
return false;
}
if (title == '') {
// No title.
title = null;
} else {
// Remove "", '', or ().
title = title.substring(1, title.length - 1);
}
// References are case-insensitive, and internal whitespace is compressed.
label = normalizeLinkLabel(label);
parser.document.linkReferences
.putIfAbsent(label, () => LinkReference(label, destination, title));
return true;
}
}