blob: aaf4de76a1677c972f81cdb93ec01edf0b2c7995 [file] [log] [blame]
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import '../ast.dart';
import '../block_parser.dart';
import '../document.dart';
import '../patterns.dart';
import '../util.dart';
import 'block_syntax.dart';
/// Parses paragraphs of regular text.
///
/// [canParse] accepts every line, and [parse] consumes lines until
/// [BlockSyntax.isAtBlockEnd] reports the end of the block. Reference link
/// definitions found at the front of the consumed lines are registered on the
/// parser's document and removed from the paragraph content.
class ParagraphSyntax extends BlockSyntax {
  /// Matches the beginning of a line that may start a reference link
  /// definition: up to three spaces followed by `[`.
  static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\[');

  /// Matches a string that is empty or consists only of whitespace.
  static final _whitespacePattern = RegExp(r'^\s*$');

  /// Matches a complete reference link definition.
  ///
  /// Group 1 is the label, group 2 the destination when written as `<...>`,
  /// group 3 the bare destination, and group 4 the title including its
  /// delimiters (or the empty string when there is no title).
  ///
  /// Compiled once and shared, because [_parseReflinkDefinition] is called
  /// repeatedly for every paragraph that starts with `[`.
  static final _reflinkDefinitionPattern = RegExp(
    // Leading indentation.
    '''^[ ]{0,3}'''
    // Reference id in brackets, and URL.
    r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*'''
    // Title in double or single quotes, or parens.
    r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''',
    multiLine: true,
  );

  /// Unused placeholder; this syntax does not rely on a pattern because
  /// [canParse] unconditionally returns `true`.
  @override
  RegExp get pattern => dummyPattern;

  /// Always `false`.
  @override
  bool canEndBlock(BlockParser parser) => false;

  const ParagraphSyntax();

  /// Always `true`: any line can be treated as paragraph text.
  @override
  bool canParse(BlockParser parser) => true;

  /// Consumes lines up to the end of the block and builds a paragraph.
  ///
  /// Returns a `p` [Element] wrapping the unparsed paragraph text (joined
  /// with `\n` and right-trimmed), or an empty [Text] node when the consumed
  /// lines held nothing but reference link definitions.
  @override
  Node parse(BlockParser parser) {
    final childLines = <String>[];

    // Eat until we hit something that ends a paragraph.
    while (!BlockSyntax.isAtBlockEnd(parser)) {
      childLines.add(parser.current);
      parser.advance();
    }

    final paragraphLines = _extractReflinkDefinitions(parser, childLines);
    if (paragraphLines == null) {
      // Paragraph consisted solely of reference link definitions.
      return Text('');
    }

    final contents = UnparsedContent(paragraphLines.join('\n').trimRight());
    return Element('p', [contents]);
  }

  /// Extracts reference link definitions from the front of [lines] and
  /// returns the remaining paragraph lines.
  ///
  /// Every definition that parses is registered through
  /// [_parseReflinkDefinition]. Returns `null` when no paragraph content is
  /// left, which includes the case where [lines] is empty.
  List<String>? _extractReflinkDefinitions(
    BlockParser parser,
    List<String> lines,
  ) {
    bool startsDefinition(int index) =>
        lines[index].startsWith(_reflinkDefinitionStart);

    // Index of the first line not yet claimed by a definition.
    var start = 0;

    while (start < lines.length && startsDefinition(start)) {
      // A candidate definition runs until the next line that starts with a
      // '[' (which can never belong to _this_ definition, even if it turns
      // out not to be a definition itself) or until the end of the block.
      var end = start + 1;
      while (end < lines.length && !startsDefinition(end)) {
        end++;
      }

      final candidate = lines.getRange(start, end).join('\n');
      if (_parseReflinkDefinition(parser, candidate)) {
        // Continue with the next possible definition, if any lines remain.
        start = end;
        continue;
      }

      // It may be that there is a reflink definition starting at [start],
      // but it does not extend all the way to [end], such as:
      //
      //     [link]: url     // line start
      //     "title"
      //     garbage
      //     [link2]: url    // line end
      //
      // In this case the first two lines are a reflink definition, and the
      // rest is paragraph content. Walk backwards over ever shorter ranges.
      // The full range has just failed and an empty range can never match,
      // so only the ranges strictly in between are tried.
      for (var shorterEnd = end - 1; shorterEnd > start; shorterEnd--) {
        final shorter = lines.getRange(start, shorterEnd).join('\n');
        if (_parseReflinkDefinition(parser, shorter)) {
          // That is the last reflink definition. The rest is paragraph
          // content.
          start = shorterEnd;
          break;
        }
      }

      // Whatever remains from [start] on is paragraph content.
      break;
    }

    return start == lines.length ? null : lines.sublist(start);
  }

  /// Parses [contents] as a reference link definition.
  ///
  /// On success the definition is added to the document's link references,
  /// unless a reference with the same normalized label is already present,
  /// in which case the existing one is kept.
  ///
  /// Returns whether [contents] could be parsed as a reference link
  /// definition. Nothing is registered when this returns `false`.
  bool _parseReflinkDefinition(BlockParser parser, String contents) {
    final match = _reflinkDefinitionPattern.firstMatch(contents);
    if (match == null) {
      // Not a reference link definition.
      return false;
    }
    if (match.match.length < contents.length) {
      // Trailing text. No good.
      return false;
    }

    final rawLabel = match[1]!;
    final destination = match[2] ?? match[3]!;

    // The label must contain at least one non-whitespace character.
    if (_whitespacePattern.hasMatch(rawLabel)) {
      return false;
    }

    // The title group always participates in a match; it is empty when the
    // definition has no title. Otherwise strip the surrounding "", '', or ().
    final rawTitle = match[4]!;
    final title =
        rawTitle.isEmpty ? null : rawTitle.substring(1, rawTitle.length - 1);

    // References are case-insensitive, and internal whitespace is compressed.
    final label = normalizeLinkLabel(rawLabel);

    parser.document.linkReferences
        .putIfAbsent(label, () => LinkReference(label, destination, title));
    return true;
  }
}