| // Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| import '../ast.dart'; |
| import '../block_parser.dart'; |
| import '../document.dart'; |
| import '../patterns.dart'; |
| import '../util.dart'; |
| import 'block_syntax.dart'; |
| |
| /// Parses paragraphs of regular text. |
| class ParagraphSyntax extends BlockSyntax { |
| static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\['); |
| |
| static final _whitespacePattern = RegExp(r'^\s*$'); |
| |
| @override |
| RegExp get pattern => dummyPattern; |
| |
| @override |
| bool canEndBlock(BlockParser parser) => false; |
| |
| const ParagraphSyntax(); |
| |
| @override |
| bool canParse(BlockParser parser) => true; |
| |
| @override |
| Node parse(BlockParser parser) { |
| final childLines = <String>[]; |
| |
| // Eat until we hit something that ends a paragraph. |
| while (!BlockSyntax.isAtBlockEnd(parser)) { |
| childLines.add(parser.current); |
| parser.advance(); |
| } |
| |
| final paragraphLines = _extractReflinkDefinitions(parser, childLines); |
| if (paragraphLines == null) { |
| // Paragraph consisted solely of reference link definitions. |
| return Text(''); |
| } else { |
| final contents = UnparsedContent(paragraphLines.join('\n').trimRight()); |
| return Element('p', [contents]); |
| } |
| } |
| |
| /// Extract reference link definitions from the front of the paragraph, and |
| /// return the remaining paragraph lines. |
| List<String>? _extractReflinkDefinitions( |
| BlockParser parser, |
| List<String> lines, |
| ) { |
| bool lineStartsReflinkDefinition(int i) => |
| lines[i].startsWith(_reflinkDefinitionStart); |
| |
| var i = 0; |
| loopOverDefinitions: |
| while (true) { |
| // Check for reflink definitions. |
| if (!lineStartsReflinkDefinition(i)) { |
| // It's paragraph content from here on out. |
| break; |
| } |
| var contents = lines[i]; |
| var j = i + 1; |
| while (j < lines.length) { |
| // Check to see if the _next_ line might start a new reflink definition. |
| // Even if it turns out not to be, but it started with a '[', then it |
| // is not a part of _this_ possible reflink definition. |
| if (lineStartsReflinkDefinition(j)) { |
| // Try to parse [contents] as a reflink definition. |
| if (_parseReflinkDefinition(parser, contents)) { |
| // Loop again, starting at the next possible reflink definition. |
| i = j; |
| continue loopOverDefinitions; |
| } else { |
| // Could not parse [contents] as a reflink definition. |
| break; |
| } |
| } else { |
| contents = '$contents\n${lines[j]}'; |
| j++; |
| } |
| } |
| // End of the block. |
| if (_parseReflinkDefinition(parser, contents)) { |
| i = j; |
| break; |
| } |
| |
| // It may be that there is a reflink definition starting at [i], but it |
| // does not extend all the way to [j], such as: |
| // |
| // [link]: url // line i |
| // "title" |
| // garbage |
| // [link2]: url // line j |
| // |
| // In this case, [i, i+1] is a reflink definition, and the rest is |
| // paragraph content. |
| while (j >= i) { |
| // This isn't the most efficient loop, what with this big ole' |
| // Iterable allocation (`getRange`) followed by a big 'ole String |
| // allocation, but we |
| // must walk backwards, checking each range. |
| contents = lines.getRange(i, j).join('\n'); |
| if (_parseReflinkDefinition(parser, contents)) { |
| // That is the last reflink definition. The rest is paragraph |
| // content. |
| i = j; |
| break; |
| } |
| j--; |
| } |
| // The ending was not a reflink definition at all. Just paragraph |
| // content. |
| |
| break; |
| } |
| |
| if (i == lines.length) { |
| // No paragraph content. |
| return null; |
| } else { |
| // Ends with paragraph content. |
| return lines.sublist(i); |
| } |
| } |
| |
| // Parse [contents] as a reference link definition. |
| // |
| // Also adds the reference link definition to the document. |
| // |
| // Returns whether [contents] could be parsed as a reference link definition. |
| bool _parseReflinkDefinition(BlockParser parser, String contents) { |
| final pattern = RegExp( |
| // Leading indentation. |
| '''^[ ]{0,3}''' |
| // Reference id in brackets, and URL. |
| r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*''' |
| // Title in double or single quotes, or parens. |
| r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''', |
| multiLine: true, |
| ); |
| final match = pattern.firstMatch(contents); |
| if (match == null) { |
| // Not a reference link definition. |
| return false; |
| } |
| if (match.match.length < contents.length) { |
| // Trailing text. No good. |
| return false; |
| } |
| |
| var label = match[1]!; |
| final destination = match[2] ?? match[3]!; |
| var title = match[4]; |
| |
| // The label must contain at least one non-whitespace character. |
| if (_whitespacePattern.hasMatch(label)) { |
| return false; |
| } |
| |
| if (title == '') { |
| // No title. |
| title = null; |
| } else { |
| // Remove "", '', or (). |
| title = title!.substring(1, title.length - 1); |
| } |
| |
| // References are case-insensitive, and internal whitespace is compressed. |
| label = normalizeLinkLabel(label); |
| |
| parser.document.linkReferences |
| .putIfAbsent(label, () => LinkReference(label, destination, title)); |
| return true; |
| } |
| } |