lib/src/block_syntaxes/paragraph_syntax.dart - markdown.git - Git at Google

 // Copyright (c) 2022, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 import '../ast.dart';
 import '../block_parser.dart';
 import '../document.dart';
 import '../patterns.dart';
 import '../util.dart';
 import 'block_syntax.dart';

 /// Parses paragraphs of regular text.
 ///
 /// Every line is accepted ([canParse] always returns `true`) and a paragraph
 /// never ends another block ([canEndBlock] always returns `false`).
 ///
 /// Reference link definitions found at the front of the collected lines are
 /// registered on the parser's document and removed from the paragraph text.
 class ParagraphSyntax extends BlockSyntax {
   /// Matches the start of a line that may open a reference link definition:
   /// up to three spaces of indentation followed by `[`.
   static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\[');

   /// Matches a string that is empty or consists only of whitespace.
   static final _whitespacePattern = RegExp(r'^\s*$');

   /// Matches one complete reference link definition.
   ///
   /// Capture groups: 1 is the label, 2 is a destination written inside
   /// `<...>`, 3 is a bare destination, and 4 is the title including its
   /// delimiters (the empty string when there is no title).
   ///
   /// Compiled once here rather than on every call to
   /// [_parseReflinkDefinition], which may run several times per paragraph.
   static final _reflinkDefinitionPattern = RegExp(
     // Leading indentation.
     '''^[ ]{0,3}'''
     // Reference id in brackets, and URL.
     r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*'''
     // Title in double or single quotes, or parens.
     r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''',
     multiLine: true,
   );

   const ParagraphSyntax();

   /// Returns [dummyPattern]; lines are not selected by pattern here because
   /// [canParse] accepts every line.
   @override
   RegExp get pattern => dummyPattern;

   @override
   bool canEndBlock(BlockParser parser) => false;

   @override
   bool canParse(BlockParser parser) => true;

   /// Consumes lines up to the next block end and returns the paragraph node.
   ///
   /// Returns a `p` [Element] wrapping the unparsed paragraph text, or an empty
   /// [Text] node when every consumed line belonged to a reference link
   /// definition.
   @override
   Node parse(BlockParser parser) {
     final childLines = <String>[];

     // Eat until we hit something that ends a paragraph.
     while (!BlockSyntax.isAtBlockEnd(parser)) {
       childLines.add(parser.current);
       parser.advance();
     }

     final paragraphLines = _extractReflinkDefinitions(parser, childLines);
     if (paragraphLines == null) {
       // Paragraph consisted solely of reference link definitions.
       return Text('');
     }

     final contents = UnparsedContent(paragraphLines.join('\n').trimRight());
     return Element('p', [contents]);
   }

   /// Extracts reference link definitions from the front of [lines] and returns
   /// the remaining paragraph lines.
   ///
   /// Each definition that parses is registered through
   /// [_parseReflinkDefinition]. Returns `null` when no paragraph content is
   /// left after the definitions.
   List<String>? _extractReflinkDefinitions(
     BlockParser parser,
     List<String> lines,
   ) {
     bool lineStartsReflinkDefinition(int index) =>
         lines[index].startsWith(_reflinkDefinitionStart);

     var i = 0;
     while (lineStartsReflinkDefinition(i)) {
       // A candidate definition spans line [i] and every following line up to,
       // but not including, the next line that starts with `[`. Such a line is
       // never part of _this_ definition, even if it turns out not to be a
       // definition itself.
       var j = i + 1;
       while (j < lines.length && !lineStartsReflinkDefinition(j)) {
         j++;
       }

       final candidate = lines.getRange(i, j).join('\n');
       if (_parseReflinkDefinition(parser, candidate)) {
         i = j;
         if (i == lines.length) {
           // The definitions ran to the end of the block.
           break;
         }
         // Line [i] starts the next possible definition.
         continue;
       }

       // There may be a reflink definition starting at [i] that does not
       // extend all the way to [j], such as:
       //
       //     [link]: url     // line i
       //     "title"
       //     garbage
       //     [link2]: url   // line j
       //
       // In this case, [i, i+1] is a reflink definition, and the rest is
       // paragraph content. Walk backwards over the shorter ranges. The full
       // range [i, j) has just failed and an empty range can never match, so
       // neither is tried.
       for (var end = j - 1; end > i; end--) {
         final shorter = lines.getRange(i, end).join('\n');
         if (_parseReflinkDefinition(parser, shorter)) {
           // That is the last reflink definition. The rest is paragraph
           // content.
           i = end;
           break;
         }
       }

       // Whether or not a shorter definition was found, everything from [i]
       // onwards is paragraph content.
       break;
     }

     // `null` signals that there is no paragraph content.
     return i == lines.length ? null : lines.sublist(i);
   }

   /// Parses [contents] as a reference link definition.
   ///
   /// On success the definition is added to the document's link references,
   /// unless a reference with the same normalized label already exists.
   ///
   /// Returns whether [contents] could be parsed as a reference link
   /// definition. Nothing is registered when this returns `false`.
   bool _parseReflinkDefinition(BlockParser parser, String contents) {
     final match = _reflinkDefinitionPattern.firstMatch(contents);
     if (match == null) {
       // Not a reference link definition.
       return false;
     }
     if (match.match.length < contents.length) {
       // Trailing text. No good.
       return false;
     }

     final rawLabel = match[1]!;
     final destination = match[2] ?? match[3]!;
     // Group 4 is not optional in the pattern; it is '' when no title exists.
     final rawTitle = match[4]!;

     // The label must contain at least one non-whitespace character.
     if (_whitespacePattern.hasMatch(rawLabel)) {
       return false;
     }

     // Remove the surrounding "", '', or () from the title, if there is one.
     final title = rawTitle == ''
         ? null
         : rawTitle.substring(1, rawTitle.length - 1);

     // References are case-insensitive, and internal whitespace is compressed.
     final label = normalizeLinkLabel(rawLabel);

     parser.document.linkReferences
         .putIfAbsent(label, () => LinkReference(label, destination, title));
     return true;
   }
 }
	// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	import '../ast.dart';
	import '../block_parser.dart';
	import '../document.dart';
	import '../patterns.dart';
	import '../util.dart';
	import 'block_syntax.dart';

	/// Parses paragraphs of regular text.
	///
	/// Reference link definitions found at the front of a paragraph are
	/// registered on the parser's document and removed from the paragraph text.
	class ParagraphSyntax extends BlockSyntax {
	  /// Matches the start of a line that may open a reference link definition:
	  /// up to three spaces of indentation followed by `[`.
	  static final _reflinkDefinitionStart = RegExp(r'[ ]{0,3}\[');

	  /// Matches a string that is empty or consists only of whitespace.
	  static final _whitespacePattern = RegExp(r'^\s*$');

	  @override
	  RegExp get pattern => dummyPattern;

	  @override
	  bool canEndBlock(BlockParser parser) => false;

	  const ParagraphSyntax();

	  @override
	  bool canParse(BlockParser parser) => true;

	  /// Consumes lines up to the next block end and returns the paragraph node.
	  ///
	  /// Returns a `p` [Element] wrapping the unparsed paragraph text, or an empty
	  /// [Text] node when every consumed line belonged to a reference link
	  /// definition.
	  @override
	  Node parse(BlockParser parser) {
	    final childLines = <String>[];

	    // Eat until we hit something that ends a paragraph.
	    while (!BlockSyntax.isAtBlockEnd(parser)) {
	      childLines.add(parser.current);
	      parser.advance();
	    }

	    final paragraphLines = _extractReflinkDefinitions(parser, childLines);
	    if (paragraphLines == null) {
	      // Paragraph consisted solely of reference link definitions.
	      return Text('');
	    } else {
	      final contents = UnparsedContent(paragraphLines.join('\n').trimRight());
	      return Element('p', [contents]);
	    }
	  }

	  /// Extract reference link definitions from the front of the paragraph, and
	  /// return the remaining paragraph lines.
	  ///
	  /// Returns `null` when no paragraph content is left after the definitions.
	  List<String>? _extractReflinkDefinitions(
	    BlockParser parser,
	    List<String> lines,
	  ) {
	    bool lineStartsReflinkDefinition(int i) =>
	        lines[i].startsWith(_reflinkDefinitionStart);

	    var i = 0;
	    loopOverDefinitions:
	    while (true) {
	      // Check for reflink definitions.
	      if (!lineStartsReflinkDefinition(i)) {
	        // It's paragraph content from here on out.
	        break;
	      }
	      var contents = lines[i];
	      var j = i + 1;
	      while (j < lines.length) {
	        // Check to see if the _next_ line might start a new reflink definition.
	        // Even if it turns out not to be, but it started with a '[', then it
	        // is not a part of _this_ possible reflink definition.
	        if (lineStartsReflinkDefinition(j)) {
	          // Try to parse [contents] as a reflink definition.
	          if (_parseReflinkDefinition(parser, contents)) {
	            // Loop again, starting at the next possible reflink definition.
	            i = j;
	            continue loopOverDefinitions;
	          } else {
	            // Could not parse [contents] as a reflink definition.
	            break;
	          }
	        } else {
	          contents = '$contents\n${lines[j]}';
	          j++;
	        }
	      }
	      // End of the block.
	      if (_parseReflinkDefinition(parser, contents)) {
	        i = j;
	        break;
	      }

	      // It may be that there is a reflink definition starting at [i], but it
	      // does not extend all the way to [j], such as:
	      //
	      //     [link]: url     // line i
	      //     "title"
	      //     garbage
	      //     [link2]: url   // line j
	      //
	      // In this case, [i, i+1] is a reflink definition, and the rest is
	      // paragraph content.
	      while (j >= i) {
	        // This isn't the most efficient loop, as each step allocates an
	        // Iterable (`getRange`) and then a joined String, but we must walk
	        // backwards, checking each range.
	        contents = lines.getRange(i, j).join('\n');
	        if (_parseReflinkDefinition(parser, contents)) {
	          // That is the last reflink definition. The rest is paragraph
	          // content.
	          i = j;
	          break;
	        }
	        j--;
	      }
	      // The ending was not a reflink definition at all. Just paragraph
	      // content.

	      break;
	    }

	    if (i == lines.length) {
	      // No paragraph content.
	      return null;
	    } else {
	      // Ends with paragraph content.
	      return lines.sublist(i);
	    }
	  }

	  // Parse [contents] as a reference link definition.
	  //
	  // Also adds the reference link definition to the document.
	  //
	  // Returns whether [contents] could be parsed as a reference link definition.
	  bool _parseReflinkDefinition(BlockParser parser, String contents) {
	    // The `|` characters below are regex alternations and must stay
	    // unescaped; `\s*` must keep its `*` so that whitespace is optional.
	    final pattern = RegExp(
	      // Leading indentation.
	      '''^[ ]{0,3}'''
	      // Reference id in brackets, and URL.
	      r'''\[((?:\\\]|[^\]])+)\]:\s*(?:<(\S+)>|(\S+))\s*'''
	      // Title in double or single quotes, or parens.
	      r'''("[^"]+"|'[^']+'|\([^)]+\)|)\s*$''',
	      multiLine: true,
	    );
	    final match = pattern.firstMatch(contents);
	    if (match == null) {
	      // Not a reference link definition.
	      return false;
	    }
	    if (match.match.length < contents.length) {
	      // Trailing text. No good.
	      return false;
	    }

	    var label = match[1]!;
	    final destination = match[2] ?? match[3]!;
	    var title = match[4];

	    // The label must contain at least one non-whitespace character.
	    if (_whitespacePattern.hasMatch(label)) {
	      return false;
	    }

	    if (title == '') {
	      // No title.
	      title = null;
	    } else {
	      // Remove "", '', or ().
	      title = title!.substring(1, title.length - 1);
	    }

	    // References are case-insensitive, and internal whitespace is compressed.
	    label = normalizeLinkLabel(label);

	    // An earlier definition with the same normalized label is kept.
	    parser.document.linkReferences
	        .putIfAbsent(label, () => LinkReference(label, destination, title));
	    return true;
	  }
	}