lib/src/inline_syntaxes/autolink_extension_syntax.dart - markdown.git - Git at Google

 // Copyright (c) 2022, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 import '../ast.dart';
 import '../inline_parser.dart';
 import '../util.dart';
 import 'inline_syntax.dart';

 /// Matches GitHub-flavored extended autolinks such as `http://foo.com` or
 /// `www.foo.com` written in running text without angle brackets.
 class AutolinkExtensionSyntax extends InlineSyntax {
   // The autolink pattern is assembled from the fragments below for
   // reusability and readability.

   /// What may precede an autolink: the beginning of a line, whitespace, or
   /// one of the delimiting characters `*`, `_`, `~`, `(` and `>`.
   static const start = r'(?:^|[\s*_~(>])';

   /// How an autolink may begin: one of the schemes `http://`, `https://` or
   /// `ftp://`, or the bare prefix `www.`.
   static const scheme = r'(?:(?:https?|ftp):\/\/|www\.)';

   /// Characters allowed inside a domain segment: alphanumerics, underscores
   /// (`_`) and hyphens (`-`).
   static const domainPart = r'\w\-';

   /// A domain: one [domainPart] character followed by one or more
   /// [domainPart] characters or periods (`.`).
   ///
   /// GFM additionally requires at least one period and forbids underscores
   /// in the last two segments; this pattern does not enforce those rules.
   static const domain = '[$domainPart][$domainPart.]+';

   /// Everything after the domain: any run of characters other than
   /// whitespace and `<`.
   static const path = r'[^\s<]*';

   /// Trailing punctuation (specifically `?`, `!`, `.`, `,`, `:`, `*`, `_`
   /// and `~`) that is not considered part of the autolink.
   static const truncatingPunctuationPositive = '[?!.,:*_~]';

   /// Matches the (possibly empty) run of trailing punctuation at the end of
   /// a candidate URL.
   static final regExpTrailingPunc = RegExp('$truncatingPunctuationPositive*\$');

   /// Matches a trailing HTML entity reference such as `&amp;`.
   ///
   /// Despite the identifier, the terminator it looks for is a semicolon; the
   /// name is kept so existing references keep working.
   static final regExpEndsWithColon = RegExp(r'\&[a-zA-Z0-9]+;$');

   /// Matches a single whitespace character.
   static final regExpWhiteSpace = RegExp(r'\s');

   AutolinkExtensionSyntax() : super('$start(($scheme)($domain)($path))');

   /// Attempts the match one character before the parser position so that
   /// [start] can inspect the character preceding the URL.
   ///
   /// [startMatchPos] is ignored; the starting offset is always derived from
   /// `parser.pos`.
   @override
   bool tryMatch(InlineParser parser, [int? startMatchPos]) {
     final lookBehindPos = parser.pos > 0 ? parser.pos - 1 : 0;
     return super.tryMatch(parser, lookBehindPos);
   }

   /// Trims the matched URL according to the GFM autolink rules, emits an
   /// `a` element for it and advances the parser past the trimmed URL.
   ///
   /// Returns `false` in every case; when a link is emitted the parser is
   /// advanced here via `parser.consume` by the length of the trimmed URL
   /// rather than by the length of the whole regex match.
   @override
   bool onMatch(InlineParser parser, Match match) {
     // Group 1 is the URL alone: the non-capturing [start] prefix is not part
     // of it, so it always begins with one of the [scheme] alternatives and
     // never with `>` or whitespace.
     var url = match[1]!;

     // Prevent accidental standard autolink matches. The `pos > 0` guard
     // avoids indexing position -1 when the URL begins the input.
     if (url.endsWith('>') &&
         parser.pos > 0 &&
         parser.source[parser.pos - 1] == '<') {
       return false;
     }

     // When an autolink ends in ), we scan the entire autolink for the total
     // number of parentheses. If there are more closing parentheses than
     // opening ones, the last character is not considered part of the
     // autolink, which allows an autolink to sit inside parentheses:
     // https://github.github.com/gfm/#example-600
     if (url.endsWith(')') && _countChars(url, ')') > _countChars(url, '(')) {
       url = url.substring(0, url.length - 1);
     }

     // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) is not
     // considered part of the autolink, though it may appear in the interior
     // of the link:
     // https://github.github.com/gfm/#example-599
     final trailingPunc = regExpTrailingPunc.firstMatch(url);
     if (trailingPunc != null) {
       url = url.substring(0, url.length - trailingPunc.match.length);
     }

     // If an autolink ends in a semicolon (;), check whether the tail
     // resembles an
     // [entity reference](https://github.github.com/gfm/#entity-references),
     // i.e. & followed by one or more alphanumeric characters. If so, the
     // entity reference is excluded from the autolink:
     // https://github.github.com/gfm/#example-602
     if (url.endsWith(';')) {
       final entityRef = regExpEndsWithColon.firstMatch(url);
       if (entityRef != null) {
         url = url.substring(0, url.length - entityRef.match.length);
       }
     }

     // Links written without an explicit scheme (`www.` form) get `http://`.
     final hasScheme = url.startsWith('http://') ||
         url.startsWith('https://') ||
         url.startsWith('ftp://');
     final href = hasScheme ? url : 'http://$url';

     final text = parser.encodeHtml ? escapeHtml(url) : url;
     final anchor = Element.text('a', text);
     anchor.attributes['href'] = Uri.encodeFull(href);
     parser.addNode(anchor);

     // Only the trimmed URL is consumed; anything stripped above is left for
     // the parser to handle as ordinary text.
     parser.consume(url.length);
     return false;
   }

   /// Returns how many times the single character [char] occurs in [input].
   int _countChars(String input, String char) => char.allMatches(input).length;
 }
	// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	import '../ast.dart';
	import '../inline_parser.dart';
	import '../util.dart';
	import 'inline_syntax.dart';

	/// Matches GitHub-flavored extended autolinks such as `http://foo.com` or
	/// `www.foo.com` written in running text without angle brackets.
	class AutolinkExtensionSyntax extends InlineSyntax {
	  // The autolink pattern is assembled from the fragments below for
	  // reusability and readability.

	  /// What may precede an autolink: the beginning of a line, whitespace, or
	  /// one of the delimiting characters `*`, `_`, `~`, `(` and `>`.
	  static const start = r'(?:^|[\s*_~(>])';

	  /// How an autolink may begin: one of the schemes `http://`, `https://` or
	  /// `ftp://`, or the bare prefix `www.`.
	  static const scheme = r'(?:(?:https?|ftp):\/\/|www\.)';

	  /// Characters allowed inside a domain segment: alphanumerics, underscores
	  /// (`_`) and hyphens (`-`).
	  static const domainPart = r'\w\-';

	  /// A domain: one [domainPart] character followed by one or more
	  /// [domainPart] characters or periods (`.`).
	  ///
	  /// GFM additionally requires at least one period and forbids underscores
	  /// in the last two segments; this pattern does not enforce those rules.
	  static const domain = '[$domainPart][$domainPart.]+';

	  /// Everything after the domain: any run of characters other than
	  /// whitespace and `<`.
	  static const path = r'[^\s<]*';

	  /// Trailing punctuation (specifically `?`, `!`, `.`, `,`, `:`, `*`, `_`
	  /// and `~`) that is not considered part of the autolink.
	  static const truncatingPunctuationPositive = '[?!.,:*_~]';

	  /// Matches the (possibly empty) run of trailing punctuation at the end of
	  /// a candidate URL.
	  static final regExpTrailingPunc = RegExp('$truncatingPunctuationPositive*\$');

	  /// Matches a trailing HTML entity reference such as `&amp;`.
	  ///
	  /// Despite the identifier, the terminator it looks for is a semicolon; the
	  /// name is kept so existing references keep working.
	  static final regExpEndsWithColon = RegExp(r'\&[a-zA-Z0-9]+;$');

	  /// Matches a single whitespace character.
	  static final regExpWhiteSpace = RegExp(r'\s');

	  AutolinkExtensionSyntax() : super('$start(($scheme)($domain)($path))');

	  /// Attempts the match one character before the parser position so that
	  /// [start] can inspect the character preceding the URL.
	  ///
	  /// [startMatchPos] is ignored; the starting offset is always derived from
	  /// `parser.pos`.
	  @override
	  bool tryMatch(InlineParser parser, [int? startMatchPos]) {
	    final lookBehindPos = parser.pos > 0 ? parser.pos - 1 : 0;
	    return super.tryMatch(parser, lookBehindPos);
	  }

	  /// Trims the matched URL according to the GFM autolink rules, emits an
	  /// `a` element for it and advances the parser past the trimmed URL.
	  ///
	  /// Returns `false` in every case; when a link is emitted the parser is
	  /// advanced here via `parser.consume` by the length of the trimmed URL
	  /// rather than by the length of the whole regex match.
	  @override
	  bool onMatch(InlineParser parser, Match match) {
	    // Group 1 is the URL alone: the non-capturing [start] prefix is not part
	    // of it, so it always begins with one of the [scheme] alternatives and
	    // never with `>` or whitespace.
	    var url = match[1]!;

	    // Prevent accidental standard autolink matches. The `pos > 0` guard
	    // avoids indexing position -1 when the URL begins the input.
	    if (url.endsWith('>') &&
	        parser.pos > 0 &&
	        parser.source[parser.pos - 1] == '<') {
	      return false;
	    }

	    // When an autolink ends in ), we scan the entire autolink for the total
	    // number of parentheses. If there are more closing parentheses than
	    // opening ones, the last character is not considered part of the
	    // autolink, which allows an autolink to sit inside parentheses:
	    // https://github.github.com/gfm/#example-600
	    if (url.endsWith(')') && _countChars(url, ')') > _countChars(url, '(')) {
	      url = url.substring(0, url.length - 1);
	    }

	    // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) is not
	    // considered part of the autolink, though it may appear in the interior
	    // of the link:
	    // https://github.github.com/gfm/#example-599
	    final trailingPunc = regExpTrailingPunc.firstMatch(url);
	    if (trailingPunc != null) {
	      url = url.substring(0, url.length - trailingPunc.match.length);
	    }

	    // If an autolink ends in a semicolon (;), check whether the tail
	    // resembles an
	    // [entity reference](https://github.github.com/gfm/#entity-references),
	    // i.e. & followed by one or more alphanumeric characters. If so, the
	    // entity reference is excluded from the autolink:
	    // https://github.github.com/gfm/#example-602
	    if (url.endsWith(';')) {
	      final entityRef = regExpEndsWithColon.firstMatch(url);
	      if (entityRef != null) {
	        url = url.substring(0, url.length - entityRef.match.length);
	      }
	    }

	    // Links written without an explicit scheme (`www.` form) get `http://`.
	    final hasScheme = url.startsWith('http://') ||
	        url.startsWith('https://') ||
	        url.startsWith('ftp://');
	    final href = hasScheme ? url : 'http://$url';

	    final text = parser.encodeHtml ? escapeHtml(url) : url;
	    final anchor = Element.text('a', text);
	    anchor.attributes['href'] = Uri.encodeFull(href);
	    parser.addNode(anchor);

	    // Only the trimmed URL is consumed; anything stripped above is left for
	    // the parser to handle as ordinary text.
	    parser.consume(url.length);
	    return false;
	  }

	  /// Returns how many times the single character [char] occurs in [input].
	  int _countChars(String input, String char) => char.allMatches(input).length;
	}