blob: 269689f3af76de4e86215f0622fd1ac7f428f9e7 [file] [log] [blame]
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
/// Matches a blank line: one that is empty or made up solely of spaces and
/// tabs.
final RegExp emptyPattern = RegExp(
  r'^(?:[ \t]*)$',
);
/// Matches the underline of a setext-style header.
///
/// The underline sits on the line after the header text. It may be indented
/// by up to three spaces and consists of a run of only `=` or only `-`
/// characters (group 1), optionally followed by trailing whitespace.
final RegExp setextPattern = RegExp(
  r'^[ ]{0,3}(=+|-+)\s*$',
);
/// Matches an atx-style header line, i.e. one introduced by leading `#`
/// characters.
///
/// After at most three leading spaces the line opens with one to six `#`
/// characters (group 1), which must be followed by a space, tab, vertical tab
/// or form feed. Group 2 lazily captures the header text; any run of `#`
/// characters at the very end of the line is left out of it.
final RegExp headerPattern = RegExp(
  r'^ {0,3}(#{1,6})[ \x09\x0b\x0c](.*?)#*$',
);
/// Matches a blockquote line.
///
/// The line may be indented by up to three spaces, then has a `>` that may be
/// followed by a single optional space or tab before the rest of the line.
final RegExp blockquotePattern = RegExp(
  r'^[ ]{0,3}>[ \t]?.*$',
);
/// Matches a line indented by four spaces, or by up to three spaces followed
/// by a tab. Used for code blocks and lists.
///
/// Group 1 captures everything after the indentation.
// The four-space indent is written as ` {4}` rather than four literal spaces
// so that the required width is explicit and cannot be lost if the source is
// reformatted or its whitespace is collapsed.
final indentPattern = RegExp(r'^(?: {4}| {0,3}\t)(.*)$');
/// Matches the opening or closing fence of a fenced code block.
///
/// Group 1 captures the fence's indentation (up to three spaces). The fence
/// itself is either:
///
/// * three or more backticks (named group `backtick`) followed by an info
///   string that contains no backticks (named group `backtickInfo`), or
/// * three or more tildes (named group `tilde`) followed by an arbitrary info
///   string (named group `tildeInfo`).
final codeFencePattern = RegExp(
  r'^([ ]{0,3})(?:(?<backtick>`{3,})(?<backtickInfo>[^`]*)|(?<tilde>~{3,})(?<tildeInfo>.*))$',
);
/// Matches the delimiter line of a fenced blockquote: exactly three `>`
/// characters at the start of the line, followed only by optional whitespace.
final RegExp blockquoteFencePattern = RegExp(
  r'^>{3}\s*$',
);
/// Matches a horizontal rule: a line holding nothing but three or more
/// hyphens, asterisks or underscores.
///
/// The line may be indented by up to three spaces. Group 1 captures the first
/// marker, and every further marker must be that same character; markers may
/// be separated and followed by spaces or tabs.
///
/// Note that a line such as `----` qualifies both as a horizontal rule and as
/// a setext underline. When both apply, SETEXT should win.
final RegExp hrPattern = RegExp(
  r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$',
);
/// Regex fragment for at most three literal spaces.
const _zeroToThreeSpaces = r'[ ]{0,3}';
/// Regex fragment for at most four spaces or tabs.
const _zeroToFourWhitespace = r'[ \t]{0,4}';
/// Regex fragment for a run of one to nine digits.
const _oneToNineDigits = r'\d{1,9}';
/// Regex fragment with three capture groups: a single space or tab, any
/// further spaces or tabs, and then the remainder of the line.
const _groupedWhitespaceAndEverything = r'([ \t])([ \t]*)(.*)';
/// Regex fragment for a bracketed checkbox: `[ ]`, `[x]` or `[X]`.
// The `{1}` quantifier is redundant (a character class already matches exactly
// one character); it is kept so that the pattern text stays unchanged.
const _checkbox = r'\[[ xX]{1}\]';
/// A line starting with one of these markers: `-`, `*`, `+`.
/// May have up to three leading spaces before the marker and any number of
/// spaces or tabs after.
/// Contains a dummy group at `[2]`, so that the groups in [ulPattern] and
/// [olPattern] match up; in both, `[2]` is the length of the number that begins
/// the list marker.
final ulPattern = RegExp(''
// Empty group for group number alignment with [olPattern].
/// Similar to [ulPattern] but with a GitHub-style checkbox
/// (`'[ ]'|'[x]'|'[X]'`) following the list marker.
/// The checkbox will be grabbed by group `[5]` and [ulPattern]'s groups
/// `[4]`, `[5]`, and `[6]` are all shifted 2 places to be `[6]`, `[7]`, and
/// `[8]`.
final ulWithCheckBoxPattern = RegExp(''
// Empty group for group number alignment with [olWithCheckBoxPattern].
/// Similar to [ulWithCheckBoxPattern] but the checkbox is optional.
// TODO(srawlins): This is temporary tech debt. I think we will collapse
// [ulPattern] and [ulWithCheckBoxPattern] into this one pattern.
final ulWithPossibleCheckboxPattern = RegExp(''
// Empty group for group number alignment with [olWithCheckBoxPattern].
// [7], [8], [9], and [10].
/// A line starting with a number like `123.`. May have up to three leading
/// spaces before the marker and any number of spaces or tabs after.
final olPattern = RegExp(''
/// Similar to [olPattern] but with a GitHub-style checkbox
/// (`'[ ]'|'[x]'|'[X]'`) following the number.
/// The checkbox will be grabbed by group `[5]` and [olPattern]'s groups
/// `[4]`, `[5]`, and `[6]` are all shifted 2 places to be `[6]`, `[7]`, and
/// `[8]`.
final olWithCheckBoxPattern = RegExp(''
/// Similar to [olWithCheckBoxPattern] but the checkbox is optional.
// TODO(srawlins): This is temporary tech debt. I think we will collapse
// [olPattern] and [olWithCheckBoxPattern] into this one pattern.
final olWithPossibleCheckboxPattern = RegExp(''
// [7], [8], [9], and [10].
/// Matches a table delimiter row: runs of hyphens separated by at least one
/// pipe.
///
/// The row may be indented by up to three spaces and may start with a `|`.
/// Each cell is a run of `-` with an optional `:` on either side, padded by
/// optional spaces or tabs. At least one cell must be terminated by `|`; a
/// final cell without a closing pipe is allowed.
final RegExp tablePattern = RegExp(
  r'^[ ]{0,3}\|?([ \t]*:?\-+:?[ \t]*\|)+([ \t]|[ \t]*:?\-+:?[ \t]*)?$',
);
/// A placeholder pattern that should never actually be used for matching.
///
/// It exists only so that non-nullable pattern fields have a value to hold.
final RegExp dummyPattern = RegExp(
  r'',
);
/// A [String] pattern to match a named tag like `<table>` or `</table>`.
const namedTagDefinition =
// Opening tag begins.
// Tag name.
// Attribute begins, see
// Attribute name, see
// Attribute value specification, see
// Attribute value, see
// Attribute ends.
// Opening tag ends.
// Or
// Closing tag, see
/// A pattern to match the start of an HTML block.
/// The 7 conditions here correspond to the 7 start conditions in the Commonmark
/// specification one by one:
final htmlBlockPattern = RegExp(
'^ {0,3}(?:'
// Here we are more restrictive than the Commonmark definition (Rule #7).
// Otherwise some raw HTML test cases will fail, for example:
// Because if a line is treated as an HTML block, it will output as Text node
// directly, the RawHtmlSyntax does not have a chance to validate if this
// HTML tag is legal or not.
caseSensitive: false);
/// The 32 ASCII punctuation characters, unescaped.
// See the definition of an "ASCII punctuation character" in the CommonMark
// specification.
const String asciiPunctuationCharacters =
    r'''!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~''';
/// The ASCII punctuation characters with `-`, `[`, `\` and `]` escaped by a
/// backslash, so that the string can be placed inside a RegExp character set.
const String asciiPunctuationEscaped =
    r'''!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~''';
/// A pattern to match HTML entity references and numeric character references.
final htmlCharactersPattern = RegExp(
caseSensitive: false,