| // Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| /// The line contains only whitespace or is empty. |
| final emptyPattern = RegExp(r'^(?:[ \t]*)$'); |
| |
| /// A series of `=` or `-` (on the next line) define setext-style headers. |
| final setextPattern = RegExp(r'^[ ]{0,3}(=+|-+)\s*$'); |
| |
| /// Leading (and trailing) `#` define atx-style headers. |
| /// |
| /// Starts with 1-6 unescaped `#` characters which must not be followed by a |
| /// non-space character. Line may end with any number of `#` characters,. |
| final headerPattern = RegExp(r'^ {0,3}(#{1,6})[ \x09\x0b\x0c](.*?)#*$'); |
| |
| /// The line starts with `>` with one optional space after. |
| final blockquotePattern = RegExp(r'^[ ]{0,3}>[ \t]?.*$'); |
| |
| /// A line indented four spaces. Used for code blocks and lists. |
| final indentPattern = RegExp(r'^(?: | {0,3}\t)(.*)$'); |
| |
| /// Fenced code block. |
| final codeFencePattern = RegExp( |
| r'^([ ]{0,3})(?:(?<backtick>`{3,})(?<backtickInfo>[^`]*)|(?<tilde>~{3,})(?<tildeInfo>.*))$', |
| ); |
| |
| /// Fenced blockquotes. |
| final blockquoteFencePattern = RegExp(r'^>{3}\s*$'); |
| |
| /// Three or more hyphens, asterisks or underscores by themselves. Note that |
| /// a line like `----` is valid as both HR and SETEXT. In case of a tie, |
| /// SETEXT should win. |
| final hrPattern = RegExp(r'^ {0,3}([-*_])[ \t]*\1[ \t]*\1(?:\1|[ \t])*$'); |
| |
| // why `{1}`? |
| const _checkbox = r'\[[ xX]{1}\]'; |
| |
| const _groupedWhitespaceAndEverything = r'([ \t])([ \t]*)(.*)'; |
| |
| const _oneToNineDigits = r'\d{1,9}'; |
| |
| const _zeroToFourWhitespace = r'[ \t]{0,4}'; |
| |
| const _zeroToThreeSpaces = '[ ]{0,3}'; |
| |
| /// A line starting with one of these markers: `-`, `*`, `+`. |
| /// |
| /// May have up to three leading spaces before the marker and any number of |
| /// spaces or tabs after. |
| /// |
| /// Contains a dummy group at `[2]`, so that the groups in [ulPattern] and |
| /// [olPattern] match up; in both, `[2]` is the length of the number that begins |
| /// the list marker. |
| final ulPattern = RegExp('' |
| '^($_zeroToThreeSpaces)' |
| // Empty group for group number alignment with [olPattern]. |
| '()' |
| '([*+-])' |
| '($_groupedWhitespaceAndEverything)?\$'); |
| |
| /// Similar to [ulPattern] but with a GitHub-style checkbox |
| /// (`'[ ]'|'[x]'|'[X]'`) following the number. |
| /// |
| /// The checkbox will be grabbed by group `[5]` and [ulPattern]'s groups |
| /// `[4]`, `[5]`, and `[6]` are all shifted 2 places to be `[6]`, `[7]`, and |
| /// `[8]`. |
| final ulWithCheckBoxPattern = RegExp('' |
| '^($_zeroToThreeSpaces)' |
| // Empty group for group number alignment with [olWithCheckBoxPattern]. |
| '()' |
| '([*+-])' |
| '($_zeroToFourWhitespace)' |
| '($_checkbox)' |
| '($_groupedWhitespaceAndEverything)?\$'); |
| |
| /// Similar to [ulWithCheckBoxPattern] but the checkbox is optional. |
| // TODO(srawlins): This is temporary tech debt. I think we will collapse |
| // [ulPattern] and [ulWithCheckBoxPattern] into this one pattern. |
| final ulWithPossibleCheckboxPattern = RegExp('' |
| '^($_zeroToThreeSpaces)' |
| // Empty group for group number alignment with [olWithCheckBoxPattern]. |
| '()' |
| '([*+-])' |
| '(($_zeroToFourWhitespace)($_checkbox))?' |
| // [7], [8], [9], and [10]. |
| '($_groupedWhitespaceAndEverything)?\$'); |
| |
| /// A line starting with a number like `123.`. May have up to three leading |
| /// spaces before the marker and any number of spaces or tabs after. |
| final olPattern = RegExp('' |
| '^($_zeroToThreeSpaces)' |
| '($_oneToNineDigits)' |
| r'([\.)])' |
| '($_groupedWhitespaceAndEverything)?\$'); |
| |
| /// Similar to [olPattern] but with a GitHub-style checkbox |
| /// (`'[ ]'|'[x]'|'[X]'`) following the number. |
| /// |
| /// The checkbox will be grabbed by group `[5]` and [olPattern]'s groups |
| /// `[4]`, `[5]`, and `[6]` are all shifted 2 places to be `[6]`, `[7]`, and |
| /// `[8]`. |
| final olWithCheckBoxPattern = RegExp('' |
| '^($_zeroToThreeSpaces)' |
| '($_oneToNineDigits)' |
| r'([\.)])' |
| '($_zeroToFourWhitespace)' |
| '($_checkbox)' |
| '($_groupedWhitespaceAndEverything)?\$'); |
| |
| /// Similar to [olWithCheckBoxPattern] but the checkbox is optional. |
| // TODO(srawlins): This is temporary tech debt. I think we will collapse |
| // [olPattern] and [olWithCheckBoxPattern] into this one pattern. |
| final olWithPossibleCheckboxPattern = RegExp('' |
| '^($_zeroToThreeSpaces)' |
| '($_oneToNineDigits)' |
| r'([\.)])' |
| '(($_zeroToFourWhitespace)($_checkbox))?' |
| // [7], [8], [9], and [10]. |
| '($_groupedWhitespaceAndEverything)?\$'); |
| |
| /// A line of hyphens separated by at least one pipe. |
| final tablePattern = RegExp( |
| r'^[ ]{0,3}\|?([ \t]*:?\-+:?[ \t]*\|)+([ \t]|[ \t]*:?\-+:?[ \t]*)?$'); |
| |
| /// A pattern which should never be used. It just satisfies non-nullability of |
| /// pattern fields. |
| final dummyPattern = RegExp(''); |
| |
| /// A [String] pattern to match a named tag like `<table>` or `</table>`. |
| const namedTagDefinition = |
| // Opening tag begins. |
| '<' |
| |
| // Tag name. |
| '[a-z][a-z0-9-]*' |
| |
| // Attribute begins, see |
| // https://spec.commonmark.org/0.30/#attribute. |
| r'(?:\s+' |
| |
| // Attribute name, see |
| // https://spec.commonmark.org/0.30/#attribute-name. |
| '[a-z_:][a-z0-9._:-]*' |
| |
| // |
| '(?:' |
| // Attribute value specification, see |
| // https://spec.commonmark.org/0.30/#attribute-value-specification. |
| r'\s*=\s*' |
| |
| // Attribute value, see |
| // https://spec.commonmark.org/0.30/#unquoted-attribute-value. |
| r'''(?:[^\s"'=<>`]+?|'[^']*?'|"[^"]*?")''' |
| |
| // Attribute ends. |
| ')?)*' |
| |
| // Opening tag ends. |
| r'\s*/?>' |
| |
| // Or |
| '|' |
| |
| // Closing tag, see |
| // https://spec.commonmark.org/0.30/#closing-tag. |
| r'</[a-z][a-z0-9-]*\s*>'; |
| |
| /// A pattern to match the start of an HTML block. |
| /// |
| /// The 7 conditions here correspond to the 7 start conditions in the Commonmark |
| /// specification one by one: https://spec.commonmark.org/0.30/#html-block. |
| final htmlBlockPattern = RegExp( |
| '^ {0,3}(?:' |
| '<(?<condition_1>pre|script|style|textarea)' |
| r'(?:\s|>|$)' |
| '|' |
| '(?<condition_2><!--)' |
| '|' |
| r'(?<condition_3><\?)' |
| '|' |
| '(?<condition_4><![a-z])' |
| '|' |
| r'(?<condition_5><!\[CDATA\[)' |
| '|' |
| '</?(?<condition_6>address|article|aside|base|basefont|blockquote|body|' |
| 'caption|center|col|colgroup|dd|details|dialog|dir|DIV|dl|dt|fieldset|' |
| 'figcaption|figure|footer|form|frame|frameset|h1|h2|h3|h4|h5|h6|head|' |
| 'header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|' |
| 'optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|' |
| 'thead|title|tr|track|ul)' |
| r'(?:\s|>|/>|$)' |
| '|' |
| |
| // Here we are more restrictive than the Commonmark definition (Rule #7). |
| // Otherwise some raw HTML test cases will fail, for example: |
| // https://spec.commonmark.org/0.30/#example-618. |
| // Because if a line is treated as an HTML block, it will output as Text node |
| // directly, the RawHtmlSyntax does not have a chance to validate if this |
| // HTML tag is legal or not. |
| '(?<condition_7>(?:$namedTagDefinition)\\s*\$))', |
| caseSensitive: false); |
| |
| /// ASCII punctuation characters. |
| // See https://spec.commonmark.org/0.30/#unicode-whitespace-character. |
| const asciiPunctuationCharacters = r'''!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~'''; |
| |
| /// ASCII punctuation characters with some characters escaped, in order to be |
| // used in the RegExp character set. |
| const asciiPunctuationEscaped = r'''!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~'''; |
| |
| /// A pattern to match HTML entity references and numeric character references. |
| // https://spec.commonmark.org/0.30/#entity-and-numeric-character-references |
| final htmlCharactersPattern = RegExp( |
| '&(?:([a-z0-9]+)|#([0-9]{1,7})|#x([a-f0-9]{1,6}));', |
| caseSensitive: false, |
| ); |