blob: 7dbad63117222837f785e9fb4fc44179a1b82329 [file] [log] [blame]
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import '../ast.dart';
import '../inline_parser.dart';
import 'inline_syntax.dart';
/// Matches syntax that has a pair of tags and becomes an element, like `*` for
/// `<em>`. Allows nested tags.
class DelimiterSyntax extends InlineSyntax {
  /// Whether matches are classified with the same nesting rules as
  /// [emphasis delimiters][].
  ///
  /// When `false`, every match is pushed as a [SimpleDelimiter] that can open
  /// but cannot close.
  ///
  /// [emphasis delimiters]: http://spec.commonmark.org/0.28/#can-open-emphasis
  final bool requiresDelimiterRun;

  /// Whether intra-word delimiter runs are allowed.
  ///
  /// CommonMark emphasis and strong emphasis do not allow this, but
  /// GitHub-Flavored Markdown allows it on strikethrough.
  final bool allowIntraWord;

  /// The tags handed to [DelimiterRun.tryParse] for every delimiter run found
  /// by this syntax. An empty list is used when this is `null`.
  final List<DelimiterTag>? tags;

  /// Creates a new [DelimiterSyntax] which matches text on [pattern].
  ///
  /// [pattern] locates the delimiter text. Pass [requiresDelimiterRun] to
  /// parse matches with the same nesting rules as emphasis delimiters. When
  /// [startCharacter] is given it is forwarded to [InlineSyntax] and used as a
  /// pre-matching check which is faster than matching against [pattern].
  DelimiterSyntax(
    String pattern, {
    this.requiresDelimiterRun = false,
    int? startCharacter,
    this.allowIntraWord = false,
    this.tags,
  }) : super(pattern, startCharacter: startCharacter);

  /// Turns the matched delimiter characters into a [Text] node and registers a
  /// delimiter for them on [parser].
  ///
  /// Returns `true` once a delimiter has been pushed and the text node added.
  /// Returns `false` when [requiresDelimiterRun] is set and no delimiter run
  /// could be parsed; in that case the parser is advanced by the match length.
  @override
  bool onMatch(InlineParser parser, Match match) {
    final length = match.group(0)!.length;
    final start = parser.pos;
    final end = start + length;
    final textNode = Text(parser.source.substring(start, end));

    if (requiresDelimiterRun) {
      final run = DelimiterRun.tryParse(
        parser,
        start,
        end,
        syntax: this,
        node: textNode,
        allowIntraWord: allowIntraWord,
        tags: tags ?? [],
      );
      if (run == null) {
        // No delimiter run could be parsed; advance the parser by the match
        // length and report that nothing was produced.
        parser.advanceBy(length);
        return false;
      }
      parser.pushDelimiter(run);
    } else {
      // Without flanking rules the match is always a potential opener and
      // never a closer.
      parser.pushDelimiter(SimpleDelimiter(
        node: textNode,
        length: length,
        char: parser.source.codeUnitAt(start),
        canOpen: true,
        canClose: false,
        syntax: this,
        endPos: end,
      ));
    }

    parser.addNode(textNode);
    return true;
  }

  /// Attempts to close this tag at the current position.
  ///
  /// The contract allows returning `null` when a tag cannot be closed at the
  /// current position (for example, if a link reference cannot be found for a
  /// link tag's label).
  ///
  /// This implementation always closes the tag: it calls [getChildren], in
  /// which [parser] parses any nested text into child nodes, and returns an
  /// [Element] named [tag] that incorporates those child nodes.
  Node? close(
    InlineParser parser,
    Delimiter opener,
    Delimiter closer, {
    required String tag,
    required List<Node> Function() getChildren,
  }) {
    final children = getChildren();
    return Element(tag, children);
  }
}
/// Pairs an HTML tag name with an indicator length, for use in
/// [DelimiterSyntax.tags].
class DelimiterTag {
  /// Tag name of the HTML element.
  final String tag;

  /// The indicator length associated with [tag].
  ///
  /// [DelimiterRun.tryParse] orders tags by this value, ascending.
  ///
  /// NOTE(review): presumably the number of delimiter characters that selects
  /// this tag (e.g. 1 for `*`, 2 for `**`) - confirm against the inline
  /// parser.
  final int indicatorLength;

  DelimiterTag(this.tag, this.indicatorLength);
}
/// A delimiter indicating the possible "open" or possible "close" of a tag for
/// a [DelimiterSyntax].
abstract class Delimiter {
  /// The [Text] node holding the plain text of this delimiter.
  abstract Text node;

  /// The syntax which uses this delimiter to parse a tag.
  DelimiterSyntax get syntax;

  /// The type of delimiter.
  ///
  /// For the two-character image delimiter, `![`, this is `!`.
  int get char;

  /// The number of delimiters.
  int get length;

  /// Whether this delimiter can open emphasis or strong emphasis.
  bool get canOpen;

  /// Whether this delimiter can close emphasis or strong emphasis.
  bool get canClose;

  /// Whether the delimiter is active.
  ///
  /// Links cannot be nested, so pending ones must be "deactivated". Take the
  /// following text as an example:
  ///
  ///     Text [link and [more](links)](links).
  ///
  /// After `Text [` has been parsed there is one (pending) link in the state
  /// stack, and it is active by default. Once the next possible link,
  /// `[more](links)`, is parsed as a real link, the pending links (just the
  /// one, in this case) must be deactivated.
  abstract bool isActive;
}
/// A simple delimiter implements the [Delimiter] interface with basic fields,
/// and does not have the concept of "left-flanking" or "right-flanking".
class SimpleDelimiter implements Delimiter {
  /// Creates a delimiter from explicitly supplied values.
  ///
  /// The new delimiter starts out active.
  SimpleDelimiter({
    required this.node,
    required this.char,
    required this.length,
    required this.canOpen,
    required this.canClose,
    required this.syntax,
    required this.endPos,
  });

  @override
  Text node;

  @override
  bool isActive = true;

  @override
  final DelimiterSyntax syntax;

  @override
  final int char;

  @override
  final int length;

  @override
  final bool canOpen;

  @override
  final bool canClose;

  /// The end position supplied by the creator.
  ///
  /// [DelimiterSyntax.onMatch] passes the parser position plus the length of
  /// the matched text, i.e. the exclusive end offset of the match.
  final int endPos;
}
/// An implementation of [Delimiter] which uses concepts of "left-flanking" and
/// "right-flanking" to determine the values of [canOpen] and [canClose].
///
/// This is primarily used when parsing emphasis and strong emphasis, but can
/// also be used by other extensions of [DelimiterSyntax].
class DelimiterRun implements Delimiter {
  /// Matches a single punctuation character.
  ///
  /// According to
  /// [CommonMark](https://spec.commonmark.org/0.29/#punctuation-character):
  ///
  /// > A punctuation character is an ASCII punctuation character or anything in
  /// > the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or
  /// > `Ps`.
  // This RegExp is inspired by
  // https://github.com/commonmark/commonmark.js/blob/1f7d09099c20d7861a674674a5a88733f55ff729/lib/inlines.js#L39.
  // I don't know if there is any way to simplify it or maintain it.
  static final RegExp punctuation = RegExp('['
      r'''!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~'''
      r'\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE'
      r'\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E'
      r'\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E'
      r'\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14'
      r'\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB'
      r'\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736'
      r'\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F'
      r'\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E'
      r'\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051'
      r'\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A'
      r'\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC'
      r'\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42'
      r'\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE'
      r'\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF'
      r'\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF'
      r'\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19'
      r'\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03'
      r'\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F'
      r'\uFF5B\uFF5D\uFF5F-\uFF65'
      ']');

  /// The characters treated as whitespace when classifying a run: space, tab,
  /// carriage return and line feed.
  // TODO(srawlins): Unicode whitespace
  static final String whitespace = ' \t\r\n';

  @override
  Text node;

  @override
  final int char;

  /// The length of the text currently held by [node].
  @override
  int get length => node.text.length;

  @override
  bool isActive;

  @override
  final DelimiterSyntax syntax;

  /// Whether this run may open or close a tag even when it is both
  /// left-flanking and right-flanking.
  final bool allowIntraWord;

  @override
  final bool canOpen;

  @override
  final bool canClose;

  /// The tags available to this run, ordered by ascending
  /// [DelimiterTag.indicatorLength].
  final List<DelimiterTag> tags;

  /// Derives [canOpen] and [canClose] from the flanking information.
  ///
  /// A run can open when it is left-flanking and is either not right-flanking,
  /// allowed intra-word, or preceded by punctuation. A run can close when it
  /// is right-flanking and is either not left-flanking, allowed intra-word, or
  /// followed by punctuation. The run starts out active.
  DelimiterRun._({
    required this.node,
    required this.char,
    required this.syntax,
    required this.tags,
    required bool isLeftFlanking,
    required bool isRightFlanking,
    required bool isPrecededByPunctuation,
    required bool isFollowedByPunctuation,
    required this.allowIntraWord,
  })  : canOpen = isLeftFlanking &&
            (!isRightFlanking || allowIntraWord || isPrecededByPunctuation),
        canClose = isRightFlanking &&
            (!isLeftFlanking || allowIntraWord || isFollowedByPunctuation),
        isActive = true;

  /// Tries to parse a delimiter run from [runStart] (inclusive) to [runEnd]
  /// (exclusive).
  ///
  /// The single code unit before and after the run decides whether the run is
  /// left-flanking and/or right-flanking; the start and the end of the source
  /// are treated as a line feed. Returns `null` when the run is neither
  /// left-flanking nor right-flanking.
  ///
  /// [tags] is not modified: the returned run holds a copy of it sorted by
  /// ascending [DelimiterTag.indicatorLength].
  static DelimiterRun? tryParse(
    InlineParser parser,
    int runStart,
    int runEnd, {
    required DelimiterSyntax syntax,
    required List<DelimiterTag> tags,
    required Text node,
    bool allowIntraWord = false,
  }) {
    final source = parser.source;

    // A run touching either edge of the source behaves as if it were next to
    // a line ending.
    final preceding =
        runStart == 0 ? '\n' : source.substring(runStart - 1, runStart);
    final following =
        runEnd == source.length ? '\n' : source.substring(runEnd, runEnd + 1);

    final precededByPunctuation = punctuation.hasMatch(preceding);
    final followedByPunctuation = punctuation.hasMatch(following);
    final precededByWhitespace = whitespace.contains(preceding);
    final followedByWhitespace = whitespace.contains(following);

    // http://spec.commonmark.org/0.30/#left-flanking-delimiter-run
    final leftFlanking = !followedByWhitespace &&
        (!followedByPunctuation ||
            precededByWhitespace ||
            precededByPunctuation);

    // http://spec.commonmark.org/0.30/#right-flanking-delimiter-run
    final rightFlanking = !precededByWhitespace &&
        (!precededByPunctuation ||
            followedByWhitespace ||
            followedByPunctuation);

    if (!leftFlanking && !rightFlanking) {
      // Could not parse a delimiter run.
      return null;
    }

    // Sort a copy: `tags` usually is the syntax's own `tags` list, which must
    // not be reordered as a side effect of parsing, and sorting in place would
    // throw on an unmodifiable list.
    final sortedTags = [...tags]
      ..sort((a, b) => a.indicatorLength.compareTo(b.indicatorLength));

    return DelimiterRun._(
      node: node,
      char: parser.charAt(runStart),
      syntax: syntax,
      tags: sortedTags,
      isLeftFlanking: leftFlanking,
      isRightFlanking: rightFlanking,
      isPrecededByPunctuation: precededByPunctuation,
      isFollowedByPunctuation: followedByPunctuation,
      allowIntraWord: allowIntraWord,
    );
  }

  @override
  String toString() => '<char: $char, length: $length, canOpen: $canOpen, '
      'canClose: $canClose>';
}