blob: bcddfa6ae6f094536f909692ed02d3378c200619 [file] [log] [blame] [edit]
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
/// Contains a parser for ICU format plural/gender/select format for localized
/// messages. See extract_to_arb.dart and make_hardcoded_translation.dart.
library message_parser;
import 'messages/composite_message.dart';
import 'messages/literal_string_message.dart';
import 'messages/message.dart';
import 'messages/pair_message.dart';
import 'messages/submessages/gender.dart';
import 'messages/submessages/plural.dart';
import 'messages/submessages/select.dart';
import 'messages/variable_substitution_message.dart';
class MessageParser {
final _ParserUtil _parser;
MessageParser(String input) : _parser = _ParserUtil(input);
Message pluralGenderSelectParse() =>
(_parser.pluralOrGenderOrSelect(0) ?? _parser.empty(0)).result;
Message nonIcuMessageParse() =>
(_parser.simpleText(0) ?? _parser.empty(0)).result;
}
/// Holds a parsed piece of the input
class At<T extends Message> {
/// Holds the result of parsing this piece of the message
final T result;
/// Holds the position of the parser after parsing this piece of the message
final int at;
At(this.result, this.at);
/// Helper method to simplify null checks
At<S> mapResult<S extends Message>(S Function(T res) callable) =>
At<S>(callable(result), at);
}
/// Methods for actually parsing a message.
///
/// The parser goes through the message in a DFS kind of way. Whenever a branch
/// fails to parse, it returns null.
class _ParserUtil {
final String input;
const _ParserUtil(this.input);
//Precompiled regexes
static final RegExp quotedBracketOpen = RegExp(r"'({)'");
static final RegExp quotedBracketClose = RegExp(r"'(})'");
static final RegExp doubleQuotes = RegExp(r"'(')");
static final RegExp numberRegex = RegExp(r'\s*([0-9]+)\s*');
static final RegExp nonICURegex = RegExp(r'[^\{\}\<]');
static final RegExp idRegex = RegExp(r'\s*([a-zA-Z][a-zA-Z_0-9]*)\s*');
static final RegExp commaWithWhitespace = RegExp(r'\s*(,)\s*');
static final List<String> pluralKeywords = [
'=0',
'=1',
'=2',
'zero',
'one',
'two',
'few',
'many',
'other'
];
static final Map<String, RegExp> pluralKeywordsToRegex = {
for (var v in pluralKeywords) v: RegExp('\\s*$v\\s*')
};
static final List<String> genderKeywords = ['female', 'male', 'other'];
static final Map<String, RegExp> genderKeywordsToRegex = {
for (var v in genderKeywords) v: RegExp('\\s*$v\\s*')
};
/// Corresponds to a [+] operator in a regex, matching at least one occurrence
/// of the [callable].
static At<CompositeMessage>? oneOrMore(
At? Function(int s) callable,
int at,
) {
var newAt = -1;
var results = <At>[];
while (newAt != at) {
newAt = at;
var parser = callable(newAt);
if (parser != null) {
at = parser.at;
results.add(parser);
}
}
return results.isNotEmpty
? At(CompositeMessage(results.map((p) => p.result).toList()), newAt)
: null;
}
/// Corresponds to an [AND] operator, matching all [callables] or failing, i.e.
/// returning [null].
static At<CompositeMessage>? and(
List<At? Function(int s)> callables,
int at,
) {
var newAt = at;
var resParser = <At>[];
for (var i = 0; i < callables.length; i++) {
var callable = callables[i];
var parser = callable.call(newAt);
if (parser != null) {
resParser.add(parser);
newAt = parser.at;
} else {
return null;
}
}
return At(CompositeMessage(resParser.map((p) => p.result).toList()), newAt);
}
/// Match a simple string
At<LiteralString>? matchString(int at, String t) => input.startsWith(t, at)
? At(LiteralString(t, null), at + t.length)
: null;
/// Match any of the given keywords
At<LiteralString>? asKeywords(Map<String, RegExp> keywordsToRegex, int at) {
if (at < input.length) {
for (var entry in keywordsToRegex.entries) {
var match = entry.value.matchAsPrefix(input, at);
if (match != null) {
return At(LiteralString(entry.key, null), match.end);
}
}
}
return null;
}
/// Parse whitespace
At<LiteralString> trimAt(int at) => at < input.length
? At(LiteralString(input), RegExp(r'\s*').matchAsPrefix(input, at)!.end)
: At(LiteralString(''), at);
Pattern matchUnescaped(String curly) => RegExp("(?:^|''|[^'])($curly)");
At<LiteralString>? matchUnescapedCurly(String curly, int at) {
var match = matchUnescaped(curly).matchAsPrefix(input.substring(at));
return match != null
? At(LiteralString(curly, null), at + match[1]!.length)
: null;
}
At<LiteralString>? openCurly(int at) => matchUnescapedCurly('{', at);
At<LiteralString>? closeCurly(int at) => matchUnescapedCurly('}', at);
At<LiteralString>? icuEscapedText(int at) {
if (at < input.length) {
var match = quotedBracketOpen.matchAsPrefix(input, at) ??
quotedBracketClose.matchAsPrefix(input, at) ??
doubleQuotes.matchAsPrefix(input, at);
if (match != null) {
var matchGroup = match.group(1);
if (matchGroup != null) {
return At(LiteralString(matchGroup), match.end);
}
}
}
return null;
}
At<LiteralString>? icuText(int at) {
var match = nonICURegex.matchAsPrefix(input, at);
return at < input.length && match != null
? At(LiteralString(input[at]), at + 1)
: null;
}
At<LiteralString>? messageText(int at) {
return oneOrMore((s) => icuEscapedText(s) ?? icuText(s), at)
?.mapResult((compMsg) {
return LiteralString(compMsg.pieces
.whereType<LiteralString>()
.map((e) => e.string)
.join());
});
}
At<LiteralString>? nonIcuMessageText(int at) {
if (at < input.length) {
var inputAt = input.substring(at);
var indexOf = indexOfUnescapedOpenBracket(inputAt);
String matched;
if (indexOf == 0) {
return null;
} else if (indexOf == -1) {
matched = inputAt;
} else {
matched = inputAt.substring(0, indexOf);
}
return At(LiteralString(matched), at + matched.length);
}
return null;
}
int indexOfUnescapedOpenBracket(String inputAt) {
final characters = inputAt.runes.toList();
var nextIsEscaped = false;
for (var i = 0; i < characters.length; i++) {
var isQuote = characters[i] == Message.$singleQuote;
if (!nextIsEscaped && isQuote && i + 1 < characters.length) {
nextIsEscaped = true;
} else {
if (!nextIsEscaped && characters[i] == Message.$openCurlyBracket) {
return i;
}
nextIsEscaped = false;
}
}
return -1;
}
At<LiteralString>? number(int at) {
var match = numberRegex.matchAsPrefix(input, at);
return match != null && match.group(1) != null
? At(LiteralString(int.parse(match.group(1)!).toString()), match.end)
: null;
}
At<LiteralString>? id(int at) {
if (at < input.length) {
var match = idRegex.matchAsPrefix(input, at);
if (match != null) {
var matchGroup = match.group(1);
if (matchGroup != null) {
return At(LiteralString(matchGroup), match.end);
}
}
}
return null;
}
At<LiteralString>? comma(int at) {
var match = commaWithWhitespace.matchAsPrefix(input, at);
return at < input.length && match != null
? At(LiteralString(','), match.end)
: null;
}
At<LiteralString>? preface(int at) {
return and(
[
(s) => openCurly(s),
(s) => id(s),
(s) => comma(s),
],
at,
)?.mapResult((compMsg) => compMsg.pieces[1] as LiteralString);
}
At<PairMessage<LiteralString, Message>>? pluralClausePairs(int at) {
return and(
[
(s) => trimAt(s),
(s) => asKeywords(pluralKeywordsToRegex, s),
(s) => openCurly(s),
(s) => interiorText(s),
(s) => closeCurly(s),
(s) => trimAt(s),
],
at,
)?.mapResult((compMsg) {
var pluralKeyword = compMsg.pieces[1] as LiteralString;
var interiorText = compMsg.pieces[3];
return PairMessage<LiteralString, Message>(pluralKeyword, interiorText);
});
}
At<Plural>? intlPlural(int at) {
return and(
[
(s) => preface(s),
(s) => matchString(s, 'plural'),
(s) => comma(s),
(s) => oneOrMore((s1) => pluralClausePairs(s1), s),
(s) => closeCurly(s),
],
at,
)?.mapResult((compMsg) {
var preface = compMsg.pieces[0] as LiteralString;
var pluralClause = compMsg.pieces[3] as CompositeMessage;
return Plural.from(preface.string, pluralClause.pieces, null);
});
}
At<LiteralString>? genderKeyword(int at) =>
asKeywords(genderKeywordsToRegex, at);
At<CompositeMessage>? genderClause(int at) {
return oneOrMore(
(s1) => and(
[
(s) => trimAt(s),
(s) => genderKeyword(s),
(s) => openCurly(s),
(s) => interiorText(s),
(s) => closeCurly(s),
(s) => trimAt(s),
],
s1,
)?.mapResult((compMsg) {
var genderKeyword = compMsg.pieces[1] as LiteralString;
var interiorText = compMsg.pieces[3];
return PairMessage(genderKeyword, interiorText);
}),
at,
);
}
At<Gender>? intlGender(int at) {
return and(
[
(s) => preface(s),
(s) => selectLiteral(s),
(s) => comma(s),
(s) => genderClause(s),
(s) => closeCurly(s),
],
at,
)?.mapResult((compMsg) {
var preface = compMsg.pieces[0] as LiteralString;
var genderClause = (compMsg.pieces[3] as CompositeMessage);
return Gender.from(preface.string, genderClause.pieces, null);
});
}
At<LiteralString>? selectLiteral(int at) => matchString(at, 'select');
At<CompositeMessage>? selectClause(int at) {
return oneOrMore(
(s1) => and(
[
(s) => id(s),
(s) => openCurly(s),
(s) => interiorText(s),
(s) => closeCurly(s),
],
s1,
)?.mapResult((compMsg) {
var id = compMsg.pieces[0] as LiteralString;
var interiorText = compMsg.pieces[2];
return PairMessage(id, interiorText);
}),
at,
);
}
At<Select>? intlSelect(int at) {
return and(
[
(s) => preface(s),
(s) => selectLiteral(s),
(s) => comma(s),
(s) => selectClause(s),
(s) => closeCurly(s),
],
at,
)?.mapResult((compMsg) {
var preface = compMsg.pieces[0] as LiteralString;
var selectClause = compMsg.pieces[3] as CompositeMessage;
return Select.from(preface.string, selectClause.pieces, null);
});
}
At<Message>? pluralOrGenderOrSelect(int at) =>
intlPlural(at) ?? intlGender(at) ?? intlSelect(at);
At<Message>? contents(int at) =>
pluralOrGenderOrSelect(at) ?? parameter(at) ?? messageText(at);
At interiorText(int at) => oneOrMore((s) => contents(s), at) ?? empty(at);
At<Message>? simpleText(int at) {
return oneOrMore(
(s) => nonIcuMessageText(s) ?? parameter(s) ?? openCurly(s),
at,
);
}
At<LiteralString> empty(int at) => At(LiteralString(''), at);
At<VariableSubstitution>? parameter(int at) {
return and(
[
(s) => openCurly(s),
(s) => id(s),
(s) => closeCurly(s),
],
at,
)?.mapResult((compMsg) {
var id = (compMsg.pieces[1] as LiteralString);
return VariableSubstitution.named(id.string);
});
}
}