// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
| |
import 'locale_deprecations.dart';
import 'locale_extensions.dart';
import 'locale_implementation.dart';
| |
/// A parser for [Unicode Locale
/// Identifiers](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier).
///
/// The constructor parses eagerly. Afterwards, [problems] lists everything
/// that was found to be wrong with the identifier, and [toLocale] builds the
/// parsed locale, or returns null if there were problems.
class LocaleParser {
  /// Language subtag of Unicode Language Identifier.
  ///
  /// Remains 'und' if the identifier is 'root' or has no language subtag.
  String _languageCode = 'und';

  /// Script subtag of Unicode Language Identifier.
  String _scriptCode;

  /// Region subtag of Unicode Language Identifier.
  String _countryCode;

  /// Variant subtags of Unicode Language Identifier.
  List<String> _variants;

  /// Unicode Locale Extensions, also known as "U Extension".
  ///
  /// Attributes, if any, are stored under the empty key, joined by '-'.
  Map<String, String> _uExtensions;

  /// Transformed Extensions, also known as "T Extension".
  ///
  /// The source language identifier, if any, is stored under the empty key.
  Map<String, String> _tExtensions;

  /// Private-Use Extensions: the subtags following 'x', joined by '-'.
  String _xExtensions;

  /// Other Extensions, keyed by singleton, with subtags joined by '-'.
  Map<String, String> _otherExtensions;

  /// List of problems with the localeId the parser tried to parse.
  ///
  /// An empty list indicates problem-free parsing.
  final List<String> problems = <String>[];

  /// Produces a Locale instance for the parser's current state.
  ///
  /// Returns null if the Locale would be syntactically invalid.
  LocaleImplementation toLocale() {
    if (problems.isNotEmpty) return null;

    // Only build an extensions object if at least one extension was parsed;
    // otherwise null is passed on.
    LocaleExtensions extensions;
    final hasExtensions = _uExtensions != null ||
        _tExtensions != null ||
        _otherExtensions != null ||
        _xExtensions != null;
    if (hasExtensions) {
      extensions = LocaleExtensions(
          _uExtensions, _tExtensions, _otherExtensions, _xExtensions);
    }
    return LocaleImplementation.unsafe(
      _languageCode,
      scriptCode: _scriptCode,
      countryCode: _countryCode,
      variants: _variants,
      extensions: extensions,
    );
  }

  /// Subtags of the Locale Identifier, as split by [separators].
  ///
  /// Starts out empty rather than null: the constructor does not split the
  /// 'root' identifier, and [atEnd] and [advance] must remain callable on such
  /// a parser.
  List<String> _subtags = const <String>[];

  /// RegExp that matches Unicode Locale Identifier subtag separators.
  static final separators = RegExp('[-_]');

  /// Last accepted subtag.
  String _accepted;

  /// Last accepted subtag.
  String accepted() => _accepted;

  /// Last accepted list of subtags (for variants).
  List<String> _acceptedList;

  /// Last accepted list of subtags (for variants).
  List<String> acceptedList() => _acceptedList;

  /// Current subtag pending acceptance.
  String _current;

  /// Current subtag pending acceptance, or null once all subtags are consumed.
  String current() => _current;

  /// Index of the current subtag.
  int _currentIndex = 0;

  /// Advance to the next subtag (see [current] and [accepted]).
  ///
  /// The subtag that was current becomes the accepted one; [current] becomes
  /// null when there is no next subtag.
  void advance() {
    _accepted = _current;
    _currentIndex++;
    _current = _currentIndex < _subtags.length ? _subtags[_currentIndex] : null;
  }

  /// Returns true if all subtags have been parsed.
  bool atEnd() => _currentIndex >= _subtags.length;

  /// Parses [Unicode CLDR Locale
  /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers).
  ///
  /// This method does not parse all BCP 47 tags. See [BCP 47
  /// Conformance](https://www.unicode.org/reports/tr35/#BCP_47_Conformance) for
  /// details.
  ///
  /// localeId may not be null.
  ///
  /// Parsing failed if there are any entries in [problems].
  LocaleParser(String localeId) {
    assert(localeId != null);

    // Calling toLowerCase unconditionally should be efficient if
    // string_patch.dart is in use:
    // https://github.com/dart-lang/sdk/blob/cabaa78cc57d08bcfcd75bfe99a42c19ed497d26/runtime/lib/string_patch.dart#L1178
    localeId = localeId.toLowerCase();

    // 'root' is accepted as-is: the language stays 'und' and nothing else is
    // set.
    if (localeId == 'root') return;

    _subtags = localeId.split(separators);
    _currentIndex = 0;
    _current = _subtags[0];

    // The identifier must start with a language subtag, a script subtag, or a
    // language subtag followed by a script subtag.
    final languageFound = acceptLanguage();
    if (languageFound) {
      _languageCode = replaceDeprecatedLanguageSubtag(accepted());
    }
    if (acceptScript()) {
      _scriptCode = toCapCase(accepted());
    } else if (!languageFound) {
      problems.add('bad language/script');
    }

    if (acceptRegion()) {
      // Subtags were lowercased above; regions are stored in upper case.
      _countryCode = replaceDeprecatedRegionSubtag(accepted().toUpperCase());
    }

    acceptVariants();
    _variants = acceptedList();

    processExtensions();

    // Anything left over did not fit the grammar at its position.
    if (!atEnd()) {
      problems.add('bad subtag "${current()}"');
    }
  }

  /// Consumes all remaining subtags, if syntactically valid.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processExtensions() {
    while (acceptSingleton()) {
      final singleton = accepted();
      if (singleton == 'u') {
        processUExtensions();
      } else if (singleton == 't') {
        processTExtensions();
      } else if (singleton == 'x') {
        // Private use must come last: stop looking for further extensions.
        processPrivateUseExtensions();
        break;
      } else {
        processOtherExtensions(singleton);
      }
    }
  }

  /// Consumes tags matched by `unicode_locale_extensions` in the specification,
  /// except that the 'u' singleton must already be accepted.
  ///
  /// Attributes are sorted and stored under the empty key. A key with no type
  /// subtags, or whose only type subtag is 'true', is stored with an empty
  /// value.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processUExtensions() {
    if (_uExtensions != null) {
      problems.add('duplicate "u"');
      return;
    }
    _uExtensions = <String, String>{};
    var empty = true;

    // unicode_locale_extensions: collect the leading attributes.
    final attributes = <String>[];
    while (acceptLowAlphaNumeric3to8()) {
      attributes.add(accepted());
    }
    if (attributes.isNotEmpty) {
      empty = false;
      attributes.sort();
      _uExtensions[''] = attributes.join('-');
    }

    // unicode_locale_extensions: collect "(sep keyword)*".
    while (acceptUExtensionKey()) {
      empty = false;
      final key = accepted();
      final typeParts = <String>[];
      while (acceptLowAlphaNumeric3to8()) {
        typeParts.add(accepted());
      }
      if (_uExtensions.containsKey(key)) {
        problems.add('duplicate "$key"');
      } else if (typeParts.length == 1 && typeParts[0] == 'true') {
        _uExtensions[key] = '';
      } else {
        _uExtensions[key] = typeParts.join('-');
      }
    }

    if (empty) {
      problems.add('empty "u"');
    }
  }

  /// Consumes tags matched by `transformed_extensions` in the specification,
  /// except that the 't' singleton must already be accepted.
  ///
  /// The source language identifier, if present, is stored under the empty key
  /// with all of its subtags in lower case.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processTExtensions() {
    if (_tExtensions != null) {
      problems.add('duplicate "t"');
      return;
    }
    _tExtensions = <String, String>{};
    var empty = true;

    // transformed_extensions: collect the optional source language identifier.
    if (acceptLanguage()) {
      empty = false;
      final tlang = <String>[replaceDeprecatedLanguageSubtag(accepted())];
      if (acceptScript()) {
        tlang.add(accepted());
      }
      if (acceptRegion()) {
        // The deprecation lookup is given the upper-case form; the result is
        // lowercased again before it is stored.
        tlang.add(replaceDeprecatedRegionSubtag(accepted().toUpperCase())
            .toLowerCase());
      }
      acceptVariants();
      tlang.addAll(acceptedList());
      _tExtensions[''] = tlang.join('-');
    }

    // transformed_extensions: collect "(sep tfield)*".
    while (acceptTExtensionKey()) {
      final tkey = accepted();
      final tvalueParts = <String>[];
      while (acceptLowAlphaNumeric3to8()) {
        tvalueParts.add(accepted());
      }
      if (tvalueParts.isEmpty) {
        problems.add('empty "$tkey"');
        continue;
      }
      empty = false;
      if (_tExtensions.containsKey(tkey)) {
        problems.add('duplicate "$tkey"');
      } else {
        _tExtensions[tkey] = tvalueParts.join('-');
      }
    }

    if (empty) {
      problems.add('empty "t"');
    }
  }

  /// Consumes tags matched by `pu_extensions` in the specification, except that
  /// the 'x' singleton must already be accepted.
  ///
  /// At least one subtag must follow the singleton, as for the 'u' and 't'
  /// extensions; otherwise a problem is reported.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processPrivateUseExtensions() {
    final values = <String>[];
    while (acceptLowAlphaNumeric1to8()) {
      values.add(accepted());
    }
    if (values.isEmpty) {
      problems.add('empty "x"');
      return;
    }
    _xExtensions = values.join('-');
  }

  /// Consumes tags matched by `other_extensions` in the specification, except
  /// that the singleton in question must already be accepted and passed as
  /// parameter.
  ///
  /// At least one subtag must follow the singleton, as for the 'u' and 't'
  /// extensions; otherwise a problem is reported.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processOtherExtensions(String singleton) {
    final values = <String>[];
    while (acceptLowAlphaNumeric2to8()) {
      values.add(accepted());
    }
    if (values.isEmpty) {
      problems.add('empty "$singleton"');
      return;
    }
    if (_otherExtensions == null) {
      _otherExtensions = <String, String>{};
    } else if (_otherExtensions.containsKey(singleton)) {
      problems.add('duplicate "$singleton"');
      return;
    }
    _otherExtensions[singleton] = values.join('-');
  }

  /// Advances and returns true if the current subtag matches [pattern].
  ///
  /// Returns false without advancing if all subtags have been consumed or the
  /// current subtag does not match.
  bool _acceptIfMatches(RegExp pattern) {
    if (atEnd() || !pattern.hasMatch(current())) return false;
    advance();
    return true;
  }

  /// Advances and returns true if current subtag is a language subtag.
  bool acceptLanguage() => _acceptIfMatches(_languageRegExp);

  static final _languageRegExp = RegExp(r'^[a-z]{2,3}$|^[a-z]{5,8}$');

  /// Advances and returns true if current subtag is a script subtag.
  bool acceptScript() => _acceptIfMatches(_scriptRegExp);

  static final _scriptRegExp = RegExp(r'^[a-z]{4}$');

  /// Advances and returns true if current subtag is a region subtag.
  bool acceptRegion() => _acceptIfMatches(_regionRegExp);

  static final _regionRegExp = RegExp(r'^[a-z]{2}$|^\d{3}$');

  /// Advances, collecting subtags in [_acceptedList], as long as the current
  /// subtag is a variant subtag.
  ///
  /// Does not return a boolean: when done, _acceptedList will contain the
  /// collected subtags.
  void acceptVariants() {
    _acceptedList = <String>[];
    while (!atEnd() && _variantRegExp.hasMatch(current())) {
      _acceptedList.add(current());
      advance();
    }
  }

  static final _variantRegExp = RegExp(r'^[a-z\d]{5,8}$|^\d[a-z\d]{3}$');

  /// Advances and returns true if current subtag is a singleton.
  ///
  /// A singleton is a single letter or digit. Digits are included because the
  /// `other_extensions` rule in the specification permits any alphanumeric
  /// singleton other than 't', 'u' and 'x'.
  bool acceptSingleton() => _acceptIfMatches(_singletonRegExp);

  static final _singletonRegExp = RegExp(r'^[a-z\d]$');

  /// Advances and returns true if current subtag is alphanumeric, with length
  /// ranging from 1 to 8.
  bool acceptLowAlphaNumeric1to8() => _acceptIfMatches(_alphaNumeric1to8RegExp);

  static final _alphaNumeric1to8RegExp = RegExp(r'^[a-z\d]{1,8}$');

  /// Advances and returns true if current subtag is alphanumeric, with length
  /// ranging from 2 to 8.
  bool acceptLowAlphaNumeric2to8() => _acceptIfMatches(_alphaNumeric2to8RegExp);

  static final _alphaNumeric2to8RegExp = RegExp(r'^[a-z\d]{2,8}$');

  /// Advances and returns true if current subtag is alphanumeric, with length
  /// ranging from 3 to 8.
  bool acceptLowAlphaNumeric3to8() => _acceptIfMatches(_alphaNumeric3to8RegExp);

  static final _alphaNumeric3to8RegExp = RegExp(r'^[a-z\d]{3,8}$');

  /// Advances and returns true if current subtag is a valid U Extension key.
  bool acceptUExtensionKey() => _acceptIfMatches(_uExtensionKeyRegExp);

  static final _uExtensionKeyRegExp = RegExp(r'^[a-z\d][a-z]$');

  /// Advances and returns true if current subtag is a valid T Extension key
  /// (`tkey` in the specification).
  bool acceptTExtensionKey() => _acceptIfMatches(_tExtensionKeyRegExp);

  static final _tExtensionKeyRegExp = RegExp(r'^[a-z]\d$');
}