Add a Locale class to package:intl.
* Locale Identifiers are described by
https://docs.google.com/document/d/1kV5lL7kDkHnzvGVBZbWDurwCI_waLaamSw6QwavXjvc/edit
which is not yet available externally. Once an external version is
available, might we want to link to it from dartdoc?
* This CL does not add the LikelySubtags data, so its addLikelySubtags
and minimizeSubtags implementations only align with
CLDR/ICU/ECMAScript for cases where the Target script and region
columns of
http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
are "Latn" and "US".
* Dartdoc output:
https://hugovdm.users.x20web.corp.google.com/locale_225071692/intl/Locale-class.html
Reviewers:
- sra: reviewed with "Dart for the Web" emphasis
- lrn: Dart Language review
- alanknight: good fit into package:intl and ecosystem
- mnita: general API design / FYI
- cira: general API design / FYI
PiperOrigin-RevId: 240140680
diff --git a/lib/src/locale.dart b/lib/src/locale.dart
new file mode 100644
index 0000000..3a941eb
--- /dev/null
+++ b/lib/src/locale.dart
@@ -0,0 +1,111 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'locale/locale_implementation.dart';
+import 'locale/locale_parser.dart' show LocaleParser;
+
+/// A representation of a [Unicode Locale
+/// Identifier](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier).
+///
+/// To create Locale instances, consider using:
+/// * [fromSubtags] for language, script and region,
+/// * [parse] for Unicode Locale Identifier strings (throws exceptions on
+/// failure),
+/// * [tryParse] for Unicode Locale Identifier strings (returns null on
+/// failure).
+abstract class Locale {
+  /// Builds a Locale from language, script and region subtags only; the
+  /// result has no variants and no extensions.
+  ///
+  /// Throws a [FormatException] if any subtag is syntactically invalid.
+  static Locale fromSubtags(
+      {String languageCode, String scriptCode, String countryCode}) {
+    return LocaleImplementation.fromSubtags(
+        languageCode: languageCode,
+        scriptCode: scriptCode,
+        countryCode: countryCode);
+  }
+
+  /// Parses [Unicode Locale Identifiers][localeIds] to produce [Locale]
+  /// instances.
+  ///
+  /// [localeIds]:
+  /// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
+  ///
+  /// Throws a [FormatException] if [localeIdentifier] is syntactically invalid.
+  static Locale parse(String localeIdentifier) {
+    assert(localeIdentifier != null);
+    final parser = LocaleParser(localeIdentifier);
+    final parsed = parser.toLocale();
+    if (parsed != null) return parsed;
+
+    // No Locale could be produced: report everything the parser recorded.
+    throw FormatException('Locale "$localeIdentifier": '
+        '${parser.problems.join("; ")}.');
+  }
+
+  /// Parses [Unicode Locale Identifiers][localeIds] to produce [Locale]
+  /// instances.
+  ///
+  /// [localeIds]:
+  /// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
+  ///
+  /// Returns `null` if [localeIdentifier] is syntactically invalid.
+  static Locale tryParse(String localeIdentifier) {
+    assert(localeIdentifier != null);
+    // toLocale() is null whenever the parser recorded a problem.
+    return LocaleParser(localeIdentifier).toLocale();
+  }
+
+  /// The language subtag of the Locale Identifier.
+  ///
+  /// It is syntactically valid, normalized (has correct case) and canonical
+  /// (deprecated tags have been replaced), but not necessarily valid (the
+  /// language might not exist) because the list of valid languages changes with
+  /// time.
+  String get languageCode;
+
+  /// The script subtag of the Locale Identifier, null if absent.
+  ///
+  /// It is syntactically valid and normalized (has correct case), but not
+  /// necessarily valid (the script might not exist) because the list of valid
+  /// scripts changes with time.
+  String get scriptCode;
+
+  /// The region subtag of the Locale Identifier, null if absent.
+  ///
+  /// It is syntactically valid, normalized (has correct case) and canonical
+  /// (deprecated tags have been replaced), but not necessarily valid (the
+  /// region might not exist) because the list of valid regions changes with
+  /// time.
+  String get countryCode;
+
+  /// Iterable of variant subtags.
+  ///
+  /// They are syntactically valid, normalized (have correct case) and sorted
+  /// alphabetically, but not necessarily valid (variants might not exist)
+  /// because the list of variants changes with time.
+  Iterable<String> get variants;
+
+  /// Returns the canonical [Unicode BCP47 Locale
+  /// Identifier](http://www.unicode.org/reports/tr35/#BCP_47_Conformance) for
+  /// this locale.
+  String toLanguageTag();
+
+  /// Returns the canonical [Unicode BCP47 Locale
+  /// Identifier](http://www.unicode.org/reports/tr35/#BCP_47_Conformance) for
+  /// this locale.
+  @override
+  String toString() {
+    return toLanguageTag();
+  }
+
+  @override
+  bool operator ==(Object other) =>
+      identical(this, other) ||
+      (other is Locale && toLanguageTag() == other.toLanguageTag());
+
+  @override
+  int get hashCode => toLanguageTag().hashCode;
+}
diff --git a/lib/src/locale/locale_deprecations.dart b/lib/src/locale/locale_deprecations.dart
new file mode 100644
index 0000000..5dd0922
--- /dev/null
+++ b/lib/src/locale/locale_deprecations.dart
@@ -0,0 +1,119 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Returns the replacement for a deprecated language subtag.
+///
+/// Subtags without a known replacement are returned unchanged. The subtag must
+/// already be lowercase.
+///
+/// TODO(b/127689510): write a new script for updating this list from CLDR data.
+String replaceDeprecatedLanguageSubtag(String languageCode) =>
+    _deprecatedLanguageTagReplacements[languageCode] ?? languageCode;
+
+const Map<String, String> _deprecatedLanguageTagReplacements = {
+  // Keys are deprecated (lowercase) language subtags; values are their
+  // replacements. Generated by a modified version of Flutter's gen_locale.dart
+  // from the language subtag registry as of 2019-02-20.
+  // TODO(hugovdm): make improvements to the tool.
+  'in': 'id', // Indonesian; deprecated 1989-01-01
+  'iw': 'he', // Hebrew; deprecated 1989-01-01
+  'ji': 'yi', // Yiddish; deprecated 1989-01-01
+  'jw': 'jv', // Javanese; deprecated 2001-08-13
+  'mo': 'ro', // Moldavian, Moldovan; deprecated 2008-11-22
+  'aam': 'aas', // Aramanik; deprecated 2015-02-12
+  'adp': 'dz', // Adap; deprecated 2015-02-12
+  'aue': 'ktz', // ǂKxʼauǁʼein; deprecated 2015-02-12
+  'ayx': 'nun', // Ayi (China); deprecated 2011-08-16
+  'bgm': 'bcg', // Baga Mboteni; deprecated 2016-05-30
+  'bjd': 'drl', // Bandjigali; deprecated 2012-08-12
+  'ccq': 'rki', // Chaungtha; deprecated 2012-08-12
+  'cjr': 'mom', // Chorotega; deprecated 2010-03-11
+  'cka': 'cmr', // Khumi Awa Chin; deprecated 2012-08-12
+  'cmk': 'xch', // Chimakum; deprecated 2010-03-11
+  'coy': 'pij', // Coyaima; deprecated 2016-05-30
+  'cqu': 'quh', // Chilean Quechua; deprecated 2016-05-30
+  'drh': 'khk', // Darkhat; deprecated 2010-03-11
+  'drw': 'prs', // Darwazi; deprecated 2010-03-11
+  'gav': 'dev', // Gabutamon; deprecated 2010-03-11
+  'gfx': 'vaj', // Mangetti Dune ǃXung; deprecated 2015-02-12
+  'ggn': 'gvr', // Eastern Gurung; deprecated 2016-05-30
+  'gti': 'nyc', // Gbati-ri; deprecated 2015-02-12
+  'guv': 'duz', // Gey; deprecated 2016-05-30
+  'hrr': 'jal', // Horuru; deprecated 2012-08-12
+  'ibi': 'opa', // Ibilo; deprecated 2012-08-12
+  'ilw': 'gal', // Talur; deprecated 2013-09-10
+  'jeg': 'oyb', // Jeng; deprecated 2017-02-23
+  'kgc': 'tdf', // Kasseng; deprecated 2016-05-30
+  'kgh': 'kml', // Upper Tanudan Kalinga; deprecated 2012-08-12
+  'koj': 'kwv', // Sara Dunjo; deprecated 2015-02-12
+  'krm': 'bmf', // Krim; deprecated 2017-02-23
+  'ktr': 'dtp', // Kota Marudu Tinagas; deprecated 2016-05-30
+  'kvs': 'gdj', // Kunggara; deprecated 2016-05-30
+  'kwq': 'yam', // Kwak; deprecated 2015-02-12
+  'kxe': 'tvd', // Kakihum; deprecated 2015-02-12
+  'kzj': 'dtp', // Coastal Kadazan; deprecated 2016-05-30
+  'kzt': 'dtp', // Tambunan Dusun; deprecated 2016-05-30
+  'lii': 'raq', // Lingkhim; deprecated 2015-02-12
+  'lmm': 'rmx', // Lamam; deprecated 2014-02-28
+  'meg': 'cir', // Mea; deprecated 2013-09-10
+  'mst': 'mry', // Cataelano Mandaya; deprecated 2010-03-11
+  'mwj': 'vaj', // Maligo; deprecated 2015-02-12
+  'myt': 'mry', // Sangab Mandaya; deprecated 2010-03-11
+  'nad': 'xny', // Nijadali; deprecated 2016-05-30
+  'ncp': 'kdz', // Ndaktup; deprecated 2018-03-08
+  'nnx': 'ngv', // Ngong; deprecated 2015-02-12
+  'nts': 'pij', // Natagaimas; deprecated 2016-05-30
+  'oun': 'vaj', // ǃOǃung; deprecated 2015-02-12
+  'pcr': 'adx', // Panang; deprecated 2013-09-10
+  'pmc': 'huw', // Palumata; deprecated 2016-05-30
+  'pmu': 'phr', // Mirpur Panjabi; deprecated 2015-02-12
+  'ppa': 'bfy', // Pao; deprecated 2016-05-30
+  'ppr': 'lcq', // Piru; deprecated 2013-09-10
+  'pry': 'prt', // Pray 3; deprecated 2016-05-30
+  'puz': 'pub', // Purum Naga; deprecated 2014-02-28
+  'sca': 'hle', // Sansu; deprecated 2012-08-12
+  'skk': 'oyb', // Sok; deprecated 2017-02-23
+  'tdu': 'dtp', // Tempasuk Dusun; deprecated 2016-05-30
+  'thc': 'tpo', // Tai Hang Tong; deprecated 2016-05-30
+  'thx': 'oyb', // The; deprecated 2015-02-12
+  'tie': 'ras', // Tingal; deprecated 2011-08-16
+  'tkk': 'twm', // Takpa; deprecated 2011-08-16
+  'tlw': 'weo', // South Wemale; deprecated 2012-08-12
+  'tmp': 'tyj', // Tai Mène; deprecated 2016-05-30
+  'tne': 'kak', // Tinoc Kallahan; deprecated 2016-05-30
+  'tnf': 'prs', // Tangshewi; deprecated 2010-03-11
+  'tsf': 'taj', // Southwestern Tamang; deprecated 2015-02-12
+  'uok': 'ema', // Uokha; deprecated 2015-02-12
+  'xba': 'cax', // Kamba (Brazil); deprecated 2016-05-30
+  'xia': 'acn', // Xiandao; deprecated 2013-09-10
+  'xkh': 'waw', // Karahawyana; deprecated 2016-05-30
+  'xsj': 'suj', // Subi; deprecated 2015-02-12
+  'ybd': 'rki', // Yangbye; deprecated 2012-08-12
+  'yma': 'lrr', // Yamphe; deprecated 2012-08-12
+  'ymt': 'mtm', // Mator-Taygi-Karagas; deprecated 2015-02-12
+  'yos': 'zom', // Yos; deprecated 2013-09-10
+  'yuu': 'yug', // Yugh; deprecated 2014-02-28
+};
+
+/// Returns the replacement for a deprecated region subtag.
+///
+/// Subtags without a known replacement are returned unchanged. The subtag must
+/// already be uppercase.
+///
+/// TODO(b/127689510): write a new script for updating this list from CLDR data.
+String replaceDeprecatedRegionSubtag(String regionCode) =>
+    _deprecatedRegionTagReplacements[regionCode] ?? regionCode;
+
+const Map<String, String> _deprecatedRegionTagReplacements = {
+  // Keys are deprecated (uppercase) region subtags; values are their
+  // replacements. Generated by a modified version of Flutter's gen_locale.dart
+  // from the language subtag registry as of 2019-02-20.
+  // TODO(hugovdm): make improvements to the tool.
+  'BU': 'MM', // Burma; deprecated 1989-12-05
+  'DD': 'DE', // German Democratic Republic; deprecated 1990-10-30
+  'FX': 'FR', // Metropolitan France; deprecated 1997-07-14
+  'TP': 'TL', // East Timor; deprecated 2002-05-20
+  'YD': 'YE', // Democratic Yemen; deprecated 1990-08-14
+  'ZR': 'CD', // Zaire; deprecated 1997-07-14
+};
diff --git a/lib/src/locale/locale_extensions.dart b/lib/src/locale/locale_extensions.dart
new file mode 100644
index 0000000..31ac841
--- /dev/null
+++ b/lib/src/locale/locale_extensions.dart
@@ -0,0 +1,228 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Locale extensions as defined for [Unicode Locale
+/// Identifiers](http://www.unicode.org/reports/tr35/#Unicode_locale_identifier).
+///
+/// These extensions cover Locale information that isn't captured by the
+/// language, script, region and variants subtags of the Unicode Language
+/// Identifier. Please see the Unicode Technical Standard linked above.
+class LocaleExtensions {
+  /// Creates extensions from `-u-`, `-t-`, other and `-x-` extension data.
+  ///
+  /// Keys in each of the maps passed to this constructor must be syntactically
+  /// valid extension keys, and must already be normalized (correct case).
+  LocaleExtensions(
+      Map<String, String> uExtensions,
+      Map<String, String> tExtensions,
+      Map<String, String> otherExtensions,
+      this._xExtensions)
+      : _uExtensions = _sortedUnmodifiable(uExtensions),
+        _tExtensions = _sortedUnmodifiable(tExtensions),
+        _otherExtensions = _sortedUnmodifiable(otherExtensions) {
+    // Debug-mode asserts to ensure all parameters are normalized and UTS #35
+    // compliant.
+    assert(
+        uExtensions == null ||
+            uExtensions.entries.every((e) {
+              if (!_uExtensionsValidKeysRE.hasMatch(e.key)) return false;
+              // TODO(hugovdm) reconsider this representation: "true" values are
+              // suppressed in canonical Unicode BCP47 Locale Identifiers, but
+              // we may choose to represent them as "true" in memory.
+              if (e.value == '' && e.key != '') return true;
+              if (!_uExtensionsValidValuesRE.hasMatch(e.value)) return false;
+              return true;
+            }),
+        'uExtensions keys must match '
+        'RegExp/${_uExtensionsValidKeysRE.pattern}/. '
+        'uExtensions values must match '
+        'RegExp/${_uExtensionsValidValuesRE.pattern}/. '
+        'uExtensions.entries: ${uExtensions.entries}.');
+    assert(
+        tExtensions == null ||
+            tExtensions.entries.every((e) {
+              if (!_tExtensionsValidKeysRE.hasMatch(e.key)) return false;
+              if (e.key == '') {
+                if (!_validTlangRE.hasMatch(e.value)) return false;
+              } else {
+                if (!_tExtensionsValidValuesRE.hasMatch(e.value)) return false;
+              }
+              return true;
+            }),
+        'tExtensions keys must match '
+        'RegExp/${_tExtensionsValidKeysRE.pattern}/. '
+        'tExtensions values other than tlang must match '
+        'RegExp/${_tExtensionsValidValuesRE.pattern}/. '
+        'Entries: ${tExtensions.entries}.');
+    assert(
+        otherExtensions == null ||
+            otherExtensions.entries.every((e) {
+              if (!_otherExtensionsValidKeysRE.hasMatch(e.key)) return false;
+              // Each singleton key must be followed by one or more 2-8
+              // character alphanumeric subtags.
+              return _otherExtensionsValidValuesRE.hasMatch(e.value);
+            }),
+        'otherExtensions keys must match '
+        'RegExp/${_otherExtensionsValidKeysRE.pattern}/. '
+        'otherExtensions values must match '
+        'RegExp/${_otherExtensionsValidValuesRE.pattern}/. '
+        'Entries: ${otherExtensions.entries}.');
+    assert(
+        _xExtensions == null || _validXExtensionsRE.hasMatch(_xExtensions),
+        '_xExtensions must match RegExp/${_validXExtensionsRE.pattern}/ '
+        'but is "$_xExtensions".');
+  }
+
+  /// For debug/assert-use only! Matches keys considered valid for
+  /// [_uExtensions], does not imply keys are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _uExtensionsValidKeysRE = RegExp(r'^$|^[a-z\d][a-z]$');
+
+  /// For debug/assert-use only! Matches values considered valid for
+  /// [_uExtensions], does not imply values are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _uExtensionsValidValuesRE =
+      RegExp(r'^[a-z]{3,8}([-][a-z]{3,8})*$');
+
+  /// For debug/assert-use only! Matches keys considered valid for
+  /// [_tExtensions], does not imply keys are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _tExtensionsValidKeysRE = RegExp(r'^$|^[a-z]\d$');
+
+  /// For debug/assert-use only! With the exception of `tlang`, matches values
+  /// considered valid for [_tExtensions], does not imply values are valid as
+  /// per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _tExtensionsValidValuesRE =
+      RegExp(r'^[a-z]{3,8}([-][a-z]{3,8})*$');
+
+  /// For debug/assert-use only! Matches keys considered valid for
+  /// [_otherExtensions], does not imply keys are valid as per Unicode LDML
+  /// spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _otherExtensionsValidKeysRE = RegExp(r'^[a-svwyz]$');
+
+  /// For debug/assert-use only! Matches values considered valid for
+  /// [_otherExtensions], does not imply values are valid as per Unicode LDML
+  /// spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _otherExtensionsValidValuesRE =
+      RegExp(r'^[a-z\d]{2,8}([-][a-z\d]{2,8})*$');
+
+  /// For debug/assert-use only! Matches values valid for [_xExtensions].
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _validXExtensionsRE =
+      RegExp(r'^[a-z\d]{1,8}([-][a-z\d]{1,8})*$');
+
+  /// For debug/assert-use only! Matches values valid for tlang.
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _validTlangRE = RegExp(
+      // Full string match start
+      r'^'
+
+      // Language is required in a tlang identifier.
+      r'([a-z]{2,3}|[a-z]{5,8})' // Language
+
+      // Optional script
+      r'(-[a-z]{4})?'
+
+      // Optional region
+      r'(-[a-z]{2}|-\d{3})?'
+
+      // Any number of variant subtags
+      r'(-([a-z\d]{5,8}|\d[a-z\d]{3}))*'
+
+      // Full string match end
+      r'$');
+
+  /// `-u-` extension, with keys in sorted order. Attributes are stored under
+  /// the zero-length string as key. Keywords (consisting of `key` and `type`)
+  /// are stored under normalized (lowercased) `key`. See
+  /// http://www.unicode.org/reports/tr35/#unicode_locale_extensions for
+  /// details.
+  final Map<String, String> _uExtensions;
+
+  /// `-t-` extension, with keys in sorted order. tlang attributes are stored
+  /// under the zero-length string as key. See
+  /// http://www.unicode.org/reports/tr35/#transformed_extensions for
+  /// details.
+  final Map<String, String> _tExtensions;
+
+  /// Other extensions, with keys in sorted order. See
+  /// http://www.unicode.org/reports/tr35/#other_extensions for details.
+  final Map<String, String> _otherExtensions;
+
+  /// -x- extension values, null if absent. See
+  /// http://www.unicode.org/reports/tr35/#pu_extensions for details.
+  final String _xExtensions;
+
+  /// List of subtags in the [Unicode Locale
+  /// Identifier](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier)
+  /// extensions, including private use extensions.
+  ///
+  /// This covers everything after the unicode_language_id. If there are no
+  /// extensions (i.e. the Locale Identifier has only language, script, region
+  /// and/or variants), this will be an empty list.
+  ///
+  /// These subtags are sorted and normalized, ready for joining with a
+  /// unicode_language_id and '-' as delimiter to provide a UTS #35 compliant
+  /// normalized Locale Identifier.
+  List<String> get subtags {
+    final List<String> result = [];
+    final List<String> resultVWYZ = [];
+
+    _otherExtensions.forEach((singleton, value) {
+      final int letter = (singleton.codeUnitAt(0) - 0x61) & 0xFFFF;
+      // 19, 20 and 23 ('t', 'u', 'x') are handled by other members.
+      assert(letter < 26 && letter != 19 && letter != 20 && letter != 23);
+      if (letter < 19) {
+        result.addAll([singleton, value]);
+      } else {
+        resultVWYZ.addAll([singleton, value]);
+      }
+    });
+    if (_tExtensions.isNotEmpty) {
+      result.add('t');
+      _tExtensions.forEach((key, value) {
+        if (key != '') result.add(key);
+        result.add(value);
+      });
+    }
+    if (_uExtensions.isNotEmpty) {
+      result.add('u');
+      _uExtensions.forEach((key, value) {
+        if (key != '') result.add(key);
+        if (value != '') result.add(value);
+      });
+    }
+    // Singletons after 'u' come next; the private-use `x` extension is last.
+    if (resultVWYZ.isNotEmpty) {
+      result.addAll(resultVWYZ);
+    }
+    if (_xExtensions != null) {
+      result.add('x-$_xExtensions');
+    }
+    return result;
+  }
+}
+
+/// Returns an unmodifiable copy of `unsorted` with its entries inserted in
+/// sorted key order, or an empty constant map when `unsorted` is null.
+Map<String, String> _sortedUnmodifiable(Map<String, String> unsorted) {
+  if (unsorted == null) return const {};
+  final sortedKeys = unsorted.keys.toList()..sort();
+  final ordered = <String, String>{};
+  for (final key in sortedKeys) {
+    ordered[key] = unsorted[key];
+  }
+  return Map.unmodifiable(ordered);
+}
diff --git a/lib/src/locale/locale_implementation.dart b/lib/src/locale/locale_implementation.dart
new file mode 100644
index 0000000..11b398c
--- /dev/null
+++ b/lib/src/locale/locale_implementation.dart
@@ -0,0 +1,189 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:intl/src/locale.dart' show Locale;
+
+import 'locale_deprecations.dart';
+import 'locale_extensions.dart';
+
+/// The primary implementation of the Locale interface.
+class LocaleImplementation extends Locale {
+  /// Simple private constructor with asserts to check invariants.
+  LocaleImplementation._(this.languageCode, this.scriptCode, this.countryCode,
+      this.variants, this._extensions) {
+    // Debug-mode asserts to ensure all parameters are normalized and UTS #35
+    // compliant.
+    assert(
+        languageCode != null && _normalizedLanguageRE.hasMatch(languageCode),
+        'languageCode must match RegExp/${_normalizedLanguageRE.pattern}/ '
+        'but is "$languageCode".');
+    assert(
+        scriptCode == null || _normalizedScriptRE.hasMatch(scriptCode),
+        'scriptCode must match RegExp/${_normalizedScriptRE.pattern}/ '
+        'but is "$scriptCode".');
+    assert(
+        countryCode == null || _normalizedRegionRE.hasMatch(countryCode),
+        'countryCode must match RegExp/${_normalizedRegionRE.pattern}/ '
+        'but is "$countryCode".');
+    assert(
+        variants is List &&
+            variants.every((v) => _normalizedVariantRE.hasMatch(v)),
+        'each variant must match RegExp/${_normalizedVariantRE.pattern}/ '
+        'but variants are "$variants".');
+  }
+
+  /// For debug/assert-use only! Matches subtags considered valid for
+  /// [languageCode], does not imply subtag is valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _normalizedLanguageRE = RegExp(r'^[a-z]{2,3}$|^[a-z]{5,8}$');
+
+  /// For debug/assert-use only! Matches subtags considered valid for
+  /// [scriptCode], does not imply subtag is valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _normalizedScriptRE = RegExp(r'^[A-Z][a-z]{3}$');
+
+  /// For debug/assert-use only! Matches subtags considered valid for
+  /// [countryCode], does not imply subtags are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _normalizedRegionRE = RegExp(r'^[A-Z]{2}$|^\d{3}$');
+
+  /// For debug/assert-use only! Matches subtags considered valid for
+  /// [variants], does not imply subtags are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _normalizedVariantRE =
+      RegExp(r'^[a-z\d]{5,8}$|^\d[a-z\d]{3}$');
+
+  /// Simple factory which assumes parameters are syntactically correct.
+  ///
+  /// In debug mode, incorrect use may result in an assertion failure. (In
+  /// production code, this class makes no promises regarding the consequence of
+  /// incorrect use.)
+  ///
+  /// For public APIs, see [Locale.fromSubtags] and [Locale.parse].
+  factory LocaleImplementation.unsafe(
+    String languageCode, {
+    String scriptCode,
+    String countryCode,
+    Iterable<String> variants,
+    LocaleExtensions extensions,
+  }) {
+    variants = (variants != null && variants.isNotEmpty)
+        ? List.unmodifiable(variants.toList()..sort())
+        : const [];
+    return LocaleImplementation._(
+        languageCode, scriptCode, countryCode, variants, extensions);
+  }
+
+  /// Constructs a Locale instance that consists of only language, script and
+  /// region subtags.
+  ///
+  /// Throws a [FormatException] if any subtag is syntactically invalid.
+  static LocaleImplementation fromSubtags(
+      {String languageCode, String scriptCode, String countryCode}) {
+    return LocaleImplementation._(
+        replaceDeprecatedLanguageSubtag(_normalizeLanguageCode(languageCode)),
+        _normalizeScriptCode(scriptCode),
+        replaceDeprecatedRegionSubtag(_normalizeCountryCode(countryCode)),
+        const [],
+        null);
+  }
+
+  /// Performs case normalization on `languageCode`.
+  ///
+  /// Throws a [FormatException] if it is null or syntactically invalid.
+  static String _normalizeLanguageCode(String languageCode) {
+    if (languageCode == null || !_languageRegExp.hasMatch(languageCode)) {
+      throw FormatException('Invalid language "$languageCode"');
+    }
+    return languageCode.toLowerCase();
+  }
+
+  static final _languageRegExp = RegExp(r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$');
+
+  /// Performs case normalization on `scriptCode`.
+  ///
+  /// Throws a [FormatException] if it is syntactically invalid.
+  static String _normalizeScriptCode(String scriptCode) {
+    if (scriptCode == null) return null;
+    if (!_scriptRegExp.hasMatch(scriptCode)) {
+      throw FormatException('Invalid script "$scriptCode"');
+    }
+    return toCapCase(scriptCode);
+  }
+
+  static final _scriptRegExp = RegExp(r'^[a-zA-Z]{4}$');
+
+  /// Performs case normalization on `countryCode`.
+  ///
+  /// Throws a [FormatException] if it is syntactically invalid.
+  static String _normalizeCountryCode(String countryCode) {
+    if (countryCode == null) return null;
+    if (!_regionRegExp.hasMatch(countryCode)) {
+      throw FormatException('Invalid region "$countryCode"');
+    }
+    return countryCode.toUpperCase();
+  }
+
+  static final _regionRegExp = RegExp(r'^[a-zA-Z]{2}$|^\d{3}$');
+
+  /// The language subtag of the Locale Identifier.
+  ///
+  /// It is syntactically valid, normalized (has correct case) and canonical
+  /// (deprecated tags have been replaced), but not necessarily valid (the
+  /// language might not exist) because the list of valid languages changes with
+  /// time.
+  final String languageCode;
+
+  /// The script subtag of the Locale Identifier, null if absent.
+  ///
+  /// It is syntactically valid and normalized (has correct case), but not
+  /// necessarily valid (the script might not exist) because the list of
+  /// valid scripts changes with time. [fromSubtags] only case-normalizes
+  /// scripts; it performs no deprecated-subtag replacement for them.
+  final String scriptCode;
+
+  /// The region subtag of the Locale Identifier, null if absent.
+  ///
+  /// It is syntactically valid, normalized (has correct case) and canonical
+  /// (deprecated tags have been replaced), but not necessarily valid (the
+  /// region might not exist) because the list of valid regions changes with
+  /// time.
+  final String countryCode;
+
+  /// Iterable of variant subtags, zero-length iterable if variants are absent.
+  ///
+  /// They are syntactically valid, normalized (have correct case) and canonical
+  /// (sorted alphabetically and deprecated tags have been replaced) but not
+  /// necessarily valid (variants might not exist) because the list of variants
+  /// changes with time.
+  final Iterable<String> variants;
+
+  /// Locale extensions, null if the locale has no extensions.
+  // TODO(hugovdm): Not yet supported: getters for extensions.
+  final LocaleExtensions _extensions;
+
+  /// Cache of the value returned by [toLanguageTag].
+  String _languageTag;
+
+  /// Returns the canonical Unicode BCP47 Locale Identifier for this locale.
+  String toLanguageTag() {
+    if (_languageTag == null) {
+      final List<String> out = [languageCode];
+      if (scriptCode != null) out.add(scriptCode);
+      if (countryCode != null) out.add(countryCode);
+      out.addAll(variants);
+      if (_extensions != null) out.addAll(_extensions.subtags);
+      _languageTag = out.join('-');
+    }
+    return _languageTag;
+  }
+}
+
+/// Capitalizes the first character of `input` and lowercases the remainder.
+String toCapCase(String input) =>
+    '${input.substring(0, 1).toUpperCase()}${input.substring(1).toLowerCase()}';
diff --git a/lib/src/locale/locale_parser.dart b/lib/src/locale/locale_parser.dart
new file mode 100644
index 0000000..4fb90d5
--- /dev/null
+++ b/lib/src/locale/locale_parser.dart
@@ -0,0 +1,413 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'locale_deprecations.dart';
+import 'locale_extensions.dart';
+import 'locale_implementation.dart';
+
+/// A parser for [Unicode Locale
+/// Identifiers](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier).
+class LocaleParser {
+ /// Language subtag of Unicode Language Identifier.
+ String _languageCode = 'und';
+
+ /// Script subtag of Unicode Language Identifier.
+ String _scriptCode;
+
+ /// Region subtag of Unicode Language Identifier.
+ String _countryCode;
+
+ /// Variant subtags of Unicode Language Identifier.
+ List<String> _variants;
+
+ /// Unicode Locale Extensions, also known as "U Extension".
+ Map<String, String> _uExtensions;
+
+ /// Transformed Extensions, also known as "T Extension".
+ Map<String, String> _tExtensions;
+
+ /// Private-Use Extensions.
+ String _xExtensions;
+
+ /// Other Extensions.
+ Map<String, String> _otherExtensions;
+
+ /// List of problems with the localeId the parser tried to parse.
+ ///
+ /// An empty list indicates problem-free parsing.
+ final List<String> problems = <String>[];
+
+ /// Builds the Locale described by the parser's current state.
+ ///
+ /// Returns null if parsing recorded any entries in [problems].
+ LocaleImplementation toLocale() {
+   if (problems.isNotEmpty) return null;
+   // Extensions are only attached when at least one kind of extension was
+   // parsed; otherwise the Locale is created with null extensions.
+   final bool hasExtensions = _uExtensions != null ||
+       _tExtensions != null ||
+       _otherExtensions != null ||
+       _xExtensions != null;
+   final LocaleExtensions extensions = hasExtensions
+       ? LocaleExtensions(
+           _uExtensions, _tExtensions, _otherExtensions, _xExtensions)
+       : null;
+   return LocaleImplementation.unsafe(_languageCode,
+       scriptCode: _scriptCode,
+       countryCode: _countryCode,
+       variants: _variants,
+       extensions: extensions);
+ }
+
+ /// Subtags of the Locale Identifier, as split by [separators].
+ List<String> _subtags;
+
+ /// RegExp that matches Unicode Locale Identifier subtag separators.
+ static final separators = RegExp('[-_]');
+
+ /// Last accepted subtag.
+ String _accepted;
+
+ /// Returns the subtag that [advance] consumed most recently.
+ String accepted() => _accepted;
+
+ /// Last accepted list of subtags (for variants).
+ List<String> _acceptedList;
+
+ /// Returns the subtags collected by the latest [acceptVariants] call.
+ List<String> acceptedList() => _acceptedList;
+
+ /// Current subtag pending acceptance.
+ String _current;
+
+ /// Returns the subtag awaiting acceptance, or null if there is none.
+ String current() => _current;
+
+ /// Index of the current subtag.
+ int _currentIndex;
+
+ /// Accepts [current] and moves on to the next subtag.
+ ///
+ /// Afterwards [accepted] returns the subtag that was current, and [current]
+ /// returns the following subtag, or null when none remain.
+ void advance() {
+   _accepted = _current;
+   _currentIndex += 1;
+   final bool exhausted = _currentIndex >= _subtags.length;
+   _current = exhausted ? null : _subtags[_currentIndex];
+ }
+
+ /// Returns true if all subtags have been parsed.
+ ///
+ /// This holds once [_currentIndex] is no longer a valid [_subtags] index.
+ bool atEnd() => _currentIndex >= _subtags.length;
+
+ /// Parses [localeId] as a [Unicode CLDR Locale
+ /// Identifier](https://www.unicode.org/reports/tr35/#Identifiers).
+ ///
+ /// Not every BCP 47 tag can be parsed. See [BCP 47
+ /// Conformance](https://www.unicode.org/reports/tr35/#BCP_47_Conformance) for
+ /// details.
+ ///
+ /// [localeId] must not be null; it is lowercased before being parsed.
+ ///
+ /// Parsing failed if there are any entries in [problems] afterwards.
+ LocaleParser(String localeId) {
+   assert(localeId != null);
+
+   // Calling toLowerCase unconditionally should be efficient if
+   // string_patch.dart is in use:
+   // https://github.com/dart-lang/sdk/blob/cabaa78cc57d08bcfcd75bfe99a42c19ed497d26/runtime/lib/string_patch.dart#L1178
+   final String normalizedId = localeId.toLowerCase();
+   // 'root' leaves all fields at their defaults: the language stays 'und'.
+   if (normalizedId == 'root') return;
+
+   _subtags = normalizedId.split(separators);
+   _currentIndex = 0;
+   _current = _subtags[0];
+
+   // At least one of the language and script subtags has to be present.
+   final bool languageFound = acceptLanguage();
+   if (languageFound) {
+     _languageCode = replaceDeprecatedLanguageSubtag(accepted());
+   }
+   final bool scriptFound = acceptScript();
+   if (scriptFound) {
+     _scriptCode = toCapCase(accepted());
+   } else if (!languageFound) {
+     problems.add('bad language/script');
+   }
+   if (acceptRegion()) {
+     // Region subtags are uppercased before deprecated ones are replaced.
+     final String region = accepted().toUpperCase();
+     _countryCode = replaceDeprecatedRegionSubtag(region);
+   }
+   acceptVariants();
+   _variants = acceptedList();
+
+   processExtensions();
+
+   // Whatever is left over was not matched by any of the steps above.
+   if (!atEnd()) {
+     problems.add('bad subtag "${current()}"');
+   }
+ }
+
+ /// Consumes all remaining subtags, if syntactically valid; on failure,
+ /// `atEnd()` will be false and/or [problems] will not be empty.
+ void processExtensions() {
+   while (acceptSingleton()) {
+     switch (accepted()) {
+       case 'u':
+         processUExtensions();
+         break;
+       case 't':
+         processTExtensions();
+         break;
+       case 'x':
+         processPrivateUseExtensions();
+         return; // Private use ends extension parsing.
+       default:
+         processOtherExtensions(accepted());
+     }
+   }
+ }
+
+ /// Parses the subtags of a `unicode_locale_extensions` production (see the
+ /// specification); the 'u' singleton itself must already be accepted.
+ ///
+ /// On failure, `atEnd()` will be false and/or [problems] will not be
+ /// empty.
+ void processUExtensions() {
+   if (_uExtensions != null) {
+     problems.add('duplicate "u"');
+     return;
+   }
+   _uExtensions = <String, String>{};
+   // Tracks whether any attribute or key followed the "u" singleton.
+   bool sawContent = false;
+   // Leading attributes are sorted and stored together under the empty key.
+   final attributes = <String>[];
+   while (acceptLowAlphaNumeric3to8()) {
+     attributes.add(accepted());
+   }
+   if (attributes.isNotEmpty) {
+     sawContent = true;
+     attributes.sort();
+     _uExtensions[''] = attributes.join('-');
+   }
+   // unicode_locale_extensions: collect "(sep keyword)*".
+   while (acceptUExtensionKey()) {
+     sawContent = true;
+     final String key = accepted();
+     final typeParts = <String>[];
+     while (acceptLowAlphaNumeric3to8()) {
+       typeParts.add(accepted());
+     }
+     if (_uExtensions.containsKey(key)) {
+       problems.add('duplicate "$key"');
+       continue;
+     }
+     // A type consisting of just "true" is stored as the empty string.
+     final bool isTrue = typeParts.length == 1 && typeParts[0] == 'true';
+     _uExtensions[key] = isTrue ? '' : typeParts.join('-');
+   }
+   if (!sawContent) {
+     problems.add('empty "u"');
+   }
+ }
+
+ /// Parses the subtags of a `transformed_extensions` production (see the
+ /// specification); the 't' singleton itself must already be accepted.
+ ///
+ /// On failure, `atEnd()` will be false and/or [problems] will not be
+ /// empty.
+ void processTExtensions() {
+   if (_tExtensions != null) {
+     problems.add('duplicate "t"');
+     return;
+   }
+   _tExtensions = <String, String>{};
+   // Tracks whether a tlang or a tfield with a value followed the "t".
+   bool sawContent = false;
+   if (acceptLanguage()) {
+     sawContent = true;
+     // The tlang subtags are joined and stored under the empty key.
+     final tlang = <String>[replaceDeprecatedLanguageSubtag(accepted())];
+     if (acceptScript()) {
+       tlang.add(accepted());
+     }
+     if (acceptRegion()) {
+       // The lookup receives the uppercase form; its result is lowercased.
+       final String region = accepted().toUpperCase();
+       tlang.add(replaceDeprecatedRegionSubtag(region).toLowerCase());
+     }
+     acceptVariants();
+     tlang.addAll(acceptedList());
+     _tExtensions[''] = tlang.join('-');
+   }
+   // transformed_extensions: collect "(sep tfield)*".
+   while (acceptTExtensionKey()) {
+     final String tkey = accepted();
+     final tvalueParts = <String>[];
+     while (acceptLowAlphaNumeric3to8()) {
+       tvalueParts.add(accepted());
+     }
+     if (tvalueParts.isEmpty) {
+       problems.add('empty "$tkey"');
+       continue;
+     }
+     sawContent = true;
+     if (_tExtensions.containsKey(tkey)) {
+       problems.add('duplicate "$tkey"');
+     } else {
+       _tExtensions[tkey] = tvalueParts.join('-');
+     }
+   }
+   if (!sawContent) problems.add('empty "t"');
+ }
+
+ /// Parses the subtags of a `pu_extensions` production (see the
+ /// specification); the 'x' singleton itself must already be accepted.
+ ///
+ /// On failure, `atEnd()` will be false and/or [problems] will not be
+ /// empty.
+ void processPrivateUseExtensions() {
+   final subtags = <String>[];
+   while (acceptLowAlphaNumeric1to8()) {
+     subtags.add(accepted());
+   }
+   // An "x" that is not followed by any subtag is dropped.
+   if (subtags.isEmpty) return;
+   _xExtensions = subtags.join('-');
+ }
+
+ /// Parses the subtags of an `other_extensions` production (see the
+ /// specification); its [singleton] must already be accepted and is passed
+ /// in by the caller.
+ ///
+ /// On failure, `atEnd()` will be false and/or [problems] will not be
+ /// empty.
+ void processOtherExtensions(String singleton) {
+   final subtags = <String>[];
+   while (acceptLowAlphaNumeric2to8()) {
+     subtags.add(accepted());
+   }
+   // An extension without any subtags is dropped.
+   if (subtags.isEmpty) return;
+   _otherExtensions ??= <String, String>{};
+   if (_otherExtensions.containsKey(singleton)) {
+     problems.add('duplicate "$singleton"');
+     return;
+   }
+   _otherExtensions[singleton] = subtags.join('-');
+ }
+
+ /// Consumes the current subtag and returns true if it is a language subtag;
+ /// otherwise returns false without advancing.
+ bool acceptLanguage() {
+   if (atEnd() || !_languageRegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ static final _languageRegExp = RegExp(r'^[a-z]{2,3}$|^[a-z]{5,8}$');
+
+ /// Consumes the current subtag and returns true if it is a script subtag;
+ /// otherwise returns false without advancing.
+ bool acceptScript() {
+   if (atEnd() || !_scriptRegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ static final _scriptRegExp = RegExp(r'^[a-z]{4}$');
+
+ /// Consumes the current subtag and returns true if it is a region subtag;
+ /// otherwise returns false without advancing.
+ bool acceptRegion() {
+   if (atEnd() || !_regionRegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ static final _regionRegExp = RegExp(r'^[a-z]{2}$|^\d{3}$');
+
+ /// Consumes consecutive variant subtags, starting at the current subtag.
+ ///
+ /// Unlike the other `accept` methods this returns nothing: the consumed
+ /// subtags (possibly none) replace the content of [_acceptedList], in the
+ /// order in which they were encountered.
+ void acceptVariants() {
+   final variants = <String>[];
+   _acceptedList = variants;
+   for (; !atEnd() && _variantRegExp.hasMatch(current()); advance()) {
+     variants.add(current());
+   }
+ }
+
+ static final _variantRegExp = RegExp(r'^[a-z\d]{5,8}$|^\d[a-z\d]{3}$');
+
+ /// Consumes the current subtag and returns true if it is a singleton (one
+ /// lowercase letter); otherwise returns false without advancing.
+ bool acceptSingleton() {
+   if (atEnd() || !_singletonRegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ static final _singletonRegExp = RegExp(r'^[a-z]$');
+
+ /// Consumes the current subtag and returns true if it consists of 1 to 8
+ /// lowercase letters and/or digits; otherwise returns false without
+ /// advancing.
+ bool acceptLowAlphaNumeric1to8() {
+   if (atEnd() || !_alphaNumeric1to8RegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ static final _alphaNumeric1to8RegExp = RegExp(r'^[a-z\d]{1,8}$');
+
+ /// Consumes the current subtag and returns true if it consists of 2 to 8
+ /// lowercase letters and/or digits; otherwise returns false without
+ /// advancing.
+ bool acceptLowAlphaNumeric2to8() {
+   if (atEnd() || current().length < 2) return false;
+   if (!_alphaNumeric1to8RegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ /// Consumes the current subtag and returns true if it consists of 3 to 8
+ /// lowercase letters and/or digits; otherwise returns false without
+ /// advancing.
+ bool acceptLowAlphaNumeric3to8() {
+   if (atEnd() || current().length < 3) return false;
+   if (!_alphaNumeric1to8RegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ /// Consumes the current subtag and returns true if it is a valid U
+ /// Extension key; otherwise returns false without advancing.
+ bool acceptUExtensionKey() {
+   if (atEnd() || !_uExtensionKeyRegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ static final _uExtensionKeyRegExp = RegExp(r'^[a-z\d][a-z]$');
+
+ /// Consumes the current subtag and returns true if it is a valid T
+ /// Extension key (`tkey` in the specification); otherwise returns false
+ /// without advancing.
+ bool acceptTExtensionKey() {
+   if (atEnd() || !_tExtensionKeyRegExp.hasMatch(current())) return false;
+   advance();
+   return true;
+ }
+
+ static final _tExtensionKeyRegExp = RegExp(r'^[a-z]\d$');
+}
diff --git a/test/locale_test.dart b/test/locale_test.dart
new file mode 100644
index 0000000..774fa5a
--- /dev/null
+++ b/test/locale_test.dart
@@ -0,0 +1,238 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Tests for the Locale class.
+///
+/// Currently, the primary intention of these tests is to exercise and
+/// demonstrate the API: full test coverage is a non-goal for the prototype.
+///
+/// For production code, use of ICU would influence what needs and doesn't need
+/// to be tested.
+
+import 'package:test/test.dart';
+import 'package:intl/src/locale.dart';
+
+import 'locale_test_data.dart';
+
+void main() {
+ group('Construction and properties:', () {
+ // Simple with normalization:
+ testFromSubtags('Zh', null, null, 'zh', null, null, 'zh');
+ testFromSubtags('zH', null, 'cn', 'zh', null, 'CN', 'zh-CN');
+ testFromSubtags('ZH', null, 'Cn', 'zh', null, 'CN', 'zh-CN');
+ testFromSubtags('zh', null, 'cN', 'zh', null, 'CN', 'zh-CN');
+ testFromSubtags('zh', 'hans', null, 'zh', 'Hans', null, 'zh-Hans');
+ testFromSubtags('ZH', 'HANS', 'CN', 'zh', 'Hans', 'CN', 'zh-Hans-CN');
+
+ // Region codes can be three digits.
+ testFromSubtags('es', null, '419', 'es', null, '419', 'es-419');
+
+ // While language is usually 2 characters, it can also be 3.
+ testFromSubtags('CKB', 'arab', null, 'ckb', 'Arab', null, 'ckb-Arab');
+
+ // With canonicalization:
+ testFromSubtags('Iw', null, null, 'he', null, null, 'he');
+ testFromSubtags('iW', null, null, 'he', null, null, 'he');
+ testFromSubtags('My', null, 'Bu', 'my', null, 'MM', 'my-MM');
+ });
+
+ group('Locale.fromSubtags() FormatExceptions:', () {
+ testExceptionForSubtags(String language, String script, String region) {
+ test('fromSubtags: "$language / $script / $region"', () {
+ expect(
+ () => Locale.fromSubtags(
+ languageCode: language,
+ scriptCode: script,
+ countryCode: region),
+ throwsFormatException);
+ });
+ }
+
+ testExceptionForSubtags('a', null, null);
+ testExceptionForSubtags('en', 'ZA', null);
+ testExceptionForSubtags('en', null, 'Latn');
+ });
+
+ group('Locale normalization matching ICU.', () {
+   for (final entry in localeParsingTestData.entries) {
+     test('Locale normalization: ${entry.key} -> ${entry.value}', () {
+       expect(Locale.parse(entry.key).toLanguageTag(), entry.value);
+     });
+   }
+ });
+
+ group('Unicode LDML Locale Identifier support', () {
+ // 'root' is a valid Unicode Locale Identifier, but should be taken as
+ // 'und'[1]. ICU's toLanguageTag still returns 'root'.
+ // [1]:
+ // http://unicode.org/reports/tr35/#Unicode_Locale_Identifier_CLDR_to_BCP_47
+ testParse('root', 'und', null, null, [], 'und');
+ testParse('Root', 'und', null, null, [], 'und');
+ testParse('ROOT', 'und', null, null, [], 'und');
+
+ // We support underscores, whereas ICU's `forLanguageTag` does
+ // not.
+ testParse('CKB_arab', 'ckb', 'Arab', null, [], 'ckb-Arab');
+ testParse('My_Bu', 'my', null, 'MM', [], 'my-MM');
+
+ // Normalises tags, sorts subtags alphabetically, including variants[1]:
+ // ICU is currently not sorting variants.
+ // [1]: http://unicode.org/reports/tr35/#Unicode_locale_identifier
+ testParse('en-scouse-fonipa', 'en', null, null, ['fonipa', 'scouse'],
+ 'en-fonipa-scouse');
+
+ // Normalises tags, sorts subtags alphabetically and suppresses unneeded
+ // "true" in u extension (ICU is currently not dropping -true):
+ // http://unicode.org/reports/tr35/#u_Extension
+ testParse('en-u-Foo-bar-nu-thai-ca-buddhist-kk-true', 'en', null, null, [],
+ 'en-u-bar-foo-ca-buddhist-kk-nu-thai');
+
+ // The specification does permit empty extensions for extensions other than
+ // u- and t-.
+ testParse('en-a', 'en', null, null, [], 'en');
+ testParse('en-x', 'en', null, null, [], 'en');
+ testParse('en-z', 'en', null, null, [], 'en');
+
+ // Normalization of `tlang` - ICU still returns -t-iw-bu.
+ testParse('en-t-iw-Bu', 'en', null, null, [], 'en-t-he-mm');
+
+ test('en-u-ca is equivalent to en-u-ca-true', () {
+ expect(Locale.parse('en-u-ca').toLanguageTag(),
+ Locale.parse('en-u-ca-true').toLanguageTag());
+ });
+ });
+
+ // Normalization: sorting of extension subtags:
+ testParse('en-z-abc-001-foo-fii-bar-u-cu-usd-co-phonebk', 'en', null, null,
+ [], 'en-u-co-phonebk-cu-usd-z-abc-001-foo-fii-bar');
+
+ group('Locale.parse() throws FormatException:', () {
+ testExceptionForId(String x) {
+ test('"$x"', () {
+ expect(() => Locale.parse(x), throwsFormatException);
+ });
+ }
+
+ invalidLocales.forEach((badLocaleIdentifier) {
+ testExceptionForId(badLocaleIdentifier);
+ });
+
+ // ICU permits '', taking it as 'und', but it is not a valid Unicode Locale
+ // Identifier: We reject it.
+ testExceptionForId('');
+
+ // abcd-Latn throws exceptions in our Dart implementation, whereas
+ // ECMAScript's Intl.Locale permits it. This is because the BCP47 spec
+ // still allows for the possible addition of 4-character languages in
+ // the future, whereas the Unicode Locale Identifiers spec bans it
+ // outright.
+ testExceptionForId('abcd-Latn');
+
+ // ICU permits 'root-Latn' since it conforms to pure BCP47, but it is an
+ // invalid Unicode BCP47 Locale Identifier.
+ testExceptionForId('root-Latn');
+
+ // ICU permits empty tkeys.
+ testExceptionForId('en-t-a0');
+
+ // ICU permits duplicate tkeys, returning the content of -t- verbatim.
+ testExceptionForId('en-t-a0-one-a0-two');
+
+ // ICU permits duplicate keys, in this case dropping -ca-buddhist.
+ testExceptionForId('en-u-ca-islamic-ca-buddhist');
+ });
+
+ group('Locale.tryParse() returns null:', () {
+   for (final badLocaleIdentifier in invalidLocales) {
+     test('"$badLocaleIdentifier"', () {
+       expect(Locale.tryParse(badLocaleIdentifier), isNull);
+     });
+   }
+ });
+
+ // TODO: determine appropriate behaviour for the following examples.
+
+ // // 'mo' is deprecated, and is a tag that ought to be replaced by *two*
+ // // subtags (ro-MD), although Chrome Unstable also doesn't presently do
+ // // that (replaces it by 'ro' only).
+ // // TODO: check up on the Chrome implementation.
+ // testParse('mo', 'ro', null, 'MD', [], 'ro-MD');
+
+ // // Script deprecation.
+ // testParse('en-Qaai', 'en', 'Zinh', null, [], 'en-Zinh');
+
+ // // Variant deprecation.
+ // testParse('sv-aaland', 'sv', null, 'AX', [], 'sv-AX');
+
+ // // Variant deprecation.
+ // testParse('en-heploc', 'en', null, null, ['alalc97'], 'en-alalc97');
+
+ // // Variant deprecation.
+ // testParse('en-polytoni', 'en', null, null, ['polyton'], 'en-polyton');
+
+ test('Locale cannot be modified via the variants field', () {
+   final locale = Locale.parse('en-scotland');
+   final List<String> variants = locale.variants;
+   var addFailed = false;
+   try {
+     variants.add('basiceng');
+   } on Error {
+     addFailed = true; // The list rejected the modification.
+   }
+   expect(locale.toLanguageTag(), 'en-scotland');
+   expect(addFailed, isTrue);
+ });
+
+ test('operator== and hashCode', () {
+   // A parsed locale must equal the same locale built from subtags.
+   final parsedFull = Locale.parse('en-Shaw-ZA');
+   final builtFull = Locale.fromSubtags(
+       languageCode: 'en', scriptCode: 'Shaw', countryCode: 'ZA');
+   expect(parsedFull, builtFull);
+   expect(parsedFull.hashCode, builtFull.hashCode);
+
+   // The same must hold when script and region are absent.
+   final parsedBare = Locale.parse('en');
+   final builtBare = Locale.fromSubtags(
+       languageCode: 'en', scriptCode: null, countryCode: null);
+   expect(parsedBare, builtBare);
+   expect(parsedBare.hashCode, builtBare.hashCode);
+ });
+}
+
+void testFromSubtags(
+    String language, // Subtags handed to Locale.fromSubtags.
+    String script, // The tests pass null for an absent script or region.
+    String region,
+    String expectedLanguage, // Expected languageCode/scriptCode/countryCode.
+    String expectedScript,
+    String expectedRegion,
+    String expectedTag) { // Expected toLanguageTag() and toString() result.
+  test('Locale.fromSubtags(...) with $language, $script, $region', () {
+    final locale = Locale.fromSubtags(
+        languageCode: language, scriptCode: script, countryCode: region);
+    expect(locale.languageCode, expectedLanguage);
+    expect(locale.scriptCode, expectedScript);
+    expect(locale.countryCode, expectedRegion);
+    expect(locale.toLanguageTag(), expectedTag);
+    expect(locale.toString(), expectedTag);
+  });
+}
+
+void testParse(
+    String bcp47Tag, // Identifier handed to Locale.parse.
+    String expectedLanguage, // Expected languageCode/scriptCode/countryCode.
+    String expectedScript,
+    String expectedRegion,
+    Iterable<String> expectedVariants, // Expected variants, order included.
+    String expectedTag) { // Expected toLanguageTag() result.
+  test('Locale.parse("$bcp47Tag");', () {
+    final locale = Locale.parse(bcp47Tag);
+    expect(locale.languageCode, expectedLanguage);
+    expect(locale.scriptCode, expectedScript);
+    expect(locale.countryCode, expectedRegion);
+    expect(locale.toLanguageTag(), expectedTag);
+    expect(locale.variants, orderedEquals(expectedVariants));
+  });
+}
diff --git a/test/locale_test_data.dart b/test/locale_test_data.dart
new file mode 100755
index 0000000..2786ba9
--- /dev/null
+++ b/test/locale_test_data.dart
@@ -0,0 +1,69 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Test data for Locale handling.
+///
+/// DO NOT EDIT. This file is autogenerated by script.
+/// TODO(hugovdm): improve the script and file related ICU bugs.
+
+/// Test data: a map from unnormalized locale names to normalized locale names.
+Map<String, String> localeParsingTestData = <String, String>{
+ // Simple with normalization
+ 'Zh': 'zh',
+ 'zH-cn': 'zh-CN',
+ 'ZH-Cn': 'zh-CN',
+ 'zh-cN': 'zh-CN',
+ 'zh-hans': 'zh-Hans',
+ 'ZH-HANS-CN': 'zh-Hans-CN',
+
+ // Region codes can be three digits
+ 'es-419': 'es-419',
+
+ // While language is usually 2 characters, it can also be 3
+ 'CKB-arab': 'ckb-Arab',
+
+ // With simple canonicalization
+ 'Iw': 'he',
+ 'iW': 'he',
+ 'My-Bu': 'my-MM',
+
+ // "und" is the language tag for undefined language
+ 'und': 'und',
+
+ // Normalization: sorting of extension subtags
+ 'en-z-abc-001-foo-fii-bar-u-cu-usd-co-phonebk':
+ 'en-u-co-phonebk-cu-usd-z-abc-001-foo-fii-bar',
+
+ // Normalises tags, sorts subtags alphabetically
+ 'UND-lkjh-qw-12345-Abcde-U-Zz-Aaa-Co-Zxc-T-AF-Latn-Za-M0-Bar':
+ 'und-Lkjh-QW-12345-abcde-t-af-latn-za-m0-bar-u-co-zxc-zz-aaa',
+
+ // Supports multiple "tvalues" for each "tpart"
+ 'en-u-cu-usd-t-a0-ghi-jkl-b0-abc-def-a-aaa':
+ 'en-a-aaa-t-a0-ghi-jkl-b0-abc-def-u-cu-usd',
+
+ // Private-use extension subtags do not get sorted
+ 'en-x-BCDE0123-ABCD0123': 'en-x-bcde0123-abcd0123',
+ 'en-x-BCDE0123-123456-ABCD0123': 'en-x-bcde0123-123456-abcd0123',
+};
+
+/// Invalid Language Tags.
+List<String> invalidLocales = <String>[
+ 'en-',
+ '-za',
+ 'en--za',
+ '419',
+ 'en-t',
+ 'en-t-',
+ 'en-t-de-t-fr',
+ 'en-u',
+ 'en-u-',
+ 'en-u-ca-islamic-',
+ 'en-u-cu-eur-u-co-phonebk',
+ 'en-x-',
+ 'en-x-foo-',
+ 'en-x-abcdefghi',
+ 'en-z-',
+ 'en-z-aaa-z-bbb',
+];