Add a Locale class to package:intl. * Locale Identifiers are described by https://docs.google.com/document/d/1kV5lL7kDkHnzvGVBZbWDurwCI_waLaamSw6QwavXjvc/edit which is not yet available externally. Once an external version is available, might we want to link to it from dartdoc? * This CL does not add the LikelySubtags data, so its addLikelySubtags and minimizeSubtags implementations only align with CLDR/ICU/ECMAScript for cases where the Target script and region columns of http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html are "Latn" and "US". * Dartdoc output: https://hugovdm.users.x20web.corp.google.com/locale_225071692/intl/Locale-class.html Reviewers: - sra: reviewed with "Dart for the Web" emphasis - lrn: Dart Language review - alanknight: good fit into package:intl and ecosystem - mnita: general API design / FYI - cira: general API design / FYI PiperOrigin-RevId: 240140680

commit: 4b27a88cc6418ad31d94cafa0dc426c2c7d995e5 [log] [tgz]
author: Dart Team <misc@dartlang.org> Mon Mar 25 08:08:29 2019 -0700
committer: Alan Knight <alanknight@google.com> Fri Jun 14 10:31:53 2019 -0700
tree: bf9d3ec0d768ad0a091aae13d24c5bb6afff5512
parent: 381d5d05e21d577259f0c94fe14e5e195be62a06 [diff]
diff --git a/lib/src/locale.dart b/lib/src/locale.dart
new file mode 100644
index 0000000..3a941eb
--- /dev/null
+++ b/lib/src/locale.dart

@@ -0,0 +1,111 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'locale/locale_implementation.dart';
+import 'locale/locale_parser.dart' show LocaleParser;
+
+/// A [Unicode Locale
+/// Identifier](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier)
+/// represented as an object.
+///
+/// Ways to obtain a Locale instance:
+/// * [fromSubtags] builds one from language, script and region subtags,
+/// * [parse] reads a Unicode Locale Identifier string and throws on failure,
+/// * [tryParse] reads a Unicode Locale Identifier string and returns null on
+///   failure.
+abstract class Locale {
+  /// Creates a Locale made up solely of language, script and region subtags.
+  ///
+  /// A [FormatException] is thrown when any of the subtags is syntactically
+  /// invalid.
+  static Locale fromSubtags(
+      {String languageCode, String scriptCode, String countryCode}) {
+    return LocaleImplementation.fromSubtags(
+        languageCode: languageCode,
+        scriptCode: scriptCode,
+        countryCode: countryCode);
+  }
+
+  /// Converts a [Unicode Locale Identifier][localeIds] string into a [Locale]
+  /// instance.
+  ///
+  /// [localeIds]:
+  /// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
+  ///
+  /// Throws a [FormatException] listing the problems found if parsing fails.
+  static Locale parse(String localeIdentifier) {
+    assert(localeIdentifier != null);
+    final parser = LocaleParser(localeIdentifier);
+    final parsed = parser.toLocale();
+    if (parsed != null) return parsed;
+    final details = parser.problems.join('; ');
+    throw FormatException('Locale "$localeIdentifier": $details.');
+  }
+
+  /// Converts a [Unicode Locale Identifier][localeIds] string into a [Locale]
+  /// instance, without throwing on malformed input.
+  ///
+  /// [localeIds]:
+  /// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier
+  ///
+  /// Returns `null` if [localeIdentifier] is syntactically invalid.
+  static Locale tryParse(String localeIdentifier) {
+    assert(localeIdentifier != null);
+    return LocaleParser(localeIdentifier).toLocale();
+  }
+
+  /// The language subtag of this Locale Identifier.
+  ///
+  /// The value is syntactically valid, normalized (has correct case) and
+  /// canonical (deprecated tags have been replaced). It is not necessarily
+  /// valid, however: the language might not exist, because the list of valid
+  /// languages changes with time.
+  String get languageCode;
+
+  /// The script subtag of this Locale Identifier, or null when absent.
+  ///
+  /// The value is syntactically valid and normalized (has correct case). It is
+  /// not necessarily valid, however: the script might not exist, because the
+  /// list of valid scripts changes with time.
+  String get scriptCode;
+
+  /// The region subtag of this Locale Identifier, or null when absent.
+  ///
+  /// The value is syntactically valid, normalized (has correct case) and
+  /// canonical (deprecated tags have been replaced). It is not necessarily
+  /// valid, however: the region might not exist, because the list of valid
+  /// regions changes with time.
+  String get countryCode;
+
+  /// The variant subtags of this Locale Identifier.
+  ///
+  /// They are syntactically valid, normalized (have correct case) and sorted
+  /// alphabetically. They are not necessarily valid, however: variants might
+  /// not exist, because the list of variants changes with time.
+  Iterable<String> get variants;
+
+  /// The canonical [Unicode BCP47 Locale
+  /// Identifier](http://www.unicode.org/reports/tr35/#BCP_47_Conformance) of
+  /// this locale.
+  String toLanguageTag();
+
+  /// Returns the same canonical [Unicode BCP47 Locale
+  /// Identifier](http://www.unicode.org/reports/tr35/#BCP_47_Conformance) as
+  /// [toLanguageTag].
+  @override
+  String toString() {
+    return toLanguageTag();
+  }
+
+  /// Whether [other] is a [Locale] with the same canonical language tag as
+  /// this one.
+  @override
+  bool operator ==(Object other) =>
+      identical(this, other) ||
+      (other is Locale && toLanguageTag() == other.toLanguageTag());
+
+  /// Hash of the canonical language tag, consistent with [operator ==].
+  @override
+  int get hashCode => toLanguageTag().hashCode;
+}

diff --git a/lib/src/locale/locale_deprecations.dart b/lib/src/locale/locale_deprecations.dart
new file mode 100644
index 0000000..5dd0922
--- /dev/null
+++ b/lib/src/locale/locale_deprecations.dart

@@ -0,0 +1,119 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Returns the replacement registered for a deprecated language subtag, or
+/// `languageCode` itself when no replacement is registered for it.
+///
+/// The subtag must already be lowercase.
+///
+/// TODO(b/127689510): write a new script for updating this list from CLDR data.
+String replaceDeprecatedLanguageSubtag(String languageCode) =>
+    _deprecatedLanguageTagReplacements[languageCode] ?? languageCode;
+
+const Map<String, String> _deprecatedLanguageTagReplacements = {
+  // Keys are deprecated language subtags, values their replacements. Entries
+  // are generated by a modified version of Flutter's gen_locale.dart.
+  // TODO(hugovdm): make improvements to the tool.
+  // Mappings generated for language subtag registry as of 2019-02-20.
+  'in': 'id', // Indonesian; deprecated 1989-01-01
+  'iw': 'he', // Hebrew; deprecated 1989-01-01
+  'ji': 'yi', // Yiddish; deprecated 1989-01-01
+  'jw': 'jv', // Javanese; deprecated 2001-08-13
+  'mo': 'ro', // Moldavian, Moldovan; deprecated 2008-11-22
+  'aam': 'aas', // Aramanik; deprecated 2015-02-12
+  'adp': 'dz', // Adap; deprecated 2015-02-12
+  'aue': 'ktz', // ǂKxʼauǁʼein; deprecated 2015-02-12
+  'ayx': 'nun', // Ayi (China); deprecated 2011-08-16
+  'bgm': 'bcg', // Baga Mboteni; deprecated 2016-05-30
+  'bjd': 'drl', // Bandjigali; deprecated 2012-08-12
+  'ccq': 'rki', // Chaungtha; deprecated 2012-08-12
+  'cjr': 'mom', // Chorotega; deprecated 2010-03-11
+  'cka': 'cmr', // Khumi Awa Chin; deprecated 2012-08-12
+  'cmk': 'xch', // Chimakum; deprecated 2010-03-11
+  'coy': 'pij', // Coyaima; deprecated 2016-05-30
+  'cqu': 'quh', // Chilean Quechua; deprecated 2016-05-30
+  'drh': 'khk', // Darkhat; deprecated 2010-03-11
+  'drw': 'prs', // Darwazi; deprecated 2010-03-11
+  'gav': 'dev', // Gabutamon; deprecated 2010-03-11
+  'gfx': 'vaj', // Mangetti Dune ǃXung; deprecated 2015-02-12
+  'ggn': 'gvr', // Eastern Gurung; deprecated 2016-05-30
+  'gti': 'nyc', // Gbati-ri; deprecated 2015-02-12
+  'guv': 'duz', // Gey; deprecated 2016-05-30
+  'hrr': 'jal', // Horuru; deprecated 2012-08-12
+  'ibi': 'opa', // Ibilo; deprecated 2012-08-12
+  'ilw': 'gal', // Talur; deprecated 2013-09-10
+  'jeg': 'oyb', // Jeng; deprecated 2017-02-23
+  'kgc': 'tdf', // Kasseng; deprecated 2016-05-30
+  'kgh': 'kml', // Upper Tanudan Kalinga; deprecated 2012-08-12
+  'koj': 'kwv', // Sara Dunjo; deprecated 2015-02-12
+  'krm': 'bmf', // Krim; deprecated 2017-02-23
+  'ktr': 'dtp', // Kota Marudu Tinagas; deprecated 2016-05-30
+  'kvs': 'gdj', // Kunggara; deprecated 2016-05-30
+  'kwq': 'yam', // Kwak; deprecated 2015-02-12
+  'kxe': 'tvd', // Kakihum; deprecated 2015-02-12
+  'kzj': 'dtp', // Coastal Kadazan; deprecated 2016-05-30
+  'kzt': 'dtp', // Tambunan Dusun; deprecated 2016-05-30
+  'lii': 'raq', // Lingkhim; deprecated 2015-02-12
+  'lmm': 'rmx', // Lamam; deprecated 2014-02-28
+  'meg': 'cir', // Mea; deprecated 2013-09-10
+  'mst': 'mry', // Cataelano Mandaya; deprecated 2010-03-11
+  'mwj': 'vaj', // Maligo; deprecated 2015-02-12
+  'myt': 'mry', // Sangab Mandaya; deprecated 2010-03-11
+  'nad': 'xny', // Nijadali; deprecated 2016-05-30
+  'ncp': 'kdz', // Ndaktup; deprecated 2018-03-08
+  'nnx': 'ngv', // Ngong; deprecated 2015-02-12
+  'nts': 'pij', // Natagaimas; deprecated 2016-05-30
+  'oun': 'vaj', // ǃOǃung; deprecated 2015-02-12
+  'pcr': 'adx', // Panang; deprecated 2013-09-10
+  'pmc': 'huw', // Palumata; deprecated 2016-05-30
+  'pmu': 'phr', // Mirpur Panjabi; deprecated 2015-02-12
+  'ppa': 'bfy', // Pao; deprecated 2016-05-30
+  'ppr': 'lcq', // Piru; deprecated 2013-09-10
+  'pry': 'prt', // Pray 3; deprecated 2016-05-30
+  'puz': 'pub', // Purum Naga; deprecated 2014-02-28
+  'sca': 'hle', // Sansu; deprecated 2012-08-12
+  'skk': 'oyb', // Sok; deprecated 2017-02-23
+  'tdu': 'dtp', // Tempasuk Dusun; deprecated 2016-05-30
+  'thc': 'tpo', // Tai Hang Tong; deprecated 2016-05-30
+  'thx': 'oyb', // The; deprecated 2015-02-12
+  'tie': 'ras', // Tingal; deprecated 2011-08-16
+  'tkk': 'twm', // Takpa; deprecated 2011-08-16
+  'tlw': 'weo', // South Wemale; deprecated 2012-08-12
+  'tmp': 'tyj', // Tai Mène; deprecated 2016-05-30
+  'tne': 'kak', // Tinoc Kallahan; deprecated 2016-05-30
+  'tnf': 'prs', // Tangshewi; deprecated 2010-03-11
+  'tsf': 'taj', // Southwestern Tamang; deprecated 2015-02-12
+  'uok': 'ema', // Uokha; deprecated 2015-02-12
+  'xba': 'cax', // Kamba (Brazil); deprecated 2016-05-30
+  'xia': 'acn', // Xiandao; deprecated 2013-09-10
+  'xkh': 'waw', // Karahawyana; deprecated 2016-05-30
+  'xsj': 'suj', // Subi; deprecated 2015-02-12
+  'ybd': 'rki', // Yangbye; deprecated 2012-08-12
+  'yma': 'lrr', // Yamphe; deprecated 2012-08-12
+  'ymt': 'mtm', // Mator-Taygi-Karagas; deprecated 2015-02-12
+  'yos': 'zom', // Yos; deprecated 2013-09-10
+  'yuu': 'yug', // Yugh; deprecated 2014-02-28
+};
+
+/// Returns the replacement registered for a deprecated region subtag, or
+/// `regionCode` itself when no replacement is registered for it.
+///
+/// The subtag must already be uppercase.
+///
+/// TODO(b/127689510): write a new script for updating this list from CLDR data.
+String replaceDeprecatedRegionSubtag(String regionCode) =>
+    _deprecatedRegionTagReplacements[regionCode] ?? regionCode;
+
+const Map<String, String> _deprecatedRegionTagReplacements = {
+  // Keys are deprecated region subtags, values their replacements. Entries
+  // are generated by a modified version of Flutter's gen_locale.dart.
+  // TODO(hugovdm): make improvements to the tool.
+  // Mappings generated for language subtag registry as of 2019-02-20.
+  'BU': 'MM', // Burma; deprecated 1989-12-05
+  'DD': 'DE', // German Democratic Republic; deprecated 1990-10-30
+  'FX': 'FR', // Metropolitan France; deprecated 1997-07-14
+  'TP': 'TL', // East Timor; deprecated 2002-05-20
+  'YD': 'YE', // Democratic Yemen; deprecated 1990-08-14
+  'ZR': 'CD', // Zaire; deprecated 1997-07-14
+};

diff --git a/lib/src/locale/locale_extensions.dart b/lib/src/locale/locale_extensions.dart
new file mode 100644
index 0000000..31ac841
--- /dev/null
+++ b/lib/src/locale/locale_extensions.dart

@@ -0,0 +1,228 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Locale extensions as defined for [Unicode Locale
+/// Identifiers](http://www.unicode.org/reports/tr35/#Unicode_locale_identifier).
+///
+/// These extensions cover Locale information that isn't captured by the
+/// language, script, region and variants subtags of the Unicode Language
+/// Identifier. Please see the Unicode Technical Standard linked above.
+class LocaleExtensions {
+  /// Creates extensions; a null argument means that extension is absent.
+  ///
+  /// Keys in each of the maps passed to this constructor must be syntactically
+  /// valid extension keys, and must already be normalized (correct case).
+  LocaleExtensions(
+      Map<String, String> uExtensions,
+      Map<String, String> tExtensions,
+      Map<String, String> otherExtensions,
+      this._xExtensions)
+      : _uExtensions = _sortedUnmodifiable(uExtensions),
+        _tExtensions = _sortedUnmodifiable(tExtensions),
+        _otherExtensions = _sortedUnmodifiable(otherExtensions) {
+    // Debug-mode asserts to ensure all parameters are normalized and UTS #35
+    // compliant.
+    assert(
+        uExtensions == null ||
+            uExtensions.entries.every((e) {
+              if (!_uExtensionsValidKeysRE.hasMatch(e.key)) return false;
+              // TODO(hugovdm) reconsider this representation: "true" values are
+              // suppressed in canonical Unicode BCP47 Locale Identifiers, but
+              // we may choose to represent them as "true" in memory.
+              if (e.value == '' && e.key != '') return true;
+              if (!_uExtensionsValidValuesRE.hasMatch(e.value)) return false;
+              return true;
+            }),
+        'uExtensions keys must match '
+        'RegExp/${_uExtensionsValidKeysRE.pattern}/. '
+        'uExtensions values must match '
+        'RegExp/${_uExtensionsValidValuesRE.pattern}/. '
+        'uExtensions.entries: ${uExtensions.entries}.');
+    assert(
+        tExtensions == null ||
+            tExtensions.entries.every((e) {
+              if (!_tExtensionsValidKeysRE.hasMatch(e.key)) return false;
+              if (e.key == '') {
+                if (!_validTlangRE.hasMatch(e.value)) return false;
+              } else {
+                if (!_tExtensionsValidValuesRE.hasMatch(e.value)) return false;
+              }
+              return true;
+            }),
+        'tExtensions keys must match '
+        'RegExp/${_tExtensionsValidKeysRE.pattern}/. '
+        'tExtensions values other than tlang must match '
+        'RegExp/${_tExtensionsValidValuesRE.pattern}/. '
+        'Entries: ${tExtensions.entries}.');
+    assert(
+        otherExtensions == null ||
+            otherExtensions.entries.every((e) {
+              if (!_otherExtensionsValidKeysRE.hasMatch(e.key)) return false;
+              if (!_otherExtensionsValidValuesRE.hasMatch(e.value))
+                return false;
+              return true;
+            }),
+        'otherExtensions keys must match '
+        'RegExp/${_otherExtensionsValidKeysRE.pattern}/. '
+        'otherExtensions values must match '
+        'RegExp/${_otherExtensionsValidValuesRE.pattern}/. '
+        'Entries: ${otherExtensions.entries}.');
+    assert(
+        _xExtensions == null || _validXExtensionsRE.hasMatch(_xExtensions),
+        '_xExtensions must match RegExp/${_validXExtensionsRE.pattern}/ '
+        'but is "$_xExtensions".');
+  }
+
+  /// For debug/assert-use only! Matches keys considered valid for
+  /// [_uExtensions], does not imply keys are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _uExtensionsValidKeysRE = RegExp(r'^$|^[a-z\d][a-z]$');
+
+  /// For debug/assert-use only! Matches values considered valid for
+  /// [_uExtensions], does not imply values are valid as per Unicode LDML spec!
+  // Subtags are alphanumeric (UTS #35 `alphanum{3,8}`), e.g. "iso8601". Must
+  // be static to get tree-shaken away in production code.
+  static final _uExtensionsValidValuesRE =
+      RegExp(r'^[a-z\d]{3,8}([-][a-z\d]{3,8})*$');
+
+  /// For debug/assert-use only! Matches keys considered valid for
+  /// [_tExtensions], does not imply keys are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _tExtensionsValidKeysRE = RegExp(r'^$|^[a-z]\d$');
+
+  /// For debug/assert-use only! With the exception of `tlang`, matches values
+  /// considered valid for [_tExtensions], does not imply values are valid as
+  /// per Unicode LDML spec!
+  // Subtags are alphanumeric (UTS #35 `alphanum{3,8}`), so digits are allowed.
+  // Must be static to get tree-shaken away in production code.
+  static final _tExtensionsValidValuesRE =
+      RegExp(r'^[a-z\d]{3,8}([-][a-z\d]{3,8})*$');
+
+  /// For debug/assert-use only! Matches keys considered valid for
+  /// [_otherExtensions], does not imply keys are valid as per Unicode LDML
+  /// spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _otherExtensionsValidKeysRE = RegExp(r'^[a-svwyz]$');
+
+  /// For debug/assert-use only! Matches values considered valid for
+  /// [_otherExtensions], does not imply values are valid as per Unicode LDML
+  /// spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _otherExtensionsValidValuesRE =
+      RegExp(r'^[a-z\d]{2,8}([-][a-z\d]{2,8})*$');
+
+  /// For debug/assert-use only! Matches values valid for [_xExtensions].
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _validXExtensionsRE =
+      RegExp(r'^[a-z\d]{1,8}([-][a-z\d]{1,8})*$');
+
+  /// For debug/assert-use only! Matches values valid for tlang.
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _validTlangRE = RegExp(
+      // Full string match start
+      r'^'
+
+      // Language is required in a tlang identifier.
+      r'([a-z]{2,3}|[a-z]{5,8})' // Language
+
+      // Optional script
+      r'(-[a-z]{4})?'
+
+      // Optional region
+      r'(-[a-z]{2}|-\d{3})?'
+
+      // Any number of variant subtags
+      r'(-([a-z\d]{5,8}|\d[a-z\d]{3}))*'
+
+      // Full string match end
+      r'$');
+
+  /// `-u-` extension, with keys in sorted order. Attributes are stored under
+  /// the zero-length string as key. Keywords (consisting of `key` and `type`)
+  /// are stored under normalized (lowercased) `key`. See
+  /// http://www.unicode.org/reports/tr35/#unicode_locale_extensions for
+  /// details.
+  final Map<String, String> _uExtensions;
+
+  /// `-t-` extension, with keys in sorted order. tlang attributes are stored
+  /// under the zero-length string as key. See
+  /// http://www.unicode.org/reports/tr35/#transformed_extensions for
+  /// details.
+  final Map<String, String> _tExtensions;
+
+  /// Other extensions, with keys in sorted order. See
+  /// http://www.unicode.org/reports/tr35/#other_extensions for details.
+  final Map<String, String> _otherExtensions;
+
+  /// -x- extension values. See
+  /// http://www.unicode.org/reports/tr35/#pu_extensions for details.
+  final String _xExtensions;
+
+  /// List of subtags in the [Unicode Locale
+  /// Identifier](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier)
+  /// extensions, including private use extensions.
+  ///
+  /// This covers everything after the unicode_language_id. If there are no
+  /// extensions (i.e. the Locale Identifier has only language, script, region
+  /// and/or variants), this will be an empty list.
+  ///
+  /// These subtags are sorted and normalized, ready for joining with a
+  /// unicode_language_id and '-' as delimiter to provide a UTS #35 compliant
+  /// normalized Locale Identifier.
+  List<String> get subtags {
+    final result = <String>[];
+    final resultVWYZ = <String>[];
+
+    _otherExtensions.forEach((singleton, value) {
+      final int letter = (singleton.codeUnitAt(0) - 0x61) & 0xFFFF;
+      // 't', 'u' and 'x' are handled by other members.
+      assert(letter < 26 && letter != 19 && letter != 20 && letter != 23);
+      if (letter < 19) {
+        result.addAll([singleton, value]);
+      } else {
+        resultVWYZ.addAll([singleton, value]);
+      }
+    });
+    if (_tExtensions.isNotEmpty) {
+      result.add('t');
+      _tExtensions.forEach((key, value) {
+        if (key != '') result.add(key);
+        result.add(value);
+      });
+    }
+    if (_uExtensions.isNotEmpty) {
+      result.add('u');
+      _uExtensions.forEach((key, value) {
+        if (key != '') result.add(key);
+        if (value != '') result.add(value);
+      });
+    }
+
+    if (resultVWYZ.isNotEmpty) {
+      result.addAll(resultVWYZ);
+    }
+    if (_xExtensions != null) {
+      result.add('x-$_xExtensions');
+    }
+    return result;
+  }
+}
+
+/// Returns an unmodifiable copy of `unsorted` whose keys iterate in sorted
+/// order, or an empty constant map when `unsorted` is null.
+Map<String, String> _sortedUnmodifiable(Map<String, String> unsorted) {
+  if (unsorted == null) return const {};
+  final sortedKeys = unsorted.keys.toList()..sort();
+  final ordered = <String, String>{};
+  for (final key in sortedKeys) {
+    ordered[key] = unsorted[key];
+  }
+  return Map.unmodifiable(ordered);
+}

diff --git a/lib/src/locale/locale_implementation.dart b/lib/src/locale/locale_implementation.dart
new file mode 100644
index 0000000..11b398c
--- /dev/null
+++ b/lib/src/locale/locale_implementation.dart

@@ -0,0 +1,189 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:intl/src/locale.dart' show Locale;
+
+import 'locale_deprecations.dart';
+import 'locale_extensions.dart';
+
+/// The primary implementation of the Locale interface.
+class LocaleImplementation extends Locale {
+  /// Simple private constructor with asserts to check invariants.
+  LocaleImplementation._(this.languageCode, this.scriptCode, this.countryCode,
+      this.variants, this._extensions) {
+    // Debug-mode asserts to ensure all parameters are normalized and UTS #35
+    // compliant.
+    assert(
+        languageCode != null && _normalizedLanguageRE.hasMatch(languageCode),
+        'languageCode must match RegExp/${_normalizedLanguageRE.pattern}/ '
+        'but is "$languageCode".');
+    assert(
+        scriptCode == null || _normalizedScriptRE.hasMatch(scriptCode),
+        'scriptCode must match RegExp/${_normalizedScriptRE.pattern}/ '
+        'but is "$scriptCode".');
+    assert(
+        countryCode == null || _normalizedRegionRE.hasMatch(countryCode),
+        'countryCode must match RegExp/${_normalizedRegionRE.pattern}/ '
+        'but is "$countryCode".');
+    assert(
+        variants is List &&
+            variants.every((v) => _normalizedVariantRE.hasMatch(v)),
+        'each variant must match RegExp/${_normalizedVariantRE.pattern}/ '
+        'but variants are "$variants".');
+  }
+
+  /// For debug/assert-use only! Matches subtags considered valid for
+  /// [languageCode], does not imply subtag is valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _normalizedLanguageRE = RegExp(r'^[a-z]{2,3}$|^[a-z]{5,8}$');
+
+  /// For debug/assert-use only! Matches subtags considered valid for
+  /// [scriptCode], does not imply subtag is valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _normalizedScriptRE = RegExp(r'^[A-Z][a-z]{3}$');
+
+  /// For debug/assert-use only! Matches subtags considered valid for
+  /// [countryCode], does not imply subtags are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _normalizedRegionRE = RegExp(r'^[A-Z]{2}$|^\d{3}$');
+
+  /// For debug/assert-use only! Matches subtags considered valid for
+  /// [variants], does not imply subtags are valid as per Unicode LDML spec!
+  //
+  // Must be static to get tree-shaken away in production code.
+  static final _normalizedVariantRE =
+      RegExp(r'^[a-z\d]{5,8}$|^\d[a-z\d]{3}$');
+
+  /// Simple factory which assumes parameters are syntactically correct.
+  ///
+  /// In debug mode, incorrect use may result in an assertion failure. (In
+  /// production code, this class makes no promises regarding the consequence of
+  /// incorrect use.)
+  ///
+  /// For public APIs, see [Locale.fromSubtags] and [Locale.parse].
+  factory LocaleImplementation.unsafe(
+    String languageCode, {
+    String scriptCode,
+    String countryCode,
+    Iterable<String> variants,
+    LocaleExtensions extensions,
+  }) {
+    variants = (variants != null && variants.isNotEmpty)
+        ? List.unmodifiable(variants.toList()..sort())
+        : const [];
+    return LocaleImplementation._(
+        languageCode, scriptCode, countryCode, variants, extensions);
+  }
+
+  /// Constructs a Locale instance that consists of only language, script and
+  /// region subtags.
+  ///
+  /// Throws a [FormatException] if `languageCode` is null or any subtag is
+  /// syntactically invalid.
+  static LocaleImplementation fromSubtags(
+      {String languageCode, String scriptCode, String countryCode}) {
+    return LocaleImplementation._(
+        replaceDeprecatedLanguageSubtag(_normalizeLanguageCode(languageCode)),
+        _normalizeScriptCode(scriptCode),
+        replaceDeprecatedRegionSubtag(_normalizeCountryCode(countryCode)),
+        const [],
+        null);
+  }
+
+  /// Performs case normalization on `languageCode`.
+  ///
+  /// Throws a [FormatException] if it is null or syntactically invalid.
+  static String _normalizeLanguageCode(String languageCode) {
+    if (languageCode == null || !_languageRegExp.hasMatch(languageCode)) {
+      throw FormatException('Invalid language "$languageCode"');
+    }
+    return languageCode.toLowerCase();
+  }
+
+  static final _languageRegExp = RegExp(r'^[a-zA-Z]{2,3}$|^[a-zA-Z]{5,8}$');
+
+  /// Performs case normalization on `scriptCode`.
+  ///
+  /// Throws a [FormatException] if it is syntactically invalid.
+  static String _normalizeScriptCode(String scriptCode) {
+    if (scriptCode == null) return null;
+    if (!_scriptRegExp.hasMatch(scriptCode)) {
+      throw FormatException('Invalid script "$scriptCode"');
+    }
+    return toCapCase(scriptCode);
+  }
+
+  static final _scriptRegExp = RegExp(r'^[a-zA-Z]{4}$');
+
+  /// Performs case normalization on `countryCode`.
+  ///
+  /// Throws a [FormatException] if it is syntactically invalid.
+  static String _normalizeCountryCode(String countryCode) {
+    if (countryCode == null) return null;
+    if (!_regionRegExp.hasMatch(countryCode)) {
+      throw FormatException('Invalid region "$countryCode"');
+    }
+    return countryCode.toUpperCase();
+  }
+
+  static final _regionRegExp = RegExp(r'^[a-zA-Z]{2}$|^\d{3}$');
+
+  /// The language subtag of the Locale Identifier.
+  ///
+  /// Syntactically valid, normalized (correct case) and canonical (deprecated
+  /// tags replaced), but not necessarily valid: the language might not exist,
+  /// because the list of valid languages changes with time.
+  @override
+  final String languageCode;
+
+  /// The script subtag of the Locale Identifier, null if absent.
+  ///
+  /// Syntactically valid and normalized (correct case), but not necessarily
+  /// valid: the script might not exist, because the list of valid scripts
+  /// changes with time.
+  @override
+  final String scriptCode;
+
+  /// The region subtag of the Locale Identifier, null if absent.
+  ///
+  /// Syntactically valid, normalized (correct case) and canonical (deprecated
+  /// tags replaced), but not necessarily valid: the region might not exist,
+  /// because the list of valid regions changes with time.
+  @override
+  final String countryCode;
+
+  /// Iterable of variant subtags, zero-length iterable if variants are absent.
+  ///
+  /// Syntactically valid, normalized (correct case) and sorted alphabetically,
+  /// but not necessarily valid: variants might not exist, because the list of
+  /// variants changes with time.
+  @override
+  final Iterable<String> variants;
+
+  /// Locale extensions, null if the locale has no extensions.
+  // TODO(hugovdm): Not yet supported: getters for extensions.
+  final LocaleExtensions _extensions;
+
+  /// Cache of the value returned by [toLanguageTag].
+  String _languageTag;
+
+  /// Returns the canonical Unicode BCP47 Locale Identifier for this locale.
+  @override
+  String toLanguageTag() {
+    if (_languageTag != null) return _languageTag;
+    final out = <String>[languageCode];
+    if (scriptCode != null) out.add(scriptCode);
+    if (countryCode != null) out.add(countryCode);
+    out.addAll(variants);
+    if (_extensions != null) out.addAll(_extensions.subtags);
+    return _languageTag = out.join('-');
+  }
+}
+
+/// Capitalizes the first character of `input` and lowercases the remainder.
+String toCapCase(String input) =>
+    input[0].toUpperCase() + input.substring(1).toLowerCase();

diff --git a/lib/src/locale/locale_parser.dart b/lib/src/locale/locale_parser.dart
new file mode 100644
index 0000000..4fb90d5
--- /dev/null
+++ b/lib/src/locale/locale_parser.dart

@@ -0,0 +1,413 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'locale_deprecations.dart';
+import 'locale_extensions.dart';
+import 'locale_implementation.dart';
+
+/// A parser for [Unicode Locale
+/// Identifiers](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier).
+class LocaleParser {
+  /// Language subtag of Unicode Language Identifier.
+  ///
+  /// Stays at 'und' unless the constructor accepts a language subtag.
+  String _languageCode = 'und';
+
+  /// Script subtag of Unicode Language Identifier.
+  ///
+  /// Stored via [toCapCase]; null if no script subtag was accepted.
+  String _scriptCode;
+
+  /// Region subtag of Unicode Language Identifier.
+  ///
+  /// Stored upper-cased, after deprecated-region replacement; null if no
+  /// region subtag was accepted.
+  String _countryCode;
+
+  /// Variant subtags of Unicode Language Identifier.
+  ///
+  /// Kept in the order in which they were parsed. Left null when the
+  /// identifier is 'root', because the constructor returns early in that case.
+  List<String> _variants;
+
+  /// Unicode Locale Extensions, also known as "U Extension".
+  ///
+  /// Maps each key to its '-'-joined type (the empty string for a lone
+  /// 'true'); sorted attributes, if any, are stored under the empty key. Null
+  /// if no 'u' singleton was parsed.
+  Map<String, String> _uExtensions;
+
+  /// Transformed Extensions, also known as "T Extension".
+  ///
+  /// Maps each tkey to its '-'-joined tvalue; the tlang, if any, is stored
+  /// under the empty key. Null if no 't' singleton was parsed.
+  Map<String, String> _tExtensions;
+
+  /// Private-Use Extensions.
+  ///
+  /// The '-'-joined subtags that followed the 'x' singleton; null if there
+  /// were none.
+  String _xExtensions;
+
+  /// Other Extensions.
+  ///
+  /// Maps each singleton to its '-'-joined subtags; null if no such extension
+  /// with at least one subtag was parsed.
+  Map<String, String> _otherExtensions;
+
+  /// List of problems with the localeId the parser tried to parse.
+  ///
+  /// An empty list indicates problem-free parsing.
+  final List<String> problems = <String>[];
+
+  /// Builds a Locale from whatever the parser has collected so far.
+  ///
+  /// Returns null when [problems] is not empty, i.e. when the parsed
+  /// identifier was syntactically invalid. A [LocaleExtensions] object is only
+  /// created if at least one kind of extension was seen; otherwise null is
+  /// passed on as the extensions argument.
+  LocaleImplementation toLocale() {
+    if (problems.isNotEmpty) return null;
+    final hasExtensions = _uExtensions != null ||
+        _tExtensions != null ||
+        _otherExtensions != null ||
+        _xExtensions != null;
+    final LocaleExtensions extensions = hasExtensions
+        ? LocaleExtensions(
+            _uExtensions, _tExtensions, _otherExtensions, _xExtensions)
+        : null;
+    return LocaleImplementation.unsafe(
+      _languageCode,
+      scriptCode: _scriptCode,
+      countryCode: _countryCode,
+      variants: _variants,
+      extensions: extensions,
+    );
+  }
+
+  /// The identifier's subtags, produced by splitting it on [separators].
+  List<String> _subtags;
+
+  /// Matches the subtag separators of a Unicode Locale Identifier: '-' or '_'.
+  static final separators = RegExp('[-_]');
+
+  /// The subtag most recently consumed by [advance].
+  String _accepted;
+
+  /// Returns the subtag most recently consumed by [advance].
+  String accepted() {
+    return _accepted;
+  }
+
+  /// The variant subtags collected by the latest [acceptVariants] call.
+  List<String> _acceptedList;
+
+  /// Returns the variant subtags collected by the latest [acceptVariants]
+  /// call.
+  List<String> acceptedList() {
+    return _acceptedList;
+  }
+
+  /// The subtag currently under consideration, not yet accepted.
+  String _current;
+
+  /// Returns the subtag currently under consideration, not yet accepted.
+  String current() {
+    return _current;
+  }
+
+  /// Position of [_current] within [_subtags].
+  int _currentIndex;
+
+  /// Marks [current] as [accepted] and moves on to the following subtag.
+  ///
+  /// Once the last subtag has been consumed, [current] becomes null.
+  void advance() {
+    _accepted = _current;
+    _currentIndex += 1;
+    _current =
+        _currentIndex < _subtags.length ? _subtags[_currentIndex] : null;
+  }
+
+  /// Returns true if all subtags have been parsed.
+  ///
+  /// Also returns true when there are no subtags at all: for the identifier
+  /// 'root' the constructor returns before [_subtags] and [_currentIndex] are
+  /// initialised, and dereferencing them here would throw.
+  bool atEnd() {
+    if (_subtags == null) return true;
+    return _currentIndex >= _subtags.length;
+  }
+
+  /// Parses [Unicode CLDR Locale
+  /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers).
+  ///
+  /// Not every BCP 47 tag is accepted. See [BCP 47
+  /// Conformance](https://www.unicode.org/reports/tr35/#BCP_47_Conformance) for
+  /// details.
+  ///
+  /// [localeId] may not be null. It is lower-cased and split on [separators];
+  /// the special identifier 'root' is not split at all, which leaves the
+  /// language at its default and nothing further to parse.
+  ///
+  /// Parsing failed if there are any entries in [problems] afterwards.
+  LocaleParser(String localeId) {
+    assert(localeId != null);
+
+    // Calling toLowerCase unconditionally should be efficient if
+    // string_patch.dart is in use:
+    // https://github.com/dart-lang/sdk/blob/cabaa78cc57d08bcfcd75bfe99a42c19ed497d26/runtime/lib/string_patch.dart#L1178
+    final normalizedId = localeId.toLowerCase();
+    if (normalizedId == 'root') return;
+
+    _subtags = normalizedId.split(separators);
+    _currentIndex = 0;
+    _current = _subtags[0];
+
+    // The language subtag may be missing as long as a script subtag is there.
+    final languageFound = acceptLanguage();
+    if (languageFound) {
+      _languageCode = replaceDeprecatedLanguageSubtag(accepted());
+    }
+    if (acceptScript()) {
+      _scriptCode = toCapCase(accepted());
+    } else if (!languageFound) {
+      problems.add('bad language/script');
+    }
+
+    if (acceptRegion()) {
+      _countryCode = replaceDeprecatedRegionSubtag(accepted().toUpperCase());
+    }
+    acceptVariants();
+    _variants = acceptedList();
+
+    processExtensions();
+
+    // Whatever is left over was not accepted by any of the steps above.
+    if (!atEnd()) {
+      problems.add('bad subtag "${current()}"');
+    }
+  }
+
+  /// Consumes the extension part of the identifier, one singleton at a time.
+  ///
+  /// Each accepted singleton is dispatched to the matching `process...`
+  /// method. Processing stops after a private-use ('x') extension or at the
+  /// first subtag that is not a singleton.
+  ///
+  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
+  /// empty.
+  void processExtensions() {
+    while (acceptSingleton()) {
+      final singleton = accepted();
+      switch (singleton) {
+        case 'u':
+          processUExtensions();
+          break;
+        case 't':
+          processTExtensions();
+          break;
+        case 'x':
+          // Private use comes last: nothing is parsed after it.
+          processPrivateUseExtensions();
+          return;
+        default:
+          processOtherExtensions(singleton);
+      }
+    }
+  }
+
+  /// Consumes the subtags of a 'u' extension (`unicode_locale_extensions` in
+  /// the specification); the 'u' singleton itself must already be accepted.
+  ///
+  /// Attributes are sorted and stored under the empty key of [_uExtensions].
+  /// Every keyword is stored under its key, with a lone 'true' type stored as
+  /// the empty string. A second 'u' extension, a repeated key, or an extension
+  /// with neither attributes nor keywords is recorded in [problems].
+  ///
+  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
+  /// empty.
+  void processUExtensions() {
+    if (_uExtensions != null) {
+      problems.add('duplicate "u"');
+      return;
+    }
+    _uExtensions = <String, String>{};
+
+    final attributes = <String>[];
+    while (acceptLowAlphaNumeric3to8()) {
+      attributes.add(accepted());
+    }
+    if (attributes.isNotEmpty) {
+      attributes.sort();
+      _uExtensions[''] = attributes.join('-');
+    }
+
+    // unicode_locale_extensions: collect "(sep keyword)*".
+    var keywordCount = 0;
+    while (acceptUExtensionKey()) {
+      keywordCount++;
+      final key = accepted();
+      final typeParts = <String>[];
+      while (acceptLowAlphaNumeric3to8()) {
+        typeParts.add(accepted());
+      }
+      if (_uExtensions.containsKey(key)) {
+        problems.add('duplicate "$key"');
+        continue;
+      }
+      final isLoneTrue = typeParts.length == 1 && typeParts[0] == 'true';
+      _uExtensions[key] = isLoneTrue ? '' : typeParts.join('-');
+    }
+
+    if (attributes.isEmpty && keywordCount == 0) {
+      problems.add('empty "u"');
+    }
+  }
+
+  /// Consumes the subtags of a 't' extension (`transformed_extensions` in the
+  /// specification); the 't' singleton itself must already be accepted.
+  ///
+  /// An optional tlang (language, then optional script, region and variants)
+  /// is stored lower-cased under the empty key of [_tExtensions]; every tfield
+  /// is stored under its tkey. A second 't' extension, a tkey without value, a
+  /// repeated tkey, or an extension with neither tlang nor any valued tfield
+  /// is recorded in [problems].
+  ///
+  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
+  /// empty.
+  void processTExtensions() {
+    if (_tExtensions != null) {
+      problems.add('duplicate "t"');
+      return;
+    }
+    _tExtensions = <String, String>{};
+    var sawContent = false;
+
+    if (acceptLanguage()) {
+      sawContent = true;
+      final tlang = <String>[replaceDeprecatedLanguageSubtag(accepted())];
+      if (acceptScript()) {
+        tlang.add(accepted());
+      }
+      if (acceptRegion()) {
+        // Region replacement is done on the upper-case form; tlang keeps the
+        // lower-case form.
+        final region = replaceDeprecatedRegionSubtag(accepted().toUpperCase());
+        tlang.add(region.toLowerCase());
+      }
+      acceptVariants();
+      tlang.addAll(acceptedList());
+      _tExtensions[''] = tlang.join('-');
+    }
+
+    // transformed_extensions: collect "(sep tfield)*".
+    while (acceptTExtensionKey()) {
+      final tkey = accepted();
+      final tvalueParts = <String>[];
+      while (acceptLowAlphaNumeric3to8()) {
+        tvalueParts.add(accepted());
+      }
+      if (tvalueParts.isEmpty) {
+        problems.add('empty "$tkey"');
+        continue;
+      }
+      sawContent = true;
+      if (_tExtensions.containsKey(tkey)) {
+        problems.add('duplicate "$tkey"');
+      } else {
+        _tExtensions[tkey] = tvalueParts.join('-');
+      }
+    }
+
+    if (!sawContent) {
+      problems.add('empty "t"');
+    }
+  }
+
+  /// Consumes the subtags of a private-use extension (`pu_extensions` in the
+  /// specification); the 'x' singleton itself must already be accepted.
+  ///
+  /// The subtags are stored '-'-joined in [_xExtensions], in their original
+  /// order. Nothing is stored if no subtag follows the singleton.
+  ///
+  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
+  /// empty.
+  void processPrivateUseExtensions() {
+    final parts = <String>[];
+    while (acceptLowAlphaNumeric1to8()) {
+      parts.add(accepted());
+    }
+    if (parts.isEmpty) return;
+    _xExtensions = parts.join('-');
+  }
+
+  /// Consumes the subtags of an extension other than 'u', 't' and 'x'
+  /// (`other_extensions` in the specification); [singleton] is the extension's
+  /// singleton, which must already be accepted.
+  ///
+  /// The subtags are stored '-'-joined under [singleton] in
+  /// [_otherExtensions]. An extension without subtags is dropped silently; a
+  /// repeated singleton is recorded in [problems].
+  ///
+  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
+  /// empty.
+  void processOtherExtensions(String singleton) {
+    final parts = <String>[];
+    while (acceptLowAlphaNumeric2to8()) {
+      parts.add(accepted());
+    }
+    if (parts.isEmpty) return;
+
+    _otherExtensions ??= <String, String>{};
+    if (_otherExtensions.containsKey(singleton)) {
+      problems.add('duplicate "$singleton"');
+      return;
+    }
+    _otherExtensions[singleton] = parts.join('-');
+  }
+
+  /// Consumes the current subtag if it is a language subtag.
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptLanguage() {
+    final matched = !atEnd() && _languageRegExp.hasMatch(current());
+    if (matched) advance();
+    return matched;
+  }
+
+  /// Two to three letters, or five to eight letters.
+  static final _languageRegExp = RegExp(r'^[a-z]{2,3}$|^[a-z]{5,8}$');
+
+  /// Consumes the current subtag if it is a script subtag.
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptScript() {
+    final matched = !atEnd() && _scriptRegExp.hasMatch(current());
+    if (matched) advance();
+    return matched;
+  }
+
+  /// Exactly four letters.
+  static final _scriptRegExp = RegExp(r'^[a-z]{4}$');
+
+  /// Consumes the current subtag if it is a region subtag.
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptRegion() {
+    final matched = !atEnd() && _regionRegExp.hasMatch(current());
+    if (matched) advance();
+    return matched;
+  }
+
+  /// Exactly two letters, or exactly three digits.
+  static final _regionRegExp = RegExp(r'^[a-z]{2}$|^\d{3}$');
+
+  /// Consumes as many consecutive variant subtags as there are, starting at
+  /// the current subtag.
+  ///
+  /// There is no return value: the consumed subtags (possibly none) are
+  /// available from [acceptedList] afterwards, in the order they were read.
+  void acceptVariants() {
+    final collected = <String>[];
+    _acceptedList = collected;
+    while (!atEnd() && _variantRegExp.hasMatch(current())) {
+      collected.add(current());
+      advance();
+    }
+  }
+
+  /// Five to eight letters or digits, or a digit followed by three letters or
+  /// digits.
+  static final _variantRegExp = RegExp(r'^[a-z\d]{5,8}$|^\d[a-z\d]{3}$');
+
+  /// Consumes the current subtag if it is a singleton.
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptSingleton() {
+    final matched = !atEnd() && _singletonRegExp.hasMatch(current());
+    if (matched) advance();
+    return matched;
+  }
+
+  /// Exactly one letter.
+  static final _singletonRegExp = RegExp(r'^[a-z]$');
+
+  /// Consumes the current subtag if it consists of one to eight letters or
+  /// digits.
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptLowAlphaNumeric1to8() {
+    final matched = !atEnd() && _alphaNumeric1to8RegExp.hasMatch(current());
+    if (matched) advance();
+    return matched;
+  }
+
+  /// One to eight letters or digits.
+  static final _alphaNumeric1to8RegExp = RegExp(r'^[a-z\d]{1,8}$');
+
+  /// Consumes the current subtag if it consists of two to eight letters or
+  /// digits.
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptLowAlphaNumeric2to8() {
+    final matched = !atEnd() &&
+        _alphaNumeric1to8RegExp.hasMatch(current()) &&
+        current().length >= 2;
+    if (matched) advance();
+    return matched;
+  }
+
+  /// Consumes the current subtag if it consists of three to eight letters or
+  /// digits.
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptLowAlphaNumeric3to8() {
+    final matched = !atEnd() &&
+        _alphaNumeric1to8RegExp.hasMatch(current()) &&
+        current().length >= 3;
+    if (matched) advance();
+    return matched;
+  }
+
+  /// Consumes the current subtag if it is a valid U Extension key.
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptUExtensionKey() {
+    final matched = !atEnd() && _uExtensionKeyRegExp.hasMatch(current());
+    if (matched) advance();
+    return matched;
+  }
+
+  /// A letter or digit followed by a letter.
+  static final _uExtensionKeyRegExp = RegExp(r'^[a-z\d][a-z]$');
+
+  /// Consumes the current subtag if it is a valid T Extension key (`tkey` in
+  /// the specification).
+  ///
+  /// Returns whether a subtag was consumed; see [accepted] for its value.
+  bool acceptTExtensionKey() {
+    final matched = !atEnd() && _tExtensionKeyRegExp.hasMatch(current());
+    if (matched) advance();
+    return matched;
+  }
+
+  /// A letter followed by a digit.
+  static final _tExtensionKeyRegExp = RegExp(r'^[a-z]\d$');
+}

diff --git a/test/locale_test.dart b/test/locale_test.dart
new file mode 100644
index 0000000..774fa5a
--- /dev/null
+++ b/test/locale_test.dart

@@ -0,0 +1,238 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Tests for the Locale class.
+///
+/// Currently, the primary intention of these tests is to exercise and
+/// demonstrate the API: full test coverage is a non-goal for the prototype.
+///
+/// For production code, use of ICU would influence what needs and doesn't need
+/// to be tested.
+
+import 'package:test/test.dart';
+import 'package:intl/src/locale.dart';
+
+import 'locale_test_data.dart';
+
+void main() {
+  group('Construction and properties:', () {
+    // Case normalization only.
+    testFromSubtags('Zh', null, null, 'zh', null, null, 'zh');
+    testFromSubtags('zH', null, 'cn', 'zh', null, 'CN', 'zh-CN');
+    testFromSubtags('ZH', null, 'Cn', 'zh', null, 'CN', 'zh-CN');
+    testFromSubtags('zh', null, 'cN', 'zh', null, 'CN', 'zh-CN');
+    testFromSubtags('zh', 'hans', null, 'zh', 'Hans', null, 'zh-Hans');
+    testFromSubtags('ZH', 'HANS', 'CN', 'zh', 'Hans', 'CN', 'zh-Hans-CN');
+
+    // A region may be a three-digit code.
+    testFromSubtags('es', null, '419', 'es', null, '419', 'es-419');
+
+    // Languages usually have two letters, but three are possible too.
+    testFromSubtags('CKB', 'arab', null, 'ckb', 'Arab', null, 'ckb-Arab');
+
+    // Deprecated subtags get replaced (canonicalization).
+    testFromSubtags('Iw', null, null, 'he', null, null, 'he');
+    testFromSubtags('iW', null, null, 'he', null, null, 'he');
+    testFromSubtags('My', null, 'Bu', 'my', null, 'MM', 'my-MM');
+  });
+
+  group('Locale.fromSubtags() FormatExceptions:', () {
+    // Registers a test expecting fromSubtags to reject the given subtags.
+    void expectSubtagsRejected(
+        String language, String script, String region) {
+      test('fromSubtags: "$language / $script / $region"', () {
+        final construct = () => Locale.fromSubtags(
+            languageCode: language, scriptCode: script, countryCode: region);
+        expect(construct, throwsFormatException);
+      });
+    }
+
+    expectSubtagsRejected('a', null, null);
+    expectSubtagsRejected('en', 'ZA', null);
+    expectSubtagsRejected('en', null, 'Latn');
+  });
+
+  group('Locale normalization matching ICU.', () {
+    for (final entry in localeParsingTestData.entries) {
+      final unnormalized = entry.key;
+      final normalized = entry.value;
+      test('Locale normalization: $unnormalized -> $normalized', () {
+        expect(Locale.parse(unnormalized).toLanguageTag(), normalized);
+      });
+    }
+  });
+
+  group('Unicode LDML Locale Identifier support', () {
+    // 'root' is a valid Unicode Locale Identifier, but should be taken as
+    // 'und'[1]. ICU's toLanguageTag still returns 'root'.
+    // [1]:
+    // http://unicode.org/reports/tr35/#Unicode_Locale_Identifier_CLDR_to_BCP_47
+    testParse('root', 'und', null, null, [], 'und');
+    testParse('Root', 'und', null, null, [], 'und');
+    testParse('ROOT', 'und', null, null, [], 'und');
+
+    // Underscores are accepted as separators here, unlike in ICU's
+    // `forLanguageTag`.
+    testParse('CKB_arab', 'ckb', 'Arab', null, [], 'ckb-Arab');
+    testParse('My_Bu', 'my', null, 'MM', [], 'my-MM');
+
+    // Normalises tags, sorts subtags alphabetically, including variants[1]:
+    // ICU is currently not sorting variants.
+    // [1]: http://unicode.org/reports/tr35/#Unicode_locale_identifier
+    testParse('en-scouse-fonipa', 'en', null, null, ['fonipa', 'scouse'],
+        'en-fonipa-scouse');
+
+    // Normalises tags, sorts subtags alphabetically and suppresses unneeded
+    // "true" in u extension (ICU is currently not dropping -true):
+    // http://unicode.org/reports/tr35/#u_Extension
+    testParse('en-u-Foo-bar-nu-thai-ca-buddhist-kk-true', 'en', null, null, [],
+        'en-u-bar-foo-ca-buddhist-kk-nu-thai');
+
+    // The specification does permit empty extensions for extensions other than
+    // u- and t-.
+    testParse('en-a', 'en', null, null, [], 'en');
+    testParse('en-x', 'en', null, null, [], 'en');
+    testParse('en-z', 'en', null, null, [], 'en');
+
+    // Normalization of `tlang` - ICU still returns -t-iw-bu.
+    testParse('en-t-iw-Bu', 'en', null, null, [], 'en-t-he-mm');
+
+    test('en-u-ca is equivalent to en-u-ca-true', () {
+      final bare = Locale.parse('en-u-ca').toLanguageTag();
+      final explicit = Locale.parse('en-u-ca-true').toLanguageTag();
+      expect(bare, explicit);
+    });
+  });
+
+  // Normalization: extensions and their subtags come out sorted.
+  testParse('en-z-abc-001-foo-fii-bar-u-cu-usd-co-phonebk', 'en', null, null,
+      [], 'en-u-co-phonebk-cu-usd-z-abc-001-foo-fii-bar');
+
+  group('Locale.parse() throws FormatException:', () {
+    // Registers a test expecting Locale.parse to reject the identifier.
+    void expectParseRejected(String x) {
+      test('"$x"', () {
+        expect(() => Locale.parse(x), throwsFormatException);
+      });
+    }
+
+    for (final badLocaleIdentifier in invalidLocales) {
+      expectParseRejected(badLocaleIdentifier);
+    }
+
+    // ICU permits '', taking it as 'und', but it is not a valid Unicode Locale
+    // Identifier: We reject it.
+    expectParseRejected('');
+
+    // abcd-Latn throws exceptions in our Dart implementation, whereas
+    // ECMAScript's Intl.Locale permits it. This is because the BCP47 spec
+    // still allows for the possible addition of 4-character languages in
+    // the future, whereas the Unicode Locale Identifiers spec bans it
+    // outright.
+    expectParseRejected('abcd-Latn');
+
+    // ICU permits 'root-Latn' since it conforms to pure BCP47, but it is an
+    // invalid Unicode BCP47 Locale Identifier.
+    expectParseRejected('root-Latn');
+
+    // ICU permits empty tkeys.
+    expectParseRejected('en-t-a0');
+
+    // ICU permits duplicate tkeys, returning the content of -t- verbatim.
+    expectParseRejected('en-t-a0-one-a0-two');
+
+    // ICU permits duplicate keys, in this case dropping -ca-buddhist.
+    expectParseRejected('en-u-ca-islamic-ca-buddhist');
+  });
+
+  group('Locale.tryParse() returns null:', () {
+    for (final badLocaleIdentifier in invalidLocales) {
+      test('"$badLocaleIdentifier"', () {
+        expect(Locale.tryParse(badLocaleIdentifier), isNull);
+      });
+    }
+  });
+
+  // TODO: determine appropriate behaviour for the following examples.
+
+  // // 'mo' is deprecated, and is a tag that ought to be replaced by *two*
+  // // subtags (ro-MD), although Chrome Unstable also doesn't presently do
+  // // that (replaces it by 'ro' only).
+  // // TODO: check up on the Chrome implementation.
+  // testParse('mo', 'ro', null, 'MD', [], 'ro-MD');
+
+  // // Script deprecation.
+  // testParse('en-Qaai', 'en', 'Zinh', null, [], 'en-Zinh');
+
+  // // Variant deprecation.
+  // testParse('sv-aaland', 'sv', null, 'AX', [], 'sv-AX');
+
+  // // Variant deprecation.
+  // testParse('en-heploc', 'en', null, null, ['alalc97'], 'en-alalc97');
+
+  // // Variant deprecation.
+  // testParse('en-polytoni', 'en', null, null, ['polyton'], 'en-polyton');
+
+  test('Locale cannot be modified via the variants field', () {
+    final locale = Locale.parse('en-scotland');
+    List<String> variants = locale.variants;
+    var addRejected = false;
+    try {
+      variants.add('basiceng');
+    } on Error {
+      addRejected = true;
+    }
+    // The locale must be unchanged, and the add attempt must have thrown.
+    expect(locale.toLanguageTag(), 'en-scotland');
+    expect(addRejected, isTrue);
+  });
+
+  test('operator== and hashCode', () {
+    // With script and region.
+    final parsedFull = Locale.parse('en-Shaw-ZA');
+    final builtFull = Locale.fromSubtags(
+        languageCode: 'en', scriptCode: 'Shaw', countryCode: 'ZA');
+    expect(parsedFull, builtFull);
+    expect(parsedFull.hashCode, builtFull.hashCode);
+
+    // Language only.
+    final parsedBare = Locale.parse('en');
+    final builtBare = Locale.fromSubtags(
+        languageCode: 'en', scriptCode: null, countryCode: null);
+    expect(parsedBare, builtBare);
+    expect(parsedBare.hashCode, builtBare.hashCode);
+  });
+}
+
+/// Registers a test that builds a Locale with `Locale.fromSubtags` from
+/// [language], [script] and [region], and checks the resulting language,
+/// script and region codes as well as the `toLanguageTag()` and `toString()`
+/// output against the expected values.
+///
+/// Declared `void`: without a return type the function is implicitly
+/// `dynamic`.
+void testFromSubtags(
+    String language,
+    String script,
+    String region,
+    String expectedLanguage,
+    String expectedScript,
+    String expectedRegion,
+    String expectedTag) {
+  test('Locale.fromSubtags(...) with $language, $script, $region', () {
+    final locale = Locale.fromSubtags(
+        languageCode: language, scriptCode: script, countryCode: region);
+    expect(locale.languageCode, expectedLanguage);
+    expect(locale.scriptCode, expectedScript);
+    expect(locale.countryCode, expectedRegion);
+    expect(locale.toLanguageTag(), expectedTag);
+    expect(locale.toString(), expectedTag);
+  });
+}
+
+/// Registers a test that parses [bcp47Tag] with `Locale.parse` and checks the
+/// resulting language, script and region codes, the `toLanguageTag()` output
+/// and the variants (in order) against the expected values.
+///
+/// Declared `void`: without a return type the function is implicitly
+/// `dynamic`.
+void testParse(
+    String bcp47Tag,
+    String expectedLanguage,
+    String expectedScript,
+    String expectedRegion,
+    Iterable<String> expectedVariants,
+    String expectedTag) {
+  test('Locale.parse("$bcp47Tag");', () {
+    final locale = Locale.parse(bcp47Tag);
+    expect(locale.languageCode, expectedLanguage);
+    expect(locale.scriptCode, expectedScript);
+    expect(locale.countryCode, expectedRegion);
+    expect(locale.toLanguageTag(), expectedTag);
+    expect(locale.variants, orderedEquals(expectedVariants));
+  });
+}

diff --git a/test/locale_test_data.dart b/test/locale_test_data.dart
new file mode 100755
index 0000000..2786ba9
--- /dev/null
+++ b/test/locale_test_data.dart

@@ -0,0 +1,69 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Test data for Locale handling.
+///
+/// DO NOT EDIT. This file is autogenerated by script.
+/// TODO(hugovdm): improve the script and file related ICU bugs.
+
+/// Test data: a map from unnormalized locale names to normalized locale names.
+///
+/// locale_test.dart passes each key to `Locale.parse` and expects
+/// `toLanguageTag()` on the result to equal the corresponding value.
+Map<String, String> localeParsingTestData = <String, String>{
+  // Simple with normalization
+  'Zh': 'zh',
+  'zH-cn': 'zh-CN',
+  'ZH-Cn': 'zh-CN',
+  'zh-cN': 'zh-CN',
+  'zh-hans': 'zh-Hans',
+  'ZH-HANS-CN': 'zh-Hans-CN',
+
+  // Region codes can be three digits
+  'es-419': 'es-419',
+
+  // While language is usually 2 characters, it can also be 3
+  'CKB-arab': 'ckb-Arab',
+
+  // With simple canonicalization
+  'Iw': 'he',
+  'iW': 'he',
+  'My-Bu': 'my-MM',
+
+  // "und" is the language tag for undefined language
+  'und': 'und',
+
+  // Normalization: sorting of extension subtags
+  'en-z-abc-001-foo-fii-bar-u-cu-usd-co-phonebk':
+      'en-u-co-phonebk-cu-usd-z-abc-001-foo-fii-bar',
+
+  // Normalises tags, sorts subtags alphabetically
+  'UND-lkjh-qw-12345-Abcde-U-Zz-Aaa-Co-Zxc-T-AF-Latn-Za-M0-Bar':
+      'und-Lkjh-QW-12345-abcde-t-af-latn-za-m0-bar-u-co-zxc-zz-aaa',
+
+  // Supports multiple "tvalues" for each "tpart"
+  'en-u-cu-usd-t-a0-ghi-jkl-b0-abc-def-a-aaa':
+      'en-a-aaa-t-a0-ghi-jkl-b0-abc-def-u-cu-usd',
+
+  // Private-use extension subtags do not get sorted
+  'en-x-BCDE0123-ABCD0123': 'en-x-bcde0123-abcd0123',
+  'en-x-BCDE0123-123456-ABCD0123': 'en-x-bcde0123-123456-abcd0123',
+};
+
+/// Invalid Language Tags.
+///
+/// locale_test.dart expects `Locale.parse` to throw a FormatException and
+/// `Locale.tryParse` to return null for every entry.
+List<String> invalidLocales = <String>[
+  'en-',
+  '-za',
+  'en--za',
+  '419',
+  'en-t',
+  'en-t-',
+  'en-t-de-t-fr',
+  'en-u',
+  'en-u-',
+  'en-u-ca-islamic-',
+  'en-u-cu-eur-u-co-phonebk',
+  'en-x-',
+  'en-x-foo-',
+  'en-x-abcdefghi',
+  'en-z-',
+  'en-z-aaa-z-bbb',
+];
commit	4b27a88cc6418ad31d94cafa0dc426c2c7d995e5	[log] [tgz]
author	Dart Team <misc@dartlang.org>	Mon Mar 25 08:08:29 2019 -0700
committer	Alan Knight <alanknight@google.com>	Fri Jun 14 10:31:53 2019 -0700
tree	bf9d3ec0d768ad0a091aae13d24c5bb6afff5512
parent	381d5d05e21d577259f0c94fe14e5e195be62a06 [diff]