// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// @dart=2.9

import 'locale_deprecations.dart';
import 'locale_extensions.dart';
import 'locale_implementation.dart';

/// A parser for [Unicode Locale
/// Identifiers](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier).
///
/// The identifier is parsed eagerly by the constructor. Afterwards [problems]
/// lists every issue that was found, and [toLocale] returns the parsed locale
/// (or null if [problems] is not empty).
class LocaleParser {
  /// Language subtag of Unicode Language Identifier.
  ///
  /// Stays 'und' when the identifier is 'root' or has no language subtag.
  String _languageCode = 'und';

  /// Script subtag of Unicode Language Identifier.
  String _scriptCode;

  /// Region subtag of Unicode Language Identifier.
  String _countryCode;

  /// Variant subtags of Unicode Language Identifier.
  List<String> _variants;

  /// Unicode Locale Extensions, also known as "U Extension".
  ///
  /// Attributes, if any, are stored sorted and joined by '-' under the empty
  /// key.
  Map<String, String> _uExtensions;

  /// Transformed Extensions, also known as "T Extension".
  ///
  /// The `tlang` part, if any, is stored under the empty key.
  Map<String, String> _tExtensions;

  /// Private-Use Extensions.
  String _xExtensions;

  /// Other Extensions, keyed by their singleton.
  Map<String, String> _otherExtensions;

  /// List of problems with the localeId the parser tried to parse.
  ///
  /// An empty list indicates problem-free parsing.
  final List<String> problems = <String>[];

  /// Produces a Locale instance for the parser's current state.
  ///
  /// Returns null if the Locale would be syntactically invalid, i.e. if
  /// [problems] is not empty.
  LocaleImplementation toLocale() {
    if (problems.isNotEmpty) return null;
    // Only build an extensions object when at least one extension was parsed.
    LocaleExtensions extensions;
    if (_uExtensions != null ||
        _tExtensions != null ||
        _otherExtensions != null ||
        _xExtensions != null) {
      extensions = LocaleExtensions(
          _uExtensions, _tExtensions, _otherExtensions, _xExtensions);
    }
    return LocaleImplementation.unsafe(
      _languageCode,
      scriptCode: _scriptCode,
      countryCode: _countryCode,
      variants: _variants,
      extensions: extensions,
    );
  }

  /// Subtags of the Locale Identifier, as split by [separators].
  ///
  /// Defaults to an empty list so that [atEnd], [advance] and the `accept*`
  /// methods remain safe to call when the constructor returned early for
  /// 'root'.
  List<String> _subtags = const <String>[];

  /// RegExp that matches Unicode Locale Identifier subtag separators.
  static final separators = RegExp('[-_]');

  /// Last accepted subtag.
  String _accepted;

  /// Last accepted subtag.
  String accepted() => _accepted;

  /// Last accepted list of subtags (for variants).
  List<String> _acceptedList;

  /// Last accepted list of subtags (for variants).
  List<String> acceptedList() => _acceptedList;

  /// Current subtag pending acceptance.
  String _current;

  /// Current subtag pending acceptance, or null once all subtags are consumed.
  String current() => _current;

  /// Index of the current subtag.
  int _currentIndex = 0;

  /// Advance to the next subtag (see [current] and [accepted]).
  ///
  /// The subtag that was current becomes the accepted one; [current] becomes
  /// null when there is no next subtag.
  void advance() {
    _accepted = _current;
    _currentIndex++;
    _current = _currentIndex < _subtags.length ? _subtags[_currentIndex] : null;
  }

  /// Returns true if all subtags have been parsed.
  bool atEnd() => _currentIndex >= _subtags.length;

  /// Parses [Unicode CLDR Locale
  /// Identifiers](https://www.unicode.org/reports/tr35/#Identifiers).
  ///
  /// This method does not parse all BCP 47 tags. See [BCP 47
  /// Conformance](https://www.unicode.org/reports/tr35/#BCP_47_Conformance) for
  /// details.
  ///
  /// localeId may not be null; an [ArgumentError] is thrown if it is.
  ///
  /// Parsing failed if there are any entries in [problems].
  LocaleParser(String localeId) {
    // An assert would only run in debug mode; validate in release builds too.
    ArgumentError.checkNotNull(localeId, 'localeId');

    // Calling toLowerCase unconditionally should be efficient if
    // string_patch.dart is in use:
    // https://github.com/dart-lang/sdk/blob/cabaa78cc57d08bcfcd75bfe99a42c19ed497d26/runtime/lib/string_patch.dart#L1178
    localeId = localeId.toLowerCase();
    if (localeId == 'root') {
      // Nothing to parse: every field keeps its default (language 'und').
      return;
    }

    _subtags = localeId.split(separators);
    _currentIndex = 0;
    _current = _subtags[0];

    // The identifier starts with a language subtag optionally followed by a
    // script subtag, or with a script subtag alone.
    var scriptFound = false;
    if (acceptLanguage()) {
      _languageCode = replaceDeprecatedLanguageSubtag(accepted());
      scriptFound = acceptScript();
    } else {
      scriptFound = acceptScript();
      if (!scriptFound) {
        problems.add('bad language/script');
      }
    }
    if (scriptFound) {
      _scriptCode = toCapCase(accepted());
    }
    if (acceptRegion()) {
      _countryCode = replaceDeprecatedRegionSubtag(accepted().toUpperCase());
    }
    acceptVariants();
    _variants = acceptedList();

    processExtensions();

    // Anything left over could not be matched by any rule above.
    if (!atEnd()) {
      problems.add('bad subtag "${current()}"');
    }
  }

  /// Consumes all remaining subtags, if syntactically valid.
  ///
  /// Dispatches on each accepted singleton: 'u', 't' and 'x' have dedicated
  /// handlers, everything else goes to [processOtherExtensions]. Processing
  /// stops after an 'x' extension.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processExtensions() {
    while (acceptSingleton()) {
      final singleton = accepted();
      switch (singleton) {
        case 'u':
          processUExtensions();
          break;
        case 't':
          processTExtensions();
          break;
        case 'x':
          processPrivateUseExtensions();
          // Private use is the last extension; stop looking for singletons.
          return;
        default:
          processOtherExtensions(singleton);
      }
    }
  }

  /// Consumes tags matched by `unicode_locale_extensions` in the specification,
  /// except that the 'u' singleton must already be accepted.
  ///
  /// Records a problem for a repeated 'u' singleton, a repeated key, or a 'u'
  /// extension that has neither attributes nor keys.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processUExtensions() {
    if (_uExtensions != null) {
      problems.add('duplicate "u"');
      return;
    }
    _uExtensions = <String, String>{};
    var empty = true;

    // Leading attributes: stored sorted under the empty key.
    final attributes = <String>[];
    while (acceptLowAlphaNumeric3to8()) {
      attributes.add(accepted());
    }
    if (attributes.isNotEmpty) {
      empty = false;
      attributes.sort();
      _uExtensions[''] = attributes.join('-');
    }

    // unicode_locale_extensions: collect "(sep keyword)*".
    while (acceptUExtensionKey()) {
      empty = false;
      final key = accepted();
      final typeParts = <String>[];
      while (acceptLowAlphaNumeric3to8()) {
        typeParts.add(accepted());
      }
      if (_uExtensions.containsKey(key)) {
        problems.add('duplicate "$key"');
        continue;
      }
      // A lone 'true' type is stored the same way as a key without a type.
      final isTrue = typeParts.length == 1 && typeParts[0] == 'true';
      _uExtensions[key] = isTrue ? '' : typeParts.join('-');
    }
    if (empty) {
      problems.add('empty "u"');
    }
  }

  /// Consumes tags matched by `transformed_extensions` in the specification,
  /// except that the 't' singleton must already be accepted.
  ///
  /// Records a problem for a repeated 't' singleton, a repeated or value-less
  /// tkey, or a 't' extension that has neither a tlang nor a tfield with a
  /// value.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processTExtensions() {
    if (_tExtensions != null) {
      problems.add('duplicate "t"');
      return;
    }
    _tExtensions = <String, String>{};
    var empty = true;

    // Optional tlang: language [script] [region] variant*, stored lowercase
    // under the empty key.
    if (acceptLanguage()) {
      empty = false;
      final tlang = <String>[replaceDeprecatedLanguageSubtag(accepted())];
      if (acceptScript()) {
        tlang.add(accepted());
      }
      if (acceptRegion()) {
        // The region replacement works on upper-case input; convert back to
        // lower case afterwards.
        tlang.add(replaceDeprecatedRegionSubtag(accepted().toUpperCase())
            .toLowerCase());
      }
      acceptVariants();
      tlang.addAll(acceptedList());
      _tExtensions[''] = tlang.join('-');
    }

    // transformed_extensions: collect "(sep tfield)*".
    while (acceptTExtensionKey()) {
      final tkey = accepted();
      final tvalueParts = <String>[];
      while (acceptLowAlphaNumeric3to8()) {
        tvalueParts.add(accepted());
      }
      if (tvalueParts.isEmpty) {
        problems.add('empty "$tkey"');
        continue;
      }
      empty = false;
      if (_tExtensions.containsKey(tkey)) {
        problems.add('duplicate "$tkey"');
      } else {
        _tExtensions[tkey] = tvalueParts.join('-');
      }
    }
    if (empty) {
      problems.add('empty "t"');
    }
  }

  /// Consumes tags matched by `pu_extensions` in the specification, except that
  /// the 'x' singleton must already be accepted.
  ///
  /// The specification requires at least one subtag after the singleton; a
  /// bare 'x' is recorded in [problems], consistent with how empty 'u' and 't'
  /// extensions are reported.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processPrivateUseExtensions() {
    final values = <String>[];
    while (acceptLowAlphaNumeric1to8()) {
      values.add(accepted());
    }
    if (values.isEmpty) {
      problems.add('empty "x"');
      return;
    }
    _xExtensions = values.join('-');
  }

  /// Consumes tags matched by `other_extensions` in the specification, except
  /// that the singleton in question must already be accepted and passed as
  /// parameter.
  ///
  /// The specification requires at least one subtag after the singleton; a
  /// singleton without values is recorded in [problems], as is a repeated
  /// singleton.
  ///
  /// If parsing fails, `atEnd()` will be false and/or [problems] will not be
  /// empty.
  void processOtherExtensions(String singleton) {
    final values = <String>[];
    while (acceptLowAlphaNumeric2to8()) {
      values.add(accepted());
    }
    if (values.isEmpty) {
      problems.add('empty "$singleton"');
      return;
    }
    _otherExtensions ??= <String, String>{};
    if (_otherExtensions.containsKey(singleton)) {
      problems.add('duplicate "$singleton"');
      return;
    }
    _otherExtensions[singleton] = values.join('-');
  }

  /// Advances and returns true if the current subtag matches [pattern].
  ///
  /// Returns false, without advancing, when at the end or on a mismatch.
  bool _acceptMatching(RegExp pattern) {
    if (atEnd() || !pattern.hasMatch(current())) return false;
    advance();
    return true;
  }

  /// Advances and returns true if current subtag is a language subtag.
  bool acceptLanguage() => _acceptMatching(_languageRegExp);

  static final _languageRegExp = RegExp(r'^[a-z]{2,3}$|^[a-z]{5,8}$');

  /// Advances and returns true if current subtag is a script subtag.
  bool acceptScript() => _acceptMatching(_scriptRegExp);

  static final _scriptRegExp = RegExp(r'^[a-z]{4}$');

  /// Advances and returns true if current subtag is a region subtag.
  bool acceptRegion() => _acceptMatching(_regionRegExp);

  static final _regionRegExp = RegExp(r'^[a-z]{2}$|^\d{3}$');

  /// Advances, collecting subtags in [_acceptedList], as long as the current
  /// subtag is a variant subtag.
  ///
  /// Does not return a boolean: when done, _acceptedList will contain the
  /// collected subtags (possibly none).
  void acceptVariants() {
    final collected = <String>[];
    _acceptedList = collected;
    while (_acceptMatching(_variantRegExp)) {
      collected.add(accepted());
    }
  }

  static final _variantRegExp = RegExp(r'^[a-z\d]{5,8}$|^\d[a-z\d]{3}$');

  /// Advances and returns true if current subtag is a singleton.
  ///
  /// NOTE(review): only single letters are accepted here, whereas the
  /// specification's `other_extensions` rule also permits a digit as the
  /// singleton. Confirm whether that restriction is intended before widening.
  bool acceptSingleton() => _acceptMatching(_singletonRegExp);

  static final _singletonRegExp = RegExp(r'^[a-z]$');

  /// Advances and returns true if current subtag is alphanumeric, with length
  /// ranging from 1 to 8.
  bool acceptLowAlphaNumeric1to8() => _acceptMatching(_alphaNumeric1to8RegExp);

  static final _alphaNumeric1to8RegExp = RegExp(r'^[a-z\d]{1,8}$');

  /// Advances and returns true if current subtag is alphanumeric, with length
  /// ranging from 2 to 8.
  bool acceptLowAlphaNumeric2to8() => _acceptMatching(_alphaNumeric2to8RegExp);

  static final _alphaNumeric2to8RegExp = RegExp(r'^[a-z\d]{2,8}$');

  /// Advances and returns true if current subtag is alphanumeric, with length
  /// ranging from 3 to 8.
  bool acceptLowAlphaNumeric3to8() => _acceptMatching(_alphaNumeric3to8RegExp);

  static final _alphaNumeric3to8RegExp = RegExp(r'^[a-z\d]{3,8}$');

  /// Advances and returns true if current subtag is a valid U Extension key.
  bool acceptUExtensionKey() => _acceptMatching(_uExtensionKeyRegExp);

  static final _uExtensionKeyRegExp = RegExp(r'^[a-z\d][a-z]$');

  /// Advances and returns true if current subtag is a valid T Extension key
  /// (`tkey` in the specification).
  bool acceptTExtensionKey() => _acceptMatching(_tExtensionKeyRegExp);

  static final _tExtensionKeyRegExp = RegExp(r'^[a-z]\d$');
}
