| // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| import 'string_scanner.dart'; |
| |
| /// Validates the arguments passed to [StringScanner.error]. |
| void validateErrorArgs( |
| String string, Match? match, int? position, int? length) { |
| if (match != null && (position != null || length != null)) { |
| throw ArgumentError("Can't pass both match and position/length."); |
| } |
| |
| if (position != null) { |
| if (position < 0) { |
| throw RangeError('position must be greater than or equal to 0.'); |
| } else if (position > string.length) { |
| throw RangeError('position must be less than or equal to the ' |
| 'string length.'); |
| } |
| } |
| |
| if (length != null && length < 0) { |
| throw RangeError('length must be greater than or equal to 0.'); |
| } |
| |
| if (position != null && length != null && position + length > string.length) { |
| throw RangeError('position plus length must not go beyond the end of ' |
| 'the string.'); |
| } |
| } |
| |
| // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF |
| // for documentation on how UTF-16 encoding works and definitions of various |
| // related terms. |
| |
| /// The inclusive lower bound of Unicode's supplementary plane. |
| const _supplementaryPlaneLowerBound = 0x10000; |
| |
| /// The inclusive upper bound of Unicode's supplementary plane. |
| const _supplementaryPlaneUpperBound = 0x10FFFF; |
| |
| /// The inclusive lower bound of the UTF-16 high surrogate block. |
| const _highSurrogateLowerBound = 0xD800; |
| |
| /// The inclusive lower bound of the UTF-16 low surrogate block. |
| const _lowSurrogateLowerBound = 0xDC00; |
| |
| /// The number of low bits in each code unit of a surrogate pair that goes into |
| /// determining which code point it encodes. |
| const _surrogateBits = 10; |
| |
| /// A bit mask that covers the lower [_surrogateBits] of a code point, which can |
| /// be used to extract the value of a surrogate or the low surrogate value of a |
| /// code unit. |
| const _surrogateValueMask = (1 << _surrogateBits) - 1; |
| |
| /// Returns whether [codePoint] is in the Unicode supplementary plane, and thus |
| /// must be represented as a surrogate pair in UTF-16. |
| bool inSupplementaryPlane(int codePoint) => |
| codePoint >= _supplementaryPlaneLowerBound && |
| codePoint <= _supplementaryPlaneUpperBound; |
| |
| /// Returns whether [codeUnit] is a UTF-16 high surrogate. |
| bool isHighSurrogate(int codeUnit) => |
| (codeUnit & ~_surrogateValueMask) == _highSurrogateLowerBound; |
| |
| /// Returns whether [codeUnit] is a UTF-16 low surrogate. |
| bool isLowSurrogate(int codeUnit) => |
| (codeUnit >> _surrogateBits) == (_lowSurrogateLowerBound >> _surrogateBits); |
| |
| /// Returns the high surrogate needed to encode the supplementary-plane |
| /// [codePoint]. |
| int highSurrogate(int codePoint) { |
| assert(inSupplementaryPlane(codePoint)); |
| return ((codePoint - _supplementaryPlaneLowerBound) >> _surrogateBits) + |
| _highSurrogateLowerBound; |
| } |
| |
| /// Returns the low surrogate needed to encode the supplementary-plane |
| /// [codePoint]. |
| int lowSurrogate(int codePoint) { |
| assert(inSupplementaryPlane(codePoint)); |
| return ((codePoint - _supplementaryPlaneLowerBound) & _surrogateValueMask) + |
| _lowSurrogateLowerBound; |
| } |
| |
| /// Converts a UTF-16 surrogate pair into the Unicode code unit it represents. |
| int decodeSurrogatePair(int highSurrogate, int lowSurrogate) { |
| assert(isHighSurrogate(highSurrogate)); |
| assert(isLowSurrogate(lowSurrogate)); |
| return _supplementaryPlaneLowerBound + |
| (((highSurrogate & _surrogateValueMask) << _surrogateBits) | |
| (lowSurrogate & _surrogateValueMask)); |
| } |