// blob: 39891a16af1ee71f962471719d97e98f34df1129 [file] [log] [blame]
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'string_scanner.dart';
/// Checks that the location arguments given to [StringScanner.error] are
/// consistent with one another and fit inside [string].
///
/// A location is described either by [match] or by [position] and/or
/// [length], never by both.
///
/// Throws an [ArgumentError] if [match] is passed together with [position] or
/// [length]. Throws a [RangeError] if [position] or [length] is negative, if
/// [position] is greater than the length of [string], or if [position] plus
/// [length] extends past the end of [string].
void validateErrorArgs(
    String string, Match? match, int? position, int? length) {
  final hasExplicitSpan = position != null || length != null;
  if (match != null && hasExplicitSpan) {
    throw ArgumentError("Can't pass both match and position/length.");
  }

  final end = string.length;

  // The position is validated before the length so that a bad position is
  // always the first problem reported.
  if (position != null) {
    if (position < 0) {
      throw RangeError('position must be greater than or equal to 0.');
    }
    if (position > end) {
      throw RangeError('position must be less than or equal to the '
          'string length.');
    }
  }

  // Nothing else to verify when no explicit length was supplied.
  if (length == null) return;

  if (length < 0) {
    throw RangeError('length must be greater than or equal to 0.');
  }

  if (position != null && position + length > end) {
    throw RangeError('position plus length must not go beyond the end of '
        'the string.');
  }
}
// See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
// for documentation on how UTF-16 encoding works and definitions of various
// related terms.
/// The inclusive lower bound (U+10000) of the code points that
/// [inSupplementaryPlane] accepts, i.e. the first code point that UTF-16 must
/// encode as a surrogate pair.
const _supplementaryPlaneLowerBound = 0x10000;
/// The inclusive upper bound (U+10FFFF) of the code points that
/// [inSupplementaryPlane] accepts.
const _supplementaryPlaneUpperBound = 0x10FFFF;
/// The inclusive lower bound of the UTF-16 high surrogate block.
///
/// [highSurrogate] adds the upper bits of a code point's offset to this value.
const _highSurrogateLowerBound = 0xD800;
/// The inclusive lower bound of the UTF-16 low surrogate block.
///
/// [lowSurrogate] adds the lower bits of a code point's offset to this value.
const _lowSurrogateLowerBound = 0xDC00;
/// The number of low bits in each code unit of a surrogate pair that goes into
/// determining which code point it encodes.
const _surrogateBits = 10;
/// A bit mask that covers the lower [_surrogateBits] bits of a value.
///
/// Applied to a surrogate code unit it extracts the bits that surrogate
/// contributes to the encoded code point (see [decodeSurrogatePair]); applied
/// to a code point's offset from [_supplementaryPlaneLowerBound] it extracts
/// the part carried by the low surrogate (see [lowSurrogate]).
const _surrogateValueMask = (1 << _surrogateBits) - 1;
/// Whether [codePoint] lies between [_supplementaryPlaneLowerBound] and
/// [_supplementaryPlaneUpperBound], both inclusive.
///
/// Such code points must be represented as a surrogate pair in UTF-16.
bool inSupplementaryPlane(int codePoint) {
  if (codePoint < _supplementaryPlaneLowerBound) return false;
  return codePoint <= _supplementaryPlaneUpperBound;
}
/// Whether [codeUnit] falls in the UTF-16 high surrogate block.
///
/// Clearing the low [_surrogateBits] bits of a high surrogate leaves exactly
/// [_highSurrogateLowerBound].
bool isHighSurrogate(int codeUnit) {
  final blockPrefix = codeUnit & ~_surrogateValueMask;
  return blockPrefix == _highSurrogateLowerBound;
}
/// Whether [codeUnit] falls in the UTF-16 low surrogate block.
///
/// Compares the bits above the low [_surrogateBits] of [codeUnit] with the
/// same bits of [_lowSurrogateLowerBound].
bool isLowSurrogate(int codeUnit) {
  const lowSurrogatePrefix = _lowSurrogateLowerBound >> _surrogateBits;
  return (codeUnit >> _surrogateBits) == lowSurrogatePrefix;
}
/// Computes the first (high) code unit of the UTF-16 surrogate pair that
/// encodes [codePoint].
///
/// Asserts that [codePoint] satisfies [inSupplementaryPlane].
int highSurrogate(int codePoint) {
  assert(inSupplementaryPlane(codePoint));

  // The high surrogate carries the bits of the offset above the low
  // [_surrogateBits].
  final offset = codePoint - _supplementaryPlaneLowerBound;
  return _highSurrogateLowerBound + (offset >> _surrogateBits);
}
/// Computes the second (low) code unit of the UTF-16 surrogate pair that
/// encodes [codePoint].
///
/// Asserts that [codePoint] satisfies [inSupplementaryPlane].
int lowSurrogate(int codePoint) {
  assert(inSupplementaryPlane(codePoint));

  // The low surrogate carries the low [_surrogateBits] bits of the offset.
  final offset = codePoint - _supplementaryPlaneLowerBound;
  return _lowSurrogateLowerBound + (offset & _surrogateValueMask);
}
/// Combines a UTF-16 surrogate pair into the Unicode code point it encodes.
///
/// Asserts that [highSurrogate] satisfies [isHighSurrogate] and that
/// [lowSurrogate] satisfies [isLowSurrogate].
int decodeSurrogatePair(int highSurrogate, int lowSurrogate) {
  assert(isHighSurrogate(highSurrogate));
  assert(isLowSurrogate(lowSurrogate));

  // Each surrogate supplies [_surrogateBits] bits of the offset from the
  // start of the supplementary range: the high surrogate the upper bits, the
  // low surrogate the lower bits.
  final upperBits = (highSurrogate & _surrogateValueMask) << _surrogateBits;
  final lowerBits = lowSurrogate & _surrogateValueMask;
  return _supplementaryPlaneLowerBound + (upperBits | lowerBits);
}