| // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| part of dart.core; |
| |
| /** |
| * A sequence of UTF-16 code units. |
| * |
| * Strings are mainly used to represent text. A character may be represented by |
| * multiple code points, each code point consisting of one or two code |
| * units. For example the Papua New Guinea flag character requires four code |
| * units to represent two code points, but should be treated like a single |
| * character: "🇵🇬". Platforms that do not support the flag character may show |
| * the letters "PG" instead. If the code points are swapped, it instead becomes |
| * the Guadeloupe flag "🇬🇵" ("GP"). |
| * |
| * A string can be either single or multiline. Single line strings are |
| * written using matching single or double quotes, and multiline strings are |
| * written using triple quotes. The following are all valid Dart strings: |
| * |
| * 'Single quotes'; |
| * "Double quotes"; |
| * 'Double quotes in "single" quotes'; |
| * "Single quotes in 'double' quotes"; |
| * |
| * '''A |
| * multiline |
| * string'''; |
| * |
| * """ |
| * Another |
| * multiline |
| * string"""; |
| * |
| * Strings are immutable. Although you cannot change a string, you can perform |
| * an operation on a string and assign the result to a new string: |
| * |
| * var string = 'Dart is fun'; |
| * var newString = string.substring(0, 5); |
| * |
| * You can use the plus (`+`) operator to concatenate strings: |
| * |
| * 'Dart ' + 'is ' + 'fun!'; // 'Dart is fun!' |
| * |
| * You can also use adjacent string literals for concatenation: |
| * |
| * 'Dart ' 'is ' 'fun!'; // 'Dart is fun!' |
| * |
| * You can use `${}` to interpolate the value of Dart expressions |
| * within strings. The curly braces can be omitted when evaluating identifiers: |
| * |
| * string = 'dartlang'; |
| * '$string has ${string.length} letters'; // 'dartlang has 8 letters' |
| * |
| * A string is represented by a sequence of Unicode UTF-16 code units |
| * accessible through the [codeUnitAt] or the [codeUnits] members: |
| * |
| * string = 'Dart'; |
| * string.codeUnitAt(0); // 68 |
| * string.codeUnits; // [68, 97, 114, 116] |
| * |
| * The string representation of code units is accessible through the index |
| * operator: |
| * |
| * string[0]; // 'D' |
| * |
| * The characters of a string are encoded in UTF-16. Decoding UTF-16, which |
| * combines surrogate pairs, yields Unicode code points. Following a similar |
| * terminology to Go, we use the name 'rune' for an integer representing a |
| * Unicode code point. Use the [runes] property to get the runes of a string: |
| * |
| * string.runes.toList(); // [68, 97, 114, 116] |
| * |
| * For a character outside the Basic Multilingual Plane (plane 0) that is |
| * composed of a surrogate pair, [runes] combines the pair and returns a |
| * single integer. For example, the Unicode character for a |
| * musical G-clef ('𝄞') with rune value 0x1D11E consists of a UTF-16 surrogate |
| * pair: `0xD834` and `0xDD1E`. Using [codeUnits] returns the surrogate pair, |
| * and using `runes` returns their combined value: |
| * |
| * var clef = '\u{1D11E}'; |
| * clef.codeUnits; // [0xD834, 0xDD1E] |
| * clef.runes.toList(); // [0x1D11E] |
| * |
| * The String class can not be extended or implemented. Attempting to do so |
| * yields a compile-time error. |
| * |
| * ## Other resources |
| * |
| * See [StringBuffer] to efficiently build a string incrementally. See |
| * [RegExp] to work with regular expressions. |
| * |
| * Also see: |
| * |
| * * [Strings and regular expressions](https://dart.dev/guides/libraries/library-tour#strings-and-regular-expressions) |
| */ |
| @pragma('vm:entry-point') |
| abstract class String implements Comparable<String>, Pattern { |
| /** |
| * Allocates a new String for the specified [charCodes]. |
| * |
| * The [charCodes] can be UTF-16 code units or runes. If a char-code value is |
| * 16-bit, it is copied verbatim: |
| * |
| * new String.fromCharCodes([68]); // 'D' |
| * |
| * If a char-code value is greater than 16-bits, it is decomposed into a |
| * surrogate pair: |
| * |
| * var clef = new String.fromCharCodes([0x1D11E]); |
| * clef.codeUnitAt(0); // 0xD834 |
| * clef.codeUnitAt(1); // 0xDD1E |
| * |
| * If [start] and [end] is provided, only the values of [charCodes] |
| * at positions from `start` to, but not including, `end`, are used. |
| * The `start` and `end` values must satisfy |
| * `0 <= start <= end <= charCodes.length`. |
| */ |
| external factory String.fromCharCodes(Iterable<int> charCodes, |
| [int start = 0, int? end]); |
| |
| /** |
| * Allocates a new String for the specified [charCode]. |
| * |
| * If the [charCode] can be represented by a single UTF-16 code unit, the new |
| * string contains a single code unit. Otherwise, the [length] is 2 and |
| * the code units form a surrogate pair. See documentation for |
| * [fromCharCodes]. |
| * |
| * Creating a [String] with one half of a surrogate pair is allowed. |
| */ |
| external factory String.fromCharCode(int charCode); |
| |
| /** |
| * Returns the string value of the environment declaration [name]. |
| * |
| * Environment declarations are provided by the surrounding system compiling |
| * or running the Dart program. Declarations map a string key to a string |
| * value. |
| * |
| * If [name] is not declared in the environment, the result is instead |
| * [defaultValue]. |
| * |
| * Example of getting a value: |
| * ``` |
| * const String.fromEnvironment("defaultFloo", defaultValue: "no floo") |
| * ``` |
| * In order to check whether a declaration is there at all, use |
| * [bool.hasEnvironment]. Example: |
| * ``` |
| * const maybeDeclared = bool.hasEnvironment("maybeDeclared") |
| * ? String.fromEnvironment("maybeDeclared") |
| * : null; |
| * ``` |
| */ |
| // The .fromEnvironment() constructors are special in that we do not want |
| // users to call them using "new". We prohibit that by giving them bodies |
| // that throw, even though const constructors are not allowed to have bodies. |
| // Disable those static errors. |
| //ignore: const_constructor_with_body |
| //ignore: const_factory |
| external const factory String.fromEnvironment(String name, |
| {String defaultValue = ""}); |
| |
| /** |
| * Gets the character (as a single-code-unit [String]) at the given [index]. |
| * |
| * The returned string represents exactly one UTF-16 code unit, which may be |
| * half of a surrogate pair. A single member of a surrogate pair is an |
| * invalid UTF-16 string: |
| * |
| * var clef = '\u{1D11E}'; |
| * // These represent invalid UTF-16 strings. |
| * clef[0].codeUnits; // [0xD834] |
| * clef[1].codeUnits; // [0xDD1E] |
| * |
| * This method is equivalent to |
| * `new String.fromCharCode(this.codeUnitAt(index))`. |
| */ |
| String operator [](int index); |
| |
| /** |
| * Returns the 16-bit UTF-16 code unit at the given [index]. |
| */ |
| int codeUnitAt(int index); |
| |
| /** |
| * The length of the string. |
| * |
| * Returns the number of UTF-16 code units in this string. The number |
| * of [runes] might be fewer, if the string contains characters outside |
| * the Basic Multilingual Plane (plane 0): |
| * |
| * 'Dart'.length; // 4 |
| * 'Dart'.runes.length; // 4 |
| * |
| * var clef = '\u{1D11E}'; |
| * clef.length; // 2 |
| * clef.runes.length; // 1 |
| */ |
| int get length; |
| |
| /** |
| * Returns a hash code derived from the code units of the string. |
| * |
| * This is compatible with [operator ==]. Strings with the same sequence |
| * of code units have the same hash code. |
| */ |
| int get hashCode; |
| |
| /** |
| * Returns true if other is a `String` with the same sequence of code units. |
| * |
| * This method compares each individual code unit of the strings. |
| * It does not check for Unicode equivalence. |
| * For example, both the following strings represent the string 'Amélie', |
| * but due to their different encoding, are not equal: |
| * |
| * 'Am\xe9lie' == 'Ame\u{301}lie'; // false |
| * |
| * The first string encodes 'é' as a single unicode code unit (also |
| * a single rune), whereas the second string encodes it as 'e' with the |
| * combining accent character '◌́'. |
| */ |
| bool operator ==(Object other); |
| |
| /** |
| * Compares this string to [other]. |
| * |
| * Returns a negative value if `this` is ordered before `other`, |
| * a positive value if `this` is ordered after `other`, |
| * or zero if `this` and `other` are equivalent. |
| * |
| * The ordering is the same as the ordering of the code points at the first |
| * position where the two strings differ. |
| * If one string is a prefix of the other, |
| * then the shorter string is ordered before the longer string. |
| * If the strings have exactly the same content, they are equivalent with |
| * regard to the ordering. |
| * Ordering does not check for Unicode equivalence. |
| * The comparison is case sensitive. |
| */ |
| int compareTo(String other); |
| |
| /** |
| * Returns true if this string ends with [other]. For example: |
| * |
| * 'Dart'.endsWith('t'); // true |
| */ |
| bool endsWith(String other); |
| |
| /** |
| * Returns true if this string starts with a match of [pattern]. |
| * |
| * var string = 'Dart'; |
| * string.startsWith('D'); // true |
| * string.startsWith(new RegExp(r'[A-Z][a-z]')); // true |
| * |
| * If [index] is provided, this method checks if the substring starting |
| * at that index starts with a match of [pattern]: |
| * |
| * string.startsWith('art', 1); // true |
| * string.startsWith(new RegExp(r'\w{3}')); // true |
| * |
| * [index] must not be negative or greater than [length]. |
| * |
| * A [RegExp] containing '^' does not match if the [index] is greater than |
| * zero. The pattern works on the string as a whole, and does not extract |
| * a substring starting at [index] first: |
| * |
| * string.startsWith(new RegExp(r'^art'), 1); // false |
| * string.startsWith(new RegExp(r'art'), 1); // true |
| */ |
| bool startsWith(Pattern pattern, [int index = 0]); |
| |
| /** |
| * Returns the position of the first match of [pattern] in this string, |
| * starting at [start], inclusive: |
| * |
| * var string = 'Dartisans'; |
| * string.indexOf('art'); // 1 |
| * string.indexOf(new RegExp(r'[A-Z][a-z]')); // 0 |
| * |
| * Returns -1 if no match is found: |
| * |
| * string.indexOf(new RegExp(r'dart')); // -1 |
| * |
| * [start] must be non-negative and not greater than [length]. |
| */ |
| int indexOf(Pattern pattern, [int start = 0]); |
| |
| /** |
| * Returns the starting position of the last match [pattern] in this string, |
| * searching backward starting at [start], inclusive: |
| * |
| * var string = 'Dartisans'; |
| * string.lastIndexOf('a'); // 6 |
| * string.lastIndexOf(RegExp(r'a(r|n)')); // 6 |
| * |
| * Returns -1 if [pattern] could not be found in this string. |
| * |
| * string.lastIndexOf(RegExp(r'DART')); // -1 |
| * |
| * If [start] is omitted, search starts from the end of the string. |
| * If supplied, [start] must be non-negative and not greater than [length]. |
| */ |
| int lastIndexOf(Pattern pattern, [int? start]); |
| |
| /** |
| * Returns true if this string is empty. |
| */ |
| bool get isEmpty; |
| |
| /** |
| * Returns true if this string is not empty. |
| */ |
| bool get isNotEmpty; |
| |
| /** |
| * Creates a new string by concatenating this string with [other]. |
| * |
| * 'dart' + 'lang'; // 'dartlang' |
| */ |
| String operator +(String other); |
| |
| /** |
| * Returns the substring of this string that extends from [startIndex], |
| * inclusive, to [endIndex], exclusive. |
| * |
| * var string = 'dartlang'; |
| * string.substring(1); // 'artlang' |
| * string.substring(1, 4); // 'art' |
| */ |
| String substring(int startIndex, [int? endIndex]); |
| |
| /** |
| * Returns the string without any leading and trailing whitespace. |
| * |
| * If the string contains leading or trailing whitespace, a new string with no |
| * leading and no trailing whitespace is returned: |
| * ```dart |
| * '\tDart is fun\n'.trim(); // 'Dart is fun' |
| * ``` |
| * Otherwise, the original string itself is returned: |
| * ```dart |
| * var str1 = 'Dart'; |
| * var str2 = str1.trim(); |
| * identical(str1, str2); // true |
| * ``` |
| * Whitespace is defined by the Unicode White_Space property (as defined in |
| * version 6.2 or later) and the BOM character, 0xFEFF. |
| * |
| * Here is the list of trimmed characters according to Unicode version 6.3: |
| * ``` |
| * 0009..000D ; White_Space # Cc <control-0009>..<control-000D> |
| * 0020 ; White_Space # Zs SPACE |
| * 0085 ; White_Space # Cc <control-0085> |
| * 00A0 ; White_Space # Zs NO-BREAK SPACE |
| * 1680 ; White_Space # Zs OGHAM SPACE MARK |
| * 2000..200A ; White_Space # Zs EN QUAD..HAIR SPACE |
| * 2028 ; White_Space # Zl LINE SEPARATOR |
| * 2029 ; White_Space # Zp PARAGRAPH SEPARATOR |
| * 202F ; White_Space # Zs NARROW NO-BREAK SPACE |
| * 205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE |
| * 3000 ; White_Space # Zs IDEOGRAPHIC SPACE |
| * |
| * FEFF ; BOM ZERO WIDTH NO_BREAK SPACE |
| * ``` |
| * Some later versions of Unicode do not include U+0085 as a whitespace |
| * character. Whether it is trimmed depends on the Unicode version |
| * used by the system. |
| */ |
| String trim(); |
| |
| /** |
| * Returns the string without any leading whitespace. |
| * |
| * As [trim], but only removes leading whitespace. |
| */ |
| String trimLeft(); |
| |
| /** |
| * Returns the string without any trailing whitespace. |
| * |
| * As [trim], but only removes trailing whitespace. |
| */ |
| String trimRight(); |
| |
| /** |
| * Creates a new string by concatenating this string with itself a number |
| * of times. |
| * |
| * The result of `str * n` is equivalent to |
| * `str + str + ...`(n times)`... + str`. |
| * |
| * Returns an empty string if [times] is zero or negative. |
| */ |
| String operator *(int times); |
| |
| /** |
| * Pads this string on the left if it is shorter than [width]. |
| * |
| * Return a new string that prepends [padding] onto this string |
| * one time for each position the length is less than [width]. |
| * |
| * If [width] is already smaller than or equal to `this.length`, |
| * no padding is added. A negative `width` is treated as zero. |
| * |
| * If [padding] has length different from 1, the result will not |
| * have length `width`. This may be useful for cases where the |
| * padding is a longer string representing a single character, like |
| * `" "` or `"\u{10002}`". |
| * In that case, the user should make sure that `this.length` is |
| * the correct measure of the strings length. |
| */ |
| String padLeft(int width, [String padding = ' ']); |
| |
| /** |
| * Pads this string on the right if it is shorter than [width]. |
| * |
| * Return a new string that appends [padding] after this string |
| * one time for each position the length is less than [width]. |
| * |
| * If [width] is already smaller than or equal to `this.length`, |
| * no padding is added. A negative `width` is treated as zero. |
| * |
| * If [padding] has length different from 1, the result will not |
| * have length `width`. This may be useful for cases where the |
| * padding is a longer string representing a single character, like |
| * `" "` or `"\u{10002}`". |
| * In that case, the user should make sure that `this.length` is |
| * the correct measure of the strings length. |
| */ |
| String padRight(int width, [String padding = ' ']); |
| |
| /** |
| * Returns true if this string contains a match of [other]: |
| * |
| * var string = 'Dart strings'; |
| * string.contains('D'); // true |
| * string.contains(new RegExp(r'[A-Z]')); // true |
| * |
| * If [startIndex] is provided, this method matches only at or after that |
| * index: |
| * |
| * string.contains('X', 1); // false |
| * string.contains(new RegExp(r'[A-Z]'), 1); // false |
| * |
| * [startIndex] must not be negative or greater than [length]. |
| */ |
| bool contains(Pattern other, [int startIndex = 0]); |
| |
| /** |
| * Returns a new string in which the first occurrence of [from] in this string |
| * is replaced with [to], starting from [startIndex]: |
| * |
| * '0.0001'.replaceFirst(new RegExp(r'0'), ''); // '.0001' |
| * '0.0001'.replaceFirst(new RegExp(r'0'), '7', 1); // '0.7001' |
| */ |
| String replaceFirst(Pattern from, String to, [int startIndex = 0]); |
| |
| /** |
| * Replace the first occurrence of [from] in this string. |
| * |
| * Returns a new string, which is this string |
| * except that the first match of [from], starting from [startIndex], |
| * is replaced by the result of calling [replace] with the match object. |
| * |
| * The optional [startIndex] is by default set to 0. If provided, it must be |
| * an integer in the range `[0 .. len]`, where `len` is this string's length. |
| */ |
| String replaceFirstMapped(Pattern from, String replace(Match match), |
| [int startIndex = 0]); |
| |
| /** |
| * Replaces all substrings that match [from] with [replace]. |
| * |
| * Returns a new string in which the non-overlapping substrings matching |
| * [from] (the ones iterated by `from.allMatches(thisString)`) are replaced |
| * by the literal string [replace]. |
| * |
| * 'resume'.replaceAll(new RegExp(r'e'), 'é'); // 'résumé' |
| * |
| * Notice that the [replace] string is not interpreted. If the replacement |
| * depends on the match (for example on a [RegExp]'s capture groups), use |
| * the [replaceAllMapped] method instead. |
| */ |
| String replaceAll(Pattern from, String replace); |
| |
| /** |
| * Replace all substrings that match [from] by a string computed from the |
| * match. |
| * |
| * Returns a new string in which the non-overlapping substrings that match |
| * [from] (the ones iterated by `from.allMatches(thisString)`) are replaced |
| * by the result of calling [replace] on the corresponding [Match] object. |
| * |
| * This can be used to replace matches with new content that depends on the |
| * match, unlike [replaceAll] where the replacement string is always the same. |
| * |
| * The [replace] function is called with the [Match] generated |
| * by the pattern, and its result is used as replacement. |
| * |
| * The function defined below converts each word in a string to simplified |
| * 'pig latin' using [replaceAllMapped]: |
| * |
| * pigLatin(String words) => words.replaceAllMapped( |
| * new RegExp(r'\b(\w*?)([aeiou]\w*)', caseSensitive: false), |
| * (Match m) => "${m[2]}${m[1]}${m[1].isEmpty ? 'way' : 'ay'}"); |
| * |
| * pigLatin('I have a secret now!'); // 'Iway avehay away ecretsay ownay!' |
| */ |
| String replaceAllMapped(Pattern from, String Function(Match match) replace); |
| |
| /** |
| * Replaces the substring from [start] to [end] with [replacement]. |
| * |
| * Returns a new string equivalent to: |
| * |
| * this.substring(0, start) + replacement + this.substring(end) |
| * |
| * The [start] and [end] indices must specify a valid range of this string. |
| * That is `0 <= start <= end <= this.length`. |
| * If [end] is `null`, it defaults to [length]. |
| */ |
| String replaceRange(int start, int? end, String replacement); |
| |
| /** |
| * Splits the string at matches of [pattern] and returns a list of substrings. |
| * |
| * Finds all the matches of `pattern` in this string, |
| * and returns the list of the substrings between the matches. |
| * |
| * var string = "Hello world!"; |
| * string.split(" "); // ['Hello', 'world!']; |
| * |
| * Empty matches at the beginning and end of the strings are ignored, |
| * and so are empty matches right after another match. |
| * |
| * var string = "abba"; |
| * string.split(new RegExp(r"b*")); // ['a', 'a'] |
| * // not ['', 'a', 'a', ''] |
| * |
| * If this string is empty, the result is an empty list if `pattern` matches |
| * the empty string, and it is `[""]` if the pattern doesn't match. |
| * |
| * var string = ''; |
| * string.split(''); // [] |
| * string.split("a"); // [''] |
| * |
| * Splitting with an empty pattern splits the string into single-code unit |
| * strings. |
| * |
| * var string = 'Pub'; |
| * string.split(''); // ['P', 'u', 'b'] |
| * |
| * string.codeUnits.map((unit) { |
| * return new String.fromCharCode(unit); |
| * }).toList(); // ['P', 'u', 'b'] |
| * |
| * Splitting happens at UTF-16 code unit boundaries, |
| * and not at rune boundaries: |
| * |
| * // String made up of two code units, but one rune. |
| * string = '\u{1D11E}'; |
| * string.split('').length; // 2 surrogate values |
| * |
| * To get a list of strings containing the individual runes of a string, |
| * you should not use split. You can instead map each rune to a string |
| * as follows: |
| * |
| * string.runes.map((rune) => new String.fromCharCode(rune)).toList(); |
| */ |
| List<String> split(Pattern pattern); |
| |
| /** |
| * Splits the string, converts its parts, and combines them into a new |
| * string. |
| * |
| * [pattern] is used to split the string into parts and separating matches. |
| * |
| * Each match is converted to a string by calling [onMatch]. If [onMatch] |
| * is omitted, the matched string is used. |
| * |
| * Each non-matched part is converted by a call to [onNonMatch]. If |
| * [onNonMatch] is omitted, the non-matching part is used. |
| * |
| * Then all the converted parts are combined into the resulting string. |
| * |
| * 'Eats shoots leaves'.splitMapJoin((new RegExp(r'shoots')), |
| * onMatch: (m) => '${m.group(0)}', |
| * onNonMatch: (n) => '*'); // *shoots* |
| */ |
| String splitMapJoin(Pattern pattern, |
| {String Function(Match)? onMatch, String Function(String)? onNonMatch}); |
| |
| /** |
| * Returns an unmodifiable list of the UTF-16 code units of this string. |
| */ |
| List<int> get codeUnits; |
| |
| /** |
| * Returns an [Iterable] of Unicode code-points of this string. |
| * |
| * If the string contains surrogate pairs, they are combined and returned |
| * as one integer by this iterator. Unmatched surrogate halves are treated |
| * like valid 16-bit code-units. |
| */ |
| Runes get runes; |
| |
| /** |
| * Converts all characters in this string to lower case. |
| * If the string is already in all lower case, this method returns `this`. |
| * |
| * 'ALPHABET'.toLowerCase(); // 'alphabet' |
| * 'abc'.toLowerCase(); // 'abc' |
| * |
| * This function uses the language independent Unicode mapping and thus only |
| * works in some languages. |
| */ |
| // TODO(floitsch): document better. (See EcmaScript for description). |
| String toLowerCase(); |
| |
| /** |
| * Converts all characters in this string to upper case. |
| * If the string is already in all upper case, this method returns `this`. |
| * |
| * 'alphabet'.toUpperCase(); // 'ALPHABET' |
| * 'ABC'.toUpperCase(); // 'ABC' |
| * |
| * This function uses the language independent Unicode mapping and thus only |
| * works in some languages. |
| */ |
| // TODO(floitsch): document better. (See EcmaScript for description). |
| String toUpperCase(); |
| } |
| |
| /** |
| * The runes (integer Unicode code points) of a [String]. |
| */ |
| class Runes extends Iterable<int> { |
| final String string; |
| Runes(this.string); |
| |
| RuneIterator get iterator => RuneIterator(string); |
| |
| int get last { |
| if (string.length == 0) { |
| throw StateError('No elements.'); |
| } |
| int length = string.length; |
| int code = string.codeUnitAt(length - 1); |
| if (_isTrailSurrogate(code) && string.length > 1) { |
| int previousCode = string.codeUnitAt(length - 2); |
| if (_isLeadSurrogate(previousCode)) { |
| return _combineSurrogatePair(previousCode, code); |
| } |
| } |
| return code; |
| } |
| } |
| |
| // Is then code (a 16-bit unsigned integer) a UTF-16 lead surrogate. |
| bool _isLeadSurrogate(int code) => (code & 0xFC00) == 0xD800; |
| |
| // Is then code (a 16-bit unsigned integer) a UTF-16 trail surrogate. |
| bool _isTrailSurrogate(int code) => (code & 0xFC00) == 0xDC00; |
| |
| // Combine a lead and a trail surrogate value into a single code point. |
| int _combineSurrogatePair(int start, int end) { |
| return 0x10000 + ((start & 0x3FF) << 10) + (end & 0x3FF); |
| } |
| |
| /** |
| * [Iterator] for reading runes (integer Unicode code points) of a Dart string. |
| */ |
| class RuneIterator implements BidirectionalIterator<int> { |
| /** String being iterated. */ |
| final String string; |
| /** Position before the current code point. */ |
| int _position; |
| /** Position after the current code point. */ |
| int _nextPosition; |
| /** |
| * Current code point. |
| * |
| * If the iterator has hit either end, the [_currentCodePoint] is -1 |
| * and `_position == _nextPosition`. |
| */ |
| int _currentCodePoint = -1; |
| |
| /** Create an iterator positioned at the beginning of the string. */ |
| RuneIterator(String string) |
| : this.string = string, |
| _position = 0, |
| _nextPosition = 0; |
| |
| /** |
| * Create an iterator positioned before the [index]th code unit of the string. |
| * |
| * When created, there is no [current] value. |
| * A [moveNext] will use the rune starting at [index] the current value, |
| * and a [movePrevious] will use the rune ending just before [index] as the |
| * the current value. |
| * |
| * The [index] position must not be in the middle of a surrogate pair. |
| */ |
| RuneIterator.at(String string, int index) |
| : string = string, |
| _position = index, |
| _nextPosition = index { |
| RangeError.checkValueInInterval(index, 0, string.length); |
| _checkSplitSurrogate(index); |
| } |
| |
| /** Throw an error if the index is in the middle of a surrogate pair. */ |
| void _checkSplitSurrogate(int index) { |
| if (index > 0 && |
| index < string.length && |
| _isLeadSurrogate(string.codeUnitAt(index - 1)) && |
| _isTrailSurrogate(string.codeUnitAt(index))) { |
| throw ArgumentError('Index inside surrogate pair: $index'); |
| } |
| } |
| |
| /** |
| * The starting position of the current rune in the string. |
| * |
| * Returns -1 if there is no current rune ([current] is -1). |
| */ |
| int get rawIndex => (_position != _nextPosition) ? _position : -1; |
| |
| /** |
| * Resets the iterator to the rune at the specified index of the string. |
| * |
| * Setting a negative [rawIndex], or one greater than or equal to |
| * `string.length`, is an error. So is setting it in the middle of a surrogate |
| * pair. |
| * |
| * Setting the position to the end of then string means that there is no |
| * current rune. |
| */ |
| void set rawIndex(int rawIndex) { |
| RangeError.checkValidIndex(rawIndex, string, "rawIndex"); |
| reset(rawIndex); |
| moveNext(); |
| } |
| |
| /** |
| * Resets the iterator to the given index into the string. |
| * |
| * After this the [current] value is unset. |
| * You must call [moveNext] make the rune at the position current, |
| * or [movePrevious] for the last rune before the position. |
| * |
| * The [rawIndex] must be non-negative and no greater than `string.length`. |
| * It must also not be the index of the trailing surrogate of a surrogate |
| * pair. |
| */ |
| void reset([int rawIndex = 0]) { |
| RangeError.checkValueInInterval(rawIndex, 0, string.length, "rawIndex"); |
| _checkSplitSurrogate(rawIndex); |
| _position = _nextPosition = rawIndex; |
| _currentCodePoint = -1; |
| } |
| |
| /** |
| * The rune (integer Unicode code point) starting at the current position in |
| * the string. |
| * |
| * The value is -1 if there is no current code point. |
| */ |
| int get current => _currentCodePoint; |
| |
| /** |
| * The number of code units comprising the current rune. |
| * |
| * Returns zero if there is no current rune ([current] is -1). |
| */ |
| int get currentSize => _nextPosition - _position; |
| |
| /** |
| * A string containing the current rune. |
| * |
| * For runes outside the basic multilingual plane, this will be |
| * a String of length 2, containing two code units. |
| * |
| * Returns an empty string if there is no [current] value. |
| */ |
| String get currentAsString { |
| if (_position == _nextPosition) return ""; |
| if (_position + 1 == _nextPosition) return string[_position]; |
| return string.substring(_position, _nextPosition); |
| } |
| |
| bool moveNext() { |
| _position = _nextPosition; |
| if (_position == string.length) { |
| _currentCodePoint = -1; |
| return false; |
| } |
| int codeUnit = string.codeUnitAt(_position); |
| int nextPosition = _position + 1; |
| if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) { |
| int nextCodeUnit = string.codeUnitAt(nextPosition); |
| if (_isTrailSurrogate(nextCodeUnit)) { |
| _nextPosition = nextPosition + 1; |
| _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit); |
| return true; |
| } |
| } |
| _nextPosition = nextPosition; |
| _currentCodePoint = codeUnit; |
| return true; |
| } |
| |
| bool movePrevious() { |
| _nextPosition = _position; |
| if (_position == 0) { |
| _currentCodePoint = -1; |
| return false; |
| } |
| int position = _position - 1; |
| int codeUnit = string.codeUnitAt(position); |
| if (_isTrailSurrogate(codeUnit) && position > 0) { |
| int prevCodeUnit = string.codeUnitAt(position - 1); |
| if (_isLeadSurrogate(prevCodeUnit)) { |
| _position = position - 1; |
| _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); |
| return true; |
| } |
| } |
| _position = position; |
| _currentCodePoint = codeUnit; |
| return true; |
| } |
| } |