| // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| part of dart.core; |
| |
| /// A sequence of UTF-16 code units. |
| /// |
| /// Strings are mainly used to represent text. A character may be represented by |
| /// multiple code points, each code point consisting of one or two code |
| /// units. For example the Papua New Guinea flag character requires four code |
| /// units to represent two code points, but should be treated like a single |
| /// character: "🇵🇬". Platforms that do not support the flag character may show |
| /// the letters "PG" instead. If the code points are swapped, it instead becomes |
| /// the Guadeloupe flag "🇬🇵" ("GP"). |
| /// |
| /// A string can be either single or multiline. Single line strings are |
| /// written using matching single or double quotes, and multiline strings are |
| /// written using triple quotes. The following are all valid Dart strings: |
| /// ```dart |
| /// 'Single quotes'; |
| /// "Double quotes"; |
| /// 'Double quotes in "single" quotes'; |
| /// "Single quotes in 'double' quotes"; |
| /// |
| /// '''A |
| /// multiline |
| /// string'''; |
| /// |
| /// """ |
| /// Another |
| /// multiline |
| /// string"""; |
| /// ``` |
| /// Strings are immutable. Although you cannot change a string, you can perform |
| /// an operation on a string which creates a new string: |
| /// ```dart |
| /// var string = 'Dart is fun'; |
| /// var newString = string.substring(0, 5); |
| /// ``` |
| /// You can use the plus (`+`) operator to concatenate strings: |
| /// ```dart |
| /// 'Dart ' + 'is ' + 'fun!'; // 'Dart is fun!' |
| /// ``` |
| /// Adjacent string literals are concatenated automatically: |
| /// ```dart |
| /// 'Dart ' 'is ' 'fun!'; // 'Dart is fun!' |
| /// ``` |
| /// You can use `${}` to interpolate the value of Dart expressions |
| /// within strings. The curly braces can be omitted when evaluating identifiers: |
| /// ```dart |
| /// string = 'dartlang'; |
| /// '$string has ${string.length} letters'; // 'dartlang has 8 letters' |
| /// ``` |
| /// A string is represented by a sequence of Unicode UTF-16 code units |
| /// accessible through the [codeUnitAt] or the [codeUnits] members: |
| /// ```dart |
| /// string = 'Dart'; |
| /// string.codeUnitAt(0); // 68 |
| /// string.codeUnits; // [68, 97, 114, 116] |
| /// ``` |
| /// The string representation of code units is accessible through the index |
| /// operator: |
| /// ```dart |
| /// string[0]; // 'D' |
| /// ``` |
| /// The characters of a string are encoded in UTF-16. Decoding UTF-16, which |
| /// combines surrogate pairs, yields Unicode code points. Following a similar |
| /// terminology to Go, we use the name 'rune' for an integer representing a |
| /// Unicode code point. Use the [runes] property to get the runes of a string: |
| /// ```dart |
| /// string.runes.toList(); // [68, 97, 114, 116] |
| /// ``` |
| /// For a character outside the Basic Multilingual Plane (plane 0) that is |
| /// composed of a surrogate pair, [runes] combines the pair and returns a |
| /// single integer. For example, the Unicode character for a |
| /// musical G-clef ('𝄞') with rune value 0x1D11E consists of a UTF-16 surrogate |
| /// pair: `0xD834` and `0xDD1E`. Using [codeUnits] returns the surrogate pair, |
| /// and using `runes` returns their combined value: |
| /// ```dart |
| /// var clef = '\u{1D11E}'; |
| /// clef.codeUnits; // [0xD834, 0xDD1E] |
| /// clef.runes.toList(); // [0x1D11E] |
| /// ``` |
| /// The `String` class cannot be extended or implemented. Attempting to do so |
| /// yields a compile-time error. |
| /// |
| /// ## Other resources |
| /// |
| /// See [StringBuffer] to efficiently build a string incrementally. See |
| /// [RegExp] to work with regular expressions. |
| /// |
| /// Also see: |
| /// |
| /// * [Strings and regular expressions](https://dart.dev/guides/libraries/library-tour#strings-and-regular-expressions) |
| @pragma('vm:entry-point') |
| abstract class String implements Comparable<String>, Pattern { |
| /// Allocates a new string containing the specified [charCodes]. |
| /// |
| /// The [charCodes] can be both UTF-16 code units or runes. |
| /// If a char-code value is 16-bit, it is used as a code unit: |
| /// ```dart |
| /// String.fromCharCodes([68]); // 'D' |
| /// ``` |
| /// If a char-code value is greater than 16-bits, it is decomposed into a |
| /// surrogate pair: |
| /// ```dart |
| /// var clef = String.fromCharCodes([0x1D11E]); |
| /// clef.codeUnitAt(0); // 0xD834 |
| /// clef.codeUnitAt(1); // 0xDD1E |
| /// ``` |
| /// If [start] and [end] are provided, only the values of [charCodes] |
| /// at positions from `start` to, but not including, `end`, are used. |
| /// The `start` and `end` values must satisfy |
| /// `0 <= start <= end <= charCodes.length`. |
| external factory String.fromCharCodes(Iterable<int> charCodes, |
| [int start = 0, int? end]); |
| |
| /// Allocates a new string containing the specified [charCode]. |
| /// |
| /// If the [charCode] can be represented by a single UTF-16 code unit, the new |
| /// string contains a single code unit. Otherwise, the [length] is 2 and |
| /// the code units form a surrogate pair. See documentation for |
| /// [fromCharCodes]. |
| /// |
| /// Creating a [String] with one half of a surrogate pair is allowed. |
| external factory String.fromCharCode(int charCode); |
| |
| /// The string value of the environment declaration [name]. |
| /// |
| /// Environment declarations are provided by the surrounding system compiling |
| /// or running the Dart program. Declarations map a string key to a string |
| /// value. |
| /// |
| /// If [name] is not declared in the environment, the result is instead |
| /// [defaultValue]. |
| /// |
| /// Example of getting a value: |
| /// ```dart |
| /// const String.fromEnvironment("defaultFloo", defaultValue: "no floo") |
| /// ``` |
| /// In order to check whether a declaration is there at all, use |
| /// [bool.hasEnvironment]. Example: |
| /// ```dart |
| /// const maybeDeclared = bool.hasEnvironment("maybeDeclared") |
| /// ? String.fromEnvironment("maybeDeclared") |
| /// : null; |
| /// ``` |
| /// |
| /// The string value, or lack of a value, associated with a [name] |
| /// must be consistent across all calls to `String.fromEnvironment`, |
| /// [int.fromEnvironment], [bool.fromEnvironment] and [bool.hasEnvironment] |
| /// in a single program. |
| // The .fromEnvironment() constructors are special: users should not call |
| // them using "new". This was done by giving them bodies that throw, which |
| // const constructors may not have, so the static errors are ignored below. |
| // NOTE(review): the declaration here is external with no body; confirm. |
| //ignore: const_constructor_with_body |
| //ignore: const_factory |
| external const factory String.fromEnvironment(String name, |
| {String defaultValue = ""}); |
| |
| /// The character (as a single-code-unit [String]) at the given [index]. |
| /// |
| /// The returned string represents exactly one UTF-16 code unit, which may be |
| /// half of a surrogate pair. A single member of a surrogate pair is an |
| /// invalid UTF-16 string: |
| /// ```dart |
| /// var clef = '\u{1D11E}'; |
| /// // These represent invalid UTF-16 strings. |
| /// clef[0].codeUnits; // [0xD834] |
| /// clef[1].codeUnits; // [0xDD1E] |
| /// ``` |
| /// This method is equivalent to |
| /// `String.fromCharCode(this.codeUnitAt(index))`. |
| String operator [](int index); |
| |
| /// Returns the 16-bit UTF-16 code unit at the given [index]. |
| int codeUnitAt(int index); |
| |
| /// The length of the string. |
| /// |
| /// Returns the number of UTF-16 code units in this string. The number |
| /// of [runes] might be fewer, if the string contains characters outside |
| /// the Basic Multilingual Plane (plane 0): |
| /// ```dart |
| /// 'Dart'.length; // 4 |
| /// 'Dart'.runes.length; // 4 |
| /// |
| /// var clef = '\u{1D11E}'; |
| /// clef.length; // 2 |
| /// clef.runes.length; // 1 |
| /// ``` |
| int get length; |
| |
| /// A hash code derived from the code units of the string. |
| /// |
| /// This is compatible with [operator ==]. Strings with the same sequence |
| /// of code units have the same hash code. |
| int get hashCode; |
| |
| /// Whether [other] is a `String` with the same sequence of code units. |
| /// |
| /// This method compares each individual code unit of the strings. |
| /// It does not check for Unicode equivalence. |
| /// For example, both the following strings represent the string 'Amélie', |
| /// but due to their different encoding, are not equal: |
| /// ```dart |
| /// 'Am\xe9lie' == 'Ame\u{301}lie'; // false |
| /// ``` |
| /// The first string encodes 'é' as a single unicode code unit (also |
| /// a single rune), whereas the second string encodes it as 'e' with the |
| /// combining accent character '◌́'. |
| bool operator ==(Object other); |
| |
| /// Compares this string to [other]. |
| /// |
| /// Returns a negative value if `this` is ordered before `other`, |
| /// a positive value if `this` is ordered after `other`, |
| /// or zero if `this` and `other` are equivalent. |
| /// |
| /// The ordering is the same as the ordering of the code points at the first |
| /// position where the two strings differ. |
| /// If one string is a prefix of the other, |
| /// then the shorter string is ordered before the longer string. |
| /// If the strings have exactly the same content, they are equivalent with |
| /// regard to the ordering. |
| /// Ordering does not check for Unicode equivalence. |
| /// The comparison is case sensitive. |
| int compareTo(String other); |
| |
| /// Whether this string ends with [other]. |
| /// |
| /// For example: |
| /// ```dart |
| /// 'Dart'.endsWith('t'); // true |
| /// ``` |
| bool endsWith(String other); |
| |
| /// Whether this string starts with a match of [pattern]. |
| /// |
| /// ```dart |
| /// var string = 'Dart'; |
| /// string.startsWith('D'); // true |
| /// string.startsWith(RegExp(r'[A-Z][a-z]')); // true |
| /// ``` |
| /// If [index] is provided, this method checks if the substring starting |
| /// at that index starts with a match of [pattern]: |
| /// ```dart |
| /// string.startsWith('art', 1); // true |
| /// string.startsWith(RegExp(r'\w{3}'), 1); // true |
| /// ``` |
| /// [index] must not be negative or greater than [length]. |
| /// |
| /// A [RegExp] containing '^' does not match if the [index] is greater than |
| /// zero and the regexp is not multi-line. |
| /// The pattern works on the string as a whole, and does not extract |
| /// a substring starting at [index] first: |
| /// ```dart |
| /// string.startsWith(RegExp(r'^art'), 1); // false |
| /// string.startsWith(RegExp(r'art'), 1); // true |
| /// ``` |
| bool startsWith(Pattern pattern, [int index = 0]); |
| |
| /// Returns the position of the first match of [pattern] in this string, |
| /// starting at [start], inclusive: |
| /// ```dart |
| /// var string = 'Dartisans'; |
| /// string.indexOf('art'); // 1 |
| /// string.indexOf(RegExp(r'[A-Z][a-z]')); // 0 |
| /// ``` |
| /// Returns -1 if no match is found: |
| /// ```dart |
| /// string.indexOf(RegExp(r'dart')); // -1 |
| /// ``` |
| /// The [start] must be non-negative and not greater than [length]. |
| int indexOf(Pattern pattern, [int start = 0]); |
| |
| /// The starting position of the last match of [pattern] in this string. |
| /// |
| /// Finds a match of pattern by searching backward starting at [start]: |
| /// ```dart |
| /// var string = 'Dartisans'; |
| /// string.lastIndexOf('a'); // 6 |
| /// string.lastIndexOf(RegExp(r'a(r|n)')); // 6 |
| /// ``` |
| /// Returns -1 if [pattern] could not be found in this string. |
| /// ```dart |
| /// string.lastIndexOf(RegExp(r'DART')); // -1 |
| /// ``` |
| /// If [start] is omitted, search starts from the end of the string. |
| /// If supplied, [start] must be non-negative and not greater than [length]. |
| int lastIndexOf(Pattern pattern, [int? start]); |
| |
| /// Whether this string is empty. |
| bool get isEmpty; |
| |
| /// Whether this string is not empty. |
| bool get isNotEmpty; |
| |
| /// Creates a new string by concatenating this string with [other]. |
| /// |
| /// Example: |
| /// ```dart |
| /// 'dart' + 'lang'; // 'dartlang' |
| /// ``` |
| String operator +(String other); |
| |
| /// The substring of this string from [start], inclusive, to [end], exclusive. |
| /// |
| /// Example: |
| /// ```dart |
| /// var string = 'dartlang'; |
| /// string.substring(1); // 'artlang' |
| /// string.substring(1, 4); // 'art' |
| /// ``` |
| String substring(int start, [int? end]); |
| |
| /// The string without any leading and trailing whitespace. |
| /// |
| /// If the string contains leading or trailing whitespace, a new string with no |
| /// leading and no trailing whitespace is returned: |
| /// ```dart |
| /// '\tDart is fun\n'.trim(); // 'Dart is fun' |
| /// ``` |
| /// Otherwise, the original string itself is returned: |
| /// ```dart |
| /// var str1 = 'Dart'; |
| /// var str2 = str1.trim(); |
| /// identical(str1, str2); // true |
| /// ``` |
| /// Whitespace is defined by the Unicode White_Space property (as defined in |
| /// version 6.2 or later) and the BOM character, 0xFEFF. |
| /// |
| /// Here is the list of trimmed characters according to Unicode version 6.3: |
| /// ``` |
| /// 0009..000D ; White_Space # Cc <control-0009>..<control-000D> |
| /// 0020 ; White_Space # Zs SPACE |
| /// 0085 ; White_Space # Cc <control-0085> |
| /// 00A0 ; White_Space # Zs NO-BREAK SPACE |
| /// 1680 ; White_Space # Zs OGHAM SPACE MARK |
| /// 2000..200A ; White_Space # Zs EN QUAD..HAIR SPACE |
| /// 2028 ; White_Space # Zl LINE SEPARATOR |
| /// 2029 ; White_Space # Zp PARAGRAPH SEPARATOR |
| /// 202F ; White_Space # Zs NARROW NO-BREAK SPACE |
| /// 205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE |
| /// 3000 ; White_Space # Zs IDEOGRAPHIC SPACE |
| /// |
| /// FEFF ; BOM ZERO WIDTH NO_BREAK SPACE |
| /// ``` |
| /// Some later versions of Unicode do not include U+0085 as a whitespace |
| /// character. Whether it is trimmed depends on the Unicode version |
| /// used by the system. |
| String trim(); |
| |
| /// The string without any leading whitespace. |
| /// |
| /// As [trim], but only removes leading whitespace. |
| String trimLeft(); |
| |
| /// The string without any trailing whitespace. |
| /// |
| /// As [trim], but only removes trailing whitespace. |
| String trimRight(); |
| |
| /// Creates a new string by concatenating this string with itself a number |
| /// of times. |
| /// |
| /// The result of `str * n` is equivalent to |
| /// `str + str + ...`(n times)`... + str`. |
| /// |
| /// Returns an empty string if [times] is zero or negative. |
| String operator *(int times); |
| |
| /// Pads this string on the left if it is shorter than [width]. |
| /// |
| /// Returns a new string that prepends [padding] onto this string |
| /// one time for each position the length is less than [width]. |
| /// |
| /// If [width] is already smaller than or equal to `this.length`, |
| /// no padding is added. A negative `width` is treated as zero. |
| /// |
| /// If [padding] has length different from 1, the result will not |
| /// have length `width`. This may be useful for cases where the |
| /// padding is a longer string representing a single character, like |
| /// `"&nbsp;"` or `"\u{10002}"`. |
| /// In that case, the user should make sure that `this.length` is |
| /// the correct measure of the string's length. |
| String padLeft(int width, [String padding = ' ']); |
| |
| /// Pads this string on the right if it is shorter than [width]. |
| /// |
| /// Returns a new string that appends [padding] after this string |
| /// one time for each position the length is less than [width]. |
| /// |
| /// If [width] is already smaller than or equal to `this.length`, |
| /// no padding is added. A negative `width` is treated as zero. |
| /// |
| /// If [padding] has length different from 1, the result will not |
| /// have length `width`. This may be useful for cases where the |
| /// padding is a longer string representing a single character, like |
| /// `"&nbsp;"` or `"\u{10002}"`. |
| /// In that case, the user should make sure that `this.length` is |
| /// the correct measure of the string's length. |
| String padRight(int width, [String padding = ' ']); |
| |
| /// Whether this string contains a match of [other]. |
| /// |
| /// Example: |
| /// ```dart |
| /// var string = 'Dart strings'; |
| /// string.contains('D'); // true |
| /// string.contains(RegExp(r'[A-Z]')); // true |
| /// ``` |
| /// If [startIndex] is provided, this method matches only at or after that |
| /// index: |
| /// ```dart |
| /// string.contains('D', 1); // false |
| /// string.contains(RegExp(r'[A-Z]'), 1); // false |
| /// ``` |
| /// The [startIndex] must not be negative or greater than [length]. |
| bool contains(Pattern other, [int startIndex = 0]); |
| |
| /// Creates a new string with the first occurrence of [from] replaced by [to]. |
| /// |
| /// Finds the first match of [from] in this string, starting from [startIndex], |
| /// and creates a new string where that match is replaced with the [to] string. |
| /// |
| /// Example: |
| /// ```dart |
| /// '0.0001'.replaceFirst(RegExp(r'0'), ''); // '.0001' |
| /// '0.0001'.replaceFirst(RegExp(r'0'), '7', 1); // '0.7001' |
| /// ``` |
| String replaceFirst(Pattern from, String to, [int startIndex = 0]); |
| |
| /// Replace the first occurrence of [from] in this string. |
| /// |
| /// Returns a new string, which is this string |
| /// except that the first match of [from], starting from [startIndex], |
| /// is replaced by the result of calling [replace] with the match object. |
| /// |
| /// The [startIndex] must be non-negative and no greater than [length]. |
| String replaceFirstMapped(Pattern from, String replace(Match match), |
| [int startIndex = 0]); |
| |
| /// Replaces all substrings that match [from] with [replace]. |
| /// |
| /// Creates a new string in which the non-overlapping substrings matching |
| /// [from] (the ones iterated by `from.allMatches(thisString)`) are replaced |
| /// by the literal string [replace]. |
| /// ```dart |
| /// 'resume'.replaceAll(RegExp(r'e'), 'é'); // 'résumé' |
| /// ``` |
| /// Notice that the [replace] string is not interpreted. If the replacement |
| /// depends on the match (for example on a [RegExp]'s capture groups), use |
| /// the [replaceAllMapped] method instead. |
| String replaceAll(Pattern from, String replace); |
| |
| /// Replace all substrings that match [from] by a computed string. |
| /// |
| /// Creates a new string in which the non-overlapping substrings that match |
| /// [from] (the ones iterated by `from.allMatches(thisString)`) are replaced |
| /// by the result of calling [replace] on the corresponding [Match] object. |
| /// |
| /// This can be used to replace matches with new content that depends on the |
| /// match, unlike [replaceAll] where the replacement string is always the same. |
| /// |
| /// The [replace] function is called with the [Match] generated |
| /// by the pattern, and its result is used as replacement. |
| /// |
| /// The function defined below converts each word in a string to simplified |
| /// 'pig latin' using [replaceAllMapped]: |
| /// ```dart |
| /// pigLatin(String words) => words.replaceAllMapped( |
| /// RegExp(r'\b(\w*?)([aeiou]\w*)', caseSensitive: false), |
| /// (Match m) => "${m[2]}${m[1]}${m[1]!.isEmpty ? 'way' : 'ay'}"); |
| /// |
| /// pigLatin('I have a secret now!'); // 'Iway avehay away ecretsay ownay!' |
| /// ``` |
| String replaceAllMapped(Pattern from, String Function(Match match) replace); |
| |
| /// Replaces the substring from [start] to [end] with [replacement]. |
| /// |
| /// Creates a new string equivalent to: |
| /// ```dart |
| /// this.substring(0, start) + replacement + this.substring(end) |
| /// ``` |
| /// The [start] and [end] indices must specify a valid range of this string. |
| /// That is `0 <= start <= end <= this.length`. |
| /// If [end] is `null`, it defaults to [length]. |
| String replaceRange(int start, int? end, String replacement); |
| |
| /// Splits the string at matches of [pattern] and returns a list of substrings. |
| /// |
| /// Finds all the matches of `pattern` in this string, |
| /// and returns the list of the substrings between the matches. |
| /// ```dart |
| /// var string = "Hello world!"; |
| /// string.split(" "); // ['Hello', 'world!']; |
| /// ``` |
| /// Empty matches at the beginning and end of the strings are ignored, |
| /// and so are empty matches right after another match. |
| /// ```dart |
| /// var string = "abba"; |
| /// // Matches: ^^ ^^ |
| /// string.split(RegExp(r"b*")); // ['a', 'a'] |
| /// // not ['', 'a', 'a', ''] |
| /// // not ['a', '', 'a'] |
| /// ``` |
| /// If this string is empty, the result is an empty list if `pattern` matches |
| /// the empty string, and it is `[""]` if the pattern doesn't match. |
| /// ```dart |
| /// var string = ''; |
| /// string.split(''); // [] |
| /// string.split("a"); // [''] |
| /// ``` |
| /// Splitting with an empty pattern splits the string into single-code unit |
| /// strings. |
| /// ```dart |
| /// var string = 'Pub'; |
| /// string.split(''); // ['P', 'u', 'b'] |
| /// |
| /// string.codeUnits.map((unit) { |
| /// return String.fromCharCode(unit); |
| /// }).toList(); // ['P', 'u', 'b'] |
| /// ``` |
| /// Splitting happens at UTF-16 code unit boundaries, |
| /// and not at rune boundaries: |
| /// ```dart |
| /// // String made up of two code units, but one rune. |
| /// string = '\u{1D11E}'; |
| /// string.split('').length; // 2 surrogate values |
| /// ``` |
| /// To get a list of strings containing the individual runes of a string, |
| /// you should not use split. You can instead map each rune to a string |
| /// as follows: |
| /// ```dart |
| /// string.runes.map((rune) => String.fromCharCode(rune)).toList(); |
| /// ``` |
| List<String> split(Pattern pattern); |
| |
| /// Splits the string, converts its parts, and combines them into a new |
| /// string. |
| /// |
| /// The [pattern] is used to split the string |
| /// into parts and separating matches. |
| /// |
| /// Each match is converted to a string by calling [onMatch]. If [onMatch] |
| /// is omitted, the matched substring is used. |
| /// |
| /// Each non-matched part is converted by a call to [onNonMatch]. If |
| /// [onNonMatch] is omitted, the non-matching substring is used. |
| /// |
| /// Then all the converted parts are combined into the resulting string. |
| /// ```dart |
| /// 'Eats shoots leaves'.splitMapJoin((RegExp(r'shoots')), |
| /// onMatch: (m) => '${m[0]!}', // or no onMatch at all |
| /// onNonMatch: (n) => '*'); // *shoots* |
| /// ``` |
| String splitMapJoin(Pattern pattern, |
| {String Function(Match)? onMatch, String Function(String)? onNonMatch}); |
| |
| /// An unmodifiable list of the UTF-16 code units of this string. |
| List<int> get codeUnits; |
| |
| /// An [Iterable] of Unicode code-points of this string. |
| /// |
| /// If the string contains surrogate pairs, they are combined and returned |
| /// as one integer by this iterator. Unmatched surrogate halves are treated |
| /// like valid 16-bit code-units. |
| Runes get runes; |
| |
| /// Converts all characters in this string to lower case. |
| /// |
| /// If the string is already in all lower case, this method returns `this`. |
| /// ```dart |
| /// 'ALPHABET'.toLowerCase(); // 'alphabet' |
| /// 'abc'.toLowerCase(); // 'abc' |
| /// ``` |
| /// This function uses the language independent Unicode mapping and thus only |
| /// works in some languages. |
| // TODO(floitsch): document better. (See EcmaScript for description). |
| String toLowerCase(); |
| |
| /// Converts all characters in this string to upper case. |
| /// |
| /// If the string is already in all upper case, this method returns `this`. |
| /// ```dart |
| /// 'alphabet'.toUpperCase(); // 'ALPHABET' |
| /// 'ABC'.toUpperCase(); // 'ABC' |
| /// ``` |
| /// This function uses the language independent Unicode mapping and thus only |
| /// works in some languages. |
| // TODO(floitsch): document better. (See EcmaScript for description). |
| String toUpperCase(); |
| } |
| |
| /// An [Iterable] view of the runes (integer Unicode code points) of a [String]. |
| class Runes extends Iterable<int> { |
|   /// The string whose runes this iterable exposes. |
|   final String string; |
| |
|   /// Creates the rune view of [string]. |
|   Runes(this.string); |
| |
|   /// A fresh [RuneIterator] over [string]. |
|   RuneIterator get iterator => RuneIterator(string); |
| |
|   /// The last rune of [string]. |
|   /// |
|   /// Looks only at the final one or two code units instead of iterating. |
|   /// A trailing surrogate preceded by a lead surrogate is combined into a |
|   /// single code point; otherwise the final code unit is returned as is. |
|   /// |
|   /// Throws a [StateError] if [string] is empty. |
|   int get last { |
|     final unitCount = string.length; |
|     if (unitCount == 0) { |
|       throw StateError('No elements.'); |
|     } |
|     final finalUnit = string.codeUnitAt(unitCount - 1); |
|     // Only a trail surrogate with something before it can close a pair. |
|     if (unitCount > 1 && _isTrailSurrogate(finalUnit)) { |
|       final precedingUnit = string.codeUnitAt(unitCount - 2); |
|       if (_isLeadSurrogate(precedingUnit)) { |
|         return _combineSurrogatePair(precedingUnit, finalUnit); |
|       } |
|     } |
|     return finalUnit; |
|   } |
| } |
| |
| // Whether the code (a 16-bit unsigned integer) is a UTF-16 lead surrogate. |
| // |
| // Only the top six of the low sixteen bits are inspected: they must equal |
| // the lead-surrogate tag 0xD800. |
| bool _isLeadSurrogate(int code) { |
|   const surrogateKindMask = 0xFC00; |
|   const leadSurrogateTag = 0xD800; |
|   return (code & surrogateKindMask) == leadSurrogateTag; |
| } |
| |
| // Whether the code (a 16-bit unsigned integer) is a UTF-16 trail surrogate. |
| // |
| // Only the top six of the low sixteen bits are inspected: they must equal |
| // the trail-surrogate tag 0xDC00. |
| bool _isTrailSurrogate(int code) { |
|   const surrogateKindMask = 0xFC00; |
|   const trailSurrogateTag = 0xDC00; |
|   return (code & surrogateKindMask) == trailSurrogateTag; |
| } |
| |
| // Combines a lead surrogate [start] and a trail surrogate [end] into the |
| // single code point they encode. |
| // |
| // Each surrogate contributes its low ten bits; the result is offset by |
| // 0x10000. Addition is used (not bitwise OR) because the offset can overlap |
| // the shifted lead bits. |
| int _combineSurrogatePair(int start, int end) { |
|   final highTenBits = (start & 0x3FF) << 10; |
|   final lowTenBits = end & 0x3FF; |
|   return 0x10000 + highTenBits + lowTenBits; |
| } |
| |
| /// Bidirectional [Iterator] over the runes (integer Unicode code points) of a |
| /// Dart string. |
| /// |
| /// The iterator tracks the code-unit range from [_position] up to, but not |
| /// including, [_nextPosition]. That range covers the current rune and is |
| /// empty when there is no current rune. |
| class RuneIterator implements BidirectionalIterator<int> { |
|   /// The string whose runes are being iterated. |
|   final String string; |
| |
|   /// Code-unit index at which the current rune begins. |
|   int _position; |
| |
|   /// Code-unit index just past the current rune. |
|   int _nextPosition; |
| |
|   /// The current rune, or -1 when there is none. |
|   /// |
|   /// Whenever this is -1, `_position == _nextPosition`. |
|   int _currentCodePoint = -1; |
| |
|   /// Creates an iterator placed before the first code unit of [string]. |
|   RuneIterator(this.string) |
|       : _position = 0, |
|         _nextPosition = 0; |
| |
|   /// Creates an iterator placed just before code unit [index] of [string]. |
|   /// |
|   /// No rune is current right after construction. |
|   /// A [moveNext] makes the rune starting at [index] the current value, |
|   /// and a [movePrevious] makes the rune ending just before [index] the |
|   /// current value. |
|   /// |
|   /// The [index] must satisfy `0 <= index <= string.length` and must not be |
|   /// in the middle of a surrogate pair. |
|   RuneIterator.at(this.string, int index) |
|       : _position = index, |
|         _nextPosition = index { |
|     RangeError.checkValueInInterval(index, 0, string.length); |
|     _checkSplitSurrogate(index); |
|   } |
| |
|   /// Throws an [ArgumentError] if [index] lies between the lead and trail |
|   /// halves of a surrogate pair. |
|   void _checkSplitSurrogate(int index) { |
|     // At either end of the string there is no pair to split. |
|     if (index <= 0 || index >= string.length) return; |
|     if (!_isLeadSurrogate(string.codeUnitAt(index - 1))) return; |
|     if (!_isTrailSurrogate(string.codeUnitAt(index))) return; |
|     throw ArgumentError('Index inside surrogate pair: $index'); |
|   } |
| |
|   /// The code-unit index at which the current rune starts. |
|   /// |
|   /// Evaluates to -1 if there is no current rune ([current] is -1). |
|   int get rawIndex => _position == _nextPosition ? -1 : _position; |
| |
|   /// Repositions the iterator so that the rune starting at [rawIndex] becomes |
|   /// the current rune. |
|   /// |
|   /// Setting a negative [rawIndex], or one greater than or equal to |
|   /// `string.length`, is an error. So is setting it in the middle of a |
|   /// surrogate pair. |
|   /// |
|   /// Because `string.length` itself is rejected here, use [reset] with |
|   /// `string.length` to place the iterator at the end of the string, where |
|   /// there is no current rune. |
|   void set rawIndex(int rawIndex) { |
|     RangeError.checkValidIndex(rawIndex, string, "rawIndex"); |
|     reset(rawIndex); |
|     moveNext(); |
|   } |
| |
|   /// Moves the iterator to the given code-unit index without selecting a rune. |
|   /// |
|   /// Afterwards the [current] value is unset (-1). |
|   /// Call [moveNext] to make the rune at the position current, |
|   /// or [movePrevious] for the last rune before the position. |
|   /// |
|   /// The [rawIndex] must be non-negative and no greater than `string.length`. |
|   /// It must also not be the index of the trailing surrogate of a surrogate |
|   /// pair. |
|   void reset([int rawIndex = 0]) { |
|     RangeError.checkValueInInterval(rawIndex, 0, string.length, "rawIndex"); |
|     _checkSplitSurrogate(rawIndex); |
|     _position = rawIndex; |
|     _nextPosition = rawIndex; |
|     _currentCodePoint = -1; |
|   } |
| |
|   /// The rune (integer Unicode code point) starting at the current position |
|   /// in the string. |
|   /// |
|   /// The value is -1 if there is no current code point. |
|   int get current => _currentCodePoint; |
| |
|   /// The number of code units that make up the current rune. |
|   /// |
|   /// Evaluates to zero if there is no current rune ([current] is -1). |
|   int get currentSize => _nextPosition - _position; |
| |
|   /// The current rune as a string. |
|   /// |
|   /// A rune that spans two code units yields a string of length 2. |
|   /// |
|   /// Evaluates to an empty string if there is no [current] value. |
|   String get currentAsString { |
|     final start = _position; |
|     final end = _nextPosition; |
|     if (start == end) return ""; |
|     // A single code unit goes through the index operator, not substring. |
|     return end - start == 1 ? string[start] : string.substring(start, end); |
|   } |
| |
|   /// Advances to the next rune. |
|   /// |
|   /// Returns `false`, leaving no current rune, if the iterator is already at |
|   /// the end of the string. A lead surrogate immediately followed by a trail |
|   /// surrogate is consumed as one combined rune; any other code unit, |
|   /// including an unpaired surrogate, is a rune on its own. |
|   bool moveNext() { |
|     final start = _nextPosition; |
|     _position = start; |
|     final unitCount = string.length; |
|     if (start == unitCount) { |
|       _currentCodePoint = -1; |
|       return false; |
|     } |
|     var rune = string.codeUnitAt(start); |
|     var end = start + 1; |
|     if (end < unitCount && _isLeadSurrogate(rune)) { |
|       final trailUnit = string.codeUnitAt(end); |
|       if (_isTrailSurrogate(trailUnit)) { |
|         rune = _combineSurrogatePair(rune, trailUnit); |
|         end += 1; |
|       } |
|     } |
|     _nextPosition = end; |
|     _currentCodePoint = rune; |
|     return true; |
|   } |
| |
|   /// Steps back to the previous rune. |
|   /// |
|   /// Returns `false`, leaving no current rune, if the iterator is already at |
|   /// the start of the string. A trail surrogate immediately preceded by a |
|   /// lead surrogate is consumed as one combined rune; any other code unit, |
|   /// including an unpaired surrogate, is a rune on its own. |
|   bool movePrevious() { |
|     final end = _position; |
|     _nextPosition = end; |
|     if (end == 0) { |
|       _currentCodePoint = -1; |
|       return false; |
|     } |
|     var start = end - 1; |
|     var rune = string.codeUnitAt(start); |
|     if (start > 0 && _isTrailSurrogate(rune)) { |
|       final leadUnit = string.codeUnitAt(start - 1); |
|       if (_isLeadSurrogate(leadUnit)) { |
|         rune = _combineSurrogatePair(leadUnit, rune); |
|         start -= 1; |
|       } |
|     } |
|     _position = start; |
|     _currentCodePoint = rune; |
|     return true; |
|   } |
| } |