sdk/lib/core/string.dart - sdk.git - Git at Google

 // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 part of dart.core;

 /**
  * A sequence of characters.
  *
  * A string can be either single or multiline. Single line strings are
  * written using matching single or double quotes, and multiline strings are
  * written using triple quotes. The following are all valid Dart strings:
  *
  *     'Single quotes';
  *     "Double quotes";
  *     'Double quotes in "single" quotes';
  *     "Single quotes in 'double' quotes";
  *
  *     '''A
  *     multiline
  *     string''';
  *
  *     """
  *     Another
  *     multiline
  *     string""";
  *
  * Strings are immutable. Although you cannot change a string, you can perform
  * an operation on a string and assign the result to a new string:
  *
  *     var string = 'Dart is fun';
  *     var newString = string.substring(0, 5);
  *
  * You can use the plus (`+`) operator to concatenate strings:
  *
  *     'Dart ' + 'is ' + 'fun!'; // 'Dart is fun!'
  *
  * You can also use adjacent string literals for concatenation:
  *
  *     'Dart ' 'is ' 'fun!';    // 'Dart is fun!'
  *
  * You can use `${}` to interpolate the value of Dart expressions
  * within strings. The curly braces can be omitted when evaluating identifiers:
  *
  *     string = 'dartlang';
  *     '$string has ${string.length} letters'; // 'dartlang has 8 letters'
  *
  * A string is represented by a sequence of Unicode UTF-16 code units
  * accessible through the [codeUnitAt] or the [codeUnits] members:
  *
  *     string = 'Dart';
  *     string.codeUnitAt(0); // 68
  *     string.codeUnits;     // [68, 97, 114, 116]
  *
  * The string representation of code units is accessible through the index
  * operator:
  *
  *     string[0];            // 'D'
  *
  * The characters of a string are encoded in UTF-16. Decoding UTF-16, which
  * combines surrogate pairs, yields Unicode code points. Following a similar
  * terminology to Go, we use the name 'rune' for an integer representing a
  * Unicode code point. Use the [runes] property to get the runes of a string:
  *
  *     string.runes.toList(); // [68, 97, 114, 116]
  *
  * For a character outside the Basic Multilingual Plane (plane 0) that is
  * composed of a surrogate pair, [runes] combines the pair and returns a
  * single integer.  For example, the Unicode character for a
  * musical G-clef ('𝄞') with rune value 0x1D11E consists of a UTF-16 surrogate
  * pair: `0xD834` and `0xDD1E`. Using [codeUnits] returns the surrogate pair,
  * and using `runes` returns their combined value:
  *
  *     var clef = '\u{1D11E}';
  *     clef.codeUnits;         // [0xD834, 0xDD1E]
  *     clef.runes.toList();    // [0x1D11E]
  *
  * The String class can not be extended or implemented. Attempting to do so
  * yields a compile-time error.
  *
  * ## Other resources
  *
  * See [StringBuffer] to efficiently build a string incrementally. See
  * [RegExp] to work with regular expressions.
  *
  * Also see:
  *
  * * [Dart Cookbook](https://www.dartlang.org/docs/cookbook/#strings)
  *   for String examples and recipes.
  * * [Dart Up and Running](https://www.dartlang.org/docs/dart-up-and-running/contents/ch03.html#ch03-strings-and-regular-expressions)
  */
 abstract class String implements Comparable<String>, Pattern {
   /**
    * Allocates a new String for the specified [charCodes].
    *
    * The [charCodes] can be UTF-16 code units or runes. If a char-code value is
    * 16-bit, it is copied verbatim:
    *
    *     new String.fromCharCodes([68]); // 'D'
    *
    * If a char-code value is greater than 16-bits, it is decomposed into a
    * surrogate pair:
    *
    *     var clef = new String.fromCharCodes([0x1D11E]);
    *     clef.codeUnitAt(0); // 0xD834
    *     clef.codeUnitAt(1); // 0xDD1E
    */
   external factory String.fromCharCodes(Iterable<int> charCodes);

   /**
    * Allocates a new String for the specified [charCode].
    *
    * If the [charCode] can be represented by a single UTF-16 code unit, the new
    * string contains a single code unit. Otherwise, the [length] is 2 and
    * the code units form a surrogate pair. See documentation for
    * [fromCharCodes].
    *
    * Creating a String with half of a surrogate pair is legal but generally
    * discouraged.
    */
   factory String.fromCharCode(int charCode) {
     // Wrap the single char code in a one-element list so that the encoding
     // work is delegated to [fromCharCodes].
     List<int> charCodes = new List<int>.filled(1, charCode);
     return new String.fromCharCodes(charCodes);
   }

   /**
    * Gets the character (as a single-code-unit [String]) at the given [index].
    *
    * The returned string represents exactly one UTF-16 code unit, which may be
    * half of a surrogate pair. A single member of a surrogate pair is an
    * invalid UTF-16 string:
    *
    *     var clef = '\u{1D11E}';
    *     // These represent invalid UTF-16 strings.
    *     clef[0].codeUnits;      // [0xD834]
    *     clef[1].codeUnits;      // [0xDD1E]
    *
    * This method is equivalent to
    * `new String.fromCharCode(this.codeUnitAt(index))`.
    */
   String operator [](int index);

   /**
    * Returns the 16-bit UTF-16 code unit at the given [index].
    */
   int codeUnitAt(int index);

   /**
    * The length of the string.
    *
    * Returns the number of UTF-16 code units in this string. The number
    * of [runes] might be fewer, if the string contains characters outside
    * the Basic Multilingual Plane (plane 0):
    *
    *     'Dart'.length;          // 4
    *     'Dart'.runes.length;    // 4
    *
    *     var clef = '\u{1D11E}';
    *     clef.length;            // 2
    *     clef.runes.length;      // 1
    */
   int get length;

   /**
    * Returns true if the two strings are equal, and false otherwise.
    *
    * This method compares each individual code unit of the strings.
    * It does not check for Unicode equivalence.
    * For example, both the following strings represent the string 'Amélie',
    * but due to their different encoding, are not equal:
    *
    *     'Am\xe9lie' == 'Ame\u{301}lie'; // false
    *
    * The first string encodes 'é' as a single Unicode code unit (also
    * a single rune), whereas the second string encodes it as 'e' with the
    * combining accent character '◌́'.
    */
   bool operator ==(var other);

   /**
    * Returns true if this string ends with [other]. For example:
    *
    *     'Dart'.endsWith('t'); // true
    */
   bool endsWith(String other);

   /**
    * Returns true if this string starts with a match of [pattern].
    *
    *     var string = 'Dart';
    *     string.startsWith('D');                       // true
    *     string.startsWith(new RegExp(r'[A-Z][a-z]')); // true
    *
    * If [index] is provided, this method checks if the substring starting
    * at that index starts with a match of [pattern]:
    *
    *     string.startsWith('art', 1);                  // true
    *     string.startsWith(new RegExp(r'\w{3}'), 1);   // true
    *
    * [index] must not be negative or greater than [length].
    *
    * A [RegExp] containing '^' does not match if the [index] is greater than
    * zero. The pattern works on the string as a whole, and does not extract
    * a substring starting at [index] first:
    *
    *     string.startsWith(new RegExp(r'^art'), 1);    // false
    *     string.startsWith(new RegExp(r'art'), 1);     // true
    */
   bool startsWith(Pattern pattern, [int index = 0]);

   /**
    * Returns the position of the first match of [pattern] in this string,
    * starting at [start], inclusive:
    *
    *     var string = 'Dartisans';
    *     string.indexOf('art');                     // 1
    *     string.indexOf(new RegExp(r'[A-Z][a-z]')); // 0
    *
    * Returns -1 if no match is found:
    *
    *     string.indexOf(new RegExp(r'dart'));       // -1
    *
    * [start] must not be negative or greater than [length].
    */
   int indexOf(Pattern pattern, [int start]);

   /**
    * Returns the position of the last match of [pattern] in this string,
    * searching backward starting at [start], inclusive:
    *
    *     var string = 'Dartisans';
    *     string.lastIndexOf('a');                    // 6
    *     string.lastIndexOf(new RegExp(r'a(r|n)'));  // 6
    *
    * Returns -1 if [pattern] could not be found.
    *
    *     string.lastIndexOf(new RegExp(r'DART'));    // -1
    *
    * [start] must not be negative or greater than [length].
    */
   int lastIndexOf(Pattern pattern, [int start]);

   /**
    * Returns true if this string is empty.
    */
   bool get isEmpty;

   /**
    * Returns true if this string is not empty.
    */
   bool get isNotEmpty;

   /**
    * Creates a new string by concatenating this string with [other].
    *
    *     'dart' + 'lang'; // 'dartlang'
    */
   String operator +(String other);

   /**
    * Returns the substring of this string that extends from [startIndex],
    * inclusive, to [endIndex], exclusive.
    *
    * If [endIndex] is omitted, the substring extends to the end of this
    * string, as in the first example below.
    *
    *     var string = 'dartlang';
    *     string.substring(1);    // 'artlang'
    *     string.substring(1, 4); // 'art'
    */
   String substring(int startIndex, [int endIndex]);

   /**
    * Removes leading and trailing whitespace from a string.
    *
    * If the string contains leading or trailing whitespace, a new string with no
    * leading and no trailing whitespace is returned:
    *
    *     '\tDart is fun\n'.trim(); // 'Dart is fun'
    *
    * Otherwise, the original string itself is returned:
    *
    *     var str1 = 'Dart';
    *     var str2 = str1.trim();
    *     identical(str1, str2);    // true
    *
    * Whitespace is defined by the Unicode White_Space property (as defined in
    * version 6.2 or later) and the BOM character, 0xFEFF.
    *
    * Here is the list of trimmed characters (following version 6.2):
    *
    *     0009..000D    ; White_Space # Cc   <control-0009>..<control-000D>
    *     0020          ; White_Space # Zs   SPACE
    *     0085          ; White_Space # Cc   <control-0085>
    *     00A0          ; White_Space # Zs   NO-BREAK SPACE
    *     1680          ; White_Space # Zs   OGHAM SPACE MARK
    *     180E          ; White_Space # Zs   MONGOLIAN VOWEL SEPARATOR
    *     2000..200A    ; White_Space # Zs   EN QUAD..HAIR SPACE
    *     2028          ; White_Space # Zl   LINE SEPARATOR
    *     2029          ; White_Space # Zp   PARAGRAPH SEPARATOR
    *     202F          ; White_Space # Zs   NARROW NO-BREAK SPACE
    *     205F          ; White_Space # Zs   MEDIUM MATHEMATICAL SPACE
    *     3000          ; White_Space # Zs   IDEOGRAPHIC SPACE
    *
    *     FEFF          ; BOM                ZERO WIDTH NO-BREAK SPACE
    */
   String trim();

   /**
    * Returns true if this string contains a match of [other]:
    *
    *     var string = 'Dart strings';
    *     string.contains('D');                     // true
    *     string.contains(new RegExp(r'[A-Z]'));    // true
    *
    * If [startIndex] is provided, this method matches only at or after that
    * index:
    *
    *     string.contains('X', 1);                  // false
    *     string.contains(new RegExp(r'[A-Z]'), 1); // false
    *
    * [startIndex] must not be negative or greater than [length].
    */
   bool contains(Pattern other, [int startIndex = 0]);

   /**
    * Returns a new string in which the first occurrence of [from] in this
    * string is replaced with [to]:
    *
    *     '0.0001'.replaceFirst(new RegExp(r'0'), ''); // '.0001'
    */
   String replaceFirst(Pattern from, String to);

   /**
    * Replaces all substrings that match [from] with [replace].
    *
    * Returns a new string in which the non-overlapping substrings matching
    * [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
    * by the literal string [replace].
    *
    *     'resume'.replaceAll(new RegExp(r'e'), 'é'); // 'résumé'
    *
    * Notice that the [replace] string is not interpreted. If the replacement
    * depends on the match (for example on a [RegExp]'s capture groups), use
    * the [replaceAllMapped] method instead.
    */
   String replaceAll(Pattern from, String replace);

   /**
    * Replaces all substrings that match [from] by a string computed from the
    * match.
    *
    * Returns a new string in which the non-overlapping substrings that match
    * [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
    * by the result of calling [replace] on the corresponding [Match] object.
    *
    * This can be used to replace matches with new content that depends on the
    * match, unlike [replaceAll] where the replacement string is always the same.
    *
    * The [replace] function is called with the [Match] generated
    * by the pattern, and its result is used as replacement.
    *
    * The function defined below converts each word in a string to simplified
    * 'pig latin' using [replaceAllMapped]:
    *
    *     pigLatin(String words) => words.replaceAllMapped(
    *         new RegExp(r'\b(\w*?)([aeiou]\w*)', caseSensitive: false),
    *         (Match m) => "${m[2]}${m[1]}${m[1].isEmpty ? 'way' : 'ay'}");
    *
    *     pigLatin('I have a secret now!'); // 'Iway avehay away ecretsay ownay!'
    */
   String replaceAllMapped(Pattern from, String replace(Match match));

   /**
    * Splits the string at matches of [pattern]. Returns
    * a list of substrings.
    *
    * Splitting with an empty string pattern (`''`) splits at UTF-16 code unit
    * boundaries and not at rune boundaries:
    *
    *     var string = 'Pub';
    *     string.split('');                       // ['P', 'u', 'b']
    *
    *     string.codeUnits.map((unit) {
    *       return new String.fromCharCode(unit);
    *     }).toList();                            // ['P', 'u', 'b']
    *
    *     // String made up of two code units, but one rune.
    *     string = '\u{1D11E}';
    *     string.split('').length;                 // 2
    *
    * You should [map] the runes unless you are certain that the string is in
    * the basic multilingual plane (meaning that each code unit represents a
    * rune):
    *
    *     string.runes.map((rune) => new String.fromCharCode(rune));
    */
   List<String> split(Pattern pattern);

   /**
    * Splits the string, converts its parts, and combines them into a new
    * string.
    *
    * [pattern] is used to split the string into parts and separating matches.
    *
    * Each match is converted to a string by calling [onMatch]. If [onMatch]
    * is omitted, the matched string is used.
    *
    * Each non-matched part is converted by a call to [onNonMatch]. If
    * [onNonMatch] is omitted, the non-matching part is used.
    *
    * Then all the converted parts are combined into the resulting string.
    *
    *     'Eats shoots leaves'.splitMapJoin((new RegExp(r'shoots')),
    *         onMatch:    (m) => '${m.group(0)}',
    *         onNonMatch: (n) => '*'); // *shoots*
    */
   String splitMapJoin(Pattern pattern,
                       {String onMatch(Match match),
                        String onNonMatch(String nonMatch)});

   /**
    * Returns an unmodifiable list of the UTF-16 code units of this string.
    */
   List<int> get codeUnits;

   /**
    * Returns an [Iterable] of Unicode code-points of this string.
    *
    * If the string contains surrogate pairs, they are combined and returned
    * as one integer by this iterator. Unmatched surrogate halves are treated
    * like valid 16-bit code-units.
    */
   Runes get runes;

   /**
    * Converts all characters in this string to lower case.
    * If the string is already in all lower case, this method returns [:this:].
    *
    *     'ALPHABET'.toLowerCase(); // 'alphabet'
    *     'abc'.toLowerCase();      // 'abc'
    *
    * This function uses the language independent Unicode mapping and thus only
    * works in some languages.
    */
   // TODO(floitsch): document better. (See EcmaScript for description).
   String toLowerCase();

   /**
    * Converts all characters in this string to upper case.
    * If the string is already in all upper case, this method returns [:this:].
    *
    *     'alphabet'.toUpperCase(); // 'ALPHABET'
    *     'ABC'.toUpperCase();      // 'ABC'
    *
    * This function uses the language independent Unicode mapping and thus only
    * works in some languages.
    */
   // TODO(floitsch): document better. (See EcmaScript for description).
   String toUpperCase();
 }

 /**
  * An [Iterable] view of the runes (integer Unicode code points) of a
  * [String].
  *
  * Iteration is performed by a [RuneIterator] over the wrapped [string].
  */
 class Runes extends IterableBase<int> {
   /** The string whose runes this object exposes. */
   final String string;

   /** Creates the rune view of [string]. */
   Runes(this.string);

   /** A new [RuneIterator] positioned at the beginning of [string]. */
   RuneIterator get iterator {
     return new RuneIterator(string);
   }

   /**
    * The last rune of [string].
    *
    * Inspects at most the final two code units instead of iterating over the
    * whole string. Throws a [StateError] if [string] is empty.
    */
   int get last {
     int unitCount = string.length;
     if (unitCount == 0) throw new StateError('No elements.');

     int finalUnit = string.codeUnitAt(unitCount - 1);
     // A single-unit string, or a final unit that is not a trail surrogate,
     // cannot end in a surrogate pair: the final unit is the rune.
     if (unitCount == 1 || !_isTrailSurrogate(finalUnit)) return finalUnit;

     // Only a lead surrogate directly before the trail surrogate forms a pair;
     // otherwise the unpaired trail surrogate is returned as-is.
     int precedingUnit = string.codeUnitAt(unitCount - 2);
     return _isLeadSurrogate(precedingUnit)
         ? _combineSurrogatePair(precedingUnit, finalUnit)
         : finalUnit;
   }
 }

 // Whether [code] (a 16-bit unsigned integer) is a UTF-16 lead surrogate,
 // i.e. lies in the range 0xD800..0xDBFF. Only the top six of the sixteen
 // bits are compared.
 bool _isLeadSurrogate(int code) {
   const int surrogateKindMask = 0xFC00;
   const int leadSurrogateTag = 0xD800;
   return (code & surrogateKindMask) == leadSurrogateTag;
 }

 // Whether [code] (a 16-bit unsigned integer) is a UTF-16 trail surrogate,
 // i.e. lies in the range 0xDC00..0xDFFF. Only the top six of the sixteen
 // bits are compared.
 bool _isTrailSurrogate(int code) {
   const int surrogateKindMask = 0xFC00;
   const int trailSurrogateTag = 0xDC00;
   return (code & surrogateKindMask) == trailSurrogateTag;
 }

 // Decodes a surrogate pair into a single code point: the low ten bits of the
 // lead surrogate [start] and the low ten bits of the trail surrogate [end]
 // form a 20-bit offset that is added to 0x10000.
 int _combineSurrogatePair(int start, int end) {
   const int payloadMask = 0x3FF;
   int highTenBits = (start & payloadMask) << 10;
   int lowTenBits = end & payloadMask;
   // The two halves occupy disjoint bit ranges, so OR-ing them is the same as
   // adding them.
   return 0x10000 + (highTenBits | lowTenBits);
 }

 /**
  * [Iterator] for reading runes (integer Unicode code points) out of a Dart
  * string.
  *
  * The iterator can move both forwards ([moveNext]) and backwards
  * ([movePrevious]). A lead surrogate immediately followed by a trail
  * surrogate is read as one rune; every other code unit, including an
  * unpaired surrogate, is read as a rune on its own.
  */
 class RuneIterator implements BidirectionalIterator<int> {
   /** String being iterated. */
   final String string;
   /** Position before the current code point. */
   int _position;
   /** Position after the current code point. */
   int _nextPosition;
   /**
    * Current code point.
    *
    * If the iterator has hit either end, the [_currentCodePoint] is null
    * and [: _position == _nextPosition :].
    */
   int _currentCodePoint;

   /** Create an iterator positioned at the beginning of the string. */
   RuneIterator(this.string)
       : _position = 0, _nextPosition = 0;

   /**
    * Create an iterator positioned before the [index]th code unit of the string.
    *
    * When created, there is no [current] value.
    * A [moveNext] will use the rune starting at [index] as the current value,
    * and a [movePrevious] will use the rune ending just before [index] as the
    * current value.
    *
    * Throws a [RangeError] if [index] is negative or greater than
    * [:string.length:], and an [ArgumentError] if the [index] position is in
    * the middle of a surrogate pair.
    */
   RuneIterator.at(this.string, int index)
       : _position = index, _nextPosition = index {
     if (index < 0 || index > string.length) {
       throw new RangeError.range(index, 0, string.length);
     }
     _checkSplitSurrogate(index);
   }

   /** Throw an error if the index is in the middle of a surrogate pair. */
   void _checkSplitSurrogate(int index) {
     // Only an interior index can split a pair: the code unit before it must
     // be a lead surrogate and the code unit at it a trail surrogate.
     if (index > 0 && index < string.length &&
         _isLeadSurrogate(string.codeUnitAt(index - 1)) &&
         _isTrailSurrogate(string.codeUnitAt(index))) {
       throw new ArgumentError('Index inside surrogate pair: $index');
     }
   }

   /**
    * Returns the starting position of the current rune in the string.
    *
    * Returns null if the [current] rune is null.
    */
   int get rawIndex => (_position != _nextPosition) ? _position : null;

   /**
    * Resets the iterator to the rune at the specified index of the string.
    *
    * After the call, the rune starting at [rawIndex] is the [current] rune.
    *
    * Setting a negative [rawIndex], or one greater than or equal to
    * [:string.length:], throws a [RangeError]. Setting it in the middle of a
    * surrogate pair throws an [ArgumentError].
    *
    * To position the iterator at the end of the string, with no [current]
    * rune, use [reset] with [:string.length:] instead.
    */
   void set rawIndex(int rawIndex) {
     // Check both bounds here so that the error reports the range that is
     // valid for this setter (0..length - 1) rather than the wider range
     // accepted by [reset] (0..length).
     if (rawIndex < 0 || rawIndex >= string.length) {
       throw new RangeError.range(rawIndex, 0, string.length - 1);
     }
     reset(rawIndex);
     moveNext();
   }

   /**
    * Resets the iterator to the given index into the string.
    *
    * After this the [current] value is unset.
    * You must call [moveNext] to make the rune at the position current,
    * or [movePrevious] for the last rune before the position.
    *
    * Setting a negative [rawIndex], or one greater than [:string.length:],
    * throws a [RangeError]. Setting it in the middle of a surrogate pair
    * throws an [ArgumentError].
    */
   void reset([int rawIndex = 0]) {
     if (rawIndex < 0 || rawIndex > string.length) {
       throw new RangeError.range(rawIndex, 0, string.length);
     }
     _checkSplitSurrogate(rawIndex);
     _position = _nextPosition = rawIndex;
     _currentCodePoint = null;
   }

   /**
    * The rune (integer Unicode code point) starting at the current position in
    * the string.
    *
    * Null if there is no current rune: before the first move, after a
    * [reset], or after moving past either end of the string.
    */
   int get current => _currentCodePoint;

   /**
    * The number of code units comprising the current rune.
    *
    * Returns zero if there is no current rune ([current] is null).
    */
   int get currentSize => _nextPosition - _position;

   /**
    * A string containing the current rune.
    *
    * For runes outside the basic multilingual plane, this will be
    * a String of length 2, containing two code units.
    *
    * Returns null if [current] is null.
    */
   String get currentAsString {
     if (_position == _nextPosition) return null;
     // Single code unit: index directly instead of taking a substring.
     if (_position + 1 == _nextPosition) return string[_position];
     return string.substring(_position, _nextPosition);
   }

   /**
    * Moves to the rune that starts where the current one ends.
    *
    * Returns true and updates [current] if there is such a rune. Returns
    * false and sets [current] to null if the end of the string was reached.
    */
   bool moveNext() {
     _position = _nextPosition;
     if (_position == string.length) {
       _currentCodePoint = null;
       return false;
     }
     int codeUnit = string.codeUnitAt(_position);
     int nextPosition = _position + 1;
     // A lead surrogate directly followed by a trail surrogate is consumed as
     // one two-unit rune.
     if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) {
       int nextCodeUnit = string.codeUnitAt(nextPosition);
       if (_isTrailSurrogate(nextCodeUnit)) {
         _nextPosition = nextPosition + 1;
         _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit);
         return true;
       }
     }
     // Any other code unit, including an unpaired surrogate, is a rune on its
     // own.
     _nextPosition = nextPosition;
     _currentCodePoint = codeUnit;
     return true;
   }

   /**
    * Moves to the rune that ends where the current one starts.
    *
    * Returns true and updates [current] if there is such a rune. Returns
    * false and sets [current] to null if the start of the string was reached.
    */
   bool movePrevious() {
     _nextPosition = _position;
     if (_position == 0) {
       _currentCodePoint = null;
       return false;
     }
     int position = _position - 1;
     int codeUnit = string.codeUnitAt(position);
     // A trail surrogate directly preceded by a lead surrogate is consumed as
     // one two-unit rune.
     if (_isTrailSurrogate(codeUnit) && position > 0) {
       int prevCodeUnit = string.codeUnitAt(position - 1);
       if (_isLeadSurrogate(prevCodeUnit)) {
         _position = position - 1;
         _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit);
         return true;
       }
     }
     // Any other code unit, including an unpaired surrogate, is a rune on its
     // own.
     _position = position;
     _currentCodePoint = codeUnit;
     return true;
   }
 }
	// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	part of dart.core;

	/**
	* A sequence of characters.
	*
	* A string can be either single or multiline. Single line strings are
	* written using matching single or double quotes, and multiline strings are
	* written using triple quotes. The following are all valid Dart strings:
	*
	* 'Single quotes';
	* "Double quotes";
	* 'Double quotes in "single" quotes';
	* "Single quotes in 'double' quotes";
	*
	* '''A
	* multiline
	* string''';
	*
	* """
	* Another
	* multiline
	* string""";
	*
	* Strings are immutable. Although you cannot change a string, you can perform
	* an operation on a string and assign the result to a new string:
	*
	* var string = 'Dart is fun';
	* var newString = string.substring(0, 5);
	*
	* You can use the plus (`+`) operator to concatenate strings:
	*
	* 'Dart ' + 'is ' + 'fun!'; // 'Dart is fun!'
	*
	* You can also use adjacent string literals for concatenation:
	*
	* 'Dart ' 'is ' 'fun!'; // 'Dart is fun!'
	*
	* You can use `${}` to interpolate the value of Dart expressions
	* within strings. The curly braces can be omitted when evaluating identifiers:
	*
	* string = 'dartlang';
	* '$string has ${string.length} letters'; // 'dartlang has 8 letters'
	*
	* A string is represented by a sequence of Unicode UTF-16 code units
	* accessible through the [codeUnitAt] or the [codeUnits] members:
	*
	* string = 'Dart';
	* string.codeUnitAt(0); // 68
	* string.codeUnits; // [68, 97, 114, 116]
	*
	* The string representation of code units is accessible through the index
	* operator:
	*
	* string[0]; // 'D'
	*
	* The characters of a string are encoded in UTF-16. Decoding UTF-16, which
	* combines surrogate pairs, yields Unicode code points. Following a similar
	* terminology to Go, we use the name 'rune' for an integer representing a
	* Unicode code point. Use the [runes] property to get the runes of a string:
	*
	* string.runes.toList(); // [68, 97, 114, 116]
	*
	* For a character outside the Basic Multilingual Plane (plane 0) that is
	* composed of a surrogate pair, [runes] combines the pair and returns a
	* single integer. For example, the Unicode character for a
	* musical G-clef ('𝄞') with rune value 0x1D11E consists of a UTF-16 surrogate
	* pair: `0xD834` and `0xDD1E`. Using [codeUnits] returns the surrogate pair,
	* and using `runes` returns their combined value:
	*
	* var clef = '\u{1D11E}';
	* clef.codeUnits; // [0xD834, 0xDD1E]
	* clef.runes.toList(); // [0x1D11E]
	*
	* The String class can not be extended or implemented. Attempting to do so
	* yields a compile-time error.
	*
	* ## Other resources
	*
	* See [StringBuffer] to efficiently build a string incrementally. See
	* [RegExp] to work with regular expressions.
	*
	* Also see:
	*
	* * [Dart Cookbook](https://www.dartlang.org/docs/cookbook/#strings)
	*   for String examples and recipes.
	* * [Dart Up and Running](https://www.dartlang.org/docs/dart-up-and-running/contents/ch03.html#ch03-strings-and-regular-expressions)
	*/
	abstract class String implements Comparable<String>, Pattern {
	/**
	* Allocates a new String for the specified [charCodes].
	*
	* The [charCodes] can be UTF-16 code units or runes. If a char-code value is
	* 16-bit, it is copied verbatim:
	*
	* new String.fromCharCodes([68]); // 'D'
	*
	* If a char-code value is greater than 16-bits, it is decomposed into a
	* surrogate pair:
	*
	* var clef = new String.fromCharCodes([0x1D11E]);
	* clef.codeUnitAt(0); // 0xD834
	* clef.codeUnitAt(1); // 0xDD1E
	*/
	external factory String.fromCharCodes(Iterable<int> charCodes);

	/**
	* Allocates a new String for the specified [charCode].
	*
	* If the [charCode] can be represented by a single UTF-16 code unit, the new
	* string contains a single code unit. Otherwise, the [length] is 2 and
	* the code units form a surrogate pair. See documentation for
	* [fromCharCodes].
	*
	* Creating a String with half of a surrogate pair is legal but generally
	* discouraged.
	*/
	factory String.fromCharCode(int charCode) {
	List<int> charCodes = new List<int>.filled(1, charCode);
	return new String.fromCharCodes(charCodes);
	}

	/**
	* Gets the character (as a single-code-unit [String]) at the given [index].
	*
	* The returned string represents exactly one UTF-16 code unit, which may be
	* half of a surrogate pair. A single member of a surrogate pair is an
	* invalid UTF-16 string:
	*
	* var clef = '\u{1D11E}';
	* // These represent invalid UTF-16 strings.
	* clef[0].codeUnits; // [0xD834]
	* clef[1].codeUnits; // [0xDD1E]
	*
	* This method is equivalent to
	* `new String.fromCharCode(this.codeUnitAt(index))`.
	*/
	String operator [](int index);

	/**
	* Returns the 16-bit UTF-16 code unit at the given [index].
	*/
	int codeUnitAt(int index);

	/**
	* The length of the string.
	*
	* Returns the number of UTF-16 code units in this string. The number
	* of [runes] might be fewer, if the string contains characters outside
	* the Basic Multilingual Plane (plane 0):
	*
	* 'Dart'.length; // 4
	* 'Dart'.runes.length; // 4
	*
	* var clef = '\u{1D11E}';
	* clef.length; // 2
	* clef.runes.length; // 1
	*/
	int get length;

	/**
	* Returns true if the two strings are equal. False, otherwise.
	*
	* This method compares each individual code unit of the strings.
	* It does not check for Unicode equivalence.
	* For example, both the following strings represent the string 'Amélie',
	* but due to their different encoding, are not equal:
	*
	* 'Am\xe9lie' == 'Ame\u{301}lie'; // false
	*
	* The first string encodes 'é' as a single unicode code unit (also
	* a single rune), whereas the second string encodes it as 'e' with the
	* combining accent character '◌́'.
	*/
	bool operator ==(var other);

	/**
	* Returns true if this string ends with [other]. For example:
	*
	* 'Dart'.endsWith('t'); // true
	*/
	bool endsWith(String other);

	/**
	* Returns true if this string starts with a match of [pattern].
	*
	* var string = 'Dart';
	* string.startsWith('D'); // true
	* string.startsWith(new RegExp(r'[A-Z][a-z]')); // true
	*
	* If [index] is provided, this method checks if the substring starting
	* at that index starts with a match of [pattern]:
	*
	* string.startsWith('art', 1); // true
	* string.startsWith(new RegExp(r'\w{3}'), 1); // true
	*
	* [index] must not be negative or greater than [length].
	*
	* A [RegExp] containing '^' does not match if the [index] is greater than
	* zero. The pattern works on the string as a whole, and does not extract
	* a substring starting at [index] first:
	*
	* string.startsWith(new RegExp(r'^art'), 1); // false
	* string.startsWith(new RegExp(r'art'), 1); // true
	*/
	bool startsWith(Pattern pattern, [int index = 0]);

	/**
	* Returns the position of the first match of [pattern] in this string,
	* starting at [start], inclusive:
	*
	* var string = 'Dartisans';
	* string.indexOf('art'); // 1
	* string.indexOf(new RegExp(r'[A-Z][a-z]')); // 0
	*
	* Returns -1 if no match is found:
	*
	* string.indexOf(new RegExp(r'dart')); // -1
	*
	* [start] must not be negative or greater than [length].
	*/
	int indexOf(Pattern pattern, [int start]);

	/**
	* Returns the position of the last match of [pattern] in this string,
	* searching backward starting at [start], inclusive:
	*
	* var string = 'Dartisans';
	* string.lastIndexOf('a'); // 6
	* string.lastIndexOf(new RegExp(r'a(r|n)')); // 6
	*
	* Returns -1 if [pattern] could not be found.
	*
	* string.lastIndexOf(new RegExp(r'DART')); // -1
	*
	* [start] must not be negative or greater than [length].
	*/
	int lastIndexOf(Pattern pattern, [int start]);

	/**
	* Returns true if this string is empty.
	*/
	bool get isEmpty;

	/**
	* Returns true if this string is not empty.
	*/
	bool get isNotEmpty;

	/**
	* Creates a new string by concatenating this string with [other].
	*
	* 'dart' + 'lang'; // 'dartlang'
	*/
	String operator +(String other);

	/**
	* Returns the substring of this string that extends from [startIndex],
	* inclusive, to [endIndex], exclusive.
	*
	* var string = 'dartlang';
	* string.substring(1); // 'artlang'
	* string.substring(1, 4); // 'art'
	*/
	String substring(int startIndex, [int endIndex]);

	/**
	 * Returns the string without any leading and trailing whitespace.
	 *
	 * If the string contains leading or trailing whitespace, a new string with no
	 * leading and no trailing whitespace is returned:
	 *
	 *     '\tDart is fun\n'.trim(); // 'Dart is fun'
	 *
	 * Otherwise, the original string itself is returned:
	 *
	 *     var str1 = 'Dart';
	 *     var str2 = str1.trim();
	 *     identical(str1, str2);    // true
	 *
	 * Whitespace is defined by the Unicode White_Space property (as defined in
	 * version 6.2 or later) and the BOM character, 0xFEFF.
	 *
	 * Here is the list of trimmed characters (following version 6.2):
	 *
	 *     0009..000D    ; White_Space # Cc   <control-0009>..<control-000D>
	 *     0020          ; White_Space # Zs   SPACE
	 *     0085          ; White_Space # Cc   <control-0085>
	 *     00A0          ; White_Space # Zs   NO-BREAK SPACE
	 *     1680          ; White_Space # Zs   OGHAM SPACE MARK
	 *     180E          ; White_Space # Zs   MONGOLIAN VOWEL SEPARATOR
	 *     2000..200A    ; White_Space # Zs   EN QUAD..HAIR SPACE
	 *     2028          ; White_Space # Zl   LINE SEPARATOR
	 *     2029          ; White_Space # Zp   PARAGRAPH SEPARATOR
	 *     202F          ; White_Space # Zs   NARROW NO-BREAK SPACE
	 *     205F          ; White_Space # Zs   MEDIUM MATHEMATICAL SPACE
	 *     3000          ; White_Space # Zs   IDEOGRAPHIC SPACE
	 *
	 *     FEFF          ; BOM                ZERO WIDTH NO_BREAK SPACE
	 */
	String trim();

	/**
	 * Returns true if this string contains a match of [other]:
	 *
	 *     var string = 'Dart strings';
	 *     string.contains('D');                     // true
	 *     string.contains(new RegExp(r'[A-Z]'));    // true
	 *
	 * If [startIndex] is provided, this method matches only at or after that
	 * index:
	 *
	 *     string.contains('X', 1);                  // false
	 *     string.contains(new RegExp(r'[A-Z]'), 1); // false
	 *
	 * [startIndex] defaults to 0, and must not be negative or greater than
	 * [length].
	 */
	bool contains(Pattern other, [int startIndex = 0]);

	/**
	 * Returns a new string in which the first occurrence of [from] in this
	 * string is replaced with [to]:
	 *
	 *     '0.0001'.replaceFirst(new RegExp(r'0'), ''); // '.0001'
	 */
	String replaceFirst(Pattern from, String to);

	/**
	 * Replaces all substrings that match [from] with [replace].
	 *
	 * Returns a new string in which the non-overlapping substrings matching
	 * [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
	 * by the literal string [replace]:
	 *
	 *     'resume'.replaceAll(new RegExp(r'e'), 'é'); // 'résumé'
	 *
	 * Notice that the [replace] string is not interpreted. If the replacement
	 * depends on the match (for example on a [RegExp]'s capture groups), use
	 * the [replaceAllMapped] method instead.
	 */
	String replaceAll(Pattern from, String replace);

	/**
	 * Replaces all substrings that match [from] by a string computed from the
	 * match.
	 *
	 * Returns a new string in which the non-overlapping substrings that match
	 * [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
	 * by the result of calling [replace] on the corresponding [Match] object.
	 *
	 * This can be used to replace matches with new content that depends on the
	 * match, unlike [replaceAll] where the replacement string is always the same.
	 *
	 * The [replace] function is called with the [Match] generated
	 * by the pattern, and its result is used as replacement.
	 *
	 * The function defined below converts each word in a string to simplified
	 * 'pig latin' using [replaceAllMapped]:
	 *
	 *     pigLatin(String words) => words.replaceAllMapped(
	 *         new RegExp(r'\b(\w*?)([aeiou]\w*)', caseSensitive: false),
	 *         (Match m) => "${m[2]}${m[1]}${m[1].isEmpty ? 'way' : 'ay'}");
	 *
	 *     pigLatin('I have a secret now!'); // 'Iway avehay away ecretsay ownay!'
	 */
	String replaceAllMapped(Pattern from, String replace(Match match));

	/**
	 * Splits the string at matches of [pattern] and returns a list of the
	 * substrings.
	 *
	 * Splitting with an empty string pattern (`''`) splits at UTF-16 code unit
	 * boundaries and not at rune boundaries:
	 *
	 *     var string = 'Pub';
	 *     string.split('');                        // ['P', 'u', 'b']
	 *
	 *     string.codeUnits.map((unit) {
	 *       return new String.fromCharCode(unit);
	 *     }).toList();                             // ['P', 'u', 'b']
	 *
	 *     // String made up of two code units, but one rune.
	 *     string = '\u{1D11E}';
	 *     string.split('').length;                 // 2
	 *
	 * You should [map] the runes unless you are certain that the string is in
	 * the basic multilingual plane (meaning that each code unit represents a
	 * rune):
	 *
	 *     string.runes.map((rune) => new String.fromCharCode(rune));
	 */
	List<String> split(Pattern pattern);

	/**
	 * Splits the string, converts its parts, and combines them into a new
	 * string.
	 *
	 * [pattern] is used to split the string into parts and separating matches.
	 *
	 * Each match is converted to a string by calling [onMatch]. If [onMatch]
	 * is omitted, the matched string is used.
	 *
	 * Each non-matched part is converted by a call to [onNonMatch]. If
	 * [onNonMatch] is omitted, the non-matching part is used.
	 *
	 * Then all the converted parts are combined into the resulting string.
	 *
	 *     'Eats shoots leaves'.splitMapJoin((new RegExp(r'shoots')),
	 *         onMatch:    (m) => '${m.group(0)}',
	 *         onNonMatch: (n) => '*'); // *shoots*
	 */
	String splitMapJoin(Pattern pattern,
	{String onMatch(Match match),
	String onNonMatch(String nonMatch)});

	/**
	 * Returns an unmodifiable list of the UTF-16 code units of this string.
	 */
	List<int> get codeUnits;

	/**
	 * Returns an [Iterable] of the Unicode code points of this string.
	 *
	 * If the string contains surrogate pairs, they are combined and returned
	 * as one integer by this iterator. Unmatched surrogate halves are treated
	 * like valid 16-bit code units.
	 */
	Runes get runes;

	/**
	 * Converts all characters in this string to lower case.
	 *
	 * If the string is already in all lower case, this method returns `this`:
	 *
	 *     'ALPHABET'.toLowerCase(); // 'alphabet'
	 *     'abc'.toLowerCase();      // 'abc'
	 *
	 * This function uses the language independent Unicode mapping and thus only
	 * works in some languages.
	 */
	// TODO(floitsch): document better. (See EcmaScript for description).
	String toLowerCase();

	/**
	 * Converts all characters in this string to upper case.
	 *
	 * If the string is already in all upper case, this method returns `this`:
	 *
	 *     'alphabet'.toUpperCase(); // 'ALPHABET'
	 *     'ABC'.toUpperCase();      // 'ABC'
	 *
	 * This function uses the language independent Unicode mapping and thus only
	 * works in some languages.
	 */
	// TODO(floitsch): document better. (See EcmaScript for description).
	String toUpperCase();
	}

	/**
	 * An iterable view of the runes (integer Unicode code points) of a [String].
	 */
	class Runes extends IterableBase<int> {
	  /** The string whose runes are exposed. */
	  final String string;

	  Runes(this.string);

	  /** A fresh [RuneIterator] positioned at the start of [string]. */
	  RuneIterator get iterator => new RuneIterator(string);

	  /**
	   * The final rune of [string].
	   *
	   * A trailing lead/trail surrogate pair is combined into one code point;
	   * any other final code unit is returned as is.
	   *
	   * Throws a [StateError] if [string] is empty.
	   */
	  int get last {
	    if (string.isEmpty) {
	      throw new StateError('No elements.');
	    }
	    final int lastIndex = string.length - 1;
	    final int tail = string.codeUnitAt(lastIndex);
	    // Only a trail surrogate with a code unit before it can end a pair.
	    if (lastIndex == 0 || !_isTrailSurrogate(tail)) return tail;
	    final int head = string.codeUnitAt(lastIndex - 1);
	    return _isLeadSurrogate(head) ? _combineSurrogatePair(head, tail) : tail;
	  }
	}

	// Whether [code] (a 16-bit unsigned integer) is a UTF-16 lead surrogate,
	// i.e. its bits selected by the mask 0xFC00 equal 0xD800.
	bool _isLeadSurrogate(int code) {
	  const int surrogateTagMask = 0xFC00;
	  const int leadSurrogateTag = 0xD800;
	  return (code & surrogateTagMask) == leadSurrogateTag;
	}

	// Whether [code] (a 16-bit unsigned integer) is a UTF-16 trail surrogate,
	// i.e. its bits selected by the mask 0xFC00 equal 0xDC00.
	bool _isTrailSurrogate(int code) {
	  const int surrogateTagMask = 0xFC00;
	  const int trailSurrogateTag = 0xDC00;
	  return (code & surrogateTagMask) == trailSurrogateTag;
	}

	// Combines a lead surrogate [start] and a trail surrogate [end] into the
	// single code point they encode: the low ten bits of [start] become the
	// high part, the low ten bits of [end] the low part, offset by 0x10000.
	int _combineSurrogatePair(int start, int end) {
	  final int highBits = (start & 0x3FF) << 10;
	  final int lowBits = end & 0x3FF;
	  return 0x10000 + highBits + lowBits;
	}

	/**
	 * [Iterator] for reading runes (integer Unicode code points) out of a Dart
	 * string.
	 *
	 * The iterator can move in both directions, using [moveNext] and
	 * [movePrevious]. A lead surrogate immediately followed by a trail surrogate
	 * is reported as a single combined code point; any other code unit,
	 * including an unmatched surrogate, is reported on its own.
	 */
	class RuneIterator implements BidirectionalIterator<int> {
	  /** String being iterated. */
	  final String string;
	  /** Position before the current code point. */
	  int _position;
	  /** Position after the current code point. */
	  int _nextPosition;
	  /**
	   * Current code point.
	   *
	   * If the iterator has hit either end, the [_currentCodePoint] is null
	   * and `_position == _nextPosition`.
	   */
	  int _currentCodePoint;

	  /** Create an iterator positioned at the beginning of the string. */
	  RuneIterator(String string)
	      : this.string = string, _position = 0, _nextPosition = 0;

	  /**
	   * Create an iterator positioned before the [index]th code unit of the
	   * string.
	   *
	   * When created, there is no [current] value.
	   * A [moveNext] will use the rune starting at [index] as the current value,
	   * and a [movePrevious] will use the rune ending just before [index] as the
	   * current value.
	   *
	   * Throws a [RangeError] if [index] is negative or greater than
	   * `string.length`, and an [ArgumentError] if the [index] position is in
	   * the middle of a surrogate pair.
	   */
	  RuneIterator.at(String string, int index)
	      : string = string, _position = index, _nextPosition = index {
	    if (index < 0 || index > string.length) {
	      throw new RangeError.range(index, 0, string.length);
	    }
	    _checkSplitSurrogate(index);
	  }

	  /** Throw an error if the index is in the middle of a surrogate pair. */
	  void _checkSplitSurrogate(int index) {
	    // An index splits a pair only if a lead surrogate sits just before it
	    // and a trail surrogate sits at it; both neighbours must exist.
	    if (index > 0 && index < string.length &&
	        _isLeadSurrogate(string.codeUnitAt(index - 1)) &&
	        _isTrailSurrogate(string.codeUnitAt(index))) {
	      throw new ArgumentError('Index inside surrogate pair: $index');
	    }
	  }

	  /**
	   * Returns the starting position of the current rune in the string.
	   *
	   * Returns null if the [current] rune is null.
	   */
	  int get rawIndex => (_position != _nextPosition) ? _position : null;

	  /**
	   * Resets the iterator to the rune at the specified index of the string.
	   *
	   * After a successful call, [current] is the rune starting at [rawIndex].
	   *
	   * Setting a negative [rawIndex], or one greater than or equal to
	   * `string.length`, is an error and throws a [RangeError]. So is setting
	   * it in the middle of a surrogate pair, which throws an [ArgumentError].
	   *
	   * To position the iterator at the end of the string, with no [current]
	   * value, call [reset] with `string.length` instead.
	   */
	  void set rawIndex(int rawIndex) {
	    // Validate both bounds here so the reported range matches this setter's
	    // contract (0..length - 1) rather than the wider range of [reset].
	    if (rawIndex < 0 || rawIndex >= string.length) {
	      throw new RangeError.range(rawIndex, 0, string.length - 1);
	    }
	    reset(rawIndex);
	    moveNext();
	  }

	  /**
	   * Resets the iterator to the given index into the string.
	   *
	   * After this the [current] value is unset.
	   * You must call [moveNext] to make the rune at the position current,
	   * or [movePrevious] for the last rune before the position.
	   *
	   * Setting a negative [rawIndex], or one greater than `string.length`,
	   * is an error and throws a [RangeError]. So is setting it in the middle
	   * of a surrogate pair, which throws an [ArgumentError].
	   */
	  void reset([int rawIndex = 0]) {
	    if (rawIndex < 0 || rawIndex > string.length) {
	      throw new RangeError.range(rawIndex, 0, string.length);
	    }
	    _checkSplitSurrogate(rawIndex);
	    _position = _nextPosition = rawIndex;
	    _currentCodePoint = null;
	  }

	  /**
	   * The rune (integer Unicode code point) starting at the current position
	   * in the string, or null if there is no current rune.
	   */
	  int get current => _currentCodePoint;

	  /**
	   * The number of code units comprising the current rune.
	   *
	   * Returns zero if there is no current rune ([current] is null).
	   */
	  int get currentSize => _nextPosition - _position;

	  /**
	   * A string containing the current rune.
	   *
	   * For runes outside the basic multilingual plane, this will be
	   * a String of length 2, containing two code units.
	   *
	   * Returns null if [current] is null.
	   */
	  String get currentAsString {
	    if (_position == _nextPosition) return null;
	    if (_position + 1 == _nextPosition) return string[_position];
	    return string.substring(_position, _nextPosition);
	  }

	  /**
	   * Moves to the rune that starts where the current one ends.
	   *
	   * Returns true and updates [current] if such a rune exists. At the end of
	   * the string, sets [current] to null and returns false.
	   */
	  bool moveNext() {
	    _position = _nextPosition;
	    if (_position == string.length) {
	      _currentCodePoint = null;
	      return false;
	    }
	    int codeUnit = string.codeUnitAt(_position);
	    int nextPosition = _position + 1;
	    if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) {
	      int nextCodeUnit = string.codeUnitAt(nextPosition);
	      if (_isTrailSurrogate(nextCodeUnit)) {
	        // A complete surrogate pair: consume both code units as one rune.
	        _nextPosition = nextPosition + 1;
	        _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit);
	        return true;
	      }
	    }
	    // A single code unit, possibly an unmatched surrogate.
	    _nextPosition = nextPosition;
	    _currentCodePoint = codeUnit;
	    return true;
	  }

	  /**
	   * Moves to the rune that ends where the current one starts.
	   *
	   * Returns true and updates [current] if such a rune exists. At the start
	   * of the string, sets [current] to null and returns false.
	   */
	  bool movePrevious() {
	    _nextPosition = _position;
	    if (_position == 0) {
	      _currentCodePoint = null;
	      return false;
	    }
	    int position = _position - 1;
	    int codeUnit = string.codeUnitAt(position);
	    if (_isTrailSurrogate(codeUnit) && position > 0) {
	      int prevCodeUnit = string.codeUnitAt(position - 1);
	      if (_isLeadSurrogate(prevCodeUnit)) {
	        // A complete surrogate pair: step back over both code units.
	        _position = position - 1;
	        _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit);
	        return true;
	      }
	    }
	    // A single code unit, possibly an unmatched surrogate.
	    _position = position;
	    _currentCodePoint = codeUnit;
	    return true;
	  }
	}