sdk/lib/core/string.dart - sdk.git - Git at Google

 // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 part of dart.core;

 /**
  * The String class represents sequences of characters. Strings are
  * immutable. A string is represented by a sequence of Unicode UTF-16
  * code units accessible through the [codeUnitAt] or the
  * [codeUnits] members. Their string representation is accessible through
  * the index-operator.
  *
  * The characters of a string are encoded in UTF-16. Decoding UTF-16, which
  * combines surrogate pairs, yields Unicode code points. Following a similar
  * terminology to Go we use the name "rune" for an integer representing a
  * Unicode code point. The runes of a string are accessible through the [runes]
  * getter.
  */
 abstract class String implements Comparable, Pattern {
   /**
    * Allocates a new String for the specified [charCodes].
    *
    * The [charCodes] can be UTF-16 code units or runes. If a char-code value is
    * 16-bit it is copied verbatim. If it is greater than 16 bits it is
    * decomposed into a surrogate pair.
    */
   external factory String.fromCharCodes(Iterable<int> charCodes);

   /**
    * *Deprecated*. Use [String.fromCharCode] instead.
    */
   @deprecated
   factory String.character(int charCode) => new String.fromCharCode(charCode);

   /**
    * Allocates a new String for the specified [charCode].
    *
    * The new string contains a single code unit if the [charCode] can be
    * represented by a single UTF-16 code unit. Otherwise the [length] is 2 and
    * the code units form a surrogate pair.
    *
    * It is allowed (though generally discouraged) to create a String with only
    * one half of a surrogate pair.
    */
   factory String.fromCharCode(int charCode) {
     // [String.fromCharCodes] accepts any iterable of char-codes, so a
     // one-element list literal is all that is needed here.
     return new String.fromCharCodes(<int>[charCode]);
   }

   /**
    * Gets the character (as [String]) at the given [index].
    *
    * The returned string represents exactly one UTF-16 code unit which may be
    * half of a surrogate pair. For example the Unicode character for a
    * musical G-clef ("𝄞") with rune value 0x1D11E consists of a UTF-16 surrogate
    * pair: `"\uD834\uDD1E"`. Using the index-operator on this string yields
    * a String with half of a surrogate pair:
    *
    *     var clef = "\uD834\uDD1E";
    *     clef.length;  // => 2
    *     clef.runes.first == 0x1D11E;  // => true
    *     clef.runes.length;  // => 1
    *     // The following strings are halves of a UTF-16 surrogate pair and
    *     // thus invalid UTF-16 strings:
    *     clef[0];  // => "\uD834"
    *     clef[1];  // => "\uDD1E"
    *
    * This method is equivalent to
    * `new String.fromCharCode(this.codeUnitAt(index))`.
    */
   String operator [](int index);

   /**
    * Gets the scalar character code at the given [index].
    *
    * *This method is deprecated. Please use [codeUnitAt] instead.*
    */
   @deprecated
   int charCodeAt(int index);

   /**
    * Returns the 16-bit UTF-16 code unit at the given [index].
    */
   int codeUnitAt(int index);

   /**
    * The length of the string.
    *
    * Returns the number of UTF-16 code units in this string. The number
    * of [runes] might be less, if the string contains characters outside
    * the basic multilingual plane (plane 0).
    */
   int get length;

   /**
    * Returns whether the two strings are equal.
    *
    * This method compares each individual code unit of the strings. It does not
    * check for Unicode equivalence. For example the two following strings both
    * represent the string "Amélie" but, due to their different encoding, do
    * not compare equal.
    *
    *     "Am\xe9lie"
    *     "Ame\u{301}lie"
    *
    * In the first string the "é" is encoded as a single Unicode code unit,
    * whereas the second string encodes it as "e" with the combining
    * accent character "◌́".
    */
   bool operator ==(var other);

   /**
    * Returns whether this string ends with [other].
    */
   bool endsWith(String other);

   /**
    * Returns whether this string starts with [other].
    */
   bool startsWith(String other);

   /**
    * Returns the first location of [other] in this string starting at
    * [start] (inclusive).
    * Returns -1 if [other] could not be found.
    */
   int indexOf(String other, [int start]);

   /**
    * Returns the last location of [other] in this string, searching
    * backward starting at [start] (inclusive).
    * Returns -1 if [other] could not be found.
    */
   int lastIndexOf(String other, [int start]);

   /**
    * Returns whether this string is empty.
    */
   bool get isEmpty;

   /**
    * Creates a new string by concatenating this string with [other].
    *
    * A sequence of strings can be concatenated by using [Iterable.join]:
    *
    *     var strings = ['foo', 'bar', 'geez'];
    *     var concatenated = strings.join();
    */
   String concat(String other);

   /**
    * Returns a slice of this string from [startIndex] to [endIndex].
    *
    * If [startIndex] is omitted, it defaults to the start of the string.
    *
    * If [endIndex] is omitted, it defaults to the end of the string.
    *
    * If either index is negative, it's taken as a negative index from the
    * end of the string. Their effective value is computed by adding the
    * negative value to the [length] of the string.
    *
    * The effective indices, after this adjustment of negative values, must be
    * non-negative, no greater than the length of the string, and [endIndex]
    * must not be less than [startIndex].
    */
   String slice([int startIndex, int endIndex]);

   /**
    * Returns a substring of this string in the given range.
    * [startIndex] is inclusive and [endIndex] is exclusive.
    */
   String substring(int startIndex, [int endIndex]);

   /**
    * Removes leading and trailing whitespace from a string. If the string
    * contains leading or trailing whitespace a new string with no leading and
    * no trailing whitespace is returned. Otherwise, the string itself is
    * returned.  Whitespace is defined as every Unicode character in the Zs, Zl
    * and Zp categories (this includes no-break space), the spacing control
    * characters from 9 to 13 (tab, lf, vtab, ff and cr), and 0xfeff the BOM
    * character.
    */
   String trim();

   /**
    * Returns whether this string contains [other] starting
    * at [startIndex] (inclusive).
    */
   bool contains(Pattern other, [int startIndex]);

   /**
    * Returns a new string where the first occurrence of [from] in this string
    * is replaced with [to].
    */
   String replaceFirst(Pattern from, String to);

   /**
    * Returns a new string where all occurrences of [from] in this string
    * are replaced with [replace].
    */
   String replaceAll(Pattern from, var replace);

   /**
    * Returns a new string where all occurrences of [from] in this string
    * are replaced with a [String] depending on [replace].
    *
    * The [replace] function is called with the [Match] generated
    * by the pattern, and its result is used as replacement.
    */
   String replaceAllMapped(Pattern from, String replace(Match match));

   /**
    * Splits the string around matches of [pattern]. Returns
    * a list of substrings.
    *
    * Splitting with an empty string pattern (`""`) splits at UTF-16 code unit
    * boundaries and not at rune boundaries. The following two expressions
    * are hence equivalent:
    *
    *     string.split("")
    *     string.codeUnits.map((unit) => new String.fromCharCode(unit))
    *
    * Unless it is guaranteed that the string is in the basic multilingual
    * plane (meaning that each code unit represents a rune) it is often better
    * to map the runes instead:
    *
    *     string.runes.map((rune) => new String.fromCharCode(rune))
    */
   List<String> split(Pattern pattern);

   /**
    * Returns a list of the individual code-units converted to strings.
    *
    * *Deprecated*
    * If you want to split on code-unit boundaries, use [split]. If you
    * want to split on rune boundaries, use [runes] and map the result.
    *
    *     Iterable<String> characters =
    *         string.runes.map((c) => new String.fromCharCode(c));
    */
   @deprecated
   List<String> splitChars();

   /**
    * Splits the string on the [pattern], then converts each part and each match.
    *
    * The pattern is used to split the string into parts and separating matches.
    *
    * Each match is converted to a string by calling [onMatch]. If [onMatch]
    * is omitted, the matched string is used.
    *
    * Each non-matched part is converted by a call to [onNonMatch]. If
    * [onNonMatch] is omitted, the non-matching part is used.
    *
    * Then all the converted parts are combined into the resulting string.
    */
   String splitMapJoin(Pattern pattern,
                       {String onMatch(Match match),
                        String onNonMatch(String nonMatch)});

   /**
    * Returns a list of UTF-16 code units of this string.
    *
    * *This getter is deprecated. Use [codeUnits] instead.*
    */
   @deprecated
   List<int> get charCodes;

   /**
    * Returns an iterable of the UTF-16 code units of this string.
    */
   // TODO(floitsch): should it return a list?
   // TODO(floitsch): make it a bidirectional iterator.
   Iterable<int> get codeUnits;

   /**
    * Returns an iterable of Unicode code-points of this string.
    *
    * If the string contains surrogate pairs, they will be combined and returned
    * as one integer by this iterator. Unmatched surrogate halves are treated
    * like valid 16-bit code-units.
    */
   // TODO(floitsch): make it a Runes class.
   Iterable<int> get runes;

   /**
    * If this string is not already all lower case, returns a new string
    * where all characters are made lower case. Returns [:this:] otherwise.
    */
   // TODO(floitsch): document better. (See EcmaScript for description).
   String toLowerCase();

   /**
    * If this string is not already all upper case, returns a new string
    * where all characters are made upper case. Returns [:this:] otherwise.
    */
   // TODO(floitsch): document better. (See EcmaScript for description).
   String toUpperCase();
 }
	// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	part of dart.core;

	/**
	* The String class represents sequences of characters. Strings are
	* immutable. A string is represented by a sequence of Unicode UTF-16
	* code units accessible through the [codeUnitAt] or the
	* [codeUnits] members. Their string representation is accessible through
	* the index-operator.
	*
	* The characters of a string are encoded in UTF-16. Decoding UTF-16, which
	* combines surrogate pairs, yields Unicode code points. Following a similar
	* terminology to Go we use the name "rune" for an integer representing a
	* Unicode code point. The runes of a string are accessible through the [runes]
	* getter.
	*/
	abstract class String implements Comparable, Pattern {
	  /**
	   * Allocates a new String for the specified [charCodes].
	   *
	   * The [charCodes] can be UTF-16 code units or runes. If a char-code value is
	   * 16-bit it is copied verbatim. If it is greater than 16 bits it is
	   * decomposed into a surrogate pair.
	   */
	  external factory String.fromCharCodes(Iterable<int> charCodes);

	  /**
	   * *Deprecated*. Use [String.fromCharCode] instead.
	   */
	  @deprecated
	  factory String.character(int charCode) => new String.fromCharCode(charCode);

	  /**
	   * Allocates a new String for the specified [charCode].
	   *
	   * The new string contains a single code unit if the [charCode] can be
	   * represented by a single UTF-16 code unit. Otherwise the [length] is 2 and
	   * the code units form a surrogate pair.
	   *
	   * It is allowed (though generally discouraged) to create a String with only
	   * one half of a surrogate pair.
	   */
	  factory String.fromCharCode(int charCode) {
	    // [String.fromCharCodes] accepts any iterable of char-codes, so a
	    // one-element list literal is all that is needed here.
	    return new String.fromCharCodes(<int>[charCode]);
	  }

	  /**
	   * Gets the character (as [String]) at the given [index].
	   *
	   * The returned string represents exactly one UTF-16 code unit which may be
	   * half of a surrogate pair. For example the Unicode character for a
	   * musical G-clef ("𝄞") with rune value 0x1D11E consists of a UTF-16 surrogate
	   * pair: `"\uD834\uDD1E"`. Using the index-operator on this string yields
	   * a String with half of a surrogate pair:
	   *
	   *     var clef = "\uD834\uDD1E";
	   *     clef.length;  // => 2
	   *     clef.runes.first == 0x1D11E;  // => true
	   *     clef.runes.length;  // => 1
	   *     // The following strings are halves of a UTF-16 surrogate pair and
	   *     // thus invalid UTF-16 strings:
	   *     clef[0];  // => "\uD834"
	   *     clef[1];  // => "\uDD1E"
	   *
	   * This method is equivalent to
	   * `new String.fromCharCode(this.codeUnitAt(index))`.
	   */
	  String operator [](int index);

	  /**
	   * Gets the scalar character code at the given [index].
	   *
	   * *This method is deprecated. Please use [codeUnitAt] instead.*
	   */
	  @deprecated
	  int charCodeAt(int index);

	  /**
	   * Returns the 16-bit UTF-16 code unit at the given [index].
	   */
	  int codeUnitAt(int index);

	  /**
	   * The length of the string.
	   *
	   * Returns the number of UTF-16 code units in this string. The number
	   * of [runes] might be less, if the string contains characters outside
	   * the basic multilingual plane (plane 0).
	   */
	  int get length;

	  /**
	   * Returns whether the two strings are equal.
	   *
	   * This method compares each individual code unit of the strings. It does not
	   * check for Unicode equivalence. For example the two following strings both
	   * represent the string "Amélie" but, due to their different encoding, do
	   * not compare equal.
	   *
	   *     "Am\xe9lie"
	   *     "Ame\u{301}lie"
	   *
	   * In the first string the "é" is encoded as a single Unicode code unit,
	   * whereas the second string encodes it as "e" with the combining
	   * accent character "◌́".
	   */
	  bool operator ==(var other);

	  /**
	   * Returns whether this string ends with [other].
	   */
	  bool endsWith(String other);

	  /**
	   * Returns whether this string starts with [other].
	   */
	  bool startsWith(String other);

	  /**
	   * Returns the first location of [other] in this string starting at
	   * [start] (inclusive).
	   * Returns -1 if [other] could not be found.
	   */
	  int indexOf(String other, [int start]);

	  /**
	   * Returns the last location of [other] in this string, searching
	   * backward starting at [start] (inclusive).
	   * Returns -1 if [other] could not be found.
	   */
	  int lastIndexOf(String other, [int start]);

	  /**
	   * Returns whether this string is empty.
	   */
	  bool get isEmpty;

	  /**
	   * Creates a new string by concatenating this string with [other].
	   *
	   * A sequence of strings can be concatenated by using [Iterable.join]:
	   *
	   *     var strings = ['foo', 'bar', 'geez'];
	   *     var concatenated = strings.join();
	   */
	  String concat(String other);

	  /**
	   * Returns a slice of this string from [startIndex] to [endIndex].
	   *
	   * If [startIndex] is omitted, it defaults to the start of the string.
	   *
	   * If [endIndex] is omitted, it defaults to the end of the string.
	   *
	   * If either index is negative, it's taken as a negative index from the
	   * end of the string. Their effective value is computed by adding the
	   * negative value to the [length] of the string.
	   *
	   * The effective indices, after this adjustment of negative values, must be
	   * non-negative, no greater than the length of the string, and [endIndex]
	   * must not be less than [startIndex].
	   */
	  String slice([int startIndex, int endIndex]);

	  /**
	   * Returns a substring of this string in the given range.
	   * [startIndex] is inclusive and [endIndex] is exclusive.
	   */
	  String substring(int startIndex, [int endIndex]);

	  /**
	   * Removes leading and trailing whitespace from a string. If the string
	   * contains leading or trailing whitespace a new string with no leading and
	   * no trailing whitespace is returned. Otherwise, the string itself is
	   * returned. Whitespace is defined as every Unicode character in the Zs, Zl
	   * and Zp categories (this includes no-break space), the spacing control
	   * characters from 9 to 13 (tab, lf, vtab, ff and cr), and 0xfeff the BOM
	   * character.
	   */
	  String trim();

	  /**
	   * Returns whether this string contains [other] starting
	   * at [startIndex] (inclusive).
	   */
	  bool contains(Pattern other, [int startIndex]);

	  /**
	   * Returns a new string where the first occurrence of [from] in this string
	   * is replaced with [to].
	   */
	  String replaceFirst(Pattern from, String to);

	  /**
	   * Returns a new string where all occurrences of [from] in this string
	   * are replaced with [replace].
	   */
	  String replaceAll(Pattern from, var replace);

	  /**
	   * Returns a new string where all occurrences of [from] in this string
	   * are replaced with a [String] depending on [replace].
	   *
	   * The [replace] function is called with the [Match] generated
	   * by the pattern, and its result is used as replacement.
	   */
	  String replaceAllMapped(Pattern from, String replace(Match match));

	  /**
	   * Splits the string around matches of [pattern]. Returns
	   * a list of substrings.
	   *
	   * Splitting with an empty string pattern (`""`) splits at UTF-16 code unit
	   * boundaries and not at rune boundaries. The following two expressions
	   * are hence equivalent:
	   *
	   *     string.split("")
	   *     string.codeUnits.map((unit) => new String.fromCharCode(unit))
	   *
	   * Unless it is guaranteed that the string is in the basic multilingual
	   * plane (meaning that each code unit represents a rune) it is often better
	   * to map the runes instead:
	   *
	   *     string.runes.map((rune) => new String.fromCharCode(rune))
	   */
	  List<String> split(Pattern pattern);

	  /**
	   * Returns a list of the individual code-units converted to strings.
	   *
	   * *Deprecated*
	   * If you want to split on code-unit boundaries, use [split]. If you
	   * want to split on rune boundaries, use [runes] and map the result.
	   *
	   *     Iterable<String> characters =
	   *         string.runes.map((c) => new String.fromCharCode(c));
	   */
	  @deprecated
	  List<String> splitChars();

	  /**
	   * Splits the string on the [pattern], then converts each part and each match.
	   *
	   * The pattern is used to split the string into parts and separating matches.
	   *
	   * Each match is converted to a string by calling [onMatch]. If [onMatch]
	   * is omitted, the matched string is used.
	   *
	   * Each non-matched part is converted by a call to [onNonMatch]. If
	   * [onNonMatch] is omitted, the non-matching part is used.
	   *
	   * Then all the converted parts are combined into the resulting string.
	   */
	  String splitMapJoin(Pattern pattern,
	                      {String onMatch(Match match),
	                       String onNonMatch(String nonMatch)});

	  /**
	   * Returns a list of UTF-16 code units of this string.
	   *
	   * *This getter is deprecated. Use [codeUnits] instead.*
	   */
	  @deprecated
	  List<int> get charCodes;

	  /**
	   * Returns an iterable of the UTF-16 code units of this string.
	   */
	  // TODO(floitsch): should it return a list?
	  // TODO(floitsch): make it a bidirectional iterator.
	  Iterable<int> get codeUnits;

	  /**
	   * Returns an iterable of Unicode code-points of this string.
	   *
	   * If the string contains surrogate pairs, they will be combined and returned
	   * as one integer by this iterator. Unmatched surrogate halves are treated
	   * like valid 16-bit code-units.
	   */
	  // TODO(floitsch): make it a Runes class.
	  Iterable<int> get runes;

	  /**
	   * If this string is not already all lower case, returns a new string
	   * where all characters are made lower case. Returns [:this:] otherwise.
	   */
	  // TODO(floitsch): document better. (See EcmaScript for description).
	  String toLowerCase();

	  /**
	   * If this string is not already all upper case, returns a new string
	   * where all characters are made upper case. Returns [:this:] otherwise.
	   */
	  // TODO(floitsch): document better. (See EcmaScript for description).
	  String toUpperCase();
	}