blob: 6fb1b9035abe13dced4a3fe0c69424c88e589242 [file] [log] [blame]
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
part of dart.core;
/**
* The String class represents sequences of characters. Strings are
* immutable. A string is represented by a sequence of Unicode UTF-16
* code units accessible through the [codeUnitAt] or the
* [codeUnits] members. The string representation of an individual code unit
* is accessible through the index-operator.
*
* The characters of a string are encoded in UTF-16. Decoding UTF-16, which
* combines surrogate pairs, yields Unicode code points. Following a similar
* terminology to Go we use the name "rune" for an integer representing a
* Unicode code point. The runes of a string are accessible through the [runes]
* getter.
*/
abstract class String implements Comparable, Pattern {
  /**
   * Allocates a new String for the specified [charCodes].
   *
   * The [charCodes] can be UTF-16 code units or runes. If a char-code value is
   * 16-bit it is copied verbatim. If it is greater than 16 bits it is
   * decomposed into a surrogate pair.
   */
  external factory String.fromCharCodes(Iterable<int> charCodes);

  /**
   * *Deprecated*. Use [String.fromCharCode] instead.
   */
  @deprecated
  factory String.character(int charCode) => new String.fromCharCode(charCode);

  /**
   * Allocates a new String for the specified [charCode].
   *
   * The new string contains a single code unit if the [charCode] can be
   * represented by a single UTF-16 code unit. Otherwise the [length] is 2 and
   * the code units form a surrogate pair.
   *
   * It is allowed (though generally discouraged) to create a String with only
   * one half of a surrogate pair.
   */
  factory String.fromCharCode(int charCode) {
    // Delegates to [String.fromCharCodes] with a one-element list, so that
    // constructor handles the decomposition into a surrogate pair.
    return new String.fromCharCodes(<int>[charCode]);
  }

  /**
   * Gets the character (as [String]) at the given [index].
   *
   * The returned string represents exactly one UTF-16 code unit which may be
   * half of a surrogate pair. For example the Unicode character for a
   * musical G-clef ("𝄞") with rune value 0x1D11E consists of a UTF-16 surrogate
   * pair: `"\uD834\uDD1E"`. Using the index-operator on this string yields
   * a String with half of a surrogate pair:
   *
   *     var clef = "\uD834\uDD1E";
   *     clef.length;  // => 2
   *     clef.runes.first == 0x1D11E;  // => true
   *     clef.runes.length;  // => 1
   *     // The following strings are halves of a UTF-16 surrogate pair and
   *     // thus invalid UTF-16 strings:
   *     clef[0];  // => "\uD834"
   *     clef[1];  // => "\uDD1E"
   *
   * This method is equivalent to
   * `new String.fromCharCode(this.codeUnitAt(index))`.
   */
  String operator [](int index);

  /**
   * Gets the scalar character code at the given [index].
   *
   * *This method is deprecated. Please use [codeUnitAt] instead.*
   */
  @deprecated
  int charCodeAt(int index);

  /**
   * Returns the 16-bit UTF-16 code unit at the given [index].
   */
  int codeUnitAt(int index);

  /**
   * The length of the string.
   *
   * Returns the number of UTF-16 code units in this string. The number
   * of [runes] might be fewer, if the string contains characters outside
   * the basic multilingual plane (plane 0).
   */
  int get length;

  /**
   * Returns whether the two strings are equal.
   *
   * This method compares each individual code unit of the strings. It does not
   * check for Unicode equivalence. For example the two following strings both
   * represent the string "Amélie" but, due to their different encoding, will
   * not compare equal.
   *
   *     "Am\xe9lie"
   *     "Ame\u{301}lie"
   *
   * In the first string the "é" is encoded as a single unicode code unit,
   * whereas the second string encodes it as "e" with the combining
   * accent character "◌́".
   */
  bool operator ==(var other);

  /**
   * Returns whether this string ends with [other].
   */
  bool endsWith(String other);

  /**
   * Returns whether this string starts with [other].
   */
  bool startsWith(String other);

  /**
   * Returns the first location of [other] in this string starting at
   * [start] (inclusive).
   * Returns -1 if [other] could not be found.
   */
  int indexOf(String other, [int start]);

  /**
   * Returns the last location of [other] in this string, searching
   * backward starting at [start] (inclusive).
   * Returns -1 if [other] could not be found.
   */
  int lastIndexOf(String other, [int start]);

  /**
   * Returns whether this string is empty.
   */
  bool get isEmpty;

  /**
   * Creates a new string by concatenating this string with [other].
   *
   * A sequence of strings can be concatenated by using [Iterable.join]:
   *
   *     var strings = ['foo', 'bar', 'geez'];
   *     var concatenated = strings.join();
   */
  String concat(String other);

  /**
   * Returns a slice of this string from [startIndex] to [endIndex].
   *
   * If [startIndex] is omitted, it defaults to the start of the string.
   *
   * If [endIndex] is omitted, it defaults to the end of the string.
   *
   * If either index is negative, it's taken as a negative index from the
   * end of the string. Their effective value is computed by adding the
   * negative value to the [length] of the string.
   *
   * The effective indices, after adjusting for negative values, must be
   * non-negative and no greater than the length of the string, and
   * [endIndex] must not be less than [startIndex].
   */
  String slice([int startIndex, int endIndex]);

  /**
   * Returns a substring of this string in the given range.
   * [startIndex] is inclusive and [endIndex] is exclusive.
   */
  String substring(int startIndex, [int endIndex]);

  /**
   * Removes leading and trailing whitespace from a string. If the string
   * contains leading or trailing whitespace a new string with no leading and
   * no trailing whitespace is returned. Otherwise, the string itself is
   * returned. Whitespace is defined as every Unicode character in the Zs, Zl
   * and Zp categories (this includes no-break space), the spacing control
   * characters from 9 to 13 (tab, lf, vtab, ff and cr), and 0xfeff the BOM
   * character.
   */
  String trim();

  /**
   * Returns whether this string contains [other] starting
   * at [startIndex] (inclusive).
   */
  bool contains(Pattern other, [int startIndex]);

  /**
   * Returns a new string where the first occurrence of [from] in this string
   * is replaced with [to].
   */
  String replaceFirst(Pattern from, String to);

  /**
   * Returns a new string where all occurrences of [from] in this string
   * are replaced with [replace].
   */
  String replaceAll(Pattern from, var replace);

  /**
   * Returns a new string where all occurrences of [from] in this string
   * are replaced with a [String] depending on [replace].
   *
   * The [replace] function is called with the [Match] generated
   * by the pattern, and its result is used as replacement.
   */
  String replaceAllMapped(Pattern from, String replace(Match match));

  /**
   * Splits the string around matches of [pattern]. Returns
   * a list of substrings.
   *
   * Splitting with an empty string pattern (`""`) splits at UTF-16 code unit
   * boundaries and not at rune boundaries. The following two expressions
   * are hence equivalent:
   *
   *     string.split("")
   *     string.codeUnits.map((unit) => new String.fromCharCode(unit))
   *
   * Unless it is guaranteed that the string is in the basic multilingual
   * plane (meaning that each code unit represents a rune) it is often better
   * to map the runes instead:
   *
   *     string.runes.map((rune) => new String.fromCharCode(rune))
   */
  List<String> split(Pattern pattern);

  /**
   * Returns a list of the individual code-units converted to strings.
   *
   * *Deprecated*
   * If you want to split on code-unit boundaries, use [split]. If you
   * want to split on rune boundaries, use [runes] and map the result.
   *
   *     Iterable<String> characters =
   *         string.runes.map((c) => new String.fromCharCode(c));
   */
  @deprecated
  List<String> splitChars();

  /**
   * Splits the string on the [pattern], then converts each part and each match.
   *
   * The pattern is used to split the string into parts and separating matches.
   *
   * Each match is converted to a string by calling [onMatch]. If [onMatch]
   * is omitted, the matched string is used.
   *
   * Each non-matched part is converted by a call to [onNonMatch]. If
   * [onNonMatch] is omitted, the non-matching part is used.
   *
   * Then all the converted parts are combined into the resulting string.
   */
  String splitMapJoin(Pattern pattern,
                      {String onMatch(Match match),
                       String onNonMatch(String nonMatch)});

  /**
   * Returns a list of UTF-16 code units of this string.
   *
   * *This getter is deprecated. Use [codeUnits] instead.*
   */
  @deprecated
  List<int> get charCodes;

  /**
   * Returns an iterable of the UTF-16 code units of this string.
   */
  // TODO(floitsch): should it return a list?
  // TODO(floitsch): make it a bidirectional iterator.
  Iterable<int> get codeUnits;

  /**
   * Returns an iterable of Unicode code-points of this string.
   *
   * If the string contains surrogate pairs, they will be combined and returned
   * as one integer by this iterator. Unmatched surrogate halves are treated
   * like valid 16-bit code-units.
   */
  // TODO(floitsch): make it a Runes class.
  Iterable<int> get runes;

  /**
   * If this string is not already all lower case, returns a new string
   * where all characters are made lower case. Returns [:this:] otherwise.
   */
  // TODO(floitsch): document better. (See EcmaScript for description).
  String toLowerCase();

  /**
   * If this string is not already all upper case, returns a new string
   * where all characters are made upper case. Returns [:this:] otherwise.
   */
  // TODO(floitsch): document better. (See EcmaScript for description).
  String toUpperCase();
}