blob: 40ddbd2880e48e90743cfb9bb3ed354561478cf3 [file] [log] [blame]
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import "dart:collection";
import "grapheme_clusters/constants.dart";
import "grapheme_clusters/breaks.dart";
part "characters_impl.dart";
/// The characters of a string.
///
/// A character is a Unicode grapheme cluster represented
/// by a substring of the original string.
/// The `Characters` class is an [Iterable] of those strings.
/// However, unlike most iterables, many of the operations are
/// *eager*. Since the underlying string is known in its entirety,
/// and is known not to change, operations which select a subset of
/// the elements can be computed eagerly, and in that case the
/// operation returns a new `Characters` object.
///
/// A `Characters` also supports operations based on
/// string indices into the underlying string.
///
/// Inspection operations like [indexOf] or [lastIndexAfter]
/// return such indices, which are guaranteed to be at character
/// boundaries.
/// Most such operations use the index as starting point,
/// but will still only work on entire characters.
/// A few, like [substring] and [replaceSubstring], work directly
/// on the underlying string, independently of character
/// boundaries.
abstract class Characters implements Iterable<String> {
  /// Creates a [Characters] allowing iteration of
  /// the characters of [string].
  factory Characters(String string) = _Characters;

  /// The string to iterate over.
  String get string;

  /// A specialized character iterator.
  ///
  /// Allows iterating the characters of [string] as a plain iterator,
  /// as well as controlling the iteration in more detail.
  @override
  Character get iterator;

  /// Whether [character] is an element of this sequence of
  /// characters.
  ///
  /// Returns false if [character] is not a string containing
  /// a single character,
  /// because then it is not a single element of this [Iterable]
  /// of characters.
  @override
  bool contains(Object character);

  /// Whether this sequence of characters contains [other]
  /// as a subsequence.
  bool containsAll(Characters other);

  /// Whether [other] is an initial subsequence of this sequence
  /// of characters.
  ///
  /// If [startIndex] is provided, then checks whether
  /// [other] is an initial subsequence of the characters
  /// starting at the character boundary [startIndex].
  ///
  /// Returns `true` if [other] is a subsequence of this sequence of
  /// characters starting at the character boundary [startIndex].
  /// Returns `false` if [startIndex] is not a character boundary,
  /// or if [other] does not occur at that position.
  bool startsWith(Characters other, [int startIndex = 0]);

  /// Whether [other] is a trailing subsequence of this sequence
  /// of characters.
  ///
  /// If [endIndex] is provided, then checks whether
  /// [other] is a trailing subsequence of the characters
  /// ending at the character boundary [endIndex].
  ///
  /// Returns `true` if [other] is a subsequence of this sequence of
  /// characters ending at the character boundary [endIndex].
  /// Returns `false` if [endIndex] is not a character boundary,
  /// or if [other] does not occur at that position.
  bool endsWith(Characters other, [int endIndex]);

  /// The string index before the first place where [other] occurs as
  /// a subsequence of these characters.
  ///
  /// Returns the [string] index before the first occurrence of the
  /// characters of [other] in the sequence of characters of [string].
  /// Returns a negative number if there is no such occurrence of [other].
  ///
  /// If [startIndex] is supplied, returns the index before the first
  /// occurrence of [other] in this which starts no earlier than
  /// [startIndex], and again returns a negative number if there is no
  /// such occurrence. That is, if the result is non-negative, it is
  /// greater than or equal to [startIndex].
  int indexOf(Characters other, [int startIndex]);

  /// The string index after the first place [other] occurs as a
  /// subsequence of these characters.
  ///
  /// Returns the [string] index after the first occurrence of the
  /// characters of [other] in the sequence of characters of [string].
  /// Returns a negative number if there is no such occurrence of [other].
  ///
  /// If [startIndex] is supplied, returns the index after the first
  /// occurrence of [other] in this which starts no earlier than
  /// [startIndex], and again returns a negative number if there is no
  /// such occurrence. That is, if the result is non-negative, it is
  /// greater than or equal to [startIndex].
  int indexAfter(Characters other, [int startIndex]);

  /// The string index before the last place where [other] occurs as
  /// a subsequence of these characters.
  ///
  /// Returns the [string] index before the last occurrence of the
  /// characters of [other] in the sequence of characters of [string].
  /// Returns a negative number if there is no such occurrence of [other].
  ///
  /// If [startIndex] is supplied, returns the index before the last
  /// occurrence of [other] in this which starts no later than
  /// [startIndex], and again returns a negative number if there is no
  /// such occurrence. That is, the result is less than or equal to
  /// [startIndex].
  int lastIndexOf(Characters other, [int startIndex]);

  /// The string index after the last place where [other] occurs as
  /// a subsequence of these characters.
  ///
  /// Returns the [string] index after the last occurrence of the
  /// characters of [other] in the sequence of characters of [string].
  /// Returns a negative number if there is no such occurrence of [other].
  ///
  /// If [startIndex] is supplied, returns the index after the last
  /// occurrence of [other] in this which ends no later than
  /// [startIndex], and again returns a negative number if there is no
  /// such occurrence. That is, the result is less than or equal to
  /// [startIndex].
  int lastIndexAfter(Characters other, [int startIndex]);

  /// Eagerly selects a subset of the characters.
  ///
  /// Tests each character against [test], and returns the
  /// characters of the concatenation of those character strings.
  @override
  Characters where(bool Function(String) test);

  /// Eagerly selects all but the first [count] characters.
  ///
  /// If [count] is greater than [length], the count of characters
  /// available, then the empty sequence of characters
  /// is returned.
  @override
  Characters skip(int count);

  /// Eagerly selects the first [count] characters.
  ///
  /// If [count] is greater than [length], the count of characters
  /// available, then the entire sequence of characters
  /// is returned.
  @override
  Characters take(int count);

  /// Eagerly selects all but the last [count] characters.
  ///
  /// If [count] is greater than [length], the count of characters
  /// available, then the empty sequence of characters
  /// is returned.
  Characters skipLast(int count);

  /// Eagerly selects the last [count] characters.
  ///
  /// If [count] is greater than [length], the count of characters
  /// available, then the entire sequence of characters
  /// is returned.
  Characters takeLast(int count);

  /// Eagerly selects a range of characters.
  ///
  /// Both [start] and [end] are offsets of characters,
  /// not indices into [string].
  /// The [start] must be non-negative and [end] must be at least
  /// as large as [start].
  ///
  /// If [start] is at least as great as [length], then the result
  /// is an empty sequence of characters.
  /// If [end] is greater than [length], the count of characters
  /// available, then it acts the same as if it was [length].
  ///
  /// A call like `gc.getRange(a, b)` is equivalent to `gc.take(b).skip(a)`.
  Characters getRange(int start, int end);

  /// Eagerly selects a trailing sequence of characters.
  ///
  /// Checks each character, from first to last, against [test],
  /// until one is found where [test] returns `false`.
  /// The characters starting with the first one
  /// where [test] returns `false`, are included in the result.
  ///
  /// If no characters test `false`, the result is an empty sequence
  /// of characters.
  @override
  Characters skipWhile(bool Function(String) test);

  /// Eagerly selects a leading sequence of characters.
  ///
  /// Checks each character, from first to last, against [test],
  /// until one is found where [test] returns `false`.
  /// The characters up to, but not including, the first one
  /// where [test] returns `false` are included in the result.
  ///
  /// If no characters test `false`, the entire sequence of characters
  /// is returned.
  @override
  Characters takeWhile(bool Function(String) test);

  /// Eagerly selects a leading sequence of characters.
  ///
  /// Checks each character, from last to first, against [test],
  /// until one is found where [test] returns `false`.
  /// The characters up to and including the one with the latest index
  /// where [test] returns `false` are included in the result.
  ///
  /// If no characters test `false`, the empty sequence of characters
  /// is returned.
  Characters skipLastWhile(bool Function(String) test);

  /// Eagerly selects a trailing sequence of characters.
  ///
  /// Checks each character, from last to first, against [test],
  /// until one is found where [test] returns `false`.
  /// The characters after the one with the latest index where
  /// [test] returns `false` are included in the result.
  ///
  /// If no characters test `false`, the entire sequence of characters
  /// is returned.
  Characters takeLastWhile(bool Function(String) test);

  /// The characters of the concatenation of this and [other].
  ///
  /// This is the characters of the concatenation of the underlying
  /// strings. If there is no character break at the concatenation
  /// point in the resulting string, then the result is not the
  /// concatenation of the two character sequences.
  ///
  /// This differs from [followedBy] which provides the lazy concatenation
  /// of this sequence of strings with any other sequence of strings.
  Characters operator +(Characters other);

  /// The characters of [string] with [other] inserted at [index].
  ///
  /// The [index] is a string index, and can be any index into [string].
  Characters insertAt(int index, Characters other);

  /// The characters of [string] with a substring replaced by [other].
  Characters replaceSubstring(int startIndex, int endIndex, Characters other);

  /// The characters of a substring of [string].
  ///
  /// The [startIndex] and [endIndex] must be a valid range of [string]
  /// (0 &le; `startIndex` &le; `endIndex` &le; `string.length`).
  /// If [endIndex] is omitted, it defaults to `string.length`.
  Characters substring(int startIndex, [int endIndex]);

  /// Replaces [source] with [replacement].
  ///
  /// Returns a new [Characters] where all occurrences of the
  /// [source] character sequence are replaced by [replacement],
  /// unless the occurrence overlaps a prior replaced sequence.
  ///
  /// If [startIndex] is provided, only replace characters
  /// starting no earlier than [startIndex] in [string].
  Characters replaceAll(Characters source, Characters replacement,
      [int startIndex = 0]);

  /// Replaces the first [source] with [replacement].
  ///
  /// Returns a new [Characters] where the first occurrence of the
  /// [source] character sequence, if any, is replaced by [replacement].
  ///
  /// If [startIndex] is provided, replaces the first occurrence
  /// of [source] starting no earlier than [startIndex] in [string], if any.
  Characters replaceFirst(Characters source, Characters replacement,
      [int startIndex = 0]);

  /// The characters of the lower-case version of [string].
  Characters toLowerCase();

  /// The characters of the upper-case version of [string].
  Characters toUpperCase();

  /// The hash code of [string].
  @override
  int get hashCode;

  /// Whether [other] is another [Characters] with the same [string].
  @override
  bool operator ==(Object other);

  /// The [string] content of these characters.
  @override
  String toString();
}
/// An iterator that steps through the characters of a string.
///
/// Each character is a Unicode grapheme cluster, exposed as a substring
/// of the string being iterated.
///
/// While the [Character] moves across the grapheme clusters, its [start]
/// and [end] indices move across the grapheme cluster boundaries of the
/// string. A string containing *n* grapheme clusters has *n + 1*
/// boundaries, except that a string with zero grapheme clusters also has
/// zero boundaries. One way to enumerate those boundaries is:
/// ```dart
/// Iterable<int> graphemeClusterBoundaries(String string) sync* {
///   if (string.isEmpty) return;
///   var char = Characters(string).iterator;
///   var hasNext = false;
///   do {
///     hasNext = char.moveNext();
///     yield char.start;
///   } while (hasNext);
/// }
/// ```
abstract class Character implements BidirectionalIterator<String> {
  /// Creates a character iterator over the characters of [string].
  factory Character(String string) = _Character;

  /// The index in the underlying string where the current character begins.
  ///
  /// The index is on a cluster boundary, unless the iterator was reset
  /// to an index which is not a boundary.
  ///
  /// When this equals [end] there is no current character, and
  /// [moveNext] must be called before [current] is accessed.
  /// That situation arises when iteration has not yet begun,
  /// once [moveNext] has returned false,
  /// and following a call to [reset].
  int get start;

  /// The index in the underlying string where the current character ends.
  ///
  /// The index is on a cluster boundary, unless the iterator was reset
  /// to an index which is not a boundary.
  ///
  /// When this equals [start] there is no current character.
  int get end;

  /// The current character as a list of code units.
  List<int> get codeUnits;

  /// The current character as a sequence of code points.
  Runes get runes;

  /// Moves the iterator to the [index] position.
  ///
  /// After a reset there is no [current] character;
  /// [moveNext] must be called to locate the end of the character
  /// at the [index] position.
  /// Calling `reset(0)` returns to the beginning of the string, which is
  /// the state of a newly created iterator.
  void reset(int index);

  /// Moves the iterator back to the start of the string.
  ///
  /// Afterwards the iterator is in the same state as a new iterator
  /// obtained from [Characters.iterator].
  void resetStart();

  /// Moves the iterator to the end of the string.
  ///
  /// Afterwards the iterator is in the same state as one on which
  /// [moveNext] has been called until it returned false.
  void resetEnd();

  /// Creates a copy of this [Character].
  ///
  /// The copy starts out in exactly the same state as this iterator.
  /// This makes it possible to iterate the following characters more than
  /// once at the same time. If the goal is only to rewind an iterator,
  /// remember the [start] or [end] position instead and pass it to
  /// [reset].
  Character copy();
}