First attempt at index-agnostic API. (#6)
Version 0.2.0.
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..c078e2b
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,14 @@
+{
+ // Use IntelliSense to learn about possible attributes.
+ // Hover to view descriptions of existing attributes.
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "Dart",
+ "program": "bin/main.dart",
+ "request": "launch",
+ "type": "dart"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 456ba84..94d3cc7 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,4 @@
also know as [Unicode (extended) grapheme clusters](https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries).
The `Characters` class allows access to the individual characters of a string,
-and a way to navigate back and forth between them.
-It also has a set of utility functions for inspecting and modifying strings
-without breaking up graphemes clusters.
+and a way to navigate back and forth between them using a `CharacterRange`.
diff --git a/benchmark/benchmark.dart b/benchmark/benchmark.dart
new file mode 100644
index 0000000..5345b9d
--- /dev/null
+++ b/benchmark/benchmark.dart
@@ -0,0 +1,99 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+// Benchmark of efficiency of grapheme cluster operations.
+
+import "package:characters/characters.dart";
+
+import "../test/src/text_samples.dart";
+
+double bench(int Function() action, int ms) {
+ int elapsed = 0;
+ int count = 0;
+ var stopwatch = Stopwatch()..start();
+ do {
+ count += action();
+ elapsed = stopwatch.elapsedMilliseconds;
+ } while (elapsed < ms);
+ return count / elapsed;
+}
+
+int iterateIndicesOnly() {
+ int graphemeClusters = 0;
+ var char = Characters(hangul).iterator;
+ while (char.moveNext()) graphemeClusters++;
+ char = Characters(genesis).iterator;
+ while (char.moveNext()) graphemeClusters++;
+ return graphemeClusters;
+}
+
+int iterateStrings() {
+ int codeUnits = 0;
+ var char = Characters(hangul).iterator;
+ while (char.moveNext()) codeUnits += char.current.length;
+ char = Characters(genesis).iterator;
+ while (char.moveNext()) codeUnits += char.current.length;
+ return codeUnits;
+}
+
+int reverseStrings() {
+ var revHangul = reverse(hangul);
+ var rev2Hangul = reverse(revHangul);
+ if (hangul != rev2Hangul || hangul == revHangul) throw "Bad reverse";
+
+ var revGenesis = reverse(genesis);
+ var rev2Genesis = reverse(revGenesis);
+ if (genesis != rev2Genesis || genesis == revGenesis) throw "Bad reverse";
+
+ return (hangul.length + genesis.length) * 2;
+}
+
+int replaceStrings() {
+ int count = 0;
+ {
+ const language = "한글";
+ assert(language.length == 6);
+ var chars = Characters(hangul);
+ var replaced =
+ chars.replaceAll(Characters(language), Characters("Hangul!"));
+ count += replaced.string.length - hangul.length;
+ }
+ {
+ var chars = Characters(genesis);
+ var replaced = chars.replaceAll(Characters("And"), Characters("Also"));
+ count += replaced.string.length - genesis.length;
+ }
+ return count;
+}
+
+String reverse(String input) {
+ var chars = Characters(input);
+ var buffer = StringBuffer();
+ for (var it = chars.iteratorAtEnd; it.moveBack();) {
+ buffer.write(it.current);
+ }
+ return buffer.toString();
+}
+
+void main(List<String> args) {
+ int count = 1;
+ if (args.isNotEmpty) count = int.tryParse(args[0]) ?? 1;
+
+ // Warmup.
+ bench(iterateIndicesOnly, 250);
+ bench(iterateStrings, 250);
+ bench(reverseStrings, 250);
+ bench(replaceStrings, 250);
+
+ for (int i = 0; i < count; i++) {
+ var performance = bench(iterateIndicesOnly, 2000);
+ print("Index Iteration: $performance gc/ms");
+ performance = bench(iterateStrings, 2000);
+ print("String Iteration: $performance cu/ms");
+ performance = bench(reverseStrings, 2000);
+ print("String Reversing: $performance cu/ms");
+ performance = bench(replaceStrings, 2000);
+ print("String Replacing: $performance changes/ms");
+ }
+}
diff --git a/lib/src/characters.dart b/lib/src/characters.dart
index 40ddbd2..3e5170c 100644
--- a/lib/src/characters.dart
+++ b/lib/src/characters.dart
@@ -2,12 +2,7 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
-import "dart:collection";
-
-import "grapheme_clusters/constants.dart";
-import "grapheme_clusters/breaks.dart";
-
-part "characters_impl.dart";
+import "characters_impl.dart";
/// The characters of a string.
///
@@ -20,30 +15,37 @@
/// the elements can be computed eagerly, and in that case the
/// operation returns a new `Characters` object.
///
-/// A `Characters` also supports operations based on
-/// string indices into the underlying string.
-///
-/// Inspection operations like [indexOf] or [lastIndexAfter]
-/// returns such indices which are guranteed to be at character
-/// boundaries.
-/// Most such operations use the index as starting point,
-/// but will still only work on entire characters.
-/// A few, like [substring] and [replaceSubstring], work directly
-/// on the underlying string, independently of character
-/// boundaries.
+/// The [iterator] provided by [Characters] is a [CharacterRange]
+/// which allows iterating the independent characters in both directions,
+/// but which also provides ways to select other ranges of characters
+/// in different ways.
abstract class Characters implements Iterable<String> {
/// Creates a [Characters] allowing iteration of
/// the characters of [string].
- factory Characters(String string) = _Characters;
+ factory Characters(String string) = StringCharacters;
/// The string to iterate over.
String get string;
- /// A specialized character iterator.
+ /// Iterator over the characters of this string.
+ ///
+ /// Returns [CharacterRange] positioned before the first character
+ /// of this [Characters].
///
/// Allows iterating the characters of [string] as a plain iterator,
- // as well as controlling the iteration in more detail.
- Character get iterator;
+ /// using [CharacterRange.moveNext],
+ /// as well as controlling the iteration in more detail.
+ CharacterRange get iterator;
+
+ /// Iterator over the characters of this string.
+ ///
+ /// Returns [CharacterRange] positioned after the last character
+ /// of this [Characters].
+ ///
+ /// Allows iterating the characters of [string] backwards
+ /// using [CharacterRange.moveBack],
+ /// as well as controlling the iteration in more detail.
+ CharacterRange get iteratorAtEnd;
/// Whether [Character] is an element of this sequence of
/// characters.
@@ -58,83 +60,32 @@
/// as a subsequence.
bool containsAll(Characters other);
- /// Whether [other] is an initial subsequence of this sequence
- /// of characters.
+ /// Whether this string starts with the characters of [other].
///
- /// If [startIndex] is provided, then checks whether
- /// [other] is an initial subsequence of the characters
- /// starting at the character boundary [startIndex].
- ///
- /// Returns `true` if [other] is a sub-sequence of this sequence of
- /// characters startings at the character boundary [startIndex].
- /// Returns `false` if [startIndex] is not a character boundary,
- /// or if [other] does not occur at that position.
- bool startsWith(Characters other, [int startIndex = 0]);
+ /// Returns `true` if the characters of [other]
+ /// are also the first characters of this string,
+ /// and `false` otherwise.
+ bool startsWith(Characters other);
- /// Whether [other] is an trailing subsequence of this sequence
- /// of characters.
+ /// Whether this string ends with the characters of [other].
///
- /// If [endIndex] is provided, then checks whether
- /// [other] is a trailing subsequence of the characters
- /// starting at the character boundary [endIndex].
- ///
- /// Returns `true` if [other] is a sub-sequence of this sequence of
- /// characters startings at the character boundary [endIndex].
- /// Returns `false` if [endIndex] is not a character boundary,
- /// or if [other] does not occur at that position.
- bool endsWith(Characters other, [int endIndex]);
+ /// Returns `true` if the characters of [other]
+ /// are also the last characters of this string,
+ /// and `false` otherwise.
+ bool endsWith(Characters other);
- /// The string index before the first place where [other] occurs as
- /// a subsequence of these characters.
+ /// Finds the first occurrence of [characters] in this string.
///
- /// Returns the [string] index before first occurrence of the character
- /// of [other] in the sequence of characters of [string].
- /// Returns a negative number if there is no such occurrence of [other].
- ///
- /// If [startIndex] is supplied, returns the index after the first occurrence
- /// of [other] in this which starts no earlier than [startIndex], and again
- /// returns `null` if there is no such occurrence. That is, if the result
- /// is non-negative, it is greater than or equal to [startIndex].
- int indexOf(Characters other, [int startIndex]);
+ /// Returns a [CharacterRange] containing the first occurrence of
+ /// [characters] in this string.
+ /// Returns `null` if there is no such occurrence.
+ CharacterRange /*?*/ findFirst(Characters characters);
- /// The string index after the first place [other] occurs as a subsequence of
- /// these characters.
+ /// Finds the last occurrence of [characters].
///
- /// Returns the [string] index after the first occurrence of the character
- /// of [other] in the sequence of characters of [string].
- /// Returns a negative number if there is no such occurrence of [other].
- ///
- /// If [startIndex] is supplied, returns the index after the first occurrence
- /// of [other] in this which starts no earlier than [startIndex], and again
- /// returns `null` if there is no such occurrence. That is, if the result
- /// is non-negative, it is greater than or equal to [startIndex].
- int indexAfter(Characters other, [int startIndex]);
-
- /// The string index before the last place where [other] occurs as
- /// a subsequence of these characters.
- ///
- /// Returns the [string] index before last occurrence of the character
- /// of [other] in the sequence of characters of [string].
- /// Returns a negative number if there is no such occurrence of [other].
- ///
- /// If [startIndex] is supplied, returns the before after the first occurrence
- /// of [other] in this which starts no later than [startIndex], and again
- /// returns `null` if there is no such occurrence. That is the result
- /// is less than or equal to [startIndex].
- int lastIndexOf(Characters other, [int startIndex]);
-
- /// The string index after the last place where [other] occurs as
- /// a subsequence of these characters.
- ///
- /// Returns the [string] index after the last occurrence of the character
- /// of [other] in the sequence of characters of [string].
- /// Returns a negative number if there is no such occurrence of [other].
- ///
- /// If [startIndex] is supplied, returns the index after the last occurrence
- /// of [other] in this which ends no later than [startIndex], and again
- /// returns `null` if there is no such occurrence. That is the result
- /// is less than or equal to [startIndex].
- int lastIndexAfter(Characters other, [int startIndex]);
+ /// Returns a [CharacterRange] containing the last occurrence of
+ /// [characters]. Returns `null` if there is no such occurrence.
+ CharacterRange /*?*/ findLast(Characters characters);
/// Eagerly selects a subset of the characters.
///
@@ -170,21 +121,6 @@
/// is returned.
Characters takeLast(int count);
- /// Eagerly selects a range of characters.
- ///
- /// Both [start] and [end] are offsets of characters,
- /// not indices into [string].
- /// The [start] must be non-negative and [end] must be at least
- /// as large as [start].
- ///
- /// If [start] is at least as great as [length], then the result
- /// is an empty sequence of graphemes.
- /// If [end] is greater than [length], the count of character
- /// available, then it acts the same as if it was [length].
- ///
- /// A call like `gc.getRange(a, b)` is equivalent to `gc.take(b).skip(a)`.
- Characters getRange(int start, int end);
-
/// Eagerly selects a trailing sequence of characters.
///
/// Checks each character, from first to last, against [test],
@@ -240,41 +176,18 @@
/// of this sequence of strings with any other sequence of strings.
Characters operator +(Characters other);
- /// The characters of [string] with [other] inserted at [index].
- ///
- /// The [index] is a string can be any index into [string].
- Characters insertAt(int index, Characters other);
-
- /// The characters of [string] with a substring replaced by other.
- Characters replaceSubstring(int startIndex, int endIndex, Characters other);
-
- /// The characters of a substring of [string].
- ///
- /// The [startIndex] and [endIndex] must be a valid range of [string]
- /// (0 ≤ `startIndex` ≤ `endIndex` ≤ `string.length`).
- /// If [endIndex] is omitted, it defaults to `string.length`.
- Characters substring(int startIndex, [int endIndex]);
-
- /// Replaces [source] with [replacement].
+ /// Replaces [pattern] with [replacement].
///
/// Returns a new [GrapehemeClusters] where all occurrences of the
- /// [source] character sequence are replaced by [replacement],
+ /// [pattern] character sequence are replaced by [replacement],
/// unless the occurrence overlaps a prior replaced sequence.
- ///
- /// If [startIndex] is provided, only replace characters
- /// starting no earlier than [startIndex] in [string].
- Characters replaceAll(Characters source, Characters replacement,
- [int startIndex = 0]);
+ Characters replaceAll(Characters pattern, Characters replacement);
- /// Replaces the first [source] with [replacement].
+ /// Replaces the first [pattern] with [replacement].
///
/// Returns a new [Characters] where the first occurence of the
- /// [source] character sequence, if any, is replaced by [replacement].
- ///
- /// If [startIndex] is provided, replaces the first occurrence
- /// of [source] starting no earlier than [startIndex] in [string], if any.
- Characters replaceFirst(Characters source, Characters replacement,
- [int startIndex = 0]);
+ /// [pattern] character sequence, if any, is replaced by [replacement].
+ Characters replaceFirst(Characters pattern, Characters replacement);
/// The characters of the lower-case version of [string].
Characters toLowerCase();
@@ -292,79 +205,51 @@
String toString();
}
-/// Iterator over characters of a string.
+/// A range of characters of a [Characters].
///
-/// Characters are Unicode grapheme clusters represented as substrings
-/// of the original string.
+/// A range of consecutive characters in [source],
+/// corresponding to a start and end position in the source sequence.
+/// The range may even be empty, but that will still correspond to a position
+/// where both start and end happen to be the same position.
///
-/// The [start] and [end] indices will iterate the grapheme cluster
-/// boundaries of the string while the [Character] is iterating the
-/// grapheme clusters. A string with *n* grapheme clusters will have
-/// *n + 1* boundaries (except when *n* is zero, then there are also
-/// zero boundaries). Those boundaries can be accessed as, for example:
-/// ```dart
-/// Iterable<int> graphemeClusterBoundaries(String string) sync* {
-/// if (string.isEmpty) return;
-/// var char = Characters(string).iterator;
-/// var hasNext = false;
-/// do {
-/// hasNext = char.moveNext();
-/// yield char.start;
-/// } while (hasNext);
-/// }
-/// ```
-abstract class Character implements BidirectionalIterator<String> {
+/// The source sequence can be separated into the *preceding* characters,
+/// those before the range, the range itself, and the *following* characters,
+/// those after the range.
+///
+/// Some operations inspect or act on the characters of the current range,
+/// and other operations modify the range by moving the start and/or end
+/// position.
+///
+/// In general, an operation with a name starting with `move` will move
+/// both start and end positions, selecting an entirely new range
+/// which does not overlap the current range.
+/// Operations starting with `collapse` reduce the current range to
+/// a sub-range of itself.
+/// Operations starting with `expand` increase the current range
+/// by moving the end position to a later position
+/// or the start position to an earlier position,
+/// and operations starting with `drop` reduce the current range
+/// by moving the start to a later position or the end to an earlier position,
+/// thereby dropping characters from one or both ends from the current range.
+///
+///
+/// The character range implements [Iterator].
+/// The [moveNext] operation, when called with no argument,
+/// iterates the *next* single character of the [source] sequence.
+abstract class CharacterRange implements Iterator<String> {
/// Creates a new character iterator iterating the character
/// of [string].
- factory Character(String string) = _Character;
+ factory CharacterRange(String string) = StringCharacterRange;
- /// The beginning of the current character in the underlying string.
- ///
- /// This index is always at a cluster boundary unless the iterator
- /// has been reset to a non-boundary index.
- ///
- /// If equal to [end], there is no current character, and [moveNext]
- /// needs to be called first before accessing [current].
- /// This is the case at the beginning of iteration,
- /// after [moveNext] has returned false,
- /// or after calling [reset].
- int get start;
+ /// The character sequence that this range is a sub-sequence of.
+ Characters get source;
- /// The end of the current character in the underlying string.
- ///
- /// This index is always at a cluster boundary unless the iterator
- /// has been reset to a non-boundary index.
- ///
- /// If equal to [start], there is no current character.
- int get end;
-
- /// The code units of the current character.
+ /// The code units of the current character range.
List<int> get codeUnits;
- /// The code points of the current character.
+ /// The code points of the current character range.
Runes get runes;
- /// Resets the iterator to the [index] position.
- ///
- /// There is no [current] character after a reset,
- /// a call to [moveNext] is needed to find the end of the character
- /// at the [index] position.
- /// A `reset(0)` will reset to the beginning of the string, as for a newly
- /// created iterator.
- void reset(int index);
-
- /// Resets the iterator to the start of the string.
- ///
- /// The iterator will be in the same state as a newly created iterator
- /// from [Characters.iterator].
- void resetStart();
-
- /// Resets the iterator to the end of the string.
- ///
- /// The iterator will be in the same state as an iterator which has
- /// performed [moveNext] until it returned false.
- void resetEnd();
-
/// Creates a copy of this [Character].
///
/// The copy is in the exact same state as this iterator.
@@ -372,5 +257,385 @@
/// at the same time. To simply rewind an iterator, remember the
/// [start] or [end] position and use [reset] to reset the iterator
/// to that position.
- Character copy();
+ CharacterRange copy();
+
+ /// Whether the current range is empty.
+ ///
+ /// An empty range has no characters, but still has a position as
+ /// a sub-sequence of the source character sequence.
+ bool get isEmpty;
+
+ /// Whether the current range is not empty.
+ ///
+ /// A non-empty range contains at least one character.
+ bool get isNotEmpty;
+
+ /// Moves the range to be the next [count] characters after the current range.
+ ///
+ /// The new range starts at the end of the current range and includes
+ /// the next [count] characters, or as many as available if there
+ /// are fewer than [count] characters following the current range.
+ ///
+ /// The [count] must not be negative.
+ /// If it is zero, the call has the same effect as [collapseToEnd].
+ ///
+ /// Returns `true` if there were [count] following characters
+ /// and `false` if not.
+ bool moveNext([int count = 1]);
+
+ /// Moves the range to the next occurrence of [target]
+ /// after the current range.
+ ///
+ /// If there is an occurrence of [target] in the characters following
+ /// the current range,
+ /// then the new range contains exactly the first such occurrence of [target].
+ ///
+ /// If there is no occurrence of [target] after the current range,
+ /// the range is not modified.
+ ///
+ /// Returns `true` if the range is modified and `false` if not.
+ bool moveTo(Characters target);
+
+ /// Moves to the range until the next occurrence of [target].
+ ///
+ /// If there is an occurrence of [target] in the characters following
+ /// the current range,
+ /// then the new range contains the characters from the end
+ /// of the current range until, but not including the first such
+ /// occurrence of [target].
+ ///
+ /// If there is no occurrence of [target] after the current range,
+ /// the new range contains all the characters following the current range,
+ /// from the end of the current range until the end of the string.
+ ///
+ /// Returns `true` if there was an occurrence of [target].
+ bool moveUntil(Characters target);
+
+ /// Moves the range to be the last [count] characters before the current
+ /// range.
+ ///
+ /// The new range ends at the start of the current range and includes
+ /// the previous [count] characters, or as many as available if there
+ /// are fewer than [count] characters preceding the current range.
+ ///
+ /// The [count] must not be negative.
+ /// If it is zero, the call has the same effect as [collapseToStart].
+ ///
+ /// Returns `true` if there were [count] preceding characters
+ /// and `false` if not.
+ bool moveBack([int count = 1]);
+
+ /// Moves the range to the last occurrence of [target]
+ /// before the current range.
+ ///
+ /// If there is an occurrence of [target] in the characters preceding
+ /// the current range,
+ /// then the new range contains exactly the last such occurrence of [target].
+ ///
+ /// If there is no occurrence of [target] before the current range,
+ /// the range is not modified.
+ ///
+ /// Returns `true` if the range is modified and `false` if not.
+ bool moveBackTo(Characters target);
+
+ /// Moves to the range after the previous occurrence of [target].
+ ///
+ /// If there is an occurrence of [target] in the characters preceding
+ /// the current range,
+ /// then the new range contains the characters after
+ /// the last such occurrence, and up to the start of the current range.
+ ///
+ /// If there is no occurrence of [target] before the current range,
+ /// the new range contains all the characters preceding the current range,
+ /// from the start of the string to the start of the current range.
+ ///
+ /// Returns `true` if there was an occurrence of [target].
+ bool moveBackUntil(Characters target);
+
+ /// Expands the current range with the next [count] characters.
+ ///
+ /// Expands the current range to include the first [count] characters
+ /// following the current range, or as many as are available if
+ /// there are fewer than [count] characters following the current range.
+ ///
+ /// The [count] must not be negative.
+ /// If it is zero, the range does not change.
+ ///
+ /// Returns `true` if there are at least [count] characters following
+ /// the current range, and `false` if not.
+ bool expandNext([int count = 1]);
+
+ /// Expands the range to include the next occurrence of [target].
+ ///
+ /// If there is an occurrence of [target] in the characters following
+ /// the current range, the end of the range is moved to just after
+ /// the first such occurrence.
+ ///
+ /// If there is no such occurrence of [target], the range is not modified.
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ /// Notice that if [target] is empty,
+ /// the result is `true` even though the range is not modified.
+ bool expandTo(Characters target);
+
+ /// Expands the range to include characters until the next [target].
+ ///
+ /// If there is an occurrence of [target] in the characters following
+ /// the current range, the end of the range is moved to just before
+ /// the first such occurrence.
+ ///
+ /// If there is no such occurrence of [target], the end of the range is
+ /// moved to the end of [source].
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ bool expandUntil(Characters target);
+
+ /// Expands the range with the following characters satisfying [test].
+ ///
+ /// Iterates through the characters following the current range
+ /// and includes them into the range until finding a character that
+ /// [test] returns `false` for.
+ void expandWhile(bool Function(String) test);
+
+ /// Expands the range to the end of [source].
+ void expandAll();
+
+ /// Expands the current range with the preceding [count] characters.
+ ///
+ /// Expands the current range to include the last [count] characters
+ /// preceding the current range, or as many as are available if
+ /// there are fewer than [count] characters preceding the current range.
+ ///
+ /// The [count] must not be negative.
+ /// If it is zero, the range does not change.
+ ///
+ /// Returns `true` if there are at least [count] characters preceding
+ /// the current range, and `false` if not.
+ bool expandBack([int count = 1]);
+
+ /// Expands the range to include the previous occurrence of [target].
+ ///
+ /// If there is an occurrence of [target] in the characters preceding
+ /// the current range, the start of the range is moved to just before
+ /// the last such occurrence.
+ ///
+ /// If there is no such occurrence of [target], the range is not modified.
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ /// Notice that if [target] is empty,
+ /// the result is `true` even though the range is not modified.
+ bool expandBackTo(Characters target);
+
+ /// Expands the range to include characters back until the previous [target].
+ ///
+ /// If there is an occurrence of [target] in the characters preceding
+ /// the current range, the start of the range is moved to just after
+ /// the last such occurrence.
+ ///
+ /// If there is no such occurrence of [target], the start of the range is
+ /// moved to the start of [source].
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ bool expandBackUntil(Characters target);
+
+ /// Expands the range with the preceding characters satisfying [test].
+ ///
+ /// Iterates back through the characters preceding the current range
+ /// and includes them into the range until finding a character that
+ /// [test] returns `false` for.
+ void expandBackWhile(bool Function(String) test);
+
+ /// Expands the range back to the start of [source].
+ void expandBackAll();
+
+ /// Collapses the range to its start.
+ ///
+ /// Sets the end of the range to be the same position as the start.
+ /// The new range is empty and positioned at the start of the current range.
+ void collapseToStart();
+
+ /// Collapses to the first occurrence of [target] in the current range.
+ ///
+ /// If there is an occurrence of [target] in the characters of the current
+ /// range, then the new range contains exactly the characters of the
+ /// first such occurrence.
+ ///
+ /// If there is no such occurrence, the range is not changed.
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ bool collapseToFirst(Characters target);
+
+ /// Collapses to the last occurrence of [target] in the current range.
+ ///
+ /// If there is an occurrence of [target] in the characters of the current
+ /// range, then the new range contains exactly the characters of the
+ /// last such occurrence.
+ ///
+ /// If there is no such occurrence, the range is not changed.
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ bool collapseToLast(Characters target);
+
+ /// Collapses the range to its end.
+ ///
+ /// Sets the start of the range to be the same as its end.
+ /// The new range is an empty range positioned at the end
+ /// of the current range.
+ void collapseToEnd();
+
+ /// Drop the first [count] characters from the range.
+ ///
+ /// Advances the start of the range to after the [count] first characters
+ /// of the range, or as many as are available if
+ /// there are fewer than [count] characters in the current range.
+ ///
+ /// The [count] must not be negative.
+ /// If it is zero, the range is not changed.
+ ///
+ /// Returns `true` if there are [count] characters in the range,
+ /// and `false` if there are fewer.
+ bool dropFirst([int count = 1]);
+
+ /// Drops the first occurrence of [target] in the range.
+ ///
+ /// If the range contains any occurrences of [target],
+ /// then all characters before the end of the first such occurrence
+ /// are removed from the range.
+ /// This advances the start of the range to the end of the
+ /// first occurrence of [target].
+ ///
+ /// If there are no occurrences of [target] in the range,
+ /// the range is not changed.
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ bool dropTo(Characters target);
+
+ /// Drops characters from the start of the range until before
+ /// the first occurrence of [target].
+ ///
+ /// If the range contains any occurrences of [target],
+ /// then all characters before the start of the first such occurrence
+ /// are removed from the range.
+ /// This advances the start of the range to the start of the
+ /// first occurrence of [target].
+ ///
+ /// If there are no occurrences of [target] in the range,
+ /// all characters in the range are removed,
+ /// which gives the same effect as [collapseToEnd].
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ bool dropUntil(Characters target);
+
+ /// Drops characters from the start of the range while they satisfy [test].
+ ///
+ /// Iterates the characters of the current range from the start
+ /// and removes all the iterated characters until one is
+ /// reached for which [test] returns `false`.
+ /// If no such character is found, all characters are removed,
+ /// which gives the same effect as [collapseToEnd].
+ void dropWhile(bool Function(String) test);
+
+ /// Drop the last [count] characters from the range.
+ ///
+ /// Retracts the end of the range to before the [count] last characters
+ /// of the range, or as many as are available if
+ /// there are fewer than [count] characters in the current range.
+ ///
+ /// The [count] must not be negative.
+ /// If it is zero, the range is not changed.
+ ///
+ /// Returns `true` if there are [count] characters in the range,
+ /// and `false` if there are fewer.
+ bool dropLast([int count = 1]);
+
+ /// Drops the last occurrence of [target] in the range.
+ ///
+ /// If the range contains any occurrences of [target],
+ /// then all characters after the start of the last such occurrence
+ /// are removed from the range.
+ /// This retracts the end of the range to the start of the
+ /// last occurrence of [target].
+ ///
+ /// If there are no occurrences of [target] in the range,
+ /// the range is not changed.
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ bool dropBackTo(Characters target);
+
+ /// Drops characters from the end of the range until after
+ /// the last occurrence of [target].
+ ///
+ /// If the range contains any occurrences of [target],
+ /// then all characters after the end of the last such occurrence
+ /// are removed from the range.
+ /// This retracts the end of the range to the end of the
+ /// last occurrence of [target].
+ ///
+ /// If there are no occurrences of [target] in the range,
+ /// all characters in the range are removed,
+ /// which gives the same effect as [collapseToStart].
+ ///
+ /// Returns `true` if there is an occurrence of [target] and `false` if not.
+ bool dropBackUntil(Characters target);
+
+ /// Drops characters from the end of the range while they satisfy [test].
+ ///
+ /// Iterates the characters of the current range backwards from the end
+ /// and removes all the iterated characters until one is
+ /// reached for which [test] returns `false`.
+ /// If no such character is found, all characters are removed,
+ /// which gives the same effect as [collapseToStart].
+ void dropBackWhile(bool Function(String) test);
+
+ /// Creates a new [Characters] sequence by replacing the current range.
+ ///
+ /// Replaces the current range in [source] with [replacement].
+ ///
+ /// Returns a new [Characters] instance. Since the inserted characters
+ /// may combine with the preceding or following characters, grapheme cluster
+ /// boundaries need to be recomputed from scratch.
+ Characters replaceRange(Characters replacement);
+
+ /// Replaces all occurrences of [pattern] in the range with [replacement].
+ ///
+ /// Replaces the first occurrence of [pattern] in the range, then repeatedly
+ /// finds and replaces the next occurrence which does not overlap with
+ /// the earlier, already replaced, occurrence.
+ ///
+ /// Returns a new [Characters] instance for the resulting string.
+ Characters replaceAll(Characters pattern, Characters replacement);
+
+ /// Replaces the first occurrence of [pattern] with [replacement].
+ ///
+ /// Finds the first occurrence of [pattern] in the current range,
+ /// then replaces that occurrence with [replacement] and returns
+ /// the [Characters] of that string.
+ ///
+ /// If there is no first occurrence of [pattern], then the
+ /// characters of the source string are returned.
+ Characters replaceFirst(Characters pattern, Characters replacement);
+
+ /// Whether the current range starts with [characters].
+ ///
+ /// Returns `true` if the characters of the current range starts with
+ /// [characters], `false` if not.
+ bool startsWith(Characters characters);
+
+ /// Whether the current range ends with [characters].
+ ///
+ /// Returns `true` if the characters of the current range ends with
+ /// [characters], `false` if not.
+ bool endsWith(Characters characters);
+
+ /// Whether the current range is preceded by [characters].
+ ///
+ /// Returns `true` if the characters immediately preceding the current
+ /// range are [characters], and `false` if not.
+ bool isPrecededBy(Characters characters);
+
+ /// Whether the current range is followed by [characters].
+ ///
+ /// Returns `true` if the characters immediately following the current
+ /// range are [characters], and `false` if not.
+ bool isFollowedBy(Characters characters);
}
diff --git a/lib/src/characters_impl.dart b/lib/src/characters_impl.dart
index 2369eb9..109d750 100644
--- a/lib/src/characters_impl.dart
+++ b/lib/src/characters_impl.dart
@@ -2,32 +2,48 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
-part of "characters.dart";
+import "dart:collection" show ListBase;
+
+import 'package:characters/src/grapheme_clusters/table.dart';
+
+import "characters.dart";
+import "grapheme_clusters/constants.dart";
+import "grapheme_clusters/breaks.dart";
/// The grapheme clusters of a string.
-class _Characters extends Iterable<String> implements Characters {
+///
+/// Backed by a single string.
+class StringCharacters extends Iterable<String> implements Characters {
// Try to avoid allocating more empty grapheme clusters.
- static const Characters _empty = const _Characters._("");
+ static const StringCharacters _empty = const StringCharacters("");
final String string;
- const _Characters._(this.string);
+ const StringCharacters(this.string);
- factory _Characters(String string) =>
- string.isEmpty ? _empty : _Characters._(string);
+ @override
+ CharacterRange get iterator => StringCharacterRange._(string, 0, 0);
- Character get iterator => _Character(string);
+ @override
+ CharacterRange get iteratorAtEnd =>
+ StringCharacterRange._(string, string.length, string.length);
+ StringCharacterRange get _rangeAll =>
+ StringCharacterRange._(string, 0, string.length);
+
+ @override
String get first => string.isEmpty
? throw StateError("No element")
: string.substring(
0, Breaks(string, 0, string.length, stateSoTNoBreak).nextBreak());
+ @override
String get last => string.isEmpty
? throw StateError("No element")
: string.substring(
BackBreaks(string, string.length, 0, stateEoTNoBreak).nextBreak());
+ @override
String get single {
if (string.isEmpty) throw StateError("No element");
int firstEnd =
@@ -36,10 +52,13 @@
throw StateError("Too many elements");
}
+ @override
bool get isEmpty => string.isEmpty;
+ @override
bool get isNotEmpty => string.isNotEmpty;
+ @override
int get length {
if (string.isEmpty) return 0;
var brk = Breaks(string, 0, string.length, stateSoTNoBreak);
@@ -48,6 +67,7 @@
return length;
}
+ @override
Iterable<T> whereType<T>() {
Iterable<Object> self = this;
if (self is Iterable<T>) {
@@ -56,11 +76,13 @@
return Iterable<T>.empty();
}
+ @override
String join([String separator = ""]) {
if (separator == "") return string;
- return _explodeReplace(separator, "", 0);
+ return _explodeReplace(string, 0, string.length, separator, "");
}
+ @override
String lastWhere(bool test(String element), {String orElse()}) {
int cursor = string.length;
var brk = BackBreaks(string, cursor, 0, stateEoTNoBreak);
@@ -74,6 +96,7 @@
throw StateError("no element");
}
+ @override
String elementAt(int index) {
RangeError.checkNotNegative(index, "index");
int count = 0;
@@ -90,162 +113,61 @@
throw RangeError.index(index, this, "index", null, count);
}
+ @override
bool contains(Object other) {
if (other is String) {
if (other.isEmpty) return false;
int next = Breaks(other, 0, other.length, stateSoTNoBreak).nextBreak();
if (next != other.length) return false;
// [other] is single grapheme cluster.
- return _indexOf(other, 0) >= 0;
+ return _indexOf(string, other, 0, string.length) >= 0;
}
return false;
}
- int indexOf(Characters other, [int startIndex]) {
+ @override
+ bool startsWith(Characters other) {
int length = string.length;
- if (startIndex == null) {
- startIndex = 0;
- } else {
- RangeError.checkValidRange(startIndex, length, length, "startIndex");
- }
- return _indexOf(other.string, startIndex);
- }
-
- /// Finds first occurrence of [otherString] at grapheme cluster boundaries.
- ///
- /// Only finds occurrences starting at or after [startIndex].
- int _indexOf(String otherString, int startIndex) {
- int otherLength = otherString.length;
- if (otherLength == 0) {
- return nextBreak(string, 0, string.length, startIndex);
- }
- int length = string.length;
- while (startIndex + otherLength <= length) {
- int matchIndex = string.indexOf(otherString, startIndex);
- if (matchIndex < 0) return matchIndex;
- if (isGraphemeClusterBoundary(string, 0, length, matchIndex) &&
- isGraphemeClusterBoundary(
- string, 0, length, matchIndex + otherLength)) {
- return matchIndex;
- }
- startIndex = matchIndex + 1;
- }
- return -1;
- }
-
- /// Finds last occurrence of [otherString] at grapheme cluster boundaries.
- ///
- /// Starts searching backwards at [startIndex].
- int _lastIndexOf(String otherString, int startIndex) {
- int otherLength = otherString.length;
- if (otherLength == 0) {
- return previousBreak(string, 0, string.length, startIndex);
- }
- int length = string.length;
- while (startIndex >= 0) {
- int matchIndex = string.lastIndexOf(otherString, startIndex);
- if (matchIndex < 0) return matchIndex;
- if (isGraphemeClusterBoundary(string, 0, length, matchIndex) &&
- isGraphemeClusterBoundary(
- string, 0, length, matchIndex + otherLength)) {
- return matchIndex;
- }
- startIndex = matchIndex - 1;
- }
- return -1;
- }
-
- bool startsWith(Characters other, [int startIndex = 0]) {
- int length = string.length;
- RangeError.checkValueInInterval(startIndex, 0, length, "startIndex");
String otherString = other.string;
if (otherString.isEmpty) return true;
- return string.startsWith(otherString, startIndex) &&
- isGraphemeClusterBoundary(
- string, 0, length, startIndex + otherString.length);
+ return string.startsWith(otherString) &&
+ isGraphemeClusterBoundary(string, 0, length, otherString.length);
}
- bool endsWith(Characters other, [int endIndex]) {
+ @override
+ bool endsWith(Characters other) {
int length = string.length;
- if (endIndex == null) {
- endIndex = length;
- } else {
- RangeError.checkValueInInterval(endIndex, 0, length, "endIndex");
- }
String otherString = other.string;
if (otherString.isEmpty) return true;
int otherLength = otherString.length;
- int start = endIndex - otherLength;
+ int start = string.length - otherLength;
return start >= 0 &&
string.startsWith(otherString, start) &&
- isGraphemeClusterBoundary(string, 0, endIndex, start);
+ isGraphemeClusterBoundary(string, 0, length, start);
}
- Characters replaceAll(Characters pattern, Characters replacement,
- [int startIndex = 0]) {
- if (startIndex > 0) {
- RangeError.checkValueInInterval(
- startIndex, 0, string.length, "startIndex");
- }
- if (pattern.string.isEmpty) {
- if (string.isEmpty) return replacement;
- var replacementString = replacement.string;
- return Characters(
- _explodeReplace(replacementString, replacementString, startIndex));
- }
- int start = startIndex;
- StringBuffer buffer;
- int next = -1;
- while ((next = this.indexOf(pattern, start)) >= 0) {
- (buffer ??= StringBuffer())
- ..write(string.substring(start, next))
- ..write(replacement);
- start = next + pattern.string.length;
- }
- if (buffer == null) return this;
- buffer.write(string.substring(start));
- return Characters(buffer.toString());
+ @override
+ Characters replaceAll(Characters pattern, Characters replacement) =>
+ _rangeAll.replaceAll(pattern, replacement);
+
+ @override
+ Characters replaceFirst(Characters pattern, Characters replacement) {
+ var range = _rangeAll;
+ if (!range.collapseToFirst(pattern)) return this;
+ return range.replaceRange(replacement);
}
- // Replaces every internal grapheme cluster boundary with
- // [internalReplacement] and adds [outerReplacement] at both ends
- // Starts at [startIndex].
- String _explodeReplace(
- String internalReplacement, String outerReplacement, int startIndex) {
- var buffer = StringBuffer(string.substring(0, startIndex));
- var breaks = Breaks(string, startIndex, string.length, stateSoTNoBreak);
- int index = 0;
- String replacement = outerReplacement;
- while ((index = breaks.nextBreak()) >= 0) {
- buffer..write(replacement)..write(string.substring(startIndex, index));
- startIndex = index;
- replacement = internalReplacement;
- }
- buffer.write(outerReplacement);
- return buffer.toString();
- }
+ @override
+ bool containsAll(Characters other) =>
+ _indexOf(string, other.string, 0, string.length) >= 0;
- Characters replaceFirst(Characters source, Characters replacement,
- [int startIndex = 0]) {
- if (startIndex != 0) {
- RangeError.checkValueInInterval(
- startIndex, 0, string.length, "startIndex");
- }
- int index = _indexOf(source.string, startIndex);
- if (index < 0) return this;
- return Characters(string.replaceRange(
- index, index + source.string.length, replacement.string));
- }
-
- bool containsAll(Characters other) {
- return _indexOf(other.string, 0) >= 0;
- }
-
+ @override
Characters skip(int count) {
RangeError.checkNotNegative(count, "count");
if (count == 0) return this;
if (string.isNotEmpty) {
- var breaks = Breaks(string, 0, string.length, stateSoTNoBreak);
+ int stringLength = string.length;
+ var breaks = Breaks(string, 0, stringLength, stateSoTNoBreak);
int startIndex = 0;
while (count > 0) {
int index = breaks.nextBreak();
@@ -256,11 +178,13 @@
return _empty;
}
}
- return _Characters(string.substring(startIndex));
+ if (startIndex == stringLength) return _empty;
+ return StringCharacters(string.substring(startIndex));
}
return this;
}
+ @override
Characters take(int count) {
RangeError.checkNotNegative(count, "count");
if (count == 0) return _empty;
@@ -270,26 +194,29 @@
while (count > 0) {
int index = breaks.nextBreak();
if (index >= 0) {
- count--;
endIndex = index;
+ count--;
} else {
return this;
}
}
- return _Characters._(string.substring(0, endIndex));
+ return StringCharacters(string.substring(0, endIndex));
}
return this;
}
+ @override
Characters skipWhile(bool Function(String) test) {
if (string.isNotEmpty) {
- var breaks = Breaks(string, 0, string.length, stateSoTNoBreak);
+ int stringLength = string.length;
+ var breaks = Breaks(string, 0, stringLength, stateSoTNoBreak);
int index = 0;
int startIndex = 0;
while ((index = breaks.nextBreak()) >= 0) {
if (!test(string.substring(startIndex, index))) {
if (startIndex == 0) return this;
- return _Characters._(string.substring(startIndex));
+ if (startIndex == stringLength) return _empty;
+ return StringCharacters(string.substring(startIndex));
}
startIndex = index;
}
@@ -297,6 +224,7 @@
return _empty;
}
+ @override
Characters takeWhile(bool Function(String) test) {
if (string.isNotEmpty) {
var breaks = Breaks(string, 0, string.length, stateSoTNoBreak);
@@ -305,7 +233,7 @@
while ((index = breaks.nextBreak()) >= 0) {
if (!test(string.substring(endIndex, index))) {
if (endIndex == 0) return _empty;
- return _Characters._(string.substring(0, endIndex));
+ return StringCharacters(string.substring(0, endIndex));
}
endIndex = index;
}
@@ -313,42 +241,18 @@
return this;
}
- Characters where(bool Function(String) test) =>
- _Characters(super.where(test).join());
-
- Characters operator +(Characters other) => _Characters(string + other.string);
-
- Characters getRange(int start, int end) {
- RangeError.checkNotNegative(start, "start");
- if (end < start) throw RangeError.range(end, start, null, "end");
- if (string.isEmpty) return this;
- var breaks = Breaks(string, 0, string.length, stateSoTNoBreak);
- int startIndex = 0;
- int endIndex = string.length;
- end -= start;
- while (start > 0) {
- int index = breaks.nextBreak();
- if (index >= 0) {
- startIndex = index;
- start--;
- } else {
- return _empty;
- }
- }
- while (end > 0) {
- int index = breaks.nextBreak();
- if (index >= 0) {
- endIndex = index;
- end--;
- } else {
- if (startIndex == 0) return this;
- return _Characters(string.substring(startIndex));
- }
- }
- if (startIndex == 0 && endIndex == string.length) return this;
- return _Characters(string.substring(startIndex, endIndex));
+ @override
+ Characters where(bool Function(String) test) {
+ // Filter and join once: re-running [test] would double its cost/effects.
+ var filtered = super.where(test).join();
+ return filtered.isEmpty ? _empty : StringCharacters(filtered);
+ }
+ @override
+ Characters operator +(Characters other) =>
+ StringCharacters(string + other.string);
+
+ @override
Characters skipLast(int count) {
RangeError.checkNotNegative(count, "count");
if (count == 0) return this;
@@ -364,11 +268,12 @@
return _empty;
}
}
- return _Characters(string.substring(0, endIndex));
+ if (endIndex > 0) return StringCharacters(string.substring(0, endIndex));
}
return _empty;
}
+ @override
Characters skipLastWhile(bool Function(String) test) {
if (string.isNotEmpty) {
var breaks = BackBreaks(string, string.length, 0, stateEoTNoBreak);
@@ -377,7 +282,7 @@
while ((index = breaks.nextBreak()) >= 0) {
if (!test(string.substring(index, end))) {
if (end == string.length) return this;
- return _Characters(string.substring(0, end));
+ return end == 0 ? _empty : StringCharacters(string.substring(0, end));
}
end = index;
}
@@ -385,9 +290,10 @@
return _empty;
}
+ @override
Characters takeLast(int count) {
RangeError.checkNotNegative(count, "count");
- if (count == 0) return this;
+ if (count == 0) return _empty;
if (string.isNotEmpty) {
var breaks = BackBreaks(string, string.length, 0, stateEoTNoBreak);
int startIndex = string.length;
@@ -400,11 +306,14 @@
return this;
}
}
- return _Characters(string.substring(startIndex));
+ if (startIndex > 0) {
+ return StringCharacters(string.substring(startIndex));
+ }
}
return this;
}
+ @override
Characters takeLastWhile(bool Function(String) test) {
if (string.isNotEmpty) {
var breaks = BackBreaks(string, string.length, 0, stateEoTNoBreak);
@@ -412,7 +321,8 @@
int start = string.length;
while ((index = breaks.nextBreak()) >= 0) {
if (!test(string.substring(index, start))) {
- return _Characters(string.substring(start));
+ if (start == string.length) return _empty;
+ return StringCharacters(string.substring(start));
}
start = index;
}
@@ -420,179 +330,531 @@
return this;
}
- int indexAfter(Characters other, [int startIndex]) {
- int length = string.length;
- String otherString = other.string;
- int otherLength = otherString.length;
- if (startIndex == null) {
- startIndex = 0;
- } else {
- RangeError.checkValueInInterval(startIndex, 0, length, "startIndex");
- }
- if (otherLength > startIndex) startIndex = otherLength;
- int start = _indexOf(other.string, startIndex - otherLength);
- if (start < 0) return start;
- return start + otherLength;
- }
+ @override
+ Characters toLowerCase() => StringCharacters(string.toLowerCase());
- Characters insertAt(int index, Characters other) {
- int length = string.length;
- RangeError.checkValidRange(index, length, length, "index");
- if (string.isEmpty) {
- assert(index == 0);
- return other;
- }
- return _Characters._(string.replaceRange(index, index, other.string));
- }
+ @override
+ Characters toUpperCase() => StringCharacters(string.toUpperCase());
- int lastIndexAfter(Characters other, [int startIndex]) {
- String otherString = other.string;
- int otherLength = otherString.length;
- if (startIndex == null) {
- startIndex = string.length;
- } else {
- RangeError.checkValueInInterval(
- startIndex, 0, string.length, "startIndex");
- }
- if (otherLength > startIndex) return -1;
- int start = _lastIndexOf(otherString, startIndex - otherLength);
- if (start < 0) return start;
- return start + otherLength;
- }
-
- int lastIndexOf(Characters other, [int startIndex]) {
- if (startIndex == null) {
- startIndex = string.length;
- } else {
- RangeError.checkValueInInterval(
- startIndex, 0, string.length, "startIndex");
- }
- return _lastIndexOf(other.string, startIndex);
- }
-
- Characters replaceSubstring(int startIndex, int endIndex, Characters other) {
- RangeError.checkValidRange(
- startIndex, endIndex, string.length, "startIndex", "endIndex");
- if (startIndex == 0 && endIndex == string.length) return other;
- return _Characters._(
- string.replaceRange(startIndex, endIndex, other.string));
- }
-
- Characters substring(int startIndex, [int endIndex]) {
- endIndex = RangeError.checkValidRange(
- startIndex, endIndex, string.length, "startIndex", "endIndex");
- return _Characters(string.substring(startIndex, endIndex));
- }
-
- Characters toLowerCase() => _Characters(string.toLowerCase());
-
- Characters toUpperCase() => _Characters(string.toUpperCase());
-
+ @override
bool operator ==(Object other) =>
other is Characters && string == other.string;
+ @override
int get hashCode => string.hashCode;
+ @override
String toString() => string;
+
+ @override
+ CharacterRange findFirst(Characters characters) {
+ var range = _rangeAll;
+ if (range.collapseToFirst(characters)) return range;
+ return null;
+ }
+
+ @override
+ CharacterRange findLast(Characters characters) {
+ var range = _rangeAll;
+ if (range.collapseToLast(characters)) return range;
+ return null;
+ }
}
-class _Character implements Character {
- static const int _directionForward = 0;
- static const int _directionBackward = 0x04;
- static const int _directionMask = 0x04;
- static const int _cursorDeltaMask = 0x03;
-
+/// A [CharacterRange] on a single string.
+class StringCharacterRange implements CharacterRange {
+ /// The source string.
final String _string;
+
+ /// Start index of range in string.
+ ///
+ /// The index is a code unit index in the [String].
+ /// It is always at a grapheme cluster boundary.
int _start;
+
+ /// End index of range in string.
+ ///
+ /// The index is a code unit index in the [String].
+ /// It is always at a grapheme cluster boundary.
int _end;
- // Encodes current state,
- // whether we are moving forwards or backwards ([_directionMask]),
- // and how far ahead the cursor is from the start/end ([_cursorDeltaMask]).
- int _state;
- // The [current] value is created lazily and cached to avoid repeated
- // or unnecessary string allocation.
+
+ /// The [current] value is created lazily and cached to avoid repeated
+ /// or unnecessary string allocation.
String _currentCache;
- _Character(String string) : this._(string, 0, 0, stateSoTNoBreak);
- _Character._(this._string, this._start, this._end, this._state);
+ StringCharacterRange(String string) : this._(string, 0, 0);
+ StringCharacterRange._(this._string, this._start, this._end);
- int get start => _start;
- int get end => _end;
-
- String get current => _currentCache ??=
- (_start == _end ? null : _string.substring(_start, _end));
-
- bool moveNext() {
- int state = _state;
- int cursor = _end;
- if (state & _directionMask != _directionForward) {
- state = stateSoTNoBreak;
- } else {
- cursor += state & _cursorDeltaMask;
- }
- var breaks = Breaks(_string, cursor, _string.length, state);
- var next = breaks.nextBreak();
+ /// Changes the current range.
+ ///
+ /// Resets all cached state.
+ void _move(int start, int end) {
+ _start = start;
+ _end = end;
_currentCache = null;
- _start = _end;
- if (next >= 0) {
- _end = next;
- _state =
- (breaks.state & 0xF0) | _directionForward | (breaks.cursor - next);
+ }
+
+ /// Creates a [Breaks] from [_end] to `_string.length`.
+ ///
+ /// Always starts from [stateSoTNoBreak]; no state cached from an earlier
+ /// scan is reused.
+ Breaks _breaksFromEnd() {
+ return Breaks(_string, _end, _string.length, stateSoTNoBreak);
+ }
+
+ /// Creates a [BackBreaks] from [_start] back to the start of the string.
+ ///
+ /// Always starts from [stateEoTNoBreak]; no state cached from an earlier
+ /// scan is reused.
+ BackBreaks _backBreaksFromStart() {
+ return BackBreaks(_string, _start, 0, stateEoTNoBreak);
+ }
+
+ @override
+ String get current => _currentCache ??= _string.substring(_start, _end);
+
+ @override
+ bool moveNext([int count = 1]) => _advanceEnd(count, _end);
+
+ bool _advanceEnd(int count, int newStart) {
+ if (count > 0) {
+ var state = stateSoTNoBreak;
+ int index = _end;
+ while (index < _string.length) {
+ int char = _string.codeUnitAt(index);
+ int category = categoryControl;
+ int nextIndex = index + 1;
+ if (char & 0xFC00 != 0xD800) {
+ category = low(char);
+ } else if (nextIndex < _string.length) {
+ int nextChar = _string.codeUnitAt(nextIndex);
+ if (nextChar & 0xFC00 == 0xDC00) {
+ nextIndex += 1;
+ category = high(char, nextChar);
+ }
+ }
+ state = move(state, category);
+ if (state & stateNoBreak == 0 && --count == 0) {
+ _move(newStart, index);
+ return true;
+ }
+ index = nextIndex;
+ }
+ _move(newStart, _string.length);
+ return count == 1 && state != stateSoTNoBreak;
+ } else if (count == 0) {
+ _move(newStart, _end);
+ return true;
+ } else {
+ throw RangeError.range(count, 0, null, "count");
+ }
+ }
+
+ bool _moveNextPattern(String patternString, int start, int end) {
+ int offset = _indexOf(_string, patternString, start, end);
+ if (offset >= 0) {
+ _move(offset, offset + patternString.length);
return true;
}
- _state = stateEoTNoBreak | _directionBackward;
return false;
}
- bool movePrevious() {
- int state = _state;
- int cursor = _start;
- if (state & _directionMask == _directionForward) {
- state = stateEoTNoBreak;
- } else {
- cursor -= state & _cursorDeltaMask;
+ @override
+ bool moveBack([int count = 1]) => _retractStart(count, _start);
+
+ bool _retractStart(int count, int newEnd) {
+ RangeError.checkNotNegative(count, "count");
+ var breaks = _backBreaksFromStart();
+ int start = _start;
+ while (count > 0) {
+ int nextBreak = breaks.nextBreak();
+ if (nextBreak >= 0) {
+ start = nextBreak;
+ } else {
+ break;
+ }
+ count--;
}
- var breaks = BackBreaks(_string, cursor, 0, state);
- var next = breaks.nextBreak();
- _currentCache = null;
- _end = _start;
- if (next >= 0) {
- _start = next;
- _state =
- (breaks.state & 0xF0) | _directionBackward | (next - breaks.cursor);
+ _move(start, newEnd);
+ return count == 0;
+ }
+
+ bool _movePreviousPattern(String patternString, int start, int end) {
+ int offset = _lastIndexOf(_string, patternString, start, end);
+ if (offset >= 0) {
+ _move(offset, offset + patternString.length);
return true;
}
- _state = stateSoTNoBreak | _directionForward;
return false;
}
+ @override
List<int> get codeUnits => _CodeUnits(_string, _start, _end);
+ @override
Runes get runes => Runes(current);
- void reset(int index) {
- RangeError.checkValueInInterval(index, 0, _string.length, "index");
- _reset(index);
+ @override
+ CharacterRange copy() {
+ return StringCharacterRange._(_string, _start, _end);
}
- void resetStart() {
- _reset(0);
+ @override
+ void collapseToEnd() {
+ _move(_end, _end);
}
- void resetEnd() {
- _state = stateEoTNoBreak | _directionBackward;
- _currentCache = null;
- _start = _end = _string.length;
+ @override
+ void collapseToStart() {
+ _move(_start, _start);
}
- void _reset(int index) {
- _state = stateSoTNoBreak | _directionForward;
- _currentCache = null;
- _start = _end = index;
+ @override
+ bool dropFirst([int count = 1]) {
+ RangeError.checkNotNegative(count, "count");
+ if (_start == _end) return count == 0;
+ var breaks = Breaks(_string, _start, _end, stateSoTNoBreak);
+ while (count > 0) {
+ int nextBreak = breaks.nextBreak();
+ if (nextBreak >= 0) {
+ _start = nextBreak;
+ _currentCache = null;
+ count--;
+ } else {
+ return false;
+ }
+ }
+ return true;
}
- Character copy() {
- return _Character._(_string, _start, _end, _state);
+ @override
+ bool dropTo(Characters target) {
+ if (_start == _end) return target.isEmpty;
+ var targetString = target.string;
+ var index = _indexOf(_string, targetString, _start, _end);
+ if (index >= 0) {
+ _move(index + targetString.length, _end);
+ return true;
+ }
+ return false;
+ }
+
+ @override
+ bool dropUntil(Characters target) {
+ if (_start == _end) return target.isEmpty;
+ var targetString = target.string;
+ var index = _indexOf(_string, targetString, _start, _end);
+ if (index >= 0) {
+ _move(index, _end);
+ return true;
+ }
+ _move(_end, _end);
+ return false;
+ }
+
+ @override
+ void dropWhile(bool Function(String) test) {
+ if (_start == _end) return;
+ var breaks = Breaks(_string, _start, _end, stateSoTNoBreak);
+ int cursor = _start;
+ int next = 0;
+ while ((next = breaks.nextBreak()) >= 0) {
+ if (!test(_string.substring(cursor, next))) {
+ break;
+ }
+ cursor = next;
+ }
+ _move(cursor, _end);
+ }
+
+ @override
+ bool dropLast([int count = 1]) {
+ RangeError.checkNotNegative(count, "count");
+ var breaks = BackBreaks(_string, _end, _start, stateEoTNoBreak);
+ while (count > 0) {
+ int nextBreak = breaks.nextBreak();
+ if (nextBreak >= 0) {
+ _end = nextBreak;
+ _currentCache = null;
+ count--;
+ } else {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @override
+ bool dropBackTo(Characters target) {
+ if (_start == _end) return target.isEmpty;
+ var targetString = target.string;
+ var index = _lastIndexOf(_string, targetString, _start, _end);
+ if (index >= 0) {
+ _move(_start, index);
+ return true;
+ }
+ return false;
+ }
+
+ @override
+ bool dropBackUntil(Characters target) {
+ if (_start == _end) return target.isEmpty;
+ var targetString = target.string;
+ var index = _lastIndexOf(_string, targetString, _start, _end);
+ if (index >= 0) {
+ _move(_start, index + targetString.length);
+ return true;
+ }
+ _move(_start, _start);
+ return false;
+ }
+
+ @override
+ void dropBackWhile(bool Function(String) test) {
+ if (_start == _end) return;
+ var breaks = BackBreaks(_string, _end, _start, stateEoTNoBreak);
+ int cursor = _end;
+ int next = 0;
+ while ((next = breaks.nextBreak()) >= 0) {
+ if (!test(_string.substring(next, cursor))) {
+ break;
+ }
+ cursor = next;
+ }
+ _move(_start, cursor);
+ }
+
+ @override
+ bool expandNext([int count = 1]) => _advanceEnd(count, _start);
+
+ @override
+ bool expandTo(Characters target) {
+ String targetString = target.string;
+ int index = _indexOf(_string, targetString, _end, _string.length);
+ if (index >= 0) {
+ _move(_start, index + targetString.length);
+ return true;
+ }
+ return false;
+ }
+
+ @override
+ void expandWhile(bool Function(String character) test) {
+ var breaks = _breaksFromEnd();
+ int cursor = _end;
+ int next = 0;
+ while ((next = breaks.nextBreak()) >= 0) {
+ if (!test(_string.substring(cursor, next))) {
+ break;
+ }
+ cursor = next;
+ }
+ _move(_start, cursor);
+ }
+
+ @override
+ void expandAll() {
+ _move(_start, _string.length);
+ }
+
+ @override
+ bool expandBack([int count = 1]) => _retractStart(count, _end);
+
+ @override
+ bool expandBackTo(Characters target) {
+ var targetString = target.string;
+ int index = _lastIndexOf(_string, targetString, 0, _start);
+ if (index >= 0) {
+ _move(index, _end);
+ return true;
+ }
+ return false;
+ }
+
+ @override
+ void expandBackWhile(bool Function(String character) test) {
+ var breaks = _backBreaksFromStart();
+ int cursor = _start;
+ int next = 0;
+ while ((next = breaks.nextBreak()) >= 0) {
+ if (!test(_string.substring(next, cursor))) {
+ _move(cursor, _end);
+ return;
+ }
+ cursor = next;
+ }
+ _move(0, _end);
+ }
+
+ @override
+ bool expandBackUntil(Characters target) {
+ return _retractStartUntil(target.string, _end);
+ }
+
+ @override
+ void expandBackAll() {
+ _move(0, _end);
+ }
+
+ @override
+ bool expandUntil(Characters target) {
+ return _advanceEndUntil(target.string, _start);
+ }
+
+ @override
+ bool get isEmpty => _start == _end;
+
+ @override
+ bool get isNotEmpty => _start != _end;
+
+ @override
+ bool moveBackUntil(Characters target) {
+ var targetString = target.string;
+ return _retractStartUntil(targetString, _start);
+ }
+
+ bool _retractStartUntil(String targetString, int newEnd) {
+ var index = _lastIndexOf(_string, targetString, 0, _start);
+ if (index >= 0) {
+ _move(index + targetString.length, newEnd);
+ return true;
+ }
+ _move(0, newEnd);
+ return false;
+ }
+
+ @override
+ bool collapseToFirst(Characters target) {
+ return _moveNextPattern(target.string, _start, _end);
+ }
+
+ @override
+ bool collapseToLast(Characters target) {
+ return _movePreviousPattern(target.string, _start, _end);
+ }
+
+ @override
+ bool moveUntil(Characters target) {
+ var targetString = target.string;
+ return _advanceEndUntil(targetString, _end);
+ }
+
+ bool _advanceEndUntil(String targetString, int newStart) {
+ int index = _indexOf(_string, targetString, _end, _string.length);
+ if (index >= 0) {
+ _move(newStart, index);
+ return true;
+ }
+ _move(newStart, _string.length);
+ return false;
+ }
+
+ @override
+ Characters replaceFirst(Characters pattern, Characters replacement) {
+ String patternString = pattern.string;
+ String replacementString = replacement.string;
+ if (patternString.isEmpty) {
+ return StringCharacters(
+ _string.replaceRange(_start, _start, replacementString));
+ }
+ int index = _indexOf(_string, patternString, _start, _end);
+ String result = _string;
+ if (index >= 0) {
+ result = _string.replaceRange(
+ index, index + patternString.length, replacementString);
+ }
+ return StringCharacters(result);
+ }
+
+ @override
+ Characters replaceAll(Characters pattern, Characters replacement) {
+ var patternString = pattern.string;
+ var replacementString = replacement.string;
+ if (patternString.isEmpty) {
+ var replaced = _explodeReplace(
+ _string, _start, _end, replacementString, replacementString);
+ return StringCharacters(replaced);
+ }
+ if (_start == _end) return Characters(_string);
+ int start = 0;
+ int cursor = _start;
+ StringBuffer buffer;
+ while ((cursor = _indexOf(_string, patternString, cursor, _end)) >= 0) {
+ (buffer ??= StringBuffer())
+ ..write(_string.substring(start, cursor))
+ ..write(replacementString);
+ cursor += patternString.length;
+ start = cursor;
+ }
+ if (buffer == null) return Characters(_string);
+ buffer.write(_string.substring(start));
+ return Characters(buffer.toString());
+ }
+
+ @override
+ Characters replaceRange(Characters replacement) {
+ return Characters(_string.replaceRange(_start, _end, replacement.string));
+ }
+
+ @override
+ Characters get source => Characters(_string);
+
+ @override
+ bool startsWith(Characters characters) {
+ return _startsWith(_start, _end, characters.string);
+ }
+
+ @override
+ bool endsWith(Characters characters) {
+ return _endsWith(_start, _end, characters.string);
+ }
+
+ @override
+ bool isFollowedBy(Characters characters) {
+ return _startsWith(_end, _string.length, characters.string);
+ }
+
+ @override
+ bool isPrecededBy(Characters characters) {
+ return _endsWith(0, _start, characters.string);
+ }
+
+ bool _endsWith(int start, int end, String string) {
+ int length = string.length;
+ int stringStart = end - length;
+ return stringStart >= start &&
+ _string.startsWith(string, stringStart) &&
+ isGraphemeClusterBoundary(_string, start, end, stringStart);
+ }
+
+ bool _startsWith(int start, int end, String string) {
+ int length = string.length;
+ int stringEnd = start + length;
+ return stringEnd <= end &&
+ _string.startsWith(string, start) &&
+ isGraphemeClusterBoundary(_string, start, end, stringEnd);
+ }
+
+ @override
+ bool moveBackTo(Characters target) {
+ var targetString = target.string;
+ int index = _lastIndexOf(_string, targetString, 0, _start);
+ if (index >= 0) {
+ _move(index, index + targetString.length);
+ return true;
+ }
+ return false;
+ }
+
+ @override
+ bool moveTo(Characters target) {
+ var targetString = target.string;
+ int index = _indexOf(_string, targetString, _end, _string.length);
+ if (index >= 0) {
+ _move(index, index + targetString.length);
+ return true;
+ }
+ return false;
}
}
@@ -619,3 +881,106 @@
throw UnsupportedError("Cannot modify an unmodifiable list");
}
}
+
+/// Rebuilds [string] with replacements inserted around and inside the range
+/// from [start] to [end].
+///
+/// For an empty range, [outerReplacement] is inserted once at [start].
+/// Otherwise [outerReplacement] is written before the first chunk of the
+/// range and again before the text following [end], and
+/// [internalReplacement] is written before every later chunk. Chunks are
+/// delimited by the positions that [Breaks] reports for the range.
+///
+/// NOTE(review): this presumably relies on [Breaks] reporting [end] as its
+/// final break, so that the last chunk is copied; confirm against `Breaks`.
+String _explodeReplace(String string, int start, int end,
+    String internalReplacement, String outerReplacement) {
+  if (start == end) {
+    return string.replaceRange(start, start, outerReplacement);
+  }
+  var result = StringBuffer(string.substring(0, start));
+  var breaks = Breaks(string, start, end, stateSoTNoBreak);
+  var separator = outerReplacement;
+  var chunkStart = start;
+  for (var chunkEnd = breaks.nextBreak();
+      chunkEnd >= 0;
+      chunkEnd = breaks.nextBreak()) {
+    result.write(separator);
+    result.write(string.substring(chunkStart, chunkEnd));
+    chunkStart = chunkEnd;
+    // Only the first chunk is preceded by the outer replacement.
+    separator = internalReplacement;
+  }
+  result.write(outerReplacement);
+  result.write(string.substring(end));
+  return result.toString();
+}
+
+/// Finds the first occurrence of [pattern] in the range from [start] to
+/// [end] of [source].
+///
+/// Both [start] and [end] are grapheme cluster boundaries in the
+/// [source] string.
+///
+/// A match is only accepted if both its first index and the index just
+/// past it pass [isGraphemeClusterBoundary] for the range.
+/// Returns the index of the match, or -1 if there is none.
+/// An empty [pattern] matches at [start].
+int _indexOf(String source, String pattern, int start, int end) {
+  int patternLength = pattern.length;
+  if (patternLength == 0) return start;
+  // Any start position after realEnd won't fit the pattern before end.
+  int realEnd = end - patternLength;
+  if (realEnd < start) return -1;
+  // Use indexOf if what we can overshoot is
+  // less than twice as much as what we have left to search.
+  int rest = source.length - realEnd;
+  if (rest <= (realEnd - start) * 2) {
+    // Keep the search position separate from `start`: the boundary checks
+    // must be anchored at the real range start, which is a known grapheme
+    // cluster boundary, whereas a retry position may be inside a cluster.
+    int cursor = start;
+    // `realEnd` itself is a valid match position, so it is included.
+    while (cursor <= realEnd) {
+      int index = source.indexOf(pattern, cursor);
+      if (index < 0 || index > realEnd) return -1;
+      if (isGraphemeClusterBoundary(source, start, end, index) &&
+          isGraphemeClusterBoundary(
+              source, start, end, index + patternLength)) {
+        return index;
+      }
+      cursor = index + 1;
+    }
+    return -1;
+  }
+  return _gcIndexOf(source, pattern, start, end);
+}
+
+/// Finds [pattern] in [source] between [start] and [end] by trying each
+/// position reported by [Breaks] in turn.
+///
+/// A position is accepted when [pattern] occurs there and the index just
+/// past the match passes [isGraphemeClusterBoundary] for the range.
+/// Returns the accepted position, or -1 if there is none.
+int _gcIndexOf(String source, String pattern, int start, int end) {
+  var patternLength = pattern.length;
+  var breaks = Breaks(source, start, end, stateSoT);
+  for (var candidate = breaks.nextBreak();
+      candidate >= 0;
+      candidate = breaks.nextBreak()) {
+    var matchEnd = candidate + patternLength;
+    // The pattern no longer fits before `end`; stop searching.
+    if (matchEnd > end) return -1;
+    if (!source.startsWith(pattern, candidate)) continue;
+    if (isGraphemeClusterBoundary(source, start, end, matchEnd)) {
+      return candidate;
+    }
+  }
+  return -1;
+}
+
+/// Finds the last occurrence of [pattern] in the range from [start] to
+/// [end] of [source].
+///
+/// Both [start] and [end] are grapheme cluster boundaries in the
+/// [source] string.
+///
+/// A match is only accepted if both its first index and the index just
+/// past it pass [isGraphemeClusterBoundary] for the range.
+/// Returns the index of the match, or -1 if there is none.
+/// An empty [pattern] matches at [end].
+int _lastIndexOf(String source, String pattern, int start, int end) {
+  var patternLength = pattern.length;
+  if (patternLength == 0) return end;
+  // A match must begin somewhere in [start .. lastStart].
+  var lastStart = end - patternLength;
+  if (lastStart < start) return -1;
+  // The break-by-break search is used only when twice `lastStart` does not
+  // exceed `start`; otherwise String.lastIndexOf drives the search.
+  // NOTE(review): the heuristic was previously described as comparing the
+  // range before `start` with the searched range, which is not what this
+  // condition computes; confirm which one is intended.
+  if (lastStart * 2 <= start) {
+    return _gcLastIndexOf(source, pattern, start, end);
+  }
+  var searchFrom = lastStart;
+  while (searchFrom >= start) {
+    var index = source.lastIndexOf(pattern, searchFrom);
+    // No occurrence at all, or only occurrences before the range.
+    if (index < 0 || index < start) return -1;
+    if (isGraphemeClusterBoundary(source, start, end, index) &&
+        isGraphemeClusterBoundary(
+            source, start, end, index + patternLength)) {
+      return index;
+    }
+    searchFrom = index - 1;
+  }
+  return -1;
+}
+
+/// Finds [pattern] in [source] between [start] and [end] by trying each
+/// position reported by [BackBreaks] as the end of a match.
+///
+/// A candidate is accepted when [pattern] occurs immediately before the
+/// reported position and the index where the match begins passes
+/// [isGraphemeClusterBoundary] for the range.
+/// Returns the index where the match begins, or -1 if there is none.
+int _gcLastIndexOf(String source, String pattern, int start, int end) {
+  var patternLength = pattern.length;
+  var breaks = BackBreaks(source, end, start, stateEoT);
+  for (var matchEnd = breaks.nextBreak();
+      matchEnd >= 0;
+      matchEnd = breaks.nextBreak()) {
+    var matchStart = matchEnd - patternLength;
+    // The pattern no longer fits after `start`; stop searching.
+    if (matchStart < start) return -1;
+    if (!source.startsWith(pattern, matchStart)) continue;
+    if (isGraphemeClusterBoundary(source, start, end, matchStart)) {
+      return matchStart;
+    }
+  }
+  return -1;
+}
diff --git a/lib/src/grapheme_clusters/breaks.dart b/lib/src/grapheme_clusters/breaks.dart
index 1460e05..9a41609 100644
--- a/lib/src/grapheme_clusters/breaks.dart
+++ b/lib/src/grapheme_clusters/breaks.dart
@@ -329,6 +329,8 @@
}
/// The next break no earlier than [position] in `string.substring(start, end)`.
+///
+/// The index need not be at a grapheme cluster boundary.
int nextBreak(String text, int start, int end, int index) {
assert(0 <= start);
assert(start <= index);
diff --git a/pubspec.yaml b/pubspec.yaml
index db26108..5f1858c 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: characters
-version: 0.1.0
+version: 0.2.0
environment:
sdk: "^2.4.0"
dev_dependencies:
diff --git a/test/characters_test.dart b/test/characters_test.dart
index 7039038..cbb6e03 100644
--- a/test/characters_test.dart
+++ b/test/characters_test.dart
@@ -22,119 +22,50 @@
: Random().nextInt(0x3FFFFFFF);
random = Random(seed);
group("[Random Seed: $seed]", tests);
- group("index", () {
- test("simple", () {
- var flag = "\u{1F1E9}\u{1F1F0}";
- var string = "Hi $flag!"; // Regional Indications "DK".
+ group("characters", () {
+ test("operations", () {
+ var flag = "\u{1F1E9}\u{1F1F0}"; // Regional Indicators "DK".
+ var string = "Hi $flag!";
expect(string.length, 8);
- expect(gc(string).toList(), ["H", "i", " ", flag, "!"]);
+ var cs = gc(string);
+ expect(cs.length, 5);
+ expect(cs.toList(), ["H", "i", " ", flag, "!"]);
+ expect(cs.skip(2).toString(), " $flag!");
+ expect(cs.skipLast(2).toString(), "Hi ");
+ expect(cs.take(2).toString(), "Hi");
+ expect(cs.takeLast(2).toString(), "$flag!");
- expect(gc(string).indexOf(gc("")), 0);
- expect(gc(string).indexOf(gc(""), 3), 3);
- expect(gc(string).indexOf(gc(""), 4), 7);
- expect(gc(string).indexOf(gc(flag)), 3);
- expect(gc(string).indexOf(gc(flag), 3), 3);
- expect(gc(string).indexOf(gc(flag), 4), lessThan(0));
+ expect(cs.contains("\u{1F1E9}"), false);
+ expect(cs.contains(flag), true);
+ expect(cs.contains("$flag!"), false);
+ expect(cs.containsAll(gc("$flag!")), true);
- expect(gc(string).indexAfter(gc("")), 0);
- expect(gc(string).indexAfter(gc(""), 3), 3);
- expect(gc(string).indexAfter(gc(""), 4), 7);
- expect(gc(string).indexAfter(gc(flag)), 7);
- expect(gc(string).indexAfter(gc(flag), 7), 7);
- expect(gc(string).indexAfter(gc(flag), 8), lessThan(0));
+ expect(cs.takeWhile((x) => x != " ").toString(), "Hi");
+ expect(cs.takeLastWhile((x) => x != " ").toString(), "$flag!");
+ expect(cs.skipWhile((x) => x != " ").toString(), " $flag!");
+ expect(cs.skipLastWhile((x) => x != " ").toString(), "Hi ");
- expect(gc(string).lastIndexOf(gc("")), string.length);
- expect(gc(string).lastIndexOf(gc(""), 7), 7);
- expect(gc(string).lastIndexOf(gc(""), 6), 3);
- expect(gc(string).lastIndexOf(gc(""), 0), 0);
- expect(gc(string).lastIndexOf(gc(flag)), 3);
- expect(gc(string).lastIndexOf(gc(flag), 6), 3);
- expect(gc(string).lastIndexOf(gc(flag), 2), lessThan(0));
-
- expect(gc(string).lastIndexAfter(gc("")), string.length);
- expect(gc(string).lastIndexAfter(gc(""), 7), 7);
- expect(gc(string).lastIndexAfter(gc(""), 6), 3);
- expect(gc(string).lastIndexAfter(gc(""), 0), 0);
- expect(gc(string).lastIndexAfter(gc(flag)), 7);
- expect(gc(string).lastIndexAfter(gc(flag), 7), 7);
- expect(gc(string).lastIndexAfter(gc(flag), 6), lessThan(0));
- });
- test("multiple", () {
- var flag = "\u{1F1E9}\u{1F1F0}"; // DK.
- var revFlag = "\u{1F1F0}\u{1F1E9}"; // KD.
- var string = "-${flag}-$flag$flag-";
- expect(gc(string).indexOf(gc(flag)), 1);
- expect(gc(string).indexOf(gc(flag), 2), 6);
- expect(gc(string).indexOf(gc(flag), 6), 6);
- expect(gc(string).indexOf(gc(flag), 7), 10);
- expect(gc(string).indexOf(gc(flag), 10), 10);
- expect(gc(string).indexOf(gc(flag), 11), lessThan(0));
-
- expect(gc(string).indexOf(gc(revFlag)), lessThan(0));
+ expect(cs.findFirst(gc("")).moveBack(), false);
+ expect(cs.findFirst(gc(flag)).current, flag);
+ expect(cs.findLast(gc(flag)).current, flag);
+ expect(cs.iterator.moveNext(), true);
+ expect(cs.iterator.moveBack(), false);
+ expect((cs.iterator..moveNext()).current, "H");
+ expect(cs.iteratorAtEnd.moveNext(), false);
+ expect(cs.iteratorAtEnd.moveBack(), true);
+ expect((cs.iteratorAtEnd..moveBack()).current, "!");
});
- test("nonBoundary", () {
- // Composite pictogram example, from https://en.wikipedia.org/wiki/Zero-width_joiner.
- var flag = "\u{1f3f3}"; // U+1F3F3, Flag, waving. Category Pictogram.
- var white = "\ufe0f"; // U+FE0F, Variant selector 16. Category Extend.
- var zwj = "\u200d"; // U+200D, ZWJ
- var rainbow = "\u{1f308}"; // U+1F308, Rainbow. Category Pictogram
- var flagRainbow = "$flag$white$zwj$rainbow";
- expect(gc(flagRainbow).length, 1);
- for (var part in [flag, white, zwj, rainbow]) {
- expect(gc(flagRainbow).indexOf(gc(part)), lessThan(0));
- expect(gc(flagRainbow).indexAfter(gc(part)), lessThan(0));
- expect(gc(flagRainbow).lastIndexOf(gc(part)), lessThan(0));
- expect(gc(flagRainbow).lastIndexAfter(gc(part)), lessThan(0));
- }
- expect(gc(flagRainbow + flagRainbow).indexOf(gc(flagRainbow)), 0);
- expect(gc(flagRainbow + flagRainbow).indexAfter(gc(flagRainbow)), 6);
- expect(gc(flagRainbow + flagRainbow).lastIndexOf(gc(flagRainbow)), 6);
- expect(gc(flagRainbow + flagRainbow).lastIndexAfter(gc(flagRainbow)), 12);
- // 1 11 11 11 2
- // indices 0 67 90 12 34 67 3
- var partsAndWhole =
- "$flagRainbow $flag $white $zwj $rainbow $flagRainbow";
- // Flag and rainbow are independent graphemes.
- expect(gc(partsAndWhole).toList(), [
- flagRainbow,
- " ",
- flag,
- " $white", // Other + Extend
- " $zwj", // Other + ZWJ
- " ",
- rainbow,
- " ",
- flagRainbow
- ]);
- expect(gc(partsAndWhole).indexOf(gc(flag)), 7);
- expect(gc(partsAndWhole).indexAfter(gc(flag)), 9);
- expect(gc(partsAndWhole).lastIndexOf(gc(flag)), 7);
- expect(gc(partsAndWhole).lastIndexAfter(gc(flag)), 9);
+ testParts(gc("a"), gc("b"), gc("c"), gc("d"), gc("e"));
- expect(gc(partsAndWhole).indexOf(gc(rainbow)), 14);
- expect(gc(partsAndWhole).indexAfter(gc(rainbow)), 16);
- expect(gc(partsAndWhole).lastIndexOf(gc(rainbow)), 14);
- expect(gc(partsAndWhole).lastIndexAfter(gc(rainbow)), 16);
+ // Composite pictogram example, from https://en.wikipedia.org/wiki/Zero-width_joiner.
+ var flag = "\u{1f3f3}"; // U+1F3F3, Flag, waving. Category Pictogram.
+ var white = "\ufe0f"; // U+FE0F, Variant selector 16. Category Extend.
+ var zwj = "\u200d"; // U+200D, ZWJ
+ var rainbow = "\u{1f308}"; // U+1F308, Rainbow. Category Pictogram
- expect(gc(partsAndWhole).indexOf(gc(white)), lessThan(0));
- expect(gc(partsAndWhole).indexAfter(gc(white)), lessThan(0));
- expect(gc(partsAndWhole).lastIndexOf(gc(white)), lessThan(0));
- expect(gc(partsAndWhole).lastIndexAfter(gc(white)), lessThan(0));
- expect(gc(partsAndWhole).indexOf(gc(" $white")), 9);
- expect(gc(partsAndWhole).indexAfter(gc(" $white")), 11);
- expect(gc(partsAndWhole).lastIndexOf(gc(" $white")), 9);
- expect(gc(partsAndWhole).lastIndexAfter(gc(" $white")), 11);
-
- expect(gc(partsAndWhole).indexOf(gc(zwj)), lessThan(0));
- expect(gc(partsAndWhole).indexAfter(gc(zwj)), lessThan(0));
- expect(gc(partsAndWhole).lastIndexOf(gc(zwj)), lessThan(0));
- expect(gc(partsAndWhole).lastIndexAfter(gc(zwj)), lessThan(0));
- expect(gc(partsAndWhole).indexOf(gc(" $zwj")), 11);
- expect(gc(partsAndWhole).indexAfter(gc(" $zwj")), 13);
- expect(gc(partsAndWhole).lastIndexOf(gc(" $zwj")), 11);
- expect(gc(partsAndWhole).lastIndexAfter(gc(" $zwj")), 13);
- });
+ testParts(gc("$flag$white$zwj$rainbow"), gc("$flag$white"), gc("$rainbow"),
+ gc("$flag$zwj$rainbow"), gc("!"));
});
}
@@ -277,22 +208,7 @@
expected.take(expected.length - 1).join());
expect(actual.takeLast(1).toString(),
expected.skip(expected.length - 1).join());
-
- expect(actual.indexOf(gc(expected.first)), 0);
- expect(actual.indexAfter(gc(expected.first)), expected.first.length);
- expect(actual.lastIndexOf(gc(expected.last)),
- text.length - expected.last.length);
- expect(actual.lastIndexAfter(gc(expected.last)), text.length);
- if (expected.length > 1) {
- if (expected[0] != expected[1]) {
- expect(actual.indexOf(gc(expected[1])), expected[0].length);
- }
- }
}
-
- expect(actual.getRange(1, 3).toString(), expected.take(3).skip(1).join());
- expect(actual.getRange(1, 3).toString(), expected.take(3).skip(1).join());
-
bool isEven(String s) => s.length.isEven;
expect(
@@ -313,48 +229,27 @@
expect((actual + actual).toString(), actual.string + actual.string);
- List<int> accumulatedLengths = [0];
- for (int i = 0; i < expected.length; i++) {
- accumulatedLengths.add(accumulatedLengths.last + expected[i].length);
- }
-
// Iteration.
var it = actual.iterator;
- expect(it.start, 0);
- expect(it.end, 0);
+ expect(it.isEmpty, true);
for (var i = 0; i < expected.length; i++) {
expect(it.moveNext(), true);
- expect(it.start, accumulatedLengths[i]);
- expect(it.end, accumulatedLengths[i + 1]);
expect(it.current, expected[i]);
expect(actual.elementAt(i), expected[i]);
expect(actual.skip(i).first, expected[i]);
}
expect(it.moveNext(), false);
- expect(it.start, accumulatedLengths.last);
- expect(it.end, accumulatedLengths.last);
for (var i = expected.length - 1; i >= 0; i--) {
- expect(it.movePrevious(), true);
- expect(it.start, accumulatedLengths[i]);
- expect(it.end, accumulatedLengths[i + 1]);
+ expect(it.moveBack(), true);
expect(it.current, expected[i]);
}
- expect(it.movePrevious(), false);
- expect(it.start, 0);
- expect(it.end, 0);
+ expect(it.moveBack(), false);
+ expect(it.isEmpty, true);
// GraphemeClusters operations.
- expect(actual.toUpperCase().toString(), text.toUpperCase());
- expect(actual.toLowerCase().toString(), text.toLowerCase());
-
- if (text.isNotEmpty) {
- expect(actual.insertAt(1, gc("abc")).toString(),
- text.replaceRange(1, 1, "abc"));
- expect(actual.replaceSubstring(0, 1, gc("abc")).toString(),
- text.replaceRange(0, 1, "abc"));
- expect(actual.substring(0, 1).string, actual.string.substring(0, 1));
- }
+ expect(actual.toUpperCase().string, text.toUpperCase());
+ expect(actual.toLowerCase().string, text.toLowerCase());
expect(actual.string, text);
@@ -368,36 +263,9 @@
expect(actual.endsWith(gc(expected.sublist(i).join())), true);
for (int t = s + 1; t <= steps; t++) {
int j = expected.length * t ~/ steps;
- int start = accumulatedLengths[i];
- int end = accumulatedLengths[j];
var slice = expected.sublist(i, j).join();
var gcs = gc(slice);
expect(actual.containsAll(gcs), true);
- expect(actual.startsWith(gcs, start), true);
- expect(actual.endsWith(gcs, end), true);
- }
- }
- if (accumulatedLengths.last > expected.length) {
- int i = expected.indexWhere((s) => s.length != 1);
- assert(accumulatedLengths[i + 1] > accumulatedLengths[i] + 1);
- expect(
- actual.startsWith(gc(text.substring(0, accumulatedLengths[i] + 1))),
- false);
- expect(actual.endsWith(gc(text.substring(accumulatedLengths[i] + 1))),
- false);
- if (i > 0) {
- expect(
- actual.startsWith(
- gc(text.substring(1, accumulatedLengths[i] + 1)), 1),
- false);
- }
- if (i < expected.length - 1) {
- int secondToLast = accumulatedLengths[expected.length - 1];
- expect(
- actual.endsWith(
- gc(text.substring(accumulatedLengths[i] + 1, secondToLast)),
- secondToLast),
- false);
}
}
}
@@ -408,7 +276,7 @@
int pos = -1;
if (random.nextBool()) {
pos = expected.length;
- it.reset(text.length);
+ it = actual.iteratorAtEnd;
}
int steps = 5 + random.nextInt(expected.length * 2 + 1);
bool lastMove = false;
@@ -416,23 +284,19 @@
bool back = false;
if (pos < 0) {
expect(lastMove, false);
- expect(it.start, 0);
- expect(it.end, 0);
+ expect(it.isEmpty, true);
} else if (pos >= expected.length) {
expect(lastMove, false);
- expect(it.start, text.length);
- expect(it.end, text.length);
+ expect(it.isEmpty, true);
back = true;
} else {
expect(lastMove, true);
expect(it.current, expected[pos]);
- expect(it.start, accumulatedLengths[pos]);
- expect(it.end, accumulatedLengths[pos + 1]);
back = random.nextBool();
}
if (--steps < 0) break;
if (back) {
- lastMove = it.movePrevious();
+ lastMove = it.moveBack();
pos -= 1;
} else {
lastMove = it.moveNext();
@@ -443,3 +307,249 @@
}
Characters gc(String string) => Characters(string);
+
+/// Registers a test, named after the concatenation of [a] to [e], that
+/// exercises the range operations on that concatenation.
+///
+/// Covers stepping, searching, expanding, collapsing and dropping on the
+/// iterator, the replace operations, and the prefix/suffix queries.
+/// The expectations treat each of [a] to [e] as a single iteration step and
+/// as distinct from the others.
+void testParts(
+    Characters a, Characters b, Characters c, Characters d, Characters e) {
+  var cs = gc("$a$b$c$d$e");
+  test("$cs", () {
+    var it = cs.iterator;
+    expect(it.isEmpty, true);
+    expect(it.isNotEmpty, false);
+    expect(it.current, "");
+
+    // moveNext().
+    expect(it.moveNext(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$a");
+    expect(it.moveNext(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$b");
+    expect(it.moveNext(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$c");
+    expect(it.moveNext(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$d");
+    expect(it.moveNext(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$e");
+    expect(it.moveNext(), false);
+    expect(it.isEmpty, true);
+    expect(it.current, "");
+
+    // moveBack().
+    expect(it.moveBack(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$e");
+    expect(it.moveBack(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$d");
+    expect(it.moveBack(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$c");
+    expect(it.moveBack(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$b");
+    expect(it.moveBack(), true);
+    expect(it.isEmpty, false);
+    expect(it.current, "$a");
+    expect(it.moveBack(), false);
+    expect(it.isEmpty, true);
+    expect(it.current, "");
+
+    // moveTo(target).
+    expect(it.moveTo(c), true);
+    expect(it.current, "$c");
+    expect(it.moveTo(b), false);
+    expect(it.moveTo(c), false);
+    expect(it.current, "$c");
+    expect(it.moveTo(d), true);
+    expect(it.current, "$d");
+
+    // moveBackTo(target).
+    expect(it.moveBackTo(c), true);
+    expect(it.current, "$c");
+    expect(it.moveBackTo(d), false);
+    expect(it.moveBackTo(c), false);
+    expect(it.moveBackTo(a), true);
+    expect(it.current, "$a");
+
+    // moveNext(n).
+    expect(it.moveBack(), false);
+
+    expect(it.moveNext(2), true);
+    expect(it.current, "$a$b");
+    expect(it.moveNext(4), false);
+    expect(it.current, "$c$d$e");
+    expect(it.moveNext(0), true);
+    expect(it.current, "");
+    expect(it.moveNext(1), false);
+    expect(it.current, "");
+
+    // moveBack(n).
+    expect(it.moveBack(2), true);
+    expect(it.current, "$d$e");
+    expect(it.moveBack(1), true);
+    expect(it.current, "$c");
+    expect(it.moveBack(3), false);
+    expect(it.current, "$a$b");
+    expect(it.moveBack(), false);
+
+    // expandAll/collapseToFirst.
+    it.expandAll();
+    expect(it.current, "$a$b$c$d$e");
+    expect(it.collapseToFirst(b), true);
+    expect(it.current, "$b");
+    it.expandAll();
+    expect(it.current, "$b$c$d$e");
+    expect(it.collapseToFirst(a), false);
+    expect(it.current, "$b$c$d$e");
+
+    // expandBackAll/collapseToLast.
+    it.expandBackAll();
+    expect(it.current, "$a$b$c$d$e");
+    expect(it.collapseToLast(c), true);
+    expect(it.current, "$c");
+
+    // expandTo/expandBackTo.
+    expect(it.expandTo(e), true);
+    expect(it.current, "$c$d$e");
+    expect(it.expandTo(e), false);
+    expect(it.expandBackTo(b), true);
+    expect(it.current, "$b$c$d$e");
+    expect(it.expandBackTo(b), false);
+    expect(it.current, "$b$c$d$e");
+    expect(it.collapseToFirst(c), true);
+    expect(it.current, "$c");
+
+    // expandUntil/expandBackUntil.
+    expect(it.expandBackUntil(a), true);
+    expect(it.current, "$b$c");
+    expect(it.expandBackUntil(a), true);
+    expect(it.current, "$b$c");
+    expect(it.expandUntil(e), true);
+    expect(it.current, "$b$c$d");
+    expect(it.expandUntil(e), true);
+    expect(it.current, "$b$c$d");
+
+    // dropFirst/dropLast, dropTo/dropBackTo.
+    expect(it.dropFirst(), true);
+    expect(it.current, "$c$d");
+    expect(it.dropLast(), true);
+    expect(it.current, "$c");
+    it.expandBackAll();
+    it.expandAll();
+    expect(it.current, "$a$b$c$d$e");
+    expect(it.dropTo(b), true);
+    expect(it.current, "$c$d$e");
+    expect(it.dropBackTo(d), true);
+    expect(it.current, "$c");
+
+    it.expandBackAll();
+    it.expandAll();
+    expect(it.current, "$a$b$c$d$e");
+
+    expect(it.dropUntil(b), true);
+    expect(it.current, "$b$c$d$e");
+    expect(it.dropBackUntil(d), true);
+    expect(it.current, "$b$c$d");
+
+    it.dropWhile((x) => x == b.string);
+    expect(it.current, "$c$d");
+    it.expandBackAll();
+    expect(it.current, "$a$b$c$d");
+    it.dropBackWhile((x) => x != b.string);
+    expect(it.current, "$a$b");
+    it.dropBackWhile((x) => false);
+    expect(it.current, "$a$b");
+
+    // expandWhile/expandBackWhile.
+    it.expandWhile((x) => false);
+    expect(it.current, "$a$b");
+    it.expandWhile((x) => x != e.string);
+    expect(it.current, "$a$b$c$d");
+    expect(it.collapseToFirst(c), true);
+    expect(it.current, "$c");
+    it.expandBackWhile((x) => false);
+    expect(it.current, "$c");
+    it.expandBackWhile((x) => x != a.string);
+    expect(it.current, "$b$c");
+
+    var cs2 = cs.replaceAll(c, gc(""));
+    var cs3 = cs.replaceFirst(c, gc(""));
+    var cs4 = cs.findFirst(c).replaceRange(gc(""));
+    var cse = gc("$a$b$d$e");
+    expect(cs2, cse);
+    expect(cs3, cse);
+    expect(cs4, cse);
+    var cs5 = cs4.replaceAll(a, c);
+    expect(cs5, gc("$c$b$d$e"));
+    var cs6 = cs5.replaceAll(gc(""), a);
+    expect(cs6, gc("$a$c$a$b$a$d$a$e$a"));
+    var cs7 = cs6.replaceFirst(b, a);
+    expect(cs7, gc("$a$c$a$a$a$d$a$e$a"));
+    var cs8 = cs7.replaceFirst(e, a);
+    expect(cs8, gc("$a$c$a$a$a$d$a$a$a"));
+    var cs9 = cs8.replaceAll(a + a, b);
+    expect(cs9, gc("$a$c$b$a$d$b$a"));
+    it = cs9.iterator;
+    it.moveTo(b + a);
+    // The actual value goes first so that failure messages read correctly.
+    expect(it.current, "$b$a");
+    it.expandTo(b + a);
+    expect(it.current, "$b$a$d$b$a");
+    var cs10 = it.replaceAll(b + a, e + e);
+    expect(cs10, gc("$a$c$e$e$d$e$e"));
+    var cs11 = it.replaceRange(e);
+    expect(cs11, gc("$a$c$e"));
+
+    expect(cs.startsWith(gc("")), true);
+    expect(cs.startsWith(a), true);
+    expect(cs.startsWith(a + b), true);
+    expect(cs.startsWith(gc("$a$b$c")), true);
+    expect(cs.startsWith(gc("$a$b$c$d")), true);
+    expect(cs.startsWith(gc("$a$b$c$d$e")), true);
+    expect(cs.startsWith(b), false);
+    expect(cs.startsWith(c), false);
+    expect(cs.startsWith(d), false);
+    expect(cs.startsWith(e), false);
+
+    expect(cs.endsWith(gc("")), true);
+    expect(cs.endsWith(e), true);
+    expect(cs.endsWith(d + e), true);
+    expect(cs.endsWith(gc("$c$d$e")), true);
+    expect(cs.endsWith(gc("$b$c$d$e")), true);
+    expect(cs.endsWith(gc("$a$b$c$d$e")), true);
+    expect(cs.endsWith(d), false);
+    expect(cs.endsWith(c), false);
+    expect(cs.endsWith(b), false);
+    expect(cs.endsWith(a), false);
+
+    it = cs.findFirst(b + c);
+    expect(it.startsWith(gc("")), true);
+    expect(it.startsWith(b), true);
+    expect(it.startsWith(b + c), true);
+    expect(it.startsWith(a + b + c), false);
+    expect(it.startsWith(b + c + d), false);
+    expect(it.startsWith(a), false);
+
+    expect(it.endsWith(gc("")), true);
+    expect(it.endsWith(c), true);
+    expect(it.endsWith(b + c), true);
+    expect(it.endsWith(a + b + c), false);
+    expect(it.endsWith(b + c + d), false);
+    expect(it.endsWith(d), false);
+
+    it.collapseToFirst(c);
+    expect(it.isPrecededBy(gc("")), true);
+    expect(it.isPrecededBy(b), true);
+    expect(it.isPrecededBy(a + b), true);
+    expect(it.isPrecededBy(a + b + c), false);
+    expect(it.isPrecededBy(a), false);
+
+    expect(it.isFollowedBy(gc("")), true);
+    expect(it.isFollowedBy(d), true);
+    expect(it.isFollowedBy(d + e), true);
+    expect(it.isFollowedBy(c + d + e), false);
+    expect(it.isFollowedBy(e), false);
+  });
+}