Add split methods. (#16)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1904b5e..6e104f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
# Changelog
+## 0.5.1
+
+* Added `split` methods on `Characters` and `CharacterRange`.
+
## 0.5.0
* Change [codeUnits] getter to [utf16CodeUnits] which returns an iterable.
diff --git a/analysis_options.yaml b/analysis_options.yaml
index 108d105..3bad72b 100644
--- a/analysis_options.yaml
+++ b/analysis_options.yaml
@@ -1 +1,7 @@
include: package:pedantic/analysis_options.yaml
+analyzer:
+ errors:
+ omit_local_variable_types: ignore
+ annotate_overrides: ignore
+ prefer_single_quotes: ignore
+ use_function_type_syntax_for_parameters: ignore
diff --git a/example/main.dart b/example/main.dart
index 4e6316b..467a2f4 100644
--- a/example/main.dart
+++ b/example/main.dart
@@ -2,7 +2,7 @@
// Small API examples. For full API docs see:
// https://pub.dev/documentation/characters/latest/characters/characters-library.html
-main() {
+void main() {
String hi = 'Hi 🇩🇰';
print('String is "$hi"\n');
diff --git a/lib/src/characters.dart b/lib/src/characters.dart
index 17a45fe..676716b 100644
--- a/lib/src/characters.dart
+++ b/lib/src/characters.dart
@@ -186,6 +186,40 @@
/// Returns the current characters if there is no occurrence of [pattern].
Characters replaceAll(Characters pattern, Characters replacement);
+ /// Splits this sequence of characters at each occurrence of [pattern].
+ ///
+ /// Returns a lazy iterable of characters that were separated by [pattern].
+ /// The iterable has *at most* [maxParts] elements if a positive [maxParts]
+ /// is supplied.
+ ///
+ /// Finds each occurrence of [pattern], which does not overlap with
+ /// a previously found occurrence, then the non-matched characters
+ /// before, after, and between the matches are provided in first-to-last
+ /// position order.
+ ///
+ /// If [pattern] is empty, the character sequence is split into separate
+ /// characters, and no leading or trailing empty ranges are provided
+ /// unless the range itself is empty,
+ /// in which case a single empty range is the only result range.
+ /// Otherwise a range starting or ending with [pattern] will cause
+ /// an empty character sequence to be emitted at the start or end.
+ ///
+ /// If [maxParts] is provided and greater than zero,
+ /// only the first `maxParts - 1` occurrences of [pattern] are found
+ /// and split at.
+ /// Any further occurrences will be included in the last part.
+ /// Example:
+ /// ```dart
+ /// var c = "abracadabra".characters;
+ /// var parts = c.split("a".characters, 4).toList();
+ /// print(parts); // Prints ["", "br", "c", "dabra"]
+ /// ```
+ /// If there are fewer than `maxParts - 1` occurrences of [pattern],
+ /// then the characters are split at all occurrences.
+ /// If [maxParts] is zero or negative, it is ignored and the result
+ /// is split at all occurrences of [pattern].
+ Iterable<Characters> split(Characters pattern, [int maxParts = 0]);
+
/// Replaces the first occurrence of [pattern] with [replacement].
///
/// Returns a new [Characters] where the first occurence of the
@@ -647,6 +681,45 @@
/// in the current range.
CharacterRange /*?*/ replaceAll(Characters pattern, Characters replacement);
+ /// Splits the current range of characters at each occurrence of [pattern].
+ ///
+ /// Returns a lazy iterable of character ranges that were separated by
+ /// [pattern].
+ /// Each provided character range object is new
+ /// and unrelated to this character range.
+ /// The iterable has *at most* [maxParts] elements if a positive [maxParts]
+ /// is supplied.
+ ///
+ /// Finds each occurrence of [pattern] in the range, which does not overlap
+ /// with a previously found occurrence, then the non-matched characters
+ /// of the range before, after and between the matches are provided
+ /// in first-to-last position order.
+ ///
+ /// If [pattern] is empty, the range is split into separate characters,
+ /// and no leading or trailing empty ranges are provided unless the
+ /// range itself is empty, in which case a single empty range is the
+ /// only result range.
+ /// Otherwise a range starting or ending with [pattern] will cause
+ /// an empty range to be emitted at the start or end.
+ ///
+ /// If [maxParts] is provided and greater than zero,
+ /// only the first `maxParts - 1` occurrences of [pattern] are found
+ /// and split at.
+ /// Any further occurrences will be included in the last part.
+ ///
+ /// Example:
+ /// ```dart
+ /// var c = "abracadabra".characters.dropFirst().dropLast();
+ /// // c is "bracadabr".
+ /// var parts = c.split("a".characters, 3).toList();
+ /// print(parts); // [br, c, dabr]
+ /// ```
+ /// If there are fewer than `maxParts - 1` occurrences of [pattern],
+ /// then the characters are split at all occurrences.
+ /// If [maxParts] is zero or negative, it is ignored and the result
+ /// is split at all occurrences of [pattern].
+ Iterable<CharacterRange> split(Characters pattern, [int maxParts = 0]);
+
/// Replaces the first occurrence of [pattern] with [replacement].
///
/// Finds the first occurrence of [pattern] in the current range,
diff --git a/lib/src/characters_impl.dart b/lib/src/characters_impl.dart
index e3dc6ac..4f474c8 100644
--- a/lib/src/characters_impl.dart
+++ b/lib/src/characters_impl.dart
@@ -155,6 +155,37 @@
_rangeAll.replaceFirst(pattern, replacement)?.source ?? this;
@override
+  Iterable<Characters> split(Characters pattern, [int maxParts = 0]) sync* {
+    // Nothing to split, or only one part requested: the sole part is this
+    // sequence itself.
+    if (string.isEmpty || maxParts == 1) {
+      yield this;
+      return;
+    }
+    var separator = pattern.string;
+    var partStart = 0;
+    if (separator.isEmpty) {
+      // Empty pattern. Split on internal boundaries only.
+      var breaks = Breaks(string, 0, string.length, stateSoTNoBreak);
+      while (true) {
+        var boundary = breaks.nextBreak();
+        // A negative result means there are no more boundaries; every
+        // character has already been emitted, so stop without a tail part.
+        if (boundary < 0) return;
+        yield StringCharacters(string.substring(partStart, boundary));
+        partStart = boundary;
+        // A zero or negative [maxParts] only moves further from 1 here,
+        // so it never limits the number of parts.
+        maxParts -= 1;
+        if (maxParts == 1) break;
+      }
+      // Part limit reached: emit whatever remains, unless nothing is left.
+      if (partStart != string.length) {
+        yield StringCharacters(string.substring(partStart));
+      }
+      return;
+    }
+    while (true) {
+      var matchStart = _indexOf(string, separator, partStart, string.length);
+      if (matchStart < 0) break;
+      yield StringCharacters(string.substring(partStart, matchStart));
+      // Continue searching after the match, so matches never overlap.
+      partStart = matchStart + separator.length;
+      maxParts -= 1;
+      if (maxParts == 1) break;
+    }
+    // The final part is always emitted for a non-empty pattern, even when it
+    // is empty because the string ends with the pattern.
+    yield StringCharacters(string.substring(partStart));
+  }
+
+ @override
bool containsAll(Characters other) =>
_indexOf(string, other.string, 0, string.length) >= 0;
@@ -896,6 +927,39 @@
@override
String get stringBefore => _string.substring(0, _start);
+
+ @override
+  Iterable<CharacterRange> split(Characters pattern, [int maxParts = 0]) sync* {
+    // Splits the current range (`_start` to `_end` of `_string`) at each
+    // occurrence of [pattern], yielding at most [maxParts] ranges when
+    // [maxParts] is positive. A zero or negative [maxParts] is never
+    // decremented to 1 in practice, so it imposes no limit.
+    if (maxParts == 1 || _start == _end) {
+      // Yield a fresh range rather than `this`, so that moving the yielded
+      // range cannot move the range being split.
+      yield StringCharacterRange._(_string, _start, _end);
+      return;
+    }
+    var patternString = pattern.string;
+    var start = _start;
+    if (patternString.isNotEmpty) {
+      do {
+        var match = _indexOf(_string, patternString, start, _end);
+        if (match < 0) break;
+        yield StringCharacterRange._(_string, start, match);
+        // Resume searching after the match, so matches never overlap.
+        start = match + patternString.length;
+        maxParts--;
+      } while (maxParts != 1);
+      // The final part is always emitted, even when it is empty because the
+      // range ends with the pattern.
+      yield StringCharacterRange._(_string, start, _end);
+    } else {
+      // Empty pattern. Split on internal boundaries only.
+      var breaks = Breaks(_string, _start, _end, stateSoTNoBreak);
+      do {
+        var match = breaks.nextBreak();
+        // A negative result means there are no more boundaries; every
+        // character has already been emitted, so stop without a tail part.
+        if (match < 0) return;
+        yield StringCharacterRange._(_string, start, match);
+        start = match;
+        maxParts--;
+      } while (maxParts != 1);
+      // Part limit reached: emit the remainder unless nothing is left.
+      if (start < _end) {
+        yield StringCharacterRange._(_string, start, _end);
+      }
+    }
+  }
}
String _explodeReplace(String string, int start, int end,
diff --git a/pubspec.yaml b/pubspec.yaml
index 5750ec9..c3072ad 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: characters
-version: 0.5.0
+version: 0.5.1
description: String replacement with operations that are Unicode/grapheme cluster aware.
homepage: https://www.github.com/dart-lang/characters
@@ -7,4 +7,4 @@
sdk: ">=2.6.0 <3.0.0"
dev_dependencies:
test: "^1.6.0"
- pedantic:
+ pedantic: ^1.9.0
diff --git a/test/characters_test.dart b/test/characters_test.dart
index 8ef5ec2..f34e6ec 100644
--- a/test/characters_test.dart
+++ b/test/characters_test.dart
@@ -504,6 +504,55 @@
expect(cs11.currentCharacters, e);
expect(cs11.source, gc("$a$c$e"));
+ var cs12 = gc("$a$b$a");
+ expect(cs12.split(b), [a, a]);
+ expect(cs12.split(a), [gc(""), b, gc("")]);
+ expect(cs12.split(a, 2), [gc(""), gc("$b$a")]);
+
+ expect(cs12.split(gc("")), [a, b, a]);
+ expect(cs12.split(gc(""), 2), [a, gc("$b$a")]);
+
+ expect(gc("").split(gc("")), [gc("")]);
+
+ var cs13 = gc("$b$a$b$a$b$a");
+ expect(cs13.split(b), [gc(""), a, a, a]);
+ expect(cs13.split(b, 1), [cs13]);
+ expect(cs13.split(b, 2), [gc(""), gc("$a$b$a$b$a")]);
+ expect(cs13.split(b, 3), [gc(""), a, gc("$a$b$a")]);
+ expect(cs13.split(b, 4), [gc(""), a, a, a]);
+ expect(cs13.split(b, 5), [gc(""), a, a, a]);
+ expect(cs13.split(b, 9999), [gc(""), a, a, a]);
+ expect(cs13.split(b, 0), [gc(""), a, a, a]);
+ expect(cs13.split(b, -1), [gc(""), a, a, a]);
+ expect(cs13.split(b, -9999), [gc(""), a, a, a]);
+
+ it = cs13.iterator..expandAll();
+ expect(it.current, "$b$a$b$a$b$a");
+ it.dropFirst();
+ it.dropLast();
+ expect(it.current, "$a$b$a$b");
+ expect(it.split(a).map((range) => range.current), ["", "$b", "$b"]);
+ expect(it.split(a, 2).map((range) => range.current), ["", "$b$a$b"]);
+ // Each split is after an *a*.
+ bool first = true;
+ for (var range in it.split(a)) {
+ if (range.isEmpty) {
+ // First range is empty.
+ expect(first, true);
+ first = false;
+ continue;
+ }
+ // Later ranges are "b" that come after "a".
+ expect(range.current, "$b");
+ range.moveBack();
+ expect(range.current, "$a");
+ }
+
+ expect(it.split(gc("")).map((range) => range.current),
+ ["$a", "$b", "$a", "$b"]);
+
+ expect(gc("").iterator.split(gc("")).map((range) => range.current), [""]);
+
expect(cs.startsWith(gc("")), true);
expect(cs.startsWith(a), true);
expect(cs.startsWith(a + b), true);