pkgs/characters/lib/src/grapheme_clusters/breaks.dart - core.git - Git at Google

 // BSD-style license that can be found in the LICENSE file.

 /// API for accessing the generated state and category tables.
 ///
 /// Use only [Breaks], [BackBreaks] and [nextBreak] from files other than this.
 /// Do not use any function from `table.dart` directly.
 /// (In both cases this package's own testing is exempted.)

 library;

 import 'constants.dart';
 import 'table.dart';

 /// Iterates grapheme cluster breaks of a string.
 ///
 /// Iterates the grapheme cluster breaks of the substring of
 /// [base] from [cursor] to [end].
 ///
 /// To iterate a substring, use:
 /// ```dart
 /// var breaks = Breaks(string, start, end, stateSoT);
 /// int brk = 0;
 /// while((brk = breaks.nextBreak) >= 0) {
 ///   print('Break at index $brk');
 /// }
 /// ```
 /// If you use [stateSoTNoBreak] instead of [stateSoT], the
 /// initial break between the start-of-text and the first grapheme
 /// is suppressed.
 class Breaks {
   /// Text being iterated.
   final String base;

   /// end of substring of [base] being iterated.
   final int end;

   /// Position of the first yet-unprocessed code point.
   int cursor;

   /// Current state based on code points processed so far.
   ///
   /// A state value is a multiple of [automatonRowLength] plus possibly
   /// a few bits of flags.
   int state;

   Breaks(this.base, this.cursor, this.end, this.state);

   /// Creates a copy of the current iteration, at the exact same state.
   Breaks copy() => Breaks(base, cursor, end, state);

   /// The index of the next grapheme cluster break in last-to-first index order.
   ///
   /// Returns a negative number if there are no further breaks,
   /// which means that [cursor] has reached [end].
   ///
   /// Also stops if reaching a state with the `flagLookahead` bit set,
   /// with the returned position being before the character which triggered
   /// that look-behind.
   /// If the state is not one which can trigger a look-behind, the exit position
   /// is always the next break (if any, or -1 if none, which only happens on
   /// empty strings.)
   int nextBreak() {
     while (cursor < end) {
       var breakAt = cursor;
       step();
       if (state & maskFlags != flagNoBreak) {
         return breakAt;
       }
     }
     state = move(state, categoryEoT);
     if (state & maskFlags != flagNoBreak) return cursor;
     return -1;
   }

   /// Takes one step forward in the state machine.
   void step() {
     assert(cursor < end);
     var char = base.codeUnitAt(cursor++);
     var surrogate = char ^ 0xD800;
     if (surrogate > 0x3FF) {
       state = move(state, low(char));
       return;
     }
     // The category of an unpaired lead surrogate is Control.
     int category;
     int nextSurrogate;
     if (cursor < end &&
         (nextSurrogate = base.codeUnitAt(cursor) ^ 0xDC00) <= 0x3FF) {
       category = high(surrogate, nextSurrogate);
       cursor++;
     } else {
       category = categoryControl;
     }
     state = move(state, category);
   }

   /// Start with no knowledge about the position at [cursor].
   ///
   /// Starts from state `CAny` and takes one step based on the
   /// latest character starting earlier than [cursor].
   ///
   /// Can be used if the [cursor] isn't even known to be at
   /// a grapheme cluster boundary.
   ///
   /// Returns the start of that prior character, which is
   /// one of `cursor` (if cursor is at start of input) or
   /// `cursor - 1`or `cursor - 2` depending whether
   /// it is a surrogate pair, and if so, where it ends.
   int _unknownPositionFirstStep(int start) {
     if (cursor == start) {
       state = stateSoTNoBreak;
       return cursor;
     }
     var cursorBefore = cursor - 1;
     var prevChar = base.codeUnitAt(cursorBefore);
     var prevSurrogate = prevChar ^ 0xD800;
     if (prevSurrogate > 0x7FF) {
       // Not surrogate.
       var prevCategory = low(prevChar);
       state = move(stateCAny, prevCategory);
       return cursorBefore;
     }
     int prevCategory;
     if (prevSurrogate > 0x3FF) {
       // Tail surrogate, check for prior lead surrogate.
       int leadSurrogate;
       var leadIndex = cursorBefore - 1;
       prevSurrogate &= 0x3FF;
       if (leadIndex >= start &&
           (leadSurrogate = base.codeUnitAt(leadIndex) ^ 0xD800) <= 0x3FF) {
         prevCategory = high(leadSurrogate, prevSurrogate);
         cursorBefore = leadIndex;
       } else {
         prevCategory = categoryControl;
       }
     } else {
       // Lead surrogate. Check for a following tail surrogate.
       int tailSurrogate;
       if (cursor < end &&
           (tailSurrogate = base.codeUnitAt(cursor) ^ 0xDC00) <= 0x3FF) {
         cursor += 1;
         prevCategory = high(prevSurrogate, tailSurrogate);
       } else {
         prevCategory = categoryControl;
       }
     }
     state = move(stateCAny, prevCategory);
     return cursorBefore;
   }
 }

 /// Iterates grapheme cluster breaks backwards.
 ///
 /// Given a substring of a [base] string from [start] to [cursor],
 /// iterates the grapheme cluster breaks from [cursor] to [start].
 ///
 /// To iterate a substring, do
 /// ```dart
 /// var breaks = BackBreaks(string, start, end, idStateEoT);
 /// int brk = 0;
 /// while ((brk = breaks.nextBreak()) >= 0) {
 ///   print('Break at index $brk');
 /// }
 /// ```
 /// If the initial [state] is [stateEoTNoBreak] instead of [stateEoT],
 /// the initial break between the last grapheme and the end-of-text
 /// is suppressed.
 class BackBreaks {
   /// Text being iterated.
   final String base;

   /// Start of substring of [base] being iterated.
   final int start;

   /// Position after the last yet-unprocessed code point.
   int cursor;

   /// Current state based on code points processed so far.
   int state;

   BackBreaks(this.base, this.cursor, this.start, this.state);

   BackBreaks copy() => BackBreaks(base, cursor, start, state);

   /// The index of the next grapheme cluster break in first-to-last index order.
   ///
   /// Returns a negative number if there are no further breaks,
   /// which means that [cursor] was already at [start].
   int nextBreak() {
     while (cursor > start) {
       var breakAt = cursor;
       step();
       if (state & maskFlags == flagNoBreak) {
         continue;
       }
       if (state & maskLookahead != 0) {
         _lookaheadInNextBreak();
       }
       if (state & maskBreak != flagNoBreak) {
         return breakAt;
       }
     }
     state = moveBack(state, categoryEoT);
     assert(state < stateLookaheadMin, state);
     if (state & maskBreak != flagNoBreak) return cursor;
     return -1;
   }

   /// Reads a single code point before [cursor] and transition on it.
   ///
   /// Puts cursor before the code point.
   void step() {
     assert(cursor > start);
     var char = base.codeUnitAt(--cursor);
     var surrogate = char ^ 0xDC00;
     if (surrogate > 0x3FF) {
       var category = low(char);
       state = moveBack(state, category);
       return;
     }
     // Found tail surrogate, check for prior lead surrogate.
     // The category of an unpaired tail surrogate is Control.
     int category;
     int prevSurrogate;
     if (cursor >= start &&
         (prevSurrogate = base.codeUnitAt(--cursor) ^ 0xD800) <= 0x3FF) {
       category = high(prevSurrogate, surrogate);
     } else {
       category = categoryControl;
       cursor++;
     }
     state = moveBack(state, category);
   }

   /// Steps back using lookahead states.
   ///
   /// Returns the position before the last scanned character.
   /// (Because in some cases the next break will be at that point.)
   int _lookahead() {
     assert(state >= stateLookaheadMin);
     while (cursor > start) {
       var cursorBeforeLast = cursor;
       step();
       if (state < stateLookaheadMin) return cursorBeforeLast;
     }
     state = moveBack(state, categorySoT);
     assert(state < stateLookaheadMin, state);
     return start;
   }

   /// Called from [nextBreak] to perform a lookahead, and set the result state.
   ///
   /// After this call, the state has [flagBreak] set if it should break
   /// between the two characters which triggered lookahead.
   /// The state and cursor are set to a position prior to reaching the next
   /// break.
   void _lookaheadInNextBreak() {
     assert(state >= stateLookaheadMin, state);
     // To check if this was a regional lookahead afterwards.
     var preState = state;
     // Regional lookahead resets to this position.
     var preCursor = cursor;
     // Non-regional lookahead may reset to the position before the last seen,
     // to avoid having to report two breaks.
     var breakAt = _lookahead();

     if (preState >= stateLookaheadRegionalEven) {
       // Result is always one of one of flagBreak or flagNoBreak.
       assert(
         preState == (stateLookaheadRegionalOdd | flagLookahead) ||
             preState == (stateLookaheadRegionalEven | flagLookahead),
         preState,
       );
       assert(
         state == (stateRegionalEven | flagNoBreak) ||
             state == (stateRegionalOdd | flagBreak),
       );
       // Always reset cursor for regional lookahead.
       // (Could detect stateRegionalOdd, decrease cursor two positions and
       // switch to stateRegionalEven. Not worth the extra code.)
       cursor = preCursor;
     } else {
       // Flags mean:
       // flagNoBreak: Do not break before position, or before cursor.
       // flagBreak: break at position before lookahead, keep cursor.
       // flagLookahead: Not used.
       // flagBreak+flagLookahead: Break at position before lookahead,
       //   set cursor to reread the last character before cursor
       //   (because it'll break again there.)

       // Keep cursor at or just before last read.
       if (state & maskFlags == flagLookaheadBreakBoth) {
         cursor = breakAt;
       }
     }
   }
 }

 /// Whether there is a grapheme cluster boundary before [index] in [text].
 ///
 /// This is a low-level function. There is no validation of the arguments.
 /// They should satisfy `0 <= start <= index <= end <= text.length`.
 ///
 /// Allows [index] to not be at a grapheme cluster boundary
 /// (or even a code point boundary).
 bool isGraphemeClusterBoundary(String text, int start, int end, int index) {
   assert(0 <= start);
   assert(start <= index);
   assert(index <= end);
   assert(end <= text.length);
   // Uses the backwards automaton because answering the question
   // might be answered by looking only at the code points around the
   // index, but it may also require looking further back. It never
   // requires looking further ahead, though.
   // The backwards automaton is built for this use case.
   // Most of the apparent complication in this function is merely dealing with
   // surrogates.
   if (start < index && index < end) {
     var breaks = Breaks(text, index, end, stateCAny);
     var cursorBefore = breaks._unknownPositionFirstStep(start);
     // If cursor moved, index is in the middle of a surrogate pair.
     if (breaks.cursor != index) return false;
     breaks.step();
     if (breaks.state & maskBreak != flagNoBreak) {
       return true;
     }
     if (breaks.state & maskLookahead == 0) return false;
     assert(breaks.state >= stateLookaheadMin);

     var backBreaks = BackBreaks(text, cursorBefore, start, breaks.state);
     backBreaks._lookahead();
     return (backBreaks.state & maskBreak != flagNoBreak);
   }
   return true;
 }

 /// The most recent break no later than [index] in
 /// `string.substring(start, end)`.
 ///
 /// Allows [index] to not be at a grapheme cluster boundary
 /// (or even a code point boundary).
 int previousBreak(String text, int start, int end, int index) {
   assert(0 <= start);
   assert(start <= index);
   assert(index <= end);
   assert(end <= text.length);
   // First character ending after `index`.
   // Accounts for an `index` in the middle of a surrogate pair.
   if (start < index && index < end) {
     var cursorBefore = index;
     var nextChar = text.codeUnitAt(index);
     var nextSurrogate = nextChar ^ 0xD800;
     var category = categoryControl;
     if (nextSurrogate > 0x7FF) {
       category = low(nextChar);
     } else if (nextSurrogate <= 0x3FF) {
       var indexAfter = index + 1;
       if (indexAfter < end) {
         var secondSurrogate = text.codeUnitAt(indexAfter) ^ 0xDC00;
         if (secondSurrogate <= 0x3FF) {
           category = high(nextSurrogate, secondSurrogate);
         }
       }
     } else {
       var prevSurrogate = text.codeUnitAt(index - 1) ^ 0xD800;
       nextSurrogate &= 0x3FF;
       if (prevSurrogate <= 0x3FF) {
         category = high(prevSurrogate, nextSurrogate);
         cursorBefore -= 1;
       }
     }
     return BackBreaks(
       text,
       cursorBefore,
       start,
       moveBack(stateEoTNoBreak, category),
     ).nextBreak();
   }
   return index;
 }

 /// The next break no earlier than [index] in `string.substring(start, end)`.
 ///
 /// Allows [index] to not be at a grapheme cluster boundary
 /// (or even a code point boundary).
 int nextBreak(String text, int start, int end, int index) {
   assert(0 <= start);
   assert(start <= index);
   assert(index <= end);
   assert(end <= text.length);
   // Always break at start or end (GB1).
   if (index == start || index == end) return index;
   var breaks = Breaks(text, index, end, stateCAny);
   var cursorBefore = breaks._unknownPositionFirstStep(start);
   var possibleBreak = breaks.nextBreak();
   assert(breaks.state & maskFlags != 0);
   if (breaks.state & maskFlags == flagBreak) return possibleBreak;
   var lookbehindState = breaks.state;
   assert(lookbehindState & maskFlags == flagLookahead);
   assert(lookbehindState & maskState >= stateLookaheadMin);

   var backBreaks = BackBreaks(text, cursorBefore, start, lookbehindState);
   backBreaks._lookahead();
   if (backBreaks.state & maskBreak != flagNoBreak) {
     return possibleBreak;
   }

   // Find the correct forward category to continue with.
   // There are only three possible character categories that can trigger
   // a look-behind.
   if (lookbehindState == stateLookaheadRegionalEven | flagLookahead) {
     assert(backBreaks.state == stateRegionalEven);
     // Started by RI+RI.
     breaks.state = stateRegionalEven;
   } else {
     // Was triggered by ZWJ+Pic or InCB={Extend|Linked}+InCB=Consonant.
     assert(
       lookbehindState == (stateLookaheadZWJPictographic | flagLookahead) ||
           lookbehindState == (stateLookaheadInC | flagLookahead) ||
           lookbehindState == (stateLookaheadInCL | flagLookahead),
     );
     // If starting in lookahead state ZWJ+Pic, and not breaking,
     // final backwards state is Pic.
     assert(
       lookbehindState != (stateLookaheadZWJPictographic | flagLookahead) ||
           backBreaks.state == statePictographic,
     );
     // If starting in lookahead state InC or InCL, and not breaking,
     // final backwards state is Inc.
     assert(
       lookbehindState != (stateLookaheadInC | flagLookahead) &&
               lookbehindState != (stateLookaheadInCL | flagLookahead) ||
           backBreaks.state == stateInC,
     );
     // In both cases, that's the same as the forward state
     // at the point that triggered the look-behind.
     breaks.state = backBreaks.state;
   }
   var result = breaks.nextBreak();
   assert(breaks.state & maskFlags == flagBreak);
   return result;
 }
	// BSD-style license that can be found in the LICENSE file.

	/// API for accessing the generated state and category tables.
	///
	/// Use only [Breaks], [BackBreaks] and [nextBreak] from files other than this.
	/// Do not use any function from `table.dart` directly.
	/// (In both cases this package's own testing is exempted.)

	library;

	import 'constants.dart';
	import 'table.dart';

	/// Iterates grapheme cluster breaks of a string.
	///
	/// Iterates the grapheme cluster breaks of the substring of
	/// [base] from [cursor] to [end].
	///
	/// To iterate a substring, use:
	/// ```dart
	/// var breaks = Breaks(string, start, end, stateSoT);
	/// int brk = 0;
	/// while((brk = breaks.nextBreak) >= 0) {
	/// print('Break at index $brk');
	/// }
	/// ```
	/// If you use [stateSoTNoBreak] instead of [stateSoT], the
	/// initial break between the start-of-text and the first grapheme
	/// is suppressed.
	class Breaks {
	/// Text being iterated.
	final String base;

	/// end of substring of [base] being iterated.
	final int end;

	/// Position of the first yet-unprocessed code point.
	int cursor;

	/// Current state based on code points processed so far.
	///
	/// A state value is a multiple of [automatonRowLength] plus possibly
	/// a few bits of flags.
	int state;

	Breaks(this.base, this.cursor, this.end, this.state);

	/// Creates a copy of the current iteration, at the exact same state.
	Breaks copy() => Breaks(base, cursor, end, state);

	/// The index of the next grapheme cluster break in last-to-first index order.
	///
	/// Returns a negative number if there are no further breaks,
	/// which means that [cursor] has reached [end].
	///
	/// Also stops if reaching a state with the `flagLookahead` bit set,
	/// with the returned position being before the character which triggered
	/// that look-behind.
	/// If the state is not one which can trigger a look-behind, the exit position
	/// is always the next break (if any, or -1 if none, which only happens on
	/// empty strings.)
	int nextBreak() {
	while (cursor < end) {
	var breakAt = cursor;
	step();
	if (state & maskFlags != flagNoBreak) {
	return breakAt;
	}
	}
	state = move(state, categoryEoT);
	if (state & maskFlags != flagNoBreak) return cursor;
	return -1;
	}

	/// Takes one step forward in the state machine.
	void step() {
	assert(cursor < end);
	var char = base.codeUnitAt(cursor++);
	var surrogate = char ^ 0xD800;
	if (surrogate > 0x3FF) {
	state = move(state, low(char));
	return;
	}
	// The category of an unpaired lead surrogate is Control.
	int category;
	int nextSurrogate;
	if (cursor < end &&
	(nextSurrogate = base.codeUnitAt(cursor) ^ 0xDC00) <= 0x3FF) {
	category = high(surrogate, nextSurrogate);
	cursor++;
	} else {
	category = categoryControl;
	}
	state = move(state, category);
	}

	/// Start with no knowledge about the position at [cursor].
	///
	/// Starts from state `CAny` and takes one step based on the
	/// latest character starting earlier than [cursor].
	///
	/// Can be used if the [cursor] isn't even known to be at
	/// a grapheme cluster boundary.
	///
	/// Returns the start of that prior character, which is
	/// one of `cursor` (if cursor is at start of input) or
	/// `cursor - 1`or `cursor - 2` depending whether
	/// it is a surrogate pair, and if so, where it ends.
	int _unknownPositionFirstStep(int start) {
	if (cursor == start) {
	state = stateSoTNoBreak;
	return cursor;
	}
	var cursorBefore = cursor - 1;
	var prevChar = base.codeUnitAt(cursorBefore);
	var prevSurrogate = prevChar ^ 0xD800;
	if (prevSurrogate > 0x7FF) {
	// Not surrogate.
	var prevCategory = low(prevChar);
	state = move(stateCAny, prevCategory);
	return cursorBefore;
	}
	int prevCategory;
	if (prevSurrogate > 0x3FF) {
	// Tail surrogate, check for prior lead surrogate.
	int leadSurrogate;
	var leadIndex = cursorBefore - 1;
	prevSurrogate &= 0x3FF;
	if (leadIndex >= start &&
	(leadSurrogate = base.codeUnitAt(leadIndex) ^ 0xD800) <= 0x3FF) {
	prevCategory = high(leadSurrogate, prevSurrogate);
	cursorBefore = leadIndex;
	} else {
	prevCategory = categoryControl;
	}
	} else {
	// Lead surrogate. Check for a following tail surrogate.
	int tailSurrogate;
	if (cursor < end &&
	(tailSurrogate = base.codeUnitAt(cursor) ^ 0xDC00) <= 0x3FF) {
	cursor += 1;
	prevCategory = high(prevSurrogate, tailSurrogate);
	} else {
	prevCategory = categoryControl;
	}
	}
	state = move(stateCAny, prevCategory);
	return cursorBefore;
	}
	}

	/// Iterates grapheme cluster breaks backwards.
	///
	/// Given a substring of a [base] string from [start] to [cursor],
	/// iterates the grapheme cluster breaks from [cursor] to [start].
	///
	/// To iterate a substring, do
	/// ```dart
	/// var breaks = BackBreaks(string, start, end, idStateEoT);
	/// int brk = 0;
	/// while ((brk = breaks.nextBreak()) >= 0) {
	/// print('Break at index $brk');
	/// }
	/// ```
	/// If the initial [state] is [stateEoTNoBreak] instead of [stateEoT],
	/// the initial break between the last grapheme and the end-of-text
	/// is suppressed.
	class BackBreaks {
	/// Text being iterated.
	final String base;

	/// Start of substring of [base] being iterated.
	final int start;

	/// Position after the last yet-unprocessed code point.
	int cursor;

	/// Current state based on code points processed so far.
	int state;

	BackBreaks(this.base, this.cursor, this.start, this.state);

	BackBreaks copy() => BackBreaks(base, cursor, start, state);

	/// The index of the next grapheme cluster break in first-to-last index order.
	///
	/// Returns a negative number if there are no further breaks,
	/// which means that [cursor] was already at [start].
	int nextBreak() {
	while (cursor > start) {
	var breakAt = cursor;
	step();
	if (state & maskFlags == flagNoBreak) {
	continue;
	}
	if (state & maskLookahead != 0) {
	_lookaheadInNextBreak();
	}
	if (state & maskBreak != flagNoBreak) {
	return breakAt;
	}
	}
	state = moveBack(state, categoryEoT);
	assert(state < stateLookaheadMin, state);
	if (state & maskBreak != flagNoBreak) return cursor;
	return -1;
	}

	/// Reads a single code point before [cursor] and transition on it.
	///
	/// Puts cursor before the code point.
	void step() {
	assert(cursor > start);
	var char = base.codeUnitAt(--cursor);
	var surrogate = char ^ 0xDC00;
	if (surrogate > 0x3FF) {
	var category = low(char);
	state = moveBack(state, category);
	return;
	}
	// Found tail surrogate, check for prior lead surrogate.
	// The category of an unpaired tail surrogate is Control.
	int category;
	int prevSurrogate;
	if (cursor >= start &&
	(prevSurrogate = base.codeUnitAt(--cursor) ^ 0xD800) <= 0x3FF) {
	category = high(prevSurrogate, surrogate);
	} else {
	category = categoryControl;
	cursor++;
	}
	state = moveBack(state, category);
	}

	/// Steps back using lookahead states.
	///
	/// Returns the position before the last scanned character.
	/// (Because in some cases the next break will be at that point.)
	int _lookahead() {
	assert(state >= stateLookaheadMin);
	while (cursor > start) {
	var cursorBeforeLast = cursor;
	step();
	if (state < stateLookaheadMin) return cursorBeforeLast;
	}
	state = moveBack(state, categorySoT);
	assert(state < stateLookaheadMin, state);
	return start;
	}

	/// Called from [nextBreak] to perform a lookahead, and set the result state.
	///
	/// After this call, the state has [flagBreak] set if it should break
	/// between the two characters which triggered lookahead.
	/// The state and cursor are set to a position prior to reaching the next
	/// break.
	void _lookaheadInNextBreak() {
	assert(state >= stateLookaheadMin, state);
	// To check if this was a regional lookahead afterwards.
	var preState = state;
	// Regional lookahead resets to this position.
	var preCursor = cursor;
	// Non-regional lookahead may reset to the position before the last seen,
	// to avoid having to report two breaks.
	var breakAt = _lookahead();

	if (preState >= stateLookaheadRegionalEven) {
	// Result is always one of one of flagBreak or flagNoBreak.
	assert(
	preState == (stateLookaheadRegionalOdd \| flagLookahead) \|\|
	preState == (stateLookaheadRegionalEven \| flagLookahead),
	preState,
	);
	assert(
	state == (stateRegionalEven \| flagNoBreak) \|\|
	state == (stateRegionalOdd \| flagBreak),
	);
	// Always reset cursor for regional lookahead.
	// (Could detect stateRegionalOdd, decrease cursor two positions and
	// switch to stateRegionalEven. Not worth the extra code.)
	cursor = preCursor;
	} else {
	// Flags mean:
	// flagNoBreak: Do not break before position, or before cursor.
	// flagBreak: break at position before lookahead, keep cursor.
	// flagLookahead: Not used.
	// flagBreak+flagLookahead: Break at position before lookahead,
	// set cursor to reread the last character before cursor
	// (because it'll break again there.)

	// Keep cursor at or just before last read.
	if (state & maskFlags == flagLookaheadBreakBoth) {
	cursor = breakAt;
	}
	}
	}
	}

	/// Whether there is a grapheme cluster boundary before [index] in [text].
	///
	/// This is a low-level function. There is no validation of the arguments.
	/// They should satisfy `0 <= start <= index <= end <= text.length`.
	///
	/// Allows [index] to not be at a grapheme cluster boundary
	/// (or even a code point boundary).
	bool isGraphemeClusterBoundary(String text, int start, int end, int index) {
	assert(0 <= start);
	assert(start <= index);
	assert(index <= end);
	assert(end <= text.length);
	// Uses the backwards automaton because answering the question
	// might be answered by looking only at the code points around the
	// index, but it may also require looking further back. It never
	// requires looking further ahead, though.
	// The backwards automaton is built for this use case.
	// Most of the apparent complication in this function is merely dealing with
	// surrogates.
	if (start < index && index < end) {
	var breaks = Breaks(text, index, end, stateCAny);
	var cursorBefore = breaks._unknownPositionFirstStep(start);
	// If cursor moved, index is in the middle of a surrogate pair.
	if (breaks.cursor != index) return false;
	breaks.step();
	if (breaks.state & maskBreak != flagNoBreak) {
	return true;
	}
	if (breaks.state & maskLookahead == 0) return false;
	assert(breaks.state >= stateLookaheadMin);

	var backBreaks = BackBreaks(text, cursorBefore, start, breaks.state);
	backBreaks._lookahead();
	return (backBreaks.state & maskBreak != flagNoBreak);
	}
	return true;
	}

	/// The most recent break no later than [index] in
	/// `string.substring(start, end)`.
	///
	/// Allows [index] to not be at a grapheme cluster boundary
	/// (or even a code point boundary).
	int previousBreak(String text, int start, int end, int index) {
	assert(0 <= start);
	assert(start <= index);
	assert(index <= end);
	assert(end <= text.length);
	// First character ending after `index`.
	// Accounts for an `index` in the middle of a surrogate pair.
	if (start < index && index < end) {
	var cursorBefore = index;
	var nextChar = text.codeUnitAt(index);
	var nextSurrogate = nextChar ^ 0xD800;
	var category = categoryControl;
	if (nextSurrogate > 0x7FF) {
	category = low(nextChar);
	} else if (nextSurrogate <= 0x3FF) {
	var indexAfter = index + 1;
	if (indexAfter < end) {
	var secondSurrogate = text.codeUnitAt(indexAfter) ^ 0xDC00;
	if (secondSurrogate <= 0x3FF) {
	category = high(nextSurrogate, secondSurrogate);
	}
	}
	} else {
	var prevSurrogate = text.codeUnitAt(index - 1) ^ 0xD800;
	nextSurrogate &= 0x3FF;
	if (prevSurrogate <= 0x3FF) {
	category = high(prevSurrogate, nextSurrogate);
	cursorBefore -= 1;
	}
	}
	return BackBreaks(
	text,
	cursorBefore,
	start,
	moveBack(stateEoTNoBreak, category),
	).nextBreak();
	}
	return index;
	}

	/// The next break no earlier than [index] in `string.substring(start, end)`.
	///
	/// Allows [index] to not be at a grapheme cluster boundary
	/// (or even a code point boundary).
	int nextBreak(String text, int start, int end, int index) {
	assert(0 <= start);
	assert(start <= index);
	assert(index <= end);
	assert(end <= text.length);
	// Always break at start or end (GB1).
	if (index == start \|\| index == end) return index;
	var breaks = Breaks(text, index, end, stateCAny);
	var cursorBefore = breaks._unknownPositionFirstStep(start);
	var possibleBreak = breaks.nextBreak();
	assert(breaks.state & maskFlags != 0);
	if (breaks.state & maskFlags == flagBreak) return possibleBreak;
	var lookbehindState = breaks.state;
	assert(lookbehindState & maskFlags == flagLookahead);
	assert(lookbehindState & maskState >= stateLookaheadMin);

	var backBreaks = BackBreaks(text, cursorBefore, start, lookbehindState);
	backBreaks._lookahead();
	if (backBreaks.state & maskBreak != flagNoBreak) {
	return possibleBreak;
	}

	// Find the correct forward category to continue with.
	// There are only three possible character categories that can trigger
	// a look-behind.
	if (lookbehindState == stateLookaheadRegionalEven \| flagLookahead) {
	assert(backBreaks.state == stateRegionalEven);
	// Started by RI+RI.
	breaks.state = stateRegionalEven;
	} else {
	// Was triggered by ZWJ+Pic or InCB={Extend\|Linked}+InCB=Consonant.
	assert(
	lookbehindState == (stateLookaheadZWJPictographic \| flagLookahead) \|\|
	lookbehindState == (stateLookaheadInC \| flagLookahead) \|\|
	lookbehindState == (stateLookaheadInCL \| flagLookahead),
	);
	// If starting in lookahead state ZWJ+Pic, and not breaking,
	// final backwards state is Pic.
	assert(
	lookbehindState != (stateLookaheadZWJPictographic \| flagLookahead) \|\|
	backBreaks.state == statePictographic,
	);
	// If starting in lookahead state InC or InCL, and not breaking,
	// final backwards state is Inc.
	assert(
	lookbehindState != (stateLookaheadInC \| flagLookahead) &&
	lookbehindState != (stateLookaheadInCL \| flagLookahead) \|\|
	backBreaks.state == stateInC,
	);
	// In both cases, that's the same as the forward state
	// at the point that triggered the look-behind.
	breaks.state = backBreaks.state;
	}
	var result = breaks.nextBreak();
	assert(breaks.state & maskFlags == flagBreak);
	return result;
	}