lib/web_ui/lib/src/engine/text/word_breaker.dart - external/github.com/flutter/engine - Git at Google

 // Copyright 2013 The Flutter Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // @dart = 2.6
 part of engine;

 /// Direction in which [WordBreaker] scans a string for a word boundary.
 enum _FindBreakDirection {
   /// Indicates to find the word break by looking forward.
   forward,

   /// Indicates to find the word break by looking backward.
   backward,
 }

 /// Finds word boundaries inside a string.
 ///
 /// The class only carries static members: [nextBreakIndex] and
 /// [prevBreakIndex] are the entry points, and [_isBreak] holds the actual
 /// boundary test.
 abstract class WordBreaker {
   /// Returns the closest word boundary in [text] that lies after [index].
   ///
   /// When [index] is negative or not smaller than `text.length`, no scan takes
   /// place and [index] is returned unchanged.
   static int nextBreakIndex(String text, int index) {
     return _findBreakIndex(_FindBreakDirection.forward, text, index);
   }

   /// Returns the closest word boundary in [text] that lies before [index].
   ///
   /// When [index] is smaller than 1 or greater than `text.length`, no scan
   /// takes place and [index] is returned unchanged.
   static int prevBreakIndex(String text, int index) {
     return _findBreakIndex(_FindBreakDirection.backward, text, index);
   }

   /// Walks from [index] through [text], one position at a time, in the given
   /// [direction] and returns the first position accepted by [_isBreak].
   ///
   /// The walk only starts while the current position is inside the range that
   /// is valid for the direction (`0..length - 1` going forward, `1..length`
   /// going backward). Because [_isBreak] accepts both ends of the text, the
   /// walk always stops at the latest when it reaches `0` or `text.length`.
   static int _findBreakIndex(
     _FindBreakDirection direction,
     String text,
     int index,
   ) {
     final bool scanningForward = direction == _FindBreakDirection.forward;
     final int delta = scanningForward ? 1 : -1;
     final int lowest = scanningForward ? 0 : 1;
     final int highest = scanningForward ? text.length - 1 : text.length;

     int cursor = index;
     while (lowest <= cursor && cursor <= highest) {
       // Step first, then test: the starting position itself is never
       // reported as the result of a scan.
       cursor += delta;
       if (_isBreak(text, cursor)) {
         return cursor;
       }
     }
     return cursor;
   }

   /// Whether a word boundary lies between positions `index - 1` and [index]
   /// of [text].
   ///
   /// The rules are checked in the order given by
   /// http://unicode.org/reports/tr29/#Word_Boundary_Rules and the first rule
   /// that matches decides the result. WB3c, WB15 and WB16 are not implemented.
   static bool _isBreak(String text, int index) {
     // The two ends of the text are always boundaries.
     // WB1: sot ÷ Any
     // WB2: Any ÷ eot
     if (index <= 0 || text.length <= index) {
       return true;
     }

     // Never report a boundary right after a surrogate code unit, so that a
     // surrogate pair is not split in the middle.
     // NOTE(review): the test accepts leading and trailing surrogates alike, so
     // it also fires at the position just after a complete pair - confirm that
     // this is intended.
     final int precedingUnit = text.codeUnitAt(index - 1);
     if (_isUtf16Surrogate(precedingUnit)) {
       return false;
     }

     // Properties of the characters directly around the candidate boundary.
     final WordCharProperty right = wordLookup.find(text, index);
     WordCharProperty left = wordLookup.find(text, index - 1);

     // A CRLF sequence stays in one piece.
     // WB3: CR × LF
     if (left == WordCharProperty.CR && right == WordCharProperty.LF) {
       return false;
     }

     // Any other line terminator is isolated on both sides.
     // WB3a: (Newline | CR | LF) ÷
     // WB3b: ÷ (Newline | CR | LF)
     if (_isLineTerminator(left) || _isLineTerminator(right)) {
       return true;
     }

     // WB3c: ZWJ × \p{Extended_Pictographic}
     // TODO(flutter_web): What's the right way to implement this?

     // Runs of horizontal whitespace stay together.
     // WB3d: WSegSpace × WSegSpace
     if (left == WordCharProperty.WSegSpace &&
         right == WordCharProperty.WSegSpace) {
       return false;
     }

     // Extend, Format and ZWJ characters stick to whatever precedes them.
     // WB4: X (Extend | Format | ZWJ)* → X
     if (_isIgnorable(right)) {
       return false;
     }

     // The candidate boundary sits after a run of Extend|Format|ZWJ. Walk back
     // to the character that run is attached to and use it as the left side.
     int leftPosition = index - 1;
     while (_isIgnorable(left)) {
       leftPosition--;
       if (leftPosition < 0) {
         // The run extends all the way to the start of the text.
         return true;
       }
       left = wordLookup.find(text, leftPosition);
     }

     // WB5: (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
     if (_isAHLetter(left) && _isAHLetter(right)) {
       return false;
     }

     // The remaining rules may look one character further on either side. Per
     // WB4, Extend, Format and ZWJ characters are skipped while doing so.
     // NOTE(review): both loops rely on `wordLookup.find` returning a property
     // outside Extend|Format|ZWJ for out-of-range positions - confirm.
     int rightPosition = index;
     WordCharProperty rightContext;
     do {
       rightPosition++;
       rightContext = wordLookup.find(text, rightPosition);
     } while (_isIgnorable(rightContext));

     WordCharProperty leftContext;
     do {
       leftPosition--;
       leftContext = wordLookup.find(text, leftPosition);
     } while (_isIgnorable(leftContext));

     // Letters joined by certain punctuation stay together.
     // WB6: (AHLetter) × (MidLetter | MidNumLet | Single_Quote) (AHLetter)
     if (_isAHLetter(left) &&
         _isMidLetterOrQuote(right) &&
         _isAHLetter(rightContext)) {
       return false;
     }

     // WB7: (AHLetter) (MidLetter | MidNumLet | Single_Quote) × (AHLetter)
     if (_isAHLetter(leftContext) &&
         _isMidLetterOrQuote(left) &&
         _isAHLetter(right)) {
       return false;
     }

     // WB7a: Hebrew_Letter × Single_Quote
     if (left == WordCharProperty.HebrewLetter &&
         right == WordCharProperty.SingleQuote) {
       return false;
     }

     // WB7b: Hebrew_Letter × Double_Quote Hebrew_Letter
     if (left == WordCharProperty.HebrewLetter &&
         right == WordCharProperty.DoubleQuote &&
         rightContext == WordCharProperty.HebrewLetter) {
       return false;
     }

     // WB7c: Hebrew_Letter Double_Quote × Hebrew_Letter
     if (leftContext == WordCharProperty.HebrewLetter &&
         left == WordCharProperty.DoubleQuote &&
         right == WordCharProperty.HebrewLetter) {
       return false;
     }

     // Digit sequences, and digits next to letters (“3a”, or “A3”), stay
     // together.
     // WB8: Numeric × Numeric
     if (left == WordCharProperty.Numeric &&
         right == WordCharProperty.Numeric) {
       return false;
     }

     // WB9: AHLetter × Numeric
     if (_isAHLetter(left) && right == WordCharProperty.Numeric) {
       return false;
     }

     // WB10: Numeric × AHLetter
     if (left == WordCharProperty.Numeric && _isAHLetter(right)) {
       return false;
     }

     // Numbers such as “3.2” or “3,456.789” stay together.
     // WB11: Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
     if (leftContext == WordCharProperty.Numeric &&
         _isMidNumOrQuote(left) &&
         right == WordCharProperty.Numeric) {
       return false;
     }

     // WB12: Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
     if (left == WordCharProperty.Numeric &&
         _isMidNumOrQuote(right) &&
         rightContext == WordCharProperty.Numeric) {
       return false;
     }

     // WB13: Katakana × Katakana
     if (left == WordCharProperty.Katakana &&
         right == WordCharProperty.Katakana) {
       return false;
     }

     // Extenders stay attached to what they extend.
     // WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
     if ((_isExtendable(left) || left == WordCharProperty.ExtendNumLet) &&
         right == WordCharProperty.ExtendNumLet) {
       return false;
     }

     // WB13b: ExtendNumLet × (AHLetter | Numeric | Katakana)
     if (left == WordCharProperty.ExtendNumLet && _isExtendable(right)) {
       return false;
     }

     // Emoji flag sequences: no boundary between regional indicator (RI)
     // symbols when an odd number of RI characters precedes the position.
     // WB15: sot (RI RI)* RI × RI
     // TODO(mdebbar): implement this.

     // WB16: [^RI] (RI RI)* RI × RI
     // TODO(mdebbar): implement this.

     // Everything else is a boundary (including around ideographs).
     // WB999: Any ÷ Any
     return true;
   }

   /// Whether [value] is a UTF-16 surrogate code unit, leading or trailing
   /// (`0xD800` through `0xDFFF`).
   static bool _isUtf16Surrogate(int value) => (value & 0xF800) == 0xD800;

   /// Whether [value] equals one of the supplied choices.
   ///
   /// Optional choices that were not supplied are `null` and are skipped, so a
   /// `null` [value] never matches an omitted choice.
   static bool _oneOf(
     WordCharProperty value,
     WordCharProperty choice1,
     WordCharProperty choice2, [
     WordCharProperty choice3,
     WordCharProperty choice4,
     WordCharProperty choice5,
   ]) {
     return value == choice1 ||
         value == choice2 ||
         (choice3 != null && value == choice3) ||
         (choice4 != null && value == choice4) ||
         (choice5 != null && value == choice5);
   }

   /// Whether [property] is `ALetter` or `HebrewLetter` (AHLetter in the
   /// rules).
   static bool _isAHLetter(WordCharProperty property) {
     return property == WordCharProperty.ALetter ||
         property == WordCharProperty.HebrewLetter;
   }

   /// Whether [property] is `Newline`, `CR` or `LF`.
   static bool _isLineTerminator(WordCharProperty property) {
     return _oneOf(
       property,
       WordCharProperty.Newline,
       WordCharProperty.CR,
       WordCharProperty.LF,
     );
   }

   /// Whether [property] is `Extend`, `Format` or `ZWJ`, the properties that
   /// rule WB4 treats as part of the preceding character.
   static bool _isIgnorable(WordCharProperty property) {
     return _oneOf(
       property,
       WordCharProperty.Extend,
       WordCharProperty.Format,
       WordCharProperty.ZWJ,
     );
   }

   /// Whether [property] is `MidLetter`, `MidNumLet` or `SingleQuote`.
   static bool _isMidLetterOrQuote(WordCharProperty property) {
     return _oneOf(
       property,
       WordCharProperty.MidLetter,
       WordCharProperty.MidNumLet,
       WordCharProperty.SingleQuote,
     );
   }

   /// Whether [property] is `MidNum`, `MidNumLet` or `SingleQuote`.
   static bool _isMidNumOrQuote(WordCharProperty property) {
     return _oneOf(
       property,
       WordCharProperty.MidNum,
       WordCharProperty.MidNumLet,
       WordCharProperty.SingleQuote,
     );
   }

   /// Whether [property] is `ALetter`, `HebrewLetter`, `Numeric` or
   /// `Katakana`, the properties that may sit next to `ExtendNumLet` in rules
   /// WB13a and WB13b.
   static bool _isExtendable(WordCharProperty property) {
     return _oneOf(
       property,
       WordCharProperty.ALetter,
       WordCharProperty.HebrewLetter,
       WordCharProperty.Numeric,
       WordCharProperty.Katakana,
     );
   }
 }
	// Copyright 2013 The Flutter Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	// @dart = 2.6
	part of engine;

	/// Direction in which [WordBreaker] scans a string for a word boundary.
	enum _FindBreakDirection {
	/// Indicates to find the word break by looking forward.
	forward,

	/// Indicates to find the word break by looking backward.
	backward,
	}

	/// [WordBreaker] exposes static methods to identify word boundaries.
	abstract class WordBreaker {
	  /// Starts from [index] and returns the next word boundary in [text].
	  ///
	  /// If [index] is negative or not smaller than `text.length`, it is returned
	  /// unchanged.
	  static int nextBreakIndex(String text, int index) =>
	      _findBreakIndex(_FindBreakDirection.forward, text, index);

	  /// Starts from [index] and returns the previous word boundary in [text].
	  ///
	  /// If [index] is smaller than 1 or greater than `text.length`, it is
	  /// returned unchanged.
	  static int prevBreakIndex(String text, int index) =>
	      _findBreakIndex(_FindBreakDirection.backward, text, index);

	  /// Moves from [index] through [text] in the given [direction], one position
	  /// at a time, and returns the first position for which [_isBreak] is true.
	  ///
	  /// [_isBreak] is true at both ends of the text, so the scan stops at `0` or
	  /// `text.length` at the latest.
	  static int _findBreakIndex(
	    _FindBreakDirection direction,
	    String text,
	    int index,
	  ) {
	    // `min` and `max` delimit the positions from which a step in the chosen
	    // direction is still allowed.
	    int step, min, max;
	    if (direction == _FindBreakDirection.forward) {
	      step = 1;
	      min = 0;
	      max = text.length - 1;
	    } else {
	      step = -1;
	      min = 1;
	      max = text.length;
	    }

	    int i = index;
	    while (i >= min && i <= max) {
	      // Step before testing so that the start position is never the result.
	      i += step;
	      if (_isBreak(text, i)) {
	        break;
	      }
	    }
	    return i;
	  }

	  /// Find out if there's a word break between [index - 1] and [index].
	  /// http://unicode.org/reports/tr29/#Word_Boundary_Rules
	  ///
	  /// Rules are evaluated in order and the first match decides the result.
	  /// WB3c, WB15 and WB16 are not implemented.
	  static bool _isBreak(String text, int index) {
	    // Break at the start and end of text.
	    // WB1: sot ÷ Any
	    // WB2: Any ÷ eot
	    if (index <= 0 || index >= text.length) {
	      return true;
	    }

	    // Do not break inside surrogate pair
	    // NOTE(review): the check matches leading and trailing surrogates alike,
	    // so it also suppresses a break right after a complete pair - confirm
	    // that this is intended.
	    if (_isUtf16Surrogate(text.codeUnitAt(index - 1))) {
	      return false;
	    }

	    final WordCharProperty immediateRight = wordLookup.find(text, index);
	    WordCharProperty immediateLeft = wordLookup.find(text, index - 1);

	    // Do not break within CRLF.
	    // WB3: CR × LF
	    if (immediateLeft == WordCharProperty.CR &&
	        immediateRight == WordCharProperty.LF) {
	      return false;
	    }

	    // Otherwise break before and after Newlines (including CR and LF)
	    // WB3a: (Newline | CR | LF) ÷
	    if (_oneOf(
	      immediateLeft,
	      WordCharProperty.Newline,
	      WordCharProperty.CR,
	      WordCharProperty.LF,
	    )) {
	      return true;
	    }

	    // WB3b: ÷ (Newline | CR | LF)
	    if (_oneOf(
	      immediateRight,
	      WordCharProperty.Newline,
	      WordCharProperty.CR,
	      WordCharProperty.LF,
	    )) {
	      return true;
	    }

	    // WB3c: ZWJ × \p{Extended_Pictographic}
	    // TODO(flutter_web): What's the right way to implement this?

	    // Keep horizontal whitespace together.
	    // WB3d: WSegSpace × WSegSpace
	    if (immediateLeft == WordCharProperty.WSegSpace &&
	        immediateRight == WordCharProperty.WSegSpace) {
	      return false;
	    }

	    // Ignore Format and Extend characters, except after sot, CR, LF, and
	    // Newline.
	    // WB4: X (Extend | Format | ZWJ)* → X
	    if (_oneOf(
	      immediateRight,
	      WordCharProperty.Extend,
	      WordCharProperty.Format,
	      WordCharProperty.ZWJ,
	    )) {
	      // The Extend|Format|ZWJ character is to the right, so it is attached
	      // to a character to the left, don't split here
	      return false;
	    }

	    // We've reached the end of an Extend|Format|ZWJ sequence, collapse it.
	    // `l` counts how many positions left of `index - 1` the effective left
	    // character sits.
	    int l = 0;
	    while (_oneOf(
	      immediateLeft,
	      WordCharProperty.Extend,
	      WordCharProperty.Format,
	      WordCharProperty.ZWJ,
	    )) {
	      l++;
	      if (index - l - 1 < 0) {
	        // Reached the beginning of text.
	        return true;
	      }
	      immediateLeft = wordLookup.find(text, index - l - 1);
	    }

	    // Do not break between most letters.
	    // WB5: (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
	    if (_isAHLetter(immediateLeft) && _isAHLetter(immediateRight)) {
	      return false;
	    }

	    // Some tests beyond this point require more context. We need to get that
	    // context while also respecting rule WB4. So ignore Format, Extend and
	    // ZWJ.
	    // NOTE(review): both loops below rely on `wordLookup.find` returning a
	    // property outside Extend|Format|ZWJ for out-of-range positions -
	    // confirm.

	    // Skip all Format, Extend and ZWJ to the right.
	    int r = 0;
	    WordCharProperty nextRight;
	    do {
	      r++;
	      nextRight = wordLookup.find(text, index + r);
	    } while (_oneOf(
	      nextRight,
	      WordCharProperty.Extend,
	      WordCharProperty.Format,
	      WordCharProperty.ZWJ,
	    ));

	    // Skip all Format, Extend and ZWJ to the left. `l` continues from the
	    // position of the effective left character found above.
	    WordCharProperty nextLeft;
	    do {
	      l++;
	      nextLeft = wordLookup.find(text, index - l - 1);
	    } while (_oneOf(
	      nextLeft,
	      WordCharProperty.Extend,
	      WordCharProperty.Format,
	      WordCharProperty.ZWJ,
	    ));

	    // Do not break letters across certain punctuation.
	    // WB6: (AHLetter) × (MidLetter | MidNumLet | Single_Quote) (AHLetter)
	    if (_isAHLetter(immediateLeft) &&
	        _oneOf(
	          immediateRight,
	          WordCharProperty.MidLetter,
	          WordCharProperty.MidNumLet,
	          WordCharProperty.SingleQuote,
	        ) &&
	        _isAHLetter(nextRight)) {
	      return false;
	    }

	    // WB7: (AHLetter) (MidLetter | MidNumLet | Single_Quote) × (AHLetter)
	    if (_isAHLetter(nextLeft) &&
	        _oneOf(
	          immediateLeft,
	          WordCharProperty.MidLetter,
	          WordCharProperty.MidNumLet,
	          WordCharProperty.SingleQuote,
	        ) &&
	        _isAHLetter(immediateRight)) {
	      return false;
	    }

	    // WB7a: Hebrew_Letter × Single_Quote
	    if (immediateLeft == WordCharProperty.HebrewLetter &&
	        immediateRight == WordCharProperty.SingleQuote) {
	      return false;
	    }

	    // WB7b: Hebrew_Letter × Double_Quote Hebrew_Letter
	    if (immediateLeft == WordCharProperty.HebrewLetter &&
	        immediateRight == WordCharProperty.DoubleQuote &&
	        nextRight == WordCharProperty.HebrewLetter) {
	      return false;
	    }

	    // WB7c: Hebrew_Letter Double_Quote × Hebrew_Letter
	    if (nextLeft == WordCharProperty.HebrewLetter &&
	        immediateLeft == WordCharProperty.DoubleQuote &&
	        immediateRight == WordCharProperty.HebrewLetter) {
	      return false;
	    }

	    // Do not break within sequences of digits, or digits adjacent to letters
	    // (“3a”, or “A3”).
	    // WB8: Numeric × Numeric
	    if (immediateLeft == WordCharProperty.Numeric &&
	        immediateRight == WordCharProperty.Numeric) {
	      return false;
	    }

	    // WB9: AHLetter × Numeric
	    if (_isAHLetter(immediateLeft) &&
	        immediateRight == WordCharProperty.Numeric) {
	      return false;
	    }

	    // WB10: Numeric × AHLetter
	    if (immediateLeft == WordCharProperty.Numeric &&
	        _isAHLetter(immediateRight)) {
	      return false;
	    }

	    // Do not break within sequences, such as “3.2” or “3,456.789”.
	    // WB11: Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
	    if (nextLeft == WordCharProperty.Numeric &&
	        _oneOf(
	          immediateLeft,
	          WordCharProperty.MidNum,
	          WordCharProperty.MidNumLet,
	          WordCharProperty.SingleQuote,
	        ) &&
	        immediateRight == WordCharProperty.Numeric) {
	      return false;
	    }

	    // WB12: Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
	    if (immediateLeft == WordCharProperty.Numeric &&
	        _oneOf(
	          immediateRight,
	          WordCharProperty.MidNum,
	          WordCharProperty.MidNumLet,
	          WordCharProperty.SingleQuote,
	        ) &&
	        nextRight == WordCharProperty.Numeric) {
	      return false;
	    }

	    // Do not break between Katakana.
	    // WB13: Katakana × Katakana
	    if (immediateLeft == WordCharProperty.Katakana &&
	        immediateRight == WordCharProperty.Katakana) {
	      return false;
	    }

	    // Do not break from extenders.
	    // WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
	    if (_oneOf(
	          immediateLeft,
	          WordCharProperty.ALetter,
	          WordCharProperty.HebrewLetter,
	          WordCharProperty.Numeric,
	          WordCharProperty.Katakana,
	          WordCharProperty.ExtendNumLet,
	        ) &&
	        immediateRight == WordCharProperty.ExtendNumLet) {
	      return false;
	    }

	    // WB13b: ExtendNumLet × (AHLetter | Numeric | Katakana)
	    if (immediateLeft == WordCharProperty.ExtendNumLet &&
	        _oneOf(
	          immediateRight,
	          WordCharProperty.ALetter,
	          WordCharProperty.HebrewLetter,
	          WordCharProperty.Numeric,
	          WordCharProperty.Katakana,
	        )) {
	      return false;
	    }

	    // Do not break within emoji flag sequences. That is, do not break between
	    // regional indicator (RI) symbols if there is an odd number of RI
	    // characters before the break point.
	    // WB15: sot (RI RI)* RI × RI
	    // TODO(mdebbar): implement this.

	    // WB16: [^RI] (RI RI)* RI × RI
	    // TODO(mdebbar): implement this.

	    // Otherwise, break everywhere (including around ideographs).
	    // WB999: Any ÷ Any
	    return true;
	  }

	  /// Whether [value] is a UTF-16 surrogate code unit, leading or trailing
	  /// (`0xD800` through `0xDFFF`).
	  static bool _isUtf16Surrogate(int value) {
	    // In Dart `&` binds tighter than `==`; the parentheses only make that
	    // explicit.
	    return (value & 0xF800) == 0xD800;
	  }

	  /// Whether [value] equals one of the given choices.
	  ///
	  /// Optional choices that are not supplied are `null` and are skipped, so a
	  /// `null` [value] never matches an omitted choice.
	  static bool _oneOf(
	    WordCharProperty value,
	    WordCharProperty choice1,
	    WordCharProperty choice2, [
	    WordCharProperty choice3,
	    WordCharProperty choice4,
	    WordCharProperty choice5,
	  ]) {
	    if (value == choice1) {
	      return true;
	    }
	    if (value == choice2) {
	      return true;
	    }
	    if (choice3 != null && value == choice3) {
	      return true;
	    }
	    if (choice4 != null && value == choice4) {
	      return true;
	    }
	    if (choice5 != null && value == choice5) {
	      return true;
	    }
	    return false;
	  }

	  /// Whether [property] is `ALetter` or `HebrewLetter` (AHLetter in the
	  /// rules).
	  static bool _isAHLetter(WordCharProperty property) {
	    return _oneOf(
	      property,
	      WordCharProperty.ALetter,
	      WordCharProperty.HebrewLetter,
	    );
	  }
	}