blob: ed3ad429065788df023c0cd35e7e6b71fef45de1 [file] [log] [blame]
// Copyright (c) 2018, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
/// Unicode Grapheme Breaking Algorithm Character Categories.
///
/// The sixteen categories below take the distinct values 0 through 15.
/// (Order is irrelevant to correctness, so it is chosen
/// to minimize the size of the generated table strings
/// by avoiding many bytes that need escapes).
const int categoryCR = 0;
const int categoryZWJ = 1;
const int categoryControl = 2;
const int categoryOther = 3; // Any character not in any other category.
const int categoryExtend = 4;
const int categorySpacingMark = 5;
const int categoryRegionalIndicator = 6;
const int categoryPictographic = 7;
const int categoryLF = 8;
const int categoryPrepend = 9;
// Hangul syllable categories L, V, T, LV and LVT (names as used by UAX #29).
const int categoryL = 10;
const int categoryV = 11;
const int categoryT = 12;
const int categoryLV = 13;
const int categoryLVT = 14;
// Synthetic input, not a real character category. Has the same value (15) as
// categorySoT, its counterpart for the backwards automaton.
const int categoryEoT = 15; // End of Text (synthetic input)
// Automaton states for forwards automaton.
//
// Every state value is a multiple of 0x10, so its low four bits are zero.
// NOTE(review): presumably this lets a state be combined with a category
// value (0..15) to index the transition table; confirm against the lookup
// code, which is not part of this file.
const int stateSoT = 0; // Start of text (or grapheme).
const int stateBreak = 0x10; // Always break before next.
const int stateCR = 0x20; // Break unless next is LF.
const int stateOther = 0x30; // Break unless next is Extend, ZWJ, SpacingMark.
const int statePrepend = 0x40; // Only break if next is Control/CR/LF/EoT.
const int stateL = 0x50; // As Other unless next is L, V, LV, LVT.
const int stateV = 0x60; // As Other unless next is V, T.
const int stateT = 0x70; // As Other unless next is T.
const int statePictographic = 0x80; // As Other unless followed by Ext* ZWJ Pic.
const int statePictographicZWJ = 0x90; // As Other unless followed by Pic.
const int stateRegionalSingle = 0xA0; // As Other unless followed by RI.
const int stateSoTNoBreak = 0xB0; // As SoT but never cause break before next.
/// Bit flag or'ed to the automaton output if there should not be a break
/// before the most recent input character.
///
/// The flag occupies the lowest bit, which is zero in every state constant
/// declared in this file (all of them are multiples of 0x10), so it can be
/// set and tested without disturbing the state value.
const int stateNoBreak = 1;
// Backwards Automaton extra/alternative states and categories.
//
// Several of these reuse the numeric value of a forwards-automaton state:
// stateEoT == stateSoT (0), stateLF == stateCR (0x20),
// stateExtend == statePrepend (0x40),
// stateZWJPictographic == statePictographicZWJ (0x90) and
// stateEoTNoBreak == stateSoTNoBreak (0xB0).
const int categorySoT = 15; // Start of Text (synthetic input)
const int stateEoT = 0; // End of text (or grapheme).
const int stateLF = 0x20; // Break unless prev is CR.
const int stateExtend = 0x40; // Only break if prev is Control/CR/LF/SoT.
const int stateZWJPictographic = 0x90; // Preceded by Pic Ext*.
const int stateEoTNoBreak = 0xB0; // As EoT but never cause break before.
const int stateRegionalEven = 0xC0; // There is an even number of RIs before.
// stateRegionalOdd has no numeric value of its own: it is defined as
// stateZWJPictographic (0x90).
// NOTE(review): the aliasing looks intentional (state reuse); confirm against
// the generated backwards transition table.
const int stateRegionalOdd =
stateZWJPictographic; // There is an odd (non-zero!) number of RIs before.
/// Minimum state requesting a look-ahead.
///
/// Equal to [stateRegionalLookahead] (0xD0), which is declared further down.
/// Both look-ahead states (0xD0 and 0xE0) are larger than every ordinary
/// state value declared in this file (the largest of those is 0xC0).
const int stateLookaheadMin = stateRegionalLookahead;
/// State requesting a look-ahead for an even or odd number of RIs.
const int stateRegionalLookahead = 0xD0;
/// State requesting a look-ahead for Pic Ext*.
const int stateZWJPictographicLookahead = 0xE0;