blob: bbfba77ee1a5d29ac0e9b661d94b082e2fb699b7 [file] [log] [blame]
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
part of dart.convert;
/// The Unicode Replacement character `U+FFFD` (�).
///
/// The decoder in this file writes this code point in place of malformed
/// UTF-8 input when it was created with `allowMalformed: true`, and the
/// encoder emits its UTF-8 encoding for unpaired surrogates.
const int unicodeReplacementCharacterRune = 0xFFFD;
/// The Unicode Byte Order Marker (BOM) character `U+FEFF`.
///
/// [Utf8Decoder.convert] documents that the encoding of this character is
/// discarded when it appears at the start of the input.
const int unicodeBomCharacterRune = 0xFEFF;
/// An instance of the default implementation of the [Utf8Codec].
///
/// This instance provides convenient access to the most common UTF-8
/// use cases.
///
/// It is created with the default `allowMalformed: false`, so decoding
/// malformed input throws a [FormatException] unless `allowMalformed: true`
/// is passed to [Utf8Codec.decode].
///
/// Examples:
/// ```dart
/// var encoded = utf8.encode("Îñţérñåţîöñåļîžåţîờñ");
/// var decoded = utf8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6,
/// 0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]);
/// ```
const Utf8Codec utf8 = Utf8Codec();
/// A codec that turns strings into UTF-8 code units (bytes) and UTF-8 code
/// units back into strings.
class Utf8Codec extends Encoding {
  /// The malformed-input policy used when a call does not supply its own.
  final bool _allowMalformed;

  /// Creates a [Utf8Codec].
  ///
  /// The optional [allowMalformed] argument controls how [decoder] (and
  /// [decode]) treat invalid or unterminated character sequences.
  ///
  /// When it is `true` (and not overridden at the method invocation), [decode]
  /// and the [decoder] substitute the Unicode Replacement character `U+FFFD`
  /// (�) for each invalid (or unterminated) octet sequence. Otherwise they
  /// throw a [FormatException].
  const Utf8Codec({bool allowMalformed = false})
      : _allowMalformed = allowMalformed;

  /// The name of this codec, "utf-8".
  String get name {
    return "utf-8";
  }

  /// Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) into
  /// the string they represent.
  ///
  /// If the [codeUnits] start with the encoding of a
  /// [unicodeBomCharacterRune], that character is discarded.
  ///
  /// If [allowMalformed] is `true`, invalid (or unterminated) character
  /// sequences are replaced with the Unicode Replacement character `U+FFFD`
  /// (�). Otherwise a [FormatException] is thrown.
  ///
  /// If [allowMalformed] is omitted, the `allowMalformed` value that was used
  /// to instantiate `this` applies.
  String decode(List<int> codeUnits, {bool? allowMalformed}) {
    final lenient = allowMalformed ?? _allowMalformed;
    // Both decoders are canonical constants, so choosing one allocates nothing.
    if (lenient) {
      return const Utf8Decoder(allowMalformed: true).convert(codeUnits);
    }
    return const Utf8Decoder(allowMalformed: false).convert(codeUnits);
  }

  /// The converter from strings to UTF-8 bytes.
  Utf8Encoder get encoder {
    return const Utf8Encoder();
  }

  /// The converter from UTF-8 bytes to strings, using this codec's
  /// `allowMalformed` setting.
  Utf8Decoder get decoder {
    // Both decoders are canonical constants, so choosing one allocates nothing.
    if (_allowMalformed) {
      return const Utf8Decoder(allowMalformed: true);
    }
    return const Utf8Decoder(allowMalformed: false);
  }
}
/// This class converts strings to their UTF-8 code units (a list of
/// unsigned 8-bit integers).
class Utf8Encoder extends Converter<String, List<int>> {
  const Utf8Encoder();

  /// Converts [string] to its UTF-8 code units (a list of
  /// unsigned 8-bit integers).
  ///
  /// If [start] and [end] are provided, only the substring
  /// `string.substring(start, end)` is converted.
  ///
  /// Any unpaired surrogate character (`U+D800`-`U+DFFF`) in the input string
  /// is encoded as a Unicode Replacement character `U+FFFD` (�).
  ///
  /// Throws a [RangeError] if [start] and [end] do not describe a valid range
  /// of [string].
  Uint8List convert(String string, [int start = 0, int? end]) {
    // `checkValidRange` validates the range and returns the resolved end as a
    // non-nullable int, so it is bound to its own local instead of being
    // assigned back to the nullable parameter and null-checked afterwards.
    final int stop = RangeError.checkValidRange(start, end, string.length);
    final length = stop - start;
    if (length == 0) return Uint8List(0);
    // Create a new encoder with a length that is guaranteed to be big enough.
    // A single code unit uses at most 3 bytes, a surrogate pair at most 4.
    final encoder = _Utf8Encoder.withBufferSize(length * 3);
    final endPosition = encoder._fillBuffer(string, start, stop);
    assert(endPosition >= stop - 1);
    if (endPosition != stop) {
      // Encoding skipped the last code unit.
      // That can only happen if the last code unit is a lead surrogate,
      // which `_fillBuffer` leaves for its caller to handle.
      final lastCodeUnit = string.codeUnitAt(stop - 1);
      assert(_isLeadSurrogate(lastCodeUnit));
      // Write a replacement character to represent the unpaired surrogate.
      encoder._writeReplacementCharacter();
    }
    // Only the filled prefix of the (over-allocated) buffer is returned.
    return encoder._buffer.sublist(0, encoder._bufferIndex);
  }

  /// Starts a chunked conversion.
  ///
  /// The converter works more efficiently if the given [sink] is a
  /// [ByteConversionSink]; any other sink is wrapped with
  /// [ByteConversionSink.from].
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return _Utf8EncoderSink(
        sink is ByteConversionSink ? sink : ByteConversionSink.from(sink));
  }

  // Override the base-classes bind, to provide a better type.
  Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);
}
/// Encodes strings into UTF-8 code units (unsigned 8 bit integers), writing
/// them into a fixed-size byte buffer.
// TODO(floitsch): make this class public.
class _Utf8Encoder {
  /// Not used by this class itself; [_Utf8EncoderSink] keeps a pending lead
  /// surrogate here between chunks.
  int _carry = 0;

  /// Position in [_buffer] where the next byte is written.
  int _bufferIndex = 0;

  /// Destination for the encoded bytes.
  final Uint8List _buffer;

  static const _DEFAULT_BYTE_BUFFER_SIZE = 1024;

  _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE);

  _Utf8Encoder.withBufferSize(int bufferSize)
      : _buffer = _createBuffer(bufferSize);

  /// Allow an implementation to pick the most efficient way of storing bytes.
  static Uint8List _createBuffer(int size) {
    return Uint8List(size);
  }

  /// Appends the three UTF-8 bytes of the replacement character (U+FFFD).
  ///
  /// Used wherever an unpaired surrogate has to be represented.
  void _writeReplacementCharacter() {
    _buffer[_bufferIndex++] = 0xEF;
    _buffer[_bufferIndex++] = 0xBF;
    _buffer[_bufferIndex++] = 0xBD;
  }

  /// Attempts to pair [leadingSurrogate] with [nextCodeUnit] and writes the
  /// outcome to [_buffer].
  ///
  /// Returns `true` when [nextCodeUnit] was a tail surrogate and the combined
  /// code point was written as four bytes. Returns `false` when it was not; in
  /// that case a replacement character is written for the unpaired lead
  /// surrogate and [nextCodeUnit] itself has not been written yet.
  ///
  /// Passing 0 for [nextCodeUnit] is safe and always takes the `false` path.
  bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) {
    if (!_isTailSurrogate(nextCodeUnit)) {
      // Unpaired lead surrogate.
      _writeReplacementCharacter();
      return false;
    }
    final codePoint = _combineSurrogatePair(leadingSurrogate, nextCodeUnit);
    // A code point that needs two UTF-16 code units always needs four bytes
    // in UTF-8.
    assert(codePoint > _THREE_BYTE_LIMIT);
    assert(codePoint <= _FOUR_BYTE_LIMIT);
    _buffer[_bufferIndex++] = 0xF0 | (codePoint >> 18);
    _buffer[_bufferIndex++] = 0x80 | ((codePoint >> 12) & 0x3f);
    _buffer[_bufferIndex++] = 0x80 | ((codePoint >> 6) & 0x3f);
    _buffer[_bufferIndex++] = 0x80 | (codePoint & 0x3f);
    return true;
  }

  /// Encodes as much of `str[start..end)` as fits into [_buffer].
  ///
  /// A lead surrogate in the very last position of the range is never encoded
  /// here; the caller has to deal with it.
  ///
  /// Returns the index of the first code unit that has not been encoded.
  int _fillBuffer(String str, int start, int end) {
    // Exclude a trailing lead surrogate from the range handled by the loop.
    var limit = end;
    if (start != limit && _isLeadSurrogate(str.codeUnitAt(limit - 1))) {
      limit--;
    }
    final capacity = _buffer.length;
    var position = start;
    // Every branch first checks that its whole encoding fits, and stops
    // without consuming the code unit when it does not.
    while (position < limit) {
      final unit = str.codeUnitAt(position);
      if (unit <= _ONE_BYTE_LIMIT) {
        // ASCII has the same representation in UTF-8 and UTF-16.
        if (_bufferIndex >= capacity) break;
        _buffer[_bufferIndex++] = unit;
      } else if (_isLeadSurrogate(unit)) {
        if (_bufferIndex + 4 > capacity) break;
        // Reading the following code unit is safe: a lead surrogate in the
        // last position was excluded from the range above.
        final following = str.codeUnitAt(position + 1);
        if (_writeSurrogate(unit, following)) {
          // The tail surrogate was consumed together with the lead.
          position++;
        }
      } else if (_isTailSurrogate(unit)) {
        if (_bufferIndex + 3 > capacity) break;
        // Unpaired tail surrogate.
        _writeReplacementCharacter();
      } else if (unit <= _TWO_BYTE_LIMIT) {
        if (_bufferIndex + 2 > capacity) break;
        _buffer[_bufferIndex++] = 0xC0 | (unit >> 6);
        _buffer[_bufferIndex++] = 0x80 | (unit & 0x3f);
      } else {
        assert(unit <= _THREE_BYTE_LIMIT);
        if (_bufferIndex + 3 > capacity) break;
        _buffer[_bufferIndex++] = 0xE0 | (unit >> 12);
        _buffer[_bufferIndex++] = 0x80 | ((unit >> 6) & 0x3f);
        _buffer[_bufferIndex++] = 0x80 | (unit & 0x3f);
      }
      position++;
    }
    return position;
  }
}
/// This class encodes chunked strings to UTF-8 code units (unsigned 8-bit
/// integers).
///
/// Each slice is encoded into the inherited buffer and forwarded to [_sink].
/// A lead surrogate at the very end of a slice is kept in `_carry` so that it
/// can be combined with the first code unit of the next slice.
class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin {
/// The byte sink that receives the encoded output.
final ByteConversionSink _sink;
_Utf8EncoderSink(this._sink);
/// Closes this sink and the underlying [_sink].
///
/// If a lead surrogate is still pending in `_carry`, it is first flushed by
/// adding an empty final slice, which in turn calls [close] again.
void close() {
if (_carry != 0) {
// addSlice will call close again, but then the carry must be equal to 0.
addSlice("", 0, 0, true);
return;
}
_sink.close();
}
/// Encodes the code units of [str] from [start] to [end] and adds the
/// resulting bytes to [_sink].
///
/// An empty slice that is not the last one is ignored. If [isLast] is `true`,
/// [close] is called once the slice has been processed.
void addSlice(String str, int start, int end, bool isLast) {
_bufferIndex = 0;
if (start == end && !isLast) {
return;
}
if (_carry != 0) {
// A lead surrogate is pending from the previous slice: try to pair it with
// the first code unit of this slice. With no code unit available, 0 is
// passed, which makes `_writeSurrogate` emit a replacement character.
var nextCodeUnit = 0;
if (start != end) {
nextCodeUnit = str.codeUnitAt(start);
} else {
assert(isLast);
}
var wasCombined = _writeSurrogate(_carry, nextCodeUnit);
// Either we got a non-empty string, or we must not have been combined.
assert(!wasCombined || start != end);
if (wasCombined) start++;
_carry = 0;
}
// Fill the buffer and hand it to the sink repeatedly until the whole slice
// has been consumed. The body runs at least once, even for an empty slice.
do {
start = _fillBuffer(str, start, end);
var isLastSlice = isLast && (start == end);
// `_fillBuffer` never encodes a lead surrogate in the last position.
if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) {
if (isLast && _bufferIndex < _buffer.length - 3) {
// There is still space for the replacement character to represent
// the last incomplete surrogate.
_writeReplacementCharacter();
} else {
// Otherwise store it in the carry. If isLast is true, then
// close will flush the last carry.
_carry = str.codeUnitAt(start);
}
start++;
}
_sink.addSlice(_buffer, 0, _bufferIndex, isLastSlice);
_bufferIndex = 0;
} while (start < end);
if (isLast) close();
}
// TODO(floitsch): implement asUtf8Sink. Slightly complicated because it
// needs to deal with malformed input.
}
/// A converter from UTF-8 code units (lists of unsigned 8-bit integers) to
/// strings.
class Utf8Decoder extends Converter<List<int>, String> {
  /// Whether malformed input is replaced instead of causing an exception.
  final bool _allowMalformed;

  /// Creates a [Utf8Decoder].
  ///
  /// The optional [allowMalformed] argument controls how [convert] treats
  /// invalid or unterminated character sequences.
  ///
  /// When it is `true`, [convert] substitutes the Unicode Replacement
  /// character `U+FFFD` (�) for each invalid (or unterminated) character
  /// sequence. Otherwise it throws a [FormatException].
  const Utf8Decoder({bool allowMalformed = false})
      : _allowMalformed = allowMalformed;

  /// Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the
  /// corresponding string.
  ///
  /// Uses the code units from [start] to, but not including, [end].
  /// If [end] is omitted, it defaults to `codeUnits.length`.
  ///
  /// If the [codeUnits] start with the encoding of a
  /// [unicodeBomCharacterRune], that character is discarded.
  String convert(List<int> codeUnits, [int start = 0, int? end]) {
    // The implementation gets the first chance to intercept and specialize
    // based on the type of codeUnits; a null answer means it declined.
    final intercepted =
        _convertIntercepted(_allowMalformed, codeUnits, start, end);
    return intercepted ??
        _Utf8Decoder(_allowMalformed).convertSingle(codeUnits, start, end);
  }

  /// Starts a chunked conversion.
  ///
  /// The converter works more efficiently if the given [sink] is a
  /// [StringConversionSink]; any other sink is wrapped with
  /// [StringConversionSink.from].
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    final StringConversionSink stringSink =
        sink is StringConversionSink ? sink : StringConversionSink.from(sink);
    return stringSink.asUtf8Sink(_allowMalformed);
  }

  // Override the base-classes bind, to provide a better type.
  Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);

  external Converter<List<int>, T> fuse<T>(Converter<String, T> next);

  external static String? _convertIntercepted(
      bool allowMalformed, List<int> codeUnits, int start, int? end);
}
// UTF-8 constants: the largest value that fits in each encoded length.
const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits
const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits
const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits
const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max.

// UTF-16 constants.
const int _SURROGATE_TAG_MASK = 0xFC00;
const int _SURROGATE_VALUE_MASK = 0x3FF;
const int _LEAD_SURROGATE_MIN = 0xD800;
const int _TAIL_SURROGATE_MIN = 0xDC00;

/// Whether [codeUnit] is a UTF-16 lead surrogate, i.e. its tag bits equal
/// those of `0xD800`.
bool _isLeadSurrogate(int codeUnit) {
  final tag = codeUnit & _SURROGATE_TAG_MASK;
  return tag == _LEAD_SURROGATE_MIN;
}

/// Whether [codeUnit] is a UTF-16 tail surrogate, i.e. its tag bits equal
/// those of `0xDC00`.
bool _isTailSurrogate(int codeUnit) {
  final tag = codeUnit & _SURROGATE_TAG_MASK;
  return tag == _TAIL_SURROGATE_MIN;
}

/// Combines the value bits of a [lead] and a [tail] surrogate into the code
/// point they encode.
///
/// The ten value bits of [lead] become the high part, the ten value bits of
/// [tail] the low part, offset by `0x10000`.
int _combineSurrogatePair(int lead, int tail) {
  final highPart = (lead & _SURROGATE_VALUE_MASK) << 10;
  final lowPart = tail & _SURROGATE_VALUE_MASK;
  return (0x10000 + highPart) | lowPart;
}
/// Stateful UTF-8 decoder built on a table-driven state machine.
///
/// The machine state and the partially decoded character are stored in
/// [_state] and [_charOrIndex] when [decodeGeneral] returns and are read back
/// when it is called again, so a multi-byte sequence may be split across
/// successive calls.
class _Utf8Decoder {
/// Decode malformed UTF-8 as replacement characters (instead of throwing)?
final bool allowMalformed;
/// Decoder DFA state.
///
/// The decoding methods set this to one of the state constants declared
/// below. Its starting value comes from the external constructor, which is
/// not visible here.
int _state;
/// Partially decoded character. Meaning depends on state. Not used when in
/// the initial/accept state. When in an error state, contains the index into
/// the input of the error.
int _charOrIndex = 0;
// State machine for UTF-8 decoding, based on this decoder by Björn Höhrmann:
// https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
//
// One iteration in the state machine proceeds as:
//
// type = typeTable[byte];
// char = (state != accept)
// ? (byte & 0x3F) | (char << 6)
// : byte & (shiftedByteMask >> type);
// state = transitionTable[state + type];
//
// After each iteration, if state == accept, char is output as a character.
// Mask to and on the type read from the table.
// With this mask the letters 'A'..'O' used in [typeTable] become the type
// numbers 1..15.
static const int typeMask = 0x1F;
// Mask shifted right by byte type to mask first byte of sequence.
static const int shiftedByteMask = 0xF0FE;
// Byte types.
// 'A' = ASCII, 00-7F
// 'B' = 2-byte, C2-DF
// 'C' = 3-byte, E1-EC, EE
// 'D' = 3-byte (possibly surrogate), ED
// 'E' = Illegal, C0-C1, F5+
// 'F' = Low extension, 80-8F
// 'G' = Mid extension, 90-9F
// 'H' = High extension, A0-BA, BC-BE
// 'I' = Second byte of BOM, BB
// 'J' = Third byte of BOM, BF
// 'K' = 3-byte (possibly overlong), E0
// 'L' = First byte of BOM, EF
// 'M' = 4-byte (possibly out-of-range), F4
// 'N' = 4-byte, F1-F3
// 'O' = 4-byte (possibly overlong), F0
// The table has one letter per byte value, 256 in total.
static const String typeTable = ""
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" // 00-1F
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" // 20-3F
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" // 40-5F
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" // 60-7F
"FFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGG" // 80-9F
"HHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHJ" // A0-BF
"EEBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" // C0-DF
"KCCCCCCCCCCCCDCLONNNMEEEEEEEEEEE" // E0-FF
;
// States (offsets into transition table).
// Non-error states have even values and error states odd values; this is
// what [isErrorState] tests.
static const int IA = 0x00; // Initial / Accept
static const int BB = 0x10; // Before BOM
static const int AB = 0x20; // After BOM
static const int X1 = 0x30; // Expecting one extension byte
static const int X2 = 0x3A; // Expecting two extension bytes
static const int X3 = 0x44; // Expecting three extension bytes
static const int TO = 0x4E; // Possibly overlong 3-byte
static const int TS = 0x58; // Possibly surrogate
static const int QO = 0x62; // Possibly overlong 4-byte
static const int QR = 0x6C; // Possibly out-of-range 4-byte
static const int B1 = 0x76; // One byte into BOM
static const int B2 = 0x80; // Two bytes into BOM
static const int E1 = 0x41; // Error: Missing extension byte
static const int E2 = 0x43; // Error: Unexpected extension byte
static const int E3 = 0x45; // Error: Invalid byte
static const int E4 = 0x47; // Error: Overlong encoding
static const int E5 = 0x49; // Error: Out of range
static const int E6 = 0x4B; // Error: Surrogate
static const int E7 = 0x4D; // Error: Unfinished
// Character equivalents for states.
// Each string holds the single code unit whose value is the state constant
// of the same name, so that states can be interpolated into
// [transitionTable].
static const String _IA = '\u0000';
static const String _BB = '\u0010';
static const String _AB = '\u0020';
static const String _X1 = '\u0030';
static const String _X2 = '\u003A';
static const String _X3 = '\u0044';
static const String _TO = '\u004E';
static const String _TS = '\u0058';
static const String _QO = '\u0062';
static const String _QR = '\u006C';
static const String _B1 = '\u0076';
static const String _B2 = '\u0080';
static const String _E1 = '\u0041';
static const String _E2 = '\u0043';
static const String _E3 = '\u0045';
static const String _E4 = '\u0047';
static const String _E5 = '\u0049';
static const String _E6 = '\u004B';
static const String _E7 = '\u004D';
// Transition table of the state machine. Maps state and byte type
// to next state.
//
// The entry for a state and a type is at index `state + type`. Types start
// at 1, hence the single padding character in front of each of the first
// three rows. The rows of the states from X1 to B1 contain only ten entries
// (types A-J); for types K-O the index reaches into the five leading E1
// entries of the following row ("Overlap 5 E1s").
static const String transitionTable = " "
// A B C D E F G H I J K L M N O
"$_IA$_X1$_X2$_TS$_E3$_E2$_E2$_E2$_E2$_E2$_TO$_X2$_QR$_X3$_QO " // IA
"$_IA$_X1$_X2$_TS$_E3$_E2$_E2$_E2$_E2$_E2$_TO$_B1$_QR$_X3$_QO " // BB
"$_IA$_X1$_X2$_TS$_E3$_E2$_E2$_E2$_E2$_E2$_TO$_X2$_QR$_X3$_QO " // AB
"$_E1$_E1$_E1$_E1$_E1$_IA$_IA$_IA$_IA$_IA" // Overlap 5 E1s X1
"$_E1$_E1$_E1$_E1$_E1$_X1$_X1$_X1$_X1$_X1" // Overlap 5 E1s X2
"$_E1$_E1$_E1$_E1$_E1$_X2$_X2$_X2$_X2$_X2" // Overlap 5 E1s X3
"$_E1$_E1$_E1$_E1$_E1$_E4$_E4$_X1$_X1$_X1" // Overlap 5 E1s TO
"$_E1$_E1$_E1$_E1$_E1$_X1$_X1$_E6$_E6$_E6" // Overlap 5 E1s TS
"$_E1$_E1$_E1$_E1$_E1$_E4$_X2$_X2$_X2$_X2" // Overlap 5 E1s QO
"$_E1$_E1$_E1$_E1$_E1$_X2$_E5$_E5$_E5$_E5" // Overlap 5 E1s QR
"$_E1$_E1$_E1$_E1$_E1$_X1$_X1$_X1$_B2$_X1" // Overlap 5 E1s B1
"$_E1$_E1$_E1$_E1$_E1$_IA$_IA$_IA$_IA$_AB$_E1$_E1$_E1$_E1$_E1" // B2
;
// Aliases for states.
static const int initial = IA;
static const int accept = IA;
static const int beforeBom = BB;
static const int afterBom = AB;
static const int errorMissingExtension = E1;
static const int errorUnexpectedExtension = E2;
static const int errorInvalid = E3;
static const int errorOverlong = E4;
static const int errorOutOfRange = E5;
static const int errorSurrogate = E6;
static const int errorUnfinished = E7;
/// Whether [state] is one of the error states, which are exactly the states
/// with an odd value.
@pragma("vm:prefer-inline")
static bool isErrorState(int state) => (state & 1) != 0;
/// Returns the message describing the error [state].
///
/// Returns the empty string if [state] is not one of the error states.
static String errorDescription(int state) {
switch (state) {
case errorMissingExtension:
return "Missing extension byte";
case errorUnexpectedExtension:
return "Unexpected extension byte";
case errorInvalid:
return "Invalid UTF-8 byte";
case errorOverlong:
return "Overlong encoding";
case errorOutOfRange:
return "Out of unicode range";
case errorSurrogate:
return "Encoded surrogate";
case errorUnfinished:
return "Unfinished UTF-8 octet sequence";
default:
return "";
}
}
// The constructor and the two entry points below are supplied externally;
// their implementations are not part of this file.
external _Utf8Decoder(bool allowMalformed);
external String convertSingle(List<int> codeUnits, int start, int? maybeEnd);
external String convertChunked(List<int> codeUnits, int start, int? maybeEnd);
/// Decodes [codeUnits] from [start] to [maybeEnd] (or to the end of the list
/// if [maybeEnd] is `null`) and returns the decoded string.
///
/// If [single] is `true`, a sequence that is still unfinished at the end of
/// the range is treated as malformed input; otherwise the decoder state is
/// kept for a following call.
///
/// Throws a [RangeError] if the range is not valid for [codeUnits], and a
/// [FormatException] carrying the offset of the error in [codeUnits] if the
/// decoder ended up in an error state. The state is reset to [initial]
/// before the exception is thrown.
String convertGeneral(
List<int> codeUnits, int start, int? maybeEnd, bool single) {
int end = RangeError.checkValidRange(start, maybeEnd, codeUnits.length);
if (start == end) return "";
// Have bytes as Uint8List.
Uint8List bytes;
// Added to the index reported by the decoder to obtain an offset into
// `codeUnits`: the copy made for non-Uint8List input starts at index 0.
int errorOffset;
if (codeUnits is Uint8List) {
bytes = codeUnits;
errorOffset = 0;
} else {
bytes = _makeUint8List(codeUnits, start, end);
errorOffset = start;
end -= start;
start = 0;
}
String result = _convertRecursive(bytes, start, end, single);
if (isErrorState(_state)) {
String message = errorDescription(_state);
_state = initial; // Ready for more input.
throw FormatException(message, codeUnits, errorOffset + _charOrIndex);
}
return result;
}
/// Decodes `bytes` from [start] to [end], splitting ranges of more than 1000
/// bytes in half and decoding the halves separately.
///
/// The first half is always decoded with `single` set to `false`, so that a
/// sequence crossing the split point is continued by the second half. If
/// the first half leaves the decoder in an error state, the second half is
/// not decoded.
String _convertRecursive(Uint8List bytes, int start, int end, bool single) {
// Chunk long strings to avoid a pathological case of JS repeated string
// concatenation.
if (end - start > 1000) {
int mid = (start + end) ~/ 2;
String s1 = _convertRecursive(bytes, start, mid, false);
if (isErrorState(_state)) return s1;
String s2 = _convertRecursive(bytes, mid, end, single);
return s1 + s2;
}
return decodeGeneral(bytes, start, end, single);
}
/// Flushes this decoder as if closed.
///
/// This method throws if the input was partial and the decoder was
/// constructed with `allowMalformed` set to `false`.
///
/// With `allowMalformed` set to `true`, a single replacement character is
/// written to [sink] for the partial input instead. In every case the
/// decoder state is reset to [initial].
void flush(StringSink sink) {
final int state = _state;
_state = initial;
// The initial, before-BOM and after-BOM states have no pending input.
if (state <= afterBom) {
return;
}
// Unfinished sequence.
if (allowMalformed) {
sink.writeCharCode(unicodeReplacementCharacterRune);
} else {
throw FormatException(errorDescription(errorUnfinished), null, null);
}
}
/// Runs the state machine over `bytes` from [start] to [end] and returns the
/// decoded characters.
///
/// The range must not be empty: the byte at [start] is read unconditionally.
///
/// On malformed input with [allowMalformed] set to `false`, the error state
/// is stored in [_state], the index of the offending byte in [_charOrIndex],
/// and the empty string is returned; callers detect this with
/// [isErrorState]. With [allowMalformed] set to `true`, replacement
/// characters are written instead and decoding continues.
///
/// If [single] is `true`, a sequence that is unfinished at [end] is handled
/// as malformed input as well. Otherwise the state and the partial character
/// are saved for the next call.
String decodeGeneral(Uint8List bytes, int start, int end, bool single) {
final String typeTable = _Utf8Decoder.typeTable;
final String transitionTable = _Utf8Decoder.transitionTable;
int state = _state;
int char = _charOrIndex;
final StringBuffer buffer = StringBuffer();
int i = start;
int byte = bytes[i++];
loop:
while (true) {
// Feed bytes through the state machine until a character is completed.
multibyte:
while (true) {
int type = typeTable.codeUnitAt(byte) & typeMask;
// At the start of a sequence the payload bits of the first byte are
// selected by its type; later bytes each contribute their low six bits.
char = (state <= afterBom)
? byte & (shiftedByteMask >> type)
: (byte & 0x3F) | (char << 6);
state = transitionTable.codeUnitAt(state + type);
if (state == accept) {
buffer.writeCharCode(char);
if (i == end) break loop;
break multibyte;
} else if (isErrorState(state)) {
if (allowMalformed) {
switch (state) {
case errorInvalid:
case errorUnexpectedExtension:
// A single byte that can't start a sequence.
buffer.writeCharCode(unicodeReplacementCharacterRune);
break;
case errorMissingExtension:
// Unfinished sequence followed by a byte that can start a
// sequence.
buffer.writeCharCode(unicodeReplacementCharacterRune);
// Re-parse offending byte.
i -= 1;
break;
default:
// Unfinished sequence followed by a byte that can't start a
// sequence.
buffer.writeCharCode(unicodeReplacementCharacterRune);
buffer.writeCharCode(unicodeReplacementCharacterRune);
break;
}
state = initial;
} else {
// Report the error through the fields; `i - 1` is the index of the byte
// that was just processed.
_state = state;
_charOrIndex = i - 1;
return "";
}
}
if (i == end) break loop;
byte = bytes[i++];
}
// A character was just completed and more input remains. Look for a run of
// ASCII bytes, which can be copied without going through the state machine.
final int markStart = i;
byte = bytes[i++];
if (byte < 128) {
int markEnd = end;
while (i < end) {
byte = bytes[i++];
if (byte >= 128) {
markEnd = i - 1;
break;
}
}
assert(markStart < markEnd);
// Runs shorter than 20 bytes are written one character at a time, longer
// runs through an intermediate string.
if (markEnd - markStart < 20) {
for (int m = markStart; m < markEnd; m++) {
buffer.writeCharCode(bytes[m]);
}
} else {
buffer.write(String.fromCharCodes(bytes, markStart, markEnd));
}
if (markEnd == end) break loop;
}
// Here `byte` is a non-ASCII byte that has not been processed yet; the next
// iteration of the outer loop feeds it to the state machine.
}
if (single && state > afterBom) {
// Unfinished sequence.
if (allowMalformed) {
buffer.writeCharCode(unicodeReplacementCharacterRune);
} else {
_state = errorUnfinished;
_charOrIndex = end;
return "";
}
}
// Save the state so that a following call can continue where this one
// stopped.
_state = state;
_charOrIndex = char;
return buffer.toString();
}
/// Copies the elements of [codeUnits] from [start] to [end] into a new
/// [Uint8List].
///
/// Elements that have any bit set outside the low eight bits are stored as
/// `0xFF`.
static Uint8List _makeUint8List(List<int> codeUnits, int start, int end) {
final int length = end - start;
final Uint8List bytes = Uint8List(length);
for (int i = 0; i < length; i++) {
int b = codeUnits[start + i];
if ((b & ~0xFF) != 0) {
// Replace invalid byte values by FF, which is also invalid.
b = 0xFF;
}
bytes[i] = b;
}
return bytes;
}
}