blob: 444a0b3b34fe1751a006858638c9367e38f19108 [file] [log] [blame]
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'package:expect/expect.dart';
import 'dart:convert';
import 'dart:typed_data' show Uint8List;
/// English sample phrase; every character is ASCII, so it encodes to one
/// UTF-8 byte per character.
const String testEnglishPhrase = "The quick brown fox jumps over the lazy dog.";

/// The UTF-8 bytes that [testEnglishPhrase] is expected to encode to.
const List<int> testEnglishUtf8 = <int>[
  // Laid out word by word; each line ends with the byte that follows the word.
  0x54, 0x68, 0x65, 0x20,
  0x71, 0x75, 0x69, 0x63, 0x6b, 0x20,
  0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
  0x66, 0x6f, 0x78, 0x20,
  0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20,
  0x6f, 0x76, 0x65, 0x72, 0x20,
  0x74, 0x68, 0x65, 0x20,
  0x6c, 0x61, 0x7a, 0x79, 0x20,
  0x64, 0x6f, 0x67, 0x2e,
];
/// Danish sample phrase; ASCII apart from æ, ø and å, which take two UTF-8
/// bytes each.
const String testDanishPhrase =
    "Quizdeltagerne spiste jordbær med "
    "fløde mens cirkusklovnen Wolther spillede på xylofon.";

/// The UTF-8 bytes that [testDanishPhrase] is expected to encode to.
const List<int> testDanishUtf8 = <int>[
  // Laid out word by word; a word too long for one line continues on the next.
  0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x67, 0x65, 0x72,
  0x6e, 0x65, 0x20,
  0x73, 0x70, 0x69, 0x73, 0x74, 0x65, 0x20,
  0x6a, 0x6f, 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20,
  0x6d, 0x65, 0x64, 0x20,
  0x66, 0x6c, 0xc3, 0xb8, 0x64, 0x65, 0x20,
  0x6d, 0x65, 0x6e, 0x73, 0x20,
  0x63, 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f, 0x76, 0x6e, 0x65,
  0x6e, 0x20,
  0x57, 0x6f, 0x6c, 0x74, 0x68, 0x65, 0x72, 0x20,
  0x73, 0x70, 0x69, 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20,
  0x70, 0xc3, 0xa5, 0x20,
  0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f, 0x6e, 0x2e,
];
/// Hebrew sample phrase; every letter takes two UTF-8 bytes.
///
/// The literal sits on a line of its own: the original note here warns that
/// editors interact strangely with the right-to-left text direction.
const String testHebrewPhrase =
    "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

/// The UTF-8 bytes that [testHebrewPhrase] is expected to encode to.
const List<int> testHebrewUtf8 = <int>[
  // Laid out word by word, in logical (reading) order.
  0xd7, 0x93, 0xd7, 0x92, 0x20,
  0xd7, 0xa1, 0xd7, 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20,
  0xd7, 0xa9, 0xd7, 0x98, 0x20,
  0xd7, 0x91, 0xd7, 0x99, 0xd7, 0x9d, 0x20,
  0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95, 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20,
  0xd7, 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7, 0xa2, 0x20,
  0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90, 0x20,
  0xd7, 0x9c, 0xd7, 0x95, 0x20,
  0xd7, 0x97, 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20,
  0xd7, 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20,
  0xd7, 0x94, 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98, 0xd7, 0x94,
];
/// Russian sample phrase; every Cyrillic letter takes two UTF-8 bytes.
const String testRussianPhrase =
    "Съешь же ещё этих мягких "
    "французских булок да выпей чаю";

/// The UTF-8 bytes that [testRussianPhrase] is expected to encode to.
const List<int> testRussianUtf8 = <int>[
  // Laid out word by word; a word too long for one line continues on the next.
  0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88, 0xd1, 0x8c, 0x20,
  0xd0, 0xb6, 0xd0, 0xb5, 0x20,
  0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20,
  0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
  0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
  0xd1, 0x84, 0xd1, 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1, 0x83,
  0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
  0xd0, 0xb1, 0xd1, 0x83, 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20,
  0xd0, 0xb4, 0xd0, 0xb0, 0x20,
  0xd0, 0xb2, 0xd1, 0x8b, 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20,
  0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x8e,
];
/// Greek sample phrase; its letters are a mix of two-byte and three-byte
/// UTF-8 sequences.
const String testGreekPhrase =
    "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
    "στὸ χρυσαφὶ ξέφωτο";

/// The UTF-8 bytes that [testGreekPhrase] is expected to encode to.
const List<int> testGreekUtf8 = <int>[
  // Laid out word by word; a word too long for one line continues on the next.
  0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad, 0xce, 0xb5, 0xcf, 0x82, 0x20,
  0xce, 0xba, 0xce, 0xb1, 0xe1, 0xbd, 0xb6, 0x20,
  0xce, 0xbc, 0xcf, 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9,
  0xe1, 0xbd, 0xb2, 0xcf, 0x82, 0x20,
  0xce, 0xb4, 0xe1, 0xbd, 0xb2, 0xce, 0xbd, 0x20,
  0xce, 0xb8, 0xe1, 0xbd, 0xb0, 0x20,
  0xce, 0xb2, 0xcf, 0x81, 0xe1, 0xbf, 0xb6, 0x20,
  0xcf, 0x80, 0xce, 0xb9, 0xe1, 0xbd, 0xb0, 0x20,
  0xcf, 0x83, 0xcf, 0x84, 0xe1, 0xbd, 0xb8, 0x20,
  0xcf, 0x87, 0xcf, 0x81, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86,
  0xe1, 0xbd, 0xb6, 0x20,
  0xce, 0xbe, 0xce, 0xad, 0xcf, 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf,
];
/// Katakana sample phrase; every kana takes three UTF-8 bytes.
const String testKatakanaPhrase =
    "イロハニホヘト チリヌルヲ ワカヨタレソ "
    "ツネナラム ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";

/// The UTF-8 bytes that [testKatakanaPhrase] is expected to encode to.
const List<int> testKatakanaUtf8 = <int>[
  // Laid out word by word, four kana (twelve bytes) per line at most.
  0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83, 0x8f, 0xe3, 0x83, 0x8b,
  0xe3, 0x83, 0x9b, 0xe3, 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20,
  0xe3, 0x83, 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3, 0x83, 0xab,
  0xe3, 0x83, 0xb2, 0x20,
  0xe3, 0x83, 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3, 0x82, 0xbf,
  0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd, 0x20,
  0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3, 0x83, 0x8a, 0xe3, 0x83, 0xa9,
  0xe3, 0x83, 0xa0, 0x20,
  0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3, 0x83, 0x8e, 0xe3, 0x82, 0xaa,
  0xe3, 0x82, 0xaf, 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20,
  0xe3, 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3, 0xe3, 0x82, 0xa8,
  0xe3, 0x83, 0x86, 0x20,
  0xe3, 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad, 0xe3, 0x83, 0xa6,
  0xe3, 0x83, 0xa1, 0xe3, 0x83, 0x9f, 0xe3, 0x82, 0xb7, 0x20,
  0xe3, 0x83, 0xb1, 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82, 0xbb,
  0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3,
];
/// Runs the encoding checks, then each decoding check twice: once with the
/// bytes passed as a plain `List<int>` and once copied into a [Uint8List].
void main() {
  testEncodeToUtf8();

  // Strict decoding of the well-formed sample phrases.
  testUtf8BytesToString((codeUnits) => utf8.decode(codeUnits));
  testUtf8BytesToString(
    (codeUnits) => utf8.decode(Uint8List.fromList(codeUnits)),
  );

  // Lenient decoding: `allowMalformed: true` so that invalid input yields
  // code points to compare instead of an exception.
  testUtf8bytesToCodepoints((codeUnits) {
    final decoded = utf8.decode(codeUnits, allowMalformed: true);
    return decoded.runes.toList();
  });
  testUtf8bytesToCodepoints((codeUnits) {
    final typedBytes = Uint8List.fromList(codeUnits);
    final decoded = utf8.decode(typedBytes, allowMalformed: true);
    return decoded.runes.toList();
  });
}
/// Verifies that `utf8.encode` turns each sample phrase into its expected
/// byte list.
void testEncodeToUtf8() {
  // Encodes [phrase] and compares the result with [expectedBytes]; [reason]
  // is passed through to the comparison as its failure message.
  void check(List<int> expectedBytes, String phrase, String reason) {
    final actualBytes = utf8.encode(phrase);
    Expect.listEquals(expectedBytes, actualBytes, reason);
  }

  check(testEnglishUtf8, testEnglishPhrase, "english to utf8");
  check(testDanishUtf8, testDanishPhrase, "encode danish to utf8");
  check(testHebrewUtf8, testHebrewPhrase, "Hebrew to utf8");
  check(testRussianUtf8, testRussianPhrase, "Russian to utf8");
  check(testGreekUtf8, testGreekPhrase, "Greek to utf8");
  check(testKatakanaUtf8, testKatakanaPhrase, "Katakana to utf8");
}
/// Checks how [utf8ToRunes] decodes well-formed, boundary and malformed
/// UTF-8 input.
///
/// [utf8ToRunes] receives a list of UTF-8 bytes and returns the decoded code
/// points. The expectations assume a lenient decoder: input it cannot decode
/// must come back as [unicodeReplacementCharacterRune] rather than raise.
void testUtf8bytesToCodepoints(List<int> utf8ToRunes(List<int> utf8)) {
  const replacement = unicodeReplacementCharacterRune;

  // Expects [bytes] to decode to exactly [count] replacement characters.
  void expectReplacements(int count, List<int> bytes, String reason) {
    Expect.listEquals(
      List<int>.filled(count, replacement),
      utf8ToRunes(bytes),
      reason,
    );
  }

  // Expects every lead byte in [start, end), followed by a space where a
  // continuation byte should be, to decode to a replacement character and
  // that space.
  void expectLeadBytesThenSpace(int start, int end, String reason) {
    final input = <int>[
      for (var lead = start; lead < end; lead++) ...[lead, 0x20],
    ];
    final expected = <int>[
      for (var lead = start; lead < end; lead++) ...[replacement, 0x20],
    ];
    Expect.listEquals(expected, utf8ToRunes(input), reason);
  }

  Expect.listEquals(
    [954, 972, 963, 956, 949],
    utf8ToRunes([0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]),
    "κόσμε",
  );

  // boundary conditions: First possible sequence of a certain length
  Expect.listEquals([], utf8ToRunes([]), "no input");
  Expect.listEquals([0x0], utf8ToRunes([0x0]), "0");
  Expect.listEquals([0x80], utf8ToRunes([0xc2, 0x80]), "80");
  Expect.listEquals([0x800], utf8ToRunes([0xe0, 0xa0, 0x80]), "800");
  Expect.listEquals([0x10000], utf8ToRunes([0xf0, 0x90, 0x80, 0x80]), "10000");
  // Five- and six-byte forms are expected to be rejected byte by byte.
  expectReplacements(5, [0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
  expectReplacements(6, [0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

  // boundary conditions: Last possible sequence of a certain length
  Expect.listEquals([0x7f], utf8ToRunes([0x7f]), "7f");
  Expect.listEquals([0x7ff], utf8ToRunes([0xdf, 0xbf]), "7ff");
  Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), "ffff");
  expectReplacements(4, [0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
  expectReplacements(5, [0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
  // fd bf bf bf bf bf is the six-byte pattern for 0x7fffffff. This label used
  // to repeat "4000000" from the first six-byte case above.
  expectReplacements(6, [0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

  // other boundary conditions
  Expect.listEquals([0xd7ff], utf8ToRunes([0xed, 0x9f, 0xbf]), "d7ff");
  Expect.listEquals([0xe000], utf8ToRunes([0xee, 0x80, 0x80]), "e000");
  // Here the replacement character is the genuine decoding of ef bf bd.
  Expect.listEquals([replacement], utf8ToRunes([0xef, 0xbf, 0xbd]), "fffd");
  Expect.listEquals(
    [0x10ffff],
    utf8ToRunes([0xf4, 0x8f, 0xbf, 0xbf]),
    "10ffff",
  );
  expectReplacements(4, [0xf4, 0x90, 0x80, 0x80], "110000");

  // unexpected continuation bytes
  expectReplacements(1, [0x80], "80 => replacement character");
  expectReplacements(1, [0xbf], "bf => replacement character");
  final allContinuationBytes = <int>[
    for (var byte = 0x80; byte < 0xc0; byte++) byte,
  ];
  expectReplacements(
    allContinuationBytes.length,
    allContinuationBytes,
    "80 - bf => replacement character x 64",
  );

  // Every lead byte, each followed by a space instead of a continuation byte.
  expectLeadBytesThenSpace(
    0xc0,
    0xe0,
    "c0 - df + space => replacement character + space x 32",
  );
  expectLeadBytesThenSpace(
    0xe0,
    0xf0,
    "e0 - ef + space => replacement character x 16",
  );
  expectLeadBytesThenSpace(
    0xf0,
    0xf8,
    "f0 - f7 + space => replacement character x 8",
  );
  expectLeadBytesThenSpace(
    0xf8,
    0xfc,
    "f8 - fb + space => replacement character x 4",
  );
  expectLeadBytesThenSpace(
    0xfc,
    0xfe,
    "fc - fd + space => replacement character x 2",
  );

  // Sequences with last continuation byte missing
  expectReplacements(1, [0xc2], "2-byte sequence with last byte missing");
  expectReplacements(
    2,
    [0xe0, 0x80],
    "3-byte sequence with last byte missing",
  );
  expectReplacements(
    3,
    [0xf0, 0x80, 0x80],
    "4-byte sequence with last byte missing",
  );
  expectReplacements(
    4,
    [0xf8, 0x88, 0x80, 0x80],
    "5-byte sequence with last byte missing",
  );
  expectReplacements(
    5,
    [0xfc, 0x80, 0x80, 0x80, 0x80],
    "6-byte sequence with last byte missing",
  );
  expectReplacements(
    1,
    [0xdf],
    "2-byte sequence with last byte missing (hi)",
  );
  // ef bf is the only case here where two bytes yield one replacement.
  expectReplacements(
    1,
    [0xef, 0xbf],
    "3-byte sequence with last byte missing (hi)",
  );
  expectReplacements(
    3,
    [0xf7, 0xbf, 0xbf],
    "4-byte sequence with last byte missing (hi)",
  );
  expectReplacements(
    4,
    [0xfb, 0xbf, 0xbf, 0xbf],
    "5-byte sequence with last byte missing (hi)",
  );
  expectReplacements(
    5,
    [0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
    "6-byte sequence with last byte missing (hi)",
  );

  // Concatenation of incomplete sequences: the ten inputs above, back to
  // back. 30 bytes are expected to give 29 replacement characters.
  expectReplacements(
    29,
    [
      0xc2,
      0xe0, 0x80,
      0xf0, 0x80, 0x80,
      0xf8, 0x88, 0x80, 0x80,
      0xfc, 0x80, 0x80, 0x80, 0x80,
      0xdf,
      0xef, 0xbf, // These two bytes form one incomplete sequence.
      // All others form one per byte.
      0xf7, 0xbf, 0xbf,
      0xfb, 0xbf, 0xbf, 0xbf,
      0xfd, 0xbf, 0xbf, 0xbf, 0xbf,
    ],
    "Concatenation of incomplete sequences",
  );

  // Impossible bytes
  expectReplacements(1, [0xfe], "fe");
  expectReplacements(1, [0xff], "ff");
  expectReplacements(4, [0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff");

  // Overlong sequences
  expectReplacements(2, [0xc0, 0xaf], "c0 af");
  expectReplacements(3, [0xe0, 0x80, 0xaf], "e0 80 af");
  expectReplacements(4, [0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
  expectReplacements(5, [0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
  expectReplacements(
    6,
    [0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf],
    "fc 80 80 80 80 af",
  );
  expectReplacements(2, [0xc1, 0xbf], "c1 bf");
  expectReplacements(3, [0xe0, 0x9f, 0xbf], "e0 9f bf");
  expectReplacements(4, [0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
  expectReplacements(5, [0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
  expectReplacements(
    6,
    [0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf],
    "fc 83 bf bf bf bf",
  );
  expectReplacements(2, [0xc0, 0x80], "c0 80");
  expectReplacements(3, [0xe0, 0x80, 0x80], "e0 80 80");
  expectReplacements(4, [0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
  expectReplacements(5, [0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
  expectReplacements(
    6,
    [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80],
    "fc 80 80 80 80 80",
  );

  // Other illegal code positions (???)
  Expect.listEquals([0xfffe], utf8ToRunes([0xef, 0xbf, 0xbe]), "U+FFFE");
  Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), "U+FFFF");
}
/// Verifies that [decodeUtf8] turns each sample byte list back into the
/// phrase it was derived from.
void testUtf8BytesToString(String decodeUtf8(List<int> input)) {
  // Decodes [bytes] and compares the result with [phrase]; [language] is
  // passed through to the comparison as its failure message.
  void check(String phrase, List<int> bytes, String language) {
    final decoded = decodeUtf8(bytes);
    Expect.stringEquals(phrase, decoded, language);
  }

  check(testEnglishPhrase, testEnglishUtf8, "English");
  check(testDanishPhrase, testDanishUtf8, "Danish");
  check(testHebrewPhrase, testHebrewUtf8, "Hebrew");
  check(testRussianPhrase, testRussianUtf8, "Russian");
  check(testGreekPhrase, testGreekUtf8, "Greek");
  check(testKatakanaPhrase, testKatakanaUtf8, "Katakana");
}