// Source blob: 9311e45614917259160e52bcd8f9d0e6068561a4
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
library utf.utf82_test;
import 'package:test/test.dart';
import 'package:utf/utf.dart';
import 'expect.dart' as expect;
/// English pangram used as test input; every character is ASCII.
const String testEnglishPhrase = "The quick brown fox jumps over the lazy dog.";
/// Expected UTF-8 bytes for [testEnglishPhrase].
///
/// Every byte is below 0x80, i.e. one byte per character. The `// 8` marker
/// on the first row notes that each full row holds eight bytes.
const List<int> testEnglishUtf8 = const <int>[
0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, // 8
0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
0x64, 0x6f, 0x67, 0x2e
];
/// Danish test phrase; mostly ASCII plus the letters æ, ø and å.
const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "
"fløde mens cirkusklovnen Wolther spillede på xylofon.";
/// Expected UTF-8 bytes for [testDanishPhrase].
///
/// The non-ASCII letters appear as two-byte sequences with lead byte 0xc3
/// (0xc3 0xa6, 0xc3 0xb8, 0xc3 0xa5); everything else is a single byte.
const List<int> testDanishUtf8 = const <int>[
0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74, // 8
0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
0x6e, 0x2e
];
// Unusual formatting (literal on its own line) due to a strange editor
// interaction with right-to-left text direction.
/// Hebrew test phrase (right-to-left script).
const String testHebrewPhrase =
"דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";
/// Expected UTF-8 bytes for [testHebrewPhrase].
///
/// Each letter is a two-byte sequence with lead byte 0xd7; the words are
/// separated by single-byte spaces (0x20).
const List<int> testHebrewUtf8 = const <int>[
0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7, // 8
0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
0xd7, 0x94
];
/// Russian (Cyrillic) test phrase.
const String testRussianPhrase = "Съешь же ещё этих мягких "
"французских булок да выпей чаю";
/// Expected UTF-8 bytes for [testRussianPhrase].
///
/// Each letter is a two-byte sequence with lead byte 0xd0 or 0xd1; the words
/// are separated by single-byte spaces (0x20).
const List<int> testRussianUtf8 = const <int>[
0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88, // 8
0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
0x87, 0xd0, 0xb0, 0xd1, 0x8e
];
/// Greek test phrase, including polytonic (accented) letters.
const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
"στὸ χρυσαφὶ ξέφωτο";
/// Expected UTF-8 bytes for [testGreekPhrase].
///
/// Mixes two-byte sequences (lead bytes 0xce / 0xcf) with three-byte
/// sequences (lead byte 0xe1), plus single-byte spaces (0x20).
const List<int> testGreekUtf8 = const <int>[
0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad, // 8
0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf
];
/// Two-line Katakana test phrase.
///
/// Written as a multi-line string literal, so the line break between the two
/// rows is part of the value (it shows up as 0x0a in [testKatakanaUtf8]).
const String testKatakanaPhrase = """
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";
/// Expected UTF-8 bytes for [testKatakanaPhrase].
///
/// Each Katakana character is a three-byte sequence with lead byte 0xe3;
/// the separators are single-byte spaces (0x20) and one newline (0x0a).
const List<int> testKatakanaUtf8 = const <int>[
0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83, // 8
0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3
];
/// Entry point: registers every UTF-8 test routine of this file with the
/// test runner.
void main() {
  // Map literals iterate in insertion order, so the routines are registered
  // in the order they are listed here.
  final registrations = {
    'utf8 bytes to codepoints': testUtf8bytesToCodepoints,
    'utf8 bytes to string': testUtf8BytesToString,
    'encode to utf8': testEncodeToUtf8,
    'iterable methods': testIterableMethods,
  };
  registrations.forEach((description, body) {
    test(description, body);
  });
}
/// Checks that [encodeUtf8] turns each sample phrase into its expected
/// UTF-8 byte table.
void testEncodeToUtf8() {
  // Encodes [phrase] and compares the result with [expectedBytes]; [reason]
  // is the message attached to the comparison.
  void checkEncoding(List<int> expectedBytes, String phrase, String reason) {
    expect.listEquals(expectedBytes, encodeUtf8(phrase), reason);
  }

  checkEncoding(testEnglishUtf8, testEnglishPhrase, "english to utf8");
  checkEncoding(testDanishUtf8, testDanishPhrase, "encode danish to utf8");
  checkEncoding(testHebrewUtf8, testHebrewPhrase, "Hebrew to utf8");
  checkEncoding(testRussianUtf8, testRussianPhrase, "Russian to utf8");
  checkEncoding(testGreekUtf8, testGreekPhrase, "Greek to utf8");
  checkEncoding(testKatakanaUtf8, testKatakanaPhrase, "Katakana to utf8");
}
/// Exercises [utf8ToCodepoints] on well-formed and malformed byte sequences.
///
/// The cases are grouped as: a plain multi-byte word, the first and last
/// possible sequence of each length, other boundary values, stray
/// continuation bytes, lead bytes followed by a non-continuation byte,
/// truncated sequences, impossible bytes, overlong encodings, and encoded
/// UTF-16 surrogates. Wherever the input is not valid UTF-8 the expected
/// result is [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT].
///
/// The third argument of every `expect.listEquals` call is the label
/// attached to that comparison; it names the input (or the code point the
/// input would denote).
void testUtf8bytesToCodepoints() {
  expect.listEquals(
      [954, 972, 963, 956, 949],
      utf8ToCodepoints(
          [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]),
      "κόσμε");

  // boundary conditions: First possible sequence of a certain length
  expect.listEquals([], utf8ToCodepoints([]), "no input");
  expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0");
  expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80");
  expect.listEquals([0x800], utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");
  expect.listEquals(
      [0x10000], utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]), "4000000");

  // boundary conditions: Last possible sequence of a certain length
  expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f");
  expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff");
  expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff");
  // Largest 6-byte value: 1 payload bit in the lead byte + 5 * 6 bits.
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]), "7fffffff");

  // other boundary conditions
  expect.listEquals([0xd7ff], utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");
  expect.listEquals([0xe000], utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd");
  expect.listEquals(
      [0x10ffff], utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf4, 0x90, 0x80, 0x80]), "110000");

  // unexpected continuation bytes
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0x80]), "80 => replacement character");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xbf]), "bf => replacement character");

  List<int> allContinuationBytes = <int>[];
  List<int> matchingReplacementChars = <int>[];
  for (int i = 0x80; i < 0xc0; i++) {
    allContinuationBytes.add(i);
    matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
  }
  expect.listEquals(
      matchingReplacementChars,
      utf8ToCodepoints(allContinuationBytes),
      "80 - bf => replacement character x 64");

  // Lead bytes followed by a space instead of a continuation byte. In each
  // of the following groups the expectation is exactly one replacement
  // character per (lead byte, space) pair; the space itself is not part of
  // the expected output.
  List<int> allFirstTwoByteSeq = <int>[];
  matchingReplacementChars = <int>[];
  for (int i = 0xc0; i < 0xe0; i++) {
    allFirstTwoByteSeq.addAll([i, 0x20]);
    matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
  }
  expect.listEquals(
      matchingReplacementChars,
      utf8ToCodepoints(allFirstTwoByteSeq),
      "c0 - df + space => replacement character x 32");

  List<int> allFirstThreeByteSeq = <int>[];
  matchingReplacementChars = <int>[];
  for (int i = 0xe0; i < 0xf0; i++) {
    allFirstThreeByteSeq.addAll([i, 0x20]);
    matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
  }
  expect.listEquals(
      matchingReplacementChars,
      utf8ToCodepoints(allFirstThreeByteSeq),
      "e0 - ef + space => replacement character x 16");

  List<int> allFirstFourByteSeq = <int>[];
  matchingReplacementChars = <int>[];
  for (int i = 0xf0; i < 0xf8; i++) {
    allFirstFourByteSeq.addAll([i, 0x20]);
    matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
  }
  expect.listEquals(
      matchingReplacementChars,
      utf8ToCodepoints(allFirstFourByteSeq),
      "f0 - f7 + space => replacement character x 8");

  List<int> allFirstFiveByteSeq = <int>[];
  matchingReplacementChars = <int>[];
  for (int i = 0xf8; i < 0xfc; i++) {
    allFirstFiveByteSeq.addAll([i, 0x20]);
    matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
  }
  expect.listEquals(
      matchingReplacementChars,
      utf8ToCodepoints(allFirstFiveByteSeq),
      "f8 - fb + space => replacement character x 4");

  List<int> allFirstSixByteSeq = <int>[];
  matchingReplacementChars = <int>[];
  for (int i = 0xfc; i < 0xfe; i++) {
    allFirstSixByteSeq.addAll([i, 0x20]);
    matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
  }
  expect.listEquals(
      matchingReplacementChars,
      utf8ToCodepoints(allFirstSixByteSeq),
      "fc - fd + space => replacement character x 2");

  // Sequences with last continuation byte missing
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xc2]), "2-byte sequence with last byte missing");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xe0, 0x80]), "3-byte sequence with last byte missing");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf0, 0x80, 0x80]),
      "4-byte sequence with last byte missing");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80]),
      "5-byte sequence with last byte missing");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80]),
      "6-byte sequence with last byte missing");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xdf]), "2-byte sequence with last byte missing (hi)");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xef, 0xbf]),
      "3-byte sequence with last byte missing (hi)");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf7, 0xbf, 0xbf]),
      "4-byte sequence with last byte missing (hi)");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf]),
      "5-byte sequence with last byte missing (hi)");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]),
      "6-byte sequence with last byte missing (hi)");

  // Concatenation of incomplete sequences: the ten truncated sequences above
  // back to back, expected to yield one replacement character each.
  expect.listEquals(
      [
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
        UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
      ],
      utf8ToCodepoints([
        0xc2, // 1
        0xe0, 0x80,
        0xf0, 0x80, 0x80,
        0xf8, 0x88, 0x80, 0x80,
        0xfc, 0x80, 0x80, 0x80, 0x80,
        0xdf,
        0xef, 0xbf,
        0xf7, 0xbf, 0xbf,
        0xfb, 0xbf, 0xbf, 0xbf,
        0xfd, 0xbf, 0xbf, 0xbf, 0xbf
      ]),
      "Concatenation of incomplete sequences");

  // Impossible bytes
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfe]), "fe");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xff]), "ff");
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff");

  // Overlong sequences
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xc0, 0xaf]), "c0 af");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xe0, 0x80, 0xaf]), "e0 80 af");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]),
      "fc 80 80 80 80 af");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xc1, 0xbf]), "c1 bf");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xe0, 0x9f, 0xbf]), "e0 9f bf");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]),
      "fc 83 bf bf bf bf");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xc0, 0x80]), "c0 80");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xe0, 0x80, 0x80]), "e0 80 80");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80");
  expect.listEquals(
      [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]),
      "fc 80 80 80 80 80");

  // Illegal code positions: single UTF-16 surrogates encoded as UTF-8
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xed, 0xa0, 0x80]), "U+D800");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xed, 0xad, 0xbf]), "U+DB7F");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xed, 0xae, 0x80]), "U+DB80");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xed, 0xaf, 0xbf]), "U+DBFF");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xed, 0xb0, 0x80]), "U+DC00");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xed, 0xbe, 0x80]), "U+DF80");
  expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xed, 0xbf, 0xbf]), "U+DFFF");

  // Paired UTF-16 surrogates: each half is expected to be replaced separately
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80]), "U+D800 U+DC00");
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf]), "U+D800 U+DFFF");
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80]), "U+DB7F U+DC00");
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf]), "U+DB7F U+DFFF");
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80]), "U+DB80 U+DC00");
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf]), "U+DB80 U+DFFF");
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80]), "U+DBFF U+DC00");
  expect.listEquals([
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
  ], utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf]), "U+DBFF U+DFFF");

  // U+FFFE and U+FFFF: these two code points are expected to be passed
  // through unchanged rather than replaced.
  expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]), "U+FFFE");
  expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "U+FFFF");
}
/// Checks that [decodeUtf8] turns each sample byte table back into its
/// phrase.
void testUtf8BytesToString() {
  // Decodes [utf8Bytes] and compares the result with [expectedPhrase];
  // [reason] is the message attached to the comparison.
  void checkDecoding(
      String expectedPhrase, List<int> utf8Bytes, String reason) {
    expect.stringEquals(expectedPhrase, decodeUtf8(utf8Bytes), reason);
  }

  checkDecoding(testEnglishPhrase, testEnglishUtf8, "English");
  checkDecoding(testDanishPhrase, testDanishUtf8, "Danish");
  checkDecoding(testHebrewPhrase, testHebrewUtf8, "Hebrew");
  checkDecoding(testRussianPhrase, testRussianUtf8, "Russian");
  checkDecoding(testGreekPhrase, testGreekUtf8, "Greek");
  checkDecoding(testKatakanaPhrase, testKatakanaUtf8, "Katakana");
}
/// Checks the [Iterable] view returned by [decodeUtf8AsIterable].
///
/// For an ASCII phrase and a Katakana phrase it reads `first`, then walks
/// the same decoder again from the start to rebuild the whole phrase.
void testIterableMethods() {
  // Runs both checks for one phrase / UTF-8 byte table pair.
  void checkDecoder(String phrase, List<int> utf8Bytes) {
    IterableUtf8Decoder decoder = decodeUtf8AsIterable(utf8Bytes);
    // get the first character; the expected value comes from the phrase, not
    // from the encoded bytes, so the check also holds for multi-byte input.
    expect.equals(phrase.codeUnits[0], decoder.first);
    // get the whole translation using the Iterable interface
    expect.stringEquals(
        phrase, new String.fromCharCodes(new List<int>.from(decoder)));
  }

  checkDecoder(testEnglishPhrase, testEnglishUtf8);
  checkDecoder(testKatakanaPhrase, testKatakanaUtf8);
}