// blob: a207cdf710a60389398f79c89d2f7dd248be5f0a [file] [log] [blame]
/*
* Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file
* for details. All rights reserved. Use of this source code is governed by a
* BSD-style license that can be found in the LICENSE file.
*/
/**
* @assertion const Utf8Codec({bool allowMalformed: false})
* Instantiates a new Utf8Codec.
*
* The optional allowMalformed argument defines how decoder (and decode) deal
* with invalid or unterminated character sequences.
*
* If it is true (and not overridden at the method invocation) decode and the
* decoder replace invalid (or unterminated) octet sequences with the Unicode
* Replacement character U+FFFD (�). Otherwise they throw a FormatException.
* @description Checks that this constructor with allowMalformed: true creates
* a Utf8Codec which doesn't throw FormatException for invalid or unterminated
* character sequences but replaces them with the Replacement character
* U+FFFD (�). The invalid sequences are taken from
* http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
* @issue 28832
* @author sgrekhov@unipro.ru
*/
import "dart:convert";
import "../../../Utils/expect.dart";
/// Decodes [toDecode] with a [Utf8Codec] constructed with
/// `allowMalformed: true` and verifies that the result equals [expected].
check(List<int> toDecode, String expected) {
  // A fresh codec is created for every call, so no state is shared
  // between individual checks.
  final String decoded = new Utf8Codec(allowMalformed: true).decode(toDecode);
  Expect.equals(expected, decoded);
}
/// Feeds malformed UTF-8 byte sequences to a lenient [Utf8Codec] (via
/// [check]) and verifies the decoded output.
///
/// In every case below the expected string contains one Replacement
/// character U+FFFD per input byte.
main() {
  // Each of the 64 possible continuation bytes (0x80-0xBF) on its own
  for (int i = 0x80; i <= 0xBF; i++) {
    check([i], "�");
  }
  // All 32 first bytes of 2-byte sequences (0xc0-0xdf), with no continuation
  for (int i = 0xc0; i <= 0xdf; i++) {
    check([i], "�");
  }
  // All 16 first bytes of 3-byte sequences (0xe0-0xef), with no continuation
  for (int i = 0xe0; i <= 0xef; i++) {
    check([i], "�");
  }
  // All 8 first bytes of 4-byte sequences (0xf0-0xf7), with no continuation
  for (int i = 0xf0; i <= 0xf7; i++) {
    check([i], "�");
  }
  // All 4 first bytes of 5-byte sequences (0xf8-0xfb), with no continuation
  for (int i = 0xf8; i <= 0xfb; i++) {
    check([i], "�");
  }
  // All 2 first bytes of 6-byte sequences (0xfc-0xfd), with no continuation
  for (int i = 0xfc; i <= 0xfd; i++) {
    check([i], "�");
  }

  // The following two bytes cannot appear in a correct UTF-8 string
  check([0xFE], "�");
  check([0xFF], "�");
  check([0xFE, 0xFE, 0xFF, 0xFF], "����");

  // Overlong encodings of an ASCII character (U+002F) in 2 to 6 bytes
  check([0xc0, 0xaf], "��");
  check([0xe0, 0x80, 0xaf], "���");
  check([0xf0, 0x80, 0x80, 0xaf], "����");
  check([0xf8, 0x80, 0x80, 0x80, 0xaf], "�����");
  check([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf], "������");

  // Maximum overlong sequences: the highest code point that is still
  // overlong for each sequence length
  check([0xc1, 0xBF], "��"); // U+007F
  check([0xe0, 0x9f, 0xBF], "���"); // U+07FF
  check([0xf0, 0x8f, 0xBF, 0xBF], "����"); // U+FFFF
  check([0xf8, 0x87, 0xBF, 0xBF, 0xBF], "�����"); // U+1FFFFF
  check([0xfc, 0x83, 0xBF, 0xBF, 0xBF, 0xBF], "������"); // U+3FFFFFF

  // Overlong representation of the NUL character
  check([0xC0, 0x80], "��");
  check([0xE0, 0x80, 0x80], "���");
  check([0xF0, 0x80, 0x80, 0x80], "����");
  check([0xF8, 0x80, 0x80, 0x80, 0x80], "�����");
  check([0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], "������");

  // Single UTF-16 surrogates
  check([0xED, 0xA0, 0x80], "���"); // U+D800
  check([0xED, 0xAD, 0xBF], "���"); // U+DB7F
  // Was 0xED 0xAD 0x80 (U+DB40); the referenced test data and the paired
  // cases below use 0xED 0xAE 0x80 (U+DB80).
  check([0xED, 0xAE, 0x80], "���"); // U+DB80
  check([0xED, 0xAF, 0xBF], "���"); // U+DBFF
  check([0xED, 0xB0, 0x80], "���"); // U+DC00
  check([0xED, 0xBE, 0x80], "���"); // U+DF80
  check([0xED, 0xBF, 0xBF], "���"); // U+DFFF

  // Paired UTF-16 surrogates (high surrogate followed by low surrogate)
  check([0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], "������"); // U+D800 U+DC00
  check([0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF], "������"); // U+D800 U+DFFF
  check([0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80], "������"); // U+DB7F U+DC00
  check([0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF], "������"); // U+DB7F U+DFFF
  check([0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80], "������"); // U+DB80 U+DC00
  check([0xED, 0xAE, 0x80, 0xED, 0xBF, 0xBF], "������"); // U+DB80 U+DFFF
  check([0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80], "������"); // U+DBFF U+DC00
  check([0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF], "������"); // U+DBFF U+DFFF
}