| // Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| /// @assertion String decode(List<int> codeUnits, {bool allowMalformed}) |
| /// Decodes the UTF-8 codeUnits (a list of unsigned 8-bit integers) to the |
| /// corresponding string. |
| /// ... |
| /// If allowMalformed is true the decoder replaces invalid (or unterminated) |
| /// character sequences with the Unicode Replacement character U+FFFD (�). |
| /// Otherwise it throws a FormatException. |
| /// @description Checks that this method with allowMalformed: true doesn't throw |
| /// FormatException for invalid or unterminated character sequences but replaces |
| /// them by Replacement character U+FFFD (�) |
| /// Invalid characters taken from |
| /// http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt |
| /// @issue 28832 |
| /// @author sgrekhov@unipro.ru |
| |
| import "dart:convert"; |
| import "../../../Utils/expect.dart"; |
| |
/// Decodes [toDecode] as UTF-8 with `allowMalformed: true` and asserts that
/// the resulting string is equal to [expected].
check(List<int> toDecode, String expected) {
  final decoded = Utf8Codec().decode(toDecode, allowMalformed: true);
  Expect.equals(expected, decoded);
}
| |
/// Runs malformed UTF-8 byte sequences through [check], which decodes them
/// with `allowMalformed: true`.
///
/// Every case below expects exactly one U+FFFD replacement character per
/// input byte. The byte sequences follow the invalid-input sections of
/// Markus Kuhn's UTF-8 decoder stress test.
void main() {
  // Each of the 64 possible continuation bytes (0x80-0xBF), decoded on its
  // own, i.e. without a preceding lead byte.
  for (int i = 0x80; i <= 0xBF; i++) {
    check([i], "�");
  }

  // All 32 first bytes of 2-byte sequences (0xc0-0xdf), with the
  // continuation byte missing.
  for (int i = 0xc0; i <= 0xdf; i++) {
    check([i], "�");
  }

  // All 16 first bytes of 3-byte sequences (0xe0-0xef), with the
  // continuation bytes missing.
  for (int i = 0xe0; i <= 0xef; i++) {
    check([i], "�");
  }

  // All 8 first bytes of 4-byte sequences (0xf0-0xf7), with the
  // continuation bytes missing.
  for (int i = 0xf0; i <= 0xf7; i++) {
    check([i], "�");
  }

  // All 4 first bytes of 5-byte sequences (0xf8-0xfb).
  for (int i = 0xf8; i <= 0xfb; i++) {
    check([i], "�");
  }

  // All 2 first bytes of 6-byte sequences (0xfc-0xfd).
  for (int i = 0xfc; i <= 0xfd; i++) {
    check([i], "�");
  }

  // The following two bytes cannot appear in a correct UTF-8 string.
  check([0xFE], "�");
  check([0xFF], "�");
  check([0xFE, 0xFE, 0xFF, 0xFF], "����");

  // Overlong encodings of the ASCII character '/' (U+002F) using 2 to 6
  // bytes.
  check([0xc0, 0xaf], "��");
  check([0xe0, 0x80, 0xaf], "���");
  check([0xf0, 0x80, 0x80, 0xaf], "����");
  check([0xf8, 0x80, 0x80, 0x80, 0xaf], "�����");
  check([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf], "������");

  // Maximum overlong sequences: the highest code point that is still
  // overlong for each sequence length.
  check([0xc1, 0xBF], "��");
  check([0xe0, 0x9f, 0xBF], "���");
  check([0xf0, 0x8f, 0xBF, 0xBF], "����");
  check([0xf8, 0x87, 0xBF, 0xBF, 0xBF], "�����");
  check([0xfc, 0x83, 0xBF, 0xBF, 0xBF, 0xBF], "������");

  // Overlong representations of the NUL character (U+0000).
  check([0xC0, 0x80], "��");
  check([0xE0, 0x80, 0x80], "���");
  check([0xF0, 0x80, 0x80, 0x80], "����");
  check([0xF8, 0x80, 0x80, 0x80, 0x80], "�����");
  check([0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], "������");

  // Single UTF-16 surrogates encoded as 3-byte sequences.
  check([0xED, 0xA0, 0x80], "���"); // U+D800
  check([0xED, 0xAD, 0xBF], "���"); // U+DB7F
  // U+DB80 is ED AE 80 in the reference file; this vector previously used
  // 0xAD (U+DB40), which did not match the reference or the paired cases
  // below.
  check([0xED, 0xAE, 0x80], "���"); // U+DB80
  check([0xED, 0xAF, 0xBF], "���"); // U+DBFF
  check([0xED, 0xB0, 0x80], "���"); // U+DC00
  check([0xED, 0xBE, 0x80], "���"); // U+DF80
  check([0xED, 0xBF, 0xBF], "���"); // U+DFFF

  // Paired UTF-16 surrogates: a high surrogate followed by a low surrogate,
  // each encoded as its own 3-byte sequence.
  check([0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], "������");
  check([0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF], "������");
  check([0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80], "������");
  check([0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF], "������");
  check([0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80], "������");
  check([0xED, 0xAE, 0x80, 0xED, 0xBF, 0xBF], "������");
  check([0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80], "������");
  check([0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF], "������");
}