Add Code Page support. (#26) * Add "Code Page" support. An easy way to define an encoding as a mapping between bytes and code points. Includes definitions for the ISO-8859 family of code pages.

commit: 5b931a9edae4bee892d7c54eee864fc5fe793322 [log] [tgz]
author: Lasse R.H. Nielsen <lrn@google.com> Tue Sep 22 18:46:02 2020 +0200
committer: GitHub <noreply@github.com> Tue Sep 22 18:46:02 2020 +0200
tree: 25a39d0093a517aed75fc7c51325e80d4dcc38d8
parent: c1b01f832835d3d8a06b0b246a361c0eaab35d3c [diff]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37e25fd..947ff11 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md

@@ -9,6 +9,8 @@
 2.10.0-2.0.dev, which is the first version where this package will appear in the
 null safety allow list.
 
+ * Added `CodePage` class for single-byte `Encoding` implementations.
+
 ## 2.1.1
 
  * Fixed a DDC compilation regression for consumers using the Dart 1.x SDK that was introduced in `2.1.0`.

diff --git a/lib/convert.dart b/lib/convert.dart
index d745856..53a68d8 100644
--- a/lib/convert.dart
+++ b/lib/convert.dart

@@ -6,6 +6,7 @@
 
 export 'src/accumulator_sink.dart';
 export 'src/byte_accumulator_sink.dart';
+export 'src/codepage.dart';
 export 'src/hex.dart';
 export 'src/identity_codec.dart';
 export 'src/percent.dart';

diff --git a/lib/src/codepage.dart b/lib/src/codepage.dart
new file mode 100644
index 0000000..2cc5fbd
--- /dev/null
+++ b/lib/src/codepage.dart

@@ -0,0 +1,431 @@
+// Copyright (c) 2020, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import "dart:convert";
+import "dart:typed_data";
+
+/// The ISO-8859-2/Latin-2 (Eastern European) code page.
+final CodePage latin2 =
+    CodePage._bmp("latin-2", "$_ascii$_noControls$_top8859_2");
+
+/// The ISO-8859-3/Latin-3 (South European) code page.
+final CodePage latin3 =
+    CodePage._bmp("latin-3", "$_ascii$_noControls$_top8859_3");
+
+/// The ISO-8859-4/Latin-4 (North European) code page.
+final CodePage latin4 =
+    CodePage._bmp("latin-4", "$_ascii$_noControls$_top8859_4");
+
+/// The ISO-8859-5/Latin-Cyrillic code page.
+final CodePage latinCyrillic =
+    CodePage._bmp("cyrillic", "$_ascii$_noControls$_top8859_5");
+
+/// The ISO-8859-6/Latin-Arabic code page.
+final CodePage latinArabic =
+    CodePage._bmp("arabic", "$_ascii$_noControls$_top8859_6");
+
+/// The ISO-8859-7/Latin-Greek code page.
+final CodePage latinGreek =
+    CodePage._bmp("greek", "$_ascii$_noControls$_top8859_7");
+
+/// The ISO-8859-8/Latin-Hebrew code page.
+final CodePage latinHebrew =
+    CodePage._bmp("hebrew", "$_ascii$_noControls$_top8859_8");
+
+/// The ISO-8859-9/Latin-5 (Turkish) code page.
+final CodePage latin5 =
+    CodePage._bmp("latin-5", "$_ascii$_noControls$_top8859_9");
+
+/// The ISO-8859-10/Latin-6 (Nordic) code page.
+final CodePage latin6 =
+    CodePage._bmp("latin-6", "$_ascii$_noControls$_top8859_10");
+
+/// The ISO-8859-11/Latin-Thai code page.
+final CodePage latinThai =
+    CodePage._bmp("tis620", "$_ascii$_noControls$_top8859_11");
+
+/// The ISO-8859-13/Latin-7 (Baltic Rim) code page.
+final CodePage latin7 =
+    CodePage._bmp("latin-7", "$_ascii$_noControls$_top8859_13");
+
+/// The ISO-8859-14/Latin-8 (Celtic) code page.
+final CodePage latin8 =
+    CodePage._bmp("latin-8", "$_ascii$_noControls$_top8859_14");
+
+/// The ISO-8859-15/Latin-9 (Western European revised) code page.
+final CodePage latin9 =
+    CodePage._bmp("latin-9", "$_ascii$_noControls$_top8859_15");
+
+/// The ISO-8859-16/Latin-10 (South Eastern European) code page.
+final CodePage latin10 =
+    CodePage._bmp("latin-10", "$_ascii$_noControls$_top8859_16");
+
+/// ISO-8859-2 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_2 = "\xa0Ą˘Ł¤ĽŚ§¨ŠŞŤŹ\xadŽŻ°ą˛ł´ľśˇ¸šşťź˝žż"
+    "ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß"
+    "ŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙";
+
+/// Characters in ISO-8859-3 above the ASCII and top control characters.
+///
+/// The string holds the characters for bytes 0xA0..0xFF in byte order.
+/// Bytes 0xA5, 0xAE, 0xBE, 0xC3, 0xD0, 0xE3 and 0xF0 are not assigned by
+/// ISO-8859-3 and are marked with U+FFFD. The currency sign (U+00A4) sits at
+/// byte 0xA4, directly before the unassigned byte 0xA5.
+const _top8859_3 = "\xa0Ħ˘£¤\uFFFDĤ§¨İŞĞĴ\xad\uFFFDŻ°ħ²³´µĥ·¸ışğĵ½\uFFFDż"
+    "ÀÁÂ\uFFFDÄĊĈÇÈÉÊËÌÍÎÏ\uFFFDÑÒÓÔĠÖ×ĜÙÚÛÜŬŜß"
+    "àáâ\uFFFDäċĉçèéêëìíîï\uFFFDñòóôġö÷ĝùúûüŭŝ˙";
+
+/// ISO-8859-4 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_4 = "\xa0ĄĸŖ¤ĨĻ§¨ŠĒĢŦ\xadŽ¯°ą˛ŗ´ĩļˇ¸šēģŧŊžŋ"
+    "ĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪĐŅŌĶÔÕÖ×ØŲÚÛÜŨŪß"
+    "āáâãäåæįčéęëėíîīđņōķôõö÷øųúûüũū˙";
+
+/// ISO-8859-5 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_5 = "\xa0ЁЂЃЄЅІЇЈЉЊЋЌ\xadЎЏАБВГДЕЖЗИЙКЛМНОП"
+    "РСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп"
+    "рстуфхцчшщъыьэюя№ёђѓєѕіїјљњћќ§ўџ";
+
+/// ISO-8859-6 characters for bytes 0xA0..0xFF; U+FFFD marks undefined bytes.
+const _top8859_6 = "\xa0\uFFFD\uFFFD\uFFFD¤\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\u060c\xad\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\u061b\uFFFD\uFFFD\uFFFD\u061f"
+    "\uFFFD\u0621\u0622\u0623\u0624\u0625\u0626\u0627"
+    "\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f"
+    "\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637"
+    "\u0638\u0639\u063a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647"
+    "\u0648\u0649\u064a\u064b\u064c\u064d\u064e\u064f"
+    "\u0650\u0651\u0652\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD";
+
+/// ISO-8859-7 characters for bytes 0xA0..0xFF; U+FFFD marks undefined bytes.
+const _top8859_7 = "\xa0‘’£€₯¦§¨©ͺ«¬\xad\uFFFD―°±²³΄΅Ά·ΈΉΊ»Ό½ΎΏ"
+    "ΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\uFFFDΣΤΥΦΧΨΩΪΫάέήί"
+    "ΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ\uFFFD";
+
+/// ISO-8859-8 characters for bytes 0xA0..0xFF; U+FFFD marks undefined bytes.
+const _top8859_8 = "\xa0\uFFFD¢£¤¥¦§¨©×«¬\xad®¯°±²³´µ¶·¸¹÷»¼½¾\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD‗"
+    "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7"
+    "\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df"
+    "\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7"
+    "\u05e8\u05e9\u05ea\uFFFD\uFFFD\u200e\u200f\uFFFD";
+
+/// ISO-8859-9 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_9 = "\xa0¡¢£¤¥¦§¨©ª«¬\xad®¯°±²³´µ¶·¸¹º»¼½¾¿"
+    "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖ×ØÙÚÛÜİŞß"
+    "àáâãäåæçèéêëìíîïğñòóôõö÷øùúûüışÿ";
+
+/// ISO-8859-10 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_10 = "\xa0ĄĒĢĪĨĶ§ĻĐŠŦŽ\xadŪŊ°ąēģīĩķ·ļđšŧž―ūŋ"
+    "ĀÁÂÃÄÅÆĮČÉĘËĖÍÎÏÐŅŌÓÔÕÖŨØŲÚÛÜÝÞß"
+    "āáâãäåæįčéęëėíîïðņōóôõöũøųúûüýþĸ";
+
+/// Characters in ISO-8859-11 above the ASCII and top control characters.
+///
+/// The string holds the characters for bytes 0xA0..0xFF in byte order:
+/// U+0E01..U+0E3A for bytes 0xA1..0xDA, U+0E3F for byte 0xDF and
+/// U+0E40..U+0E5B for bytes 0xE0..0xFB. Bytes 0xDB..0xDE and 0xFC..0xFF are
+/// undefined and marked with U+FFFD.
+///
+/// The table is written with escapes because it contains adjacent combining
+/// marks (such as U+0E38, U+0E39 and U+0E3A). Tools that apply Unicode
+/// normalization to the source text may reorder literal combining marks,
+/// which would silently change the byte assignments.
+const _top8859_11 = "\xa0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07"
+    "\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f"
+    "\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17"
+    "\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f"
+    "\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27"
+    "\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f"
+    "\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37"
+    "\u0e38\u0e39\u0e3a\uFFFD\uFFFD\uFFFD\uFFFD\u0e3f"
+    "\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47"
+    "\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f"
+    "\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57"
+    "\u0e58\u0e59\u0e5a\u0e5b\uFFFD\uFFFD\uFFFD\uFFFD";
+
+/// ISO-8859-13 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_13 = "\xa0”¢£¤„¦§Ø©Ŗ«¬\xad®Æ°±²³“µ¶·ø¹ŗ»¼½¾æ"
+    "ĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲŁŚŪÜŻŽß"
+    "ąįāćäåęēčéźėģķīļšńņóōõö÷ųłśūüżž’";
+
+/// ISO-8859-14 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_14 = "\xa0Ḃḃ£ĊċḊ§Ẁ©ẂḋỲ\xad®ŸḞḟĠġṀṁ¶ṖẁṗẃṠỳẄẅṡ"
+    "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏŴÑÒÓÔÕÖṪØÙÚÛÜÝŶß"
+    "àáâãäåæçèéêëìíîïŵñòóôõöṫøùúûüýŷÿ";
+
+/// ISO-8859-15 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_15 = "\xa0¡¢£€¥Š§š©ª«¬\xad®¯°±²³Žµ¶·ž¹º»ŒœŸ¿"
+    "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß"
+    "àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
+
+/// ISO-8859-16 characters for bytes 0xA0..0xFF, listed in byte order.
+const _top8859_16 = "\xa0ĄąŁ€„Š§š©Ș«Ź\xadźŻ°±ČłŽ”¶·žčș»ŒœŸż"
+    "ÀÁÂĂÄĆÆÇÈÉÊËÌÍÎÏĐŃÒÓÔŐÖŚŰÙÚÛÜĘȚß"
+    "àáâăäćæçèéêëìíîïđńòóôőöśűùúûüęțÿ";
+
+/// 32 U+FFFD entries, marking a 32-byte range as undefined in a code page.
+const _noControls = "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
+    "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD";
+
+/// ASCII characters without control characters. Shared by many code pages.
+///
+/// Covers bytes 0x00..0x7F: the first 32 entries come from [_noControls],
+/// the printable characters 0x20..0x7E map to themselves, and the final
+/// entry (byte 0x7F) is U+FFFD.
+const _ascii = "$_noControls"
+    r""" !"#$%&'()*+,-./0123456789:;<=>?"""
+    r"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_"
+    "`abcdefghijklmnopqrstuvwxyz{|}~\uFFFD";
+
+/// An [Encoding] defined by a table that assigns one character to each byte.
+///
+/// Since every character is encoded as a single byte, a code page
+/// can represent at most 256 different characters.
+class CodePage extends Encoding {
+  @override
+  final CodePageDecoder decoder;
+
+  @override
+  final String name;
+
+  /// Cache for [encoder]; stays `null` until the getter is first read.
+  CodePageEncoder? _encoder;
+
+  /// Creates a code page called [name] from a table of [characters].
+  ///
+  /// The [characters] string must consist of exactly 256 code points (runes),
+  /// where the code point at position `n` is the character for byte `n`.
+  ///
+  /// A byte which the code page leaves undefined should be given the
+  /// code point U+FFFD (invalid character) at its position in [characters].
+  ///
+  /// The [name] becomes the value of [Encoding.name].
+  factory CodePage(String name, String characters) = CodePage._general;
+
+  /// Creates a code page from any 256 code points.
+  ///
+  /// The [characters] string must contain exactly 256 code points.
+  ///
+  /// A U+FFFD (invalid character) entry in [characters] means that the
+  /// byte at that position has no definition in this code page.
+  CodePage._general(this.name, String characters)
+      : decoder = _createDecoder(characters);
+
+  /// Creates a code page whose characters are all in the
+  /// basic multilingual plane (BMP).
+  ///
+  /// The BMP holds the first 65536 code points, so each of its characters
+  /// is a single UTF-16 code unit, which makes some operations more
+  /// efficient.
+  ///
+  /// The [characters] string must contain exactly 256 BMP code points,
+  /// meaning its length should be 256 and it should contain no surrogates.
+  ///
+  /// A U+FFFD (invalid character) entry in [characters] means that the
+  /// byte at that position has no definition in this code page.
+  CodePage._bmp(this.name, String characters)
+      : decoder = _BmpCodePageDecoder(characters);
+
+  /// The code point that this code page assigns to [byte].
+  ///
+  /// The [byte] must be in the range 0..255.
+  /// The result should be a Unicode scalar value
+  /// (a code point which is not a surrogate).
+  ///
+  /// A byte without a defined character in the code page yields
+  /// the Unicode invalid character (U+FFFD) instead.
+  int operator [](int byte) {
+    return decoder._char(byte);
+  }
+
+  /// The encoder for this code page, created when first requested.
+  @override
+  CodePageEncoder get encoder {
+    return _encoder ??= decoder._createEncoder();
+  }
+
+  /// Converts [input] to bytes by calling `encoder.convert`.
+  ///
+  /// The [invalidCharacter] argument is passed on unchanged.
+  @override
+  Uint8List encode(String input, {int? invalidCharacter}) {
+    return encoder.convert(input, invalidCharacter: invalidCharacter);
+  }
+
+  /// Converts [bytes] to a string by calling `decoder.convert`.
+  ///
+  /// The [allowInvalid] argument is passed on unchanged.
+  @override
+  String decode(List<int> bytes, {bool allowInvalid = false}) {
+    return decoder.convert(bytes, allowInvalid: allowInvalid);
+  }
+}
+
+/// A code page decoder, converts from bytes to characters.
+///
+/// A code page assigns characters to a subset of byte values.
+/// The decoder converts those bytes back to their characters.
+abstract class CodePageDecoder implements Converter<List<int>, String> {
+  /// Decodes a sequence of bytes into a string using a code page.
+  ///
+  /// The code page assigns one character to each byte.
+  /// Values in [input] must be bytes (integers in the range 0..255).
+  ///
+  /// If [allowInvalid] is true, non-byte values in [input],
+  /// or byte values not defined as a character in the code page,
+  /// are emitted as U+FFFD (the Unicode invalid character).
+  /// If not true, the bytes must be valid and defined characters.
+  String convert(List<int> input, {bool allowInvalid = false});
+
+  /// Creates the encoder which performs the inverse of this decoder.
+  CodePageEncoder _createEncoder();
+
+  /// The code point assigned to [byte]; backs `CodePage.operator []`.
+  int _char(int byte);
+}
+
+/// Creates a decoder from [characters].
+///
+/// The [characters] string must contain exactly 256 code points (runes),
+/// in the order of the bytes they represent. Throws an [ArgumentError]
+/// if it contains more or fewer.
+///
+/// Recognizes if [characters] contains only characters in the BMP,
+/// and creates a [_BmpCodePageDecoder] in that case.
+/// Otherwise creates a [_NonBmpCodePageDecoder].
+CodePageDecoder _createDecoder(String characters) {
+  var result = Uint32List(256);
+  var i = 0;
+  // Bitwise OR of every code point. It exceeds 0xFFFF exactly when at least
+  // one code point lies outside the BMP.
+  var allChars = 0;
+  for (var char in characters.runes) {
+    if (i >= 256) {
+      throw ArgumentError.value(
+          characters, "characters", "Must contain 256 characters");
+    }
+    // Advance the write position for each rune. Without the increment every
+    // rune would land in slot 0 and the length check below would always fail.
+    result[i++] = char;
+    allChars |= char;
+  }
+  if (i < 256) {
+    throw ArgumentError.value(
+        characters, "characters", "Must contain 256 characters");
+  }
+  if (allChars <= 0xFFFF) {
+    // It's in the BMP.
+    return _BmpCodePageDecoder(characters);
+  }
+  return _NonBmpCodePageDecoder._(result);
+}
+
+/// Code page with non-BMP characters.
+///
+/// The table is stored as 256 code points, indexed by byte value, because
+/// characters outside the BMP do not fit in a single UTF-16 code unit.
+class _NonBmpCodePageDecoder extends Converter<List<int>, String>
+    implements CodePageDecoder {
+  /// The code point for each byte value. U+FFFD marks an undefined byte.
+  final Uint32List _characters;
+
+  /// Creates a decoder from a string of exactly 256 code points.
+  ///
+  /// Throws an [ArgumentError] if [characters] has more or fewer code points.
+  _NonBmpCodePageDecoder(String characters) : this._(_buildMapping(characters));
+
+  /// Creates a decoder from an already built 256 entry code point table.
+  _NonBmpCodePageDecoder._(this._characters);
+
+  @override
+  int _char(int byte) => _characters[byte];
+
+  /// Converts the runes of [characters] to a 256 entry code point table.
+  static Uint32List _buildMapping(String characters) {
+    var result = Uint32List(256);
+    var i = 0;
+    for (var char in characters.runes) {
+      if (i >= 256) {
+        throw ArgumentError.value(
+            characters, "characters", "Must contain 256 characters");
+      }
+      result[i++] = char;
+    }
+    if (i < 256) {
+      throw ArgumentError.value(
+          characters, "characters", "Must contain 256 characters");
+    }
+    return result;
+  }
+
+  /// Creates an encoder mapping each defined code point back to its byte.
+  ///
+  /// Entries which are U+FFFD are undefined bytes and are left out.
+  @override
+  CodePageEncoder _createEncoder() {
+    var result = <int, int>{};
+    for (var i = 0; i < 256; i++) {
+      var char = _characters[i];
+      if (char != 0xFFFD) {
+        result[char] = i;
+      }
+    }
+    return CodePageEncoder._(result);
+  }
+
+  /// Decodes [input] using this code page.
+  ///
+  /// If [allowInvalid] is true, values in [input] which are not bytes, or
+  /// bytes with no character in the code page, are emitted as U+FFFD.
+  /// Otherwise either case throws a [FormatException], which matches the
+  /// behavior of the BMP decoder and the [CodePageDecoder.convert] contract.
+  @override
+  String convert(List<int> input, {bool allowInvalid = false}) {
+    var buffer = Uint32List(input.length);
+    for (var i = 0; i < input.length; i++) {
+      var byte = input[i];
+      int character;
+      if ((byte & 0xff) == byte) {
+        character = _characters[byte];
+        if (character == 0xFFFD && !allowInvalid) {
+          throw FormatException("Not defined in this code page", input, i);
+        }
+      } else if (allowInvalid) {
+        // Not a byte value, so there is no table entry to look up.
+        character = 0xFFFD;
+      } else {
+        throw FormatException("Not a byte", input, i);
+      }
+      buffer[i] = character;
+    }
+    return String.fromCharCodes(buffer);
+  }
+}
+
+/// Code page decoder for tables containing only BMP characters.
+///
+/// The table is kept as a string of 256 UTF-16 code units, so the character
+/// for a byte is the code unit at that index.
+class _BmpCodePageDecoder extends Converter<List<int>, String>
+    implements CodePageDecoder {
+  final String _characters;
+
+  /// Wraps a table which must be exactly 256 UTF-16 code units long.
+  _BmpCodePageDecoder(this._characters) {
+    if (_characters.length != 256) {
+      throw ArgumentError.value(_characters, "characters",
+          "Must contain 256 characters. Was ${_characters.length}");
+    }
+  }
+
+  @override
+  int _char(int byte) {
+    return _characters.codeUnitAt(byte);
+  }
+
+  @override
+  CodePageEncoder _createEncoder() {
+    return CodePageEncoder._bmp(_characters);
+  }
+
+  /// Decodes [bytes], throwing a [FormatException] on the first value which
+  /// is not a byte or has no character in this code page.
+  ///
+  /// If [allowInvalid] is true, such values are decoded as U+FFFD instead.
+  @override
+  String convert(List<int> bytes, {bool allowInvalid = false}) {
+    if (allowInvalid) return _convertAllowInvalid(bytes);
+    final units = Uint16List(bytes.length);
+    for (var index = 0; index < units.length; index++) {
+      final value = bytes[index];
+      if ((value & 0xff) != value) {
+        throw FormatException("Not a byte value", bytes, index);
+      }
+      final unit = _characters.codeUnitAt(value);
+      if (unit == 0xFFFD) {
+        throw FormatException("Not defined in this code page", bytes, index);
+      }
+      units[index] = unit;
+    }
+    return String.fromCharCodes(units);
+  }
+
+  /// Lenient decoding: values which are not bytes become U+FFFD, and
+  /// undefined bytes keep the U+FFFD stored for them in the table.
+  String _convertAllowInvalid(List<int> bytes) {
+    final units = Uint16List(bytes.length);
+    for (var index = 0; index < units.length; index++) {
+      final value = bytes[index];
+      units[index] =
+          (value & 0xff) == value ? _characters.codeUnitAt(value) : 0xFFFD;
+    }
+    return String.fromCharCodes(units);
+  }
+}
+
+/// Encoder for a code page.
+///
+/// Turns a string into bytes, where each byte is the one that the
+/// code page definition assigns to the corresponding character.
+class CodePageEncoder extends Converter<String, List<int>> {
+  /// Maps a code point to the byte which represents it.
+  final Map<int, int> _encoding;
+
+  /// Creates an encoder from a table of 256 BMP characters.
+  CodePageEncoder._bmp(String characters)
+      : this._(_createBmpEncoding(characters));
+
+  /// Creates an encoder from a ready-made code point to byte mapping.
+  CodePageEncoder._(this._encoding);
+
+  /// Builds the code unit to byte mapping for a BMP character table.
+  ///
+  /// Entries which are U+FFFD stand for undefined bytes and are skipped.
+  static Map<int, int> _createBmpEncoding(String characters) {
+    final table = <int, int>{};
+    var byte = 0;
+    for (final unit in characters.codeUnits) {
+      if (unit != 0xFFFD) table[unit] = byte;
+      byte++;
+    }
+    return table;
+  }
+
+  /// Converts input to the byte encoding in this code page.
+  ///
+  /// If [invalidCharacter] is supplied, it must be a byte value
+  /// (in the range 0..255).
+  ///
+  /// Characters of [input] which this code page cannot represent are
+  /// replaced by the [invalidCharacter] byte. If there are such characters
+  /// and no [invalidCharacter] was supplied, a [FormatException] is thrown.
+  @override
+  Uint8List convert(String input, {int? invalidCharacter}) {
+    if (invalidCharacter != null) {
+      RangeError.checkValueInInterval(
+          invalidCharacter, 0, 255, "invalidCharacter");
+    }
+    final length = input.length;
+    // A surrogate pair uses two code units but produces one byte, so the
+    // output may be shorter than the buffer; it is trimmed on return.
+    final bytes = Uint8List(length);
+    var written = 0;
+    var index = 0;
+    while (index < length) {
+      final start = index;
+      final unit = input.codeUnitAt(index++);
+      var byte = _encoding[unit];
+      if (byte == null && (unit & 0xFC00) == 0xD800 && index < length) {
+        // Lead surrogate: combine it with a following trail surrogate and
+        // look up the resulting supplementary code point.
+        final trail = input.codeUnitAt(index);
+        if ((trail & 0xFC00) == 0xDC00) {
+          index++;
+          byte =
+              _encoding[0x10000 + ((unit & 0x3ff) << 10) + (trail & 0x3ff)];
+        }
+      }
+      if (byte == null) {
+        if (invalidCharacter == null) {
+          throw FormatException(
+              "Not a character in this code page", input, start);
+        }
+        byte = invalidCharacter;
+      }
+      bytes[written++] = byte;
+    }
+    return Uint8List.sublistView(bytes, 0, written);
+  }
+}

diff --git a/test/codepage_test.dart b/test/codepage_test.dart
new file mode 100644
index 0000000..2105f27
--- /dev/null
+++ b/test/codepage_test.dart

@@ -0,0 +1,63 @@
+// Copyright (c) 2020, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:typed_data';
+
+import 'package:convert/convert.dart';
+import 'package:test/test.dart';
+
+/// Checks the predefined ISO-8859 code pages in both directions.
+void main() {
+  // Every byte value once, in order.
+  final allBytes = Uint8List.fromList(List.generate(256, (i) => i));
+  final codePages = [
+    latin2,
+    latin3,
+    latin4,
+    latin5,
+    latin6,
+    latin7,
+    latin8,
+    latin9,
+    latin10,
+    latinCyrillic,
+    latinGreek,
+    latinHebrew,
+    latinThai,
+    latinArabic
+  ];
+  for (final codePage in codePages) {
+    test("${codePage.name} codepage", () {
+      // Printable ASCII maps to itself in every code page.
+      for (var byte = 0x20; byte < 0x7f; byte++) {
+        expect(codePage[byte], byte);
+      }
+      // Every defined byte encodes and decodes to the same character.
+      for (var byte = 0; byte < 256; byte++) {
+        final codePoint = codePage[byte];
+        if (codePoint == 0xFFFD) continue;
+        final text = String.fromCharCode(codePoint);
+        expect(codePage.encode(text), [byte]);
+        expect(codePage.decode([byte]), text);
+      }
+      // A value which is not a byte is rejected by strict decoding.
+      expect(() => codePage.decode([0xfffd]), throwsA(isA<FormatException>()));
+      // Lenient decoding of all bytes agrees with operator[].
+      expect(codePage.decode(allBytes, allowInvalid: true),
+          String.fromCharCodes(allBytes.map((byte) => codePage[byte])));
+    });
+  }
+
+  test("latin-2 roundtrip", () {
+    // Data from http://www.columbia.edu/kermit/latin2.html
+    final latin2Text =
+        "\xa0Ą˘Ł¤ĽŚ§¨ŠŞŤŹ\xadŽŻ°ą˛ł´ľśˇ¸šşťź˝žżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙";
+    final roundTripped = latin2.decode(latin2.encode(latin2Text));
+    expect(roundTripped, latin2Text);
+  });
+
+  test("latin-3 roundtrip", () {
+    // Data from http://www.columbia.edu/kermit/latin3.html
+    final latin3Text =
+        "\xa0Ħ˘£¤\u{FFFD}Ĥ§¨İŞĞĴ\xad\u{FFFD}Ż°ħ²³´µĥ·¸ışğĵ½\u{FFFD}żÀÁÂ\u{FFFD}ÄĊĈÇÈÉÊËÌÍÎÏ\u{FFFD}ÑÒÓÔĠÖ×ĜÙÚÛÜŬŜßàáâ\u{FFFD}äċĉçèéêëìíîï\u{FFFD}ñòóôġö÷ĝùúûüŭŝ˙";
+    // U+FFFD is not encodable, so it becomes byte 0, which decodes
+    // back to U+FFFD when invalid bytes are allowed.
+    final bytes = latin3.encode(latin3Text, invalidCharacter: 0);
+    final roundTripped = latin3.decode(bytes, allowInvalid: true);
+    expect(roundTripped, latin3Text);
+  });
+}
commit	5b931a9edae4bee892d7c54eee864fc5fe793322	[log] [tgz]
author	Lasse R.H. Nielsen <lrn@google.com>	Tue Sep 22 18:46:02 2020 +0200
committer	GitHub <noreply@github.com>	Tue Sep 22 18:46:02 2020 +0200
tree	25a39d0093a517aed75fc7c51325e80d4dcc38d8
parent	c1b01f832835d3d8a06b0b246a361c0eaab35d3c [diff]