Add a hexadecimal codec.
R=lrn@google.com
Review URL: https://codereview.chromium.org//1364613002 .
diff --git a/lib/convert.dart b/lib/convert.dart
new file mode 100644
index 0000000..5b05322
--- /dev/null
+++ b/lib/convert.dart
@@ -0,0 +1,7 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert;
+
+export 'src/hex.dart';
diff --git a/lib/src/hex.dart b/lib/src/hex.dart
new file mode 100644
index 0000000..ec4451e
--- /dev/null
+++ b/lib/src/hex.dart
@@ -0,0 +1,29 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.hex;
+
+import 'dart:convert';
+
+import 'hex/encoder.dart';
+import 'hex/decoder.dart';
+
+export 'hex/encoder.dart' hide hexEncoder;
+export 'hex/decoder.dart' hide hexDecoder;
+
+/// The shared, canonical [HexCodec] instance.
+const hex = const HexCodec._();
+
+/// A [Codec] between lists of bytes and hexadecimal strings, as specified by
+/// [the Base16 spec][rfc].
+///
+/// [rfc]: https://tools.ietf.org/html/rfc4648#section-8
+///
+/// The constructor is private; use the top-level [hex] constant instead.
+class HexCodec extends Codec<List<int>, String> {
+  const HexCodec._();
+
+  HexDecoder get decoder => hexDecoder;
+  HexEncoder get encoder => hexEncoder;
+}
diff --git a/lib/src/hex/decoder.dart b/lib/src/hex/decoder.dart
new file mode 100644
index 0000000..2ba169a
--- /dev/null
+++ b/lib/src/hex/decoder.dart
@@ -0,0 +1,193 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.hex.decoder;
+
+import 'dart:convert';
+import 'dart:typed_data';
+
+import 'package:charcode/ascii.dart';
+
+/// The shared, canonical [HexDecoder] instance.
+const hexDecoder = const HexDecoder._();
+
+/// Decodes hexadecimal strings into lists of bytes.
+///
+/// Each byte takes exactly two hexadecimal digits, so odd-length strings are
+/// rejected with a [FormatException], as are non-hexadecimal code units.
+class HexDecoder extends Converter<String, List<int>> {
+  const HexDecoder._();
+
+  List<int> convert(String string) {
+    var length = string.length;
+    if (length.isOdd) {
+      throw new FormatException("Invalid input length, must be even.",
+          string, length);
+    }
+    var result = new Uint8List(length ~/ 2);
+    _decode(string.codeUnits, 0, length, result, 0);
+    return result;
+  }
+
+  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
+    return new _HexDecoderSink(sink);
+  }
+}
+
+/// A conversion sink for chunked hexadecimal decoding.
+class _HexDecoderSink extends StringConversionSinkBase {
+  /// The underlying sink to which decoded byte arrays will be passed.
+  final Sink<List<int>> _sink;
+
+  /// The unpaired trailing digit of the input so far, or `null` if none.
+  ///
+  /// As the most significant digit of its byte, it's stored multiplied by 16.
+  int _lastDigit;
+
+  _HexDecoderSink(this._sink);
+
+  /// Decodes the part of [string] between [start] and [end] and passes the
+  /// bytes to the underlying sink, closing it afterwards if [isLast] is true.
+  void addSlice(String string, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, string.length);
+    if (start == end) {
+      if (isLast) close();
+      return;
+    }
+
+    var codeUnits = string.codeUnits;
+    var bytes;
+    var bytesStart;
+    if (_lastDigit == null) {
+      bytes = new Uint8List((end - start) ~/ 2);
+      bytesStart = 0;
+    } else {
+      // The pending digit and the first new digit together form one byte.
+      bytes = new Uint8List(1 + (end - start - 1) ~/ 2);
+      bytes[0] = _lastDigit + _digitForCodeUnit(codeUnits, start);
+      start++;
+      bytesStart = 1;
+    }
+
+    _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart);
+    _sink.add(bytes);
+    if (isLast) close();
+  }
+
+  /// Returns a sink decoding UTF-8 bytes into the same underlying sink. It
+  /// inherits any unpaired digit, which would otherwise be silently dropped.
+  ByteConversionSink asUtf8Sink(bool allowMalformed) =>
+      new _HexDecoderByteSink(_sink).._lastDigit = _lastDigit;
+
+  /// Closes the underlying sink; throws if a digit is still unpaired.
+  void close() {
+    if (_lastDigit != null) {
+      throw new FormatException("Invalid input length, must be even.");
+    }
+    _sink.close();
+  }
+}
+
+/// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes.
+class _HexDecoderByteSink extends ByteConversionSinkBase {
+  /// The underlying sink to which decoded byte arrays will be passed.
+  final Sink<List<int>> _sink;
+
+  /// The unpaired trailing digit left over from the chunks added so far.
+  ///
+  /// This is `null` unless an odd number of hexadecimal digits has been
+  /// consumed. It is stored already multiplied by 16, since it's the most
+  /// significant digit of the byte it will become part of.
+  int _lastDigit;
+
+  _HexDecoderByteSink(this._sink);
+
+  /// Decodes all of [chunk]; equivalent to a non-final [addSlice] over it.
+  void add(List<int> chunk) {
+    addSlice(chunk, 0, chunk.length, false);
+  }
+
+  /// Decodes the code units of [chunk] between [start] and [end], then closes
+  /// this sink if [isLast] is true.
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+
+    if (start != end) {
+      // A pending digit claims the first code unit of the slice as its low
+      // digit, which shifts both the read and the write position by one.
+      var pending = _lastDigit;
+      var offset = pending == null ? 0 : 1;
+      var decoded = new Uint8List(offset + (end - start - offset) ~/ 2);
+      if (pending != null) {
+        decoded[0] = pending + _digitForCodeUnit(chunk, start);
+      }
+      _lastDigit = _decode(chunk, start + offset, end, decoded, offset);
+      _sink.add(decoded);
+    }
+
+    if (isLast) close();
+  }
+
+  /// Closes the underlying sink.
+  ///
+  /// Throws a [FormatException] if a digit is still waiting for its pair.
+  void close() {
+    if (_lastDigit != null) {
+      throw new FormatException("Invalid input length, must be even.");
+    }
+
+    _sink.close();
+  }
+}
+
+/// Decodes pairs of hexadecimal digits from [codeUnits] into [destination].
+///
+/// Digits are read from [sourceStart] up to [sourceEnd] and bytes are written
+/// from [destinationStart] on. If the range holds an odd number of digits, the
+/// last one is returned multiplied by 16; otherwise `null` is returned.
+int _decode(List<int> codeUnits, int sourceStart, int sourceEnd,
+    List<int> destination, int destinationStart) {
+  var source = sourceStart;
+  var target = destinationStart;
+  while (source < sourceEnd - 1) {
+    var high = _digitForCodeUnit(codeUnits, source);
+    var low = _digitForCodeUnit(codeUnits, source + 1);
+    destination[target++] = (high << 4) | low;
+    source += 2;
+  }
+
+  if ((sourceEnd - sourceStart).isEven) return null;
+  return _digitForCodeUnit(codeUnits, sourceEnd - 1) << 4;
+}
+
+/// Returns the digit (0 through 15) corresponding to the hexadecimal code unit
+/// at index [index] in [codeUnits].
+///
+/// If the given code unit isn't valid hexadecimal, throws a [FormatException].
+int _digitForCodeUnit(List<int> codeUnits, int index) {
+ // If the code unit is a numeral, get its value. XOR works because 0 in ASCII
+ // is `0b110000` and the other numerals come after it in ascending order and
+ // take up at most four bits.
+ //
+ // We check for digits first because it ensures there's only a single branch
+ // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
+ // check because branch prediction will always work on it for valid data.
+ var codeUnit = codeUnits[index];
+ var digit = $0 ^ codeUnit;
+ if (digit <= 9) {
+ if (digit >= 0) return digit;
+ } else {
+ // If the code unit is an uppercase letter, convert it to lowercase. This
+ // works because uppercase letters in ASCII are exactly `0b100000 = 0x20`
+ // less than lowercase letters, so if we ensure that that bit is 1 we ensure
+ // that the letter is lowercase.
+ var letter = 0x20 | codeUnit;
+ if ($a <= letter && letter <= $f) return letter - $a + 10;
+ }
+ // Reaching this point means the code unit is neither a numeral nor a-f/A-F.
+ throw new FormatException(
+ "Invalid hexadecimal code unit "
+ "U+${codeUnit.toRadixString(16).padLeft(4, '0')}.",
+ codeUnits, index);
+}
diff --git a/lib/src/hex/encoder.dart b/lib/src/hex/encoder.dart
new file mode 100644
index 0000000..7a974e3
--- /dev/null
+++ b/lib/src/hex/encoder.dart
@@ -0,0 +1,87 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.hex.encoder;
+
+import 'dart:convert';
+import 'dart:typed_data';
+
+import 'package:charcode/ascii.dart';
+
+/// The canonical instance of [HexEncoder].
+const hexEncoder = const HexEncoder._();
+
+/// A converter that encodes byte arrays into hexadecimal strings.
+///
+/// This will throw a [FormatException] if the byte array has any elements that
+/// don't fit in the range of a byte (0 through 255).
+class HexEncoder extends Converter<List<int>, String> {
+ const HexEncoder._();
+
+ String convert(List<int> bytes) => _convert(bytes, 0, bytes.length);
+
+ ByteConversionSink startChunkedConversion(Sink<String> sink) =>
+ new _HexEncoderSink(sink);
+}
+
+/// A conversion sink for chunked hexadecimal encoding.
+class _HexEncoderSink extends ByteConversionSinkBase {
+  /// The underlying sink to which encoded hexadecimal strings are passed.
+  final Sink<String> _sink;
+
+  _HexEncoderSink(this._sink);
+
+  /// Encodes all of [chunk] and forwards the resulting string.
+  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);
+
+  /// Encodes the bytes of [chunk] between [start] and [end], forwards the
+  /// resulting string, and closes the underlying sink if [isLast] is true.
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+    var encoded = _convert(chunk, start, end);
+    _sink.add(encoded);
+    if (isLast) close();
+  }
+
+  void close() => _sink.close();
+}
+
+/// Encodes the bytes of [bytes] between [start] and [end] as lowercase hex,
+/// throwing a [FormatException] if any of them is outside the range 0..255.
+String _convert(List<int> bytes, int start, int end) {
+  // The output is pure ASCII of a known length, so a preallocated Uint8List
+  // is more efficient than a StringBuffer.
+  var buffer = new Uint8List((end - start) * 2);
+  var bufferIndex = 0;
+
+  // A bitwise OR of all bytes in [bytes]. It lets us detect out-of-range
+  // values with one check after the loop rather than a branch per byte.
+  var byteOr = 0;
+  for (var i = start; i < end; i++) {
+    var byte = bytes[i];
+    byteOr |= byte;
+    // For valid bytes this equals `byte ~/ 16` and `byte % 16`, but dart2js
+    // optimizes it better as it can't prove that [byte] is non-negative.
+    buffer[bufferIndex++] = _codeUnitForDigit((byte & 0xF0) >> 4);
+    buffer[bufferIndex++] = _codeUnitForDigit(byte & 0x0F);
+  }
+
+  if (byteOr >= 0 && byteOr <= 255) return new String.fromCharCodes(buffer);
+
+  // There was an invalid byte: find it and report it, keeping the sign in
+  // front of the radix prefix (`-0x1` rather than `0x-1`).
+  for (var i = start; i < end; i++) {
+    var byte = bytes[i];
+    if (byte >= 0 && byte <= 0xff) continue;
+    var sign = byte < 0 ? "-" : "";
+    throw new FormatException(
+        "Invalid byte ${sign}0x${byte.abs().toRadixString(16)}.", bytes, i);
+  }
+
+  throw new StateError("unreachable");
+}
+
+/// Returns the lowercase ASCII code unit for the hexadecimal [digit], which is
+/// expected to be between 0 and 15.
+int _codeUnitForDigit(int digit) => digit >= 10 ? digit - 10 + $a : $0 + digit;
diff --git a/pubspec.yaml b/pubspec.yaml
index a7b179c..db13edf 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: convert
-version: 0.0.1
+version: 0.0.1-dev
description: Utilities for converting between data representations.
author: Dart Team <misc@dartlang.org>
homepage: https://github.com/dart-lang/convert
@@ -7,5 +7,8 @@
environment:
sdk: '>=1.8.0 <2.0.0'
+dependencies:
+ charcode: '^1.1.0'
+
dev_dependencies:
- test: '^1.12.0'
+ test: '^0.12.0'
diff --git a/test/hex_test.dart b/test/hex_test.dart
new file mode 100644
index 0000000..f52edaf
--- /dev/null
+++ b/test/hex_test.dart
@@ -0,0 +1,153 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:async';
+
+import 'package:convert/convert.dart';
+import 'package:test/test.dart';
+
+void main() {
+ group("encoder", () {
+ test("converts byte arrays to hex", () {
+ expect(hex.encode([0x1a, 0xb2, 0x3c, 0xd4]), equals("1ab23cd4"));
+ expect(hex.encode([0x00, 0x01, 0xfe, 0xff]), equals("0001feff"));
+ });
+
+ group("with chunked conversion", () {
+ test("converts byte arrays to hex", () {
+ var results = [];
+ var controller = new StreamController(sync: true);
+ controller.stream.listen(results.add);
+ var sink = hex.encoder.startChunkedConversion(controller.sink);
+ // The controller is synchronous, so results are visible right after add().
+ sink.add([0x1a, 0xb2, 0x3c, 0xd4]);
+ expect(results, equals(["1ab23cd4"]));
+
+ sink.add([0x00, 0x01, 0xfe, 0xff]);
+ expect(results, equals(["1ab23cd4", "0001feff"]));
+ });
+
+ test("handles empty and single-byte lists", () {
+ var results = [];
+ var controller = new StreamController(sync: true);
+ controller.stream.listen(results.add);
+ var sink = hex.encoder.startChunkedConversion(controller.sink);
+
+ sink.add([]);
+ expect(results, equals([""]));
+
+ sink.add([0x00]);
+ expect(results, equals(["", "00"]));
+
+ sink.add([]);
+ expect(results, equals(["", "00", ""]));
+ });
+ });
+
+ test("rejects non-bytes", () {
+ expect(() => hex.encode([0x100]), throwsFormatException);
+ // The chunked sink must reject out-of-range values as well.
+ var sink = hex.encoder.startChunkedConversion(
+ new StreamController(sync: true));
+ expect(() => sink.add([0x100]), throwsFormatException);
+ });
+ });
+
+ group("decoder", () {
+ test("converts hex to byte arrays", () {
+ expect(hex.decode("1ab23cd4"), equals([0x1a, 0xb2, 0x3c, 0xd4]));
+ expect(hex.decode("0001feff"), equals([0x00, 0x01, 0xfe, 0xff]));
+ });
+
+ test("supports uppercase letters", () {
+ expect(hex.decode("0123456789ABCDEFabcdef"), equals([
+ 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef
+ ]));
+ });
+
+ group("with chunked conversion", () {
+ var results;
+ var sink;
+ setUp(() {
+ results = [];
+ var controller = new StreamController(sync: true);
+ controller.stream.listen(results.add);
+ sink = hex.decoder.startChunkedConversion(controller.sink);
+ });
+
+ test("converts hex to byte arrays", () {
+ sink.add("1ab23cd4");
+ expect(results, equals([[0x1a, 0xb2, 0x3c, 0xd4]]));
+
+ sink.add("0001feff");
+ expect(results,
+ equals([[0x1a, 0xb2, 0x3c, 0xd4], [0x00, 0x01, 0xfe, 0xff]]));
+ });
+
+ test("supports trailing digits split across chunks", () {
+ sink.add("1ab23");
+ expect(results, equals([[0x1a, 0xb2]]));
+ // The unpaired "3" is held back until the next chunk completes it.
+ sink.add("cd");
+ expect(results, equals([[0x1a, 0xb2], [0x3c]]));
+ // Now "d" is pending; the leading "4" of the next chunk completes 0xd4.
+ sink.add("40001");
+ expect(results, equals([[0x1a, 0xb2], [0x3c], [0xd4, 0x00, 0x01]]));
+
+ sink.add("feff");
+ expect(results,
+ equals([[0x1a, 0xb2], [0x3c], [0xd4, 0x00, 0x01], [0xfe, 0xff]]));
+ });
+
+ test("supports empty strings", () {
+ sink.add("");
+ expect(results, isEmpty);
+ // A lone digit yields an empty byte list and is kept as pending.
+ sink.add("0");
+ expect(results, equals([[]]));
+ // An empty chunk emits nothing and leaves the pending digit untouched.
+ sink.add("");
+ expect(results, equals([[]]));
+
+ sink.add("0");
+ expect(results, equals([[], [0x00]]));
+
+ sink.add("");
+ expect(results, equals([[], [0x00]]));
+ });
+
+ test("rejects odd length detected in close()", () {
+ sink.add("1ab23");
+ expect(results, equals([[0x1a, 0xb2]]));
+ expect(() => sink.close(), throwsFormatException);
+ });
+
+ test("rejects odd length detected in addSlice()", () {
+ sink.addSlice("1ab23cd", 0, 5, false);
+ expect(results, equals([[0x1a, 0xb2]]));
+ // The last slice leaves "d" unpaired, so closing via isLast must throw.
+ expect(() => sink.addSlice("1ab23cd", 5, 7, true),
+ throwsFormatException);
+ });
+ });
+
+ group("rejects non-hex character", () {
+ for (var char in
+ ["g", "G", "/", ":", "@", "`", "\x00", "\u0141", "\u{10041}"]) {
+ test('"$char"', () {
+ expect(() => hex.decode("a$char"), throwsFormatException);
+ expect(() => hex.decode("${char}a"), throwsFormatException);
+ // The character must also be rejected when it arrives as its own chunk.
+ var sink = hex.decoder.startChunkedConversion(
+ new StreamController(sync: true));
+ expect(() => sink.add(char), throwsFormatException);
+ });
+ }
+ });
+
+ test("rejects odd length detected in convert()", () {
+ expect(() => hex.decode("1ab23cd"), throwsFormatException);
+ });
+ });
+}