Add a percent-encoding converter.
This is useful for percent-encoding binary or streaming data.
R=lrn@google.com
Review URL: https://codereview.chromium.org//1393003003 .
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2a2d63c..dd213b1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,5 @@
# Changelog
-## 0.0.1
+## 1.0.0
- Initial version
diff --git a/lib/convert.dart b/lib/convert.dart
index 5b05322..50b0296 100644
--- a/lib/convert.dart
+++ b/lib/convert.dart
@@ -5,3 +5,4 @@
library convert;
export 'src/hex.dart';
+export 'src/percent.dart';
diff --git a/lib/src/hex/decoder.dart b/lib/src/hex/decoder.dart
index 2ba169a..efb0b0d 100644
--- a/lib/src/hex/decoder.dart
+++ b/lib/src/hex/decoder.dart
@@ -7,7 +7,7 @@
import 'dart:convert';
import 'dart:typed_data';
-import 'package:charcode/ascii.dart';
+import '../utils.dart';
/// The canonical instance of [HexDecoder].
const hexDecoder = const HexDecoder._();
@@ -53,7 +53,7 @@
RangeError.checkValidRange(start, end, string.length);
if (start == end) {
- if (isLast) close();
+ if (isLast) _close(string, end);
return;
}
@@ -66,7 +66,7 @@
} else {
var hexPairs = (end - start - 1) ~/ 2;
bytes = new Uint8List(1 + hexPairs);
- bytes[0] = _lastDigit + _digitForCodeUnit(codeUnits, start);
+ bytes[0] = _lastDigit + digitForCodeUnit(codeUnits, start);
start++;
bytesStart = 1;
}
@@ -74,15 +74,20 @@
_lastDigit = _decode(codeUnits, start, end, bytes, bytesStart);
_sink.add(bytes);
- if (isLast) close();
+ if (isLast) _close(string, end);
}
ByteConversionSink asUtf8Sink(bool allowMalformed) =>
new _HexDecoderByteSink(_sink);
- void close() {
+ void close() => _close();
+
+ /// Like [close], but includes [string] and [index] in the [FormatException]
+ /// if one is thrown.
+ void _close([String string, int index]) {
if (_lastDigit != null) {
- throw new FormatException("Invalid input length, must be even.");
+ throw new FormatException(
+ "Input ended with incomplete encoded byte.", string, index);
}
_sink.close();
@@ -109,7 +114,7 @@
RangeError.checkValidRange(start, end, chunk.length);
if (start == end) {
- if (isLast) close();
+ if (isLast) _close(chunk, end);
return;
}
@@ -121,7 +126,7 @@
} else {
var hexPairs = (end - start - 1) ~/ 2;
bytes = new Uint8List(1 + hexPairs);
- bytes[0] = _lastDigit + _digitForCodeUnit(chunk, start);
+ bytes[0] = _lastDigit + digitForCodeUnit(chunk, start);
start++;
bytesStart = 1;
}
@@ -129,12 +134,17 @@
_lastDigit = _decode(chunk, start, end, bytes, bytesStart);
_sink.add(bytes);
- if (isLast) close();
+ if (isLast) _close(chunk, end);
}
- void close() {
+ void close() => _close();
+
+ /// Like [close], but includes [chunk] and [index] in the [FormatException]
+ /// if one is thrown.
+ void _close([List<int> chunk, int index]) {
if (_lastDigit != null) {
- throw new FormatException("Invalid input length, must be even.");
+ throw new FormatException(
+ "Input ended with incomplete encoded byte.", chunk, index);
}
_sink.close();
@@ -152,42 +162,11 @@
List<int> destination, int destinationStart) {
var destinationIndex = destinationStart;
for (var i = sourceStart; i < sourceEnd - 1; i += 2) {
- var firstDigit = _digitForCodeUnit(codeUnits, i);
- var secondDigit = _digitForCodeUnit(codeUnits, i + 1);
+ var firstDigit = digitForCodeUnit(codeUnits, i);
+ var secondDigit = digitForCodeUnit(codeUnits, i + 1);
destination[destinationIndex++] = 16 * firstDigit + secondDigit;
}
if ((sourceEnd - sourceStart).isEven) return null;
- return 16 * _digitForCodeUnit(codeUnits, sourceEnd - 1);
-}
-
-/// Returns the digit (0 through 15) corresponding to the hexadecimal code unit
-/// at index [i] in [codeUnits].
-///
-/// If the given code unit isn't valid hexadecimal, throws a [FormatException].
-int _digitForCodeUnit(List<int> codeUnits, int index) {
- // If the code unit is a numeral, get its value. XOR works because 0 in ASCII
- // is `0b110000` and the other numerals come after it in ascending order and
- // take up at most four bits.
- //
- // We check for digits first because it ensures there's only a single branch
- // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
- // check because branch prediction will always work on it for valid data.
- var codeUnit = codeUnits[index];
- var digit = $0 ^ codeUnit;
- if (digit <= 9) {
- if (digit >= 0) return digit;
- } else {
- // If the code unit is an uppercase letter, convert it to lowercase. This
- // works because uppercase letters in ASCII are exactly `0b100000 = 0x20`
- // less than lowercase letters, so if we ensure that that bit is 1 we ensure
- // that the letter is lowercase.
- var letter = 0x20 | codeUnit;
- if ($a <= letter && letter <= $f) return letter - $a + 10;
- }
-
- throw new FormatException(
- "Invalid hexadecimal code unit "
- "U+${codeUnit.toRadixString(16).padLeft(4, '0')}.",
- codeUnits, index);
+ return 16 * digitForCodeUnit(codeUnits, sourceEnd - 1);
}
diff --git a/lib/src/hex/encoder.dart b/lib/src/hex/encoder.dart
index 7a974e3..a9c66a5 100644
--- a/lib/src/hex/encoder.dart
+++ b/lib/src/hex/encoder.dart
@@ -75,7 +75,8 @@
for (var i = start; i < end; i++) {
var byte = bytes[i];
if (byte >= 0 && byte <= 0xff) continue;
- throw new FormatException("Invalid byte 0x${byte.toRadixString(16)}.",
+ throw new FormatException(
+ "Invalid byte ${byte < 0 ? "-" : ""}0x${byte.abs().toRadixString(16)}.",
bytes, i);
}
diff --git a/lib/src/percent.dart b/lib/src/percent.dart
new file mode 100644
index 0000000..3bb3da1
--- /dev/null
+++ b/lib/src/percent.dart
@@ -0,0 +1,38 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.percent;
+
+import 'dart:convert';
+
+import 'percent/encoder.dart';
+import 'percent/decoder.dart';
+
+export 'percent/encoder.dart' hide percentEncoder;
+export 'percent/decoder.dart' hide percentDecoder;
+
+/// The canonical instance of [PercentCodec].
+const percent = const PercentCodec._();
+
+// TODO(nweiz): Add flags to support generating and interpreting "+" as a space
+// character. Also add an option for custom sets of unreserved characters.
+/// A [Codec] between byte arrays and percent-encoded (also called URL-encoded)
+/// strings, following [RFC 3986][rfc].
+///
+/// [rfc]: https://tools.ietf.org/html/rfc3986#section-2.1
+///
+/// [encoder] leaves ASCII letters, decimal digits, and the characters `-._~`
+/// as they are and percent-encodes every other byte. That is what
+/// [Uri.encodeQueryComponent] does, except `0x20` bytes never become `+`.
+///
+/// [decoder] is deliberately permissive: it decodes any percent-encoded byte
+/// and passes through any unencoded byte other than `%`. By default a `+` is
+/// read as `0x2B`, not as the `0x20` that [Uri.encodeQueryComponent] would
+/// have meant by it.
+class PercentCodec extends Codec<List<int>, String> {
+  const PercentCodec._();
+
+  PercentDecoder get decoder => percentDecoder;
+  PercentEncoder get encoder => percentEncoder;
+}
diff --git a/lib/src/percent/decoder.dart b/lib/src/percent/decoder.dart
new file mode 100644
index 0000000..0740636
--- /dev/null
+++ b/lib/src/percent/decoder.dart
@@ -0,0 +1,244 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.percent.decoder;
+
+import 'dart:convert';
+
+import 'package:charcode/ascii.dart';
+import 'package:typed_data/typed_data.dart';
+
+import '../utils.dart';
+
+/// The canonical instance of [PercentDecoder].
+const percentDecoder = const PercentDecoder._();
+
+const _lastPercent = -1;
+
+/// Converts percent-encoded strings back into the bytes they represent.
+///
+/// Decoding is deliberately lenient: every percent-encoded byte is accepted,
+/// and so is every unencoded ASCII character except a bare `%`. By default a
+/// `+` decodes to `0x2B`, not to the `0x20` that
+/// [Uri.encodeQueryComponent] would have meant by it.
+///
+/// A [FormatException] is thrown when the input ends partway through a
+/// percent-encoding or contains a non-ASCII code unit.
+class PercentDecoder extends Converter<String, List<int>> {
+  const PercentDecoder._();
+
+  List<int> convert(String string) {
+    var decoded = new Uint8Buffer();
+    var pending = _decode(string.codeUnits, 0, string.length, decoded);
+
+    // A non-null result means the input stopped in the middle of an escape,
+    // so there's no complete byte left to emit for it.
+    if (pending == null) return decoded.buffer.asUint8List(0, decoded.length);
+
+    throw new FormatException(
+        "Input ended with incomplete encoded byte.", string, string.length);
+  }
+
+  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
+    return new _PercentDecoderSink(sink);
+  }
+}
+
+/// A conversion sink that decodes percent-encoded strings chunk by chunk.
+class _PercentDecoderSink extends StringConversionSinkBase {
+  /// The underlying sink to which decoded byte arrays will be passed.
+  final Sink<List<int>> _sink;
+
+  /// The state of a percent-encoding left incomplete by the previous string.
+  ///
+  /// This is `null` if the previous string ended with a complete
+  /// percent-encoded byte or a literal character. It's [_lastPercent] if the
+  /// most recent string ended with `%`. Otherwise, the most recent string ended
+  /// with a `%` followed by a hexadecimal digit, and this is 16 times that
+  /// digit, because it's the most significant digit of the byte.
+  int _lastDigit;
+
+  _PercentDecoderSink(this._sink);
+
+  void addSlice(String string, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, string.length);
+
+    if (start == end) {
+      if (isLast) _close(string, end);
+      return;
+    }
+
+    var buffer = new Uint8Buffer();
+    var codeUnits = string.codeUnits;
+    if (_lastDigit == _lastPercent) {
+      _lastDigit = 16 * digitForCodeUnit(codeUnits, start);
+      start++;
+      // If that digit was the whole slice, it stays pending for the next one.
+      if (start == end) {
+        if (isLast) _close(string, end);
+        return;
+      }
+    }
+    // A pending high digit combines with the next code unit to form one byte.
+    if (_lastDigit != null) {
+      buffer.add(_lastDigit + digitForCodeUnit(codeUnits, start));
+      start++;
+    }
+
+    _lastDigit = _decode(codeUnits, start, end, buffer);
+
+    _sink.add(buffer.buffer.asUint8List(0, buffer.length));
+    if (isLast) _close(string, end);
+  }
+
+  ByteConversionSink asUtf8Sink(bool allowMalformed) =>
+      new _PercentDecoderByteSink(_sink);
+
+  void close() => _close();
+
+  /// Like [close], but includes [string] and [index] in the [FormatException]
+  /// if one is thrown.
+  void _close([String string, int index]) {
+    if (_lastDigit != null) {
+      throw new FormatException(
+          "Input ended with incomplete encoded byte.", string, index);
+    }
+
+    _sink.close();
+  }
+}
+
+/// A conversion sink for chunked percent-encoded decoding from UTF-8 bytes.
+class _PercentDecoderByteSink extends ByteConversionSinkBase {
+  /// The underlying sink to which decoded byte arrays will be passed.
+  final Sink<List<int>> _sink;
+
+  /// The state of a percent-encoding left incomplete by the previous chunk.
+  ///
+  /// This is `null` if the previous chunk ended with a complete
+  /// percent-encoded byte or a literal character. It's [_lastPercent] if the
+  /// most recent chunk ended with `%`. Otherwise, the most recent chunk ended
+  /// with a `%` followed by a hexadecimal digit, and this is 16 times that
+  /// digit, because it's the most significant digit of the byte.
+  int _lastDigit;
+
+  _PercentDecoderByteSink(this._sink);
+
+  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);
+
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+
+    if (start == end) {
+      if (isLast) _close(chunk, end);
+      return;
+    }
+
+    var buffer = new Uint8Buffer();
+    if (_lastDigit == _lastPercent) {
+      _lastDigit = 16 * digitForCodeUnit(chunk, start);
+      start++;
+      // If that digit was the whole slice, it stays pending for the next one.
+      if (start == end) {
+        if (isLast) _close(chunk, end);
+        return;
+      }
+    }
+    // A pending high digit combines with the next code unit to form one byte.
+    if (_lastDigit != null) {
+      buffer.add(_lastDigit + digitForCodeUnit(chunk, start));
+      start++;
+    }
+
+    _lastDigit = _decode(chunk, start, end, buffer);
+
+    _sink.add(buffer.buffer.asUint8List(0, buffer.length));
+    if (isLast) _close(chunk, end);
+  }
+
+  void close() => _close();
+
+  /// Like [close], but includes [chunk] and [index] in the [FormatException]
+  /// if one is thrown.
+  void _close([List<int> chunk, int index]) {
+    if (_lastDigit != null) {
+      throw new FormatException(
+          "Input ended with incomplete encoded byte.", chunk, index);
+    }
+
+    _sink.close();
+  }
+}
+
+/// Decodes the percent-encoded [codeUnits] and appends the bytes to [buffer].
+///
+/// Only the code units from [start] (inclusive) to [end] (exclusive) are read.
+///
+/// If the range ends partway through a percent-encoding, this returns
+/// [_lastPercent] for a trailing `%`, or sixteen times the value of the single
+/// hexadecimal digit that followed it. Otherwise it returns `null`.
+int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {
+  // A bitwise OR of all literal code units seen so far. This allows us to
+  // check for out-of-range code units without adding more branches than
+  // necessary to the core loop.
+  var codeUnitOr = 0;
+
+  // The beginning of the current slice of adjacent non-% characters. We can add
+  // all of these to the buffer at once.
+  var sliceStart = start;
+  for (var i = start; i < end; i++) {
+    // First, loop through non-% characters.
+    var codeUnit = codeUnits[i];
+    if (codeUnit != $percent) {
+      codeUnitOr |= codeUnit;
+      continue;
+    }
+
+    // We found a %. The slice from `sliceStart` to `i` represents characters
+    // that can be copied to the buffer as-is.
+    if (i > sliceStart) {
+      _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, i);
+      buffer.addAll(codeUnits.getRange(sliceStart, i));
+    }
+
+    // Now decode the percent-encoded byte and add it as well.
+    i++;
+    if (i >= end) return _lastPercent;
+
+    var firstDigit = digitForCodeUnit(codeUnits, i);
+    i++;
+    if (i >= end) return 16 * firstDigit;
+
+    var secondDigit = digitForCodeUnit(codeUnits, i);
+    buffer.add(16 * firstDigit + secondDigit);
+
+    // The next iteration will look for non-% characters again.
+    sliceStart = i + 1;
+  }
+
+  if (end > sliceStart) {
+    _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, end);
+    // Only take the whole-list fast path when the range really is the list.
+    if (sliceStart == 0 && end == codeUnits.length) {
+      buffer.addAll(codeUnits);
+    } else {
+      buffer.addAll(codeUnits.getRange(sliceStart, end));
+    }
+  }
+  return null;
+}
+
+/// Throws a [FormatException] naming the first non-ASCII code unit found in
+/// [codeUnits] from [start] to [end]; a no-op when [codeUnitOr] is ASCII.
+void _checkForInvalidCodeUnit(int codeUnitOr, List<int> codeUnits, int start,
+    int end) {
+  if (codeUnitOr >= 0 && codeUnitOr <= 0x7f) return;
+  for (var index = start; index < end; index++) {
+    var unit = codeUnits[index];
+    if (unit >= 0 && unit <= 0x7f) continue;
+    throw new FormatException(
+        "Non-ASCII code unit U+${unit.toRadixString(16).padLeft(4, '0')}",
+        codeUnits, index);
+  }
+}
diff --git a/lib/src/percent/encoder.dart b/lib/src/percent/encoder.dart
new file mode 100644
index 0000000..c781760
--- /dev/null
+++ b/lib/src/percent/encoder.dart
@@ -0,0 +1,104 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.percent.encoder;
+
+import 'dart:convert';
+
+import 'package:charcode/ascii.dart';
+
+/// The canonical instance of [PercentEncoder].
+const percentEncoder = const PercentEncoder._();
+
+/// A converter that encodes byte arrays into percent-encoded strings.
+///
+/// This encodes all bytes other than ASCII letters, decimal digits, or one of
+/// `-._~`. This matches the behavior of [Uri.encodeQueryComponent] except
+/// that it doesn't encode `0x20` bytes to the `+` character.
+///
+/// This will throw a [FormatException] if the byte array has any elements
+/// that don't fit in the gamut of a byte.
+class PercentEncoder extends Converter<List<int>, String> {
+  const PercentEncoder._();
+
+  String convert(List<int> bytes) => _convert(bytes, 0, bytes.length);
+
+  ByteConversionSink startChunkedConversion(Sink<String> sink) =>
+      new _PercentEncoderSink(sink);
+}
+
+/// A conversion sink for chunked percent-encoding.
+class _PercentEncoderSink extends ByteConversionSinkBase {
+  /// The underlying sink to which encoded strings will be passed.
+  final Sink<String> _sink;
+
+  _PercentEncoderSink(this._sink);
+
+  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);
+
+  /// Encodes [chunk] from [start] to [end] and passes the result to [_sink],
+  /// closing it afterwards if [isLast] is true.
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+    _sink.add(_convert(chunk, start, end));
+    if (isLast) _sink.close();
+  }
+
+  void close() {
+    _sink.close();
+  }
+}
+
+/// Percent-encodes the elements of [bytes] from [start] up to [end], throwing
+/// a [FormatException] if any of them doesn't fit in a byte.
+String _convert(List<int> bytes, int start, int end) {
+  var buffer = new StringBuffer();
+  // A bitwise OR of all bytes in [bytes]. This allows us to check for
+  // out-of-range bytes without adding more branches than necessary to the
+  // core loop.
+  var byteOr = 0;
+  for (var i = start; i < end; i++) {
+    var byte = bytes[i];
+    byteOr |= byte;
+
+    // Setting bit `0x20` maps each uppercase ASCII letter to its lowercase
+    // counterpart (they differ only in that bit), so one range check covers
+    // both cases.
+    var letter = 0x20 | byte;
+    if ((letter >= $a && letter <= $z) ||
+        (byte >= $0 && byte <= $9) ||
+        byte == $dash ||
+        byte == $dot ||
+        byte == $underscore ||
+        byte == $tilde) {
+      // Unreserved characters are safe to write as-is.
+      buffer.writeCharCode(byte);
+      continue;
+    }
+
+    // The bitwise arithmetic here is equivalent to `byte ~/ 16` and `byte % 16`
+    // for valid byte values, but is easier for dart2js to optimize given that
+    // it can't prove that [byte] will always be positive.
+    buffer.writeCharCode($percent);
+    buffer.writeCharCode(_codeUnitForDigit((byte & 0xF0) >> 4));
+    buffer.writeCharCode(_codeUnitForDigit(byte & 0x0F));
+  }
+
+  if (byteOr >= 0 && byteOr <= 255) return buffer.toString();
+
+  // If there was an invalid byte, find it and throw an exception.
+  for (var i = start; i < end; i++) {
+    var byte = bytes[i];
+    if (byte >= 0 && byte <= 0xff) continue;
+    throw new FormatException(
+        "Invalid byte ${byte < 0 ? "-" : ""}0x${byte.abs().toRadixString(16)}.",
+        bytes, i);
+  }
+
+  throw 'unreachable';
+}
+
+/// Converts the hexadecimal [digit] (0 through 15) into the code unit of the
+/// matching uppercase ASCII character.
+int _codeUnitForDigit(int digit) => digit >= 10 ? $A - 10 + digit : $0 + digit;
diff --git a/lib/src/utils.dart b/lib/src/utils.dart
new file mode 100644
index 0000000..6bd4468
--- /dev/null
+++ b/lib/src/utils.dart
@@ -0,0 +1,39 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.utils;
+
+import 'package:charcode/ascii.dart';
+
+/// Returns the digit (0 through 15) corresponding to the hexadecimal code unit
+/// at [index] in [codeUnits]. Uppercase and lowercase letters are both valid.
+///
+/// If the given code unit isn't valid hexadecimal, throws a [FormatException].
+int digitForCodeUnit(List<int> codeUnits, int index) {
+  // If the code unit is a numeral, get its value. XOR works because 0 in ASCII
+  // is `0b110000` and the other numerals come after it in ascending order and
+  // take up at most four bits.
+  //
+  // We check for digits first because it ensures there's only a single branch
+  // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
+  // check because branch prediction will always work on it for valid data.
+  var codeUnit = codeUnits[index];
+  var digit = $0 ^ codeUnit;
+  if (digit <= 9) {
+    if (digit >= 0) return digit;
+  } else {
+    // If the code unit is an uppercase letter, convert it to lowercase. This
+    // works because uppercase letters in ASCII are exactly `0b100000 = 0x20`
+    // less than lowercase letters, so if we ensure that that bit is 1 we ensure
+    // that the letter is lowercase.
+    var letter = 0x20 | codeUnit;
+    if ($a <= letter && letter <= $f) return letter - $a + 10;
+  }
+
+  throw new FormatException(
+      "Invalid hexadecimal code unit "
+      "U+${codeUnit.toRadixString(16).padLeft(4, '0')}.",
+      codeUnits, index);
+}
+
diff --git a/pubspec.yaml b/pubspec.yaml
index db13edf..48ef140 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: convert
-version: 0.0.1-dev
+version: 1.0.0
description: Utilities for converting between data representations.
author: Dart Team <misc@dartlang.org>
homepage: https://github.com/dart-lang/convert
@@ -9,6 +9,7 @@
dependencies:
charcode: '^1.1.0'
+ typed_data: '^1.0.0'
dev_dependencies:
test: '^0.12.0'
diff --git a/test/percent_test.dart b/test/percent_test.dart
new file mode 100644
index 0000000..f7a1c48
--- /dev/null
+++ b/test/percent_test.dart
@@ -0,0 +1,208 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:async';
+
+import 'package:charcode/ascii.dart';
+import 'package:convert/convert.dart';
+import 'package:test/test.dart';
+
+void main() {
+ group("encoder", () {
+ test("doesn't percent-encode unreserved characters", () {
+ expect(percent.encode([
+ $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, $q, $r,
+ $s, $t, $u, $v, $w, $x, $y, $z, $A, $B, $C, $D, $E, $F, $G, $H, $I, $J,
+ $K, $L, $M, $N, $O, $P, $Q, $R, $S, $T, $U, $V, $W, $X, $Y, $Z, $dash,
+ $dot, $underscore, $tilde
+ ]), equals("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~"));
+ });
+
+ test("percent-encodes reserved ASCII characters", () {
+ expect(percent.encode([
+ $space, $backquote, $open_brace, $at, $open_bracket, $comma,
+ $division, $caret, $close_brace, $del, $nul, $percent
+ ]), equals("%20%60%7B%40%5B%2C%2F%5E%7D%7F%00%25"));
+ });
+
+ test("percent-encodes non-ASCII characters", () {
+ expect(percent.encode([0x80, 0xFF]), equals("%80%FF"));
+ });
+
+ test("mixes encoded and unencoded characters", () {
+ expect(percent.encode([$a, $plus, $b, $equal, 0x80]),
+ equals("a%2Bb%3D%80"));
+ });
+
+ group("with chunked conversion", () {
+ test("percent-encodes byte arrays", () {
+ var results = [];
+ var controller = new StreamController(sync: true);
+ controller.stream.listen(results.add);
+ var sink = percent.encoder.startChunkedConversion(controller.sink);
+
+ sink.add([$a, $plus, $b, $equal, 0x80]);
+ expect(results, equals(["a%2Bb%3D%80"]));
+
+ sink.add([0x00, 0x01, 0xfe, 0xff]);
+ expect(results, equals(["a%2Bb%3D%80", "%00%01%FE%FF"]));
+ });
+
+ test("handles empty and single-byte lists", () {
+ var results = [];
+ var controller = new StreamController(sync: true);
+ controller.stream.listen(results.add);
+ var sink = percent.encoder.startChunkedConversion(controller.sink);
+
+ sink.add([]);
+ expect(results, equals([""]));
+
+ sink.add([0x00]);
+ expect(results, equals(["", "%00"]));
+
+ sink.add([]);
+ expect(results, equals(["", "%00", ""]));
+ });
+ });
+
+ test("rejects non-bytes", () {
+ expect(() => percent.encode([0x100]), throwsFormatException);
+
+ var sink = percent.encoder.startChunkedConversion(
+ new StreamController(sync: true));
+ expect(() => sink.add([0x100]), throwsFormatException);
+ });
+ });
+
+ group("decoder", () {
+ test("converts percent-encoded strings to byte arrays", () {
+ expect(percent.decode("a%2Bb%3D%80"),
+ equals([$a, $plus, $b, $equal, 0x80]));
+ });
+
+ test("supports lowercase letters", () {
+ expect(percent.decode("a%2bb%3d%80"),
+ equals([$a, $plus, $b, $equal, 0x80]));
+ });
+
+ test("supports more aggressive encoding", () {
+ expect(percent.decode("%61%2E%5A"), equals([$a, $dot, $Z]));
+ });
+
+ test("supports less aggressive encoding", () {
+ expect(percent.decode(" `{@[,/^}\x7F\x00"), equals([
+ $space, $backquote, $open_brace, $at, $open_bracket, $comma,
+ $division, $caret, $close_brace, $del, $nul
+ ]));
+ });
+
+ group("with chunked conversion", () {
+ var results;
+ var sink;
+ setUp(() {
+ results = [];
+ var controller = new StreamController(sync: true);
+ controller.stream.listen(results.add);
+ sink = percent.decoder.startChunkedConversion(controller.sink);
+ });
+
+ test("converts percent to byte arrays", () {
+ sink.add("a%2Bb%3D%80");
+ expect(results, equals([[$a, $plus, $b, $equal, 0x80]]));
+
+ sink.add("%00%01%FE%FF");
+ expect(results,
+ equals([[$a, $plus, $b, $equal, 0x80], [0x00, 0x01, 0xfe, 0xff]]));
+ });
+
+ test("supports trailing percents and digits split across chunks", () {
+ sink.add("ab%");
+ expect(results, equals([[$a, $b]]));
+
+ sink.add("2");
+ expect(results, equals([[$a, $b]]));
+
+ sink.add("0cd%2");
+ expect(results, equals([[$a, $b], [$space, $c, $d]]));
+
+ sink.add("0");
+ expect(results, equals(([[$a, $b], [$space, $c, $d], [$space]])));
+ });
+
+ test("supports empty strings", () {
+ sink.add("");
+ expect(results, isEmpty);
+
+ sink.add("%");
+ expect(results, equals([[]]));
+
+ sink.add("");
+ expect(results, equals([[]]));
+
+ sink.add("2");
+ expect(results, equals([[]]));
+
+ sink.add("");
+ expect(results, equals([[]]));
+
+ sink.add("0");
+ expect(results, equals([[], [0x20]]));
+ });
+
+ test("rejects dangling % detected in close()", () {
+ sink.add("ab%");
+ expect(results, equals([[$a, $b]]));
+ expect(() => sink.close(), throwsFormatException);
+ });
+
+ test("rejects dangling digit detected in close()", () {
+ sink.add("ab%2");
+ expect(results, equals([[$a, $b]]));
+ expect(() => sink.close(), throwsFormatException);
+ });
+
+ test("rejects danging % detected in addSlice()", () {
+ sink.addSlice("ab%", 0, 3, false);
+ expect(results, equals([[$a, $b]]));
+
+ expect(() => sink.addSlice("ab%", 0, 3, true),
+ throwsFormatException);
+ });
+
+    test("rejects danging digit detected in addSlice()", () {
+      sink.addSlice("ab%2", 0, 4, false);
+      expect(results, equals([[$a, $b]]));
+      // Feed the slice again: it ends with a dangling digit, so closing throws.
+      expect(() => sink.addSlice("ab%2", 0, 4, true),
+          throwsFormatException);
+    });
+ });
+
+ group("rejects non-ASCII character", () {
+ for (var char in ["\u0141", "\u{10041}"]) {
+ test('"$char"', () {
+ expect(() => percent.decode("a$char"), throwsFormatException);
+ expect(() => percent.decode("${char}a"), throwsFormatException);
+
+ var sink = percent.decoder.startChunkedConversion(
+ new StreamController(sync: true));
+ expect(() => sink.add(char), throwsFormatException);
+ });
+ }
+ });
+
+ test("rejects % followed by non-hex", () {
+ expect(() => percent.decode("%z2"), throwsFormatException);
+ expect(() => percent.decode("%2z"), throwsFormatException);
+ });
+
+ test("rejects dangling % detected in convert()", () {
+ expect(() => percent.decode("ab%"), throwsFormatException);
+ });
+
+ test("rejects dangling digit detected in convert()", () {
+ expect(() => percent.decode("ab%2"), throwsFormatException);
+ });
+ });
+}