| // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| part of dart.convert; |
| |
/**
 * A shared, default-configured [AsciiCodec] instance.
 *
 * Use this constant for the common cases of encoding a string to ASCII
 * bytes or decoding ASCII bytes back to a string.
 *
 * Examples:
 *
 *     var encoded = ASCII.encode("This is ASCII!");
 *     var decoded = ASCII.decode([0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
 *                                 0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x21]);
 */
const AsciiCodec ASCII = const AsciiCodec();

// Bit mask of the seven low bits. A value with any bit set outside this mask
// is treated as non-ASCII by the encoders and decoders in this file.
const int _ASCII_MASK = 0x7F;
| |
/**
 * A codec that encodes strings as ASCII bytes and decodes ASCII bytes
 * back into strings.
 */
class AsciiCodec extends Encoding {
  // Default used by [decode] and [decoder] when the caller does not choose.
  final bool _allowInvalid;

  /**
   * Creates a new [AsciiCodec].
   *
   * The [allowInvalid] flag sets the default behavior of [decode] and of the
   * converter returned by [decoder] when they meet a value outside the ASCII
   * range. When it is true, such values are decoded as the Unicode
   * Replacement character (U+FFFD); when it is false, an exception is thrown.
   * Individual calls to [decode] may override this default.
   *
   * Encoders never accept invalid (non-ASCII) characters.
   */
  const AsciiCodec({bool allowInvalid: false}) : _allowInvalid = allowInvalid;

  String get name => "us-ascii";

  /**
   * Decodes the ASCII [bytes] (a list of unsigned 7-bit integers) to the
   * corresponding string.
   *
   * If [bytes] contains a value outside the range 0 .. 127, the result
   * depends on [allowInvalid]: when it is true the value is replaced by
   * U+FFFD, otherwise a [FormatException] is thrown.
   *
   * If [allowInvalid] is omitted (or null), the value this [AsciiCodec] was
   * created with is used.
   */
  String decode(List<int> bytes, {bool allowInvalid}) {
    final bool lenient = (allowInvalid == null) ? _allowInvalid : allowInvalid;
    // Both decoders are compile-time constants, so pick one rather than
    // building a new instance from the flag.
    final AsciiDecoder chosen = lenient
        ? const AsciiDecoder(allowInvalid: true)
        : const AsciiDecoder(allowInvalid: false);
    return chosen.convert(bytes);
  }

  AsciiEncoder get encoder => const AsciiEncoder();

  AsciiDecoder get decoder {
    if (_allowInvalid) {
      return const AsciiDecoder(allowInvalid: true);
    }
    return const AsciiDecoder(allowInvalid: false);
  }
}
| |
// Shared base class of [AsciiEncoder] and [Latin1Encoder].
// The two encoders perform the same steps and differ only in the bit mask
// that decides which code units are acceptable.
class _UnicodeSubsetEncoder extends Converter<String, List<int>> {
  // Mask of the bits a code unit is allowed to use.
  final int _subsetMask;

  const _UnicodeSubsetEncoder(this._subsetMask);

  /**
   * Encodes [string] as a list of bytes, one byte per code unit.
   *
   * When [start] and [end] are given, only the part of the string
   * corresponding to `string.substring(start, end)` is encoded.
   *
   * Throws an [ArgumentError] if a code unit in that range has any bit set
   * outside the subset mask.
   */
  List<int> convert(String string, [int start = 0, int end]) {
    final int inputLength = string.length;
    RangeError.checkValidRange(start, end, inputLength);
    if (end == null) end = inputLength;
    List bytes = new Uint8List(end - start);
    for (int index = start; index < end; index++) {
      var codeUnit = string.codeUnitAt(index);
      if ((codeUnit & ~_subsetMask) != 0) {
        throw new ArgumentError("String contains invalid characters.");
      }
      bytes[index - start] = codeUnit;
    }
    return bytes;
  }

  /**
   * Starts a chunked conversion.
   *
   * A [sink] that is already a [ByteConversionSink] is used directly; any
   * other sink is first wrapped in one, so passing a [ByteConversionSink]
   * is the more efficient choice.
   */
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    ByteConversionSink byteSink = (sink is ByteConversionSink)
        ? sink
        : new ByteConversionSink.from(sink);
    return new _UnicodeSubsetEncoderSink(_subsetMask, byteSink);
  }

  // Overridden only to give the result a more precise static type than the
  // base class declares.
  Stream<List<int>> bind(Stream<String> stream) {
    return super.bind(stream);
  }
}
| |
/**
 * An encoder that turns strings made up solely of ASCII characters into
 * bytes.
 *
 * All of the work is done by the superclass; this class only supplies the
 * 7-bit ASCII mask.
 */
class AsciiEncoder extends _UnicodeSubsetEncoder {
  const AsciiEncoder() : super(_ASCII_MASK);
}
| |
/**
 * A chunked-conversion sink that encodes incoming string slices to bytes
 * (unsigned 8-bit integers) and forwards them to a [ByteConversionSink].
 */
class _UnicodeSubsetEncoderSink extends StringConversionSinkBase {
  // Destination for the encoded bytes.
  final ByteConversionSink _sink;
  // Mask of the bits a code unit is allowed to use.
  final int _subsetMask;

  _UnicodeSubsetEncoderSink(this._subsetMask, this._sink);

  void close() {
    _sink.close();
  }

  /**
   * Encodes the code units of [source] from [start] to [end] (exclusive).
   *
   * The whole range is validated before anything is forwarded, so a slice
   * containing an invalid code unit throws an [ArgumentError] without
   * adding any of its bytes to the underlying sink. If [isLast] is true the
   * sink is closed after the bytes have been added.
   */
  void addSlice(String source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);

    // Validation pass: reject the slice if any code unit falls outside the
    // accepted subset.
    int position = start;
    while (position < end) {
      int codeUnit = source.codeUnitAt(position);
      if ((codeUnit & ~_subsetMask) != 0) {
        throw new ArgumentError(
            "Source contains invalid character with code point: $codeUnit.");
      }
      position++;
    }

    // Forwarding pass: every code unit fits in a byte, so pass them on as-is.
    List<int> encoded = source.codeUnits.sublist(start, end);
    _sink.add(encoded);
    if (isLast) {
      close();
    }
  }
}
| |
/**
 * Shared base class for decoders that turn bytes from a subset of Unicode
 * (selected by a bit mask) into a string.
 */
abstract class _UnicodeSubsetDecoder extends Converter<List<int>, String> {
  final bool _allowInvalid;
  final int _subsetMask;

  /**
   * Creates a new decoder.
   *
   * The [_allowInvalid] argument controls what [convert] does with invalid
   * bytes: if it is `true` they are replaced by the Unicode Replacement
   * character `U+FFFD`, otherwise a [FormatException] is thrown.
   *
   * The [_subsetMask] argument is the bit mask that defines which subset of
   * Unicode is decoded. Use [_LATIN1_MASK] for Latin-1 (8-bit) or
   * [_ASCII_MASK] for ASCII (7-bit).
   */
  const _UnicodeSubsetDecoder(this._allowInvalid, this._subsetMask);

  /**
   * Decodes [bytes] (a list of unsigned 7- or 8-bit integers) to the
   * corresponding string.
   *
   * When [start] and [end] are given, only the bytes from index `start` up
   * to, but not including, index `end` are decoded.
   */
  String convert(List<int> bytes, [int start = 0, int end]) {
    final int total = bytes.length;
    RangeError.checkValidRange(start, end, total);
    if (end == null) end = total;

    // Scan for the first byte outside the subset. If there is none, the
    // bytes can be turned into a string directly.
    for (int index = start; index < end; index++) {
      int byte = bytes[index];
      if ((byte & ~_subsetMask) == 0) continue;
      if (_allowInvalid) {
        return _convertInvalid(bytes, start, end);
      }
      throw new FormatException("Invalid value in input: $byte");
    }
    return new String.fromCharCodes(bytes, start, end);
  }

  // Decodes the range one value at a time, writing U+FFFD in place of every
  // value that has bits set outside the subset mask.
  String _convertInvalid(List<int> bytes, int start, int end) {
    StringBuffer output = new StringBuffer();
    for (int index = start; index < end; index++) {
      int value = bytes[index];
      bool outsideSubset = (value & ~_subsetMask) != 0;
      output.writeCharCode(outsideSubset ? 0xFFFD : value);
    }
    return output.toString();
  }

  /**
   * Starts a chunked conversion.
   *
   * The converter works more efficiently if the given [sink] is a
   * [StringConversionSink].
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink);

  // Overridden only to give the result a more precise static type than the
  // base class declares.
  Stream<String> bind(Stream<List<int>> stream) {
    return super.bind(stream);
  }
}
| |
/**
 * A decoder that converts ASCII bytes to a string.
 *
 * Conversion of complete lists is inherited from the superclass, configured
 * with the 7-bit ASCII mask; this class adds the chunked conversion.
 */
class AsciiDecoder extends _UnicodeSubsetDecoder {
  const AsciiDecoder({bool allowInvalid: false})
      : super(allowInvalid, _ASCII_MASK);

  /**
   * Starts a chunked conversion.
   *
   * A [sink] that is already a [StringConversionSink] is used directly; any
   * other sink is first wrapped in one, so passing a
   * [StringConversionSink] is the more efficient choice.
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    StringConversionSink stringSink = (sink is StringConversionSink)
        ? sink
        : new StringConversionSink.from(sink);
    if (!_allowInvalid) {
      return new _SimpleAsciiDecoderSink(stringSink);
    }
    // TODO(lrn): Use asUtf16Sink when it becomes available. It
    // works just as well, is likely to have less decoding overhead,
    // and make adding U+FFFD easier.
    // At that time, merge this with _Latin1DecoderSink;
    return new _ErrorHandlingAsciiDecoderSink(stringSink.asUtf8Sink(false));
  }
}
| |
// Chunked ASCII decoder sink that tolerates invalid input: valid bytes are
// passed through to a UTF-8 sink unchanged, and each non-ASCII value is
// replaced by the UTF-8 encoding of U+FFFD.
class _ErrorHandlingAsciiDecoderSink extends ByteConversionSinkBase {
  ByteConversionSink _utf8Sink;
  _ErrorHandlingAsciiDecoderSink(this._utf8Sink);

  void close() {
    _utf8Sink.close();
  }

  void add(List<int> source) {
    addSlice(source, 0, source.length, false);
  }

  void addSlice(List<int> source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    // Start of the current run of valid bytes not yet forwarded.
    int runStart = start;
    for (int index = start; index < end; index++) {
      if ((source[index] & ~_ASCII_MASK) == 0) continue;
      // Flush the valid bytes that precede the invalid one, if any.
      if (index > runStart) {
        _utf8Sink.addSlice(source, runStart, index, false);
      }
      // Add UTF-8 encoding of U+FFFD.
      _utf8Sink.add(const <int>[0xEF, 0xBF, 0xBD]);
      runStart = index + 1;
    }
    if (runStart < end) {
      // The trailing run carries the [isLast] flag on to the UTF-8 sink.
      _utf8Sink.addSlice(source, runStart, end, isLast);
    } else if (isLast) {
      // Nothing left to forward, so the close has to be done here.
      close();
    }
  }
}
| |
// Chunked ASCII decoder sink that rejects invalid input: every chunk is
// checked, and a chunk containing a non-ASCII value throws a
// [FormatException] instead of being forwarded.
class _SimpleAsciiDecoderSink extends ByteConversionSinkBase {
  Sink _sink;
  _SimpleAsciiDecoderSink(this._sink);

  void close() {
    _sink.close();
  }

  void add(List<int> source) {
    // Check the whole chunk before any of it is turned into a string.
    int index = 0;
    while (index < source.length) {
      if ((source[index] & ~_ASCII_MASK) != 0) {
        throw new FormatException("Source contains non-ASCII bytes.");
      }
      index++;
    }
    _sink.add(new String.fromCharCodes(source));
  }

  void addSlice(List<int> source, int start, int end, bool isLast) {
    final int total = source.length;
    RangeError.checkValidRange(start, end, total);
    if (start < end) {
      // Copy only when the slice is a proper part of the list.
      bool coversWholeList = (start == 0 && end == total);
      add(coversWholeList ? source : source.sublist(start, end));
    }
    if (isLast) close();
  }
}