| // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| part of dart.convert; |
| |
/// Bit mask covering the seven low bits; the encoders and decoders in this
/// file treat any value with a bit set outside this mask as non-ASCII.
const _asciiMask = 0x7F;

/// A ready-made [AsciiCodec] created with the default arguments.
///
/// Use this shared constant for the common ASCII encode/decode cases instead
/// of constructing a codec of your own.
///
/// Examples:
/// ```dart
/// var encoded = ascii.encode("This is ASCII!");
/// var decoded = ascii.decode([0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
///                             0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x21]);
/// ```
const AsciiCodec ascii = AsciiCodec();
| |
/// A codec that turns strings into ASCII bytes and ASCII bytes back into
/// strings.
class AsciiCodec extends Encoding {
  /// Default choice for whether decoding tolerates non-ASCII bytes.
  final bool _allowInvalid;

  /// Creates an [AsciiCodec].
  ///
  /// [allowInvalid] sets the default used by [decode] and by the converter
  /// returned from [decoder]. When invalid values are allowed, each one is
  /// decoded as the Unicode Replacement character (U+FFFD); otherwise
  /// decoding throws. An individual [decode] call may override the default.
  ///
  /// Encoders never accept invalid (non-ASCII) characters.
  const AsciiCodec({bool allowInvalid = false}) : _allowInvalid = allowInvalid;

  /// The name of this codec, "us-ascii".
  String get name {
    return "us-ascii";
  }

  /// Encodes [source] by passing it to the converter returned by [encoder].
  Uint8List encode(String source) {
    return encoder.convert(source);
  }

  /// Decodes the ASCII [bytes] (a list of unsigned 7-bit integers) to the
  /// corresponding string.
  ///
  /// When invalid values are not allowed, a value outside the range
  /// 0 .. 127 makes the decoder throw a [FormatException]. When they are
  /// allowed, such values are decoded as U+FFFD instead.
  ///
  /// If [allowInvalid] is omitted (or `null`), the value this [AsciiCodec]
  /// was created with is used.
  String decode(List<int> bytes, {bool? allowInvalid}) {
    final tolerant = allowInvalid ?? _allowInvalid;
    final converter = tolerant
        ? const AsciiDecoder(allowInvalid: true)
        : const AsciiDecoder(allowInvalid: false);
    return converter.convert(bytes);
  }

  /// A constant [AsciiEncoder].
  AsciiEncoder get encoder {
    return const AsciiEncoder();
  }

  /// A constant [AsciiDecoder] configured with this codec's default for
  /// allowing invalid values.
  AsciiDecoder get decoder {
    if (_allowInvalid) {
      return const AsciiDecoder(allowInvalid: true);
    }
    return const AsciiDecoder(allowInvalid: false);
  }
}
| |
/// Superclass for [AsciiEncoder] and [Latin1Encoder].
///
/// Generalizes the common operations, which differ only by the bit mask
/// that defines which code units are acceptable.
class _UnicodeSubsetEncoder extends Converter<String, List<int>> {
  /// Code units with any bit set outside this mask are rejected.
  final int _subsetMask;

  const _UnicodeSubsetEncoder(this._subsetMask);

  /// Converts the [String] into a list of its code units.
  ///
  /// If [start] and [end] are provided, only the substring
  /// `string.substring(start, end)` is used as input to the conversion.
  ///
  /// Throws a [RangeError] if `start`/`end` do not describe a valid range of
  /// [string], and an [ArgumentError] if the selected range contains a code
  /// unit with bits outside the subset mask.
  Uint8List convert(String string, [int start = 0, int? end]) {
    // checkValidRange either throws or returns the effective, non-null end.
    // Binding it to a fresh non-nullable local avoids the former
    // `end == null` guard, which could never be taken.
    final int stop = RangeError.checkValidRange(start, end, string.length);
    final length = stop - start;
    final result = Uint8List(length);
    for (var i = 0; i < length; i++) {
      final codeUnit = string.codeUnitAt(start + i);
      if ((codeUnit & ~_subsetMask) != 0) {
        throw ArgumentError.value(
            string, "string", "Contains invalid characters.");
      }
      result[i] = codeUnit;
    }
    return result;
  }

  /// Starts a chunked conversion.
  ///
  /// The converter works more efficiently if the given [sink] is a
  /// [ByteConversionSink]; any other sink is adapted with
  /// [ByteConversionSink.from].
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return _UnicodeSubsetEncoderSink(_subsetMask,
        sink is ByteConversionSink ? sink : ByteConversionSink.from(sink));
  }

  // Override the base-class' bind, to provide a better type.
  Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);
}
| |
/// Converts strings made up solely of ASCII characters into bytes.
///
/// All conversion logic is inherited from the superclass; this class only
/// supplies the 7-bit ASCII mask.
class AsciiEncoder extends _UnicodeSubsetEncoder {
  /// Creates an encoder that accepts only code units within the ASCII mask.
  const AsciiEncoder() : super(_asciiMask);
}
| |
/// Chunked-conversion sink that validates string slices against a subset
/// mask and forwards their code units, as bytes, to a [ByteConversionSink].
class _UnicodeSubsetEncoderSink extends StringConversionSinkBase {
  /// Destination for the encoded bytes.
  final ByteConversionSink _sink;

  /// Code units with any bit set outside this mask are rejected.
  final int _subsetMask;

  _UnicodeSubsetEncoderSink(this._subsetMask, this._sink);

  /// Closes the underlying byte sink.
  void close() => _sink.close();

  /// Validates `source[start..end)` and forwards its code units.
  ///
  /// Throws a [RangeError] for an invalid range and an [ArgumentError] at
  /// the first code unit outside the subset; in the latter case nothing from
  /// this slice has been forwarded. When [isLast] is true the sink is closed
  /// after the slice has been added.
  void addSlice(String source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    final rejectedBits = ~_subsetMask;
    var index = start;
    // Check the entire slice first so that a bad slice adds nothing.
    while (index < end) {
      final codeUnit = source.codeUnitAt(index);
      if ((codeUnit & rejectedBits) != 0) {
        throw ArgumentError(
            "Source contains invalid character with code point: $codeUnit.");
      }
      index++;
    }
    final units = source.codeUnits;
    _sink.add(units.sublist(start, end));
    if (!isLast) return;
    close();
  }
}
| |
/// Base class for decoders that convert bytes from a subset of Unicode,
/// selected by a bit mask, into a string.
///
/// [AsciiDecoder] supplies [_asciiMask]; the constructor documentation also
/// names a Latin-1 (8-bit) mask as an intended value.
abstract class _UnicodeSubsetDecoder extends Converter<List<int>, String> {
  /// Whether invalid bytes are replaced (`true`) or rejected (`false`).
  final bool _allowInvalid;

  /// Bytes with any bit set outside this mask are considered invalid.
  final int _subsetMask;

  /// Instantiates a new decoder.
  ///
  /// The [_allowInvalid] argument defines how [convert] deals
  /// with invalid bytes.
  ///
  /// The [_subsetMask] argument is a bit mask used to define the subset
  /// of Unicode being decoded. Use [_LATIN1_MASK] for Latin-1 (8-bit) or
  /// [_asciiMask] for ASCII (7-bit).
  ///
  /// If [_allowInvalid] is `true`, [convert] replaces invalid bytes with the
  /// Unicode Replacement character `U+FFFD` (�).
  /// Otherwise it throws a [FormatException].
  const _UnicodeSubsetDecoder(this._allowInvalid, this._subsetMask);

  /// Converts the [bytes] (a list of unsigned 7- or 8-bit integers) to the
  /// corresponding string.
  ///
  /// If [start] and [end] are provided, only the sub-list of bytes from
  /// `start` to `end` (`end` not inclusive) is used as input to the conversion.
  ///
  /// Throws a [RangeError] if `start`/`end` do not describe a valid range of
  /// [bytes]. On the first byte outside the subset, throws a
  /// [FormatException] unless invalid bytes are allowed, in which case every
  /// invalid byte in the range is decoded as `U+FFFD`.
  String convert(List<int> bytes, [int start = 0, int? end]) {
    // checkValidRange either throws or returns the effective, non-null end.
    // Binding it to a fresh non-nullable local avoids the former
    // `end == null` guard, which could never be taken.
    final int stop = RangeError.checkValidRange(start, end, bytes.length);
    for (var i = start; i < stop; i++) {
      var byte = bytes[i];
      if ((byte & ~_subsetMask) != 0) {
        if (!_allowInvalid) {
          throw FormatException("Invalid value in input: $byte");
        }
        // Switch to the slower path that rebuilds the whole range with
        // replacement characters.
        return _convertInvalid(bytes, start, stop);
      }
    }
    // Every byte is inside the subset, so the values are the char codes.
    return String.fromCharCodes(bytes, start, stop);
  }

  /// Decodes `bytes[start..end)`, writing `U+FFFD` for every value that has
  /// bits outside the subset mask.
  String _convertInvalid(List<int> bytes, int start, int end) {
    var buffer = StringBuffer();
    for (var i = start; i < end; i++) {
      var value = bytes[i];
      if ((value & ~_subsetMask) != 0) value = 0xFFFD;
      buffer.writeCharCode(value);
    }
    return buffer.toString();
  }

  /// Starts a chunked conversion.
  ///
  /// The converter works more efficiently if the given [sink] is a
  /// [StringConversionSink].
  ByteConversionSink startChunkedConversion(Sink<String> sink);

  // Override the base-class's bind, to provide a better type.
  Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);
}
| |
/// Converts ASCII bytes (lists of unsigned 7-bit integers) to a string.
class AsciiDecoder extends _UnicodeSubsetDecoder {
  /// Creates a decoder; [allowInvalid] is passed to the superclass together
  /// with the ASCII mask.
  const AsciiDecoder({bool allowInvalid = false})
      : super(allowInvalid, _asciiMask);

  /// Starts a chunked conversion.
  ///
  /// The converter works more efficiently if the given [sink] is a
  /// [StringConversionSink]; any other sink is adapted with
  /// [StringConversionSink.from].
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    final StringConversionSink stringSink = sink is StringConversionSink
        ? sink
        : StringConversionSink.from(sink);
    // TODO(lrn): Use asUtf16Sink when it becomes available. It
    // works just as well, is likely to have less decoding overhead,
    // and make adding U+FFFD easier.
    // At that time, merge this with _Latin1DecoderSink;
    if (!_allowInvalid) {
      return _SimpleAsciiDecoderSink(stringSink);
    }
    return _ErrorHandlingAsciiDecoderSink(stringSink.asUtf8Sink(false));
  }
}
| |
/// Byte sink that forwards ASCII bytes to a UTF-8 sink and substitutes the
/// UTF-8 encoding of U+FFFD for every non-ASCII byte.
class _ErrorHandlingAsciiDecoderSink extends ByteConversionSinkBase {
  ByteConversionSink _utf8Sink;
  _ErrorHandlingAsciiDecoderSink(this._utf8Sink);

  /// Closes the wrapped UTF-8 sink.
  void close() => _utf8Sink.close();

  /// Adds all of [source] as a non-final slice.
  void add(List<int> source) => addSlice(source, 0, source.length, false);

  /// Forwards `source[start..end)`, replacing each non-ASCII byte.
  ///
  /// Runs of valid bytes are passed through unchanged. If [isLast] is true,
  /// the flag is passed on with the trailing run, or the sink is closed
  /// directly when no trailing run remains.
  void addSlice(List<int> source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    // Index of the first byte that has not been forwarded yet.
    var pending = start;
    for (var i = start; i < end; i++) {
      if ((source[i] & ~_asciiMask) == 0) continue;
      // Flush the valid bytes preceding the invalid one.
      if (i > pending) {
        _utf8Sink.addSlice(source, pending, i, false);
      }
      // Add UTF-8 encoding of U+FFFD.
      _utf8Sink.add(const <int>[0xEF, 0xBF, 0xBD]);
      pending = i + 1;
    }
    if (pending < end) {
      _utf8Sink.addSlice(source, pending, end, isLast);
    } else if (isLast) {
      close();
    }
  }
}
| |
/// Byte sink that rejects any non-ASCII byte and otherwise passes each chunk
/// on to the wrapped sink as a string.
class _SimpleAsciiDecoderSink extends ByteConversionSinkBase {
  Sink _sink;
  _SimpleAsciiDecoderSink(this._sink);

  /// Closes the wrapped sink.
  void close() => _sink.close();

  /// Checks every byte of [source], then adds the decoded string.
  ///
  /// Throws a [FormatException] if any byte has bits outside the ASCII mask;
  /// nothing is added to the wrapped sink in that case.
  void add(List<int> source) {
    for (final byte in source) {
      if ((byte & ~_asciiMask) != 0) {
        throw FormatException("Source contains non-ASCII bytes.");
      }
    }
    _sink.add(String.fromCharCodes(source));
  }

  /// Adds `source[start..end)` and closes the sink when [isLast] is true.
  ///
  /// An empty range adds nothing. The list is copied with `sublist` only
  /// when the range does not cover all of [source].
  void addSlice(List<int> source, int start, int end, bool isLast) {
    final length = source.length;
    RangeError.checkValidRange(start, end, length);
    if (start < end) {
      final coversAll = start == 0 && end == length;
      add(coversAll ? source : source.sublist(start, end));
    }
    if (isLast) {
      close();
    }
  }
}