lib/src/percent/decoder.dart - convert - Git at Google

 // Copyright (c) 2015, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 library convert.percent.decoder;

 import 'dart:convert';

 import 'package:typed_data/typed_data.dart';

 import '../charcodes.dart';
 import '../utils.dart';

 /// The canonical instance of [PercentDecoder].
 const percentDecoder = PercentDecoder._();

 const _lastPercent = -1;

 /// A converter that decodes percent-encoded strings into byte arrays.
 ///
 /// To be maximally flexible, this will decode any percent-encoded byte and
 /// will allow any non-percent-encoded byte other than `%`. By default, it
 /// interprets `+` as `0x2B` rather than `0x20` as emitted by
 /// [Uri.encodeQueryComponent].
 ///
 /// This will throw a [FormatException] if the input string has an incomplete
 /// percent-encoding, or if it contains non-ASCII code units.
 class PercentDecoder extends Converter<String, List<int>> {
   const PercentDecoder._();

   @override
   List<int> convert(String input) {
     var buffer = Uint8Buffer();
     var lastDigit = _decode(input.codeUnits, 0, input.length, buffer);

     if (lastDigit != null) {
       throw FormatException(
           "Input ended with incomplete encoded byte.", input, input.length);
     }

     return buffer.buffer.asUint8List(0, buffer.length);
   }

   @override
   StringConversionSink startChunkedConversion(Sink<List<int>> sink) =>
       _PercentDecoderSink(sink);
 }

 /// A conversion sink for chunked percent-encoded decoding.
 class _PercentDecoderSink extends StringConversionSinkBase {
   /// The underlying sink to which decoded byte arrays will be passed.
   final Sink<List<int>> _sink;

   /// The trailing digit from the previous string.
   ///
   /// This is `null` if the previous string ended with a complete
   /// percent-encoded byte or a literal character. It's [_lastPercent] if the
   /// most recent string ended with `%`. Otherwise, the most recent string ended
   /// with a `%` followed by a hexadecimal digit, and this is that digit. Since
   /// it's the most significant digit, it's always a multiple of 16.
   int? _lastDigit;

   _PercentDecoderSink(this._sink);

   @override
   void addSlice(String string, int start, int end, bool isLast) {
     RangeError.checkValidRange(start, end, string.length);

     if (start == end) {
       if (isLast) _close(string, end);
       return;
     }

     var buffer = Uint8Buffer();
     var codeUnits = string.codeUnits;
     if (_lastDigit == _lastPercent) {
       _lastDigit = 16 * digitForCodeUnit(codeUnits, start);
       start++;

       if (start == end) {
         if (isLast) _close(string, end);
         return;
       }
     }

     if (_lastDigit != null) {
       buffer.add(_lastDigit! + digitForCodeUnit(codeUnits, start));
       start++;
     }

     _lastDigit = _decode(codeUnits, start, end, buffer);

     _sink.add(buffer.buffer.asUint8List(0, buffer.length));
     if (isLast) _close(string, end);
   }

   @override
   ByteConversionSink asUtf8Sink(bool allowMalformed) =>
       _PercentDecoderByteSink(_sink);

   @override
   void close() => _close();

   /// Like [close], but includes [string] and [index] in the [FormatException]
   /// if one is thrown.
   void _close([String? string, int? index]) {
     if (_lastDigit != null) {
       throw FormatException(
           "Input ended with incomplete encoded byte.", string, index);
     }

     _sink.close();
   }
 }

 /// A conversion sink for chunked percent-encoded decoding from UTF-8 bytes.
 class _PercentDecoderByteSink extends ByteConversionSinkBase {
   /// The underlying sink to which decoded byte arrays will be passed.
   final Sink<List<int>> _sink;

   /// The trailing digit from the previous string.
   ///
   /// This is `null` if the previous string ended with a complete
   /// percent-encoded byte or a literal character. It's [_lastPercent] if the
   /// most recent string ended with `%`. Otherwise, the most recent string ended
   /// with a `%` followed by a hexadecimal digit, and this is that digit. Since
   /// it's the most significant digit, it's always a multiple of 16.
   int? _lastDigit;

   _PercentDecoderByteSink(this._sink);

   @override
   void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

   @override
   void addSlice(List<int> chunk, int start, int end, bool isLast) {
     RangeError.checkValidRange(start, end, chunk.length);

     if (start == end) {
       if (isLast) _close(chunk, end);
       return;
     }

     var buffer = Uint8Buffer();
     if (_lastDigit == _lastPercent) {
       _lastDigit = 16 * digitForCodeUnit(chunk, start);
       start++;

       if (start == end) {
         if (isLast) _close(chunk, end);
         return;
       }
     }

     if (_lastDigit != null) {
       buffer.add(_lastDigit! + digitForCodeUnit(chunk, start));
       start++;
     }

     _lastDigit = _decode(chunk, start, end, buffer);

     _sink.add(buffer.buffer.asUint8List(0, buffer.length));
     if (isLast) _close(chunk, end);
   }

   @override
   void close() => _close();

   /// Like [close], but includes [chunk] and [index] in the [FormatException]
   /// if one is thrown.
   void _close([List<int>? chunk, int? index]) {
     if (_lastDigit != null) {
       throw FormatException(
           "Input ended with incomplete encoded byte.", chunk, index);
     }

     _sink.close();
   }
 }

 /// Decodes [codeUnits] and writes the result into [destination].
 ///
 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes
 /// the result into [destination] starting at [destinationStart].
 ///
 /// If there's a leftover digit at the end of the decoding, this returns that
 /// digit. Otherwise it returns `null`.
 int? _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {
   // A bitwise OR of all code units in [codeUnits]. This allows us to check for
   // out-of-range code units without adding more branches than necessary to the
   // core loop.
   var codeUnitOr = 0;

   // The beginning of the current slice of adjacent non-% characters. We can add
   // all of these to the buffer at once.
   var sliceStart = start;
   for (var i = start; i < end; i++) {
     // First, loop through non-% characters.
     var codeUnit = codeUnits[i];
     if (codeUnits[i] != $percent) {
       codeUnitOr |= codeUnit;
       continue;
     }

     // We found a %. The slice from `sliceStart` to `i` represents characters
     // than can be copied to the buffer as-is.
     if (i > sliceStart) {
       _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, i);
       buffer.addAll(codeUnits, sliceStart, i);
     }

     // Now decode the percent-encoded byte and add it as well.
     i++;
     if (i >= end) return _lastPercent;

     var firstDigit = digitForCodeUnit(codeUnits, i);
     i++;
     if (i >= end) return 16 * firstDigit;

     var secondDigit = digitForCodeUnit(codeUnits, i);
     buffer.add(16 * firstDigit + secondDigit);

     // The next iteration will look for non-% characters again.
     sliceStart = i + 1;
   }

   if (end > sliceStart) {
     _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, end);
     if (start == sliceStart) {
       buffer.addAll(codeUnits);
     } else {
       buffer.addAll(codeUnits, sliceStart, end);
     }
   }

   return null;
 }

 void _checkForInvalidCodeUnit(
     int codeUnitOr, List<int> codeUnits, int start, int end) {
   if (codeUnitOr >= 0 && codeUnitOr <= 0x7f) return;

   for (var i = start; i < end; i++) {
     var codeUnit = codeUnits[i];
     if (codeUnit >= 0 && codeUnit <= 0x7f) continue;
     throw FormatException(
         "Non-ASCII code unit "
         "U+${codeUnit.toRadixString(16).padLeft(4, '0')}",
         codeUnits,
         i);
   }
 }
	// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	library convert.percent.decoder;

	import 'dart:convert';

	import 'package:typed_data/typed_data.dart';

	import '../charcodes.dart';
	import '../utils.dart';

	/// The canonical instance of [PercentDecoder].
	const percentDecoder = PercentDecoder._();

	const _lastPercent = -1;

	/// A converter that decodes percent-encoded strings into byte arrays.
	///
	/// To be maximally flexible, this will decode any percent-encoded byte and
	/// will allow any non-percent-encoded byte other than `%`. By default, it
	/// interprets `+` as `0x2B` rather than `0x20` as emitted by
	/// [Uri.encodeQueryComponent].
	///
	/// This will throw a [FormatException] if the input string has an incomplete
	/// percent-encoding, or if it contains non-ASCII code units.
	class PercentDecoder extends Converter<String, List<int>> {
	const PercentDecoder._();

	@override
	List<int> convert(String input) {
	var buffer = Uint8Buffer();
	var lastDigit = _decode(input.codeUnits, 0, input.length, buffer);

	if (lastDigit != null) {
	throw FormatException(
	"Input ended with incomplete encoded byte.", input, input.length);
	}

	return buffer.buffer.asUint8List(0, buffer.length);
	}

	@override
	StringConversionSink startChunkedConversion(Sink<List<int>> sink) =>
	_PercentDecoderSink(sink);
	}

	/// A conversion sink for chunked percent-encoded decoding.
	class _PercentDecoderSink extends StringConversionSinkBase {
	/// The underlying sink to which decoded byte arrays will be passed.
	final Sink<List<int>> _sink;

	/// The trailing digit from the previous string.
	///
	/// This is `null` if the previous string ended with a complete
	/// percent-encoded byte or a literal character. It's [_lastPercent] if the
	/// most recent string ended with `%`. Otherwise, the most recent string ended
	/// with a `%` followed by a hexadecimal digit, and this is that digit. Since
	/// it's the most significant digit, it's always a multiple of 16.
	int? _lastDigit;

	_PercentDecoderSink(this._sink);

	@override
	void addSlice(String string, int start, int end, bool isLast) {
	RangeError.checkValidRange(start, end, string.length);

	if (start == end) {
	if (isLast) _close(string, end);
	return;
	}

	var buffer = Uint8Buffer();
	var codeUnits = string.codeUnits;
	if (_lastDigit == _lastPercent) {
	_lastDigit = 16 * digitForCodeUnit(codeUnits, start);
	start++;

	if (start == end) {
	if (isLast) _close(string, end);
	return;
	}
	}

	if (_lastDigit != null) {
	buffer.add(_lastDigit! + digitForCodeUnit(codeUnits, start));
	start++;
	}

	_lastDigit = _decode(codeUnits, start, end, buffer);

	_sink.add(buffer.buffer.asUint8List(0, buffer.length));
	if (isLast) _close(string, end);
	}

	@override
	ByteConversionSink asUtf8Sink(bool allowMalformed) =>
	_PercentDecoderByteSink(_sink);

	@override
	void close() => _close();

	/// Like [close], but includes [string] and [index] in the [FormatException]
	/// if one is thrown.
	void _close([String? string, int? index]) {
	if (_lastDigit != null) {
	throw FormatException(
	"Input ended with incomplete encoded byte.", string, index);
	}

	_sink.close();
	}
	}

	/// A conversion sink for chunked percent-encoded decoding from UTF-8 bytes.
	class _PercentDecoderByteSink extends ByteConversionSinkBase {
	/// The underlying sink to which decoded byte arrays will be passed.
	final Sink<List<int>> _sink;

	/// The trailing digit from the previous string.
	///
	/// This is `null` if the previous string ended with a complete
	/// percent-encoded byte or a literal character. It's [_lastPercent] if the
	/// most recent string ended with `%`. Otherwise, the most recent string ended
	/// with a `%` followed by a hexadecimal digit, and this is that digit. Since
	/// it's the most significant digit, it's always a multiple of 16.
	int? _lastDigit;

	_PercentDecoderByteSink(this._sink);

	@override
	void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

	@override
	void addSlice(List<int> chunk, int start, int end, bool isLast) {
	RangeError.checkValidRange(start, end, chunk.length);

	if (start == end) {
	if (isLast) _close(chunk, end);
	return;
	}

	var buffer = Uint8Buffer();
	if (_lastDigit == _lastPercent) {
	_lastDigit = 16 * digitForCodeUnit(chunk, start);
	start++;

	if (start == end) {
	if (isLast) _close(chunk, end);
	return;
	}
	}

	if (_lastDigit != null) {
	buffer.add(_lastDigit! + digitForCodeUnit(chunk, start));
	start++;
	}

	_lastDigit = _decode(chunk, start, end, buffer);

	_sink.add(buffer.buffer.asUint8List(0, buffer.length));
	if (isLast) _close(chunk, end);
	}

	@override
	void close() => _close();

	/// Like [close], but includes [chunk] and [index] in the [FormatException]
	/// if one is thrown.
	void _close([List<int>? chunk, int? index]) {
	if (_lastDigit != null) {
	throw FormatException(
	"Input ended with incomplete encoded byte.", chunk, index);
	}

	_sink.close();
	}
	}

	/// Decodes [codeUnits] and writes the result into [destination].
	///
	/// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes
	/// the result into [destination] starting at [destinationStart].
	///
	/// If there's a leftover digit at the end of the decoding, this returns that
	/// digit. Otherwise it returns `null`.
	int? _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {
	// A bitwise OR of all code units in [codeUnits]. This allows us to check for
	// out-of-range code units without adding more branches than necessary to the
	// core loop.
	var codeUnitOr = 0;

	// The beginning of the current slice of adjacent non-% characters. We can add
	// all of these to the buffer at once.
	var sliceStart = start;
	for (var i = start; i < end; i++) {
	// First, loop through non-% characters.
	var codeUnit = codeUnits[i];
	if (codeUnits[i] != $percent) {
	codeUnitOr \|= codeUnit;
	continue;
	}

	// We found a %. The slice from `sliceStart` to `i` represents characters
	// than can be copied to the buffer as-is.
	if (i > sliceStart) {
	_checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, i);
	buffer.addAll(codeUnits, sliceStart, i);
	}

	// Now decode the percent-encoded byte and add it as well.
	i++;
	if (i >= end) return _lastPercent;

	var firstDigit = digitForCodeUnit(codeUnits, i);
	i++;
	if (i >= end) return 16 * firstDigit;

	var secondDigit = digitForCodeUnit(codeUnits, i);
	buffer.add(16 * firstDigit + secondDigit);

	// The next iteration will look for non-% characters again.
	sliceStart = i + 1;
	}

	if (end > sliceStart) {
	_checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, end);
	if (start == sliceStart) {
	buffer.addAll(codeUnits);
	} else {
	buffer.addAll(codeUnits, sliceStart, end);
	}
	}

	return null;
	}

	void _checkForInvalidCodeUnit(
	int codeUnitOr, List<int> codeUnits, int start, int end) {
	if (codeUnitOr >= 0 && codeUnitOr <= 0x7f) return;

	for (var i = start; i < end; i++) {
	var codeUnit = codeUnits[i];
	if (codeUnit >= 0 && codeUnit <= 0x7f) continue;
	throw FormatException(
	"Non-ASCII code unit "
	"U+${codeUnit.toRadixString(16).padLeft(4, '0')}",
	codeUnits,
	i);
	}
	}