// lib/src/third_party/tar/src/reader.dart - pub.git - Git at Google

 import 'dart:async';
 import 'dart:collection';
 import 'dart:convert';
 import 'dart:typed_data';

 import 'package:async/async.dart';
 import 'package:meta/meta.dart';
 import 'package:typed_data/typed_data.dart';

 import 'charcodes.dart';
 import 'constants.dart';
 import 'entry.dart';
 import 'exception.dart';
 import 'format.dart';
 import 'header.dart';
 import 'sparse.dart';
 import 'utils.dart';

 /// [TarReader] provides sequential access to the TAR files in a TAR archive.
 /// It is designed to read from a stream and to spit out substreams for
 /// individual file contents in order to minimize the amount of memory needed
 /// to read each archive where possible.
 @sealed
 class TarReader implements StreamIterator<TarEntry> {
   /// Block-based reader over the source tar stream.
   ///
   /// Both headers and entry contents are pulled from this reader.
   final BlockReader _reader;

   /// The PAX headers in effect for the entry being read.
   ///
   /// Local headers are cleared whenever the reader starts looking for the next
   /// logical file.
   final PaxHeaders _paxHeaders = PaxHeaders();

   /// The largest size accepted for hidden entries such as PAX headers or GNU
   /// long names. Larger sizes are rejected by [_checkSpecialSize].
   final int _maxSpecialFileSize;

   /// The entry exposed through [current], or `null` if none is available.
   TarEntry? _current;

   /// The underlying content stream for the [_current] entry. Draining this
   /// stream will move the tar reader to the beginning of the next file.
   ///
   /// This is not the same as `_current.stream` for sparse files, which are
   /// reported as expanded through [TarEntry.contents].
   /// For that reason, we prefer to drain this stream when skipping a tar entry.
   /// When we know we're skipping data, there's no point expanding sparse holes.
   ///
   /// This stream is always set to null after being drained, and there can only
   /// be one [_underlyingContentStream] at a time.
   Stream<List<int>>? _underlyingContentStream;

   /// Whether [_current] has ever been listened to.
   bool _listenedToContentsOnce = false;

   /// Whether we're in the process of reading tar headers.
   bool _isReadingHeaders = false;

   /// Whether this tar reader is terminally done.
   ///
   /// That is the case if:
   ///  - [cancel] was called
   ///  - [moveNext] completed to `false` once.
   ///  - [moveNext] completed to an error
   ///  - an error was emitted through a tar entry's content stream
   bool _isDone = false;

   /// Whether we should ensure that the stream emits no further data after the
   /// end of the tar file was reached.
   final bool _checkNoTrailingData;

   /// Creates a tar reader reading from the raw [tarStream].
   ///
   /// The [disallowTrailingData] parameter can be enabled to assert that the
   /// [tarStream] contains exactly one tar archive before ending.
   /// When [disallowTrailingData] is disabled (which is the default), the reader
   /// will automatically cancel its stream subscription when [moveNext] returns
   /// `false`.
   /// When it is enabled and a marker indicating the end of an archive is
   /// encountered, [moveNext] will wait for further events on the stream. If
   /// further data is received, a [TarException] will be thrown and the
   /// subscription will be cancelled. Otherwise, [moveNext] effectively waits
   /// for a done event, making a cancellation unnecessary.
   /// Depending on the input stream, cancellations may cause unintended
   /// side-effects. In that case, [disallowTrailingData] can be used to ensure
   /// that the stream is only cancelled if it emits an invalid tar file.
   ///
   /// The [maxSpecialFileSize] parameter can be used to limit the maximum length
   /// of hidden entries in the tar stream. These entries include extended PAX
   /// headers or long names in GNU tar. The content of those entries has to be
   /// buffered in the parser to properly read the following tar entries. To
   /// avoid memory-based denial-of-service attacks, this library limits their
   /// maximum length. Changing the default of 2 KiB is rarely necessary.
   TarReader(Stream<List<int>> tarStream,
       {int maxSpecialFileSize = defaultSpecialLength,
       bool disallowTrailingData = false})
       : _reader = BlockReader(tarStream),
         _checkNoTrailingData = disallowTrailingData,
         _maxSpecialFileSize = maxSpecialFileSize;

   /// The entry found by the most recent successful call to [moveNext].
   ///
   /// Throws a [StateError] when no entry is available, which is the case
   /// before [moveNext] has found an entry and after the reader was cancelled.
   @override
   TarEntry get current {
     final entry = _current;

     if (entry == null) {
       // The iteration method of this class is `moveNext()`; the message names
       // it so that users can find the call they are missing.
       throw StateError('Invalid call to TarReader.current. \n'
           'Did you call and await moveNext() and check that it returned '
           'true?');
     }

     return entry;
   }

   /// Advances the reader to the beginning of the next logical file.
   ///
   /// Completes with `true` if such a file exists; the entry describing it is
   /// then available in [current].
   ///
   /// Completes with `false` if the archive contains no further file.
   /// The future might complete with a [TarException] if the tar stream is
   /// malformed or ends unexpectedly.
   /// If the future completes with `false` or an exception, the reader will
   /// [cancel] itself and release associated resources. Thus, it is invalid to
   /// call [moveNext] again in that case.
   @override
   Future<bool> moveNext() async {
     await _prepareToReadHeaders();

     try {
       final foundEntry = await _moveNextInternal();
       return foundEntry;
     } catch (_) {
       // Whatever went wrong while reading headers, release the source stream
       // before reporting the failure to the caller.
       await cancel();
       rethrow;
     }
   }

   /// Consumes the stream up to the contents of the next logical tar entry.
   /// Will cancel the underlying subscription when returning false, but not when
   /// it throws.
   ///
   /// Completes with `true` after storing the next entry in [_current], or with
   /// `false` when the end of the archive was reached before any header of a
   /// new logical file was read.
   Future<bool> _moveNextInternal() async {
     // We're reading a new logical file, so clear the local pax headers
     _paxHeaders.clearLocals();

     // Long names announced by GNU meta entries; applied to the next regular
     // header once it is found.
     var gnuLongName = '';
     var gnuLongLink = '';
     // The end of the archive is only legal before the first header of a
     // logical file has been read.
     var eofAcceptable = true;

     // Start with every format being possible and narrow the set down with
     // each header block we see.
     var format = TarFormat.ustar |
         TarFormat.pax |
         TarFormat.gnu |
         TarFormat.v7 |
         TarFormat.star;

     HeaderImpl? nextHeader;

     // Externally, [moveNext] iterates through the tar archive as if it is a
     // series of files. Internally, the tar format often uses fake "files" to
     // add meta data that describes the next file. These meta data "files"
     // should not normally be visible to the outside. As such, this loop
     // iterates through one or more "header files" until it finds a
     // "normal file".
     while (true) {
       final rawHeader = await _readFullBlock(allowEmpty: eofAcceptable);

       // A `null` header means that an end-of-archive marker was read.
       nextHeader = await _readHeader(rawHeader);
       if (nextHeader == null) {
         if (eofAcceptable) {
           await _handleExpectedEof();
           return false;
         } else {
           _unexpectedEof();
         }
       }

       // We're beginning to read a file, if the tar file ends now something is
       // wrong
       eofAcceptable = false;
       format = format.mayOnlyBe(nextHeader.format);

       // Check for PAX/GNU special headers and files.
       if (nextHeader.typeFlag == TypeFlag.xHeader ||
           nextHeader.typeFlag == TypeFlag.xGlobalHeader) {
         format = format.mayOnlyBe(TarFormat.pax);
         final paxHeaderSize = _checkSpecialSize(nextHeader.size);

         // Read whole blocks, then drop the padding after the header payload.
         final rawPaxHeaders =
             (await _readFullBlock(amount: numBlocks(paxHeaderSize)))
                 .sublistView(0, paxHeaderSize);

         _paxHeaders.readPaxHeaders(
             rawPaxHeaders, nextHeader.typeFlag == TypeFlag.xGlobalHeader);

         // This is a meta header affecting the next header.
         continue;
       } else if (nextHeader.typeFlag == TypeFlag.gnuLongLink ||
           nextHeader.typeFlag == TypeFlag.gnuLongName) {
         format = format.mayOnlyBe(TarFormat.gnu);
         final size = _checkSpecialSize(nextHeader.size);
         final realName = await _readFullBlock(amount: numBlocks(size));

         final readName = realName.readString(0, realName.length);
         if (nextHeader.typeFlag == TypeFlag.gnuLongName) {
           gnuLongName = readName;
         } else {
           gnuLongLink = readName;
         }

         // This is a meta header affecting the next header.
         continue;
       } else {
         // The old GNU sparse format is handled here since it is technically
         // just a regular file with additional attributes.

         if (gnuLongName.isNotEmpty) nextHeader.name = gnuLongName;
         if (gnuLongLink.isNotEmpty) nextHeader.linkName = gnuLongLink;

         if (nextHeader.internalTypeFlag == TypeFlag.regA) {
           /// Legacy archives use trailing slash for directories
           if (nextHeader.name.endsWith('/')) {
             nextHeader.internalTypeFlag = TypeFlag.dir;
           } else {
             nextHeader.internalTypeFlag = TypeFlag.reg;
           }
         }

         final content = await _handleFile(nextHeader, rawHeader);

         // Set the final guess at the format
         if (format.has(TarFormat.ustar) && format.has(TarFormat.pax)) {
           format = format.mayOnlyBe(TarFormat.ustar);
         }
         nextHeader.format = format;

         // Publish the entry and reset the per-entry bookkeeping.
         _current = TarEntry(nextHeader, content);
         _listenedToContentsOnce = false;
         _isReadingHeaders = false;
         return true;
       }
     }
   }

   /// Marks this reader as done and closes the underlying block reader.
   ///
   /// Calling this on a reader that is already done has no effect.
   @override
   Future<void> cancel() async {
     if (_isDone) return;

     // Forget everything about the entry that was being read.
     _isReadingHeaders = false;
     _listenedToContentsOnce = false;
     _underlyingContentStream = null;
     _current = null;
     _isDone = true;

     // Note: Closing is safe when the stream has already been completed.
     // It's a noop in that case, which is what we want.
     return _reader.close();
   }

   /// Invokes [action] for every entry found in [tarStream], in order.
   ///
   /// The reader created for [tarStream] is cancelled when iteration finishes,
   /// including when [action] or the reader throws.
   static Future<void> forEach(Stream<List<int>> tarStream,
       FutureOr<void> Function(TarEntry entry) action) async {
     final reader = TarReader(tarStream);
     try {
       var hasEntry = await reader.moveNext();
       while (hasEntry) {
         await action(reader.current);
         hasEntry = await reader.moveNext();
       }
     } finally {
       await reader.cancel();
     }
   }

   /// Ensures that this reader can safely read headers now.
   ///
   /// This method prevents:
   ///  * calls to [moveNext] on a reader that is already done
   ///  * concurrent calls to [moveNext]
   ///  * a call to [moveNext] while a stream is active:
   ///    * if [contents] has never been listened to, we drain the stream
   ///    * otherwise, throws a [StateError]
   ///
   /// The header-reading flag is only claimed once all of those checks have
   /// passed, so a rejected call does not make later calls look concurrent.
   /// If draining the skipped entry fails, the reader is cancelled before the
   /// error is rethrown.
   Future<void> _prepareToReadHeaders() async {
     if (_isDone) {
       throw StateError('Tried to call TarReader.moveNext() on a canceled '
           'reader. \n'
           'Note that a reader is canceled when moveNext() throws or returns '
           'false.');
     }

     if (_isReadingHeaders) {
       throw StateError('Concurrent call to TarReader.moveNext() detected. \n'
           'Please await all calls to Reader.moveNext().');
     }

     final pendingContents = _underlyingContentStream;
     if (pendingContents != null && _listenedToContentsOnce) {
       // Reject this call before touching any state: the caller may still
       // finish the active stream and call moveNext() again afterwards.
       throw StateError(
           'Illegal call to TarReader.moveNext() while a previous stream was '
           'active.\n'
           'When listening to tar contents, make sure the stream is '
           'complete or cancelled before calling TarReader.moveNext() again.');
     }

     // Everything up to here ran synchronously, so setting the flag now still
     // catches a second moveNext() issued before this one completes.
     _isReadingHeaders = true;

     if (pendingContents != null) {
       try {
         // Nobody read the previous entry, skip over its contents.
         await pendingContents.drain<void>();
       } on Object {
         // The source is broken or truncated. Release it and reset our state
         // instead of leaving the reader stuck in the header-reading phase.
         await cancel();
         rethrow;
       }

       // The stream should reset when drained (we do this in _publishStream)
       assert(_underlyingContentStream == null);
     }
   }

   /// Returns [size] unchanged if it does not exceed [_maxSpecialFileSize].
   ///
   /// Throws a [TarException] otherwise.
   int _checkSpecialSize(int size) {
     final withinLimit = size <= _maxSpecialFileSize;
     if (withinLimit) return size;

     throw TarException(
         'TAR file contains hidden entry with an invalid size of $size.');
   }

   /// After we detected the end of a tar file, optionally checks for trailing
   /// data and then cancels this reader.
   ///
   /// When trailing data is disallowed, throws a [TarException] if any block
   /// after the end marker contains a non-zero byte.
   Future<void> _handleExpectedEof() async {
     if (_checkNoTrailingData) {
       // Trailing zeroes are okay, but don't allow any more data here.
       while (true) {
         final trailing = await _reader.nextBlock();
         if (!trailing.isAllZeroes) {
           throw TarException(
               'Illegal content after the end of the tar archive.');
         }

         // The stream is done when we couldn't read the full block.
         if (trailing.length != blockSize) break;
       }
     }

     await cancel();
   }

   /// Always throws a header [TarException] reporting a premature end of input.
   Never _unexpectedEof() =>
       throw TarException.header('Unexpected end of file');

   /// Reads [amount] blocks from the input stream into a single buffer.
   ///
   /// Throws through [_unexpectedEof] if the stream ends before all requested
   /// bytes were read. As the only exception, an empty list is returned when
   /// [allowEmpty] is set and the stream did not deliver a single byte.
   Future<Uint8List> _readFullBlock({bool allowEmpty = false, int amount = 1}) {
     final buffer = Uint8List(amount * blockSize);
     var written = 0;

     // Copies each incoming chunk behind the bytes collected so far.
     void append(List<int> chunk) {
       buffer.setAll(written, chunk);
       written += chunk.length;
     }

     return _reader.nextBlocks(amount).forEach(append).then((void _) {
       if (allowEmpty && written == 0) return Uint8List(0);
       if (written < buffer.length) _unexpectedEof();

       return buffer;
     });
   }

   /// Parses [rawHeader] as the next block header, assuming that the underlying
   /// reader is already aligned to a block boundary.
   ///
   /// Returns `null` when the end of the archive was reached. That is the case
   /// when one of the following occurs:
   ///  * Exactly 0 bytes are read and EOF is hit.
   ///  * Exactly 1 block of zeros is read and EOF is hit.
   ///  * At least 2 blocks of zeros are read.
   ///
   /// Throws a [TarException] if a zero block is followed by a non-zero block.
   Future<HeaderImpl?> _readHeader(Uint8List rawHeader) async {
     // Exactly 0 bytes are read and EOF is hit.
     if (rawHeader.isEmpty) return null;

     // The common case: a regular header block.
     if (!rawHeader.isAllZeroes) {
       return HeaderImpl.parseBlock(rawHeader, paxHeaders: _paxHeaders);
     }

     // A zero block starts the end-of-archive marker, look at what follows.
     final followUp = await _reader.nextBlock();

     // Either the stream ended after a single zero block, or a second zero
     // block completes the regular end marker.
     if (followUp.isEmpty || followUp.isAllZeroes) return null;

     throw TarException('Encountered a non-zero block after a zero block');
   }

   /// Creates a stream of the next entry's content.
   ///
   /// Sparse entries (old GNU format or GNU sparse PAX headers) are returned
   /// with their holes expanded. For all other entries, the stream emits
   /// [HeaderImpl.size] bytes, or nothing if the header has no content.
   ///
   /// Throws a [TarException] for an invalid sparse map or a negative size.
   Future<Stream<List<int>>> _handleFile(
       HeaderImpl header, Uint8List rawHeader) async {
     final List<SparseEntry>? sparseEntries =
         header.typeFlag == TypeFlag.gnuSparse
             ? await _readOldGNUSparseMap(header, rawHeader)
             : await _readGNUSparsePAXHeaders(header);

     if (sparseEntries == null) {
       // A regular entry without sparse information.
       final size = header.hasContent ? header.size : 0;
       if (size < 0) {
         throw TarException.header('Invalid size ($size) detected!');
       }
       if (size == 0) {
         return _publishStream(const Stream<Never>.empty(), 0);
       }

       final contentBlocks = numBlocks(header.size);
       return _publishStream(_reader.nextBlocks(contentBlocks), header.size);
     }

     if (header.hasContent &&
         !validateSparseEntries(sparseEntries, header.size)) {
       throw TarException.header('Invalid sparse file header.');
     }

     final holes = invertSparseEntries(sparseEntries, header.size);

     // Only the data fragments are stored in the archive, so that is what we
     // need to read from the source stream.
     var storedLength = 0;
     for (final fragment in sparseEntries) {
       storedLength += fragment.length;
     }

     final storedBlocks = numBlocks(storedLength);
     final rawContents = _publishStream(
         _reader.nextBlocks(storedBlocks), storedBlocks * blockSize);
     return sparseStream(rawContents, holes, header.size);
   }

   /// Publishes a library-internal stream for users.
   ///
   /// This adds a check to ensure that the stream we're exposing has the
   /// expected length. It also sets the [_underlyingContentStream] field when
   /// the stream starts and resets it when it's done.
   ///
   /// [length] is the number of content bytes the returned stream should emit;
   /// it is handed to the [_OutgoingStreamGuard] wrapping each listener's sink.
   Stream<List<int>> _publishStream(Stream<List<int>> stream, int length) {
     // There can only be one content stream at a time. This precondition is
     // checked by _prepareToReadHeaders.
     assert(_underlyingContentStream == null);
     // Captured by the callback below so that it can tell whether the stream
     // being listened to is still the reader's active content stream.
     Stream<List<int>>? thisStream;

     return thisStream =
         _underlyingContentStream = Stream.eventTransformed(stream, (sink) {
       // This callback is called when we have a listener. Make sure that, at
       // this point, this stream is still the active content stream.
       // If users store the contents of a tar header, then read more tar
       // entries, and finally try to read the stream of the old contents, they'd
       // get an exception about the stream already being listened to.
       // This can be a bit confusing, so this check enables a better error UX.
       if (thisStream != _underlyingContentStream) {
         throw StateError(
           'Tried listening to an outdated tar entry. \n'
           'As all tar entries found by a reader are backed by a single source '
           'stream, only the latest tar entry can be read. It looks like you '
           'stored the results of `tarEntry.contents` somewhere, called '
           '`reader.moveNext()` and then read the contents of the previous '
           'entry.\n'
           'For more details, including a discussion of workarounds, see '
           'https://github.com/simolus3/tar/issues/18',
         );
       } else if (_listenedToContentsOnce) {
         throw StateError(
           'A tar entry has been listened to multiple times. \n'
           'As all tar entries are read from what\'s likely a single-'
           'subscription stream, this is unsupported. If you didn\'t read a tar '
           'entry multiple times yourself, perhaps you\'ve called `moveNext()` '
           'before reading contents?',
         );
       }

       _listenedToContentsOnce = true;

       // Declared `late` because the done-callback passed to the guard needs
       // to refer to the guard itself.
       late _OutgoingStreamGuard guard;
       return guard = _OutgoingStreamGuard(
         length,
         sink,
         // Reset state when the stream is done. This will only be called when
         // the stream is done, not when a listener cancels.
         () {
           _underlyingContentStream = null;
           if (guard.hadError) {
             cancel();
           }
         },
       );
     });
   }

   /// Looks for GNU sparse information in the PAX headers.
   ///
   /// Returns the sparse map if a supported version (0.0, 0.1 or 1.0) is
   /// found, and `null` otherwise. In the former case, [header] is updated with
   /// the name and size stored in the sparse PAX headers.
   /// This assumes that 0.0 headers have already been converted to 0.1 headers
   /// by the PAX header parsing logic.
   ///
   /// Throws a [TarException] if the size stored in the PAX headers is not a
   /// decimal number.
   Future<List<SparseEntry>?> _readGNUSparsePAXHeaders(HeaderImpl header) async {
     final major = _paxHeaders[paxGNUSparseMajor];
     final minor = _paxHeaders[paxGNUSparseMinor];
     final inlineMap = _paxHeaders[paxGNUSparseMap];

     // Identify the version of GNU headers.
     final declaresVersion0 = major == '0' && (minor == '0' || minor == '1');
     // Assume 0.0 or 0.1 if no version header is set but a map is present.
     final hasInlineMap = inlineMap != null && inlineMap.isNotEmpty;

     final bool usesVersion1x0;
     if (declaresVersion0 || hasInlineMap) {
       usesVersion1x0 = false;
     } else if (major == '1' && minor == '0') {
       usesVersion1x0 = true;
     } else {
       // Unknown version that we don't support
       return null;
     }

     header.format |= TarFormat.pax;

     // Update [header] from GNU sparse PAX headers.
     final sparseName = _paxHeaders[paxGNUSparseName];
     if (sparseName != null && sparseName.isNotEmpty) {
       header.name = sparseName;
     }

     final sizeText =
         _paxHeaders[paxGNUSparseSize] ?? _paxHeaders[paxGNUSparseRealSize];
     if (sizeText != null && sizeText.isNotEmpty) {
       final parsedSize = int.tryParse(sizeText, radix: 10);
       if (parsedSize == null) {
         throw TarException.header('Invalid PAX size ($sizeText) detected');
       }

       header.size = parsedSize;
     }

     // Read the sparse map according to the appropriate format.
     return usesVersion1x0
         ? await _readGNUSparseMap1x0()
         : _readGNUSparseMap0x1(header);
   }

   /// Reads the sparse map as stored in GNU's PAX sparse format version 1.0.
   ///
   /// The map is a series of newline-terminated numeric fields in front of the
   /// file data. The first field is the number of entries and is always
   /// present. It is followed by two fields (offset, length) per entry. This
   /// function reads whole blocks only, so it stops at the end boundary of the
   /// block containing the last newline.
   ///
   /// Note that the GNU manual says that numeric values should be encoded in
   /// octal format. However, the GNU tar utility itself outputs these values in
   /// decimal. As such, this library treats values as being encoded in decimal.
   Future<List<SparseEntry>> _readGNUSparseMap1x0() async {
     final buffered = Uint8Queue();
     var bufferedNewlines = 0;

     /// Reads blocks until [buffered] has at least [n] tokens.
     Future<void> fillUntil(int n) async {
       while (bufferedNewlines < n) {
         final fresh = await _readFullBlock();
         if (fresh.length < blockSize) {
           throw TarException.header(
               'GNU Sparse Map does not have enough lines!');
         }

         buffered.addAll(fresh);
         for (final byte in fresh) {
           if (byte == $lf) bufferedNewlines++;
         }
       }
     }

     /// Removes and returns the next newline-delimited token. This assumes
     /// that at least one newline exists in the buffer.
     String takeToken() {
       bufferedNewlines--;
       final lineEnd = buffered.indexOf($lf);
       final line = buffered.sublist(0, lineEnd);
       buffered.removeRange(0, lineEnd + 1);
       return line.readString(0, lineEnd);
     }

     await fillUntil(1);

     // Parse for the number of entries.
     // Use integer overflow resistant math to check this.
     final countToken = takeToken();
     final entryCount = int.tryParse(countToken);
     if (entryCount == null || entryCount < 0 || 2 * entryCount < entryCount) {
       throw TarException.header(
           'Invalid sparse map number of entries: $countToken!');
     }

     // Parse for all member entries.
     // [entryCount] is trusted after this since a potential attacker must have
     // committed resources proportional to what this library used.
     await fillUntil(2 * entryCount);

     final entries = <SparseEntry>[];
     while (entries.length < entryCount) {
       final offsetToken = takeToken();
       final lengthToken = takeToken();

       final offset = int.tryParse(offsetToken);
       final length = int.tryParse(lengthToken);
       if (offset == null || length == null) {
         throw TarException.header(
             'Failed to read a GNU sparse map entry. Encountered '
             'offset: $offsetToken, length: $lengthToken');
       }

       entries.add(SparseEntry(offset, length));
     }

     return entries;
   }

   /// Reads the sparse map as stored in GNU's PAX sparse format version 0.1.
   /// The sparse map is stored in the PAX headers and is stored like this:
   /// `offset₀,size₀,offset₁,size₁...`
   ///
   /// The [header] parameter is not read by this method.
   ///
   /// Throws a [TarException] if the number of entries is missing or invalid,
   /// if the map is missing, if the map does not contain exactly two fields per
   /// entry, or if a field is not a number.
   List<SparseEntry> _readGNUSparseMap0x1(TarHeader header) {
     // Get number of entries, check for integer overflows
     final numEntriesString = _paxHeaders[paxGNUSparseNumBlocks];
     final numEntries =
         numEntriesString != null ? int.tryParse(numEntriesString) : null;

     if (numEntries == null || numEntries < 0 || 2 * numEntries < numEntries) {
       throw TarException.header('Invalid GNU version 0.1 map');
     }

     // There should be two numbers in [sparseMap] for each entry.
     final sparseMap = _paxHeaders[paxGNUSparseMap]?.split(',');
     if (sparseMap == null) {
       throw TarException.header('Invalid GNU version 0.1 map');
     }

     if (sparseMap.length != 2 * numEntries) {
       throw TarException.header(
           'Detected sparse map length ${sparseMap.length} '
           'that is not twice the number of entries $numEntries');
     }

     // Loop through sparse map entries.
     // [numEntries] is now trusted.
     final sparseData = <SparseEntry>[];
     for (var i = 0; i < sparseMap.length; i += 2) {
       final offsetToken = sparseMap[i];
       final lengthToken = sparseMap[i + 1];

       final offset = int.tryParse(offsetToken);
       final length = int.tryParse(lengthToken);

       if (offset == null || length == null) {
         // Report the raw tokens: the parsed values are `null` for exactly the
         // field that is broken, which would hide the offending input.
         throw TarException.header(
             'Failed to read a GNU sparse map entry. Encountered '
             'offset: $offsetToken, length: $lengthToken');
       }

       sparseData.add(SparseEntry(offset, length));
     }

     return sparseData;
   }

   /// Reads the sparse map from the old GNU sparse format.
   /// The sparse map is stored in the tar header if it's small enough.
   /// If it's larger than four entries, then one or more extension headers are
   /// used to store the rest of the sparse map.
   ///
   /// [TarHeader.size] does not reflect the size of any extended headers used.
   /// Thus, this function will read further blocks from the source to fetch
   /// extra headers. As a side effect, the size of [header] is replaced with
   /// the real size of the file read from [rawHeader].
   ///
   /// Throws a [TarException] if [header] is not in the GNU format.
   ///
   /// See also: https://www.gnu.org/software/tar/manual/html_section/tar_94.html#SEC191
   Future<List<SparseEntry>> _readOldGNUSparseMap(
       HeaderImpl header, Uint8List rawHeader) async {
     // Make sure that the input format is GNU.
     // Unfortunately, the STAR format also has a sparse header format that uses
     // the same type flag but has a completely different layout.
     if (header.format != TarFormat.gnu) {
       throw TarException.header('Tried to read sparse map of non-GNU header');
     }

     // Read the real size of the file when sparse holes are expanded.
     header.size = rawHeader.readNumeric(483, 12);
     final sparseEntries = <SparseEntry>[];

     // Reads the sparse entry starting at [offset] in [source]. Returns false
     // once the end of the sparse structures in [source] is reached.
     bool readEntry(Uint8List source, int offset) {
       // If a sparse header starts with a null byte, it marks the end of the
       // sparse structures. This has to look at [source], which is the tar
       // header for the first four entries and an extension block afterwards.
       if (source[offset] == 0) return false;

       final fileOffset = source.readNumeric(offset, 12);
       final length = source.readNumeric(offset + 12, 12);

       sparseEntries.add(SparseEntry(fileOffset, length));
       return true;
     }

     // The first four sparse headers are stored in the tar header itself
     for (var i = 0; i < 4; i++) {
       final offset = 386 + 24 * i;
       if (!readEntry(rawHeader, offset)) break;
     }

     // A non-zero byte after the four inline entries announces an extension
     // block.
     var isExtended = rawHeader[482] != 0;

     while (isExtended) {
       // Ok, we have a new block of sparse headers to process
       final block = await _readFullBlock();

       // A full block of sparse data contains up to 21 entries
       for (var i = 0; i < 21; i++) {
         if (!readEntry(block, i * 24)) break;
       }

       // The byte after the entries indicates whether another sparse header
       // block follows.
       isExtended = block[504] != 0;
     }

     return sparseEntries;
   }
 }

 /// A read-only map view over the PAX headers known to a tar reader.
 ///
 /// Lookups consult the local headers first and fall back to the global ones.
 @internal
 class PaxHeaders extends UnmodifiableMapBase<String, String> {
   final Map<String, String> _globalHeaders = {};
   Map<String, String> _localHeaders = {};

   /// Applies new global PAX-headers from the map.
   ///
   /// The [headers] will replace global headers with the same key, but leave
   /// others intact.
   void newGlobals(Map<String, String> headers) {
     _globalHeaders.addAll(headers);
   }

   /// Applies new local PAX-headers from the map.
   ///
   /// This replaces all currently active local headers.
   void newLocals(Map<String, String> headers) {
     _localHeaders = headers;
   }

   /// Clears local headers.
   ///
   /// This is used by the reader after a file has ended, as local headers only
   /// apply to the next entry.
   void clearLocals() {
     _localHeaders = {};
   }

   /// Returns the local header for [key], or the global one if no local header
   /// with that key exists.
   @override
   String? operator [](Object? key) {
     return _localHeaders[key] ?? _globalHeaders[key];
   }

   /// The keys of all global and local headers, without duplicates.
   @override
   Iterable<String> get keys => {..._globalHeaders.keys, ..._localHeaders.keys};

   /// Decodes the content of an extended pax header entry.
   ///
   /// Semantically, a [PAX Header][posix pax] is a map with string keys and
   /// values, where both keys and values are encoded with utf8.
   ///
   /// However, [old GNU Versions][gnu sparse00] used to repeat keys to store
   /// sparse file information in sparse headers. This method will transparently
   /// rewrite the PAX format of version 0.0 to version 0.1.
   ///
   /// The decoded records become the new global headers if [isGlobal] is set,
   /// and the new local headers otherwise. Unless [ignoreUnknown] is disabled,
   /// only keys listed in `supportedPaxHeaders` are stored.
   ///
   /// Throws a [TarException] for malformed records.
   ///
   /// [posix pax]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_03
   /// [gnu sparse00]: https://www.gnu.org/software/tar/manual/html_section/tar_94.html#SEC192
   void readPaxHeaders(List<int> data, bool isGlobal,
       {bool ignoreUnknown = true}) {
     var offset = 0;
     final map = <String, String>{};
     final sparseMap = <String>[];

     Never error() => throw TarException.header('Invalid PAX record');

     while (offset < data.length) {
       // At the start of an entry, expect its length which is terminated by a
       // space char.
       final space = data.indexOf($space, offset);
       if (space == -1) break;

       // Parse the decimal length. This can't run past the end of [data]
       // because the space found above stops the loop at the latest.
       var length = 0;
       var currentChar = data[offset];
       var charsInLength = 0;
       while (currentChar >= $0 && currentChar <= $9) {
         length = length * 10 + currentChar - $0;
         charsInLength++;
         currentChar = data[++offset];
       }

       if (length == 0) {
         error();
       }

       // Skip the whitespace
       if (currentChar != $space) {
         error();
       }
       offset++;

       // Length also includes the length description and a space we just read
       final endOfEntry = offset + length - 1 - charsInLength;
       // Subtract one for trailing newline for value
       final endOfValue = endOfEntry - 1;

       // Comparing [endOfValue] instead of [endOfEntry] against the length
       // because the trailing newline is optional for the last entry
       if (endOfEntry < offset || endOfValue > data.length) {
         error();
       }

       // Read the key. The equals sign has to come before the position of the
       // trailing newline. Otherwise, the record leaves no room for a (possibly
       // empty) value and decoding it would use an invalid range.
       final nextEquals = data.indexOf($equal, offset);
       if (nextEquals == -1 || nextEquals >= endOfValue) {
         error();
       }

       final key = utf8.decoder.convert(data, offset, nextEquals);
       // Skip over the equals sign
       offset = nextEquals + 1;

       if (!_isValidPaxKey(key)) {
         error();
       }

       // If we're seeing weird PAX Version 0.0 sparse keys, expect alternating
       // GNU.sparse.offset and GNU.sparse.numbytes headers.
       if (key == paxGNUSparseNumBytes || key == paxGNUSparseOffset) {
         final value = utf8.decoder.convert(data, offset, endOfValue);

         if (!_isValidPaxRecord(key, value) ||
             (sparseMap.length.isEven && key != paxGNUSparseOffset) ||
             (sparseMap.length.isOdd && key != paxGNUSparseNumBytes) ||
             value.contains(',')) {
           error();
         }

         sparseMap.add(value);
       } else if (!ignoreUnknown || supportedPaxHeaders.contains(key)) {
         // Ignore unrecognized headers to avoid unbounded growth of the global
         // header map.
         final value = unsafeUtf8Decoder.convert(data, offset, endOfValue);

         if (!_isValidPaxRecord(key, value)) {
           error();
         }

         map[key] = value;
       }

       // Skip over value
       offset = endOfValue;
       // and the trailing newline
       final hasNewline = offset < data.length;
       if (hasNewline && data[offset] != $lf) {
         throw TarException('Invalid PAX Record (missing trailing newline)');
       }
       offset++;
     }

     // Store version 0.0 sparse records the way version 0.1 would have.
     if (sparseMap.isNotEmpty) {
       map[paxGNUSparseMap] = sparseMap.join(',');
     }

     if (isGlobal) {
       newGlobals(map);
     } else {
       newLocals(map);
     }
   }

   // NB: Some Tar files have malformed UTF-8 data in the headers, we should
   // decode them anyways even if they're broken
   static const unsafeUtf8Decoder = Utf8Decoder(allowMalformed: true);

   /// Whether [key] is non-empty and contains neither `=` nor a NUL character.
   static bool _isValidPaxKey(String key) {
     // These limitations are documented in the PAX standard.
     return key.isNotEmpty && !key.contains('=') && !key.codeUnits.contains(0);
   }

   /// Checks whether [key], [value] is a valid entry in a pax header.
   ///
   /// This is adopted from the Golang tar reader (`validPAXRecord`), which says
   /// that "Keys and values should be UTF-8, but the number of bad writers out
   /// there forces us to be a more liberal."
   static bool _isValidPaxRecord(String key, String value) {
     // These aren't documented in any standard, but Golang's tar has them and
     // got away with it.
     switch (key) {
       case paxPath:
       case paxLinkpath:
       case paxUname:
       case paxGname:
         return !value.codeUnits.contains(0);
       default:
         return true;
     }
   }
 }

 /// Event-sink tracking the length of emitted tar entry streams.
 ///
 /// The stream of blocks read for an entry might be shorter than expected. That
 /// indicates an invalid tar file though, since the correct size is stored in
 /// the header. This sink forwards content bytes to [out], drops the padding
 /// after the content, and reports an error on [out] if the source closes
 /// before all content bytes were seen.
 ///
 /// This class implements [EventSink] rather than extending it: [EventSink] is
 /// an interface that only this library's own members need to satisfy.
 class _OutgoingStreamGuard implements EventSink<Uint8List> {
   /// Content bytes that still have to be forwarded to [out].
   int remainingContentSize;

   /// Padding bytes that are still expected after the content.
   int remainingPaddingSize;

   /// The sink receiving the content of the entry.
   final EventSink<List<int>> out;

   /// Invoked from [close], right before [out] is closed.
   final void Function() onDone;

   /// Whether an error was passed through [addError].
   bool hadError = false;

   /// Whether an incoming chunk may still contain content bytes.
   bool isInContent = true;

   _OutgoingStreamGuard(this.remainingContentSize, this.out, this.onDone)
       : remainingPaddingSize = _paddingFor(remainingContentSize);

   /// The amount of bytes needed to fill up the last block of an entry with
   /// [contentSize] bytes.
   static int _paddingFor(int contentSize) {
     final offsetInLastBlock = contentSize.toUnsigned(blockSizeLog2);
     if (offsetInLastBlock != 0) {
       return blockSize - offsetInLastBlock;
     }
     return 0;
   }

   @override
   void add(Uint8List event) {
     if (isInContent) {
       if (event.length <= remainingContentSize) {
         // We can fully add this chunk as it consists entirely of data
         out.add(event);
         remainingContentSize -= event.length;
       } else {
         // We can add the first bytes as content, the others are padding that we
         // shouldn't emit
         out.add(event.sublistView(0, remainingContentSize));
         isInContent = false;
         remainingPaddingSize -= event.length - remainingContentSize;
         remainingContentSize = 0;
       }
     } else {
       // Ok, the entire event is padding
       remainingPaddingSize -= event.length;
     }

     // The underlying stream comes from pkg:tar, so if we get too many bytes
     // that's a bug in this package.
     assert(remainingPaddingSize >= 0, 'Stream emitted to many bytes');
   }

   @override
   void addError(Object error, [StackTrace? stackTrace]) {
     hadError = true;
     out.addError(error, stackTrace);
   }

   @override
   void close() {
     // If the stream stopped after an error, the user is already aware that
     // something is wrong.
     if (remainingContentSize > 0 && !hadError) {
       out.addError(
           TarException('Unexpected end of tar file'), StackTrace.current);
     }

     onDone();
     out.close();
   }
 }