import 'dart:async';
import 'dart:convert';
import 'dart:math';
import 'dart:typed_data';
import 'package:charcode/ascii.dart';
import 'common.dart';
import 'entry.dart';
/// A stream transformer turning byte-streams into a stream of tar entries.
///
/// You can iterate over entries in a tar archive like this:
///
/// ```dart
/// import 'dart:convert';
/// import 'dart:io';
/// import 'package:tar/tar.dart' as tar;
///
/// Future<void> main() async {
///   final tarFile = File('file.tar.gz')
///       .openRead()
///       // use gzip.decoder if you're reading .tar.gz files
///       .transform(gzip.decoder)
///       .transform(const tar.Reader());
///
///   await for (final entry in tarFile) {
///     print(entry.name);
///     print(await entry.transform(utf8.decoder).first);
///   }
/// }
/// ```
class Reader extends StreamTransformerBase<List<int>, Entry> {
/// The maximum length for special files, such as extended PAX headers or long
/// file names in GNU-tar.
///
/// The content of such files has to be buffered in the reader until the
/// next entry starts. To avoid memory-based denial-of-service attacks with
/// overly large headers, this library only allows special files of up to
/// 1 KiB by default. This limit can be increased, which is rarely needed.
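///
/// For example, to accept larger special files (the name and the 64 KiB
/// value below are arbitrary choices for illustration):
///
/// ```dart
/// const largeHeaderReader = Reader(maxSpecialFileLength: 64 * 1024);
/// ```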
final int maxSpecialFileLength;
/// Creates a reader with a custom [maxSpecialFileLength].
///
/// When using the default value, consider using the regular [reader] instead.
const Reader({this.maxSpecialFileLength = defaultSpecialLength})
: assert(maxSpecialFileLength >= blockSize);
@override
Stream<Entry> bind(Stream<List<int>> stream) {
return _BoundTarStream(stream, maxSpecialFileLength).stream;
}
}
/// A stream transformer turning byte-streams into a stream of tar entries.
///
/// You can iterate over entries in a tar archive like this:
///
/// ```dart
/// import 'dart:convert';
/// import 'dart:io';
/// import 'package:tar/tar.dart' as tar;
///
/// Future<void> main() async {
///   final tarFile = File('file.tar.gz')
///       .openRead()
///       // use gzip.decoder if you're reading .tar.gz files
///       .transform(gzip.decoder)
///       .transform(tar.reader);
///
///   await for (final entry in tarFile) {
///     print(entry.name);
///     print(await entry.transform(utf8.decoder).first);
///   }
/// }
/// ```
const reader = Reader();
class _BoundTarStream {
// sync because we'll only add events in response to events that we receive.
final _controller = StreamController<Entry>(sync: true);
// We don't propagate pauses/resumes from the global [_controller] when we're
// reading an entry, so we have to remember the state to do that later.
var _controllerState = _ControllerState.idle;
// Whether we're skipping input to get to the end of a tar block.
bool _isWaitingForBlockToFinish = false;
// Whether we've seen the end of the tar stream, indicated by two empty
// blocks.
bool _hasReachedEnd = false;
StreamController<Uint8List>? _entryController;
// The subscription to the input stream passed to the constructor. We only
// start listening once [_controller] has a listener, and we pause/resume the
// subscription as necessary.
late StreamSubscription<List<int>> _subscription;
/// Extended PAX headers used for long names.
///
/// See also: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_03
final PaxHeaders _paxHeaders = PaxHeaders();
FileType? _processingSpecialType;
// While we're parsing a header, this buffer collects the raw bytes of the
// header block. When processing a special file type (e.g. extended headers),
// it stores the content of that file instead. We start by reading a header.
Uint8List _buffer = Uint8List(blockSize);
// The number of bytes to read before we switch states (e.g. from headers to
// entries or vice-versa).
int _remainingBytes = blockSize;
// The offset in the current block, used to track how much data to skip when
// we go to the next block.
int _offsetInBlock = 0;
final int maxSpecialTypeLength;
Stream<Entry> get stream => _controller.stream;
_BoundTarStream(Stream<List<int>> stream, this.maxSpecialTypeLength) {
_controller
..onPause = () {
_setStateAndPropagate(_ControllerState.paused);
}
..onResume = () {
_setStateAndPropagate(_ControllerState.active);
}
..onCancel = () {
_setStateAndPropagate(_ControllerState.canceled);
}
..onListen = () {
_controllerState = _ControllerState.active;
_subscription = stream.listen(
(chunk) {
try {
_processChunk(chunk);
} catch (e, s) {
_controller.addError(e, s);
}
},
onDone: () {
if (!_hasReachedEnd) {
_controller.addError(StateError('Unexpected end of input'));
}
_controller.close();
},
onError: _controller.addError,
);
};
}
void _setStateAndPropagate(_ControllerState state) {
_controllerState = state;
_propagateStateIfPossible();
}
void _propagateStateIfPossible() {
// Don't pause or resume if we are processing an entry. Users are supposed
// to pause/resume the entry stream instead.
if (_entryController == null) {
switch (_controllerState) {
case _ControllerState.idle:
throw AssertionError('Should not get back to idle.');
case _ControllerState.active:
if (_subscription.isPaused) _subscription.resume();
break;
case _ControllerState.paused:
if (!_subscription.isPaused) _subscription.pause();
break;
case _ControllerState.canceled:
_subscription.cancel();
break;
}
}
}
/// Switches to a state in which we're skipping padding, if necessary.
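///
/// For example, a 10-byte file still occupies a full 512-byte data block in
/// the archive; the remaining 502 bytes are padding that has to be skipped
/// before the next header starts.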
void _skipPadding() {
if (_offsetInBlock != 0) {
_remainingBytes = blockSize - _offsetInBlock;
_isWaitingForBlockToFinish = true;
}
}
void _processChunk(List<int> chunk) {
var offset = 0;
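// Reads [amount] bytes from the current position in [chunk] and advances
// the chunk offset as well as the block-level counters.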
List<int> read(int amount) {
final result = chunk.sublist(offset, offset + amount);
_remainingBytes -= amount;
offset += amount;
_offsetInBlock = (_offsetInBlock + amount).toUnsigned(blockSizeLog2);
return result;
}
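// Buffers the bytes of a special file (extended PAX headers, GNU long
// names) into [_buffer]. Once the file is complete, its content is applied
// to [_paxHeaders] and we go back to reading a regular header.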
void readSpecialFile(int availableBytes) {
_buffer.setAll(_buffer.length - _remainingBytes, read(availableBytes));
if (_remainingBytes == 0) {
switch (_processingSpecialType) {
case FileType.extendedHeader:
_paxHeaders.newLocals(_readPaxHeader());
break;
case FileType.globalExtended:
_paxHeaders.newGlobals(_readPaxHeader());
break;
// Fake a pax header for these two; they're otherwise equivalent
case FileType.gnuLongLinkName:
_paxHeaders.linkName = _readZeroTerminated();
break;
case FileType.gnuLongName:
_paxHeaders.fileName = _readZeroTerminated();
break;
default:
throw AssertionError('Only headers are special types');
}
// Resume by parsing the next header, which is then a regular one
_skipPadding();
_processingSpecialType = null;
if (_buffer.length != blockSize) _buffer = Uint8List(blockSize);
_remainingBytes = blockSize;
}
}
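// Collects header bytes into [_buffer]. Once a full block has been read, it
// is parsed and we either start emitting an entry or switch to reading a
// special file.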
void readHeader(int availableBytes) {
_buffer.setAll(blockSize - _remainingBytes, read(availableBytes));
if (_remainingBytes == 0) {
// The header is complete, so start emitting an entry. Note that we don't
// have to skip padding as headers always have the length of one block.
if (_buffer.isAllZeroes) {
_hasReachedEnd = true;
return;
}
final header = Header.fromBlock(_buffer, headers: _paxHeaders);
final type = header.type;
if (!_transparentFileTypes.contains(type)) {
final entry = _entryController = StreamController(
sync: true,
onListen: () {
if (_subscription.isPaused) _subscription.resume();
},
onPause: _subscription.pause,
onResume: _subscription.resume,
);
_remainingBytes = header.size;
_controller.add(Entry(header, entry.stream));
} else {
final length = header.size;
if (length > maxSpecialTypeLength) {
_controller.addError(StateError(
'This tar file contains an extended PAX header with a length of '
'$length bytes. Since these headers have to be buffered, this '
'tar reader permits a maximum length of $maxSpecialTypeLength bytes.\n'
'You can increase this limit when constructing a tar.Reader().',
));
}
_remainingBytes = header.size;
_buffer = Uint8List(header.size);
_processingSpecialType = type;
}
}
}
while (offset < chunk.length) {
if (_hasReachedEnd) break;
var remainingInChunk = chunk.length - offset;
if (_isWaitingForBlockToFinish) {
final remainingInBlock = blockSize - _offsetInBlock;
if (remainingInBlock <= remainingInChunk) {
// Skip the block padding, then go on with the next block
offset += remainingInBlock;
_offsetInBlock += remainingInBlock;
remainingInChunk -= remainingInBlock;
_isWaitingForBlockToFinish = false;
} else {
// The rest of this chunk is padding data that we can ignore.
_offsetInBlock += remainingInChunk;
_subscription.resume();
break;
}
}
final availableBytes = min(_remainingBytes, remainingInChunk);
if (_processingSpecialType != null) {
readSpecialFile(availableBytes);
} else {
final currentEntry = _entryController;
if (currentEntry == null) {
// If there's no current entry, we're reading a header
readHeader(availableBytes);
} else {
// Otherwise, add to the current entry
final outputChunk = read(availableBytes);
currentEntry.add(outputChunk.asUint8List());
if (_remainingBytes == 0) {
// Entry is done. Close and start by reading the next header
currentEntry.close();
_entryController = null;
_propagateStateIfPossible();
_skipPadding();
_remainingBytes = blockSize;
_paxHeaders.clearLocals();
}
}
}
}
}
/// Decodes the content of an extended pax header entry.
///
/// For details, see https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_03
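///
/// Each record has the form `"%d %s=%s\n", <length>, <keyword>, <value>`,
/// where the length counts the entire record, including the length digits
/// and the trailing newline. For example: `25 ctime=1084839148.1212\n`.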
Map<String, String> _readPaxHeader() {
var offset = 0;
final map = <String, String>{};
while (offset < _buffer.length) {
// At the start of an entry, expect its length
var length = 0;
var currentChar = _buffer[offset];
var charsInLength = 0;
while (currentChar >= $0 && currentChar <= $9) {
length = length * 10 + currentChar - $0;
charsInLength++;
currentChar = _buffer[++offset];
}
if (length == 0) {
throw StateError('Could not parse extended pax header: Got entry with '
'zero length.');
}
// Skip the whitespace
if (currentChar != $space) {
throw StateError('Could not parse extended pax header: Expected '
'whitespace after length indicator.');
}
currentChar = _buffer[++offset];
// Read the key
final keyBuffer = StringBuffer();
while (currentChar != $equal) {
keyBuffer.writeCharCode(currentChar);
currentChar = _buffer[++offset];
}
final key = keyBuffer.toString();
// Skip over the equals sign
offset++;
// Now, read the value from the known size. We subtract 3 for the space,
// the equals and the trailing newline
final lengthOfValue = length - 3 - keyBuffer.length - charsInLength;
final value =
utf8.decode(_buffer.sublist(offset, offset + lengthOfValue));
// Ignore unrecognized headers to avoid unbounded growth of the global
// header map.
if (supportedPaxHeaders.contains(key)) {
map[key] = value;
}
// Skip over value and trailing newline
offset += lengthOfValue + 1;
}
return map;
}
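/// Reads [_buffer] as a NUL-terminated string, as used by GNU long name and
/// long link entries.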
String _readZeroTerminated() {
return readZeroTerminated(_buffer, 0, _buffer.length);
}
}
// Archive entries with these types are hidden from users; the reader
// interprets their content instead of emitting them as entries.
const _transparentFileTypes = {
FileType.extendedHeader,
FileType.globalExtended,
FileType.gnuLongLinkName,
FileType.gnuLongName,
};
enum _ControllerState {
idle,
active,
paused,
canceled,
}
extension on Uint8List {
bool get isAllZeroes {
for (var i = 0; i < length; i++) {
if (this[i] != 0) return false;
}
return true;
}
}