TarEntryEncoder: synchronous encoding of tar entries. (#17)
Add a `SynchronousTarEntry` class to store in-memory tar entries. A new synchronous converter can convert these entries to bytes.
Co-authored-by: Simon Binder <oss@simonbinder.eu>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6c54d3b..e022efd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.5.0
+
+- Support sync encoding with `tarConverter`.
+
## 0.4.0
- Support generating tar files with GNU-style long link names
diff --git a/README.md b/README.md
index 99fa1be..cfb80cd 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,9 @@
A more complex example for writing files can be found in [`example/archive_self.dart`](example/archive_self.dart).
-Note that, by default, tar files are written in the pax format defined by the
+### Encoding options
+
+By default, tar files are written in the pax format defined by the
POSIX.1-2001 specification (`--format=posix` in GNU tar).
When all entries have file names shorter than 100 chars and a size smaller
than 8 GB, this is equivalent to the `ustar` format. This library won't write
@@ -125,6 +127,27 @@
To change the output format on the `tarWriter` transformer, use
`tarWriterWith`.
+### Synchronous writing
+
+As the content of tar entries is defined as an asynchronous stream, the tar encoder is asynchronous too.
+The more specific `SynchronousTarEntry` class stores tar content as a list of bytes, meaning that it can be
+written synchronously too.
+
+To synchronously write tar files, use `tarConverter` (or `tarConverterWith` for options):
+
+```dart
+List<int> createTarArchive(Iterable<SynchronousTarEntry> entries) {
+ late List<int> result;
+ final sink = ByteConversionSink.withCallback((data) => result = data);
+
+ final output = tarConverter.startChunkedConversion(sink);
+ entries.forEach(output.add);
+ output.close();
+
+ return result;
+}
+```
+
## Features
- Supports v7, ustar, pax, gnu and star archives
diff --git a/lib/src/entry.dart b/lib/src/entry.dart
index f6b0a5a..160974b 100644
--- a/lib/src/entry.dart
+++ b/lib/src/entry.dart
@@ -52,8 +52,17 @@
TarEntry._(this.header, this.contents);
/// Creates an in-memory tar entry from the [header] and the [data] to store.
- factory TarEntry.data(TarHeader header, List<int> data) {
+ static SynchronousTarEntry data(TarHeader header, List<int> data) {
(header as HeaderImpl).size = data.length;
- return TarEntry(header, Stream.value(data));
+ return SynchronousTarEntry._(header, data);
}
}
+
+/// A tar entry stored in memory.
+class SynchronousTarEntry extends TarEntry {
+ /// The contents of this tar entry as a byte array.
+ final List<int> data;
+
+ SynchronousTarEntry._(TarHeader header, this.data)
+ : super._(header, Stream.value(data));
+}
diff --git a/lib/src/writer.dart b/lib/src/writer.dart
index 71ab368..1094a6b 100644
--- a/lib/src/writer.dart
+++ b/lib/src/writer.dart
@@ -99,6 +99,33 @@
return _WritingSink(output, format);
}
+/// A synchronous encoder for in-memory tar files.
+///
+/// The default [tarWriter] creates an asynchronous conversion from a stream of
+/// tar entries to a byte stream.
+/// When all tar entries are in-memory ([SynchronousTarEntry]), it is possible
+/// to write them synchronously too.
+///
+/// To create a tar archive consisting of a single entry, use
+/// [Converter.convert] on this [tarConverter].
+/// To create a tar archive consisting of any number of entries, first call
+/// [Converter.startChunkedConversion] with a suitable output sink. Next, call
+/// [Sink.add] for each tar entry and finish the archive by calling
+/// [Sink.close].
+///
+/// To change the output format of the tar converter, use [tarConverterWith].
+/// To encode any kind of tar entries, use the asynchronous [tarWriter].
+const Converter<SynchronousTarEntry, List<int>> tarConverter =
+ _SynchronousTarConverter(OutputFormat.pax);
+
+/// A synchronous encoder for in-memory tar files, with custom encoding options.
+///
+/// For more information on how to use the converter, see [tarConverter].
+Converter<SynchronousTarEntry, List<int>> tarConverterWith(
+ {OutputFormat format = OutputFormat.pax}) {
+ return _SynchronousTarConverter(format);
+}
+
/// This option controls how long file and link names should be written.
///
/// This option can be passed to the writer in [tarWritingSink] or [tarWriterWith].
@@ -127,16 +154,15 @@
class _WritingSink extends StreamSink<TarEntry> {
final StreamSink<List<int>> _output;
- final OutputFormat format;
-
- int _paxHeaderCount = 0;
+ final _SynchronousTarSink _synchronousWriter;
bool _closed = false;
final Completer<Object?> _done = Completer();
int _pendingOperations = 0;
Future<void> _ready = Future.value();
- _WritingSink(this._output, this.format);
+ _WritingSink(this._output, OutputFormat format)
+ : _synchronousWriter = _SynchronousTarSink(_output, format);
@override
Future<void> get done => _done.future;
@@ -175,6 +201,114 @@
size = bufferedData.length;
}
+ _synchronousWriter._writeHeader(header, size);
+
+ // Write content.
+ if (bufferedData != null) {
+ _output.add(bufferedData);
+ } else {
+ await event.contents.forEach(_output.add);
+ }
+
+ _output.add(_paddingBytes(size));
+ }
+
+ @override
+ void addError(Object error, [StackTrace? stackTrace]) {
+ _output.addError(error, stackTrace);
+ }
+
+ @override
+ Future<void> addStream(Stream<TarEntry> stream) async {
+ await for (final entry in stream) {
+ await add(entry);
+ }
+ }
+
+ @override
+ Future<void> close() async {
+ if (!_closed) {
+ _closed = true;
+
+ // Add two empty blocks at the end.
+ await _doWork(_synchronousWriter.close);
+ }
+
+ return done;
+ }
+}
+
+Uint8List _paddingBytes(int size) {
+ final padding = -size % blockSize;
+ return Uint8List(padding);
+}
+
+class _SynchronousTarConverter
+ extends Converter<SynchronousTarEntry, List<int>> {
+ final OutputFormat format;
+
+ const _SynchronousTarConverter(this.format);
+
+ @override
+ Sink<SynchronousTarEntry> startChunkedConversion(Sink<List<int>> sink) {
+ return _SynchronousTarSink(sink, format);
+ }
+
+ @override
+ List<int> convert(SynchronousTarEntry input) {
+ final output = BytesBuilder(copy: false);
+ startChunkedConversion(ByteConversionSink.withCallback(output.add))
+ ..add(input)
+ ..close();
+
+ return output.takeBytes();
+ }
+}
+
+class _SynchronousTarSink extends Sink<SynchronousTarEntry> {
+ final OutputFormat _format;
+ final Sink<List<int>> _output;
+
+ bool _closed = false;
+ int _paxHeaderCount = 0;
+
+ _SynchronousTarSink(this._output, this._format);
+
+ @override
+ void add(SynchronousTarEntry data) {
+ addHeaderAndData(data.header, data.data);
+ }
+
+ void addHeaderAndData(TarHeader header, List<int> data) {
+ _throwIfClosed();
+
+ _writeHeader(header, data.length);
+ _output..add(data)..add(_paddingBytes(data.length));
+ }
+
+ @override
+ void close() {
+ if (_closed) return;
+
+ // End the tar archive by writing two zero blocks.
+ _output
+ ..add(UnmodifiableUint8ListView(zeroBlock))
+ ..add(UnmodifiableUint8ListView(zeroBlock));
+ _output.close();
+
+ _closed = true;
+ }
+
+ void _throwIfClosed() {
+ if (_closed) {
+ throw StateError('Encoder is closed. '
+ 'After calling `endOfArchive()`, encoder must not be used.');
+ }
+ }
+
+ void _writeHeader(TarHeader header, int size) {
+ assert(header.size < 0 || header.size == size);
+
var nameBytes = utf8.encode(header.name);
var linkBytes = utf8.encode(header.linkName ?? '');
var gnameBytes = utf8.encode(header.groupName ?? '');
@@ -209,10 +343,10 @@
}
if (paxHeader.isNotEmpty) {
- if (format == OutputFormat.pax) {
- await _writePaxHeader(paxHeader);
+ if (_format == OutputFormat.pax) {
+ _writePaxHeader(paxHeader);
} else {
- await _writeGnuLongName(paxHeader);
+ _writeGnuLongName(paxHeader);
}
}
@@ -238,24 +372,13 @@
checksum += byte;
}
headerBlock.setUint(checksum, 148, 8);
-
_output.add(headerBlock);
-
- // Write content.
- if (bufferedData != null) {
- _output.add(bufferedData);
- } else {
- await event.contents.forEach(_output.add);
- }
-
- final padding = -size % blockSize;
- _output.add(Uint8List(padding));
}
- /// Writes an extended pax header.
+ /// Encodes an extended pax header.
///
/// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_03
- Future<void> _writePaxHeader(Map<String, List<int>> values) {
+ void _writePaxHeader(Map<String, List<int>> values) {
final buffer = BytesBuilder();
// format of each entry: "%d %s=%s\n", <length>, <keyword>, <value>
// note that the length includes the trailing \n and the length description
@@ -287,7 +410,7 @@
});
final paxData = buffer.takeBytes();
- final file = TarEntry.data(
+ addHeaderAndData(
HeaderImpl.internal(
format: TarFormat.pax,
modified: millisecondsSinceEpoch(0),
@@ -298,10 +421,9 @@
),
paxData,
);
- return _safeAdd(file);
}
- Future<void> _writeGnuLongName(Map<String, List<int>> values) async {
+ void _writeGnuLongName(Map<String, List<int>> values) {
// Ensure that a file that can't be written in the GNU format is not written
const allowedKeys = {paxPath, paxLinkpath};
final invalidOptions = values.keys.toSet()..removeAll(allowedKeys);
@@ -316,54 +438,25 @@
final name = values[paxPath];
final linkName = values[paxLinkpath];
- Future<void> write(List<int> name, TypeFlag flag) {
- return _safeAdd(
- TarEntry.data(
- HeaderImpl.internal(
- name: '././@LongLink',
- modified: millisecondsSinceEpoch(0),
- format: TarFormat.gnu,
- typeFlag: flag,
- ),
- name,
+ void create(List<int> name, TypeFlag flag) {
+ return addHeaderAndData(
+ HeaderImpl.internal(
+ name: '././@LongLink',
+ modified: millisecondsSinceEpoch(0),
+ format: TarFormat.gnu,
+ typeFlag: flag,
),
+ name,
);
}
if (name != null) {
- await write(name, TypeFlag.gnuLongName);
+ create(name, TypeFlag.gnuLongName);
}
if (linkName != null) {
- await write(linkName, TypeFlag.gnuLongLink);
+ create(linkName, TypeFlag.gnuLongLink);
}
}
-
- @override
- void addError(Object error, [StackTrace? stackTrace]) {
- _output.addError(error, stackTrace);
- }
-
- @override
- Future<void> addStream(Stream<TarEntry> stream) async {
- await for (final entry in stream) {
- await add(entry);
- }
- }
-
- @override
- Future<void> close() async {
- if (!_closed) {
- _closed = true;
-
- // Add two empty blocks at the end.
- await _doWork(() {
- _output.add(zeroBlock);
- _output.add(zeroBlock);
- });
- }
-
- return done;
- }
}
extension on Uint8List {
diff --git a/lib/tar.dart b/lib/tar.dart
index 218a6a2..14247bd 100644
--- a/lib/tar.dart
+++ b/lib/tar.dart
@@ -9,7 +9,7 @@
import 'src/writer.dart';
export 'src/constants.dart' show TypeFlag;
-export 'src/entry.dart';
+export 'src/entry.dart' show TarEntry, SynchronousTarEntry;
export 'src/exception.dart';
export 'src/format.dart';
export 'src/header.dart' show TarHeader;
diff --git a/pubspec.yaml b/pubspec.yaml
index 1e60694..d3e10b6 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,6 +1,6 @@
name: tar
description: Memory-efficient, streaming implementation of the tar file format
-version: 0.4.0
+version: 0.5.0
repository: https://github.com/simolus3/tar/
environment:
diff --git a/test/windows_integration_test.dart b/test/windows_integration_test.dart
index 9ca22ca..ed7f9f2 100644
--- a/test/windows_integration_test.dart
+++ b/test/windows_integration_test.dart
@@ -13,7 +13,7 @@
final file = File(Directory.systemTemp.path + '\\tar_test.tar');
addTearDown(file.delete);
- await Stream.value(entry)
+ await Stream<TarEntry>.value(entry)
.transform(tarWriterWith(format: OutputFormat.gnuLongName))
.pipe(file.openWrite());
diff --git a/test/writer_test.dart b/test/writer_test.dart
index 45469e4..c6b23fc 100644
--- a/test/writer_test.dart
+++ b/test/writer_test.dart
@@ -1,4 +1,5 @@
import 'dart:async';
+import 'dart:convert';
import 'dart:typed_data';
import 'package:tar/tar.dart' as tar;
@@ -57,6 +58,69 @@
);
}, testOn: '!windows');
+ test('writes entries synchronously', () async {
+ final date = DateTime.parse('2020-12-30 12:34');
+ final builder = BytesBuilder(copy: false);
+ final sink = tar.tarConverter
+ .startChunkedConversion(ByteConversionSink.withCallback(builder.add));
+
+ sink.add(tar.TarEntry.data(
+ tar.TarHeader(
+ name: 'first.txt',
+ mode: int.parse('644', radix: 8),
+ size: 0,
+ userId: 3,
+ groupId: 4,
+ userName: 'my_user',
+ modified: date,
+ ),
+ Uint8List(10),
+ ));
+ sink.add(tar.TarEntry.data(
+ tar.TarHeader(
+ name: 'second.txt',
+ mode: int.parse('644', radix: 8),
+ size: 0,
+ userId: 3,
+ groupId: 4,
+ userName: 'my_user',
+ modified: date,
+ ),
+ Uint8List(512),
+ ));
+
+ sink.close();
+
+ final process = await startTar(['--list', '--verbose']);
+ process.stdin.add(builder.takeBytes());
+
+ expect(
+ process.lines,
+ emitsInOrder(
+ <Matcher>[
+ allOf(
+ contains('-rw-r--r--'),
+ contains('my_user'),
+ contains('10'),
+ // The date format is different across GNU and BSD tar
+ anyOf(contains('12:34'), contains('Dec 30')),
+ contains('first.txt'),
+ ),
+ allOf(
+ contains('-rw-r--r--'),
+ contains('my_user'),
+ contains('512'),
+ // The date format is different across GNU and BSD tar
+ anyOf(contains('12:34'), contains('Dec 30')),
+ contains('second.txt'),
+ ),
+ ],
+ ),
+ );
+
+ await process.stdin.close();
+ }, testOn: '!windows');
+
test('writes huge files', () async {
final oneMb = Uint8List(oneMbSize);
const count = tenGbSize ~/ oneMbSize;