TarEntryEncoder: synchronous encoding of tar entries. (#17)

Add a `SynchronousTarEntry` class to store in-memory tar entries. A new synchronous converter can convert these entries to bytes.

Co-authored-by: Simon Binder <oss@simonbinder.eu>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6c54d3b..e022efd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.5.0
+
+- Support sync encoding with `tarConverter`.
+
 ## 0.4.0
 
 - Support generating tar files with GNU-style long link names
diff --git a/README.md b/README.md
index 99fa1be..cfb80cd 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,9 @@
 
 A more complex example for writing files can be found in [`example/archive_self.dart`](example/archive_self.dart).
 
-Note that, by default, tar files are  written in the pax format defined by the
+### Encoding options
+
+By default, tar files are written in the pax format defined by the
 POSIX.1-2001 specification (`--format=posix` in GNU tar).
 When all entries have file names shorter than 100 chars and a size smaller 
 than 8 GB, this is equivalent to the `ustar` format. This library won't write
@@ -125,6 +127,27 @@
 To change the output format on the `tarWriter` transformer, use
 `tarWriterWith`.
 
+### Synchronous writing
+
+As the content of tar entries is defined as an asynchronous stream, the tar encoder is asynchronous too.
+The more specific `SynchronousTarEntry` class stores tar content as a list of bytes, meaning that it can be
+written synchronously too.
+
+To synchronously write tar files, use `tarConverter` (or `tarConverterWith` for options):
+
+```dart
+List<int> createTarArchive(Iterable<SynchronousTarEntry> entries) {
+  late List<int> result;
+  final sink = ByteConversionSink.withCallback((data) => result = data);
+
+  final output = tarConverter.startChunkedConversion(sink);
+  entries.forEach(output.add);
+  output.close();
+
+  return result;
+}
+```
+
 ## Features
 
 - Supports v7, ustar, pax, gnu and star archives
diff --git a/lib/src/entry.dart b/lib/src/entry.dart
index f6b0a5a..160974b 100644
--- a/lib/src/entry.dart
+++ b/lib/src/entry.dart
@@ -52,8 +52,17 @@
   TarEntry._(this.header, this.contents);
 
   /// Creates an in-memory tar entry from the [header] and the [data] to store.
-  factory TarEntry.data(TarHeader header, List<int> data) {
+  static SynchronousTarEntry data(TarHeader header, List<int> data) {
     (header as HeaderImpl).size = data.length;
-    return TarEntry(header, Stream.value(data));
+    return SynchronousTarEntry._(header, data);
   }
 }
+
+/// A [TarEntry] whose contents are held in memory as a list of bytes.
+class SynchronousTarEntry extends TarEntry {
+  /// The entry's bytes; `contents` emits this list as its only event.
+  final List<int> data;
+
+  SynchronousTarEntry._(TarHeader header, this.data)
+      : super._(header, Stream.value(data));
+}
diff --git a/lib/src/writer.dart b/lib/src/writer.dart
index 71ab368..1094a6b 100644
--- a/lib/src/writer.dart
+++ b/lib/src/writer.dart
@@ -99,6 +99,33 @@
   return _WritingSink(output, format);
 }
 
+/// A synchronous encoder for in-memory tar files.
+///
+/// The default [tarWriter] creates an asynchronous conversion from a stream of
+/// tar entries to a byte stream.
+/// When all tar entries are in-memory ([SynchronousTarEntry]), it is possible
+/// to write them synchronously too.
+///
+/// To create a tar archive consisting of a single entry, use
+/// [Converter.convert] on this [tarConverter].
+/// To create a tar archive consisting of any number of entries, first call
+/// [Converter.startChunkedConversion] with a suitable output sink. Next, call
+/// [Sink.add] for each tar entry and finish the archive by calling
+/// [Sink.close].
+///
+/// To change the output format of the tar converter, use [tarConverterWith].
+/// To encode any kind of tar entries, use the asynchronous [tarWriter].
+const Converter<SynchronousTarEntry, List<int>> tarConverter =
+    _SynchronousTarConverter(OutputFormat.pax);
+
+/// Creates a synchronous tar encoder that writes entries in the given [format].
+///
+/// The returned converter is used exactly like [tarConverter].
+Converter<SynchronousTarEntry, List<int>> tarConverterWith({
+  OutputFormat format = OutputFormat.pax,
+}) =>
+    _SynchronousTarConverter(format);
+
 /// This option controls how long file and link names should be written.
 ///
 /// This option can be passed to writer in [tarWritingSink] or[tarWriterWith].
@@ -127,16 +154,15 @@
 
 class _WritingSink extends StreamSink<TarEntry> {
   final StreamSink<List<int>> _output;
-  final OutputFormat format;
-
-  int _paxHeaderCount = 0;
+  final _SynchronousTarSink _synchronousWriter;
   bool _closed = false;
   final Completer<Object?> _done = Completer();
 
   int _pendingOperations = 0;
   Future<void> _ready = Future.value();
 
-  _WritingSink(this._output, this.format);
+  _WritingSink(this._output, OutputFormat format)
+      : _synchronousWriter = _SynchronousTarSink(_output, format);
 
   @override
   Future<void> get done => _done.future;
@@ -175,6 +201,114 @@
       size = bufferedData.length;
     }
 
+    _synchronousWriter._writeHeader(header, size);
+
+    // Write content.
+    if (bufferedData != null) {
+      _output.add(bufferedData);
+    } else {
+      await event.contents.forEach(_output.add);
+    }
+
+    _output.add(_paddingBytes(size));
+  }
+
+  @override
+  void addError(Object error, [StackTrace? stackTrace]) {
+    _output.addError(error, stackTrace);
+  }
+
+  @override
+  Future<void> addStream(Stream<TarEntry> stream) async {
+    await for (final entry in stream) {
+      await add(entry);
+    }
+  }
+
+  @override
+  Future<void> close() async {
+    if (!_closed) {
+      _closed = true;
+      // The synchronous writer appends the two terminating zero blocks and
+      // then closes the underlying output sink.
+      await _doWork(_synchronousWriter.close);
+    }
+
+    return done;
+  }
+}
+
+/// Returns the zero bytes needed to pad [size] to a multiple of [blockSize].
+Uint8List _paddingBytes(int size) {
+  return Uint8List(-size % blockSize);
+}
+
+class _SynchronousTarConverter
+    extends Converter<SynchronousTarEntry, List<int>> {
+  /// The output format handed to every sink this converter creates.
+  final OutputFormat format;
+  const _SynchronousTarConverter(this.format);
+
+  @override
+  Sink<SynchronousTarEntry> startChunkedConversion(Sink<List<int>> sink) =>
+      _SynchronousTarSink(sink, format);
+
+  /// Encodes [input] as a complete archive containing just that entry.
+  @override
+  List<int> convert(SynchronousTarEntry input) {
+    final bytes = BytesBuilder(copy: false);
+    final entries =
+        startChunkedConversion(ByteConversionSink.withCallback(bytes.add));
+    entries.add(input);
+    entries.close();
+    return bytes.takeBytes();
+  }
+}
+
+class _SynchronousTarSink extends Sink<SynchronousTarEntry> {
+  final OutputFormat _format;
+  final Sink<List<int>> _output;
+
+  bool _closed = false;
+  int _paxHeaderCount = 0;
+
+  _SynchronousTarSink(this._output, this._format);
+
+  /// Writes the header and in-memory contents of [data] to the output.
+  @override
+  void add(SynchronousTarEntry data) =>
+      addHeaderAndData(data.header, data.data);
+
+  /// Writes [header], then [data] followed by padding from [_paddingBytes].
+  void addHeaderAndData(TarHeader header, List<int> data) {
+    _throwIfClosed();
+    _writeHeader(header, data.length);
+    _output..add(data)..add(_paddingBytes(data.length));
+  }
+
+  @override
+  void close() {
+    if (_closed) return; // Closing again is a no-op.
+
+    // End the tar archive by writing two zero blocks. The read-only views
+    // presumably keep consumers from mutating the shared [zeroBlock].
+    _output
+      ..add(UnmodifiableUint8ListView(zeroBlock))
+      ..add(UnmodifiableUint8ListView(zeroBlock));
+    _output.close();
+    _closed = true;
+  }
+
+  /// Throws a [StateError] when this sink was already closed.
+  void _throwIfClosed() {
+    if (!_closed) return;
+    throw StateError('Encoder is closed. '
+        'After calling `close()`, encoder must not be used.');
+  }
+
+  void _writeHeader(TarHeader header, int size) {
+    assert(header.size < 0 || header.size == size);
+
     var nameBytes = utf8.encode(header.name);
     var linkBytes = utf8.encode(header.linkName ?? '');
     var gnameBytes = utf8.encode(header.groupName ?? '');
@@ -209,10 +343,10 @@
     }
 
     if (paxHeader.isNotEmpty) {
-      if (format == OutputFormat.pax) {
-        await _writePaxHeader(paxHeader);
+      if (_format == OutputFormat.pax) {
+        _writePaxHeader(paxHeader);
       } else {
-        await _writeGnuLongName(paxHeader);
+        _writeGnuLongName(paxHeader);
       }
     }
 
@@ -238,24 +372,13 @@
       checksum += byte;
     }
     headerBlock.setUint(checksum, 148, 8);
-
     _output.add(headerBlock);
-
-    // Write content.
-    if (bufferedData != null) {
-      _output.add(bufferedData);
-    } else {
-      await event.contents.forEach(_output.add);
-    }
-
-    final padding = -size % blockSize;
-    _output.add(Uint8List(padding));
   }
 
-  /// Writes an extended pax header.
+  /// Encodes an extended pax header.
   ///
   /// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_03
-  Future<void> _writePaxHeader(Map<String, List<int>> values) {
+  void _writePaxHeader(Map<String, List<int>> values) {
     final buffer = BytesBuilder();
     // format of each entry: "%d %s=%s\n", <length>, <keyword>, <value>
     // note that the length includes the trailing \n and the length description
@@ -287,7 +410,7 @@
     });
 
     final paxData = buffer.takeBytes();
-    final file = TarEntry.data(
+    addHeaderAndData(
       HeaderImpl.internal(
         format: TarFormat.pax,
         modified: millisecondsSinceEpoch(0),
@@ -298,10 +421,9 @@
       ),
       paxData,
     );
-    return _safeAdd(file);
   }
 
-  Future<void> _writeGnuLongName(Map<String, List<int>> values) async {
+  void _writeGnuLongName(Map<String, List<int>> values) {
     // Ensure that a file that can't be written in the GNU format is not written
     const allowedKeys = {paxPath, paxLinkpath};
     final invalidOptions = values.keys.toSet()..removeAll(allowedKeys);
@@ -316,54 +438,25 @@
     final name = values[paxPath];
     final linkName = values[paxLinkpath];
 
-    Future<void> write(List<int> name, TypeFlag flag) {
-      return _safeAdd(
-        TarEntry.data(
-          HeaderImpl.internal(
-            name: '././@LongLink',
-            modified: millisecondsSinceEpoch(0),
-            format: TarFormat.gnu,
-            typeFlag: flag,
-          ),
-          name,
+    void create(List<int> name, TypeFlag flag) {
+      return addHeaderAndData(
+        HeaderImpl.internal(
+          name: '././@LongLink',
+          modified: millisecondsSinceEpoch(0),
+          format: TarFormat.gnu,
+          typeFlag: flag,
         ),
+        name,
       );
     }
 
     if (name != null) {
-      await write(name, TypeFlag.gnuLongName);
+      create(name, TypeFlag.gnuLongName);
     }
     if (linkName != null) {
-      await write(linkName, TypeFlag.gnuLongLink);
+      create(linkName, TypeFlag.gnuLongLink);
     }
   }
-
-  @override
-  void addError(Object error, [StackTrace? stackTrace]) {
-    _output.addError(error, stackTrace);
-  }
-
-  @override
-  Future<void> addStream(Stream<TarEntry> stream) async {
-    await for (final entry in stream) {
-      await add(entry);
-    }
-  }
-
-  @override
-  Future<void> close() async {
-    if (!_closed) {
-      _closed = true;
-
-      // Add two empty blocks at the end.
-      await _doWork(() {
-        _output.add(zeroBlock);
-        _output.add(zeroBlock);
-      });
-    }
-
-    return done;
-  }
 }
 
 extension on Uint8List {
diff --git a/lib/tar.dart b/lib/tar.dart
index 218a6a2..14247bd 100644
--- a/lib/tar.dart
+++ b/lib/tar.dart
@@ -9,7 +9,7 @@
 import 'src/writer.dart';
 
 export 'src/constants.dart' show TypeFlag;
-export 'src/entry.dart';
+export 'src/entry.dart' show TarEntry, SynchronousTarEntry;
 export 'src/exception.dart';
 export 'src/format.dart';
 export 'src/header.dart' show TarHeader;
diff --git a/pubspec.yaml b/pubspec.yaml
index 1e60694..d3e10b6 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,6 +1,6 @@
 name: tar
 description: Memory-efficient, streaming implementation of the tar file format
-version: 0.4.0
+version: 0.5.0
 repository: https://github.com/simolus3/tar/
 
 environment:
diff --git a/test/windows_integration_test.dart b/test/windows_integration_test.dart
index 9ca22ca..ed7f9f2 100644
--- a/test/windows_integration_test.dart
+++ b/test/windows_integration_test.dart
@@ -13,7 +13,7 @@
     final file = File(Directory.systemTemp.path + '\\tar_test.tar');
     addTearDown(file.delete);
 
-    await Stream.value(entry)
+    await Stream<TarEntry>.value(entry)
         .transform(tarWriterWith(format: OutputFormat.gnuLongName))
         .pipe(file.openWrite());
 
diff --git a/test/writer_test.dart b/test/writer_test.dart
index 45469e4..c6b23fc 100644
--- a/test/writer_test.dart
+++ b/test/writer_test.dart
@@ -1,4 +1,5 @@
 import 'dart:async';
+import 'dart:convert';
 import 'dart:typed_data';
 
 import 'package:tar/tar.dart' as tar;
@@ -57,6 +58,69 @@
     );
   }, testOn: '!windows');
 
+  test('writes entries synchronously', () async {
+    final date = DateTime.parse('2020-12-30 12:34');
+    final builder = BytesBuilder(copy: false);
+    final sink = tar.tarConverter
+        .startChunkedConversion(ByteConversionSink.withCallback(builder.add));
+
+    sink.add(tar.TarEntry.data(
+      tar.TarHeader(
+        name: 'first.txt',
+        mode: int.parse('644', radix: 8),
+        size: 0,
+        userId: 3,
+        groupId: 4,
+        userName: 'my_user',
+        modified: date,
+      ),
+      Uint8List(10),
+    ));
+    sink.add(tar.TarEntry.data(
+      tar.TarHeader(
+        name: 'second.txt',
+        mode: int.parse('644', radix: 8),
+        size: 0,
+        userId: 3,
+        groupId: 4,
+        userName: 'my_user',
+        modified: date,
+      ),
+      Uint8List(512),
+    ));
+
+    sink.close();
+
+    final process = await startTar(['--list', '--verbose']);
+    process.stdin.add(builder.takeBytes());
+
+    expect(
+      process.lines,
+      emitsInOrder(
+        <Matcher>[
+          allOf(
+            contains('-rw-r--r--'),
+            contains('my_user'),
+            contains('10'),
+            // The date format is different across GNU and BSD tar
+            anyOf(contains('12:34'), contains('Dec 30')),
+            contains('first.txt'),
+          ),
+          allOf(
+            contains('-rw-r--r--'),
+            contains('my_user'),
+            contains('512'),
+            // The date format is different across GNU and BSD tar
+            anyOf(contains('12:34'), contains('Dec 30')),
+            contains('second.txt'),
+          ),
+        ],
+      ),
+    );
+
+    await process.stdin.close();
+  }, testOn: '!windows');
+
   test('writes huge files', () async {
     final oneMb = Uint8List(oneMbSize);
     const count = tenGbSize ~/ oneMbSize;