Support GNU-style long filenames (#13)
diff --git a/.github/workflows/dart.yml b/.github/workflows/dart.yml
index a192518..2aa4542 100644
--- a/.github/workflows/dart.yml
+++ b/.github/workflows/dart.yml
@@ -6,39 +6,53 @@
pull_request:
branches: [ main ]
-jobs:
- build:
- runs-on: ubuntu-latest
+env:
+ PUB_ENVIRONMENT: bot.github
+ PUB_CACHE: ".dart_tool/pub_cache"
- container:
- image: google/dart:beta
+jobs:
+ analyze:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ - uses: dart-lang/setup-dart@v1
+ - uses: actions/cache@v2
+ with:
+ path: .dart_tool
+ key: dart-tool-${{ hashFiles('pubspec.yaml') }}
+
+ - name: "Install dependencies"
+ run: dart pub upgrade
+
+ - name: "Ensure formatted"
+ run: dart format --output=none --set-exit-if-changed .
+
+ - name: "Analyze project"
+ run: dart analyze --fatal-infos
+
+ test:
+ strategy:
+ matrix:
+ os: [ubuntu-latest, windows-latest, macOS-latest]
+ runs-on: ${{ matrix.os }}
+ # analyze creates the cache, avoid downloading dependencies again here
+ needs: analyze
steps:
- uses: actions/checkout@v2
-
- - name: Print Dart SDK version
- run: dart --version
-
- - name: Cache pub dependencies
- uses: actions/cache@v2
- env:
- cache-name: tar-cache-deps
+ - uses: dart-lang/setup-dart@v1
+ - uses: actions/cache@v2
with:
path: .dart_tool
- key: ${{ env.cache-name }}
-
- - name: Install dependencies
- env:
- PUB_CACHE: ".dart_tool/pub_cache"
+ key: dart-tool-${{ hashFiles('pubspec.yaml') }}
+
+ - name: "Get dependencies"
run: dart pub get
- - name: Verify formatting
- run: dart format --output=none --set-exit-if-changed .
-
- - name: Analyze project source
- run: dart analyze --fatal-infos
-
- - name: Run tests
- env:
- PUB_CACHE: ".dart_tool/pub_cache"
+ - name: "Download 7za"
+ run: dart run tool/download_7za.dart
+ if: runner.os == 'Windows'
+
+ - name: "Run tests"
run: dart test
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6aa953f..561163d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+## 0.4.0
+
+- Support generating tar files with GNU-style long file and link names
+  - Add a `format` parameter to `tarWritingSink` and the new `tarWriterWith`
+
## 0.3.3
- Drop `chunked_stream` dependency in favor of `package:async`.
diff --git a/README.md b/README.md
index 65e40b3..77410cd 100644
--- a/README.md
+++ b/README.md
@@ -76,11 +76,6 @@
}
```
-Note that tar files are always written in the pax format defined by the POSIX.1-2001 specification
-(`--format=posix` in GNU tar).
-When all entries have file names shorter than 100 chars and a size smaller than 8 GB, this is
-equivalent to the `ustar` format. This library won't write PAX headers when there is no reason to do so.
-
To write `.tar.gz` files, you can again transform the stream twice:
```dart
@@ -95,6 +90,25 @@
}
```
+Note that, by default, tar files are written in the pax format defined by the
+POSIX.1-2001 specification (`--format=posix` in GNU tar).
+When all entries have file names shorter than 100 chars and a size smaller
+than 8 GB, this is equivalent to the `ustar` format. This library won't write
+PAX headers when there is no reason to do so.
+If you prefer writing GNU-style long filenames instead, you can use the
+`format` option:
+
+```dart
+Future<void> write(Stream<TarEntry> entries) {
+  return entries.pipe(
+    tarWritingSink(
+      File('output.tar').openWrite(),
+      format: OutputFormat.gnuLongName,
+    ),
+  );
+}
+```
+
## Features
- Supports v7, ustar, pax, gnu and star archives
diff --git a/analysis_options.yaml b/analysis_options.yaml
index a4e40fa..717c8c6 100644
--- a/analysis_options.yaml
+++ b/analysis_options.yaml
@@ -1,4 +1,4 @@
-include: package:extra_pedantic/analysis_options.yaml
+include: package:extra_pedantic/analysis_options.1.3.0.yaml
analyzer:
strong-mode:
@@ -15,3 +15,5 @@
literal_only_boolean_expressions: false # Nothing wrong with a little while(true)
parameter_assignments: false
unnecessary_await_in_return: false
+ no_default_cases: false
+ prefer_asserts_with_message: false # We only use asserts for library-internal invariants
diff --git a/lib/src/reader.dart b/lib/src/reader.dart
index de84d6a..713235a 100644
--- a/lib/src/reader.dart
+++ b/lib/src/reader.dart
@@ -803,8 +803,8 @@
// If we're seeing weird PAX Version 0.0 sparse keys, expect alternating
// GNU.sparse.offset and GNU.sparse.numbytes headers.
if (key == paxGNUSparseNumBytes || key == paxGNUSparseOffset) {
- if ((sparseMap.length % 2 == 0 && key != paxGNUSparseOffset) ||
- (sparseMap.length % 2 == 1 && key != paxGNUSparseNumBytes) ||
+ if ((sparseMap.length.isEven && key != paxGNUSparseOffset) ||
+ (sparseMap.length.isOdd && key != paxGNUSparseNumBytes) ||
value.contains(',')) {
error();
}
diff --git a/lib/src/writer.dart b/lib/src/writer.dart
index 8d8073f..71ab368 100644
--- a/lib/src/writer.dart
+++ b/lib/src/writer.dart
@@ -7,16 +7,19 @@
import 'entry.dart';
import 'format.dart';
import 'header.dart';
+import 'utils.dart';
class _WritingTransformer extends StreamTransformerBase<TarEntry, List<int>> {
- const _WritingTransformer();
+ final OutputFormat format;
+
+ const _WritingTransformer(this.format);
@override
Stream<List<int>> bind(Stream<TarEntry> stream) {
// sync because the controller proxies another stream
final controller = StreamController<List<int>>(sync: true);
controller.onListen = () {
- stream.pipe(tarWritingSink(controller));
+ stream.pipe(tarWritingSink(controller, format: format));
};
return controller.stream;
@@ -32,7 +35,30 @@
///
/// When piping the resulting stream into a [StreamConsumer], consider using
/// [tarWritingSink] directly.
-const StreamTransformer<TarEntry, List<int>> tarWriter = _WritingTransformer();
+/// To change the output format of files with long names, use [tarWriterWith].
+const StreamTransformer<TarEntry, List<int>> tarWriter =
+ _WritingTransformer(OutputFormat.pax);
+
+/// Creates a stream transformer writing tar entries as byte streams, with
+/// custom encoding options.
+///
+/// The [format] option can be used to select the way tar entries with
+/// long file or link names are written. By default, the writer will emit an
+/// extended PAX header for the file ([OutputFormat.pax]).
+/// Alternatively, [OutputFormat.gnuLongName] can be used to emit special tar
+/// entries with the [TypeFlag.gnuLongName] type.
+///
+/// Regardless of the input stream, the stream returned by this
+/// [StreamTransformer.bind] is a single-subscription stream.
+/// Apart from that, subscriptions, cancellations, pauses and resumes are
+/// propagated as one would expect from a [StreamTransformer].
+///
+/// When using the default options, prefer using the constant [tarWriter]
+/// instead.
+StreamTransformer<TarEntry, List<int>> tarWriterWith(
+ {OutputFormat format = OutputFormat.pax}) {
+ return _WritingTransformer(format);
+}
/// Create a sink emitting encoded tar files to the [output] sink.
///
@@ -62,15 +88,46 @@
/// Note that, if you don't set the [TarHeader.size], outgoing tar entries need
/// to be buffered once, which decreases performance.
///
+/// The [format] argument can be used to control how long file names are written
+/// in the tar archive. For more details, see the options in [OutputFormat].
+///
/// See also:
/// - [tarWriter], a stream transformer using this sink
/// - [StreamSink]
-StreamSink<TarEntry> tarWritingSink(StreamSink<List<int>> output) {
- return _WritingSink(output);
+StreamSink<TarEntry> tarWritingSink(StreamSink<List<int>> output,
+ {OutputFormat format = OutputFormat.pax}) {
+ return _WritingSink(output, format);
+}
+
+/// This option controls how long file and link names should be written.
+///
+/// It can be passed to the writer via [tarWritingSink] or [tarWriterWith].
+enum OutputFormat {
+ /// Generates extended PAX headers to encode files with a long name.
+ ///
+ /// This is the default option.
+ pax,
+
+ /// Generates [TypeFlag.gnuLongName] or [TypeFlag.gnuLongLink] entries when
+ /// encoding files with a long name.
+ ///
+ /// When this option is set, `package:tar` will not emit PAX headers, which
+ /// may improve compatibility with some legacy systems like old 7zip versions.
+ ///
+ /// Note that this format can't encode large file sizes or long user names.
+ /// Tar entries can't be written if
+ /// * their [TarHeader.userName] is longer than 31 bytes in utf8,
+ /// * their [TarHeader.groupName] is longer than 31 bytes in utf8, or,
+ /// * their [TarEntry.contents] are larger than 8589934591 bytes (around
+ /// 8 GiB).
+ ///
+ /// Attempting to encode such a file will throw an [UnsupportedError].
+ gnuLongName,
}
class _WritingSink extends StreamSink<TarEntry> {
final StreamSink<List<int>> _output;
+ final OutputFormat format;
int _paxHeaderCount = 0;
bool _closed = false;
@@ -79,7 +136,7 @@
int _pendingOperations = 0;
Future<void> _ready = Future.value();
- _WritingSink(this._output);
+ _WritingSink(this._output, this.format);
@override
Future<void> get done => _done.future;
@@ -127,6 +184,7 @@
// have to insert an entry just to store the names. Some tar implementations
// expect them to be zero-terminated, so use 99 chars to be safe.
final paxHeader = <String, List<int>>{};
+
if (nameBytes.length > 99) {
paxHeader[paxPath] = nameBytes;
nameBytes = nameBytes.sublist(0, 99);
@@ -151,7 +209,11 @@
}
if (paxHeader.isNotEmpty) {
- await _writePaxHeader(paxHeader);
+ if (format == OutputFormat.pax) {
+ await _writePaxHeader(paxHeader);
+ } else {
+ await _writeGnuLongName(paxHeader);
+ }
}
final headerBlock = Uint8List(blockSize)
@@ -228,7 +290,7 @@
final file = TarEntry.data(
HeaderImpl.internal(
format: TarFormat.pax,
- modified: DateTime.fromMillisecondsSinceEpoch(0),
+ modified: millisecondsSinceEpoch(0),
name: 'PaxHeader/${_paxHeaderCount++}',
mode: 0,
size: paxData.length,
@@ -239,6 +301,43 @@
return _safeAdd(file);
}
+ /// Writes GNU long-name entries for [values], rejecting other long fields.
+ Future<void> _writeGnuLongName(Map<String, List<int>> values) async {
+   const allowedKeys = {paxPath, paxLinkpath};
+   final invalidOptions = values.keys.toSet()..removeAll(allowedKeys);
+   if (invalidOptions.isNotEmpty) {
+     throw UnsupportedError(
+       'Unsupported entry for OutputFormat.gnuLongName. It uses long fields '
+       "that can't be represented: $invalidOptions. \n"
+       'Try using OutputFormat.pax instead.',
+     );
+   }
+
+   final name = values[paxPath];
+   final linkName = values[paxLinkpath];
+
+   Future<void> write(List<int> longName, TypeFlag flag) {
+     return _safeAdd(
+       TarEntry.data(
+         HeaderImpl.internal(
+           name: '././@LongLink',
+           modified: millisecondsSinceEpoch(0),
+           format: TarFormat.gnu,
+           typeFlag: flag,
+         ),
+         longName,
+       ),
+     );
+   }
+
+   if (name != null) {
+     await write(name, TypeFlag.gnuLongName);
+   }
+   if (linkName != null) {
+     await write(linkName, TypeFlag.gnuLongLink);
+   }
+ }
+
@override
void addError(Object error, [StackTrace? stackTrace]) {
_output.addError(error, stackTrace);
diff --git a/lib/tar.dart b/lib/tar.dart
index dd5f896..218a6a2 100644
--- a/lib/tar.dart
+++ b/lib/tar.dart
@@ -14,4 +14,4 @@
export 'src/format.dart';
export 'src/header.dart' show TarHeader;
export 'src/reader.dart' show TarReader;
-export 'src/writer.dart' show tarWritingSink, tarWriter;
+export 'src/writer.dart';
diff --git a/pubspec.yaml b/pubspec.yaml
index 71c6f2a..1e60694 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,6 +1,6 @@
name: tar
description: Memory-efficient, streaming implementation of the tar file format
-version: 0.3.3
+version: 0.4.0
repository: https://github.com/simolus3/tar/
environment:
@@ -13,5 +13,6 @@
dev_dependencies:
charcode: ^1.2.0
- extra_pedantic: ^1.2.0
- test: ^1.16.0
+ extra_pedantic: ^1.4.0
+ path: ^1.8.0
+ test: ^1.17.4
diff --git a/test/reader_test.dart b/test/reader_test.dart
index a8944a0..a2e9edf 100644
--- a/test/reader_test.dart
+++ b/test/reader_test.dart
@@ -6,9 +6,8 @@
import 'package:async/async.dart';
import 'package:tar/src/reader.dart';
import 'package:tar/src/utils.dart';
-import 'package:test/test.dart';
-
import 'package:tar/tar.dart';
+import 'package:test/test.dart';
void main() {
group('POSIX.1-2001', () {
diff --git a/test/sparse_test.dart b/test/sparse_test.dart
index 6e3c695..de92dba 100644
--- a/test/sparse_test.dart
+++ b/test/sparse_test.dart
@@ -1,3 +1,4 @@
+@TestOn('linux') // We currently use gnu tar to create test inputs
import 'dart:io';
import 'dart:math';
diff --git a/test/system_tar.dart b/test/system_tar.dart
index 4e7c897..1a090ee 100644
--- a/test/system_tar.dart
+++ b/test/system_tar.dart
@@ -42,10 +42,10 @@
yield* tar.stdout;
}
-Future<Process> writeToTar(
- List<String> args, Stream<tar.TarEntry> entries) async {
+Future<Process> writeToTar(List<String> args, Stream<tar.TarEntry> entries,
+ {tar.OutputFormat format = tar.OutputFormat.pax}) async {
final proc = await startTar(args);
- await entries.pipe(tar.tarWritingSink(proc.stdin));
+ await entries.pipe(tar.tarWritingSink(proc.stdin, format: format));
return proc;
}
diff --git a/test/windows_integration_test.dart b/test/windows_integration_test.dart
new file mode 100644
index 0000000..9ca22ca
--- /dev/null
+++ b/test/windows_integration_test.dart
@@ -0,0 +1,23 @@
+@TestOn('windows')
+import 'dart:io';
+
+import 'package:tar/tar.dart';
+import 'package:test/test.dart';
+
+import 'system_tar.dart';
+
+void main() {
+ test('emits long file names that are understood by 7zip', () async {
+ final name = 'name' * 40;
+ final entry = TarEntry.data(TarHeader(name: name), []);
+ final file = File(Directory.systemTemp.path + '\\tar_test.tar');
+ addTearDown(file.delete);
+
+ await Stream.value(entry)
+ .transform(tarWriterWith(format: OutputFormat.gnuLongName))
+ .pipe(file.openWrite());
+
+ final proc = await Process.start('7za.exe', ['l', file.path]);
+ expect(proc.lines, emitsThrough(contains(name)));
+ });
+}
diff --git a/test/writer_test.dart b/test/writer_test.dart
index 7b742ed..12aece0 100644
--- a/test/writer_test.dart
+++ b/test/writer_test.dart
@@ -1,22 +1,30 @@
import 'dart:async';
import 'dart:typed_data';
+import 'package:tar/tar.dart' as tar;
import 'package:test/test.dart';
-import 'package:tar/tar.dart' as tar;
import 'system_tar.dart';
-void main() {
- test('writes long file names', () async {
- final name = '${'very' * 30} long name.txt';
- final withLongName = tar.TarEntry.data(
- tar.TarHeader(name: name, mode: 0, size: 0),
- Uint8List(0),
- );
+const oneMbSize = 1024 * 1024;
+const tenGbSize = oneMbSize * 1024 * 10;
- final proc = await writeToTar(['--list'], Stream.value(withLongName));
- expect(proc.lines, emits(name));
- });
+void main() {
+ group('writes long file names', () {
+ for (final style in tar.OutputFormat.values) {
+ test(style.toString(), () async {
+ final name = '${'very' * 30} long name.txt';
+ final withLongName = tar.TarEntry.data(
+ tar.TarHeader(name: name, mode: 0, size: 0),
+ Uint8List(0),
+ );
+
+ final proc = await writeToTar(['--list'], Stream.value(withLongName),
+ format: style);
+ expect(proc.lines, emits(contains(name)));
+ });
+ }
+ }, testOn: '!windows');
test('writes headers', () async {
final date = DateTime.parse('2020-12-30 12:34');
@@ -40,17 +48,15 @@
emits(
allOf(
contains('-rwxr--r--'),
- contains('my_user/long group that exceeds 32 characters'),
- contains('2020-12-30 12:34'),
+ contains('my_user'),
+ contains('long group that exceeds 32 characters'),
+ contains('12:34'),
),
),
);
- });
+ }, testOn: '!windows');
test('writes huge files', () async {
- const oneMbSize = 1024 * 1024;
- const tenGbSize = oneMbSize * 1024 * 10;
-
final oneMb = Uint8List(oneMbSize);
const count = tenGbSize ~/ oneMbSize;
@@ -65,5 +71,62 @@
final proc = await writeToTar(['--list', '--verbose'], Stream.value(entry));
expect(proc.lines, emits(contains(tenGbSize.toString())));
+ }, testOn: '!windows');
+
+ group('refuses to write files with OutputFormat.gnu', () {
+ void shouldThrow(tar.TarEntry entry) {
+ final output = tar.tarWritingSink(_NullStreamSink(),
+ format: tar.OutputFormat.gnuLongName);
+ expect(Stream.value(entry).pipe(output), throwsA(isUnsupportedError));
+ }
+
+ test('when they are too large', () {
+ final oneMb = Uint8List(oneMbSize);
+ const count = tenGbSize ~/ oneMbSize;
+
+ final entry = tar.TarEntry(
+ tar.TarHeader(
+ name: 'file.blob',
+ mode: 0,
+ size: tenGbSize,
+ ),
+ Stream<List<int>>.fromIterable(Iterable.generate(count, (i) => oneMb)),
+ );
+ shouldThrow(entry);
+ });
+
+ test('when they use long user names', () {
+ shouldThrow(
+ tar.TarEntry.data(
+ tar.TarHeader(
+ name: 'file.txt',
+ userName: 'this name is longer than 32 chars, which is not allowed',
+ ),
+ [],
+ ),
+ );
+ });
});
}
+
+class _NullStreamSink<T> extends StreamSink<T> {
+ @override
+ void add(T event) {}
+
+ @override
+ void addError(Object error, [StackTrace? stackTrace]) {
+ // ignore: only_throw_errors
+ throw error;
+ }
+
+ @override
+ Future<void> addStream(Stream<T> stream) {
+ return stream.forEach(add);
+ }
+
+ @override
+ Future<void> close() async {}
+
+ @override
+ Future<void> get done => close();
+}
diff --git a/tool/download_7za.dart b/tool/download_7za.dart
new file mode 100644
index 0000000..3d3e5a8
--- /dev/null
+++ b/tool/download_7za.dart
@@ -0,0 +1,26 @@
+import 'dart:io';
+
+/// Downloads the standalone 7-Zip binary that the Windows test run starts as
+/// `7za.exe` and stores it in the current working directory.
+///
+/// Throws an [HttpException] when the server doesn't answer with `200 OK`, so
+/// that an error page is never saved in place of the executable.
+Future<void> main() async {
+  final client = HttpClient();
+  try {
+    final request = await client.getUrl(Uri.parse(
+        'https://storage.googleapis.com/simon-public-euw3/assets/7za.exe'));
+    final response = await request.close();
+    if (response.statusCode != HttpStatus.ok) {
+      throw HttpException(
+        'Unexpected status ${response.statusCode} while downloading 7za.exe',
+        uri: request.uri,
+      );
+    }
+
+    await response.pipe(File('7za.exe').openWrite());
+  } finally {
+    // Release the connection even when the download fails half-way.
+    client.close(force: true);
+  }
+}