Use Dart library to read and write tar files (#2817)
diff --git a/lib/src/io.dart b/lib/src/io.dart
index 0f50c96..23d1add 100644
--- a/lib/src/io.dart
+++ b/lib/src/io.dart
@@ -8,7 +8,6 @@
import 'dart:convert';
import 'dart:io';
-import 'package:async/async.dart';
import 'package:http/http.dart' show ByteStream;
import 'package:http_multi_server/http_multi_server.dart';
import 'package:meta/meta.dart';
@@ -16,12 +15,12 @@
import 'package:pedantic/pedantic.dart';
import 'package:pool/pool.dart';
import 'package:stack_trace/stack_trace.dart';
+import 'package:tar/tar.dart';
import 'error_group.dart';
import 'exceptions.dart';
import 'exit_codes.dart' as exit_codes;
import 'log.dart' as log;
-import 'sdk.dart';
import 'utils.dart';
export 'package:http/http.dart' show ByteStream;
@@ -34,6 +33,12 @@
/// additional throughput.
final _descriptorPool = Pool(32);
+/// The assumed default file mode on Linux and macOS.
+const _defaultMode = 420; // 644₈ (rw-r--r--)
+
+/// Mask for executable bits in file modes.
+const _executableMask = 0x49; // 111₈ (--x--x--x)
+
/// Determines if a file or directory exists at [path].
bool entryExists(String path) =>
dirExists(path) || fileExists(path) || linkExists(path);
@@ -216,6 +221,15 @@
});
}
+/// Changes the permission bits of [file] to [mode] by running `chmod`.
+///
+/// [mode] is handed to `chmod` in octal notation. The outcome of the process
+/// is not inspected.
+void _chmod(int mode, String file) {
+ final octalMode = mode.toRadixString(8);
+ runProcessSync('chmod', [octalMode, file]);
+}
+
/// Deletes [file] if it's a symlink.
///
/// The [File] class overwrites the symlink targets when writing to a file,
@@ -792,103 +801,107 @@
return server;
}
-String _tarPath = _findTarPath();
-
-/// Find a tar. Prefering system installed tar.
-///
-/// On linux tar should always be /bin/tar [See FHS 2.3][1]
-/// On MacOS it seems to always be /usr/bin/tar.
-///
-/// [1]: https://refspecs.linuxfoundation.org/FHS_2.3/fhs-2.3.pdf
-String _findTarPath() {
- for (final file in ['/bin/tar', '/usr/bin/tar']) {
- if (fileExists(file)) {
- return file;
- }
- }
- log.warning(
- 'Could not find a system `tar` installed in /bin/tar or /usr/bin/tar, '
- 'attempting to use tar from PATH');
- return 'tar';
-}
-
/// Extracts a `.tar.gz` file from [stream] to [destination].
+///
+/// Every entry is resolved relative to [destination]. An entry that would be
+/// written outside of [destination] aborts the extraction with a
+/// [FormatException]; links whose target lies outside of [destination] are
+/// skipped. Hard links are extracted as symbolic links, and entry types other
+/// than directories, regular files and links are ignored.
Future extractTarGz(Stream<List<int>> stream, String destination) async {
log.fine('Extracting .tar.gz stream to $destination.');
- final decompressed = stream.transform(GZipCodec().decoder);
- // We used to stream directly to `tar`, but that was fragile in certain
- // settings.
- final processResult = await withTempDir((tempDir) async {
- final tarFile = path.join(tempDir, 'archive.tar');
- try {
- await _createFileFromStream(decompressed, tarFile);
- } catch (e) {
- // We don't know the error type here: https://dartbug.com/41270
- throw FileSystemException('Could not decompress gz stream $e');
+ destination = path.absolute(destination);
+ final reader = TarReader(stream.transform(gzip.decoder));
+ while (await reader.moveNext()) {
+ final entry = reader.current;
+
+ final filePath = path.joinAll([
+ destination,
+ // Tar file names always use forward slashes
+ ...path.posix.split(entry.name),
+ ]);
+
+ // A directory entry naming the archive root itself (e.g. `./`) resolves to
+ // [destination], which `path.isWithin` does not consider to be "within".
+ final isArchiveRoot =
+ entry.type == TypeFlag.dir && path.equals(destination, filePath);
+ if (!isArchiveRoot && !path.isWithin(destination, filePath)) {
+ // The tar contains entries that would be written outside of the
+ // destination. That doesn't happen by accident, assume that the tar file
+ // is malicious.
+ await reader.cancel();
+ throw FormatException('Invalid tar entry: ${entry.name}');
}
- return (Platform.isWindows)
- ? runProcess(_pathTo7zip, ['x', tarFile], workingDir: destination)
- : runProcess(_tarPath, [
- if (_noUnknownKeyword) '--warning=no-unknown-keyword',
- '--extract',
- '--no-same-owner',
- '--no-same-permissions',
- '--directory',
- destination,
- '--file',
- tarFile,
- ]);
- });
- if (processResult.exitCode != exit_codes.SUCCESS) {
- throw FileSystemException(
- 'Could not un-tar (exit code ${processResult.exitCode}). Error:\n'
- '${processResult.stdout.join("\n")}\n'
- '${processResult.stderr.join("\n")}');
- }
- log.fine('Extracted .tar.gz to $destination. Exit code $exitCode.');
-}
-/// Whether to include "--warning=no-unknown-keyword" when invoking tar.
-///
-/// BSD tar (the default on OS X) can insert strange headers to a tarfile that
-/// GNU tar (the default on Linux) is unable to understand. This will cause GNU
-/// tar to emit a number of harmless but scary-looking warnings which are
-/// silenced by this flag.
-final bool _noUnknownKeyword = _computeNoUnknownKeyword();
-bool _computeNoUnknownKeyword() {
- if (!Platform.isLinux) return false;
- var result = Process.runSync(_tarPath, ['--version']);
- if (result.exitCode != 0) {
- throw ApplicationException(
- 'Pub failed to run tar (exit code ${result.exitCode}):\n${result.stderr}');
+ final parentDirectory = path.dirname(filePath);
+
+ bool checkValidTarget(String linkTarget) {
+ final isValid = path.isWithin(destination, linkTarget);
+ if (!isValid) {
+ log.fine('Skipping ${entry.name}: Invalid link target');
+ }
+
+ return isValid;
+ }
+
+ switch (entry.type) {
+ case TypeFlag.dir:
+ ensureDir(filePath);
+ break;
+ case TypeFlag.reg:
+ case TypeFlag.regA:
+ // Regular file
+ deleteIfLink(filePath);
+ ensureDir(parentDirectory);
+
+ await _createFileFromStream(entry.contents, filePath);
+
+ if (Platform.isLinux || Platform.isMacOS) {
+ // Apply executable bits from tar header, but don't change r/w bits
+ // from the default
+ final mode = _defaultMode | (entry.header.mode & _executableMask);
+
+ if (mode != _defaultMode) {
+ _chmod(mode, filePath);
+ }
+ }
+ break;
+ case TypeFlag.symlink:
+ // Link to another file in this tar, relative from this entry.
+ final resolvedTarget = path.joinAll(
+ [parentDirectory, ...path.posix.split(entry.header.linkName)]);
+ if (!checkValidTarget(resolvedTarget)) {
+ // Don't allow links to files outside of this tar.
+ break;
+ }
+
+ ensureDir(parentDirectory);
+ createSymlink(
+ path.relative(resolvedTarget, from: parentDirectory), filePath);
+ break;
+ case TypeFlag.link:
+ // We generate hardlinks as symlinks too, but their linkName is relative
+ // to the root of the tar file (unlike symlink entries, whose linkName
+ // is relative to the entry itself).
+ final fromDestination = path.join(destination, entry.header.linkName);
+ if (!checkValidTarget(fromDestination)) {
+ break; // Link points outside of the tar file.
+ }
+
+ final fromFile = path.relative(fromDestination, from: parentDirectory);
+ ensureDir(parentDirectory);
+ createSymlink(fromFile, filePath);
+ break;
+ default:
+ // Only extract files
+ continue;
+ }
}
- var match =
- RegExp(r'^tar \(GNU tar\) (\d+).(\d+)\n').firstMatch(result.stdout);
- if (match == null) return false;
-
- var major = int.parse(match[1]);
- var minor = int.parse(match[2]);
- return major >= 2 || (major == 1 && minor >= 23);
+ log.fine('Extracted .tar.gz to $destination.');
}
-final String _pathTo7zip = (() {
- final candidate = runningFromDartRepo
- ? path.join(dartRepoRoot, 'third_party', '7zip', '7za.exe')
- : path.join(
- sdk.rootDirectory,
- 'lib',
- '_internal',
- 'pub',
- 'asset',
- '7zip',
- '7za.exe',
- );
- if (fileExists(candidate)) return candidate;
- throw StateError('Could not find 7zip.');
-})();
-
/// Create a .tar.gz archive from a list of entries.
///
/// Each entry can be a [String], [Directory], or [File] object. The root of
@@ -897,110 +900,56 @@
///
/// Returns a [ByteStream] that emits the contents of the archive.
+///
+/// Entries are read lazily while the returned stream is consumed. An entry
+/// that is not inside [baseDir] is reported as an [ArgumentError] at that
+/// point. Only executable bits are taken from the files' modes; user and group
+/// names are always recorded as `pub`.
ByteStream createTarGz(List<String> contents, {String baseDir}) {
- return ByteStream(StreamCompleter.fromFuture(Future.sync(() async {
- var buffer = StringBuffer();
- buffer.write('Creating .tar.gz stream containing:\n');
- contents.forEach(buffer.writeln);
- log.fine(buffer.toString());
+ var buffer = StringBuffer();
+ buffer.write('Creating .tar.gz stream containing:\n');
+ contents.forEach(buffer.writeln);
+ log.fine(buffer.toString());
- baseDir ??= path.current;
- baseDir = path.absolute(baseDir);
- contents = contents.map((entry) {
- entry = path.absolute(entry);
- if (!path.isWithin(baseDir, entry)) {
- throw ArgumentError('Entry $entry is not inside $baseDir.');
- }
- return path.relative(entry, from: baseDir);
- }).toList();
+ baseDir ??= path.current;
+ baseDir = path.absolute(baseDir);
- if (!Platform.isWindows) {
- var args = [
- // ustar is the most recent tar format that's compatible across all
- // OSes.
- '--format=ustar',
- '--create',
- '--gzip',
- '--directory',
- baseDir
- ];
-
- String stdin;
- if (Platform.isLinux) {
- // GNU tar flags.
- // https://www.gnu.org/software/tar/manual/html_section/tar_33.html
-
- args.addAll(['--files-from', '/dev/stdin']);
- stdin = contents.join('\n');
-
- /// Travis's version of tar apparently doesn't support passing unknown
- /// values to the --owner and --group flags for some reason.
- if (!isTravis) {
- // The ustar format doesn't support large UIDs. We don't care about
- // preserving ownership anyway, so we just set them to "pub".
- args.addAll(['--owner=pub', '--group=pub']);
- }
- } else {
- // OSX can take inputs in mtree format since at least OSX 10.9 (bsdtar
- // 2.8.3). We use this to set the uname and gname, since it doesn't have
- // flags for those.
- //
- // https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man1/tar.1.html
- args.add('@/dev/stdin');
-
- // The ustar format doesn't support large UIDs. We don't care about
- // preserving ownership anyway, so we just set them to "pub".
- // TODO(rnystrom): This assumes contents does not contain any
- // directories.
- var mtreeHeader = '#mtree\n/set uname=pub gname=pub type=file\n';
-
- // We need a newline at the end, otherwise the last file would get
- // ignored.
- stdin =
- mtreeHeader + contents.join('\n').replaceAll(' ', r'\040') + '\n';
- }
-
- // Setting the working directory should be unnecessary since we pass an
- // explicit base directory to tar. However, on Mac when using an mtree
- // input file, relative paths in the mtree file are interpreted as
- // relative to the current working directory, not the "--directory"
- // argument.
- var process = await startProcess(_tarPath, args, workingDir: baseDir);
- process.stdin.add(utf8.encode(stdin));
- process.stdin.close();
- return process.stdout;
+ final tarContents = Stream.fromIterable(contents.map((entry) {
+ entry = path.absolute(entry);
+ if (!path.isWithin(baseDir, entry)) {
+ throw ArgumentError('Entry $entry is not inside $baseDir.');
}
- // Don't use [withTempDir] here because we don't want to delete the temp
- // directory until the returned stream has closed.
- var tempDir = await _createSystemTempDir();
+ final relative = path.relative(entry, from: baseDir);
+ // On Windows, we can't open some files without normalizing them
+ final file = File(path.normalize(entry));
+ final stat = file.statSync();
- try {
- // Create the file containing the list of files to compress.
- var contentsPath = path.join(tempDir, 'files.txt');
- writeTextFile(contentsPath, contents.join('\n'));
-
- // Create the tar file.
- var tarFile = path.join(tempDir, 'intermediate.tar');
- var args = ['a', '-w$baseDir', tarFile, '@$contentsPath'];
-
- // We're passing 'baseDir' both as '-w' and setting it as the working
- // directory explicitly here intentionally. The former ensures that the
- // files added to the archive have the correct relative path in the
- // archive. The latter enables relative paths in the "-i" args to be
- // resolved.
- await runProcess(_pathTo7zip, args, workingDir: baseDir);
-
- // GZIP it. 7zip doesn't support doing both as a single operation.
- // Send the output to stdout.
- args = ['a', 'unused', '-tgzip', '-so', tarFile];
- return (await startProcess(_pathTo7zip, args))
- .stdout
- .transform(onDoneTransformer(() => deleteEntry(tempDir)));
- } catch (_) {
- deleteEntry(tempDir);
- rethrow;
+ // `statSync` follows symbolic links, so `stat.type` never reports a link;
+ // ask the file system about the entry itself instead.
+ if (FileSystemEntity.isLinkSync(file.path)) {
+ log.message('$entry is a link locally, but will be uploaded as a '
+ 'duplicate file.');
}
- })));
+
+ return TarEntry(
+ TarHeader(
+ // Ensure paths in tar files use forward slashes
+ name: path.url.joinAll(path.split(relative)),
+ // We want to keep executable bits, but otherwise use the default
+ // file mode
+ mode: _defaultMode | (stat.mode & _executableMask),
+ size: stat.size,
+ // `changed` is the metadata change time (the creation time on
+ // Windows); the tar header wants the content modification time.
+ modified: stat.modified,
+ userName: 'pub',
+ groupName: 'pub',
+ ),
+ file.openRead(),
+ );
+ }));
+
+ return ByteStream(tarContents.transform(tarWriter).transform(gzip.encoder));
}
/// Contains the results of invoking a [Process] and waiting for it to complete.
diff --git a/pubspec.yaml b/pubspec.yaml
index 865e4b0..b0b64d2 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -24,6 +24,7 @@
shelf: ^0.7.0
source_span: ^1.4.0
stack_trace: ^1.0.0
+ tar: ^0.3.0-nullsafety
yaml: ^2.2.0
dev_dependencies:
diff --git a/test/io_test.dart b/test/io_test.dart
index ecb77c0..8c627d1 100644
--- a/test/io_test.dart
+++ b/test/io_test.dart
@@ -9,6 +9,7 @@
import 'package:path/path.dart' as path;
import 'package:pub/src/exceptions.dart';
import 'package:pub/src/io.dart';
+import 'package:tar/tar.dart';
import 'package:test/test.dart';
import 'descriptor.dart' as d;
@@ -227,6 +228,197 @@
});
});
+ group('extractTarGz', () {
+ test('decompresses simple archive', () async {
+ await withTempDir((tempDir) async {
+ await extractTarGz(
+ Stream.fromIterable(
+ [
+ base64Decode(
+ 'H4sIAP2weF4AA+3S0QqCMBiG4V2KeAE1nfuF7maViNBqzDyQ6N4z6yCIogOtg97ncAz2wTvfuxCW'
+ 'alZ6UFqttIiUYpXObWlzM57fqcyIkcxoU2ZKZyYvtErsvLNuuvboYpKotqm7uPUv74XYeBf7Oh66'
+ '8I1dX+LH/qFbt6HaLHrnd9O/cQ0sxZv++UP/Qob+1srQX08/5dmf9z+le+erdJWOHyE9/3oPAAAA'
+ 'AAAAAAAAAAAAgM9dALkoaRMAKAAA')
+ ],
+ ),
+ tempDir);
+
+ await d.dir(appPath, [
+ d.rawPubspec({
+ 'name': 'myapp',
+ }),
+ ]).validate(tempDir);
+ });
+ });
+
+ test('throws on tar error', () async {
+ await withTempDir((tempDir) async {
+ await expectLater(
+ () async => await extractTarGz(
+ Stream.fromIterable(
+ [
+ base64Decode(
+ // Correct Gzip of a faulty tar archive.
+ 'H4sICBKyeF4AA215YXBwLnRhcgDt0sEKgjAAh/GdewrxAWpzbkJvs0pEaDVmHiR699Q6BBJ00Dr0'
+ '/Y5jsD98850LYSMWJXuFkUJaITNTmEyPR09Caaut0lIXSkils1yKxCy76KFtLi4miWjqqo0H//Ze'
+ 'iLV3saviuQ3f2PUlfkwf2l0Tyv26c/44/xtDYJsP6a0trJn2z1765/3/UMbYvr+cf8rUn/e/pifn'
+ 'y3Sbjh8hvf16DwAAAAAAAAAAAAAAAIDPre4CU/3q/CcAAA==')
+ ],
+ ),
+ tempDir),
+ throwsA(isA<TarException>()));
+ });
+ });
+
+ test('throws on gzip error', () async {
+ await withTempDir((tempDir) async {
+ await expectLater(
+ () async => await extractTarGz(
+ Stream.fromIterable(
+ [
+ [10, 20, 30] // Not a good gz stream.
+ ],
+ ),
+ tempDir),
+ throwsA(
+ isA<FormatException>().having((e) => e.message, 'message',
+ contains('Filter error, bad data')),
+ ),
+ );
+ });
+ });
+
+ test(
+ 'applies executable bits from tar file',
+ () => withTempDir((tempDir) async {
+ final entry = Stream.value(TarEntry.data(
+ TarHeader(
+ name: 'weird_exe',
+ typeFlag: TypeFlag.reg,
+ mode: int.parse('110', radix: 8), // --x--x---; 644₈ | 110₈ = 754₈
+ ),
+ const []));
+
+ await extractTarGz(
+ entry.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+ expect(File('$tempDir/weird_exe').statSync().modeString(), 'rwxr-xr--');
+ }),
+ testOn: 'linux || mac-os',
+ );
+
+ test('extracts files and links', () {
+ return withTempDir((tempDir) async {
+ final entries = Stream.fromIterable([
+ TarEntry.data(
+ TarHeader(name: 'lib/main.txt', typeFlag: TypeFlag.reg),
+ utf8.encode('text content'),
+ ),
+ TarEntry.data(
+ TarHeader(
+ name: 'bin/main.txt',
+ typeFlag: TypeFlag.symlink,
+ linkName: '../lib/main.txt',
+ ),
+ const [],
+ ),
+ TarEntry.data(
+ TarHeader(
+ name: 'test/main.txt',
+ typeFlag: TypeFlag.link,
+ // TypeFlag.link is resolved against the root of the tar file
+ linkName: 'lib/main.txt',
+ ),
+ const [],
+ ),
+ ]);
+
+ await extractTarGz(
+ entries.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+ await d.dir(
+ '.',
+ [
+ d.file('lib/main.txt', 'text content'),
+ d.file('bin/main.txt', 'text content'),
+ d.file('test/main.txt', 'text content'),
+ ],
+ ).validate(tempDir);
+ });
+ });
+
+ test('preserves empty directories', () {
+ return withTempDir((tempDir) async {
+ final entry = Stream.value(TarEntry.data(
+ TarHeader(
+ name: 'bin/',
+ typeFlag: TypeFlag.dir,
+ ),
+ const []));
+
+ await extractTarGz(
+ entry.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+ await expectLater(
+ Directory(tempDir).list(),
+ emits(isA<Directory>()
+ .having((e) => path.basename(e.path), 'basename', 'bin')));
+ });
+ });
+
+ test('throws for entries escaping the tar file', () {
+ return withTempDir((tempDir) async {
+ final entry = Stream.value(TarEntry.data(
+ TarHeader(
+ name: '../other_package-1.2.3/lib/file.dart',
+ typeFlag: TypeFlag.reg,
+ ),
+ const []));
+
+ await expectLater(
+ extractTarGz(
+ entry.transform(tarWriter).transform(gzip.encoder), tempDir),
+ throwsA(isA<FormatException>()));
+
+ await expectLater(Directory(tempDir).list(), emitsDone);
+ });
+ });
+
+ test('skips symlinks escaping the tar file', () {
+ return withTempDir((tempDir) async {
+ final entry = Stream.value(TarEntry.data(
+ TarHeader(
+ name: 'nested/bad_link',
+ typeFlag: TypeFlag.symlink,
+ linkName: '../../outside.txt',
+ ),
+ const []));
+
+ await extractTarGz(
+ entry.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+ await expectLater(Directory(tempDir).list(), emitsDone);
+ });
+ });
+
+ test('skips hardlinks escaping the tar file', () {
+ return withTempDir((tempDir) async {
+ final entry = Stream.value(TarEntry.data(
+ TarHeader(
+ name: 'nested/bad_link',
+ typeFlag: TypeFlag.link,
+ linkName: '../outside.txt',
+ ),
+ const []));
+
+ await extractTarGz(
+ entry.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+ await expectLater(Directory(tempDir).list(), emitsDone);
+ });
+ });
+ });
+
testExistencePredicate('entryExists', entryExists,
forFile: true,
forFileSymlink: true,
@@ -370,62 +562,6 @@
});
}
});
- group('extractTarGz', () {
- test('decompresses simple archive', () async {
- await withTempDir((tempDir) async {
- await extractTarGz(
- Stream.fromIterable(
- [
- base64Decode(
- 'H4sIAP2weF4AA+3S0QqCMBiG4V2KeAE1nfuF7maViNBqzDyQ6N4z6yCIogOtg97ncAz2wTvfuxCW'
- 'alZ6UFqttIiUYpXObWlzM57fqcyIkcxoU2ZKZyYvtErsvLNuuvboYpKotqm7uPUv74XYeBf7Oh66'
- '8I1dX+LH/qFbt6HaLHrnd9O/cQ0sxZv++UP/Qob+1srQX08/5dmf9z+le+erdJWOHyE9/3oPAAAA'
- 'AAAAAAAAAAAAgM9dALkoaRMAKAAA')
- ],
- ),
- tempDir);
-
- await d.dir(appPath, [
- d.rawPubspec({
- 'name': 'myapp',
- }),
- ]).validate(tempDir);
- });
- });
-
- test('throws on tar error', () async {
- await withTempDir((tempDir) async {
- await expectLater(
- () async => await extractTarGz(
- Stream.fromIterable(
- [
- base64Decode(
- // Correct Gzip of a faulty tar archive.
- 'H4sICBKyeF4AA215YXBwLnRhcgDt0sEKgjAAh/GdewrxAWpzbkJvs0pEaDVmHiR699Q6BBJ00Dr0'
- '/Y5jsD98850LYSMWJXuFkUJaITNTmEyPR09Caaut0lIXSkils1yKxCy76KFtLi4miWjqqo0H//Ze'
- 'iLV3saviuQ3f2PUlfkwf2l0Tyv26c/44/xtDYJsP6a0trJn2z1765/3/UMbYvr+cf8rUn/e/pifn'
- 'y3Sbjh8hvf16DwAAAAAAAAAAAAAAAIDPre4CU/3q/CcAAA==')
- ],
- ),
- tempDir),
- throwsA(isA<FileSystemException>()));
- });
- });
-
- test('throws on gzip error', () async {
- await withTempDir((tempDir) async {
- await expectLater(
- () async => await extractTarGz(
- Stream.fromIterable(
- [
- [10, 20, 30] // Not a good gz stream.
- ],
- ),
- tempDir),
- throwsA(isA<FileSystemException>()));
- });
- });
- });
}
/// Like [withTempDir], but canonicalizes the path before passing it to [fn].