Use Dart library to read and write tar files (#2817)

diff --git a/lib/src/io.dart b/lib/src/io.dart
index 0f50c96..23d1add 100644
--- a/lib/src/io.dart
+++ b/lib/src/io.dart
@@ -8,7 +8,6 @@
 import 'dart:convert';
 import 'dart:io';
 
-import 'package:async/async.dart';
 import 'package:http/http.dart' show ByteStream;
 import 'package:http_multi_server/http_multi_server.dart';
 import 'package:meta/meta.dart';
@@ -16,12 +15,12 @@
 import 'package:pedantic/pedantic.dart';
 import 'package:pool/pool.dart';
 import 'package:stack_trace/stack_trace.dart';
+import 'package:tar/tar.dart';
 
 import 'error_group.dart';
 import 'exceptions.dart';
 import 'exit_codes.dart' as exit_codes;
 import 'log.dart' as log;
-import 'sdk.dart';
 import 'utils.dart';
 
 export 'package:http/http.dart' show ByteStream;
@@ -34,6 +33,12 @@
 /// additional throughput.
 final _descriptorPool = Pool(32);
 
+/// The assumed default file mode on Linux and macOS (rw-r--r--).
+const _defaultMode = 420; // 644₈
+
+/// Mask for executable bits in file modes.
+const _executableMask = 0x49; // 001 001 001
+
 /// Determines if a file or directory exists at [path].
 bool entryExists(String path) =>
     dirExists(path) || fileExists(path) || linkExists(path);
@@ -216,6 +221,10 @@
   });
 }
 
+void _chmod(int mode, String file) {
+  runProcessSync('chmod', [mode.toRadixString(8), file]);
+}
+
 /// Deletes [file] if it's a symlink.
 ///
 /// The [File] class overwrites the symlink targets when writing to a file,
@@ -792,103 +801,97 @@
   return server;
 }
 
-String _tarPath = _findTarPath();
-
-/// Find a tar. Prefering system installed tar.
-///
-/// On linux tar should always be /bin/tar [See FHS 2.3][1]
-/// On MacOS it seems to always be /usr/bin/tar.
-///
-/// [1]: https://refspecs.linuxfoundation.org/FHS_2.3/fhs-2.3.pdf
-String _findTarPath() {
-  for (final file in ['/bin/tar', '/usr/bin/tar']) {
-    if (fileExists(file)) {
-      return file;
-    }
-  }
-  log.warning(
-      'Could not find a system `tar` installed in /bin/tar or /usr/bin/tar, '
-      'attempting to use tar from PATH');
-  return 'tar';
-}
-
 /// Extracts a `.tar.gz` file from [stream] to [destination].
 Future extractTarGz(Stream<List<int>> stream, String destination) async {
   log.fine('Extracting .tar.gz stream to $destination.');
-  final decompressed = stream.transform(GZipCodec().decoder);
 
-  // We used to stream directly to `tar`,  but that was fragile in certain
-  // settings.
-  final processResult = await withTempDir((tempDir) async {
-    final tarFile = path.join(tempDir, 'archive.tar');
-    try {
-      await _createFileFromStream(decompressed, tarFile);
-    } catch (e) {
-      // We don't know the error type here: https://dartbug.com/41270
-      throw FileSystemException('Could not decompress gz stream $e');
+  destination = path.absolute(destination);
+  final reader = TarReader(stream.transform(gzip.decoder));
+  while (await reader.moveNext()) {
+    final entry = reader.current;
+
+    final filePath = path.joinAll([
+      destination,
+      // Tar file names always use forward slashes.
+      ...path.posix.split(entry.name),
+    ]);
+
+    if (!path.isWithin(destination, filePath)) {
+      // The tar contains entries that would be written outside of the
+      // destination. That doesn't happen by accident, so assume that the tar
+      // file is malicious.
+      await reader.cancel();
+      throw FormatException('Invalid tar entry: ${entry.name}');
     }
-    return (Platform.isWindows)
-        ? runProcess(_pathTo7zip, ['x', tarFile], workingDir: destination)
-        : runProcess(_tarPath, [
-            if (_noUnknownKeyword) '--warning=no-unknown-keyword',
-            '--extract',
-            '--no-same-owner',
-            '--no-same-permissions',
-            '--directory',
-            destination,
-            '--file',
-            tarFile,
-          ]);
-  });
-  if (processResult.exitCode != exit_codes.SUCCESS) {
-    throw FileSystemException(
-        'Could not un-tar (exit code ${processResult.exitCode}). Error:\n'
-        '${processResult.stdout.join("\n")}\n'
-        '${processResult.stderr.join("\n")}');
-  }
-  log.fine('Extracted .tar.gz to $destination. Exit code $exitCode.');
-}
 
-/// Whether to include "--warning=no-unknown-keyword" when invoking tar.
-///
-/// BSD tar (the default on OS X) can insert strange headers to a tarfile that
-/// GNU tar (the default on Linux) is unable to understand. This will cause GNU
-/// tar to emit a number of harmless but scary-looking warnings which are
-/// silenced by this flag.
-final bool _noUnknownKeyword = _computeNoUnknownKeyword();
-bool _computeNoUnknownKeyword() {
-  if (!Platform.isLinux) return false;
-  var result = Process.runSync(_tarPath, ['--version']);
-  if (result.exitCode != 0) {
-    throw ApplicationException(
-        'Pub failed to run tar (exit code ${result.exitCode}):\n${result.stderr}');
+    final parentDirectory = path.dirname(filePath);
+
+    bool checkValidTarget(String linkTarget) {
+      final isValid = path.isWithin(destination, linkTarget);
+      if (!isValid) {
+        log.fine('Skipping ${entry.name}: Invalid link target');
+      }
+
+      return isValid;
+    }
+
+    switch (entry.type) {
+      case TypeFlag.dir:
+        ensureDir(filePath);
+        break;
+      case TypeFlag.reg:
+      case TypeFlag.regA:
+        // Regular file
+        deleteIfLink(filePath);
+        ensureDir(parentDirectory);
+
+        await _createFileFromStream(entry.contents, filePath);
+
+        if (Platform.isLinux || Platform.isMacOS) {
+          // Apply executable bits from the tar header, but keep the default
+          // r/w bits.
+          final mode = _defaultMode | (entry.header.mode & _executableMask);
+
+          if (mode != _defaultMode) {
+            _chmod(mode, filePath);
+          }
+        }
+        break;
+      case TypeFlag.symlink:
+        // Link to another file in this tar, relative to this entry.
+        final resolvedTarget = path.joinAll(
+            [parentDirectory, ...path.posix.split(entry.header.linkName)]);
+        if (!checkValidTarget(resolvedTarget)) {
+          // Don't allow links to files outside of this tar.
+          break;
+        }
+
+        ensureDir(parentDirectory);
+        createSymlink(
+            path.relative(resolvedTarget, from: parentDirectory), filePath);
+        break;
+      case TypeFlag.link:
+        // We extract hardlinks as symlinks too, but their linkName is relative
+        // to the root of the tar file (unlike symlink entries, whose linkName
+        // is relative to the directory containing the entry).
+        final fromDestination = path.join(destination, entry.header.linkName);
+        if (!checkValidTarget(fromDestination)) {
+          break; // Link points outside of the tar file.
+        }
+
+        final fromFile = path.relative(fromDestination, from: parentDirectory);
+        ensureDir(parentDirectory);
+        createSymlink(fromFile, filePath);
+        break;
+      default:
+        // Skip all other entry types.
+        continue;
+    }
   }
 
-  var match =
-      RegExp(r'^tar \(GNU tar\) (\d+).(\d+)\n').firstMatch(result.stdout);
-  if (match == null) return false;
-
-  var major = int.parse(match[1]);
-  var minor = int.parse(match[2]);
-  return major >= 2 || (major == 1 && minor >= 23);
+  log.fine('Extracted .tar.gz to $destination.');
 }
 
-final String _pathTo7zip = (() {
-  final candidate = runningFromDartRepo
-      ? path.join(dartRepoRoot, 'third_party', '7zip', '7za.exe')
-      : path.join(
-          sdk.rootDirectory,
-          'lib',
-          '_internal',
-          'pub',
-          'asset',
-          '7zip',
-          '7za.exe',
-        );
-  if (fileExists(candidate)) return candidate;
-  throw StateError('Could not find 7zip.');
-})();
-
 /// Create a .tar.gz archive from a list of entries.
 ///
 /// Each entry can be a [String], [Directory], or [File] object. The root of
@@ -897,110 +900,47 @@
 ///
 /// Returns a [ByteStream] that emits the contents of the archive.
 ByteStream createTarGz(List<String> contents, {String baseDir}) {
-  return ByteStream(StreamCompleter.fromFuture(Future.sync(() async {
-    var buffer = StringBuffer();
-    buffer.write('Creating .tar.gz stream containing:\n');
-    contents.forEach(buffer.writeln);
-    log.fine(buffer.toString());
+  var buffer = StringBuffer();
+  buffer.write('Creating .tar.gz stream containing:\n');
+  contents.forEach(buffer.writeln);
+  log.fine(buffer.toString());
 
-    baseDir ??= path.current;
-    baseDir = path.absolute(baseDir);
-    contents = contents.map((entry) {
-      entry = path.absolute(entry);
-      if (!path.isWithin(baseDir, entry)) {
-        throw ArgumentError('Entry $entry is not inside $baseDir.');
-      }
-      return path.relative(entry, from: baseDir);
-    }).toList();
+  baseDir ??= path.current;
+  baseDir = path.absolute(baseDir);
 
-    if (!Platform.isWindows) {
-      var args = [
-        // ustar is the most recent tar format that's compatible across all
-        // OSes.
-        '--format=ustar',
-        '--create',
-        '--gzip',
-        '--directory',
-        baseDir
-      ];
-
-      String stdin;
-      if (Platform.isLinux) {
-        // GNU tar flags.
-        // https://www.gnu.org/software/tar/manual/html_section/tar_33.html
-
-        args.addAll(['--files-from', '/dev/stdin']);
-        stdin = contents.join('\n');
-
-        /// Travis's version of tar apparently doesn't support passing unknown
-        /// values to the --owner and --group flags for some reason.
-        if (!isTravis) {
-          // The ustar format doesn't support large UIDs. We don't care about
-          // preserving ownership anyway, so we just set them to "pub".
-          args.addAll(['--owner=pub', '--group=pub']);
-        }
-      } else {
-        // OSX can take inputs in mtree format since at least OSX 10.9 (bsdtar
-        // 2.8.3). We use this to set the uname and gname, since it doesn't have
-        // flags for those.
-        //
-        // https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man1/tar.1.html
-        args.add('@/dev/stdin');
-
-        // The ustar format doesn't support large UIDs. We don't care about
-        // preserving ownership anyway, so we just set them to "pub".
-        // TODO(rnystrom): This assumes contents does not contain any
-        // directories.
-        var mtreeHeader = '#mtree\n/set uname=pub gname=pub type=file\n';
-
-        // We need a newline at the end, otherwise the last file would get
-        // ignored.
-        stdin =
-            mtreeHeader + contents.join('\n').replaceAll(' ', r'\040') + '\n';
-      }
-
-      // Setting the working directory should be unnecessary since we pass an
-      // explicit base directory to tar. However, on Mac when using an mtree
-      // input file, relative paths in the mtree file are interpreted as
-      // relative to the current working directory, not the "--directory"
-      // argument.
-      var process = await startProcess(_tarPath, args, workingDir: baseDir);
-      process.stdin.add(utf8.encode(stdin));
-      process.stdin.close();
-      return process.stdout;
+  final tarContents = Stream.fromIterable(contents.map((entry) {
+    entry = path.absolute(entry);
+    if (!path.isWithin(baseDir, entry)) {
+      throw ArgumentError('Entry $entry is not inside $baseDir.');
     }
 
-    // Don't use [withTempDir] here because we don't want to delete the temp
-    // directory until the returned stream has closed.
-    var tempDir = await _createSystemTempDir();
+    final relative = path.relative(entry, from: baseDir);
+    // On Windows, we can't open some files without normalizing them.
+    final file = File(path.normalize(entry));
+    final stat = file.statSync();
 
-    try {
-      // Create the file containing the list of files to compress.
-      var contentsPath = path.join(tempDir, 'files.txt');
-      writeTextFile(contentsPath, contents.join('\n'));
-
-      // Create the tar file.
-      var tarFile = path.join(tempDir, 'intermediate.tar');
-      var args = ['a', '-w$baseDir', tarFile, '@$contentsPath'];
-
-      // We're passing 'baseDir' both as '-w' and setting it as the working
-      // directory explicitly here intentionally. The former ensures that the
-      // files added to the archive have the correct relative path in the
-      // archive. The latter enables relative paths in the "-i" args to be
-      // resolved.
-      await runProcess(_pathTo7zip, args, workingDir: baseDir);
-
-      // GZIP it. 7zip doesn't support doing both as a single operation.
-      // Send the output to stdout.
-      args = ['a', 'unused', '-tgzip', '-so', tarFile];
-      return (await startProcess(_pathTo7zip, args))
-          .stdout
-          .transform(onDoneTransformer(() => deleteEntry(tempDir)));
-    } catch (_) {
-      deleteEntry(tempDir);
-      rethrow;
+    if (stat.type == FileSystemEntityType.link) {
+      log.message('$entry is a link locally, but will be uploaded as a '
+          'duplicate file.');
     }
-  })));
+
+    return TarEntry(
+      TarHeader(
+        // Ensure paths in tar files use forward slashes.
+        name: path.url.joinAll(path.split(relative)),
+        // We want to keep executable bits, but otherwise use the default
+        // file mode.
+        mode: _defaultMode | (stat.mode & _executableMask),
+        size: stat.size,
+        modified: stat.changed,
+        userName: 'pub',
+        groupName: 'pub',
+      ),
+      file.openRead(),
+    );
+  }));
+
+  return ByteStream(tarContents.transform(tarWriter).transform(gzip.encoder));
 }
 
 /// Contains the results of invoking a [Process] and waiting for it to complete.
diff --git a/pubspec.yaml b/pubspec.yaml
index 865e4b0..b0b64d2 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -24,6 +24,7 @@
   shelf: ^0.7.0
   source_span: ^1.4.0
   stack_trace: ^1.0.0
+  tar: ^0.3.0-nullsafety
   yaml: ^2.2.0
 
 dev_dependencies:
diff --git a/test/io_test.dart b/test/io_test.dart
index ecb77c0..8c627d1 100644
--- a/test/io_test.dart
+++ b/test/io_test.dart
@@ -9,6 +9,7 @@
 import 'package:path/path.dart' as path;
 import 'package:pub/src/exceptions.dart';
 import 'package:pub/src/io.dart';
+import 'package:tar/tar.dart';
 import 'package:test/test.dart';
 
 import 'descriptor.dart' as d;
@@ -227,6 +228,197 @@
     });
   });
 
+  group('extractTarGz', () {
+    test('decompresses simple archive', () async {
+      await withTempDir((tempDir) async {
+        await extractTarGz(
+            Stream.fromIterable(
+              [
+                base64Decode(
+                    'H4sIAP2weF4AA+3S0QqCMBiG4V2KeAE1nfuF7maViNBqzDyQ6N4z6yCIogOtg97ncAz2wTvfuxCW'
+                    'alZ6UFqttIiUYpXObWlzM57fqcyIkcxoU2ZKZyYvtErsvLNuuvboYpKotqm7uPUv74XYeBf7Oh66'
+                    '8I1dX+LH/qFbt6HaLHrnd9O/cQ0sxZv++UP/Qob+1srQX08/5dmf9z+le+erdJWOHyE9/3oPAAAA'
+                    'AAAAAAAAAAAAgM9dALkoaRMAKAAA')
+              ],
+            ),
+            tempDir);
+
+        await d.dir(appPath, [
+          d.rawPubspec({
+            'name': 'myapp',
+          }),
+        ]).validate(tempDir);
+      });
+    });
+
+    test('throws on tar error', () async {
+      await withTempDir((tempDir) async {
+        await expectLater(
+            () async => await extractTarGz(
+                Stream.fromIterable(
+                  [
+                    base64Decode(
+                        // Correct Gzip of a faulty tar archive.
+                        'H4sICBKyeF4AA215YXBwLnRhcgDt0sEKgjAAh/GdewrxAWpzbkJvs0pEaDVmHiR699Q6BBJ00Dr0'
+                        '/Y5jsD98850LYSMWJXuFkUJaITNTmEyPR09Caaut0lIXSkils1yKxCy76KFtLi4miWjqqo0H//Ze'
+                        'iLV3saviuQ3f2PUlfkwf2l0Tyv26c/44/xtDYJsP6a0trJn2z1765/3/UMbYvr+cf8rUn/e/pifn'
+                        'y3Sbjh8hvf16DwAAAAAAAAAAAAAAAIDPre4CU/3q/CcAAA==')
+                  ],
+                ),
+                tempDir),
+            throwsA(isA<TarException>()));
+      });
+    });
+
+    test('throws on gzip error', () async {
+      await withTempDir((tempDir) async {
+        await expectLater(
+          () async => await extractTarGz(
+              Stream.fromIterable(
+                [
+                  [10, 20, 30] // Not a good gz stream.
+                ],
+              ),
+              tempDir),
+          throwsA(
+            isA<FormatException>().having((e) => e.message, 'message',
+                contains('Filter error, bad data')),
+          ),
+        );
+      });
+    });
+
+    test(
+      'applies executable bits from tar file',
+      () => withTempDir((tempDir) async {
+        final entry = Stream.value(TarEntry.data(
+            TarHeader(
+              name: 'weird_exe',
+              typeFlag: TypeFlag.reg,
+              mode: int.parse('110', radix: 8),
+            ),
+            const []));
+
+        await extractTarGz(
+            entry.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+        expect(File('$tempDir/weird_exe').statSync().modeString(), 'rwxr-xr--');
+      }),
+      testOn: 'linux || mac-os',
+    );
+
+    test('extracts files and links', () {
+      return withTempDir((tempDir) async {
+        final entries = Stream.fromIterable([
+          TarEntry.data(
+            TarHeader(name: 'lib/main.txt', typeFlag: TypeFlag.reg),
+            utf8.encode('text content'),
+          ),
+          TarEntry.data(
+            TarHeader(
+              name: 'bin/main.txt',
+              typeFlag: TypeFlag.symlink,
+              linkName: '../lib/main.txt',
+            ),
+            const [],
+          ),
+          TarEntry.data(
+            TarHeader(
+              name: 'test/main.txt',
+              typeFlag: TypeFlag.link,
+              // TypeFlag.link is resolved against the root of the tar file
+              linkName: 'lib/main.txt',
+            ),
+            const [],
+          ),
+        ]);
+
+        await extractTarGz(
+            entries.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+        await d.dir(
+          '.',
+          [
+            d.file('lib/main.txt', 'text content'),
+            d.file('bin/main.txt', 'text content'),
+            d.file('test/main.txt', 'text content'),
+          ],
+        ).validate(tempDir);
+      });
+    });
+
+    test('preserves empty directories', () {
+      return withTempDir((tempDir) async {
+        final entry = Stream.value(TarEntry.data(
+            TarHeader(
+              name: 'bin/',
+              typeFlag: TypeFlag.dir,
+            ),
+            const []));
+
+        await extractTarGz(
+            entry.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+        await expectLater(
+            Directory(tempDir).list(),
+            emits(isA<Directory>()
+                .having((e) => path.basename(e.path), 'basename', 'bin')));
+      });
+    });
+
+    test('throws for entries escaping the tar file', () {
+      return withTempDir((tempDir) async {
+        final entry = Stream.value(TarEntry.data(
+            TarHeader(
+              name: '../other_package-1.2.3/lib/file.dart',
+              typeFlag: TypeFlag.reg,
+            ),
+            const []));
+
+        await expectLater(
+            extractTarGz(
+                entry.transform(tarWriter).transform(gzip.encoder), tempDir),
+            throwsA(isA<FormatException>()));
+
+        await expectLater(Directory(tempDir).list(), emitsDone);
+      });
+    });
+
+    test('skips symlinks escaping the tar file', () {
+      return withTempDir((tempDir) async {
+        final entry = Stream.value(TarEntry.data(
+            TarHeader(
+              name: 'nested/bad_link',
+              typeFlag: TypeFlag.symlink,
+              linkName: '../../outside.txt',
+            ),
+            const []));
+
+        await extractTarGz(
+            entry.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+        await expectLater(Directory(tempDir).list(), emitsDone);
+      });
+    });
+
+    test('skips hardlinks escaping the tar file', () {
+      return withTempDir((tempDir) async {
+        final entry = Stream.value(TarEntry.data(
+            TarHeader(
+              name: 'nested/bad_link',
+              typeFlag: TypeFlag.link,
+              linkName: '../outside.txt',
+            ),
+            const []));
+
+        await extractTarGz(
+            entry.transform(tarWriter).transform(gzip.encoder), tempDir);
+
+        await expectLater(Directory(tempDir).list(), emitsDone);
+      });
+    });
+  });
+
   testExistencePredicate('entryExists', entryExists,
       forFile: true,
       forFileSymlink: true,
@@ -370,62 +562,6 @@
       });
     }
   });
-  group('extractTarGz', () {
-    test('decompresses simple archive', () async {
-      await withTempDir((tempDir) async {
-        await extractTarGz(
-            Stream.fromIterable(
-              [
-                base64Decode(
-                    'H4sIAP2weF4AA+3S0QqCMBiG4V2KeAE1nfuF7maViNBqzDyQ6N4z6yCIogOtg97ncAz2wTvfuxCW'
-                    'alZ6UFqttIiUYpXObWlzM57fqcyIkcxoU2ZKZyYvtErsvLNuuvboYpKotqm7uPUv74XYeBf7Oh66'
-                    '8I1dX+LH/qFbt6HaLHrnd9O/cQ0sxZv++UP/Qob+1srQX08/5dmf9z+le+erdJWOHyE9/3oPAAAA'
-                    'AAAAAAAAAAAAgM9dALkoaRMAKAAA')
-              ],
-            ),
-            tempDir);
-
-        await d.dir(appPath, [
-          d.rawPubspec({
-            'name': 'myapp',
-          }),
-        ]).validate(tempDir);
-      });
-    });
-
-    test('throws on tar error', () async {
-      await withTempDir((tempDir) async {
-        await expectLater(
-            () async => await extractTarGz(
-                Stream.fromIterable(
-                  [
-                    base64Decode(
-                        // Correct Gzip of a faulty tar archive.
-                        'H4sICBKyeF4AA215YXBwLnRhcgDt0sEKgjAAh/GdewrxAWpzbkJvs0pEaDVmHiR699Q6BBJ00Dr0'
-                        '/Y5jsD98850LYSMWJXuFkUJaITNTmEyPR09Caaut0lIXSkils1yKxCy76KFtLi4miWjqqo0H//Ze'
-                        'iLV3saviuQ3f2PUlfkwf2l0Tyv26c/44/xtDYJsP6a0trJn2z1765/3/UMbYvr+cf8rUn/e/pifn'
-                        'y3Sbjh8hvf16DwAAAAAAAAAAAAAAAIDPre4CU/3q/CcAAA==')
-                  ],
-                ),
-                tempDir),
-            throwsA(isA<FileSystemException>()));
-      });
-    });
-
-    test('throws on gzip error', () async {
-      await withTempDir((tempDir) async {
-        await expectLater(
-            () async => await extractTarGz(
-                Stream.fromIterable(
-                  [
-                    [10, 20, 30] // Not a good gz stream.
-                  ],
-                ),
-                tempDir),
-            throwsA(isA<FileSystemException>()));
-      });
-    });
-  });
 }
 
 /// Like [withTempDir], but canonicalizes the path before passing it to [fn].