Add a shellSplit() function (dart-lang/io#23)
This makes it possible to parse individual strings (for example from
config files) as shell commands rather than requiring the user to
manually separate the arguments into a list.
diff --git a/pkgs/io/CHANGELOG.md b/pkgs/io/CHANGELOG.md
index a0e6bf1..568c10a 100644
--- a/pkgs/io/CHANGELOG.md
+++ b/pkgs/io/CHANGELOG.md
@@ -16,6 +16,9 @@
- Added `ProcessManager#spawnDetached`, which does not forward any I/O.
+- Added the `shellSplit()` function, which splits a command string into a list
+ of arguments in the same manner as the POSIX shell.
+
## 0.2.0
- Initial commit of...
diff --git a/pkgs/io/lib/io.dart b/pkgs/io/lib/io.dart
index 4e42aa9..e18dcac 100644
--- a/pkgs/io/lib/io.dart
+++ b/pkgs/io/lib/io.dart
@@ -6,3 +6,4 @@
export 'src/permissions.dart' show isExecutable;
export 'src/process_manager.dart' show ProcessManager, Spawn;
export 'src/shared_stdin.dart' show SharedStdIn, sharedStdIn;
+export 'src/shell_words.dart' show shellSplit;
diff --git a/pkgs/io/lib/src/shell_words.dart b/pkgs/io/lib/src/shell_words.dart
new file mode 100644
index 0000000..1fb1306
--- /dev/null
+++ b/pkgs/io/lib/src/shell_words.dart
@@ -0,0 +1,143 @@
+// Copyright 2017, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+import 'package:string_scanner/string_scanner.dart';
+
+/// Splits [command] into tokens according to [the POSIX shell
+/// specification][spec].
+///
+/// [spec]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html
+///
+/// This returns the unquoted values of quoted tokens. For example,
+/// `shellSplit('foo "bar baz"')` returns `["foo", "bar baz"]`. It does not
+/// currently support here-documents. It does *not* treat dynamic features such
+/// as parameter expansion specially. For example, `shellSplit("foo $(bar
+/// baz)")` returns `["foo", "$(bar", "baz)"]`.
+///
+/// Discards comments, which run from a `#` starting a token to end of line.
+///
+/// Throws a [FormatException] if [command] isn't a valid shell command.
+List<String> shellSplit(String command) {
+ var scanner = new StringScanner(command);
+ var results = <String>[];
+ var token = new StringBuffer();
+
+ // Whether a token is being parsed, as opposed to a separator character. This
+ // can't be derived from [token.isNotEmpty], because empty quoted tokens (such
+ // as `''`) are still tokens.
+ var hasToken = false;
+
+ while (!scanner.isDone) {
+ var next = scanner.readChar();
+ switch (next) {
+ case $backslash:
+ // Section 2.2.1: A <backslash> that is not quoted shall preserve the
+ // literal value of the following character, with the exception of a
+ // <newline>. If a <newline> follows the <backslash>, the shell shall
+ // interpret this as line continuation. The <backslash> and <newline>
+ // shall be removed before splitting the input into tokens. Since the
+ // escaped <newline> is removed entirely from the input and is not
+ // replaced by any white space, it cannot serve as a token separator.
+ if (scanner.scanChar($lf)) break;
+
+ hasToken = true;
+ token.writeCharCode(scanner.readChar());
+ break;
+
+ case $single_quote:
+ hasToken = true;
+ // Section 2.2.2: Enclosing characters in single-quotes ( '' ) shall
+ // preserve the literal value of each character within the
+ // single-quotes. A single-quote cannot occur within single-quotes.
+ var firstQuote = scanner.position - 1;
+ while (!scanner.scanChar($single_quote)) {
+ _checkUnmatchedQuote(scanner, firstQuote);
+ token.writeCharCode(scanner.readChar());
+ }
+ break;
+
+ case $double_quote:
+ hasToken = true;
+ // Section 2.2.3: Enclosing characters in double-quotes ( "" ) shall
+ // preserve the literal value of all characters within the
+ // double-quotes, with the exception of the characters backquote,
+ // <dollar-sign>, and <backslash>.
+ //
+ // (Note that this code doesn't preserve special behavior of backquote
+ // or dollar sign within double quotes, since those are dynamic
+ // features.)
+ var firstQuote = scanner.position - 1;
+ while (!scanner.scanChar($double_quote)) {
+ _checkUnmatchedQuote(scanner, firstQuote);
+
+ if (scanner.scanChar($backslash)) {
+ _checkUnmatchedQuote(scanner, firstQuote);
+
+ // The <backslash> shall retain its special meaning as an escape
+ // character (see Escape Character (Backslash)) only when followed
+ // by one of the following characters when considered special:
+ //
+ // $ ` " \ <newline>
+ var next = scanner.readChar();
+ if (next == $lf) continue;
+ if (next == $dollar ||
+ next == $backquote ||
+ next == $double_quote ||
+ next == $backslash) {
+ token.writeCharCode(next);
+ } else {
+ token.writeCharCode($backslash);
+ token.writeCharCode(next);
+ }
+ } else {
+ token.writeCharCode(scanner.readChar());
+ }
+ }
+ break;
+
+ case $hash:
+ // Section 2.3: If the current character is a '#' [and the previous
+ // character was not part of a word], it and all subsequent characters
+ // up to, but excluding, the next <newline> shall be discarded as a
+ // comment. The <newline> that ends the line is not considered part of
+ // the comment.
+ if (hasToken) {
+ token.writeCharCode($hash);
+ break;
+ }
+
+ while (!scanner.isDone && scanner.peekChar() != $lf) {
+ scanner.readChar();
+ }
+ break;
+
+ case $space:
+ case $tab:
+ case $lf:
+ if (hasToken) results.add(token.toString());
+ hasToken = false;
+ token.clear();
+ break;
+
+ default:
+ hasToken = true;
+ token.writeCharCode(next);
+ break;
+ }
+ }
+
+ if (hasToken) results.add(token.toString());
+ return results;
+}
+
+/// Throws a [FormatException] if [scanner] has reached the end of its input,
+/// meaning the quote opened at position [openingQuote] was never closed.
+void _checkUnmatchedQuote(StringScanner scanner, int openingQuote) {
+  if (scanner.isDone) {
+    var quoteChar = scanner.substring(openingQuote, openingQuote + 1);
+    var type = quoteChar == '"' ? "double" : "single";
+    scanner.error("Unmatched $type quote.", position: openingQuote, length: 1);
+  }
+}
diff --git a/pkgs/io/pubspec.yaml b/pkgs/io/pubspec.yaml
index 5b8d7d7..7fc9eb1 100644
--- a/pkgs/io/pubspec.yaml
+++ b/pkgs/io/pubspec.yaml
@@ -1,7 +1,7 @@
name: io
description: >
Utilities for the Dart VM Runtime.
-version: 0.2.1
+version: 0.3.0-dev
author: Dart Team <misc@dartlang.org>
homepage: https://github.com/dart-lang/io
diff --git a/pkgs/io/test/shell_words_test.dart b/pkgs/io/test/shell_words_test.dart
new file mode 100644
index 0000000..610c36d
--- /dev/null
+++ b/pkgs/io/test/shell_words_test.dart
@@ -0,0 +1,186 @@
+// Copyright 2017, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:test/test.dart';
+
+import 'package:io/io.dart';
+
+void main() {
+ group("shellSplit()", () {
+ group("returns an empty list for", () {
+ test("an empty string", () {
+ expect(shellSplit(""), isEmpty);
+ });
+
+ test("spaces", () {
+ expect(shellSplit(" "), isEmpty);
+ });
+
+ test("tabs", () {
+ expect(shellSplit("\t\t\t"), isEmpty);
+ });
+
+ test("newlines", () {
+ expect(shellSplit("\n\n\n"), isEmpty);
+ });
+
+ test("a comment", () {
+ expect(shellSplit("#foo bar baz"), isEmpty);
+ });
+
+ test("a mix", () {
+ expect(shellSplit(" \t\n# foo"), isEmpty);
+ });
+ });
+
+ group("parses unquoted", () {
+ test("a single token", () {
+ expect(shellSplit("foo"), equals(["foo"]));
+ });
+
+ test("multiple tokens", () {
+ expect(shellSplit("foo bar baz"), equals(["foo", "bar", "baz"]));
+ });
+
+ test("tokens separated by tabs", () {
+ expect(shellSplit("foo\tbar\tbaz"), equals(["foo", "bar", "baz"]));
+ });
+
+ test("tokens separated by newlines", () {
+ expect(shellSplit("foo\nbar\nbaz"), equals(["foo", "bar", "baz"]));
+ });
+
+ test("a token after whitespace", () {
+ expect(shellSplit(" \t\nfoo"), equals(["foo"]));
+ });
+
+ test("a token before whitespace", () {
+ expect(shellSplit("foo \t\n"), equals(["foo"]));
+ });
+
+ test("a token with a hash", () {
+ expect(shellSplit("foo#bar"), equals(["foo#bar"]));
+ });
+
+ test("a token before a comment", () {
+ expect(shellSplit("foo #bar"), equals(["foo"]));
+ });
+
+ test("dynamic shell features", () {
+ expect(
+ shellSplit(r"foo $(bar baz)"), equals(["foo", r"$(bar", "baz)"]));
+ expect(shellSplit("foo `bar baz`"), equals(["foo", "`bar", "baz`"]));
+ expect(shellSplit(r"foo $bar | baz"),
+ equals(["foo", r"$bar", "|", "baz"]));
+ });
+ });
+
+ group("parses a backslash", () {
+ test("before a normal character", () {
+ expect(shellSplit(r"foo\bar"), equals(["foobar"]));
+ });
+
+ test("before a dynamic shell feature", () {
+ expect(shellSplit(r"foo\$bar"), equals([r"foo$bar"]));
+ });
+
+ test("before a single quote", () {
+ expect(shellSplit(r"foo\'bar"), equals(["foo'bar"]));
+ });
+
+ test("before a double quote", () {
+ expect(shellSplit(r'foo\"bar'), equals(['foo"bar']));
+ });
+
+ test("before a space", () {
+ expect(shellSplit(r'foo\ bar'), equals(['foo bar']));
+ });
+
+ test("at the beginning of a token", () {
+ expect(shellSplit(r'\ foo'), equals([' foo']));
+ });
+
+ test("before whitespace followed by a hash", () {
+ expect(shellSplit(r'\ #foo'), equals([' #foo']));
+ });
+
+ test("before a newline in a token", () {
+ expect(shellSplit('foo\\\nbar'), equals(['foobar'])); // Input is foo, backslash, newline, bar.
+ });
+
+ test("before a newline outside a token", () {
+ expect(shellSplit('foo \\\n bar'), equals(['foo', 'bar'])); // Input is "foo ", backslash, newline, " bar".
+ });
+
+ test("before a backslash", () {
+ expect(shellSplit(r'foo\\bar'), equals([r'foo\bar']));
+ });
+ });
+
+ group("parses single quotes", () {
+ test("that are empty", () {
+ expect(shellSplit("''"), equals([""]));
+ });
+
+ test("that contain normal characters", () {
+ expect(shellSplit("'foo'"), equals(["foo"]));
+ });
+
+ test("that contain active characters", () {
+ expect(shellSplit("'\" \\#'"), equals([r'" \#'])); // Input is '" \#' (single-quoted).
+ });
+
+ test("before a hash", () {
+ expect(shellSplit("''#foo"), equals([r'#foo']));
+ });
+
+ test("inside a token", () {
+ expect(shellSplit("foo'bar baz'qux"), equals([r'foobar bazqux']));
+ });
+
+ test("without a closing quote", () {
+ expect(() => shellSplit("'foo bar"), throwsFormatException);
+ });
+ });
+
+ group("parses double quotes", () {
+ test("that are empty", () {
+ expect(shellSplit('""'), equals([""]));
+ });
+
+ test("that contain normal characters", () {
+ expect(shellSplit('"foo"'), equals(["foo"]));
+ });
+
+ test("that contain otherwise-active characters", () {
+ expect(shellSplit('"\' #"'), equals(["' #"]));
+ });
+
+ test("that contain escaped characters", () {
+ expect(shellSplit(r'"\$\`\"\\"'), equals(['\$`"\\'])); // Expected text is $, backquote, double quote, backslash.
+ });
+
+ test("that contain an escaped newline", () {
+ expect(shellSplit('"\\\n"'), equals([''])); // Input is a double-quoted backslash-newline pair.
+ });
+
+ test("that contain a backslash that's not an escape", () {
+ expect(shellSplit(r'"f\oo"'), equals([r'f\oo']));
+ });
+
+ test("before a hash", () {
+ expect(shellSplit('""#foo'), equals([r'#foo']));
+ });
+
+ test("inside a token", () {
+ expect(shellSplit('foo"bar baz"qux'), equals([r'foobar bazqux']));
+ });
+
+ test("without a closing quote", () {
+ expect(() => shellSplit('"foo bar'), throwsFormatException);
+ expect(() => shellSplit('"foo bar\\'), throwsFormatException); // Input ends with a backslash inside the open quote.
+ });
+ });
+ });
+}