Add a shellSplit() function (dart-lang/io#23)

This makes it possible to parse individual strings (for example from
config files) as shell commands rather than requiring the user to
manually separate the arguments into a list.
diff --git a/pkgs/io/CHANGELOG.md b/pkgs/io/CHANGELOG.md
index a0e6bf1..568c10a 100644
--- a/pkgs/io/CHANGELOG.md
+++ b/pkgs/io/CHANGELOG.md
@@ -16,6 +16,9 @@
 
 - Added `ProcessManager#spawnDetached`, which does not forward any I/O.
 
+- Added the `shellSplit()` function, which splits a string into a list of
+  arguments in the same manner as the POSIX shell.
+
 ## 0.2.0
 
 - Initial commit of...
diff --git a/pkgs/io/lib/io.dart b/pkgs/io/lib/io.dart
index 4e42aa9..e18dcac 100644
--- a/pkgs/io/lib/io.dart
+++ b/pkgs/io/lib/io.dart
@@ -6,3 +6,4 @@
 export 'src/permissions.dart' show isExecutable;
 export 'src/process_manager.dart' show ProcessManager, Spawn;
 export 'src/shared_stdin.dart' show SharedStdIn, sharedStdIn;
+export 'src/shell_words.dart' show shellSplit;
diff --git a/pkgs/io/lib/src/shell_words.dart b/pkgs/io/lib/src/shell_words.dart
new file mode 100644
index 0000000..1fb1306
--- /dev/null
+++ b/pkgs/io/lib/src/shell_words.dart
@@ -0,0 +1,143 @@
+// Copyright 2017, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:charcode/charcode.dart';
+import 'package:string_scanner/string_scanner.dart';
+
+/// Splits [command] into tokens according to [the POSIX shell
+/// specification][spec].
+///
+/// [spec]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html
+///
+/// This returns the unquoted values of quoted tokens. For example,
+/// `shellSplit('foo "bar baz"')` returns `["foo", "bar baz"]`. It does not
+/// currently support here-documents. It does *not* treat dynamic features such
+/// as parameter expansion specially. For example,
+/// `shellSplit(r"foo $(bar baz)")` returns `["foo", r"$(bar", "baz)"]`.
+///
+/// Comments in [command] are discarded up to (but excluding) the next newline.
+///
+/// Throws a [FormatException] if [command] isn't a valid shell command.
+List<String> shellSplit(String command) {
+  var scanner = new StringScanner(command);
+  var results = <String>[];
+  var token = new StringBuffer();
+
+  // Whether a token is being parsed, as opposed to a separator character. This
+  // can't be inferred from `token.isNotEmpty`, because empty quoted tokens
+  // (such as `''`) can exist.
+  var hasToken = false;
+
+  while (!scanner.isDone) {
+    var next = scanner.readChar();
+    switch (next) {
+      case $backslash:
+        // Section 2.2.1: A <backslash> that is not quoted shall preserve the
+        // literal value of the following character, with the exception of a
+        // <newline>. If a <newline> follows the <backslash>, the shell shall
+        // interpret this as line continuation. The <backslash> and <newline>
+        // shall be removed before splitting the input into tokens. Since the
+        // escaped <newline> is removed entirely from the input and is not
+        // replaced by any white space, it cannot serve as a token separator.
+        if (scanner.scanChar($lf)) break;
+
+        hasToken = true;
+        token.writeCharCode(scanner.readChar());
+        break;
+
+      case $single_quote:
+        hasToken = true;
+        // Section 2.2.2: Enclosing characters in single-quotes ( '' ) shall
+        // preserve the literal value of each character within the
+        // single-quotes. A single-quote cannot occur within single-quotes.
+        var firstQuote = scanner.position - 1; // Opening quote's index.
+        while (!scanner.scanChar($single_quote)) {
+          _checkUnmatchedQuote(scanner, firstQuote);
+          token.writeCharCode(scanner.readChar());
+        }
+        break;
+
+      case $double_quote:
+        hasToken = true;
+        // Section 2.2.3: Enclosing characters in double-quotes ( "" ) shall
+        // preserve the literal value of all characters within the
+        // double-quotes, with the exception of the characters backquote,
+        // <dollar-sign>, and <backslash>.
+        //
+        // (Note that this code doesn't preserve special behavior of backquote
+        // or dollar sign within double quotes, since those are dynamic
+        // features.)
+        var firstQuote = scanner.position - 1; // Opening quote's index.
+        while (!scanner.scanChar($double_quote)) {
+          _checkUnmatchedQuote(scanner, firstQuote);
+
+          if (scanner.scanChar($backslash)) {
+            _checkUnmatchedQuote(scanner, firstQuote);
+
+            // The <backslash> shall retain its special meaning as an escape
+            // character (see Escape Character (Backslash)) only when followed
+            // by one of the following characters when considered special:
+            //
+            //     $ ` " \ <newline>
+            var next = scanner.readChar();
+            if (next == $lf) continue; // Line continuation: emit nothing.
+            if (next == $dollar ||
+                next == $backquote ||
+                next == $double_quote ||
+                next == $backslash) {
+              token.writeCharCode(next);
+            } else {
+              token.writeCharCode($backslash);
+              token.writeCharCode(next);
+            }
+          } else {
+            token.writeCharCode(scanner.readChar());
+          }
+        }
+        break;
+
+      case $hash:
+        // Section 2.3: If the current character is a '#' [and the previous
+        // character was not part of a word], it and all subsequent characters
+        // up to, but excluding, the next <newline> shall be discarded as a
+        // comment. The <newline> that ends the line is not considered part of
+        // the comment.
+        if (hasToken) {
+          token.writeCharCode($hash);
+          break;
+        }
+
+        while (!scanner.isDone && scanner.peekChar() != $lf) {
+          scanner.readChar();
+        }
+        break;
+
+      case $space:
+      case $tab:
+      case $lf:
+        if (hasToken) results.add(token.toString());
+        hasToken = false;
+        token.clear();
+        break;
+
+      default:
+        hasToken = true;
+        token.writeCharCode(next);
+        break;
+    }
+  }
+
+  if (hasToken) results.add(token.toString()); // Flush the final token.
+  return results;
+}
+
+/// Throws a [FormatException] if [scanner] is done, indicating that a closing
+/// quote matching the one at position [openingQuote] is missing.
+void _checkUnmatchedQuote(StringScanner scanner, int openingQuote) {
+  if (!scanner.isDone) return; // Input remains; no error yet.
+  var type = scanner.substring(openingQuote, openingQuote + 1) == '"'
+      ? "double"
+      : "single";
+  scanner.error("Unmatched $type quote.", position: openingQuote, length: 1);
+}
diff --git a/pkgs/io/pubspec.yaml b/pkgs/io/pubspec.yaml
index 5b8d7d7..7fc9eb1 100644
--- a/pkgs/io/pubspec.yaml
+++ b/pkgs/io/pubspec.yaml
@@ -1,7 +1,7 @@
 name: io
 description: >
   Utilities for the Dart VM Runtime.
-version: 0.2.1
+version: 0.3.0-dev
 author: Dart Team <misc@dartlang.org>
 homepage: https://github.com/dart-lang/io
 
diff --git a/pkgs/io/test/shell_words_test.dart b/pkgs/io/test/shell_words_test.dart
new file mode 100644
index 0000000..610c36d
--- /dev/null
+++ b/pkgs/io/test/shell_words_test.dart
@@ -0,0 +1,186 @@
+// Copyright 2017, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'package:test/test.dart';
+
+import 'package:io/io.dart';
+
+void main() {
+  group("shellSplit()", () {
+    group("returns an empty list for", () {
+      test("an empty string", () {
+        expect(shellSplit(""), isEmpty);
+      });
+
+      test("spaces", () {
+        expect(shellSplit("    "), isEmpty);
+      });
+
+      test("tabs", () {
+        expect(shellSplit("\t\t\t"), isEmpty);
+      });
+
+      test("newlines", () {
+        expect(shellSplit("\n\n\n"), isEmpty);
+      });
+
+      test("a comment", () {
+        expect(shellSplit("#foo bar baz"), isEmpty);
+      });
+
+      test("a mix", () {
+        expect(shellSplit(" \t\n# foo"), isEmpty);
+      });
+    });
+
+    group("parses unquoted", () {
+      test("a single token", () {
+        expect(shellSplit("foo"), equals(["foo"]));
+      });
+
+      test("multiple tokens", () {
+        expect(shellSplit("foo bar baz"), equals(["foo", "bar", "baz"]));
+      });
+
+      test("tokens separated by tabs", () {
+        expect(shellSplit("foo\tbar\tbaz"), equals(["foo", "bar", "baz"]));
+      });
+
+      test("tokens separated by newlines", () {
+        expect(shellSplit("foo\nbar\nbaz"), equals(["foo", "bar", "baz"]));
+      });
+
+      test("a token after whitespace", () {
+        expect(shellSplit(" \t\nfoo"), equals(["foo"]));
+      });
+
+      test("a token before whitespace", () {
+        expect(shellSplit("foo \t\n"), equals(["foo"]));
+      });
+
+      test("a token with a hash", () {
+        expect(shellSplit("foo#bar"), equals(["foo#bar"]));
+      });
+
+      test("a token before a comment", () {
+        expect(shellSplit("foo #bar"), equals(["foo"]));
+      });
+
+      test("dynamic shell features", () {
+        expect(
+            shellSplit(r"foo $(bar baz)"), equals(["foo", r"$(bar", "baz)"]));
+        expect(shellSplit("foo `bar baz`"), equals(["foo", "`bar", "baz`"]));
+        expect(shellSplit(r"foo $bar | baz"),
+            equals(["foo", r"$bar", "|", "baz"]));
+      });
+    });
+
+    group("parses a backslash", () { // POSIX shell spec, Section 2.2.1.
+      test("before a normal character", () {
+        expect(shellSplit(r"foo\bar"), equals(["foobar"]));
+      });
+
+      test("before a dynamic shell feature", () {
+        expect(shellSplit(r"foo\$bar"), equals([r"foo$bar"]));
+      });
+
+      test("before a single quote", () {
+        expect(shellSplit(r"foo\'bar"), equals(["foo'bar"]));
+      });
+
+      test("before a double quote", () {
+        expect(shellSplit(r'foo\"bar'), equals(['foo"bar']));
+      });
+
+      test("before a space", () {
+        expect(shellSplit(r'foo\ bar'), equals(['foo bar']));
+      });
+
+      test("at the beginning of a token", () {
+        expect(shellSplit(r'\ foo'), equals([' foo']));
+      });
+
+      test("before whitespace followed by a hash", () {
+        expect(shellSplit(r'\ #foo'), equals([' #foo']));
+      });
+
+      test("before a newline in a token", () {
+        expect(shellSplit('foo\\\nbar'), equals(['foobar']));
+      });
+
+      test("before a newline outside a token", () {
+        expect(shellSplit('foo \\\n bar'), equals(['foo', 'bar']));
+      });
+
+      test("before a backslash", () {
+        expect(shellSplit(r'foo\\bar'), equals([r'foo\bar']));
+      });
+    });
+
+    group("parses single quotes", () { // POSIX shell spec, Section 2.2.2.
+      test("that are empty", () {
+        expect(shellSplit("''"), equals([""]));
+      });
+
+      test("that contain normal characters", () {
+        expect(shellSplit("'foo'"), equals(["foo"]));
+      });
+
+      test("that contain active characters", () {
+        expect(shellSplit("'\" \\#'"), equals([r'" \#']));
+      });
+
+      test("before a hash", () {
+        expect(shellSplit("''#foo"), equals([r'#foo'])); // '' begins a token.
+      });
+
+      test("inside a token", () {
+        expect(shellSplit("foo'bar baz'qux"), equals([r'foobar bazqux']));
+      });
+
+      test("without a closing quote", () {
+        expect(() => shellSplit("'foo bar"), throwsFormatException);
+      });
+    });
+
+    group("parses double quotes", () { // POSIX shell spec, Section 2.2.3.
+      test("that are empty", () {
+        expect(shellSplit('""'), equals([""]));
+      });
+
+      test("that contain normal characters", () {
+        expect(shellSplit('"foo"'), equals(["foo"]));
+      });
+
+      test("that contain otherwise-active characters", () {
+        expect(shellSplit('"\' #"'), equals(["' #"]));
+      });
+
+      test("that contain escaped characters", () {
+        expect(shellSplit(r'"\$\`\"\\"'), equals(['\$`"\\']));
+      });
+
+      test("that contain an escaped newline", () {
+        expect(shellSplit('"\\\n"'), equals(['']));
+      });
+
+      test("that contain a backslash that's not an escape", () {
+        expect(shellSplit(r'"f\oo"'), equals([r'f\oo']));
+      });
+
+      test("before a hash", () {
+        expect(shellSplit('""#foo'), equals([r'#foo'])); // "" begins a token.
+      });
+
+      test("inside a token", () {
+        expect(shellSplit('foo"bar baz"qux'), equals([r'foobar bazqux']));
+      });
+
+      test("without a closing quote", () {
+        expect(() => shellSplit('"foo bar'), throwsFormatException);
+        expect(() => shellSplit('"foo bar\\'), throwsFormatException);
+      });
+    });
+  });
+}