Merge pull request #135 from davidmorgan/faster-parsing

Faster parsing
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b2f4c3e..6543f24 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,9 @@
-## 1.5.3
+## 1.5.3-dev
 
-* Improve arg parsing performance for large numbers of args.
+* Improve arg parsing performance: use queues instead of lists internally to
+  get linear instead of quadratic performance, which is important for large
+  numbers of args (>1000). And, use simple string manipulation instead of
+  regular expressions for a 1.5x improvement everywhere.
 * No longer automatically add a 'help' option to commands that don't validate
   their arguments (fix #123).
 
diff --git a/lib/src/parser.dart b/lib/src/parser.dart
index 2a41f17..8dafef9 100644
--- a/lib/src/parser.dart
+++ b/lib/src/parser.dart
@@ -9,10 +9,6 @@
 import 'arg_results.dart';
 import 'option.dart';
 
-final _soloOpt = RegExp(r'^-([a-zA-Z0-9])$');
-final _abbrOpt = RegExp(r'^-([a-zA-Z0-9]+)(.*)$');
-final _longOpt = RegExp(r'^--([a-zA-Z\-_0-9]+)(=(.*))?$');
-
 /// The actual argument parsing class.
 ///
 /// Unlike [ArgParser] which is really more an "arg grammar", this is the class
@@ -127,14 +123,17 @@
   /// We treat this differently than collapsed abbreviations (like "-abc") to
   /// handle the possible value that may follow it.
   bool parseSoloOption() {
-    var soloOpt = _soloOpt.firstMatch(current);
-    if (soloOpt == null) return false;
+    // Hand coded regexp: r'^-([a-zA-Z0-9])$'
+    // Length must be two, hyphen followed by any letter/digit.
+    if (current.length != 2) return false;
+    if (!current.startsWith('-')) return false;
+    var opt = current[1];
+    if (!_isLetterOrDigit(opt.codeUnitAt(0))) return false;
 
-    var option = grammar.findByAbbreviation(soloOpt[1]);
+    var option = grammar.findByAbbreviation(opt);
     if (option == null) {
       // Walk up to the parent command if possible.
-      validate(
-          parent != null, 'Could not find an option or flag "-${soloOpt[1]}".');
+      validate(parent != null, 'Could not find an option or flag "-$opt".');
       return parent.parseSoloOption();
     }
 
@@ -153,12 +152,28 @@
   /// (like "-abc") or a single abbreviation with the value directly attached
   /// to it (like "-mrelease").
   bool parseAbbreviation(Parser innermostCommand) {
-    var abbrOpt = _abbrOpt.firstMatch(current);
-    if (abbrOpt == null) return false;
+    // Hand coded regexp: r'^-([a-zA-Z0-9]+)(.*)$'
+    // Hyphen then at least one letter/digit then zero or more
+    // anything-but-newlines.
+    if (current.length < 2) return false;
+    if (!current.startsWith('-')) return false;
+
+    // Find where we go from letters/digits to rest.
+    var index = 1;
+    while (
+        index < current.length && _isLetterOrDigit(current.codeUnitAt(index))) {
+      ++index;
+    }
+    // Must be at least one letter/digit.
+    if (index == 1) return false;
 
     // If the first character is the abbreviation for a non-flag option, then
     // the rest is the value.
-    var c = abbrOpt[1].substring(0, 1);
+    var lettersAndDigits = current.substring(1, index);
+    var rest = current.substring(index);
+    if (rest.contains('\n') || rest.contains('\r')) return false;
+
+    var c = lettersAndDigits.substring(0, 1);
     var first = grammar.findByAbbreviation(c);
     if (first == null) {
       // Walk up to the parent command if possible.
@@ -168,22 +183,22 @@
     } else if (!first.isFlag) {
       // The first character is a non-flag option, so the rest must be the
       // value.
-      var value = '${abbrOpt[1].substring(1)}${abbrOpt[2]}';
+      var value = '${lettersAndDigits.substring(1)}$rest';
       setOption(results, first, value);
     } else {
       // If we got some non-flag characters, then it must be a value, but
       // if we got here, it's a flag, which is wrong.
       validate(
-          abbrOpt[2] == '',
+          rest == '',
           'Option "-$c" is a flag and cannot handle value '
-          '"${abbrOpt[1].substring(1)}${abbrOpt[2]}".');
+          '"${lettersAndDigits.substring(1)}$rest".');
 
       // Not an option, so all characters should be flags.
       // We use "innermostCommand" here so that if a parent command parses the
       // *first* letter, subcommands can still be found to parse the other
       // letters.
-      for (var i = 0; i < abbrOpt[1].length; i++) {
-        var c = abbrOpt[1].substring(i, i + 1);
+      for (var i = 0; i < lettersAndDigits.length; i++) {
+        var c = lettersAndDigits.substring(i, i + 1);
         innermostCommand.parseShortFlag(c);
       }
     }
@@ -213,21 +228,33 @@
   /// Tries to parse the current argument as a long-form named option, which
   /// may include a value like "--mode=release" or "--mode release".
   bool parseLongOption() {
-    var longOpt = _longOpt.firstMatch(current);
-    if (longOpt == null) return false;
+    // Hand coded regexp: r'^--([a-zA-Z\-_0-9]+)(=(.*))?$'
+    // Two hyphens then at least one letter/digit/hyphen, optionally an equal
+    // sign followed by zero or more anything-but-newlines.
 
-    var name = longOpt[1];
+    if (!current.startsWith('--')) return false;
+
+    var index = current.indexOf('=');
+    var name = index == -1 ? current.substring(2) : current.substring(2, index);
+    for (var i = 0; i != name.length; ++i) {
+      if (!_isLetterOrDigitOrHyphen(name.codeUnitAt(i))) return false;
+    }
+    var value = index == -1 ? null : current.substring(index + 1);
+    if (value != null && (value.contains('\n') || value.contains('\r'))) {
+      return false;
+    }
+
     var option = grammar.options[name];
     if (option != null) {
       args.removeFirst();
       if (option.isFlag) {
-        validate(longOpt[3] == null,
-            'Flag option "$name" should not be given a value.');
+        validate(
+            value == null, 'Flag option "$name" should not be given a value.');
 
         setFlag(results, option, true);
-      } else if (longOpt[3] != null) {
+      } else if (value != null) {
         // We have a value like --foo=bar.
-        setOption(results, option, longOpt[3]);
+        setOption(results, option, value);
       } else {
         // Option like --foo, so look for the value as the next arg.
         readNextArgAsValue(option);
@@ -302,3 +329,14 @@
         '"$value" is not an allowed value for option "${option.name}".');
   }
 }
+
+bool _isLetterOrDigit(int codeUnit) =>
+    // Uppercase letters.
+    (codeUnit >= 65 && codeUnit <= 90) ||
+    // Lowercase letters.
+    (codeUnit >= 97 && codeUnit <= 122) ||
+    // Digits.
+    (codeUnit >= 48 && codeUnit <= 57);
+
+bool _isLetterOrDigitOrHyphen(int codeUnit) =>
+    _isLetterOrDigit(codeUnit) || codeUnit == 45;
diff --git a/test/parse_performance_test.dart b/test/parse_performance_test.dart
index 16e412f..881fbfe 100644
--- a/test/parse_performance_test.dart
+++ b/test/parse_performance_test.dart
@@ -6,40 +6,64 @@
 import 'package:test/test.dart';
 
 void main() {
-  group('ArgParser.parse()', () {
-    test('is fast', () {
-      var parser = ArgParser()..addFlag('flag');
+  group('ArgParser.parse() is fast', () {
+    test('for short flags', () {
+      _testParserPerformance(ArgParser()..addFlag('short', abbr: 's'), '-s');
+    });
 
-      var baseSize = 200000;
-      var baseList = List<String>.generate(baseSize, (_) => '--flag');
+    test('for abbreviations', () {
+      _testParserPerformance(
+          ArgParser()
+            ..addFlag('short', abbr: 's')
+            ..addFlag('short2', abbr: 't')
+            ..addFlag('short3', abbr: 'u')
+            ..addFlag('short4', abbr: 'v'),
+          '-stuv');
+    });
 
-      var multiplier = 10;
-      var largeList =
-          List<String>.generate(baseSize * multiplier, (_) => '--flag');
+    test('for long flags', () {
+      _testParserPerformance(ArgParser()..addFlag('long-flag'), '--long-flag');
+    });
 
-      var baseAction = () => parser.parse(baseList);
-      var largeAction = () => parser.parse(largeList);
-
-      // Warm up JIT.
-      baseAction();
-      largeAction();
-
-      var baseTime = _time(baseAction);
-      var largeTime = _time(largeAction);
-
-      print('Parsed $baseSize elements in ${baseTime}ms, '
-          '${baseSize * multiplier} elements in ${largeTime}ms.');
-
-      expect(largeTime, lessThan(baseTime * multiplier * 3),
-          reason:
-              'Comparing large data set time ${largeTime}ms to small data set time '
-              '${baseTime}ms. Data set increased ${multiplier}x, time is allowed to '
-              'increase up to ${multiplier * 3}x, but it increased '
-              '${largeTime ~/ baseTime}x.');
+    test('for long options with =', () {
+      _testParserPerformance(ArgParser()..addOption('long-option-name'),
+          '--long-option-name=long-option-value');
     });
   });
 }
 
+/// Tests how quickly [parser] parses [string].
+///
+/// Checks that a 10x increase in arg count does not lead to greater than 30x
+/// increase in parse time.
+void _testParserPerformance(ArgParser parser, String string) {
+  var baseSize = 50000;
+  var baseList = List<String>.generate(baseSize, (_) => string);
+
+  var multiplier = 10;
+  var largeList = List<String>.generate(baseSize * multiplier, (_) => string);
+
+  var baseAction = () => parser.parse(baseList);
+  var largeAction = () => parser.parse(largeList);
+
+  // Warm up JIT.
+  baseAction();
+  largeAction();
+
+  var baseTime = _time(baseAction);
+  var largeTime = _time(largeAction);
+
+  print('Parsed $baseSize elements in ${baseTime}ms, '
+      '${baseSize * multiplier} elements in ${largeTime}ms.');
+
+  expect(largeTime, lessThan(baseTime * multiplier * 3),
+      reason:
+          'Comparing large data set time ${largeTime}ms to small data set time '
+          '${baseTime}ms. Data set increased ${multiplier}x, time is allowed to '
+          'increase up to ${multiplier * 3}x, but it increased '
+          '${largeTime ~/ baseTime}x.');
+}
+
 int _time(void Function() function) {
   var stopwatch = Stopwatch()..start();
   function();