Add loose parsing option for dates, accepting mixed case and missing delimiters

BUG=
R=cbracken@google.com

Review URL: https://codereview.chromium.org//932093004
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0dd3a9c..02c4d4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## 0.12.1
+  * Adds a DateFormat.parseLoose that accepts mixed case and missing
+  delimiters when parsing dates. It also allows arbitrary amounts of
+  whitespace anywhere that whitespace is expected. So, for example,
+  in en-US locale a yMMMd format would accept "SEP 3   2014", even
+  though it would generate "Sep 3, 2014". This is fairly limited, and
+  its reliability in other locales is not known.
+
 ## 0.12.0+3
   * Update pubspec dependencies to allow analyzer version 23.
 
diff --git a/lib/src/intl/date_format.dart b/lib/src/intl/date_format.dart
index 3aede36..86402c9 100644
--- a/lib/src/intl/date_format.dart
+++ b/lib/src/intl/date_format.dart
@@ -262,6 +262,58 @@
       _parse(inputString, utc: utc, strict: false);
 
   /**
+   * Given user input, attempt to parse the [inputString] "loosely" into the
+   * anticipated format, accepting some variations from the strict format.
+   *
+   * If [inputString]
+   * is accepted by [parseStrict], just return the result. If not, attempt to
+   * parse it, but accepting either upper or
+   * lower case, allowing delimiters to be missing and replaced or
+   * supplemented with whitespace,
+   * and allowing arbitrary amounts of whitespace wherever whitespace is
+   * permitted. Note that this does not allow trailing characters, the way
+   * [parse] does. It also does not allow leading whitespace on delimiters,
+   * and does not allow alternative names for months or weekdays other than
+   * those the format knows about. The restrictions are quite arbitrary and
+   * it's not known how well they'll work for locales that aren't English-like.
+   *
+   * If [inputString] does not parse, this throws a
+   * [FormatException].
+   *
+   * For example, this will accept
+   *
+   *       new DateFormat.yMMMd("en_US").parseLoose("SEp   3 2014");
+   *       new DateFormat.yMd("en_US").parseLoose("09    03/2014");
+   *
+   * It will NOT accept
+   *
+   *      // "Sept" is not a valid month name.
+   *      new DateFormat.yMMMd("en_US").parseLoose("Sept 3, 2014");
+   *      // Delimiters can't have leading whitespace.
+   *      new DateFormat.yMd("en_US").parseLoose("09 / 03 / 2014");
+   */
+  DateTime parseLoose(String inputString, [utc = false]) {
+    try {
+      return _parse(inputString, utc: utc, strict: true);
+    } on FormatException {
+      return _parseLoose(inputString.toLowerCase(), utc);
+    }
+  }
+
+  _parseLoose(String inputString, bool utc) {
+    var dateFields = new _DateBuilder();
+    if (utc) dateFields.utc = true;
+    var stream = new _Stream(inputString);
+    _formatFields.forEach((f) => f.parseLoose(stream, dateFields));
+    if (!stream.atEnd()) {
+      throw new FormatException(
+          "Characters remaining after date parsing in $inputString");
+    }
+    dateFields.verify(inputString);
+    return dateFields.asDate();
+  }
+
+  /**
    * Given user input, attempt to parse the [inputString] into the anticipated
    * format, treating it as being in the local timezone. If [inputString] does
    * not match our format, throws a [FormatException]. This will reject dates
diff --git a/lib/src/intl/date_format_field.dart b/lib/src/intl/date_format_field.dart
index 47a9d7b..4b8b12e 100644
--- a/lib/src/intl/date_format_field.dart
+++ b/lib/src/intl/date_format_field.dart
@@ -39,6 +39,13 @@
   /** Abstract method for subclasses to implementing parsing for their format.*/
   void parse(_Stream input, _DateBuilder dateFields);
 
+  /**
+   * Abstract method for subclasses to implement 'loose' parsing for
+   * their format, accepting input case-insensitively, and allowing some
+   * delimiters to be skipped.
+   */
+  void parseLoose(_Stream input, _DateBuilder dateFields);
+
   /** Parse a literal field. We just look for the exact input. */
   void parseLiteral(_Stream input) {
     var found = input.read(width);
@@ -47,6 +54,20 @@
     }
   }
 
+  /**
+   * Parse a literal field loosely. We consume an exact match if one is
+   * present, then any run of whitespace; a missing literal is not an error.
+   */
+  void parseLiteralLoose(_Stream input) {
+    var found = input.peek(width);
+    if (found == pattern) {
+      input.read(width);
+    }
+    while (!input.atEnd() && input.peek().trim().isEmpty) {
+      input.read();
+    }
+  }
+
   /** Throw a format exception with an error message indicating the position.*/
   void throwFormatException(_Stream stream) {
     throw new FormatException("Trying to read $this from ${stream.contents} "
@@ -64,8 +85,11 @@
   _DateFormatLiteralField(pattern, parent): super(pattern, parent);
 
   parse(_Stream input, _DateBuilder dateFields) {
-    return parseLiteral(input);
+    parseLiteral(input);
   }
+
+  parseLoose(_Stream input, _DateBuilder dateFields) =>
+    parseLiteralLoose(input);
 }
 
 /**
@@ -84,9 +108,12 @@
   }
 
   parse(_Stream input, _DateBuilder dateFields) {
-    return parseLiteral(input);
+    parseLiteral(input);
   }
 
+  parseLoose(_Stream input, _DateBuilder dateFields) =>
+    parseLiteralLoose(input);
+
   void patchQuotes() {
     if (pattern == "''") {
       pattern = "'";
@@ -98,6 +125,110 @@
   }
 }
 
+/**
+ * A field that parses "loosely", meaning that we'll accept input that is
+ * missing delimiters, has upper/lower case mixed up, and might not strictly
+ * conform to the pattern, e.g. the pattern calls for Sep we might accept
+ * sep, september, sEPTember. Doesn't affect numeric fields.
+ */
+class _LoosePatternField extends _DateFormatPatternField {
+  _LoosePatternField(String pattern, parent) : super(pattern, parent);
+
+  /**
+   * Parse from a list of possibilities, but case-insensitively, returning -1
+   * instead of throwing a [FormatException]. Assumes [input] is lower case.
+   */
+  int parseEnumeratedString(_Stream input, List possibilities) {
+    var lowercasePossibilities = possibilities
+        .map((x) => x.toLowerCase())
+        .toList();
+    try {
+      return super.parseEnumeratedString(input, lowercasePossibilities);
+    } on FormatException {
+      return -1;
+    }
+  }
+
+  /**
+   * Parse a month name, case-insensitively, and set it in [dateFields].
+   * Assumes that [input] is lower case.
+   */
+  void parseMonth(input, dateFields) {
+    if (width <= 2) {
+      handleNumericField(input, dateFields.setMonth);
+      return;
+    }
+    var possibilities =
+        [symbols.MONTHS, symbols.SHORTMONTHS];
+    for (var monthNames in possibilities) {
+      var month = parseEnumeratedString(input, monthNames);
+      if (month != -1) {
+        dateFields.month = month + 1;
+        return;
+      }
+    }
+  }
+
+  /**
+   * Skip over a standalone day name, case-insensitively, ignoring its value.
+   * Assumes that [input] is lower case.
+   */
+  void parseStandaloneDay(input) {
+    // This is ignored, but we still have to skip over it the correct amount.
+    if (width <= 2) {
+      handleNumericField(input, (x) => x);
+      return;
+    }
+    var possibilities =
+        [symbols.STANDALONEWEEKDAYS, symbols.STANDALONESHORTWEEKDAYS];
+    for (var dayNames in possibilities) {
+      var day = parseEnumeratedString(input, dayNames);
+      if (day != -1) {
+        return;
+      }
+    }
+  }
+
+  /**
+   * Parse a standalone month name, case-insensitively, and set it in
+   * [dateFields]. Assumes that [input] is lower case.
+   */
+  void parseStandaloneMonth(input, dateFields) {
+    if (width <= 2) {
+      handleNumericField(input, dateFields.setMonth);
+      return;
+    }
+    var possibilities =
+        [symbols.STANDALONEMONTHS, symbols.STANDALONESHORTMONTHS];
+    for (var monthNames in possibilities) {
+      var month = parseEnumeratedString(input, monthNames);
+      if (month != -1) {
+        dateFields.month = month + 1;
+        return;
+      }
+    }
+  }
+
+  /**
+   * Skip over a day of the week name, case-insensitively, ignoring its value.
+   * Assumes that [input] is lower case.
+   */
+  void parseDayOfWeek(_Stream input) {
+    // This is IGNORED, but we still have to skip over it the correct amount.
+    if (width <= 2) {
+      handleNumericField(input, (x) => x);
+      return;
+    }
+    var possibilities = [symbols.WEEKDAYS, symbols.SHORTWEEKDAYS];
+    for (var dayNames in possibilities) {
+      var day = parseEnumeratedString(input, dayNames);
+      if (day != -1) {
+        return;
+      }
+    }
+  }
+}
+
 /*
  * Represents a field in the pattern that formats some aspect of the
  * date. Consists primarily of a switch on the particular pattern characters
@@ -120,6 +251,16 @@
     parseField(input, dateFields);
   }
 
+
+  /**
+   * Parse the date according to our specification and put the result
+   * into the correct place in dateFields. Allow looser parsing, accepting
+   * case-insensitive input and skipped delimiters.
+   */
+  void parseLoose(_Stream input, _DateBuilder dateFields) {
+    new _LoosePatternField(pattern, parent).parse(input, dateFields);
+  }
+
   /**
    * Parse a field representing part of a date pattern. Note that we do not
    * return a value, but rather build up the result in [builder].
diff --git a/pubspec.yaml b/pubspec.yaml
index c315f5c..df34aca 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
 name: intl
-version: 0.12.0+3
+version: 0.12.1
 author: Dart Team <misc@dartlang.org>
 description: Contains code to deal with internationalized/localized messages, date and number formatting and parsing, bi-directional text, and other internationalization issues.
 homepage: https://www.dartlang.org
diff --git a/test/date_time_loose_parsing_test.dart b/test/date_time_loose_parsing_test.dart
new file mode 100644
index 0000000..1158982
--- /dev/null
+++ b/test/date_time_loose_parsing_test.dart
@@ -0,0 +1,55 @@
+// Copyright (c) 2014, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Tests for the loose option when parsing dates and times, which accept
+/// mixed-case input and are able to skip missing delimiters. This is only
+/// tested in basic US locale, it's hard to define for others.
+library date_time_loose_test;
+
+import 'package:intl/intl.dart';
+import 'package:unittest/unittest.dart';
+
+main() {
+  var format;
+  var date = new DateTime(2014, 9, 3);
+
+  // Expects [s] to be rejected by parse() but accepted by parseLoose().
+  check(String s) {
+    expect(() => format.parse(s), throwsFormatException);
+    expect(format.parseLoose(s), date);
+  }
+
+  test("Loose parsing yMMMd", () {
+    // We can't handle "Sept" (not in our data), "sep3,2014" or "sep 3.2014".
+    format = new DateFormat.yMMMd("en_US");
+    check("Sep 3 2014");
+    check("sep 3 2014");
+    check("sep 3  2014");
+    check("sep  3 2014");
+    check("sep  3       2014");
+    check("sep3 2014");
+    check("september 3, 2014");
+    check("sEPTembER 3, 2014");
+    check("seP 3, 2014");
+  });
+
+  test("Loose parsing yMMMd that parses strict", () {
+    format = new DateFormat.yMMMd("en_US");
+    expect(format.parseLoose("Sep 3, 2014"), date);
+  });
+
+  test("Loose parsing yMd", () {
+    format = new DateFormat.yMd("en_US");
+    check("09 3 2014");
+    check("09 00003    2014");
+    check("09/    03/2014");
+    expect(() => format.parseLoose("09 / 03 / 2014"), throwsFormatException);
+  });
+
+  test("Loose parsing yMd that parses strict", () {
+    format = new DateFormat.yMd("en_US");
+    expect(format.parseLoose("09/03/2014"), date);
+    expect(format.parseLoose("09/3/2014"), date);
+  });
+}