Add loose parsing option for dates, accepting mixed case and missing delimiters
BUG=
R=cbracken@google.com
Review URL: https://codereview.chromium.org//932093004
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0dd3a9c..02c4d4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## 0.12.1
+ * Adds a DateFormat.parseLoose that accepts mixed case and missing
+ delimiters when parsing dates. It also allows arbitrary amounts of
+ whitespace anywhere that whitespace is expected. So, for example,
+ in en-US locale a yMMMd format would accept "SEP 3 2014", even
+ though it would generate "Sep 3, 2014". This is fairly limited, and
+ its reliability in other locales is not known.
+
## 0.12.0+3
* Update pubspec dependencies to allow analyzer version 23.
diff --git a/lib/src/intl/date_format.dart b/lib/src/intl/date_format.dart
index 3aede36..86402c9 100644
--- a/lib/src/intl/date_format.dart
+++ b/lib/src/intl/date_format.dart
@@ -262,6 +262,58 @@
_parse(inputString, utc: utc, strict: false);
/**
+   * Parses user-supplied [inputString] "loosely" against this format,
+   * tolerating some deviations from the strict format.
+   *
+   * Input that [parseStrict] accepts is returned as parsed. Anything else is
+   * retried with relaxed rules: upper and lower case are interchangeable,
+   * delimiters may be missing and replaced or supplemented with whitespace,
+   * and any amount of whitespace is allowed wherever whitespace is
+   * permitted. Unlike [parse], trailing characters are not allowed. Leading
+   * whitespace before a delimiter is not allowed either, and the only month
+   * and weekday names recognized are the ones the format knows about. These
+   * restrictions are quite arbitrary, and it's not known how well they work
+   * for locales that aren't English-like.
+   *
+   * Throws a [FormatException] if [inputString] cannot be parsed.
+   *
+   * For example, this will accept
+   *
+   *       new DateFormat.yMMMd("en_US").parseLoose("SEp 3 2014");
+   *       new DateFormat.yMd("en_US").parseLoose("09 03/2014");
+   *
+   * It will NOT accept
+   *
+   *       // "Sept" is not a valid month name.
+   *       new DateFormat.yMMMd("en_US").parseLoose("Sept 3, 2014");
+   *       // Delimiters can't have leading whitespace.
+   *       new DateFormat.yMd("en_US").parseLoose("09 / 03 / 2014");
+   */
+  DateTime parseLoose(String inputString, [utc = false]) {
+    var strictResult;
+    try {
+      strictResult = _parse(inputString, utc: utc, strict: true);
+    } on FormatException catch (_) {
+      // The strict rules rejected the input, so retry with the relaxed ones.
+      // The loose field parsers expect lower-case input.
+      return _parseLoose(inputString.toLowerCase(), utc);
+    }
+    return strictResult;
+  }
+
+  /**
+   * Parses [inputString] by running the loose parser of every format field
+   * over it in order, and returns the resulting date. The caller is expected
+   * to pass lower-cased input. Throws a [FormatException] if any characters
+   * are left over once all fields have been parsed.
+   */
+  _parseLoose(String inputString, bool utc) {
+    var builder = new _DateBuilder();
+    if (utc) builder.utc = true;
+    var remaining = new _Stream(inputString);
+    for (var field in _formatFields) {
+      field.parseLoose(remaining, builder);
+    }
+    if (!remaining.atEnd()) {
+      throw new FormatException(
+          "Characters remaining after date parsing in $inputString");
+    }
+    builder.verify(inputString);
+    return builder.asDate();
+  }
+
+ /**
* Given user input, attempt to parse the [inputString] into the anticipated
* format, treating it as being in the local timezone. If [inputString] does
* not match our format, throws a [FormatException]. This will reject dates
diff --git a/lib/src/intl/date_format_field.dart b/lib/src/intl/date_format_field.dart
index 47a9d7b..4b8b12e 100644
--- a/lib/src/intl/date_format_field.dart
+++ b/lib/src/intl/date_format_field.dart
@@ -39,6 +39,13 @@
/** Abstract method for subclasses to implementing parsing for their format.*/
void parse(_Stream input, _DateBuilder dateFields);
+ /**
+ * Abstract method for subclasses to implement 'loose' parsing for
+ * their format, accepting input case-insensitively, and allowing some
+ * delimiters to be skipped.
+ */
+ void parseLoose(_Stream input, _DateBuilder dateFields);
+
/** Parse a literal field. We just look for the exact input. */
void parseLiteral(_Stream input) {
var found = input.read(width);
@@ -47,6 +54,20 @@
}
}
+  /**
+   * Parse a literal field loosely. If the literal comes next in [input] it
+   * is consumed, and then any run of whitespace that follows is skipped. A
+   * missing literal is not an error.
+   */
+  void parseLiteralLoose(_Stream input) {
+    // Loose input has been lower-cased, so ignore case when comparing;
+    // otherwise a literal containing upper case letters could never match.
+    if (input.peek(width).toLowerCase() == pattern.toLowerCase()) {
+      input.read(width);
+    }
+    while (!input.atEnd() && input.peek().trim().isEmpty) {
+      input.read();
+    }
+  }
+
/** Throw a format exception with an error message indicating the position.*/
void throwFormatException(_Stream stream) {
throw new FormatException("Trying to read $this from ${stream.contents} "
@@ -64,8 +85,11 @@
_DateFormatLiteralField(pattern, parent): super(pattern, parent);
parse(_Stream input, _DateBuilder dateFields) {
- return parseLiteral(input);
+ parseLiteral(input);
}
+
+  parseLoose(_Stream input, _DateBuilder dateFields) {
+    // [dateFields] is unused here; only the input position is advanced.
+    parseLiteralLoose(input);
+  }
}
/**
@@ -84,9 +108,12 @@
}
parse(_Stream input, _DateBuilder dateFields) {
- return parseLiteral(input);
+ parseLiteral(input);
}
+  parseLoose(_Stream input, _DateBuilder dateFields) {
+    // [dateFields] is unused here; only the input position is advanced.
+    parseLiteralLoose(input);
+  }
+
void patchQuotes() {
if (pattern == "''") {
pattern = "'";
@@ -98,6 +125,110 @@
}
}
+/**
+ * A field that parses "loosely", meaning that we'll accept input that is
+ * missing delimiters, has upper/lower case mixed up, and might not strictly
+ * conform to the pattern, e.g. where the pattern calls for Sep we might
+ * accept sep, september, sEPTember. Doesn't affect numeric fields.
+ */
+class _LoosePatternField extends _DateFormatPatternField {
+  _LoosePatternField(String pattern, parent) : super(pattern, parent);
+
+  /**
+   * Parse from a list of [possibilities], but case-insensitively.
+   * Assumes that [input] is lower case. Returns the index of the matching
+   * entry, or -1 if the superclass rejects the input with a
+   * [FormatException].
+   */
+  int parseEnumeratedString(_Stream input, List possibilities) {
+    var lowercasePossibilities =
+        possibilities.map((x) => x.toLowerCase()).toList();
+    try {
+      return super.parseEnumeratedString(input, lowercasePossibilities);
+    } on FormatException {
+      return -1;
+    }
+  }
+
+  /**
+   * Try each list of names in [nameLists] in order, returning the index
+   * within its list of the first name that matches [input], or -1 if no
+   * list has a match.
+   */
+  int _indexInFirstMatchingList(_Stream input, List nameLists) {
+    for (var names in nameLists) {
+      var index = parseEnumeratedString(input, names);
+      if (index != -1) return index;
+    }
+    return -1;
+  }
+
+  /**
+   * Parse a month, and set it in [dateFields]. Narrow fields (width of two
+   * or less) are read as numbers; wider ones as a full or abbreviated month
+   * name, case-insensitively. Assumes that [input] is lower case.
+   * Throws a [FormatException] if no month name matches.
+   */
+  void parseMonth(input, dateFields) {
+    if (width <= 2) {
+      handleNumericField(input, dateFields.setMonth);
+      return;
+    }
+    var month = _indexInFirstMatchingList(
+        input, [symbols.MONTHS, symbols.SHORTMONTHS]);
+    if (month == -1) {
+      // No month name matched. Report it rather than silently leaving the
+      // month unset.
+      throwFormatException(input);
+    }
+    dateFields.month = month + 1;
+  }
+
+  /**
+   * Parse a standalone day name, case-insensitively.
+   * Assumes that [input] is lower case. The value is not recorded anywhere,
+   * and no error is raised here if no name matches.
+   */
+  void parseStandaloneDay(input) {
+    // This is ignored, but we still have to skip over it the correct amount.
+    if (width <= 2) {
+      handleNumericField(input, (x) => x);
+      return;
+    }
+    _indexInFirstMatchingList(
+        input, [symbols.STANDALONEWEEKDAYS, symbols.STANDALONESHORTWEEKDAYS]);
+  }
+
+  /**
+   * Parse a standalone month, and set it in [dateFields]. Narrow fields
+   * (width of two or less) are read as numbers; wider ones as a full or
+   * abbreviated standalone month name, case-insensitively. Assumes that
+   * [input] is lower case. Throws a [FormatException] if no month name
+   * matches.
+   */
+  void parseStandaloneMonth(input, dateFields) {
+    if (width <= 2) {
+      // Record the numeric month, as parseMonth does, rather than
+      // discarding it.
+      handleNumericField(input, dateFields.setMonth);
+      return;
+    }
+    var month = _indexInFirstMatchingList(
+        input, [symbols.STANDALONEMONTHS, symbols.STANDALONESHORTMONTHS]);
+    if (month == -1) {
+      // No month name matched. Report it rather than silently leaving the
+      // month unset.
+      throwFormatException(input);
+    }
+    dateFields.month = month + 1;
+  }
+
+  /**
+   * Parse a day of the week name, case-insensitively.
+   * Assumes that [input] is lower case. The value is not recorded anywhere,
+   * and no error is raised here if no name matches.
+   */
+  void parseDayOfWeek(_Stream input) {
+    // This is IGNORED, but we still have to skip over it the correct amount.
+    if (width <= 2) {
+      handleNumericField(input, (x) => x);
+      return;
+    }
+    _indexInFirstMatchingList(
+        input, [symbols.WEEKDAYS, symbols.SHORTWEEKDAYS]);
+  }
+}
+
/*
* Represents a field in the pattern that formats some aspect of the
* date. Consists primarily of a switch on the particular pattern characters
@@ -120,6 +251,16 @@
parseField(input, dateFields);
}
+
+  /**
+   * Loosely parse this field from [input], storing the result in
+   * [dateFields]. The work is delegated to a [_LoosePatternField] built from
+   * the same pattern and parent, which accepts case-insensitive input and
+   * skipped delimiters.
+   */
+  void parseLoose(_Stream input, _DateBuilder dateFields) {
+    var looseField = new _LoosePatternField(pattern, parent);
+    looseField.parse(input, dateFields);
+  }
+
/**
* Parse a field representing part of a date pattern. Note that we do not
* return a value, but rather build up the result in [builder].
diff --git a/pubspec.yaml b/pubspec.yaml
index c315f5c..df34aca 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: intl
-version: 0.12.0+3
+version: 0.12.1
author: Dart Team <misc@dartlang.org>
description: Contains code to deal with internationalized/localized messages, date and number formatting and parsing, bi-directional text, and other internationalization issues.
homepage: https://www.dartlang.org
diff --git a/test/date_time_loose_parsing_test.dart b/test/date_time_loose_parsing_test.dart
new file mode 100644
index 0000000..1158982
--- /dev/null
+++ b/test/date_time_loose_parsing_test.dart
@@ -0,0 +1,55 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Tests for the loose option when parsing dates and times, which accept
+/// mixed-case input and are able to skip missing delimiters. This is only
+/// tested in basic US locale, it's hard to define for others.
+library date_time_loose_test;
+
+import 'package:intl/intl.dart';
+import 'package:unittest/unittest.dart';
+
+main() {
+  // The date that every input in this file is expected to parse to.
+  var date = new DateTime(2014, 9, 3);
+
+  // Expects [s] to be rejected by the ordinary parse of [format], but to be
+  // accepted by parseLoose and to produce [date].
+  check(DateFormat format, String s) {
+    expect(() => format.parse(s), throwsFormatException);
+    expect(format.parseLoose(s), date);
+  }
+
+  // Each test builds its own format so that it does not depend on another
+  // test having run first.
+  test("Loose parsing yMMMd", () {
+    // Note: We can't handle e.g. Sept, we don't have those abbreviations
+    // in our data.
+    // Also doesn't handle "sep3,2014", or "sep 3.2014"
+    var format = new DateFormat.yMMMd("en_US");
+    check(format, "Sep 3 2014");
+    check(format, "sep 3 2014");
+    check(format, "sep  3 2014");
+    check(format, "sep  3       2014");
+    check(format, "sep3 2014");
+    check(format, "september 3, 2014");
+    check(format, "sEPTembER 3, 2014");
+    check(format, "seP 3, 2014");
+  });
+
+  test("Loose parsing yMMMd that parses strict", () {
+    var format = new DateFormat.yMMMd("en_US");
+    expect(format.parseLoose("Sep 3, 2014"), date);
+  });
+
+  test("Loose parsing yMd", () {
+    var format = new DateFormat.yMd("en_US");
+    check(format, "09 3 2014");
+    check(format, "09 00003 2014");
+    check(format, "09/ 03/2014");
+    expect(() => format.parseLoose("09 / 03 / 2014"), throwsFormatException);
+  });
+
+  test("Loose parsing yMd that parses strict", () {
+    var format = new DateFormat.yMd("en_US");
+    expect(format.parseLoose("09/03/2014"), date);
+    expect(format.parseLoose("09/3/2014"), date);
+  });
+}