Add a string_scanner package.
This code has been pulled out of the Shelf package.
R=kevmoo@google.com
Review URL: https://codereview.chromium.org//213833013
git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart/pkg/string_scanner@34665 260f80e4-7a28-3924-810f-c04153c831b5
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5c60afe
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,26 @@
+Copyright 2014, the Dart project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..90660fc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+This package exposes a `StringScanner` type that makes it easy to parse a string
+using a series of `Pattern`s. For example:
+
+```dart
+import 'dart:math';
+
+import 'package:string_scanner/string_scanner.dart';
+
+num parseNumber(String source) {
+  // Scan a number ("1", "1.5", "-3").
+  var scanner = new StringScanner(source);
+
+  // [StringScanner.scan] tries to consume a [Pattern] and returns whether it
+  // succeeded. It will move the scan pointer past the end of the pattern.
+  var negative = scanner.scan("-");
+
+  // [StringScanner.expect] consumes a [Pattern] and throws a [FormatException]
+  // if it fails. Like [StringScanner.scan], it moves the scan pointer forward.
+  scanner.expect(new RegExp(r"\d+"));
+
+  // [StringScanner.lastMatch] holds the [Match] for the most recent call to
+  // [StringScanner.scan], [StringScanner.expect], or [StringScanner.matches].
+  num number = int.parse(scanner.lastMatch[0]);
+
+  if (scanner.scan(".")) {
+    scanner.expect(new RegExp(r"\d+"));
+    var decimal = scanner.lastMatch[0];
+    number += int.parse(decimal) / pow(10, decimal.length);
+  }
+
+  // [StringScanner.expectDone] will throw a [FormatException] if there's any
+  // input that hasn't yet been consumed.
+  scanner.expectDone();
+
+  return (negative ? -1 : 1) * number;
+}
+```
diff --git a/lib/string_scanner.dart b/lib/string_scanner.dart
new file mode 100644
index 0000000..624c090
--- /dev/null
+++ b/lib/string_scanner.dart
@@ -0,0 +1,113 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// A library for parsing strings using a sequence of patterns.
+library string_scanner;
+
+// TODO(nweiz): Add some integration between this and source maps.
+/// Scans through a string by matching a sequence of [Pattern]s against it.
+class StringScanner {
+  /// The string being scanned through.
+  final String string;
+
+  /// The current position of the scanner in the string, in characters.
+  ///
+  /// Setting a position outside of [string] throws an [ArgumentError].
+  int get position => _position;
+  set position(int position) {
+    if (position < 0 || position > string.length) {
+      throw new ArgumentError("Invalid position $position");
+    }
+
+    _position = position;
+  }
+  int _position = 0;
+
+  /// The data about the previous match made by the scanner.
+  ///
+  /// If the last match failed, this will be `null`.
+  Match get lastMatch => _lastMatch;
+  Match _lastMatch;
+
+  /// The portion of the string that hasn't yet been scanned.
+  String get rest => string.substring(position);
+
+  /// Whether the scanner has completely consumed [string].
+  bool get isDone => position == string.length;
+
+  /// Creates a new [StringScanner] that starts scanning from [position].
+  ///
+  /// [position] defaults to 0, the beginning of the string.
+  StringScanner(this.string, {int position}) {
+    if (position != null) this.position = position;
+  }
+
+  /// If [pattern] matches at the current position of the string, scans forward
+  /// until the end of the match.
+  ///
+  /// Returns whether or not [pattern] matched.
+  bool scan(Pattern pattern) {
+    var success = matches(pattern);
+    if (success) _position = _lastMatch.end;
+    return success;
+  }
+
+  /// If [pattern] matches at the current position of the string, scans forward
+  /// until the end of the match.
+  ///
+  /// If [pattern] did not match, throws a [FormatException] describing the
+  /// position of the failure. [name] is used in this error as the expected name
+  /// of the pattern being matched; if it's `null`, the pattern itself is used
+  /// instead.
+  void expect(Pattern pattern, {String name}) {
+    if (scan(pattern)) return;
+
+    if (name == null) {
+      if (pattern is RegExp) {
+        name = "/${pattern.pattern.replaceAll("/", "\\/")}/";
+      } else {
+        name = pattern.toString()
+            .replaceAll("\\", "\\\\").replaceAll('"', '\\"');
+        name = '"$name"';
+      }
+    }
+    _fail(name);
+  }
+
+  /// If the string has not been fully consumed, this throws a
+  /// [FormatException].
+  void expectDone() {
+    if (isDone) return;
+    _fail("no more input");
+  }
+
+  /// Returns whether or not [pattern] matches at the current position of the
+  /// string.
+  ///
+  /// This doesn't move the scan pointer forward, but it does set [lastMatch].
+  bool matches(Pattern pattern) {
+    _lastMatch = pattern.matchAsPrefix(string, position);
+    return _lastMatch != null;
+  }
+
+  // TODO(nweiz): Make this handle long lines more gracefully.
+  /// Throws a [FormatException] describing that [name] is expected at the
+  /// current position in the string.
+  ///
+  /// The message gives the line, column, and a caret under that position.
+  void _fail(String name) {
+    var consumed = string.substring(0, position);
+    var line = "\n".allMatches(consumed).length + 1;
+    // `lastIndexOf` returns -1 if there's no newline, making `lineStart` 0.
+    var lineStart = consumed.lastIndexOf("\n") + 1;
+    var column = position - lineStart + 1;
+    // Only show the current line, however many lines follow the position.
+    var lineEnd = string.indexOf("\n", position);
+    if (lineEnd == -1) lineEnd = string.length;
+    throw new FormatException(
+        "Expected $name on line $line, column $column.\n"
+        "${string.substring(lineStart, lineEnd)}\n"
+        "${' ' * (column - 1)}^");
+  }
+}
diff --git a/pubspec.yaml b/pubspec.yaml
new file mode 100644
index 0000000..3035901
--- /dev/null
+++ b/pubspec.yaml
@@ -0,0 +1,10 @@
+name: string_scanner
+version: 0.0.1
+author: "Dart Team <misc@dartlang.org>"
+homepage: http://www.dartlang.org
+description: >
+ A class for parsing strings using a sequence of patterns.
+dev_dependencies:
+ unittest: ">=0.10.0 <0.11.0"
+environment:
+ sdk: ">=1.2.0 <2.0.0"
diff --git a/test/error_format_test.dart b/test/error_format_test.dart
new file mode 100644
index 0000000..1187344
--- /dev/null
+++ b/test/error_format_test.dart
@@ -0,0 +1,110 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library string_scanner.error_format_test;
+
+import 'package:string_scanner/string_scanner.dart';
+import 'package:unittest/unittest.dart';
+
+void main() { // NOTE: each "^" line must have exactly column - 1 spaces.
+  test('points to the first unconsumed character', () {
+    var scanner = new StringScanner('foo bar baz');
+    scanner.expect('foo ');
+    expect(() => scanner.expect('foo'), throwsFormattedError('''
+Expected "foo" on line 1, column 5.
+foo bar baz
+    ^'''));
+  });
+
+  test('prints the correct line', () {
+    var scanner = new StringScanner('foo bar baz\ndo re mi\nearth fire water');
+    scanner.expect('foo bar baz\ndo ');
+    expect(() => scanner.expect('foo'), throwsFormattedError('''
+Expected "foo" on line 2, column 4.
+do re mi
+   ^'''));
+  });
+
+  test('handles the beginning of the string correctly', () {
+    var scanner = new StringScanner('foo bar baz');
+    expect(() => scanner.expect('zap'), throwsFormattedError('''
+Expected "zap" on line 1, column 1.
+foo bar baz
+^'''));
+  });
+
+  test('handles the end of the string correctly', () {
+    var scanner = new StringScanner('foo bar baz');
+    scanner.expect('foo bar baz');
+    expect(() => scanner.expect('bang'), throwsFormattedError('''
+Expected "bang" on line 1, column 12.
+foo bar baz
+           ^'''));
+  });
+
+  test('handles an empty string correctly', () {
+    expect(() => new StringScanner('').expect('foo'), throwsFormattedError('''
+Expected "foo" on line 1, column 1.
+
+^'''));
+  });
+
+  group("expected name", () { // How the pattern is rendered when no name is given.
+    test("uses the provided name", () {
+      expect(() => new StringScanner('').expect('foo bar', name: 'zap'),
+          throwsFormattedError('''
+Expected zap on line 1, column 1.
+
+^'''));
+    });
+
+    test("escapes string quotes", () {
+      expect(() => new StringScanner('').expect('foo"bar'),
+          throwsFormattedError('''
+Expected "foo\\"bar" on line 1, column 1.
+
+^'''));
+    });
+
+    test("escapes string backslashes", () {
+      expect(() => new StringScanner('').expect('foo\\bar'),
+          throwsFormattedError('''
+Expected "foo\\\\bar" on line 1, column 1.
+
+^'''));
+    });
+
+    test("prints PERL-style regexps", () {
+      expect(() => new StringScanner('').expect(new RegExp(r'foo')),
+          throwsFormattedError('''
+Expected /foo/ on line 1, column 1.
+
+^'''));
+    });
+
+    test("escapes regexp forward slashes", () {
+      expect(() => new StringScanner('').expect(new RegExp(r'foo/bar')),
+          throwsFormattedError('''
+Expected /foo\\/bar/ on line 1, column 1.
+
+^'''));
+    });
+
+    test("does not escape regexp backslashes", () {
+      expect(() => new StringScanner('').expect(new RegExp(r'foo\bar')),
+          throwsFormattedError('''
+Expected /foo\\bar/ on line 1, column 1.
+
+^'''));
+    });
+  });
+}
+
+/// Returns a [Matcher] that expects a thrown [FormatException] whose message
+/// is exactly [format].
+Matcher throwsFormattedError(String format) => throwsA(predicate((thrown) {
+  expect(thrown, isFormatException);
+  expect(thrown.message, equals(format));
+  return true;
+}));
diff --git a/test/string_scanner_test.dart b/test/string_scanner_test.dart
new file mode 100644
index 0000000..0cab627
--- /dev/null
+++ b/test/string_scanner_test.dart
@@ -0,0 +1,267 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library string_scanner.string_scanner_test;
+
+import 'package:string_scanner/string_scanner.dart';
+import 'package:unittest/unittest.dart';
+
+void main() { // Covers StringScanner state at each stage of a scan.
+  group('with an empty string', () {
+    var scanner;
+    setUp(() {
+      scanner = new StringScanner('');
+    });
+
+    test('is done', () {
+      expect(scanner.isDone, isTrue);
+      expect(scanner.expectDone, isNot(throwsFormatException));
+    });
+
+    test('rest is empty', () {
+      expect(scanner.rest, isEmpty);
+    });
+
+    test('lastMatch is null', () {
+      expect(scanner.lastMatch, isNull);
+    });
+
+    test('position is zero', () {
+      expect(scanner.position, equals(0));
+    });
+
+    test("scan returns false and doesn't change the state", () {
+      expect(scanner.scan(new RegExp('.')), isFalse);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(0));
+    });
+
+    test("expect throws a FormatException and doesn't change the state", () {
+      expect(() => scanner.expect(new RegExp('.')), throwsFormatException);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(0));
+    });
+
+    test("matches returns false and doesn't change the state", () {
+      expect(scanner.matches(new RegExp('.')), isFalse);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(0));
+    });
+
+    test('setting position to 1 throws an ArgumentError', () {
+      expect(() {
+        scanner.position = 1;
+      }, throwsArgumentError);
+    });
+
+    test('setting position to -1 throws an ArgumentError', () {
+      expect(() {
+        scanner.position = -1;
+      }, throwsArgumentError);
+    });
+  });
+
+  group('at the beginning of a string', () {
+    var scanner;
+    setUp(() {
+      scanner = new StringScanner('foo bar');
+    });
+
+    test('is not done', () {
+      expect(scanner.isDone, isFalse);
+      expect(scanner.expectDone, throwsFormatException);
+    });
+
+    test('rest is the whole string', () {
+      expect(scanner.rest, equals('foo bar'));
+    });
+
+    test('lastMatch is null', () {
+      expect(scanner.lastMatch, isNull);
+    });
+
+    test('position is zero', () {
+      expect(scanner.position, equals(0));
+    });
+
+    test("a matching scan returns true and changes the state", () {
+      expect(scanner.scan(new RegExp('f(..)')), isTrue);
+      expect(scanner.lastMatch[1], equals('oo'));
+      expect(scanner.position, equals(3));
+      expect(scanner.rest, equals(' bar'));
+    });
+
+    test("a non-matching scan returns false and sets lastMatch to null", () {
+      expect(scanner.matches(new RegExp('f(..)')), isTrue);
+      expect(scanner.lastMatch, isNotNull);
+
+      expect(scanner.scan(new RegExp('b(..)')), isFalse);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(0));
+      expect(scanner.rest, equals('foo bar'));
+    });
+
+    test("a matching expect changes the state", () {
+      scanner.expect(new RegExp('f(..)'));
+      expect(scanner.lastMatch[1], equals('oo'));
+      expect(scanner.position, equals(3));
+      expect(scanner.rest, equals(' bar'));
+    });
+
+    test("a non-matching expect throws a FormatException and sets lastMatch to "
+        "null", () {
+      expect(scanner.matches(new RegExp('f(..)')), isTrue);
+      expect(scanner.lastMatch, isNotNull);
+
+      expect(() => scanner.expect(new RegExp('b(..)')), throwsFormatException);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(0));
+      expect(scanner.rest, equals('foo bar'));
+    });
+
+    test("a matching matches returns true and only changes lastMatch", () {
+      expect(scanner.matches(new RegExp('f(..)')), isTrue);
+      expect(scanner.lastMatch[1], equals('oo'));
+      expect(scanner.position, equals(0));
+      expect(scanner.rest, equals('foo bar'));
+    });
+
+    test("a non-matching matches returns false and doesn't change the state",
+        () {
+      expect(scanner.matches(new RegExp('b(..)')), isFalse);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(0));
+      expect(scanner.rest, equals('foo bar'));
+    });
+
+    test('setting position to 1 moves the cursor forward', () {
+      scanner.position = 1;
+      expect(scanner.position, equals(1));
+      expect(scanner.rest, equals('oo bar'));
+
+      expect(scanner.scan(new RegExp('oo.')), isTrue);
+      expect(scanner.lastMatch[0], equals('oo '));
+      expect(scanner.position, equals(4));
+      expect(scanner.rest, equals('bar'));
+    });
+
+    test('setting position beyond the string throws an ArgumentError', () {
+      expect(() {
+        scanner.position = 8;
+      }, throwsArgumentError);
+    });
+
+    test('setting position to -1 throws an ArgumentError', () {
+      expect(() {
+        scanner.position = -1;
+      }, throwsArgumentError);
+    });
+
+    test('scan accepts any Pattern', () {
+      expect(scanner.scan('foo'), isTrue);
+      expect(scanner.lastMatch[0], equals('foo'));
+      expect(scanner.position, equals(3));
+      expect(scanner.rest, equals(' bar'));
+    });
+
+    test('scans multiple times', () {
+      expect(scanner.scan(new RegExp('f(..)')), isTrue);
+      expect(scanner.lastMatch[1], equals('oo'));
+      expect(scanner.position, equals(3));
+      expect(scanner.rest, equals(' bar'));
+
+      expect(scanner.scan(new RegExp(' b(..)')), isTrue);
+      expect(scanner.lastMatch[1], equals('ar'));
+      expect(scanner.position, equals(7));
+      expect(scanner.rest, equals(''));
+      expect(scanner.isDone, isTrue);
+      expect(scanner.expectDone, isNot(throwsFormatException));
+    });
+  });
+
+  group('at the end of a string', () {
+    var scanner;
+    setUp(() {
+      scanner = new StringScanner('foo bar');
+      expect(scanner.scan('foo bar'), isTrue);
+    });
+
+    test('is done', () {
+      expect(scanner.isDone, isTrue);
+      expect(scanner.expectDone, isNot(throwsFormatException));
+    });
+
+    test('rest is empty', () {
+      expect(scanner.rest, isEmpty);
+    });
+
+    test('position is at the end of the string', () {
+      expect(scanner.position, equals(7));
+    });
+
+    test("scan returns false and sets lastMatch to null", () {
+      expect(scanner.scan(new RegExp('.')), isFalse);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(7));
+    });
+
+    test("expect throws a FormatException and sets lastMatch to null", () {
+      expect(() => scanner.expect(new RegExp('.')), throwsFormatException);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(7));
+    });
+
+    test("matches returns false and sets lastMatch to null", () {
+      expect(scanner.matches(new RegExp('.')), isFalse);
+      expect(scanner.lastMatch, isNull);
+      expect(scanner.position, equals(7));
+    });
+
+    test('setting position to 1 moves the cursor backward', () {
+      scanner.position = 1;
+      expect(scanner.position, equals(1));
+      expect(scanner.rest, equals('oo bar'));
+
+      expect(scanner.scan(new RegExp('oo.')), isTrue);
+      expect(scanner.lastMatch[0], equals('oo '));
+      expect(scanner.position, equals(4));
+      expect(scanner.rest, equals('bar'));
+    });
+
+    test('setting position beyond the string throws an ArgumentError', () {
+      expect(() {
+        scanner.position = 8;
+      }, throwsArgumentError);
+    });
+
+    test('setting position to -1 throws an ArgumentError', () {
+      expect(() {
+        scanner.position = -1;
+      }, throwsArgumentError);
+    });
+  });
+
+  group('a scanner constructed with a custom position', () {
+    test('starts scanning from that position', () {
+      var scanner = new StringScanner('foo bar', position: 1);
+      expect(scanner.position, equals(1));
+      expect(scanner.rest, equals('oo bar'));
+
+      expect(scanner.scan(new RegExp('oo.')), isTrue);
+      expect(scanner.lastMatch[0], equals('oo '));
+      expect(scanner.position, equals(4));
+      expect(scanner.rest, equals('bar'));
+    });
+
+    test('throws an ArgumentError if the position is -1', () {
+      expect(() => new StringScanner('foo bar', position: -1),
+          throwsArgumentError);
+    });
+
+    test('throws an ArgumentError if the position is beyond the string', () {
+      expect(() => new StringScanner('foo bar', position: 8),
+          throwsArgumentError);
+    });
+  });
+}