Add new SpanScanner.eager().
This is more efficient for uses like the YAML parser, which uses the
current line and especially column information frequently while parsing
a file.
R=rnystrom@google.com
Review URL: https://codereview.chromium.org//1318603008 .
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ddfd540..23f911c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+## 0.1.4
+
+* Add `new SpanScanner.eager()` for creating a `SpanScanner` that eagerly
+ computes its current line and column numbers.
+
## 0.1.3+2
* Fix `LineScanner`'s handling of carriage returns to match that of
diff --git a/lib/src/eager_span_scanner.dart b/lib/src/eager_span_scanner.dart
new file mode 100644
index 0000000..3fae5cc
--- /dev/null
+++ b/lib/src/eager_span_scanner.dart
@@ -0,0 +1,115 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library string_scanner.eager_span_scanner;
+
+import 'package:charcode/ascii.dart';
+
+import 'line_scanner.dart';
+import 'span_scanner.dart';
+
+// TODO(nweiz): Currently this duplicates code in line_scanner.dart. Once
+// sdk#23770 is fully complete, we should move the shared code into a mixin.
+
+/// A regular expression matching newlines across platforms.
+///
+/// Matches `\r\n`, a lone `\r`, or a lone `\n`. Because the `\n` after a `\r`
+/// is consumed greedily, `\r\n` is reported as one match rather than two.
+final _newlineRegExp = new RegExp(r"\r\n?|\n");
+
+/// A [SpanScanner] that tracks the line and column eagerly, like [LineScanner].
+///
+/// [line] and [column] are updated by every [readChar], every successful
+/// [scan], and every assignment to [position] or [state], instead of being
+/// derived from the offset on demand.
+///
+/// A `\r\n` pair counts as a single newline that takes effect once the `\n`
+/// has been consumed. While the scanner sits between the two characters, the
+/// `\r` is counted as an ordinary column on the current line.
+class EagerSpanScanner extends SpanScanner {
+  /// The zero-based line number of the current position.
+  int get line => _line;
+  int _line = 0;
+
+  /// The zero-based column number of the current position.
+  int get column => _column;
+  int _column = 0;
+
+  /// A snapshot of the current position, line, and column.
+  ///
+  /// Assigning a state restores all three values without rescanning any text.
+  /// Only states produced by this scanner are accepted; anything else throws
+  /// an [ArgumentError].
+  LineScannerState get state =>
+      new _EagerSpanScannerState(this, position, line, column);
+
+  set state(LineScannerState state) {
+    if (state is! _EagerSpanScannerState ||
+        !identical((state as _EagerSpanScannerState)._scanner, this)) {
+      throw new ArgumentError("The given LineScannerState was not returned by "
+          "this LineScanner.");
+    }
+
+    // Assign through `super` so the recounting done by this class's own
+    // position setter is bypassed; the saved line and column are used as-is.
+    super.position = state.position;
+    _line = state.line;
+    _column = state.column;
+  }
+
+  /// Moves the scanner to [newPosition], adjusting [line] and [column] by
+  /// inspecting only the text between the old and the new position.
+  set position(int newPosition) {
+    var oldPosition = position;
+    super.position = newPosition;
+
+    // Nothing was crossed, so there is nothing to recount.
+    if (newPosition == oldPosition) return;
+
+    if (newPosition > oldPosition) {
+      var skipped = string.substring(oldPosition, newPosition);
+      var newlines = _newlinesIn(skipped, newPosition);
+      _line += newlines.length;
+      if (newlines.isEmpty) {
+        _column += newPosition - oldPosition;
+      } else {
+        // Match offsets are relative to [skipped], which begins at
+        // [oldPosition]; translate them back into offsets within [string].
+        _column = newPosition - (oldPosition + newlines.last.end);
+      }
+    } else {
+      // The skipped text ends at the *old* position, so that is where a
+      // dangling `\r` (the first half of a `\r\n`) has to be looked for.
+      var skipped = string.substring(newPosition, oldPosition);
+      var newlines = _newlinesIn(skipped, oldPosition);
+      _line -= newlines.length;
+      if (newlines.isEmpty) {
+        _column -= oldPosition - newPosition;
+      } else {
+        _column = newPosition - _lineStart(newPosition);
+      }
+    }
+  }
+
+  /// Creates a scanner over [string].
+  ///
+  /// [sourceUrl] and [position] are passed to the [SpanScanner] constructor
+  /// unchanged.
+  EagerSpanScanner(String string, {sourceUrl, int position})
+      : super(string, sourceUrl: sourceUrl, position: position);
+
+  /// Consumes a single character and returns it, updating [line] and [column].
+  int readChar() {
+    var char = super.readChar();
+    // A `\r` only ends the line when it isn't the first half of a `\r\n`.
+    if (char == $lf || (char == $cr && peekChar() != $lf)) {
+      _line += 1;
+      _column = 0;
+    } else {
+      _column += 1;
+    }
+    return char;
+  }
+
+  /// Tries to consume [pattern] at the current position.
+  ///
+  /// Returns whether the match succeeded. On success [line] and [column] are
+  /// advanced past the matched text.
+  bool scan(Pattern pattern) {
+    if (!super.scan(pattern)) return false;
+
+    var matched = lastMatch[0];
+    var newlines = _newlinesIn(matched, position);
+    _line += newlines.length;
+    if (newlines.isEmpty) {
+      _column += matched.length;
+    } else {
+      _column = matched.length - newlines.last.end;
+    }
+
+    return true;
+  }
+
+  /// Returns a list of [Match]es describing all the newlines in [text], which
+  /// must be the portion of [string] that ends at [endPosition].
+  ///
+  /// A trailing `\r` is dropped when the character at [endPosition] is `\n`:
+  /// the pair forms one newline that isn't complete until the `\n` is consumed.
+  List<Match> _newlinesIn(String text, int endPosition) {
+    var newlines = _newlineRegExp.allMatches(text).toList();
+    // The emptiness check matters: an empty [text] has no `\r` of its own to
+    // discard, even when [endPosition] sits inside a `\r\n`.
+    if (text.isNotEmpty && _betweenCRLFAt(endPosition)) newlines.removeLast();
+    return newlines;
+  }
+
+  /// Whether [offset] lies between the `\r` and the `\n` of a `\r\n` pair.
+  bool _betweenCRLFAt(int offset) =>
+      offset > 0 &&
+      offset < string.length &&
+      string.codeUnitAt(offset - 1) == $cr &&
+      string.codeUnitAt(offset) == $lf;
+
+  /// Returns the offset in [string] at which the line containing [offset]
+  /// begins.
+  int _lineStart(int offset) {
+    // Between a `\r\n` pair the `\r` hasn't ended the line yet, so start the
+    // backwards search just before it.
+    var index = _betweenCRLFAt(offset) ? offset - 2 : offset - 1;
+    for (; index >= 0; index--) {
+      var char = string.codeUnitAt(index);
+      if (char == $cr || char == $lf) return index + 1;
+    }
+    return 0;
+  }
+}
+
+/// A snapshot of an [EagerSpanScanner]'s position, line, and column.
+class _EagerSpanScannerState implements LineScannerState {
+  _EagerSpanScannerState(this._scanner, this.position, this.line, this.column);
+
+  /// The scanner this snapshot was created for.
+  final EagerSpanScanner _scanner;
+
+  /// The recorded offset.
+  final int position;
+
+  /// The recorded line number.
+  final int line;
+
+  /// The recorded column number.
+  final int column;
+}
+
diff --git a/lib/src/line_scanner.dart b/lib/src/line_scanner.dart
index 6a2880b..66d7575 100644
--- a/lib/src/line_scanner.dart
+++ b/lib/src/line_scanner.dart
@@ -8,6 +8,8 @@
import 'string_scanner.dart';
+// Note that much of this code is duplicated in eager_span_scanner.dart.
+
/// A regular expression matching newlines across platforms.
final _newlineRegExp = new RegExp(r"\r\n?|\n");
diff --git a/lib/src/span_scanner.dart b/lib/src/span_scanner.dart
index 2a78b5b..ebe230d 100644
--- a/lib/src/span_scanner.dart
+++ b/lib/src/span_scanner.dart
@@ -6,6 +6,7 @@
import 'package:source_span/source_span.dart';
+import 'eager_span_scanner.dart';
import 'exception.dart';
import 'line_scanner.dart';
import 'string_scanner.dart';
@@ -56,6 +57,20 @@
: _sourceFile = new SourceFile(string, url: sourceUrl),
super(string, sourceUrl: sourceUrl, position: position);
+ /// Creates a new [SpanScanner] that eagerly computes line and column numbers.
+ ///
+ /// This constructor redirects to [EagerSpanScanner], which receives [string],
+ /// [sourceUrl], and [position] unchanged.
+ ///
+ /// In general [new SpanScanner] will be more efficient, since it avoids extra
+ /// computation on every scan. However, eager scanning can be useful for
+ /// situations where the normal course of parsing frequently involves
+ /// accessing the current line and column numbers.
+ ///
+ /// Note that *only* the `line` and `column` fields on the `SpanScanner`
+ /// itself and its `LineScannerState` are eagerly computed. To limit their
+ /// memory footprint, returned spans and locations will still lazily compute
+ /// their line and column numbers.
+ factory SpanScanner.eager(String string, {sourceUrl, int position}) =
+ EagerSpanScanner;
+
/// Creates a [FileSpan] representing the source range between [startState]
/// and the current position.
FileSpan spanFrom(LineScannerState startState, [LineScannerState endState]) {
diff --git a/pubspec.yaml b/pubspec.yaml
index 3e71eb7..35b3fe0 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: string_scanner
-version: 0.1.3+2
+version: 0.1.4
author: "Dart Team <misc@dartlang.org>"
homepage: https://github.com/dart-lang/string_scanner
description: >
diff --git a/test/span_scanner_test.dart b/test/span_scanner_test.dart
index a6249b7..114bff7 100644
--- a/test/span_scanner_test.dart
+++ b/test/span_scanner_test.dart
@@ -8,53 +8,63 @@
import 'package:test/test.dart';
void main() {
- var scanner;
- setUp(() {
- scanner = new SpanScanner('foo\nbar\nbaz', sourceUrl: 'source');
+ // Run the shared tests against a scanner built by the default constructor.
+ testForImplementation("lazy", () {
+ return new SpanScanner('foo\nbar\nbaz', sourceUrl: 'source');
});
- test("tracks the span for the last match", () {
- scanner.scan('fo');
- scanner.scan('o\nba');
-
- var span = scanner.lastSpan;
- expect(span.start.offset, equals(2));
- expect(span.start.line, equals(0));
- expect(span.start.column, equals(2));
- expect(span.start.sourceUrl, equals(Uri.parse('source')));
-
- expect(span.end.offset, equals(6));
- expect(span.end.line, equals(1));
- expect(span.end.column, equals(2));
- expect(span.start.sourceUrl, equals(Uri.parse('source')));
-
- expect(span.text, equals('o\nba'));
+ // Run the same tests against a scanner built by `SpanScanner.eager`, using
+ // identical text and source URL.
+ testForImplementation("eager", () {
+ return new SpanScanner.eager('foo\nbar\nbaz', sourceUrl: 'source');
});
+}
- test(".spanFrom() returns a span from a previous state", () {
- scanner.scan('fo');
- var state = scanner.state;
- scanner.scan('o\nba');
- scanner.scan('r\nba');
+/// Registers the span tests inside a group labelled with [name].
+///
+/// [create] is called from `setUp`, so each test runs against a scanner
+/// freshly returned by it.
+void testForImplementation(String name, SpanScanner create()) {
+  group("for a $name scanner", () {
+    var scanner;
+    setUp(() => scanner = create());
- var span = scanner.spanFrom(state);
- expect(span.text, equals('o\nbar\nba'));
- });
+    test("tracks the span for the last match", () {
+      scanner.scan('fo');
+      scanner.scan('o\nba');
- test(".emptySpan returns an empty span at the current location", () {
- scanner.scan('foo\nba');
+      var span = scanner.lastSpan;
+      expect(span.start.offset, equals(2));
+      expect(span.start.line, equals(0));
+      expect(span.start.column, equals(2));
+      expect(span.start.sourceUrl, equals(Uri.parse('source')));
- var span = scanner.emptySpan;
- expect(span.start.offset, equals(6));
- expect(span.start.line, equals(1));
- expect(span.start.column, equals(2));
- expect(span.start.sourceUrl, equals(Uri.parse('source')));
+      expect(span.end.offset, equals(6));
+      expect(span.end.line, equals(1));
+      expect(span.end.column, equals(2));
+      // Check the end location's URL; the start's is already covered above.
+      expect(span.end.sourceUrl, equals(Uri.parse('source')));
- expect(span.end.offset, equals(6));
- expect(span.end.line, equals(1));
- expect(span.end.column, equals(2));
- expect(span.start.sourceUrl, equals(Uri.parse('source')));
+      expect(span.text, equals('o\nba'));
+    });
- expect(span.text, equals(''));
+    test(".spanFrom() returns a span from a previous state", () {
+      scanner.scan('fo');
+      var state = scanner.state;
+      scanner.scan('o\nba');
+      scanner.scan('r\nba');
+
+      var span = scanner.spanFrom(state);
+      expect(span.text, equals('o\nbar\nba'));
+    });
+
+    test(".emptySpan returns an empty span at the current location", () {
+      scanner.scan('foo\nba');
+
+      var span = scanner.emptySpan;
+      expect(span.start.offset, equals(6));
+      expect(span.start.line, equals(1));
+      expect(span.start.column, equals(2));
+      expect(span.start.sourceUrl, equals(Uri.parse('source')));
+
+      expect(span.end.offset, equals(6));
+      expect(span.end.line, equals(1));
+      expect(span.end.column, equals(2));
+      // Check the end location's URL; the start's is already covered above.
+      expect(span.end.sourceUrl, equals(Uri.parse('source')));
+
+      expect(span.text, equals(''));
+    });
});
}