Add new SpanScanner.eager(). This is more efficient for uses like the YAML parser, which uses the current line and especially column information frequently while parsing a file. R=rnystrom@google.com Review URL: https://codereview.chromium.org//1318603008 .
diff --git a/pkgs/string_scanner/CHANGELOG.md b/pkgs/string_scanner/CHANGELOG.md index ddfd540..23f911c 100644 --- a/pkgs/string_scanner/CHANGELOG.md +++ b/pkgs/string_scanner/CHANGELOG.md
@@ -1,3 +1,8 @@ +## 0.1.4 + +* Add `new SpanScanner.eager()` for creating a `SpanScanner` that eagerly + computes its current line and column numbers. + ## 0.1.3+2 * Fix `LineScanner`'s handling of carriage returns to match that of
diff --git a/pkgs/string_scanner/lib/src/eager_span_scanner.dart b/pkgs/string_scanner/lib/src/eager_span_scanner.dart new file mode 100644 index 0000000..3fae5cc --- /dev/null +++ b/pkgs/string_scanner/lib/src/eager_span_scanner.dart
@@ -0,0 +1,163 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library string_scanner.eager_span_scanner;
+
+import 'package:charcode/ascii.dart';
+
+import 'line_scanner.dart';
+import 'span_scanner.dart';
+
+// TODO(nweiz): Currently this duplicates code in line_scanner.dart. Once
+// sdk#23770 is fully complete, we should move the shared code into a mixin.
+
+/// A regular expression matching newlines across platforms.
+final _newlineRegExp = new RegExp(r"\r\n?|\n");
+
+/// A [SpanScanner] that tracks the line and column eagerly, like [LineScanner].
+///
+/// [line] and [column] are stored in fields that are updated every time the
+/// scanner's position changes through [position], [state], [readChar], or
+/// [scan].
+class EagerSpanScanner extends SpanScanner {
+  /// The current line number, counted from 0.
+  int get line => _line;
+  int _line = 0;
+
+  /// The current column number, counted from 0.
+  int get column => _column;
+  int _column = 0;
+
+  /// A snapshot of the current position, line, and column.
+  LineScannerState get state =>
+      new _EagerSpanScannerState(this, position, line, column);
+
+  /// Restores a snapshot previously returned by this scanner's [state] getter.
+  ///
+  /// Throws an [ArgumentError] if [state] was not created by this scanner.
+  set state(LineScannerState state) {
+    if (state is! _EagerSpanScannerState ||
+        !identical((state as _EagerSpanScannerState)._scanner, this)) {
+      throw new ArgumentError("The given LineScannerState was not returned by "
+          "this LineScanner.");
+    }
+
+    // The snapshot already records its line and column, so bypass the
+    // recomputation done by this class's own position setter.
+    super.position = state.position;
+    _line = state.line;
+    _column = state.column;
+  }
+
+  /// Moves the scanner to [newPosition] and updates [line] and [column].
+  set position(int newPosition) {
+    var oldPosition = position;
+    super.position = newPosition;
+
+    if (newPosition > oldPosition) {
+      var newlines =
+          _newlinesIn(string.substring(oldPosition, newPosition), newPosition);
+      _line += newlines.length;
+      if (newlines.isEmpty) {
+        _column += newPosition - oldPosition;
+      } else {
+        // Match offsets are relative to the substring, which starts at
+        // oldPosition, so convert them back to offsets in the full string.
+        _column = newPosition - (oldPosition + newlines.last.end);
+      }
+    } else if (newPosition < oldPosition) {
+      // The skipped text ends at oldPosition, so that is where a trailing CR
+      // has to be checked for a following LF.
+      var newlines =
+          _newlinesIn(string.substring(newPosition, oldPosition), oldPosition);
+      _line -= newlines.length;
+      if (newlines.isEmpty) {
+        _column -= oldPosition - newPosition;
+      } else {
+        _column = newPosition - _lineStartBefore(newPosition);
+      }
+    }
+  }
+
+  EagerSpanScanner(String string, {sourceUrl, int position})
+      : super(string, sourceUrl: sourceUrl, position: position);
+
+  /// Reads one character via the superclass and updates [line] and [column].
+  ///
+  /// An LF, or a CR that is not followed by an LF, starts a new line. A CR
+  /// that is followed by an LF only advances the column, so a CRLF pair counts
+  /// as a single newline once its LF has been read.
+  int readChar() {
+    var char = super.readChar();
+    if (char == $lf || (char == $cr && peekChar() != $lf)) {
+      _line += 1;
+      _column = 0;
+    } else {
+      _column += 1;
+    }
+    return char;
+  }
+
+  /// Scans [pattern] via the superclass and, on success, updates [line] and
+  /// [column] from the matched text.
+  bool scan(Pattern pattern) {
+    if (!super.scan(pattern)) return false;
+
+    var matched = lastMatch[0];
+    var newlines = _newlinesIn(matched, position);
+    _line += newlines.length;
+    if (newlines.isEmpty) {
+      _column += matched.length;
+    } else {
+      _column = matched.length - newlines.last.end;
+    }
+
+    return true;
+  }
+
+  /// Returns a list of [Match]es describing all the newlines in [text], which
+  /// is assumed to end at offset [endPosition] in [string].
+  ///
+  /// A CR at the very end of [text] is dropped when the character at
+  /// [endPosition] is an LF, because the CRLF pair it belongs to has not been
+  /// fully passed yet.
+  List<Match> _newlinesIn(String text, int endPosition) {
+    var newlines = _newlineRegExp.allMatches(text).toList();
+    // If text ends with a CR the regular expression has matched it, so the
+    // list cannot be empty when removeLast() is reached.
+    if (text.endsWith("\r") &&
+        endPosition < string.length &&
+        string.codeUnitAt(endPosition) == $lf) {
+      newlines.removeLast();
+    }
+    return newlines;
+  }
+
+  /// Returns the offset in [string] just past the last newline that ends at or
+  /// before [offset], or 0 if there is no such newline.
+  ///
+  /// Newlines are recognized the same way as in [readChar]: a CR immediately
+  /// followed by an LF is not a newline on its own.
+  int _lineStartBefore(int offset) {
+    for (var i = offset - 1; i >= 0; i--) {
+      var char = string.codeUnitAt(i);
+      if (char == $lf) return i + 1;
+      if (char == $cr &&
+          (i + 1 == string.length || string.codeUnitAt(i + 1) != $lf)) {
+        return i + 1;
+      }
+    }
+    return 0;
+  }
+}
+
+/// A class representing the state of an [EagerSpanScanner].
+class _EagerSpanScannerState implements LineScannerState {
+  final EagerSpanScanner _scanner;
+  final int position;
+  final int line;
+  final int column;
+
+  _EagerSpanScannerState(this._scanner, this.position, this.line, this.column);
+}
diff --git a/pkgs/string_scanner/lib/src/line_scanner.dart b/pkgs/string_scanner/lib/src/line_scanner.dart index 6a2880b..66d7575 100644 --- a/pkgs/string_scanner/lib/src/line_scanner.dart +++ b/pkgs/string_scanner/lib/src/line_scanner.dart
@@ -8,6 +8,8 @@ import 'string_scanner.dart'; +// Note that much of this code is duplicated in eager_span_scanner.dart. + /// A regular expression matching newlines across platforms. final _newlineRegExp = new RegExp(r"\r\n?|\n");
diff --git a/pkgs/string_scanner/lib/src/span_scanner.dart b/pkgs/string_scanner/lib/src/span_scanner.dart index 2a78b5b..ebe230d 100644 --- a/pkgs/string_scanner/lib/src/span_scanner.dart +++ b/pkgs/string_scanner/lib/src/span_scanner.dart
@@ -6,6 +6,7 @@ import 'package:source_span/source_span.dart'; +import 'eager_span_scanner.dart'; import 'exception.dart'; import 'line_scanner.dart'; import 'string_scanner.dart'; @@ -56,6 +57,20 @@ : _sourceFile = new SourceFile(string, url: sourceUrl), super(string, sourceUrl: sourceUrl, position: position); + /// Creates a new [SpanScanner] that eagerly computes line and column numbers. + /// + /// In general [new SpanScanner] will be more efficient, since it avoids extra + /// computation on every scan. However, eager scanning can be useful for + /// situations where the normal course of parsing frequently involves + /// accessing the current line and column numbers. + /// + /// Note that *only* the `line` and `column` fields on the `SpanScanner` + /// itself and its `LineScannerState` are eagerly computed. To limit their + /// memory footprint, returned spans and locations will still lazily compute + /// their line and column numbers. + factory SpanScanner.eager(String string, {sourceUrl, int position}) = + EagerSpanScanner; + /// Creates a [FileSpan] representing the source range between [startState] /// and the current position. FileSpan spanFrom(LineScannerState startState, [LineScannerState endState]) {
diff --git a/pkgs/string_scanner/pubspec.yaml b/pkgs/string_scanner/pubspec.yaml index 3e71eb7..35b3fe0 100644 --- a/pkgs/string_scanner/pubspec.yaml +++ b/pkgs/string_scanner/pubspec.yaml
@@ -1,5 +1,5 @@ name: string_scanner -version: 0.1.3+2 +version: 0.1.4 author: "Dart Team <misc@dartlang.org>" homepage: https://github.com/dart-lang/string_scanner description: >
diff --git a/pkgs/string_scanner/test/span_scanner_test.dart b/pkgs/string_scanner/test/span_scanner_test.dart index a6249b7..114bff7 100644 --- a/pkgs/string_scanner/test/span_scanner_test.dart +++ b/pkgs/string_scanner/test/span_scanner_test.dart
@@ -8,53 +8,63 @@ import 'package:test/test.dart'; void main() { - var scanner; - setUp(() { - scanner = new SpanScanner('foo\nbar\nbaz', sourceUrl: 'source'); + testForImplementation("lazy", () { + return new SpanScanner('foo\nbar\nbaz', sourceUrl: 'source'); }); - test("tracks the span for the last match", () { - scanner.scan('fo'); - scanner.scan('o\nba'); - - var span = scanner.lastSpan; - expect(span.start.offset, equals(2)); - expect(span.start.line, equals(0)); - expect(span.start.column, equals(2)); - expect(span.start.sourceUrl, equals(Uri.parse('source'))); - - expect(span.end.offset, equals(6)); - expect(span.end.line, equals(1)); - expect(span.end.column, equals(2)); - expect(span.start.sourceUrl, equals(Uri.parse('source'))); - - expect(span.text, equals('o\nba')); + testForImplementation("eager", () { + return new SpanScanner.eager('foo\nbar\nbaz', sourceUrl: 'source'); }); +} - test(".spanFrom() returns a span from a previous state", () { - scanner.scan('fo'); - var state = scanner.state; - scanner.scan('o\nba'); - scanner.scan('r\nba'); +void testForImplementation(String name, SpanScanner create()) { + group("for a $name scanner", () { + var scanner; + setUp(() => scanner = create()); - var span = scanner.spanFrom(state); - expect(span.text, equals('o\nbar\nba')); - }); + test("tracks the span for the last match", () { + scanner.scan('fo'); + scanner.scan('o\nba'); - test(".emptySpan returns an empty span at the current location", () { - scanner.scan('foo\nba'); + var span = scanner.lastSpan; + expect(span.start.offset, equals(2)); + expect(span.start.line, equals(0)); + expect(span.start.column, equals(2)); + expect(span.start.sourceUrl, equals(Uri.parse('source'))); - var span = scanner.emptySpan; - expect(span.start.offset, equals(6)); - expect(span.start.line, equals(1)); - expect(span.start.column, equals(2)); - expect(span.start.sourceUrl, equals(Uri.parse('source'))); + expect(span.end.offset, equals(6)); + expect(span.end.line, equals(1)); + 
expect(span.end.column, equals(2)); + expect(span.start.sourceUrl, equals(Uri.parse('source'))); - expect(span.end.offset, equals(6)); - expect(span.end.line, equals(1)); - expect(span.end.column, equals(2)); - expect(span.start.sourceUrl, equals(Uri.parse('source'))); + expect(span.text, equals('o\nba')); + }); - expect(span.text, equals('')); + test(".spanFrom() returns a span from a previous state", () { + scanner.scan('fo'); + var state = scanner.state; + scanner.scan('o\nba'); + scanner.scan('r\nba'); + + var span = scanner.spanFrom(state); + expect(span.text, equals('o\nbar\nba')); + }); + + test(".emptySpan returns an empty span at the current location", () { + scanner.scan('foo\nba'); + + var span = scanner.emptySpan; + expect(span.start.offset, equals(6)); + expect(span.start.line, equals(1)); + expect(span.start.column, equals(2)); + expect(span.start.sourceUrl, equals(Uri.parse('source'))); + + expect(span.end.offset, equals(6)); + expect(span.end.line, equals(1)); + expect(span.end.column, equals(2)); + expect(span.start.sourceUrl, equals(Uri.parse('source'))); + + expect(span.text, equals('')); + }); }); }