Properly handle CR LF in LineScanner.
R=rnystrom@google.com
Review URL: https://codereview.chromium.org//1327713003 .
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce1e012..ddfd540 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+## 0.1.3+2
+
+* Fix `LineScanner`'s handling of carriage returns to match that of
+ `SpanScanner`.
+
## 0.1.3+1
* Fixed the homepage URL.
diff --git a/lib/src/line_scanner.dart b/lib/src/line_scanner.dart
index 54ecf7b..6a2880b 100644
--- a/lib/src/line_scanner.dart
+++ b/lib/src/line_scanner.dart
@@ -4,8 +4,13 @@
library string_scanner.line_scanner;
+import 'package:charcode/ascii.dart';
+
import 'string_scanner.dart';
+/// A regular expression matching newlines across platforms.
+final _newlineRegExp = new RegExp(r"\r\n?|\n");
+
/// A subclass of [StringScanner] that tracks line and column information.
class LineScanner extends StringScanner {
/// The scanner's current (zero-based) line number.
@@ -24,6 +29,10 @@
LineScannerState get state =>
new LineScannerState._(this, position, line, column);
+ /// Whether the current position is between a CR character and an LF
+ /// charactet.
+ bool get _betweenCRLF => peekChar(-1) == $cr && peekChar() == $lf;
+
set state(LineScannerState state) {
if (!identical(state._scanner, this)) {
throw new ArgumentError("The given LineScannerState was not returned by "
@@ -40,8 +49,7 @@
super.position = newPosition;
if (newPosition > oldPosition) {
- var newlines =
- "\n".allMatches(string.substring(oldPosition, newPosition)).toList();
+ var newlines = _newlinesIn(string.substring(oldPosition, newPosition));
_line += newlines.length;
if (newlines.isEmpty) {
_column += newPosition - oldPosition;
@@ -49,13 +57,15 @@
_column = newPosition - newlines.last.end;
}
} else {
- var newlines =
- "\n".allMatches(string.substring(newPosition, oldPosition)).toList();
+ var newlines = _newlinesIn(string.substring(newPosition, oldPosition));
+ if (_betweenCRLF) newlines.removeLast();
+
_line -= newlines.length;
if (newlines.isEmpty) {
_column -= oldPosition - newPosition;
} else {
- _column = newPosition - string.lastIndexOf("\n", newPosition) - 1;
+ _column = newPosition -
+ string.lastIndexOf(_newlineRegExp, newPosition) - 1;
}
}
}
@@ -65,7 +75,7 @@
int readChar() {
var char = super.readChar();
- if (char == 0xA) {
+ if (char == $lf || (char == $cr && peekChar() != $lf)) {
_line += 1;
_column = 0;
} else {
@@ -77,7 +87,7 @@
bool scan(Pattern pattern) {
if (!super.scan(pattern)) return false;
- var newlines = "\n".allMatches(lastMatch[0]).toList();
+ var newlines = _newlinesIn(lastMatch[0]);
_line += newlines.length;
if (newlines.isEmpty) {
_column += lastMatch[0].length;
@@ -87,6 +97,14 @@
return true;
}
+
+ /// Returns a list of [Match]es describing all the newlines in [text], which
+ /// is assumed to end at [position].
+ List<Match> _newlinesIn(String text) {
+ var newlines = _newlineRegExp.allMatches(text).toList();
+ if (_betweenCRLF) newlines.removeLast();
+ return newlines;
+ }
}
/// A class representing the state of a [LineScanner].
diff --git a/pubspec.yaml b/pubspec.yaml
index 16477be..efc7b0a 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,10 +1,11 @@
name: string_scanner
-version: 0.1.4-dev
+version: 0.1.3+2
author: "Dart Team <misc@dartlang.org>"
homepage: https://github.com/dart-lang/string_scanner
description: >
A class for parsing strings using a sequence of patterns.
dependencies:
+ charcode: "^1.1.0"
path: ">=1.2.0 <2.0.0"
source_span: ">=1.0.0 <2.0.0"
dev_dependencies:
diff --git a/test/line_scanner_test.dart b/test/line_scanner_test.dart
index 83222d6..68e5c38 100644
--- a/test/line_scanner_test.dart
+++ b/test/line_scanner_test.dart
@@ -10,7 +10,7 @@
void main() {
var scanner;
setUp(() {
- scanner = new LineScanner('foo\nbar\nbaz');
+ scanner = new LineScanner('foo\nbar\r\nbaz');
});
test('begins with line and column 0', () {
@@ -33,7 +33,17 @@
test("consuming multiple newlines resets the column and increases the line",
() {
- scanner.expect('foo\nbar\nb');
+ scanner.expect('foo\nbar\r\nb');
+ expect(scanner.line, equals(2));
+ expect(scanner.column, equals(1));
+ });
+
+ test("consuming halfway through a CR LF doesn't count as a line", () {
+ scanner.expect('foo\nbar\r');
+ expect(scanner.line, equals(1));
+ expect(scanner.column, equals(4));
+
+ scanner.expect('\nb');
expect(scanner.line, equals(2));
expect(scanner.column, equals(1));
});
@@ -56,11 +66,25 @@
expect(scanner.line, equals(1));
expect(scanner.column, equals(0));
});
+
+ test("consuming halfway through a CR LF doesn't count as a line", () {
+ scanner.expect('foo\nbar');
+ expect(scanner.line, equals(1));
+ expect(scanner.column, equals(3));
+
+ scanner.readChar();
+ expect(scanner.line, equals(1));
+ expect(scanner.column, equals(4));
+
+ scanner.readChar();
+ expect(scanner.line, equals(2));
+ expect(scanner.column, equals(0));
+ });
});
group("position=", () {
test("forward through newlines sets the line and column", () {
- scanner.position = 9; // "foo\nbar\nb"
+ scanner.position = 10; // "foo\nbar\r\nb"
expect(scanner.line, equals(2));
expect(scanner.column, equals(1));
});
@@ -72,18 +96,24 @@
});
test("backward through newlines sets the line and column", () {
- scanner.scan("foo\nbar\nbaz");
+ scanner.scan("foo\nbar\r\nbaz");
scanner.position = 2; // "fo"
expect(scanner.line, equals(0));
expect(scanner.column, equals(2));
});
test("backward through no newlines sets the column", () {
- scanner.scan("foo\nbar\nbaz");
- scanner.position = 9; // "foo\nbar\nb"
+ scanner.scan("foo\nbar\r\nbaz");
+ scanner.position = 10; // "foo\nbar\r\nb"
expect(scanner.line, equals(2));
expect(scanner.column, equals(1));
});
+
+ test("forward halfway through a CR LF doesn't count as a line", () {
+ scanner.position = 8; // "foo\nbar\r"
+ expect(scanner.line, equals(1));
+ expect(scanner.column, equals(4));
+ });
});
test("state= restores the line, column, and position", () {
@@ -92,7 +122,7 @@
scanner.scan('ar\nba');
scanner.state = state;
- expect(scanner.rest, equals('ar\nbaz'));
+ expect(scanner.rest, equals('ar\r\nbaz'));
expect(scanner.line, equals(1));
expect(scanner.column, equals(1));
});