Don't use regular expressions in Loader.
This is based on https://codereview.chromium.org/1325053002. It also
avoids redundant parsing where possible.
Closes #12
R=rnystrom@google.com
Review URL: https://codereview.chromium.org/1329763002
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5dcd749..6359d97 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 2.1.4
+
+* Substantially improve performance.
+
## 2.1.3
* Add a hint that a colon might be missing when a mapping value is found in the
diff --git a/lib/src/loader.dart b/lib/src/loader.dart
index 0792957..5336310 100644
--- a/lib/src/loader.dart
+++ b/lib/src/loader.dart
@@ -4,6 +4,7 @@
library yaml.loader;
+import 'package:charcode/ascii.dart';
import 'package:source_span/source_span.dart';
import 'equality.dart';
@@ -13,31 +14,6 @@
import 'yaml_exception.dart';
import 'yaml_node.dart';
-/// Matches YAML null.
-final _nullRegExp = new RegExp(r"^(null|Null|NULL|~|)$");
-
-/// Matches a YAML bool.
-final _boolRegExp = new RegExp(r"^(?:(true|True|TRUE)|(false|False|FALSE))$");
-
-/// Matches a YAML decimal integer like `+1234`.
-final _decimalIntRegExp = new RegExp(r"^[-+]?[0-9]+$");
-
-/// Matches a YAML octal integer like `0o123`.
-final _octalIntRegExp = new RegExp(r"^0o([0-7]+)$");
-
-/// Matches a YAML hexidecimal integer like `0x123abc`.
-final _hexIntRegExp = new RegExp(r"^0x[0-9a-fA-F]+$");
-
-/// Matches a YAML floating point number like `12.34+e56`.
-final _floatRegExp = new RegExp(
- r"^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
-
-/// Matches YAML infinity.
-final _infinityRegExp = new RegExp(r"^([+-]?)\.(inf|Inf|INF)$");
-
-/// Matches YAML NaN.
-final _nanRegExp = new RegExp(r"^\.(nan|NaN|NAN)$");
-
/// A loader that reads [Event]s emitted by a [Parser] and emits
/// [YamlDocument]s.
///
@@ -133,15 +109,11 @@
YamlNode _loadScalar(ScalarEvent scalar) {
var node;
if (scalar.tag == "!") {
- node = _parseString(scalar);
+ node = new YamlScalar.internal(scalar.value, scalar);
} else if (scalar.tag != null) {
node = _parseByTag(scalar);
} else {
- node = _parseNull(scalar);
- if (node == null) node = _parseBool(scalar);
- if (node == null) node = _parseInt(scalar);
- if (node == null) node = _parseFloat(scalar);
- if (node == null) node = _parseString(scalar);
+ node = _parseScalar(scalar);
}
_registerAnchor(scalar.anchor, node);
@@ -197,82 +169,191 @@
/// Parses a scalar according to its tag name.
+ ///
+ /// Throws a [YamlException] if [scalar]'s value is not valid for its tag, or
+ /// if the tag is not one of the scalar tags handled below.
YamlScalar _parseByTag(ScalarEvent scalar) {
switch (scalar.tag) {
- case "tag:yaml.org,2002:null": return _parseNull(scalar);
- case "tag:yaml.org,2002:bool": return _parseBool(scalar);
- case "tag:yaml.org,2002:int": return _parseInt(scalar);
- case "tag:yaml.org,2002:float": return _parseFloat(scalar);
- case "tag:yaml.org,2002:str": return _parseString(scalar);
+ case "tag:yaml.org,2002:null":
+ var result = _parseNull(scalar);
+ if (result != null) return result;
+ throw new YamlException("Invalid null scalar.", scalar.span);
+ case "tag:yaml.org,2002:bool":
+ var result = _parseBool(scalar);
+ if (result != null) return result;
+ throw new YamlException("Invalid bool scalar.", scalar.span);
+ case "tag:yaml.org,2002:int":
+ var result = _parseNumber(scalar, allowFloat: false);
+ if (result != null) return result;
+ throw new YamlException("Invalid int scalar.", scalar.span);
+ case "tag:yaml.org,2002:float":
+ var result = _parseNumber(scalar, allowInt: false);
+ if (result != null) return result;
+ throw new YamlException("Invalid float scalar.", scalar.span);
+ case "tag:yaml.org,2002:str":
+ // Any text is a valid string, so this case never throws.
+ return new YamlScalar.internal(scalar.value, scalar);
+ default:
+ throw new YamlException('Undefined tag: ${scalar.tag}.', scalar.span);
}
- throw new YamlException('Undefined tag: ${scalar.tag}.', scalar.span);
}
- /// Parses a null scalar.
+ /// Parses [scalar] as a null, bool, or number when its text allows it.
+ ///
+ /// Falls back to a string scalar holding the raw text when
+ /// [_tryParseScalar] returns `null`.
+ YamlScalar _parseScalar(ScalarEvent scalar) {
+ var parsed = _tryParseScalar(scalar);
+ if (parsed != null) return parsed;
+ return new YamlScalar.internal(scalar.value, scalar);
+ }
+
+ /// Attempts to interpret [scalar] as a null, bool, or number.
+ ///
+ /// Returns `null` when the text is none of those, in which case the caller
+ /// should treat the scalar as a string.
+ YamlScalar _tryParseScalar(ScalarEvent scalar) {
+ var text = scalar.value;
+
+ // The empty string means null.
+ if (text.isEmpty) return new YamlScalar.internal(null, scalar);
+
+ // The first code unit narrows down which parsers are worth trying.
+ var first = text.codeUnitAt(0);
+ var isDigit = first >= $0 && first <= $9;
+ if (isDigit || first == $dot || first == $plus || first == $minus) {
+ return _parseNumber(scalar);
+ }
+
+ // Every remaining candidate has a fixed length, so check that before
+ // comparing against the accepted spellings.
+ var size = text.length;
+ if (first == $n || first == $N) {
+ return size == 4 ? _parseNull(scalar) : null;
+ } else if (first == $t || first == $T) {
+ return size == 4 ? _parseBool(scalar) : null;
+ } else if (first == $f || first == $F) {
+ return size == 5 ? _parseBool(scalar) : null;
+ } else if (first == $tilde) {
+ return size == 1 ? new YamlScalar.internal(null, scalar) : null;
+ }
+ return null;
+ }
+
+ /// Parses [scalar] as a null value.
+ ///
+ /// Returns a [YamlScalar] whose value is `null` if [scalar] is one of the
+ /// accepted spellings of null, or a Dart `null` if it isn't.
YamlScalar _parseNull(ScalarEvent scalar) {
- // TODO(nweiz): add ScalarStyle and implicit metadata to the scalars.
- if (_nullRegExp.hasMatch(scalar.value)) {
- return new YamlScalar.internal(null, scalar.span, scalar.style);
- } else {
- return null;
+ switch (scalar.value) {
+ case "":
+ case "null":
+ case "Null":
+ case "NULL":
+ case "~":
+ return new YamlScalar.internal(null, scalar);
+ default:
+ return null;
}
}
- /// Parses a boolean scalar.
+ /// Parses [scalar] as a boolean.
+ ///
+ /// Returns a [YamlScalar] wrapping `true` or `false` if [scalar] is one of
+ /// the accepted spellings, or `null` if it isn't.
YamlScalar _parseBool(ScalarEvent scalar) {
- var match = _boolRegExp.firstMatch(scalar.value);
- if (match == null) return null;
- return new YamlScalar.internal(
- match.group(1) != null, scalar.span, scalar.style);
+ switch (scalar.value) {
+ case "true":
+ case "True":
+ case "TRUE":
+ return new YamlScalar.internal(true, scalar);
+ case "false":
+ case "False":
+ case "FALSE":
+ return new YamlScalar.internal(false, scalar);
+ default:
+ return null;
+ }
}
- /// Parses an integer scalar.
- YamlScalar _parseInt(ScalarEvent scalar) {
- var match = _decimalIntRegExp.firstMatch(scalar.value);
- if (match != null) {
- return new YamlScalar.internal(
- int.parse(match.group(0)), scalar.span, scalar.style);
+ /// Parses a numeric scalar.
+ ///
+ /// [allowInt] and [allowFloat] select which numeric forms are accepted; both
+ /// are forwarded unchanged to [_parseNumberValue].
+ ///
+ /// Returns `null` if parsing fails.
+ YamlScalar _parseNumber(ScalarEvent scalar, {bool allowInt: true,
+ bool allowFloat: true}) {
+ var value = _parseNumberValue(scalar.value,
+ allowInt: allowInt, allowFloat: allowFloat);
+ return value == null ? null : new YamlScalar.internal(value, scalar);
+ }
+
+ /// Parses the value of a number.
+ ///
+ /// [allowInt] and [allowFloat] select which numeric forms are accepted; at
+ /// least one of them must be `true`. When only [allowFloat] is set, text that
+ /// looks like a decimal integer is still accepted and returned as a double.
+ ///
+ /// Returns the number if it's parsed successfully, or `null` if it's not.
+ /// Empty [contents] is never a number.
+ num _parseNumberValue(String contents, {bool allowInt: true,
+ bool allowFloat: true}) {
+ assert(allowInt || allowFloat);
+
+ // An explicitly tagged scalar (see [_parseByTag]) may be empty, so bail out
+ // before reading the first code unit.
+ var length = contents.length;
+ if (length == 0) return null;
+
+ var firstChar = contents.codeUnitAt(0);
+
+ // Quick check for single digits. This must run even when ints are not
+ // allowed, because everything below reads a second code unit.
+ if (length == 1) {
+ var value = firstChar - $0;
+ if (value < 0 || value > 9) return null;
+ return allowInt ? value : value.toDouble();
}
- match = _octalIntRegExp.firstMatch(scalar.value);
- if (match != null) {
- var n = int.parse(match.group(1), radix: 8);
- return new YamlScalar.internal(n, scalar.span, scalar.style);
+ var secondChar = contents.codeUnitAt(1);
+
+ // Hexadecimal or octal integers.
+ if (allowInt && firstChar == $0) {
+ // int.parse supports 0x natively.
+ if (secondChar == $x) return int.parse(contents, onError: (_) => null);
+
+ if (secondChar == $o) {
+ // int.parse accepts a leading sign, but "0o-7" is not a YAML integer.
+ if (length > 2) {
+ var thirdChar = contents.codeUnitAt(2);
+ if (thirdChar == $plus || thirdChar == $minus) return null;
+ }
+
+ var afterRadix = contents.substring(2);
+ return int.parse(afterRadix, radix: 8, onError: (_) => null);
+ }
}
- match = _hexIntRegExp.firstMatch(scalar.value);
- if (match != null) {
- return new YamlScalar.internal(
- int.parse(match.group(0)), scalar.span, scalar.style);
+ // Int or float starting with a digit or a +/- sign.
+ if ((firstChar >= $0 && firstChar <= $9) ||
+ ((firstChar == $plus || firstChar == $minus) &&
+ secondChar >= $0 && secondChar <= $9)) {
+ // Try to parse an int or, failing that, a double.
+ var result = null;
+ if (allowInt) {
+ // Pass "radix: 10" explicitly to ensure that "-0x10", which is valid
+ // Dart but invalid YAML, doesn't get parsed.
+ result = int.parse(contents, radix: 10, onError: (_) => null);
+ }
+
+ if (allowFloat) result ??= double.parse(contents, (_) => null);
+ return result;
+ }
+
+ if (!allowFloat) return null;
+
+ // Now the only possibility is to parse a float starting with a dot or a
+ // sign and a dot, or the signed/unsigned infinity values and not-a-numbers.
+ if ((firstChar == $dot && secondChar >= $0 && secondChar <= $9) ||
+ (firstChar == $minus || firstChar == $plus) && secondChar == $dot) {
+ // Starting with a . and a number or a sign followed by a dot.
+ if (length == 5) {
+ switch (contents) {
+ case "+.inf":
+ case "+.Inf":
+ case "+.INF":
+ return double.INFINITY;
+ case "-.inf":
+ case "-.Inf":
+ case "-.INF":
+ return -double.INFINITY;
+ }
+ }
+
+ return double.parse(contents, (_) => null);
+ }
+
+ if (length == 4 && firstChar == $dot) {
+ switch (contents) {
+ case ".inf":
+ case ".Inf":
+ case ".INF":
+ return double.INFINITY;
+ case ".nan":
+ case ".NaN":
+ case ".NAN":
+ return double.NAN;
+ }
}
return null;
}
-
- /// Parses a floating-point scalar.
- YamlScalar _parseFloat(ScalarEvent scalar) {
- var match = _floatRegExp.firstMatch(scalar.value);
- if (match != null) {
- // YAML allows floats of the form "0.", but Dart does not. Fix up those
- // floats by removing the trailing dot.
- var matchStr = match.group(0).replaceAll(new RegExp(r"\.$"), "");
- return new YamlScalar.internal(
- double.parse(matchStr), scalar.span, scalar.style);
- }
-
- match = _infinityRegExp.firstMatch(scalar.value);
- if (match != null) {
- var value = match.group(1) == "-" ? -double.INFINITY : double.INFINITY;
- return new YamlScalar.internal(value, scalar.span, scalar.style);
- }
-
- match = _nanRegExp.firstMatch(scalar.value);
- if (match != null) {
- return new YamlScalar.internal(double.NAN, scalar.span, scalar.style);
- }
-
- return null;
- }
-
- /// Parses a string scalar.
- YamlScalar _parseString(ScalarEvent scalar) =>
- new YamlScalar.internal(scalar.value, scalar.span, scalar.style);
}
diff --git a/lib/src/yaml_node.dart b/lib/src/yaml_node.dart
index 027bc1b..d4961bb 100644
--- a/lib/src/yaml_node.dart
+++ b/lib/src/yaml_node.dart
@@ -9,6 +9,7 @@
import 'package:collection/collection.dart';
import 'package:source_span/source_span.dart';
+import 'event.dart';
import 'null_span.dart';
import 'style.dart';
import 'yaml_node_wrapper.dart';
@@ -163,7 +164,14 @@
}
/// Users of the library should not use this constructor.
- YamlScalar.internal(this.value, SourceSpan span, this.style) {
+ YamlScalar.internal(this.value, ScalarEvent scalar)
+ : style = scalar.style {
+ // There is no `span` parameter here, so take the span from the event.
+ _span = scalar.span;
+ }
+
+ /// Users of the library should not use this constructor.
+ ///
+ /// Unlike [YamlScalar.internal], this takes the span directly instead of a
+ /// [ScalarEvent], and always uses [ScalarStyle.ANY] as the style.
+ YamlScalar.internalWithSpan(this.value, SourceSpan span)
+ : style = ScalarStyle.ANY {
_span = span;
}
diff --git a/lib/src/yaml_node_wrapper.dart b/lib/src/yaml_node_wrapper.dart
index be96ba4..d7146c4 100644
--- a/lib/src/yaml_node_wrapper.dart
+++ b/lib/src/yaml_node_wrapper.dart
@@ -59,7 +59,7 @@
final SourceSpan _span;
Iterable get keys => _dartMap.keys.map((key) =>
- new YamlScalar.internal(key, _span, ScalarStyle.ANY));
+ new YamlScalar.internalWithSpan(key, _span));
_YamlMapNodes(this._dartMap, this._span);
@@ -151,5 +151,5 @@
YamlNode _nodeForValue(value, SourceSpan span) {
if (value is Map) return new YamlMapWrapper._(value, span);
if (value is List) return new YamlListWrapper._(value, span);
- return new YamlScalar.internal(value, span, ScalarStyle.ANY);
+ return new YamlScalar.internalWithSpan(value, span);
}
diff --git a/lib/yaml.dart b/lib/yaml.dart
index 9af329a..aa120ef 100644
--- a/lib/yaml.dart
+++ b/lib/yaml.dart
@@ -53,7 +53,7 @@
var document = loader.load();
if (document == null) {
return new YamlDocument.internal(
- new YamlScalar.internal(null, loader.span, ScalarStyle.ANY),
+ new YamlScalar.internalWithSpan(null, loader.span),
loader.span, null, const []);
}
diff --git a/pubspec.yaml b/pubspec.yaml
index 618f82b..f6a5b1e 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: yaml
-version: 2.1.4-dev
+version: 2.1.4
author: "Dart Team <misc@dartlang.org>"
homepage: https://github.com/dart-lang/yaml
description: A parser for YAML.
@@ -12,4 +12,4 @@
dev_dependencies:
test: ">=0.12.0 <0.13.0"
environment:
- sdk: '>=1.8.0 <2.0.0'
+ sdk: '>=1.12.0 <2.0.0'