Avoid using RegExps in path to work around issue 19090.
R=rnystrom@google.com
Review URL: https://codereview.chromium.org//309803003
git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart/pkg/path@36909 260f80e4-7a28-3924-810f-c04153c831b5
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9a6eccf..f1d2485 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,9 @@
* Many members on `Style` that provided access to patterns and functions used
internally for parsing paths have been deprecated.
+* Manually parse paths (rather than using RegExps to do so) for better
+ performance.
+
# 1.2.0
* Added `path.prettyUri`, which produces a human-readable representation of a
diff --git a/lib/src/characters.dart b/lib/src/characters.dart
new file mode 100644
index 0000000..ff196a6
--- /dev/null
+++ b/lib/src/characters.dart
@@ -0,0 +1,19 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// This library contains character-code definitions.
+library path.characters;
+
+const PLUS = 0x2b;
+const MINUS = 0x2d;
+const PERIOD = 0x2e;
+const SLASH = 0x2f;
+const ZERO = 0x30;
+const NINE = 0x39;
+const COLON = 0x3a;
+const UPPER_A = 0x41;
+const UPPER_Z = 0x5a;
+const LOWER_A = 0x61;
+const LOWER_Z = 0x7a;
+const BACKSLASH = 0x5c;
diff --git a/lib/src/context.dart b/lib/src/context.dart
index f3826d7..823e0c2 100644
--- a/lib/src/context.dart
+++ b/lib/src/context.dart
@@ -219,7 +219,7 @@
// replaces the path after it.
var parsed = _parse(part);
parsed.root = this.rootPrefix(buffer.toString());
- if (parsed.root.contains(style.needsSeparatorPattern)) {
+ if (style.needsSeparator(parsed.root)) {
parsed.separators[0] = style.separator;
}
buffer.clear();
@@ -230,7 +230,7 @@
buffer.clear();
buffer.write(part);
} else {
- if (part.length > 0 && part[0].contains(style.separatorPattern)) {
+ if (part.length > 0 && style.containsSeparator(part[0])) {
// The part starts with a separator, so we don't need to add one.
} else if (needsSeparator) {
buffer.write(separator);
@@ -241,7 +241,7 @@
// Unless this part ends with a separator, we'll need to add one before
// the next part.
- needsSeparator = part.contains(style.needsSeparatorPattern);
+ needsSeparator = style.needsSeparator(part);
}
return buffer.toString();
diff --git a/lib/src/internal_style.dart b/lib/src/internal_style.dart
index 13fb57e..67b5d34 100644
--- a/lib/src/internal_style.dart
+++ b/lib/src/internal_style.dart
@@ -13,55 +13,34 @@
/// the members that [Context] uses should be hidden from them. Those members
/// are defined on this class instead.
abstract class InternalStyle extends Style {
- /// The path separator for this style. On POSIX, this is `/`. On Windows,
- /// it's `\`.
+ /// The default path separator for this style.
+ ///
+ /// On POSIX, this is `/`. On Windows, it's `\`.
String get separator;
- /// The [Pattern] that can be used to match a separator for a path in this
- /// style. Windows allows both "/" and "\" as path separators even though "\"
- /// is the canonical one.
- Pattern get separatorPattern;
+ /// Returns whether [path] contains a separator.
+ bool containsSeparator(String path);
- /// The [Pattern] that matches path components that need a separator after
- /// them.
+ /// Returns whether [codeUnit] is the character code of a separator.
+ bool isSeparator(int codeUnit);
+
+ /// Returns whether this path component needs a separator after it.
///
/// Windows and POSIX styles just need separators when the previous component
/// doesn't already end in a separator, but the URL always needs to place a
/// separator between the root and the first component, even if the root
/// already ends in a separator character. For example, to join "file://" and
/// "usr", an additional "/" is needed (making "file:///usr").
- Pattern get needsSeparatorPattern;
-
- /// The [Pattern] that can be used to match the root prefix of an absolute
- /// path in this style.
- Pattern get rootPattern;
-
- /// The [Pattern] that can be used to match the root prefix of a root-relative
- /// path in this style.
- ///
- /// This can be null to indicate that this style doesn't support root-relative
- /// paths.
- final Pattern relativeRootPattern = null;
+ bool needsSeparator(String path);
/// Gets the root prefix of [path] if path is absolute. If [path] is relative,
/// returns `null`.
- String getRoot(String path) {
- // TODO(rnystrom): Use firstMatch() when #7080 is fixed.
- var matches = rootPattern.allMatches(path);
- if (matches.isNotEmpty) return matches.first[0];
- return getRelativeRoot(path);
- }
+ String getRoot(String path);
/// Gets the root prefix of [path] if it's root-relative.
///
/// If [path] is relative or absolute and not root-relative, returns `null`.
- String getRelativeRoot(String path) {
- if (relativeRootPattern == null) return null;
- // TODO(rnystrom): Use firstMatch() when #7080 is fixed.
- var matches = relativeRootPattern.allMatches(path);
- if (matches.isEmpty) return null;
- return matches.first[0];
- }
+ String getRelativeRoot(String path);
/// Returns the path represented by [uri] in this style.
String pathFromUri(Uri uri);
diff --git a/lib/src/parsed_path.dart b/lib/src/parsed_path.dart
index 3f3c3b9..57773ee 100644
--- a/lib/src/parsed_path.dart
+++ b/lib/src/parsed_path.dart
@@ -52,19 +52,21 @@
var parts = [];
var separators = [];
- var firstSeparator = style.separatorPattern.matchAsPrefix(path);
- if (firstSeparator != null) {
- separators.add(firstSeparator[0]);
- path = path.substring(firstSeparator[0].length);
+ var start = 0;
+
+ if (path.isNotEmpty && style.isSeparator(path.codeUnitAt(0))) {
+ separators.add(path[0]);
+ start = 1;
} else {
separators.add('');
}
- var start = 0;
- for (var match in style.separatorPattern.allMatches(path)) {
- parts.add(path.substring(start, match.start));
- separators.add(match[0]);
- start = match.end;
+ for (var i = start; i < path.length; i++) {
+ if (style.isSeparator(path.codeUnitAt(i))) {
+ parts.add(path.substring(start, i));
+ separators.add(path[i]);
+ start = i + 1;
+ }
}
// Add the final part, if any.
@@ -133,8 +135,7 @@
var newSeparators = new List.generate(
newParts.length, (_) => style.separator, growable: true);
newSeparators.insert(0,
- isAbsolute && newParts.length > 0 &&
- root.contains(style.needsSeparatorPattern) ?
+ isAbsolute && newParts.length > 0 && style.needsSeparator(root) ?
style.separator : '');
parts = newParts;
diff --git a/lib/src/style/posix.dart b/lib/src/style/posix.dart
index e0b6017..b8b82b4 100644
--- a/lib/src/style/posix.dart
+++ b/lib/src/style/posix.dart
@@ -4,6 +4,7 @@
library path.style.posix;
+import '../characters.dart' as chars;
import '../parsed_path.dart';
import '../internal_style.dart';
@@ -13,9 +14,28 @@
final name = 'posix';
final separator = '/';
+ final separators = const ['/'];
+
+ // Deprecated properties.
+
final separatorPattern = new RegExp(r'/');
final needsSeparatorPattern = new RegExp(r'[^/]$');
final rootPattern = new RegExp(r'^/');
+ final relativeRootPattern = null;
+
+ bool containsSeparator(String path) => path.contains('/');
+
+ bool isSeparator(int codeUnit) => codeUnit == chars.SLASH;
+
+ bool needsSeparator(String path) =>
+ path.isNotEmpty && !isSeparator(path.codeUnitAt(path.length - 1));
+
+ String getRoot(String path) {
+ if (path.isNotEmpty && isSeparator(path.codeUnitAt(0))) return '/';
+ return null;
+ }
+
+ String getRelativeRoot(String path) => null;
String pathFromUri(Uri uri) {
if (uri.scheme == '' || uri.scheme == 'file') {
diff --git a/lib/src/style/url.dart b/lib/src/style/url.dart
index 1e84917..f383923 100644
--- a/lib/src/style/url.dart
+++ b/lib/src/style/url.dart
@@ -4,7 +4,9 @@
library path.style.url;
+import '../characters.dart' as chars;
import '../internal_style.dart';
+import '../utils.dart';
/// The style for URL paths.
class UrlStyle extends InternalStyle {
@@ -12,14 +14,75 @@
final name = 'url';
final separator = '/';
+ final separators = const ['/'];
+
+ // Deprecated properties.
+
final separatorPattern = new RegExp(r'/');
final needsSeparatorPattern = new RegExp(
r"(^[a-zA-Z][-+.a-zA-Z\d]*://|[^/])$");
final rootPattern = new RegExp(r"[a-zA-Z][-+.a-zA-Z\d]*://[^/]*");
final relativeRootPattern = new RegExp(r"^/");
+ bool containsSeparator(String path) => path.contains('/');
+
+ bool isSeparator(int codeUnit) => codeUnit == chars.SLASH;
+
+  bool needsSeparator(String path) {
+    if (path.isEmpty) return false;
+
+    // A URL that doesn't end in "/" always needs a separator.
+    if (!isSeparator(path.codeUnitAt(path.length - 1))) return true;
+
+    // Only a URL that is exactly "scheme://" needs an extra separator despite
+    // ending with "/"; "scheme:///" or "scheme://host/" must not get one.
+    var root = _getRoot(path);
+    return root == path && root.endsWith('://');
+  }
+
+ String getRoot(String path) {
+ var root = _getRoot(path);
+ return root == null ? getRelativeRoot(path) : root;
+ }
+
+ String getRelativeRoot(String path) {
+ if (path.isEmpty) return null;
+ return isSeparator(path.codeUnitAt(0)) ? "/" : null;
+ }
+
String pathFromUri(Uri uri) => uri.toString();
Uri relativePathToUri(String path) => Uri.parse(path);
Uri absolutePathToUri(String path) => Uri.parse(path);
+
+ // A helper method for [getRoot] that doesn't handle relative roots.
+ String _getRoot(String path) {
+ if (path.isEmpty) return null;
+
+ // We aren't using a RegExp for this because they're slow (issue 19090). If
+ // we could, we'd match against r"[a-zA-Z][-+.a-zA-Z\d]*://[^/]*".
+
+ if (!isAlphabetic(path.codeUnitAt(0))) return null;
+ var start = 1;
+ for (; start < path.length; start++) {
+ var char = path.codeUnitAt(start);
+ if (isAlphabetic(char)) continue;
+ if (isNumeric(char)) continue;
+ if (char == chars.MINUS || char == chars.PLUS || char == chars.PERIOD) {
+ continue;
+ }
+
+ break;
+ }
+
+ if (start + 3 > path.length) return null;
+ if (path.substring(start, start + 3) != '://') return null;
+ start += 3;
+
+ // A URL root can end with a non-"/" prefix.
+ while (start < path.length && !isSeparator(path.codeUnitAt(start))) {
+ start++;
+ }
+ return path.substring(0, start);
+ }
}
diff --git a/lib/src/style/windows.dart b/lib/src/style/windows.dart
index be9f45f..2965f1e 100644
--- a/lib/src/style/windows.dart
+++ b/lib/src/style/windows.dart
@@ -4,8 +4,10 @@
library path.style.windows;
-import '../parsed_path.dart';
+import '../characters.dart' as chars;
import '../internal_style.dart';
+import '../parsed_path.dart';
+import '../utils.dart';
/// The style for Windows paths.
class WindowsStyle extends InternalStyle {
@@ -13,11 +15,37 @@
final name = 'windows';
final separator = '\\';
+ final separators = const ['/', '\\'];
+
+ // Deprecated properties.
+
final separatorPattern = new RegExp(r'[/\\]');
final needsSeparatorPattern = new RegExp(r'[^/\\]$');
final rootPattern = new RegExp(r'^(\\\\[^\\]+\\[^\\/]+|[a-zA-Z]:[/\\])');
final relativeRootPattern = new RegExp(r"^[/\\](?![/\\])");
+  bool containsSeparator(String path) => separators.any(path.contains);
+
+ bool isSeparator(int codeUnit) =>
+ codeUnit == chars.SLASH || codeUnit == chars.BACKSLASH;
+
+ bool needsSeparator(String path) {
+ if (path.isEmpty) return false;
+ return !isSeparator(path.codeUnitAt(path.length - 1));
+ }
+
+ String getRoot(String path) {
+ var root = _getRoot(path);
+ return root == null ? getRelativeRoot(path) : root;
+ }
+
+ String getRelativeRoot(String path) {
+ if (path.isEmpty) return null;
+ if (!isSeparator(path.codeUnitAt(0))) return null;
+ if (path.length > 1 && isSeparator(path.codeUnitAt(1))) return null;
+ return path[0];
+ }
+
String pathFromUri(Uri uri) {
if (uri.scheme != '' && uri.scheme != 'file') {
throw new ArgumentError("Uri $uri must have scheme 'file:'.");
@@ -66,9 +94,45 @@
// Get rid of the trailing "\" in "C:\" because the URI constructor will
// add a separator on its own.
- parsed.parts.insert(0, parsed.root.replaceAll(separatorPattern, ""));
+ parsed.parts.insert(0,
+ parsed.root.replaceAll("/", "").replaceAll("\\", ""));
return new Uri(scheme: 'file', pathSegments: parsed.parts);
}
}
+
+ // A helper method for [getRoot] that doesn't handle relative roots.
+  String _getRoot(String path) {
+    if (path.length < 3) return null;
+
+    // We aren't using a RegExp for this because they're slow (issue 19090). If
+    // we could, we'd match against r'^(\\\\[^\\]+\\[^\\/]+|[a-zA-Z]:[/\\])'.
+
+    // Try roots like "C:\".
+    if (isAlphabetic(path.codeUnitAt(0))) {
+      if (path.codeUnitAt(1) != chars.COLON) return null;
+      if (!isSeparator(path.codeUnitAt(2))) return null;
+      return path.substring(0, 3);
+    }
+
+    // Try roots like "\\server\share".
+    if (!path.startsWith('\\\\')) return null;
+
+    var start = 2;
+    // The server is one or more non-"\" characters.
+    while (start < path.length && path.codeUnitAt(start) != chars.BACKSLASH) {
+      start++;
+    }
+    if (start == 2 || start == path.length) return null;
+
+    // The share is one or more non-"\" characters (absent in "\\server\").
+    start += 1;
+    if (start == path.length) return null;
+    if (path.codeUnitAt(start) == chars.BACKSLASH) return null;
+    while (start < path.length && path.codeUnitAt(start) != chars.BACKSLASH) {
+      start++;
+    }
+
+    return path.substring(0, start);
+  }
}
\ No newline at end of file
diff --git a/lib/src/utils.dart b/lib/src/utils.dart
new file mode 100644
index 0000000..0636261
--- /dev/null
+++ b/lib/src/utils.dart
@@ -0,0 +1,16 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library path.utils;
+
+import 'characters.dart' as chars;
+
+/// Returns whether [char] is the code for an ASCII letter (uppercase or
+/// lowercase).
+bool isAlphabetic(int char) =>
+ (char >= chars.UPPER_A && char <= chars.UPPER_Z) ||
+ (char >= chars.LOWER_A && char <= chars.LOWER_Z);
+
+/// Returns whether [char] is the code for an ASCII digit.
+bool isNumeric(int char) => char >= chars.ZERO && char <= chars.NINE;
diff --git a/pubspec.yaml b/pubspec.yaml
index 86e7d95..7bc83bf 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: path
-version: 1.2.1-dev
+version: 1.2.1
author: Dart Team <misc@dartlang.org>
description: >
A string-based path manipulation library. All of the path operations you know