Improve the performance of isWithin().
This adds a single-pass fast path function that only bails when it
encounters "." or ".." components in one path but not the other.
This brings isWithin() from about 0.008 iterations/us to about 0.0180 by
the repo's benchmark. It makes it about 11.5x faster on data provided by
@scheglov (see below), which consists of absolute paths that mostly do
not contain one another.
Data: https://github.com/dart-lang/path/issues/7#issuecomment-157856146
Closes #7
R=rnystrom@google.com
Review URL: https://codereview.chromium.org//1468343002 .
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1dfc09a..aee2424 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,14 @@
+## 1.3.8
+
+* Improve the performance of `isWithin()` when the paths don't contain
+ asymmetrical `.` or `..` components.
+
+* Improve the performance of `relative()` when `from` is `null` and the path is
+ already relative.
+
+* Improve the performance of `current` when the current directory hasn't
+ changed.
+
## 1.3.7
* Improve the performance of `absolute()` and `normalize()`.
diff --git a/benchmark/benchmark.dart b/benchmark/benchmark.dart
index 8dc2690..4285e65 100644
--- a/benchmark/benchmark.dart
+++ b/benchmark/benchmark.dart
@@ -21,18 +21,47 @@
print("$name: ${count / sw.elapsedMicroseconds} iter/us (${sw.elapsed})");
}
+void runBenchmarkTwoArgs(String name, Function func, List files) {
+ // Warmup.
+ for (int i = 0; i < 1000; i++) {
+ for (var file1 in files) {
+ for (var file2 in files) {
+ func(file1, file2);
+ }
+ }
+ }
+
+ var count = 10000;
+ var sw = new Stopwatch()..start();
+ for (int i = 0; i < count; i++) {
+ for (var file1 in files) {
+ for (var file2 in files) {
+ func(file1, file2);
+ }
+ }
+ }
+ print("$name: ${count / sw.elapsedMicroseconds} iter/us (${sw.elapsed})");
+}
+
main(args) {
for (var style in [path.Style.posix, path.Style.url, path.Style.windows]) {
var context = new path.Context(style: style);
var files = COMMON_PATHS.toList()..addAll(STYLE_PATHS[style]);
- void benchmark(name, func) {
+ benchmark(name, func) {
name = style.name + '-' + name;
if (args.isEmpty || args.any((arg) => name.contains(arg))) {
runBenchmark(name, func, files);
}
}
+ benchmarkTwoArgs(name, func) {
+ name = style.name + '-' + name + '-two';
+ if (args.isEmpty || args.any((arg) => name.contains(arg))) {
+ runBenchmarkTwoArgs(name, func, files);
+ }
+ }
+
benchmark('absolute', context.absolute);
benchmark('basename', context.basename);
benchmark('basenameWithoutExtension', context.basenameWithoutExtension);
@@ -44,8 +73,10 @@
benchmark('isRootRelative', context.isRootRelative);
benchmark('normalize', context.normalize);
benchmark('relative', context.relative);
+ benchmarkTwoArgs('relative', context.relative);
benchmark('toUri', context.toUri);
benchmark('prettyUri', context.prettyUri);
+ benchmarkTwoArgs('isWithin', context.isWithin);
}
if (args.isEmpty || args.any((arg) => arg == 'current')) {
diff --git a/lib/src/context.dart b/lib/src/context.dart
index 5331e37..b19aa71 100644
--- a/lib/src/context.dart
+++ b/lib/src/context.dart
@@ -414,13 +414,7 @@
// Avoid expensive computation if the path is already relative.
if (from == null && this.isRelative(path)) return this.normalize(path);
- // Avoid calling [current] since it is slow and calling join() when
- // [from] is absolute does nothing.
- if (from == null) {
- from = current;
- } else if (this.isRelative(from) || this.isRootRelative(from)) {
- from = this.join(current, from);
- }
+ from = from == null ? current : absolute(from);
// We can't determine the path from a relative path to an absolute path.
if (this.isRelative(from) && this.isAbsolute(path)) {
@@ -506,6 +500,31 @@
/// path.isWithin('/root/path', '/root/other'); // -> false
/// path.isWithin('/root/path', '/root/path'); // -> false
bool isWithin(String parent, String child) {
+ // Make both paths the same level of relative. We're only able to do the
+ // quick comparison if both paths are in the same format, and making a path
+ // absolute is faster than making it relative.
+ var parentIsAbsolute = isAbsolute(parent);
+ var childIsAbsolute = isAbsolute(child);
+ if (parentIsAbsolute && !childIsAbsolute) {
+ child = absolute(child);
+ if (style.isRootRelative(parent)) parent = absolute(parent);
+ } else if (childIsAbsolute && !parentIsAbsolute) {
+ parent = absolute(parent);
+ if (style.isRootRelative(child)) child = absolute(child);
+ } else if (childIsAbsolute && parentIsAbsolute) {
+ var childIsRootRelative = style.isRootRelative(child);
+ var parentIsRootRelative = style.isRootRelative(parent);
+
+ if (childIsRootRelative && !parentIsRootRelative) {
+ child = absolute(child);
+ } else if (parentIsRootRelative && !childIsRootRelative) {
+ parent = absolute(parent);
+ }
+ }
+
+ var fastResult = _isWithinFast(parent, child);
+ if (fastResult != null) return fastResult;
+
var relative;
try {
relative = this.relative(child, from: parent);
@@ -521,6 +540,214 @@
parts.first != '.';
}
+ /// An optimized implementation of [isWithin] that doesn't handle a few
+ /// complex cases.
+ bool _isWithinFast(String parent, String child) {
+ // Normally we just bail when we see "." path components, but we can handle
+ // a single dot easily enough.
+ if (parent == '.') parent = '';
+
+ var parentRootLength = style.rootLength(parent);
+ var childRootLength = style.rootLength(child);
+
+ // If the roots aren't the same length, we know both paths are absolute or
+ // both are root-relative, and thus that the roots are meaningfully
+ // different.
+ //
+ // isWithin("C:/bar", "//foo/bar/baz") //=> false
+ // isWithin("http://example.com/", "http://google.com/bar") //=> false
+ if (parentRootLength != childRootLength) return false;
+
+ var parentCodeUnits = parent.codeUnits;
+ var childCodeUnits = child.codeUnits;
+
+ // Make sure that the roots are textually the same as well.
+ //
+ // isWithin("C:/bar", "D:/bar/baz") //=> false
+ // isWithin("http://example.com/", "http://example.org/bar") //=> false
+ for (var i = 0; i < parentRootLength; i++) {
+ var parentCodeUnit = parentCodeUnits[i];
+ var childCodeUnit = childCodeUnits[i];
+ if (parentCodeUnit == childCodeUnit) continue;
+
+ // If both code units are separators, that's fine too.
+ //
+ // isWithin("C:/", r"C:\foo") //=> true
+ if (!style.isSeparator(parentCodeUnit) ||
+ !style.isSeparator(childCodeUnit)) {
+ return false;
+ }
+ }
+
+ // Start by considering the last code unit as a separator, since
+ // semantically we're starting at a new path component even if we're
+ // comparing relative paths.
+ var lastCodeUnit = chars.SLASH;
+
+ // Iterate through both paths as long as they're semantically identical.
+ var parentIndex = parentRootLength;
+ var childIndex = childRootLength;
+ while (parentIndex < parent.length && childIndex < child.length) {
+ var parentCodeUnit = parentCodeUnits[parentIndex];
+ var childCodeUnit = childCodeUnits[childIndex];
+ if (parentCodeUnit == childCodeUnit) {
+ lastCodeUnit = parentCodeUnit;
+ parentIndex++;
+ childIndex++;
+ continue;
+ }
+
+ // Different separators are considered identical.
+ var parentIsSeparator = style.isSeparator(parentCodeUnit);
+ var childIsSeparator = style.isSeparator(childCodeUnit);
+ if (parentIsSeparator && childIsSeparator) {
+ lastCodeUnit = parentCodeUnit;
+ parentIndex++;
+ childIndex++;
+ continue;
+ }
+
+ // Ignore multiple separators in a row.
+ if (parentIsSeparator && style.isSeparator(lastCodeUnit)) {
+ parentIndex++;
+ continue;
+ } else if (childIsSeparator && style.isSeparator(lastCodeUnit)) {
+ childIndex++;
+ continue;
+ }
+
+ // If a dot comes after a separator or another dot, it may be a
+ // directory traversal operator. Otherwise, it's just a normal
+ // non-matching character.
+ //
+ // isWithin("foo/./bar", "foo/bar/baz") //=> true
+ // isWithin("foo/bar/../baz", "foo/bar/.foo") //=> false
+ //
+ // We could stay on the fast path for "/./", but that adds a lot of
+ // complexity and isn't likely to come up much in practice.
+ if ((parentCodeUnit == chars.PERIOD || childCodeUnit == chars.PERIOD) &&
+ (style.isSeparator(lastCodeUnit) || lastCodeUnit == chars.PERIOD)) {
+ return null;
+ }
+
+ // If we're here, we've hit two non-matching, non-significant characters.
+ // As long as the remainders of the two paths don't have any unresolved
+ // ".." components, we can be confident that [child] is not within
+ // [parent].
+ var childDirection = _pathDirection(childCodeUnits, childIndex);
+ if (childDirection != _PathDirection.belowRoot) return null;
+ var parentDirection = _pathDirection(parentCodeUnits, parentIndex);
+ if (parentDirection != _PathDirection.belowRoot) return null;
+
+ return false;
+ }
+
+ // If the child is shorter than the parent, it's probably not within the
+ // parent. The only exception is if the parent has some weird ".." stuff
+ // going on, in which case we do the slow check.
+ //
+ // isWithin("foo/bar/baz", "foo/bar") //=> false
+ // isWithin("foo/bar/baz/../..", "foo/bar") //=> true
+ if (childIndex == child.length) {
+ var direction = _pathDirection(parentCodeUnits, parentIndex);
+ return direction == _PathDirection.aboveRoot ? null : false;
+ }
+
+ // We've reached the end of the parent path, which means it's time to make a
+ // decision. Before we do, though, we'll check the rest of the child to see
+ // what that tells us.
+ var direction = _pathDirection(childCodeUnits, childIndex);
+
+ // If there are no more components in the child, then it's the same as
+ // the parent, not within it.
+ //
+ // isWithin("foo/bar", "foo/bar") //=> false
+ // isWithin("foo/bar", "foo/bar//") //=> false
+ if (direction == _PathDirection.atRoot) return false;
+
+ // If there are unresolved ".." components in the child, no decision we make
+ // will be valid. We'll abort and do the slow check instead.
+ //
+ // isWithin("foo/bar", "foo/bar/..") //=> false
+ // isWithin("foo/bar", "foo/bar/baz/bang/../../..") //=> false
+ // isWithin("foo/bar", "foo/bar/baz/bang/../../../bar/baz") //=> true
+ if (direction == _PathDirection.aboveRoot) return null;
+
+ // The child is within the parent if and only if we're on a separator
+ // boundary.
+ //
+ // isWithin("foo/bar", "foo/bar/baz") //=> true
+ // isWithin("foo/bar/", "foo/bar/baz") //=> true
+ // isWithin("foo/bar", "foo/barbaz") //=> false
+ return style.isSeparator(childCodeUnits[childIndex]) ||
+ style.isSeparator(lastCodeUnit);
+ }
+
+ // Returns a [_PathDirection] describing the path represented by [codeUnits]
+ // after [index].
+ //
+ // This ignores leading separators.
+ //
+ // pathDirection("foo") //=> below root
+ // pathDirection("foo/bar/../baz") //=> below root
+ // pathDirection("//foo/bar/baz") //=> below root
+ // pathDirection("/") //=> at root
+ // pathDirection("foo/..") //=> at root
+ // pathDirection("foo/../baz") //=> reaches root
+ // pathDirection("foo/../..") //=> above root
+ // pathDirection("foo/../../foo/bar/baz") //=> above root
+ _PathDirection _pathDirection(List<int> codeUnits, int index) {
+ var depth = 0;
+ var reachedRoot = false;
+ var i = index;
+ while (i < codeUnits.length) {
+ // Ignore initial separators or doubled separators.
+ while (i < codeUnits.length && style.isSeparator(codeUnits[i])) {
+ i++;
+ }
+
+ // If we're at the end, stop.
+ if (i == codeUnits.length) break;
+
+ // Move through the path component to the next separator.
+ var start = i;
+ while (i < codeUnits.length && !style.isSeparator(codeUnits[i])) {
+ i++;
+ }
+
+ // See if the path component is ".", "..", or a name.
+ if (i - start == 1 && codeUnits[start] == chars.PERIOD) {
+ // Don't change the depth.
+ } else if (i - start == 2 &&
+ codeUnits[start] == chars.PERIOD &&
+ codeUnits[start + 1] == chars.PERIOD) {
+ // ".." backs out a directory.
+ depth--;
+
+ // If we work back beyond the root, stop.
+ if (depth < 0) break;
+
+ // Record that we reached the root so we don't return
+ // [_PathDirection.belowRoot].
+ if (depth == 0) reachedRoot = true;
+ } else {
+ // Step inside a directory.
+ depth++;
+ }
+
+ // If we're at the end, stop.
+ if (i == codeUnits.length) break;
+
+ // Move past the separator.
+ i++;
+ }
+
+ if (depth < 0) return _PathDirection.aboveRoot;
+ if (depth == 0) return _PathDirection.atRoot;
+ if (reachedRoot) return _PathDirection.reachesRoot;
+ return _PathDirection.belowRoot;
+ }
+
/// Removes a trailing extension from the last part of [path].
///
/// context.withoutExtension('path/to/foo.dart'); // -> 'path/to/foo'
@@ -653,3 +880,30 @@
throw new ArgumentError(message.toString());
}
}
+
+/// An enum of possible return values for [Context._pathDirection].
+class _PathDirection {
+ /// The path contains enough ".." components that at some point it reaches
+ /// above its original root.
+ ///
+ /// Note that this applies even if the path ends beneath its original root. It
+ /// takes precendence over any other return values that may apple.
+ static const aboveRoot = const _PathDirection("above root");
+
+ /// The path contains enough ".." components that it ends at its original
+ /// root.
+ static const atRoot = const _PathDirection("at root");
+
+ /// The path contains enough ".." components that at some point it reaches its
+ /// original root, but it ends beneath that root.
+ static const reachesRoot = const _PathDirection("reaches root");
+
+ /// The path never reaches to or above its original root.
+ static const belowRoot = const _PathDirection("below root");
+
+ final String name;
+
+ const _PathDirection(this.name);
+
+ String toString() => name;
+}
diff --git a/pubspec.yaml b/pubspec.yaml
index bc7ab98..69359bb 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: path
-version: 1.3.7
+version: 1.3.8
author: Dart Team <misc@dartlang.org>
description: >
A string-based path manipulation library. All of the path operations you know
diff --git a/test/posix_test.dart b/test/posix_test.dart
index d6d7a90..afd6b94 100644
--- a/test/posix_test.dart
+++ b/test/posix_test.dart
@@ -419,6 +419,19 @@
expect(context.isWithin('baz', '/root/path/bang/baz'), isFalse);
});
+ test('complex cases', () {
+ expect(context.isWithin('foo/./bar', 'foo/bar/baz'), isTrue);
+ expect(context.isWithin('foo//bar', 'foo/bar/baz'), isTrue);
+ expect(context.isWithin('foo/qux/../bar', 'foo/bar/baz'), isTrue);
+ expect(context.isWithin('foo/bar', 'foo/bar/baz/../..'), isFalse);
+ expect(context.isWithin('foo/bar', 'foo/bar///'), isFalse);
+ expect(context.isWithin('foo/.bar', 'foo/.bar/baz'), isTrue);
+ expect(context.isWithin('foo/./bar', 'foo/.bar/baz'), isFalse);
+ expect(context.isWithin('foo/..bar', 'foo/..bar/baz'), isTrue);
+ expect(context.isWithin('foo/bar', 'foo/bar/baz/..'), isFalse);
+ expect(context.isWithin('foo/bar', 'foo/bar/baz/../qux'), isTrue);
+ });
+
test('from a relative root', () {
var r = new path.Context(style: path.Style.posix, current: 'foo/bar');
expect(r.isWithin('.', 'a/b/c'), isTrue);
diff --git a/test/url_test.dart b/test/url_test.dart
index 1188129..c81893a 100644
--- a/test/url_test.dart
+++ b/test/url_test.dart
@@ -629,6 +629,31 @@
isFalse);
});
+ test('complex cases', () {
+ expect(context.isWithin('foo/./bar', 'foo/bar/baz'), isTrue);
+ expect(context.isWithin('foo//bar', 'foo/bar/baz'), isTrue);
+ expect(context.isWithin('foo/qux/../bar', 'foo/bar/baz'), isTrue);
+ expect(context.isWithin('foo/bar', 'foo/bar/baz/../..'), isFalse);
+ expect(context.isWithin('foo/bar', 'foo/bar///'), isFalse);
+ expect(context.isWithin('foo/.bar', 'foo/.bar/baz'), isTrue);
+ expect(context.isWithin('foo/./bar', 'foo/.bar/baz'), isFalse);
+ expect(context.isWithin('foo/..bar', 'foo/..bar/baz'), isTrue);
+ expect(context.isWithin('foo/bar', 'foo/bar/baz/..'), isFalse);
+ expect(context.isWithin('foo/bar', 'foo/bar/baz/../qux'), isTrue);
+ expect(context.isWithin('http://example.org/', 'http://example.com/foo'),
+ isFalse);
+ expect(context.isWithin('http://example.org/', 'http://dartlang.org/foo'),
+ isFalse);
+ });
+
+ test('with root-relative paths', () {
+ expect(context.isWithin('/foo', 'http://dartlang.org/foo/bar'), isTrue);
+ expect(context.isWithin('http://dartlang.org/foo', '/foo/bar'), isTrue);
+ expect(context.isWithin('/root', 'foo/bar'), isTrue);
+ expect(context.isWithin('foo', '/root/path/foo/bar'), isTrue);
+ expect(context.isWithin('/foo', '/foo/bar'), isTrue);
+ });
+
test('from a relative root', () {
var r = new path.Context(style: path.Style.url, current: 'foo/bar');
expect(r.isWithin('.', 'a/b/c'), isTrue);
diff --git a/test/windows_test.dart b/test/windows_test.dart
index a32f1fe..717043d 100644
--- a/test/windows_test.dart
+++ b/test/windows_test.dart
@@ -537,6 +537,30 @@
expect(context.isWithin(r'baz', r'C:\root\path\bang\baz'), isFalse);
});
+ test('complex cases', () {
+ expect(context.isWithin(r'foo\.\bar', r'foo\bar\baz'), isTrue);
+ expect(context.isWithin(r'foo\\bar', r'foo\bar\baz'), isTrue);
+ expect(context.isWithin(r'foo\qux\..\bar', r'foo\bar\baz'), isTrue);
+ expect(context.isWithin(r'foo\bar', r'foo\bar\baz\..\..'), isFalse);
+ expect(context.isWithin(r'foo\bar', r'foo\bar\\\'), isFalse);
+ expect(context.isWithin(r'foo\.bar', r'foo\.bar\baz'), isTrue);
+ expect(context.isWithin(r'foo\.\bar', r'foo\.bar\baz'), isFalse);
+ expect(context.isWithin(r'foo\..bar', r'foo\..bar\baz'), isTrue);
+ expect(context.isWithin(r'foo\bar', r'foo\bar\baz\..'), isFalse);
+ expect(context.isWithin(r'foo\bar', r'foo\bar\baz\..\qux'), isTrue);
+ expect(context.isWithin(r'C:\', 'C:/foo'), isTrue);
+ expect(context.isWithin(r'C:\', r'D:\foo'), isFalse);
+ expect(context.isWithin(r'C:\', r'\\foo\bar'), isFalse);
+ });
+
+ test('with root-relative paths', () {
+ expect(context.isWithin(r'\foo', r'C:\foo\bar'), isTrue);
+ expect(context.isWithin(r'C:\foo', r'\foo\bar'), isTrue);
+ expect(context.isWithin(r'\root', r'foo\bar'), isTrue);
+ expect(context.isWithin(r'foo', r'\root\path\foo\bar'), isTrue);
+ expect(context.isWithin(r'\foo', r'\foo\bar'), isTrue);
+ });
+
test('from a relative root', () {
var r = new path.Context(style: path.Style.windows, current: r'foo\bar');
expect(r.isWithin('.', r'a\b\c'), isTrue);