blob: 5ccdcdb62531d7820a610c38366289c1d323d10c [file] [log] [blame]
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2015 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
void main() {
// Tests captures in positive and negative look-behind in regular expressions.
void testRE(RegExp re, String input, bool expectedResult) {
if (expectedResult) {
assertTrue(re.hasMatch(input));
} else {
assertFalse(re.hasMatch(input));
}
}
void execRE(RegExp re, String input, List<String> expectedResult) {
assertTrue(re.hasMatch(input));
shouldBe(re.firstMatch(input), expectedResult);
}
void multiRE(RegExp re, String input, List<List<String>> expectedResult) {
assertTrue(re.hasMatch(input));
final matches = re.allMatches(input);
assertEquals(matches.length, expectedResult.length);
for (var i = 0; i < matches.length; i++) {
shouldBe(matches.elementAt(i), expectedResult[i]);
}
}
// Simple fixed-length matches.
var re = new RegExp(r"^.(?<=a)");
execRE(re, "a", ["a"]);
testRE(re, "b", false);
re = new RegExp(r"^f..(?<=.oo)");
execRE(re, "foo1", ["foo"]);
re = new RegExp(r"^f\w\w(?<=\woo)");
execRE(re, "foo2", ["foo"]);
testRE(re, "boo", false);
testRE(re, "fao", false);
testRE(re, "foa", false);
re = new RegExp(r"(?<=abc)\w\w\w");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=a.c)\w\w\w");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=a\wc)\w\w\w");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=a[a-z])\w\w\w");
execRE(re, "abcdef", ["cde"]);
re = new RegExp(r"(?<=a[a-z][a-z])\w\w\w");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=a[a-z]{2})\w\w\w");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=a{1})\w\w\w");
execRE(re, "abcdef", ["bcd"]);
re = new RegExp(r"(?<=a{1}b{1})\w\w\w");
execRE(re, "abcdef", ["cde"]);
re = new RegExp(r"(?<=a{1}[a-z]{2})\w\w\w");
execRE(re, "abcdef", ["def"]);
// Variable-length matches.
re = new RegExp(r"(?<=[a|b|c]*)[^a|b|c]{3}");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=\w*)[^a|b|c]{3}");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=b|c)\w");
multiRE(re, "abcdef", [
["c"],
["d"]
]);
re = new RegExp(r"(?<=[b-e])\w{2}");
multiRE(re, "abcdef", [
["cd"],
["ef"]
]);
// Start of line matches.
re = new RegExp(r"(?<=^abc)def");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=^[a-c]{3})def");
execRE(re, "abcdef", ["def"]);
re = new RegExp(r"(?<=^[a-c]{3})def", multiLine: true);
execRE(re, "xyz\nabcdef", ["def"]);
re = new RegExp(r"(?<=^)\w+", multiLine: true);
multiRE(re, "ab\ncd\nefg", [
["ab"],
["cd"],
["efg"]
]);
re = new RegExp(r"\w+(?<=$)", multiLine: true);
multiRE(re, "ab\ncd\nefg", [
["ab"],
["cd"],
["efg"]
]);
re = new RegExp(r"(?<=^)\w+(?<=$)", multiLine: true);
multiRE(re, "ab\ncd\nefg", [
["ab"],
["cd"],
["efg"]
]);
re = new RegExp(r"(?<=^[^a-c]{3})def");
testRE(re, "abcdef", false);
re = new RegExp(r"^foooo(?<=^o+)$");
testRE(re, "foooo", false);
re = new RegExp(r"^foooo(?<=^o*)$");
testRE(re, "foooo", false);
re = new RegExp(r"^foo(?<=^fo+)$");
execRE(re, "foo", ["foo"]);
re = new RegExp(r"^foooo(?<=^fo*)");
execRE(re, "foooo", ["foooo"]);
re = new RegExp(r"^(f)oo(?<=^\1o+)$");
testRE(re, "foo", true);
execRE(re, "foo", ["foo", "f"]);
re = new RegExp(r"^(f)oo(?<=^\1o+)$", caseSensitive: false);
execRE(re, "foo", ["foo", "f"]);
re = new RegExp(r"^(f)oo(?<=^\1o+).$", caseSensitive: false);
execRE(re, "foo\u1234", ["foo\u1234", "f"]);
re = new RegExp(r"(?<=^\w+)def");
execRE(re, "abcdefdef", ["def"]);
multiRE(re, "abcdefdef", [
["def"],
["def"]
]);
// Word boundary matches.
re = new RegExp(r"(?<=\b)[d-f]{3}");
execRE(re, "abc def", ["def"]);
re = new RegExp(r"(?<=\B)\w{3}");
execRE(re, "ab cdef", ["def"]);
re = new RegExp(r"(?<=\B)(?<=c(?<=\w))\w{3}");
execRE(re, "ab cdef", ["def"]);
re = new RegExp(r"(?<=\b)[d-f]{3}");
testRE(re, "abcdef", false);
// Negative lookbehind.
re = new RegExp(r"(?<!abc)\w\w\w");
execRE(re, "abcdef", ["abc"]);
re = new RegExp(r"(?<!a.c)\w\w\w");
execRE(re, "abcdef", ["abc"]);
re = new RegExp(r"(?<!a\wc)\w\w\w");
execRE(re, "abcdef", ["abc"]);
re = new RegExp(r"(?<!a[a-z])\w\w\w");
execRE(re, "abcdef", ["abc"]);
re = new RegExp(r"(?<!a[a-z]{2})\w\w\w");
execRE(re, "abcdef", ["abc"]);
re = new RegExp(r"(?<!abc)def");
testRE(re, "abcdef", false);
re = new RegExp(r"(?<!a.c)def");
testRE(re, "abcdef", false);
re = new RegExp(r"(?<!a\wc)def");
testRE(re, "abcdef", false);
re = new RegExp(r"(?<!a[a-z][a-z])def");
testRE(re, "abcdef", false);
re = new RegExp(r"(?<!a[a-z]{2})def");
testRE(re, "abcdef", false);
re = new RegExp(r"(?<!a{1}b{1})cde");
testRE(re, "abcdef", false);
re = new RegExp(r"(?<!a{1}[a-z]{2})def");
testRE(re, "abcdef", false);
// Capturing matches.
re = new RegExp(r"(?<=(c))def");
execRE(re, "abcdef", ["def", "c"]);
re = new RegExp(r"(?<=(\w{2}))def");
execRE(re, "abcdef", ["def", "bc"]);
re = new RegExp(r"(?<=(\w(\w)))def");
execRE(re, "abcdef", ["def", "bc", "c"]);
re = new RegExp(r"(?<=(\w){3})def");
execRE(re, "abcdef", ["def", "a"]);
re = new RegExp(r"(?<=(bc)|(cd)).");
execRE(re, "abcdef", ["d", "bc", null]);
re = new RegExp(r"(?<=([ab]{1,2})\D|(abc))\w");
execRE(re, "abcdef", ["c", "a", null]);
re = new RegExp(r"\D(?<=([ab]+))(\w)");
execRE(re, "abcdef", ["ab", "a", "b"]);
// Captures inside negative lookbehind. (They never capture.)
re = new RegExp(r"(?<!(^|[ab]))\w{2}");
execRE(re, "abcdef", ["de", null]);
// Nested lookaround.
re = new RegExp(r"(?<=ab(?=c)\wd)\w\w");
execRE(re, "abcdef", ["ef"]);
re = new RegExp(r"(?<=a(?=([^a]{2})d)\w{3})\w\w");
execRE(re, "abcdef", ["ef", "bc"]);
re = new RegExp(r"(?<=a(?=([bc]{2}(?<!a{2}))d)\w{3})\w\w");
execRE(re, "abcdef", ["ef", "bc"]);
re = new RegExp(r"(?<=a(?=([bc]{2}(?<!a*))d)\w{3})\w\w/");
testRE(re, "abcdef", false);
re = new RegExp(r"^faaao?(?<=^f[oa]+(?=o))");
execRE(re, "faaao", ["faaa"]);
// Back references.
re = new RegExp(r"(.)(?<=(\1\1))");
execRE(re, "abb", ["b", "b", "bb"]);
re = new RegExp(r"(.)(?<=(\1\1))", caseSensitive: false);
execRE(re, "abB", ["B", "B", "bB"]);
re = new RegExp(r"((\w)\w)(?<=\1\2\1)", caseSensitive: false);
execRE(re, "aabAaBa", ["aB", "aB", "a"]);
re = new RegExp(r"(\w(\w))(?<=\1\2\1)", caseSensitive: false);
execRE(re, "aabAaBa", ["Ba", "Ba", "a"]);
re = new RegExp(r"(?=(\w))(?<=(\1)).", caseSensitive: false);
execRE(re, "abaBbAa", ["b", "b", "B"]);
re = new RegExp(r"(?<=(.))(\w+)(?=\1)");
execRE(re, " 'foo' ", ["foo", "'", "foo"]);
execRE(re, " \"foo\" ", ["foo", "\"", "foo"]);
testRE(re, " .foo\" ", false);
re = new RegExp(r"(.)(?<=\1\1\1)");
testRE(re, "ab", false);
testRE(re, "abb", false);
execRE(re, "abbb", ["b", "b"]);
re = new RegExp(r"(..)(?<=\1\1\1)");
testRE(re, "ab", false);
testRE(re, "abb", false);
testRE(re, "aabb", false);
testRE(re, "abab", false);
testRE(re, "fabxbab", false);
testRE(re, "faxabab", false);
execRE(re, "fababab", ["ab", "ab"]);
// Back references to captures inside the lookbehind.
re = new RegExp(r"(?<=\1(\w))d", caseSensitive: false);
execRE(re, "abcCd", ["d", "C"]);
re = new RegExp(r"(?<=\1([abx]))d");
execRE(re, "abxxd", ["d", "x"]);
re = new RegExp(r"(?<=\1(\w+))c");
execRE(re, "ababc", ["c", "ab"]);
execRE(re, "ababbc", ["c", "b"]);
testRE(re, "ababdc", false);
re = new RegExp(r"(?<=(\w+)\1)c");
execRE(re, "ababc", ["c", "abab"]);
// Alternations are tried left to right,
// and we do not backtrack into a lookbehind.
re = new RegExp(r".*(?<=(..|...|....))(.*)");
execRE(re, "xabcd", ["xabcd", "cd", ""]);
re = new RegExp(r".*(?<=(xx|...|....))(.*)");
execRE(re, "xabcd", ["xabcd", "bcd", ""]);
re = new RegExp(r".*(?<=(xx|...))(.*)");
execRE(re, "xxabcd", ["xxabcd", "bcd", ""]);
re = new RegExp(r".*(?<=(xx|xxx))(.*)");
execRE(re, "xxabcd", ["xxabcd", "xx", "abcd"]);
// We do not backtrack into a lookbehind.
// The lookbehind captures "abc" so that \1 does not match. We do not backtrack
// to capture only "bc" in the lookbehind.
re = new RegExp(r"(?<=([abc]+)).\1");
testRE(re, "abcdbc", false);
// Greedy loop.
re = new RegExp(r"(?<=(b+))c");
execRE(re, "abbbbbbc", ["c", "bbbbbb"]);
re = new RegExp(r"(?<=(b\d+))c");
execRE(re, "ab1234c", ["c", "b1234"]);
re = new RegExp(r"(?<=((?:b\d{2})+))c");
execRE(re, "ab12b23b34c", ["c", "b12b23b34"]);
// Sticky
re = new RegExp(r"(?<=^(\w+))def");
multiRE(re, "abcdefdef", [
["def", "abc"],
["def", "abcdef"]
]);
re = new RegExp(r"\Bdef");
multiRE(re, "abcdefdef", [
["def"],
["def"]
]);
// Misc
re = new RegExp(r"(?<=$abc)def");
testRE(re, "abcdef", false);
re = new RegExp(r"^foo(?<=foo)$");
execRE(re, "foo", ["foo"]);
re = new RegExp(r"^f.o(?<=foo)$");
execRE(re, "foo", ["foo"]);
re = new RegExp(r"^f.o(?<=foo)$");
testRE(re, "fno", false);
re = new RegExp(r"^foo(?<!foo)$");
testRE(re, "foo", false);
re = new RegExp(r"^f.o(?<!foo)$");
testRE(re, "foo", false);
execRE(re, "fno", ["fno"]);
re = new RegExp(r"^foooo(?<=fo+)$");
execRE(re, "foooo", ["foooo"]);
re = new RegExp(r"^foooo(?<=fo*)$");
execRE(re, "foooo", ["foooo"]);
re = new RegExp(r"(abc\1)");
execRE(re, "abc", ["abc", "abc"]);
execRE(re, "abc\u1234", ["abc", "abc"]);
re = new RegExp(r"(abc\1)", caseSensitive: false);
execRE(re, "abc", ["abc", "abc"]);
execRE(re, "abc\u1234", ["abc", "abc"]);
final oob_subject = "abcdefghijklmnabcdefghijklmn".substring(14);
re = new RegExp(r"(?=(abcdefghijklmn))(?<=\1)a", caseSensitive: false);
testRE(re, oob_subject, false);
re = new RegExp(r"(?=(abcdefghijklmn))(?<=\1)a");
testRE(re, oob_subject, false);
re = new RegExp(r"(?=(abcdefg))(?<=\1)");
testRE(re, "abcdefgabcdefg".substring(1), false);
// Mutual recursive capture/back references
re = new RegExp(r"(?<=a(.\2)b(\1)).{4}");
execRE(re, "aabcacbc", ["cacb", "a", ""]);
re = new RegExp(r"(?<=a(\2)b(..\1))b");
execRE(re, "aacbacb", ["b", "ac", "ac"]);
re = new RegExp(r"(?<=(?:\1b)(aa)).");
execRE(re, "aabaax", ["x", "aa"]);
re = new RegExp(r"(?<=(?:\1|b)(aa)).");
execRE(re, "aaaax", ["x", "aa"]);
// Restricted syntax in Annex B 1.4.
// The check for quantifiers on lookbehinds was added later than the
// original feature in v8, so we may need to approve failures here
// separately from the rest of the file.
assertThrows(() => new RegExp(r"(?<=.)*")); //# 01: ok
assertThrows(() => new RegExp(r"(?<=.)?")); //# 01: ok
assertThrows(() => new RegExp(r"(?<=.)+")); //# 01: ok
// No unicode flag (yet), so can't test these.
// See https://github.com/dart-lang/sdk/issues/36170.
// assertThrows("/(?<=.)*/u", SyntaxError);
// assertThrows("/(?<=.){1,2}/u", SyntaxError);
}