Revert "[vm] Finish adding support for ECMAScript 2018 features."
This reverts commit 5ebb640a67908e7694f1dacf3bc8d138ebb530eb.
Reason for revert: not stated (the revert template placeholder was left unfilled).
Original change's description:
> [vm] Finish adding support for ECMAScript 2018 features.
>
> This work pulls in v8 support for these features with
> appropriate changes for Dart and closes
> https://github.com/dart-lang/sdk/issues/34935.
>
> This adds support for the following features:
>
> * Interpreting patterns as Unicode patterns instead of
> BMP patterns
> * The dotAll flag (`/s`), which changes the behavior
> of '.' to also match line terminators
> * Escapes for character classes described by Unicode
> property groups (e.g., \p{Greek} to match all Greek
> characters, or \P{Greek} for all non-Greek characters).
>
> The following TC39 proposals describe some of the added features:
>
> * https://github.com/tc39/proposal-regexp-dotall-flag
> * https://github.com/tc39/proposal-regexp-unicode-property-escapes
>
> These additional changes are included:
>
> * Extends named capture group names to include the full
> range of identifier characters supported by ECMAScript,
> not just ASCII.
> * Changes the RegExp interface to return RegExpMatch
> objects, not Match objects, so that downcasting is
> not necessary to use named capture groups from Dart.
>
> **Note**: The changes to the RegExp interface are a
> breaking change for implementers of the RegExp interface.
> Current users of the RegExp interface (i.e., code using Dart
> RegExp objects) will not be affected.
>
> Change-Id: I0709ed0a8d5db36680e32bbad585594857b9ace4
> Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/95651
> Commit-Queue: Stevie Strickland <sstrickl@google.com>
> Reviewed-by: Johnni Winther <johnniwinther@google.com>
> Reviewed-by: Lasse R.H. Nielsen <lrn@google.com>
> Reviewed-by: Martin Kustermann <kustermann@google.com>
TBR=lrn@google.com,kustermann@google.com,jmesserly@google.com,johnniwinther@google.com,sstrickl@google.com
# Not skipping CQ checks because original CL landed > 1 day ago.
Change-Id: I1eda0fee4fd9e94df095944049833a67b07277e2
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100560
Reviewed-by: Keerti Parthasarathy <keertip@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Keerti Parthasarathy <keertip@google.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d28400..21265c1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,32 +1,3 @@
-## 2.3.0-dev.XX.0
-(Add new changes here, and they will be copied to the change section for the
- next dev version)
-
-### Core library changes
-
-#### `dart:core`
-
-* **Breaking change**: The `RegExp` interface has been extended with two new
- constructor named parameters:
-
- * `unicode:` (`bool`, default: `false`), for Unicode patterns , and
- * `dotAll:` (`bool`, default: `false`), to change the matching behavior of
- '.' to also match line terminating characters.
-
- Appropriate properties for these named parameters have also been added so
- their use can be detected after construction.
-
- In addition, `RegExp` methods that originally returned `Match` objects
- now return a more specific subtype, `RegExpMatch`, which adds two features:
-
- * `Iterable<String> groupNames`, a property that contains the names of all
- named capture groups, and
- * `String namedGroup(String name)`: a method that retrieves the match for
- the given named capture group
-
- This change only affects implementers of the `RegExp` interface; current
- code using Dart regular expressions will not be affected.
-
## 2.3.0
The focus in this release is on the new "UI-as-code" language features which
diff --git a/DEPS b/DEPS
index 6e37b9b..0ad497f 100644
--- a/DEPS
+++ b/DEPS
@@ -91,7 +91,6 @@
"http_retry_tag": "0.1.1",
"http_tag" : "0.12.0+2",
"http_throttle_tag" : "1.0.2",
- "icu_rev" : "c56c671998902fcc4fc9ace88c83daa99f980793",
"idl_parser_rev": "5fb1ebf49d235b5a70c9f49047e83b0654031eb7",
"intl_tag": "0.15.7",
"jinja2_rev": "2222b31554f03e62600cd7e383376a7c187967a1",
@@ -211,10 +210,6 @@
Var("chromium_git") + "/chromium/src/third_party/ply.git" +
"@" + Var("ply_rev"),
- Var("dart_root") + "/third_party/icu":
- Var("chromium_git") + "/chromium/deps/icu.git" +
- "@" + Var("icu_rev"),
-
Var("dart_root") + "/tools/idl_parser":
Var("chromium_git") + "/chromium/src/tools/idl_parser.git" +
"@" + Var("idl_parser_rev"),
diff --git a/build/config/android/config.gni b/build/config/android/config.gni
index c61d41a..38e3f8e 100644
--- a/build/config/android/config.gni
+++ b/build/config/android/config.gni
@@ -15,9 +15,6 @@
android_sdk_root = default_android_sdk_root
android_sdk_version = default_android_sdk_version
android_sdk_build_tools_version = default_android_sdk_build_tools_version
-
- # Unused by Dart. Required for GN files in the third_party package ICU.
- enable_java_templates = false
}
# Host stuff -----------------------------------------------------------------
diff --git a/build/config/android/rules.gni b/build/config/android/rules.gni
deleted file mode 100644
index f22ba84..0000000
--- a/build/config/android/rules.gni
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
-# for details. All rights reserved. Use of this source code is governed by a
-# BSD-style license that can be found in the LICENSE file.
-
-# A trivial rules file that allows for the Chromium third_party ICU to
-# be built successfully.
-
-import("//build/config/android/config.gni")
-
-assert(is_android)
diff --git a/build/config/host_byteorder.gni b/build/config/host_byteorder.gni
deleted file mode 100644
index 50ea94b..0000000
--- a/build/config/host_byteorder.gni
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2017 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# Originally from v8, included in our repository as the ICU third party
-# import depends on it for building.
-
-# This header file defines the "host_byteorder" variable.
-# Not that this is currently used only for building v8.
-# The chromium code generally assumes little-endianness.
-declare_args() {
- host_byteorder = "undefined"
-}
-
-# Detect host byteorder
-# ppc64 can be either BE or LE
-if (host_cpu == "ppc64") {
- if (current_os == "aix") {
- host_byteorder = "big"
- } else {
- # Only use the script when absolutely necessary
- host_byteorder =
- exec_script("//build/config/get_host_byteorder.py", [], "trim string")
- }
-} else if (host_cpu == "ppc" || host_cpu == "s390" || host_cpu == "s390x" ||
- host_cpu == "mips" || host_cpu == "mips64") {
- host_byteorder = "big"
-} else {
- host_byteorder = "little"
-}
diff --git a/pkg/dev_compiler/tool/input_sdk/patch/core_patch.dart b/pkg/dev_compiler/tool/input_sdk/patch/core_patch.dart
index da07a3c..192b5f6 100644
--- a/pkg/dev_compiler/tool/input_sdk/patch/core_patch.dart
+++ b/pkg/dev_compiler/tool/input_sdk/patch/core_patch.dart
@@ -547,15 +547,9 @@
class RegExp {
@patch
factory RegExp(String source,
- {bool multiLine = false,
- bool caseSensitive = true,
- bool unicode = false,
- bool dotAll = false}) =>
+ {bool multiLine = false, bool caseSensitive = true}) =>
JSSyntaxRegExp(source,
- multiLine: multiLine,
- caseSensitive: caseSensitive,
- unicode: unicode,
- dotAll: dotAll);
+ multiLine: multiLine, caseSensitive: caseSensitive);
@patch
static String escape(String text) => quoteStringForRegExp(text);
diff --git a/pkg/dev_compiler/tool/input_sdk/private/regexp_helper.dart b/pkg/dev_compiler/tool/input_sdk/private/regexp_helper.dart
index 744effc..9206685 100644
--- a/pkg/dev_compiler/tool/input_sdk/private/regexp_helper.dart
+++ b/pkg/dev_compiler/tool/input_sdk/private/regexp_helper.dart
@@ -47,22 +47,18 @@
var _nativeGlobalRegExp;
var _nativeAnchoredRegExp;
- String toString() =>
- 'RegExp/$pattern/' + JS('String', '#.flags', _nativeRegExp);
+ String toString() => "RegExp/$pattern/";
JSSyntaxRegExp(String source,
- {bool multiLine = false,
- bool caseSensitive = true,
- bool unicode = false,
- bool dotAll = false})
+ {bool multiLine = false, bool caseSensitive = true})
: this.pattern = source,
- this._nativeRegExp = makeNative(
- source, multiLine, caseSensitive, unicode, dotAll, false);
+ this._nativeRegExp =
+ makeNative(source, multiLine, caseSensitive, false);
get _nativeGlobalVersion {
if (_nativeGlobalRegExp != null) return _nativeGlobalRegExp;
- return _nativeGlobalRegExp = makeNative(
- pattern, _isMultiLine, _isCaseSensitive, _isUnicode, _isDotAll, true);
+ return _nativeGlobalRegExp =
+ makeNative(pattern, _isMultiLine, _isCaseSensitive, true);
}
get _nativeAnchoredVersion {
@@ -72,21 +68,17 @@
// that it tries, and you can see if the original regexp matched, or it
// was the added zero-width match that matched, by looking at the last
// capture. If it is a String, the match participated, otherwise it didn't.
- return _nativeAnchoredRegExp = makeNative("$pattern|()", _isMultiLine,
- _isCaseSensitive, _isUnicode, _isDotAll, true);
+ return _nativeAnchoredRegExp =
+ makeNative("$pattern|()", _isMultiLine, _isCaseSensitive, true);
}
bool get _isMultiLine => JS("bool", "#.multiline", _nativeRegExp);
bool get _isCaseSensitive => JS("bool", "!#.ignoreCase", _nativeRegExp);
- bool get _isUnicode => JS("bool", "#.unicode", _nativeRegExp);
- bool get _isDotAll => JS("bool", "#.dotAll", _nativeRegExp);
static makeNative(@nullCheck String source, bool multiLine,
- bool caseSensitive, bool unicode, bool dotAll, bool global) {
+ bool caseSensitive, bool global) {
String m = multiLine ? 'm' : '';
String i = caseSensitive ? '' : 'i';
- String u = unicode ? 'u' : '';
- String s = dotAll ? 's' : '';
String g = global ? 'g' : '';
// We're using the JavaScript's try catch instead of the Dart one
// to avoid dragging in Dart runtime support just because of using
@@ -95,7 +87,7 @@
'',
'(function() {'
'try {'
- 'return new RegExp(#, # + # + # + # + #);'
+ 'return new RegExp(#, # + # + #);'
'} catch (e) {'
'return e;'
'}'
@@ -103,8 +95,6 @@
source,
m,
i,
- u,
- s,
g);
if (JS('bool', '# instanceof RegExp', regexp)) return regexp;
// The returned value is the JavaScript exception. Turn it into a
@@ -113,7 +103,7 @@
throw FormatException("Illegal RegExp pattern: $source, $errorMessage");
}
- RegExpMatch firstMatch(@nullCheck String string) {
+ Match firstMatch(@nullCheck String string) {
List m = JS('JSExtendableArray|Null', r'#.exec(#)', _nativeRegExp, string);
if (m == null) return null;
return _MatchImplementation(this, JSArray<String>.of(m));
@@ -130,7 +120,7 @@
return null;
}
- Iterable<RegExpMatch> allMatches(@nullCheck String string,
+ Iterable<Match> allMatches(@nullCheck String string,
[@nullCheck int start = 0]) {
if (start < 0 || start > string.length) {
throw RangeError.range(start, 0, string.length);
@@ -138,7 +128,7 @@
return _AllMatchesIterable(this, string, start);
}
- RegExpMatch _execGlobal(String string, int start) {
+ Match _execGlobal(String string, int start) {
Object regexp = _nativeGlobalVersion;
JS("void", "#.lastIndex = #", regexp, start);
List match = JS("JSExtendableArray|Null", "#.exec(#)", regexp, string);
@@ -146,7 +136,7 @@
return _MatchImplementation(this, JSArray<String>.of(match));
}
- RegExpMatch _execAnchored(String string, int start) {
+ Match _execAnchored(String string, int start) {
Object regexp = _nativeAnchoredVersion;
JS("void", "#.lastIndex = #", regexp, start);
List match = JS("JSExtendableArray|Null", "#.exec(#)", regexp, string);
@@ -158,7 +148,7 @@
return _MatchImplementation(this, JSArray<String>.of(match));
}
- RegExpMatch matchAsPrefix(String string, [int start = 0]) {
+ Match matchAsPrefix(String string, [int start = 0]) {
if (start < 0 || start > string.length) {
throw RangeError.range(start, 0, string.length);
}
@@ -167,8 +157,6 @@
bool get isMultiLine => _isMultiLine;
bool get isCaseSensitive => _isCaseSensitive;
- bool get isUnicode => _isUnicode;
- bool get isDotAll => _isDotAll;
}
class _MatchImplementation implements RegExpMatch {
@@ -219,34 +207,25 @@
}
}
-class _AllMatchesIterable extends IterableBase<RegExpMatch> {
+class _AllMatchesIterable extends IterableBase<Match> {
final JSSyntaxRegExp _re;
final String _string;
final int _start;
_AllMatchesIterable(this._re, this._string, this._start);
- Iterator<RegExpMatch> get iterator =>
- _AllMatchesIterator(_re, _string, _start);
+ Iterator<Match> get iterator => _AllMatchesIterator(_re, _string, _start);
}
-class _AllMatchesIterator implements Iterator<RegExpMatch> {
+class _AllMatchesIterator implements Iterator<Match> {
final JSSyntaxRegExp _regExp;
String _string;
int _nextIndex;
- RegExpMatch _current;
+ Match _current;
_AllMatchesIterator(this._regExp, this._string, this._nextIndex);
- RegExpMatch get current => _current;
-
- static bool _isLeadSurrogate(int c) {
- return c >= 0xd800 && c <= 0xdbff;
- }
-
- static bool _isTrailSurrogate(int c) {
- return c >= 0xdc00 && c <= 0xdfff;
- }
+ Match get current => _current;
bool moveNext() {
if (_string == null) return false;
@@ -256,15 +235,6 @@
_current = match;
int nextIndex = match.end;
if (match.start == nextIndex) {
- // Zero-width match. Advance by one more, unless the regexp
- // is in unicode mode and it would put us within a surrogate
- // pair. In that case, advance past the code point as a whole.
- if (_regExp.isUnicode &&
- _nextIndex + 1 < _string.length &&
- _isLeadSurrogate(_string.codeUnitAt(_nextIndex)) &&
- _isTrailSurrogate(_string.codeUnitAt(_nextIndex + 1))) {
- nextIndex++;
- }
nextIndex++;
}
_nextIndex = nextIndex;
@@ -278,6 +248,6 @@
}
/** Find the first match of [regExp] in [string] at or after [start]. */
-RegExpMatch firstMatchAfter(JSSyntaxRegExp regExp, String string, int start) {
+Match firstMatchAfter(JSSyntaxRegExp regExp, String string, int start) {
return regExp._execGlobal(string, start);
}
diff --git a/runtime/lib/regexp.cc b/runtime/lib/regexp.cc
index eb4237c..4dba76e 100644
--- a/runtime/lib/regexp.cc
+++ b/runtime/lib/regexp.cc
@@ -14,7 +14,7 @@
namespace dart {
-DEFINE_NATIVE_ENTRY(RegExp_factory, 0, 6) {
+DEFINE_NATIVE_ENTRY(RegExp_factory, 0, 4) {
ASSERT(
TypeArguments::CheckedHandle(zone, arguments->NativeArgAt(0)).IsNull());
GET_NON_NULL_NATIVE_ARGUMENT(String, pattern, arguments->NativeArgAt(1));
@@ -22,30 +22,17 @@
arguments->NativeArgAt(2));
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_case_sensitive,
arguments->NativeArgAt(3));
- GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_unicode,
- arguments->NativeArgAt(4));
- GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_dot_all,
- arguments->NativeArgAt(5));
bool ignore_case = handle_case_sensitive.raw() != Bool::True().raw();
bool multi_line = handle_multi_line.raw() == Bool::True().raw();
- bool unicode = handle_unicode.raw() == Bool::True().raw();
- bool dot_all = handle_dot_all.raw() == Bool::True().raw();
-
- RegExpFlags flags;
-
- if (ignore_case) flags.SetIgnoreCase();
- if (multi_line) flags.SetMultiLine();
- if (unicode) flags.SetUnicode();
- if (dot_all) flags.SetDotAll();
// Parse the pattern once in order to throw any format exceptions within
// the factory constructor. It is parsed again upon compilation.
RegExpCompileData compileData;
// Throws an exception on parsing failure.
- RegExpParser::ParseRegExp(pattern, flags, &compileData);
+ RegExpParser::ParseRegExp(pattern, multi_line, &compileData);
// Create a RegExp object containing only the initial parameters.
- return RegExpEngine::CreateRegExp(thread, pattern, flags);
+ return RegExpEngine::CreateRegExp(thread, pattern, multi_line, ignore_case);
}
DEFINE_NATIVE_ENTRY(RegExp_getPattern, 0, 1) {
@@ -57,25 +44,13 @@
DEFINE_NATIVE_ENTRY(RegExp_getIsMultiLine, 0, 1) {
const RegExp& regexp = RegExp::CheckedHandle(zone, arguments->NativeArgAt(0));
ASSERT(!regexp.IsNull());
- return Bool::Get(regexp.flags().IsMultiLine()).raw();
-}
-
-DEFINE_NATIVE_ENTRY(RegExp_getIsUnicode, 0, 1) {
- const RegExp& regexp = RegExp::CheckedHandle(zone, arguments->NativeArgAt(0));
- ASSERT(!regexp.IsNull());
- return Bool::Get(regexp.flags().IsUnicode()).raw();
-}
-
-DEFINE_NATIVE_ENTRY(RegExp_getIsDotAll, 0, 1) {
- const RegExp& regexp = RegExp::CheckedHandle(zone, arguments->NativeArgAt(0));
- ASSERT(!regexp.IsNull());
- return Bool::Get(regexp.flags().IsDotAll()).raw();
+ return Bool::Get(regexp.is_multi_line()).raw();
}
DEFINE_NATIVE_ENTRY(RegExp_getIsCaseSensitive, 0, 1) {
const RegExp& regexp = RegExp::CheckedHandle(zone, arguments->NativeArgAt(0));
ASSERT(!regexp.IsNull());
- return Bool::Get(!regexp.flags().IgnoreCase()).raw();
+ return Bool::Get(!regexp.is_ignore_case()).raw();
}
DEFINE_NATIVE_ENTRY(RegExp_getGroupCount, 0, 1) {
diff --git a/runtime/lib/regexp_patch.dart b/runtime/lib/regexp_patch.dart
index 35a13b1..4eac446 100644
--- a/runtime/lib/regexp_patch.dart
+++ b/runtime/lib/regexp_patch.dart
@@ -8,12 +8,8 @@
class RegExp {
@patch
factory RegExp(String source,
- {bool multiLine: false,
- bool caseSensitive: true,
- bool unicode: false,
- bool dotAll: false}) {
- _RegExpHashKey key =
- new _RegExpHashKey(source, multiLine, caseSensitive, unicode, dotAll);
+ {bool multiLine: false, bool caseSensitive: true}) {
+ _RegExpHashKey key = new _RegExpHashKey(source, multiLine, caseSensitive);
_RegExpHashValue value = _cache[key];
if (value == null) {
@@ -25,10 +21,7 @@
value = new _RegExpHashValue(
new _RegExp(source,
- multiLine: multiLine,
- caseSensitive: caseSensitive,
- unicode: unicode,
- dotAll: dotAll),
+ multiLine: multiLine, caseSensitive: caseSensitive),
key);
_cache[key] = value;
} else {
@@ -121,20 +114,15 @@
final String pattern;
final bool multiLine;
final bool caseSensitive;
- final bool unicode;
- final bool dotAll;
- _RegExpHashKey(this.pattern, this.multiLine, this.caseSensitive, this.unicode,
- this.dotAll);
+ _RegExpHashKey(this.pattern, this.multiLine, this.caseSensitive);
int get hashCode => pattern.hashCode;
bool operator ==(that) {
return (that is _RegExpHashKey) &&
(this.pattern == that.pattern) &&
(this.multiLine == that.multiLine) &&
- (this.caseSensitive == that.caseSensitive) &&
- (this.unicode == that.unicode) &&
- (this.dotAll == that.dotAll);
+ (this.caseSensitive == that.caseSensitive);
}
}
@@ -212,11 +200,9 @@
class _RegExp implements RegExp {
factory _RegExp(String pattern,
{bool multiLine: false,
- bool caseSensitive: true,
- bool unicode: false,
- bool dotAll: false}) native "RegExp_factory";
+ bool caseSensitive: true}) native "RegExp_factory";
- RegExpMatch firstMatch(String str) {
+ Match firstMatch(String str) {
if (str is! String) throw new ArgumentError(str);
List match = _ExecuteMatch(str, 0);
if (match == null) {
@@ -225,7 +211,7 @@
return new _RegExpMatch(this, str, match);
}
- Iterable<RegExpMatch> allMatches(String string, [int start = 0]) {
+ Iterable<Match> allMatches(String string, [int start = 0]) {
if (string is! String) throw new ArgumentError(string);
if (start is! int) throw new ArgumentError(start);
if (0 > start || start > string.length) {
@@ -234,7 +220,7 @@
return new _AllMatchesIterable(this, string, start);
}
- RegExpMatch matchAsPrefix(String string, [int start = 0]) {
+ Match matchAsPrefix(String string, [int start = 0]) {
if (string is! String) throw new ArgumentError(string);
if (start is! int) throw new ArgumentError(start);
if (start < 0 || start > string.length) {
@@ -266,10 +252,6 @@
bool get isCaseSensitive native "RegExp_getIsCaseSensitive";
- bool get isUnicode native "RegExp_getIsUnicode";
-
- bool get isDotAll native "RegExp_getIsDotAll";
-
int get _groupCount native "RegExp_getGroupCount";
// Returns a List [String, int, String, int, ...] where each
@@ -345,34 +327,25 @@
native "RegExp_ExecuteMatchSticky";
}
-class _AllMatchesIterable extends IterableBase<RegExpMatch> {
+class _AllMatchesIterable extends IterableBase<Match> {
final _RegExp _re;
final String _str;
final int _start;
_AllMatchesIterable(this._re, this._str, this._start);
- Iterator<RegExpMatch> get iterator =>
- new _AllMatchesIterator(_re, _str, _start);
+ Iterator<Match> get iterator => new _AllMatchesIterator(_re, _str, _start);
}
-class _AllMatchesIterator implements Iterator<RegExpMatch> {
+class _AllMatchesIterator implements Iterator<Match> {
final String _str;
int _nextIndex;
_RegExp _re;
- RegExpMatch _current;
+ Match _current;
_AllMatchesIterator(this._re, this._str, this._nextIndex);
- RegExpMatch get current => _current;
-
- static bool _isLeadSurrogate(int c) {
- return c >= 0xd800 && c <= 0xdbff;
- }
-
- static bool _isTrailSurrogate(int c) {
- return c >= 0xdc00 && c <= 0xdfff;
- }
+ Match get current => _current;
bool moveNext() {
if (_re == null) return false; // Cleared after a failed match.
@@ -382,15 +355,7 @@
_current = new _RegExpMatch(_re, _str, match);
_nextIndex = _current.end;
if (_nextIndex == _current.start) {
- // Zero-width match. Advance by one more, unless the regexp
- // is in unicode mode and it would put us within a surrogate
- // pair. In that case, advance past the code point as a whole.
- if (_re.isUnicode &&
- _nextIndex + 1 < _str.length &&
- _isLeadSurrogate(_str.codeUnitAt(_nextIndex)) &&
- _isTrailSurrogate(_str.codeUnitAt(_nextIndex + 1))) {
- _nextIndex++;
- }
+ // Zero-width match. Advance by one more.
_nextIndex++;
}
return true;
diff --git a/runtime/platform/splay-tree-inl.h b/runtime/platform/splay-tree-inl.h
deleted file mode 100644
index 7322a56..0000000
--- a/runtime/platform/splay-tree-inl.h
+++ /dev/null
@@ -1,273 +0,0 @@
-// Copyright (c) 2019, the Dart project authors.
-// Copyright 2010 the V8 project authors.
-// Please see the AUTHORS file for details. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// The original file can be found at:
-// https://github.com/v8/v8/blob/master/src/splay-tree-inl.h
-
-#ifndef RUNTIME_PLATFORM_SPLAY_TREE_INL_H_
-#define RUNTIME_PLATFORM_SPLAY_TREE_INL_H_
-
-#include <vector>
-
-#include "platform/splay-tree.h"
-
-namespace dart {
-
-template <typename Config, class B, class Allocator>
-SplayTree<Config, B, Allocator>::~SplayTree() {
- NodeDeleter deleter;
- ForEachNode(&deleter);
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::Insert(const Key& key, Locator* locator) {
- if (is_empty()) {
- // If the tree is empty, insert the new node.
- root_ = new (allocator_) Node(key, Config::NoValue());
- } else {
- // Splay on the key to move the last node on the search path
- // for the key to the root of the tree.
- Splay(key);
- // Ignore repeated insertions with the same key.
- int cmp = Config::Compare(key, root_->key_);
- if (cmp == 0) {
- locator->bind(root_);
- return false;
- }
- // Insert the new node.
- Node* node = new (allocator_) Node(key, Config::NoValue());
- InsertInternal(cmp, node);
- }
- locator->bind(root_);
- return true;
-}
-
-template <typename Config, class B, class Allocator>
-void SplayTree<Config, B, Allocator>::InsertInternal(int cmp, Node* node) {
- if (cmp > 0) {
- node->left_ = root_;
- node->right_ = root_->right_;
- root_->right_ = nullptr;
- } else {
- node->right_ = root_;
- node->left_ = root_->left_;
- root_->left_ = nullptr;
- }
- root_ = node;
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::FindInternal(const Key& key) {
- if (is_empty()) return false;
- Splay(key);
- return Config::Compare(key, root_->key_) == 0;
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::Contains(const Key& key) {
- return FindInternal(key);
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::Find(const Key& key, Locator* locator) {
- if (FindInternal(key)) {
- locator->bind(root_);
- return true;
- } else {
- return false;
- }
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::FindGreatestLessThan(const Key& key,
- Locator* locator) {
- if (is_empty()) return false;
- // Splay on the key to move the node with the given key or the last
- // node on the search path to the top of the tree.
- Splay(key);
- // Now the result is either the root node or the greatest node in
- // the left subtree.
- int cmp = Config::Compare(root_->key_, key);
- if (cmp <= 0) {
- locator->bind(root_);
- return true;
- } else {
- Node* temp = root_;
- root_ = root_->left_;
- bool result = FindGreatest(locator);
- root_ = temp;
- return result;
- }
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::FindLeastGreaterThan(const Key& key,
- Locator* locator) {
- if (is_empty()) return false;
- // Splay on the key to move the node with the given key or the last
- // node on the search path to the top of the tree.
- Splay(key);
- // Now the result is either the root node or the least node in
- // the right subtree.
- int cmp = Config::Compare(root_->key_, key);
- if (cmp >= 0) {
- locator->bind(root_);
- return true;
- } else {
- Node* temp = root_;
- root_ = root_->right_;
- bool result = FindLeast(locator);
- root_ = temp;
- return result;
- }
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::FindGreatest(Locator* locator) {
- if (is_empty()) return false;
- Node* current = root_;
- while (current->right_ != nullptr)
- current = current->right_;
- locator->bind(current);
- return true;
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::FindLeast(Locator* locator) {
- if (is_empty()) return false;
- Node* current = root_;
- while (current->left_ != nullptr)
- current = current->left_;
- locator->bind(current);
- return true;
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::Move(const Key& old_key,
- const Key& new_key) {
- if (!FindInternal(old_key)) return false;
- Node* node_to_move = root_;
- RemoveRootNode(old_key);
- Splay(new_key);
- int cmp = Config::Compare(new_key, root_->key_);
- if (cmp == 0) {
- // A node with the target key already exists.
- delete node_to_move;
- return false;
- }
- node_to_move->key_ = new_key;
- InsertInternal(cmp, node_to_move);
- return true;
-}
-
-template <typename Config, class B, class Allocator>
-bool SplayTree<Config, B, Allocator>::Remove(const Key& key) {
- if (!FindInternal(key)) return false;
- Node* node_to_remove = root_;
- RemoveRootNode(key);
- delete node_to_remove;
- return true;
-}
-
-template <typename Config, class B, class Allocator>
-void SplayTree<Config, B, Allocator>::RemoveRootNode(const Key& key) {
- if (root_->left_ == nullptr) {
- // No left child, so the new tree is just the right child.
- root_ = root_->right_;
- } else {
- // Left child exists.
- Node* right = root_->right_;
- // Make the original left child the new root.
- root_ = root_->left_;
- // Splay to make sure that the new root has an empty right child.
- Splay(key);
- // Insert the original right child as the right child of the new
- // root.
- root_->right_ = right;
- }
-}
-
-template <typename Config, class B, class Allocator>
-void SplayTree<Config, B, Allocator>::Splay(const Key& key) {
- if (is_empty()) return;
- Node dummy_node(Config::kNoKey, Config::NoValue());
- // Create a dummy node. The use of the dummy node is a bit
- // counter-intuitive: The right child of the dummy node will hold
- // the L tree of the algorithm. The left child of the dummy node
- // will hold the R tree of the algorithm. Using a dummy node, left
- // and right will always be nodes and we avoid special cases.
- Node* dummy = &dummy_node;
- Node* left = dummy;
- Node* right = dummy;
- Node* current = root_;
- while (true) {
- int cmp = Config::Compare(key, current->key_);
- if (cmp < 0) {
- if (current->left_ == nullptr) break;
- if (Config::Compare(key, current->left_->key_) < 0) {
- // Rotate right.
- Node* temp = current->left_;
- current->left_ = temp->right_;
- temp->right_ = current;
- current = temp;
- if (current->left_ == nullptr) break;
- }
- // Link right.
- right->left_ = current;
- right = current;
- current = current->left_;
- } else if (cmp > 0) {
- if (current->right_ == nullptr) break;
- if (Config::Compare(key, current->right_->key_) > 0) {
- // Rotate left.
- Node* temp = current->right_;
- current->right_ = temp->left_;
- temp->left_ = current;
- current = temp;
- if (current->right_ == nullptr) break;
- }
- // Link left.
- left->right_ = current;
- left = current;
- current = current->right_;
- } else {
- break;
- }
- }
- // Assemble.
- left->right_ = current->left_;
- right->left_ = current->right_;
- current->left_ = dummy->right_;
- current->right_ = dummy->left_;
- root_ = current;
-}
-
-template <typename Config, class B, class Allocator>
-template <class Callback>
-void SplayTree<Config, B, Allocator>::ForEach(Callback* callback) {
- NodeToPairAdaptor<Callback> callback_adaptor(callback);
- ForEachNode(&callback_adaptor);
-}
-
-template <typename Config, class B, class Allocator>
-template <class Callback>
-void SplayTree<Config, B, Allocator>::ForEachNode(Callback* callback) {
- if (root_ == nullptr) return;
- // Pre-allocate some space for tiny trees.
- std::vector<Node*> nodes_to_visit;
- nodes_to_visit.push_back(root_);
- size_t pos = 0;
- while (pos < nodes_to_visit.size()) {
- Node* node = nodes_to_visit[pos++];
- if (node->left() != nullptr) nodes_to_visit.push_back(node->left());
- if (node->right() != nullptr) nodes_to_visit.push_back(node->right());
- callback->Call(node);
- }
-}
-
-} // namespace dart
-
-#endif // RUNTIME_PLATFORM_SPLAY_TREE_INL_H_
diff --git a/runtime/platform/splay-tree.h b/runtime/platform/splay-tree.h
deleted file mode 100644
index a85488b..0000000
--- a/runtime/platform/splay-tree.h
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright (c) 2019, the Dart project authors.
-// Copyright 2010 the V8 project authors.
-// Please see the AUTHORS file for details. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// The original file can be found at:
-// https://github.com/v8/v8/blob/master/src/splay-tree.h
-
-#ifndef RUNTIME_PLATFORM_SPLAY_TREE_H_
-#define RUNTIME_PLATFORM_SPLAY_TREE_H_
-
-#include "platform/allocation.h"
-
-namespace dart {
-
-// A splay tree. The config type parameter encapsulates the different
-// configurations of a concrete splay tree:
-//
-// typedef Key: the key type
-// typedef Value: the value type
-// static const Key kNoKey: the dummy key used when no key is set
-// static Value kNoValue(): the dummy value used to initialize nodes
-// static int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function
-//
-// The tree is also parameterized by an allocation policy
-// (Allocator). The policy is used for allocating lists in the C free
-// store or the zone; see zone.h.
-
-template <typename Config, class B, class Allocator>
-class SplayTree : public B {
- public:
- typedef typename Config::Key Key;
- typedef typename Config::Value Value;
-
- class Locator;
-
- explicit SplayTree(Allocator* allocator)
- : root_(nullptr), allocator_(allocator) {}
- ~SplayTree();
-
- Allocator* allocator() { return allocator_; }
-
- // Checks if there is a mapping for the key.
- bool Contains(const Key& key);
-
- // Inserts the given key in this tree with the given value. Returns
- // true if a node was inserted, otherwise false. If found the locator
- // is enabled and provides access to the mapping for the key.
- bool Insert(const Key& key, Locator* locator);
-
- // Looks up the key in this tree and returns true if it was found,
- // otherwise false. If the node is found the locator is enabled and
- // provides access to the mapping for the key.
- bool Find(const Key& key, Locator* locator);
-
- // Finds the mapping with the greatest key less than or equal to the
- // given key.
- bool FindGreatestLessThan(const Key& key, Locator* locator);
-
- // Find the mapping with the greatest key in this tree.
- bool FindGreatest(Locator* locator);
-
- // Finds the mapping with the least key greater than or equal to the
- // given key.
- bool FindLeastGreaterThan(const Key& key, Locator* locator);
-
- // Find the mapping with the least key in this tree.
- bool FindLeast(Locator* locator);
-
- // Move the node from one key to another.
- bool Move(const Key& old_key, const Key& new_key);
-
- // Remove the node with the given key from the tree.
- bool Remove(const Key& key);
-
- // Remove all keys from the tree.
- void Clear() { ResetRoot(); }
-
- bool is_empty() { return root_ == nullptr; }
-
- // Perform the splay operation for the given key. Moves the node with
- // the given key to the top of the tree. If no node has the given
- // key, the last node on the search path is moved to the top of the
- // tree.
- void Splay(const Key& key);
-
- class Node : public B {
- public:
- Node(const Key& key, const Value& value)
- : key_(key), value_(value), left_(nullptr), right_(nullptr) {}
-
- Key key() { return key_; }
- Value value() { return value_; }
- Node* left() { return left_; }
- Node* right() { return right_; }
-
- private:
- friend class SplayTree;
- friend class Locator;
- Key key_;
- Value value_;
- Node* left_;
- Node* right_;
- };
-
- // A locator provides access to a node in the tree without actually
- // exposing the node.
- class Locator : public B {
- public:
- explicit Locator(Node* node) : node_(node) {}
- Locator() : node_(nullptr) {}
- const Key& key() { return node_->key_; }
- Value& value() { return node_->value_; }
- void set_value(const Value& value) { node_->value_ = value; }
- inline void bind(Node* node) { node_ = node; }
-
- private:
- Node* node_;
- };
-
- template <class Callback>
- void ForEach(Callback* callback);
-
- protected:
- // Resets tree root. Existing nodes become unreachable.
- void ResetRoot() { root_ = nullptr; }
-
- private:
- // Search for a node with a given key. If found, root_ points
- // to the node.
- bool FindInternal(const Key& key);
-
- // Inserts a node assuming that root_ is already set up.
- void InsertInternal(int cmp, Node* node);
-
- // Removes root_ node.
- void RemoveRootNode(const Key& key);
-
- template <class Callback>
- class NodeToPairAdaptor : public B {
- public:
- explicit NodeToPairAdaptor(Callback* callback) : callback_(callback) {}
- void Call(Node* node) { callback_->Call(node->key(), node->value()); }
-
- private:
- Callback* callback_;
-
- DISALLOW_COPY_AND_ASSIGN(NodeToPairAdaptor);
- };
-
- class NodeDeleter : public B {
- public:
- NodeDeleter() = default;
- void Call(Node* node) { delete node; }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(NodeDeleter);
- };
-
- template <class Callback>
- void ForEachNode(Callback* callback);
-
- Node* root_;
- Allocator* allocator_;
-
- DISALLOW_COPY_AND_ASSIGN(SplayTree);
-};
-
-} // namespace dart
-
-#endif // RUNTIME_PLATFORM_SPLAY_TREE_H_
diff --git a/runtime/platform/unicode.h b/runtime/platform/unicode.h
index b784823..f3c0778 100644
--- a/runtime/platform/unicode.h
+++ b/runtime/platform/unicode.h
@@ -15,7 +15,6 @@
class Utf : AllStatic {
public:
static const int32_t kMaxCodePoint = 0x10FFFF;
- static const int32_t kInvalidChar = 0xFFFFFFFF;
static bool IsLatin1(int32_t code_point) {
return (code_point >= 0) && (code_point <= 0xFF);
@@ -30,7 +29,7 @@
}
// Returns true if the code point value is above Plane 17.
- static bool IsOutOfRange(int32_t code_point) {
+ static bool IsOutOfRange(intptr_t code_point) {
return (code_point < 0) || (code_point > kMaxCodePoint);
}
};
@@ -57,11 +56,11 @@
static intptr_t Length(const String& str);
static intptr_t Encode(int32_t ch, char* dst);
- static intptr_t Encode(const String& src, char* dst, intptr_t len);
static intptr_t Decode(const uint8_t* utf8_array,
intptr_t array_len,
int32_t* ch);
+ static intptr_t Encode(const String& src, char* dst, intptr_t len);
static bool DecodeToLatin1(const uint8_t* utf8_array,
intptr_t array_len,
@@ -153,10 +152,6 @@
static void Encode(int32_t codepoint, uint16_t* dst);
static const int32_t kMaxCodeUnit = 0xFFFF;
- static const int32_t kLeadSurrogateStart = 0xD800;
- static const int32_t kLeadSurrogateEnd = 0xDBFF;
- static const int32_t kTrailSurrogateStart = 0xDC00;
- static const int32_t kTrailSurrogateEnd = 0xDFFF;
private:
static const int32_t kLeadSurrogateOffset = (0xD800 - (0x10000 >> 10));
@@ -192,11 +187,11 @@
// The size of the stage 1 index.
// TODO(cshapiro): improve indexing so this value is unnecessary.
- static const intptr_t kStage1Size = 261;
+ static const int kStage1Size = 261;
// The size of a stage 2 block in bytes.
- static const intptr_t kBlockSizeLog2 = 8;
- static const intptr_t kBlockSize = 1 << kBlockSizeLog2;
+ static const int kBlockSizeLog2 = 8;
+ static const int kBlockSize = 1 << kBlockSizeLog2;
static int32_t Convert(int32_t ch, int32_t mapping) {
if (Utf::IsLatin1(ch)) {
@@ -227,25 +222,6 @@
static const int32_t stage2_exception_[][2];
};
-class Latin1 {
- public:
- static const int32_t kMaxChar = 0xff;
- // Convert the character to Latin-1 case equivalent if possible.
- static inline uint16_t TryConvertToLatin1(uint16_t c) {
- switch (c) {
- // This are equivalent characters in unicode.
- case 0x39c:
- case 0x3bc:
- return 0xb5;
- // This is an uppercase of a Latin-1 character
- // outside of Latin-1.
- case 0x178:
- return 0xff;
- }
- return c;
- }
-};
-
} // namespace dart
#endif // RUNTIME_PLATFORM_UNICODE_H_
diff --git a/runtime/platform/utils.h b/runtime/platform/utils.h
index 993ff26..9f72205 100644
--- a/runtime/platform/utils.h
+++ b/runtime/platform/utils.h
@@ -203,14 +203,7 @@
return (static_cast<int64_t>(high) << 32) | (low & 0x0ffffffffLL);
}
- static inline constexpr bool IsAlphaNumeric(uint32_t c) {
- return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
- IsDecimalDigit(c);
- }
-
- static inline constexpr bool IsDecimalDigit(uint32_t c) {
- return ('0' <= c) && (c <= '9');
- }
+ static bool IsDecimalDigit(char c) { return ('0' <= c) && (c <= '9'); }
static bool IsHexDigit(char c) {
return IsDecimalDigit(c) || (('A' <= c) && (c <= 'F')) ||
diff --git a/runtime/vm/BUILD.gn b/runtime/vm/BUILD.gn
index f2737c9..680ab84 100644
--- a/runtime/vm/BUILD.gn
+++ b/runtime/vm/BUILD.gn
@@ -60,9 +60,8 @@
library_for_all_configs("libdart_vm") {
target_type = "source_set"
- extra_deps = [ "//third_party/icu" ]
if (is_fuchsia) {
- extra_deps += [
+ extra_deps = [
# TODO(US-399): Remove time_service specific code when it is no longer
# necessary.
"//sdk/lib/sys/cpp",
diff --git a/runtime/vm/bootstrap_natives.h b/runtime/vm/bootstrap_natives.h
index 34cfbc4..da1dc36 100644
--- a/runtime/vm/bootstrap_natives.h
+++ b/runtime/vm/bootstrap_natives.h
@@ -98,12 +98,10 @@
V(Double_toStringAsExponential, 2) \
V(Double_toStringAsPrecision, 2) \
V(Double_flipSignBit, 1) \
- V(RegExp_factory, 6) \
+ V(RegExp_factory, 4) \
V(RegExp_getPattern, 1) \
V(RegExp_getIsMultiLine, 1) \
V(RegExp_getIsCaseSensitive, 1) \
- V(RegExp_getIsUnicode, 1) \
- V(RegExp_getIsDotAll, 1) \
V(RegExp_getGroupCount, 1) \
V(RegExp_getGroupNameMap, 1) \
V(RegExp_ExecuteMatch, 3) \
diff --git a/runtime/vm/clustered_snapshot.cc b/runtime/vm/clustered_snapshot.cc
index 033ffbc..4f66027 100644
--- a/runtime/vm/clustered_snapshot.cc
+++ b/runtime/vm/clustered_snapshot.cc
@@ -3630,8 +3630,7 @@
RawRegExp* regexp = objects_[i];
AutoTraceObject(regexp);
WriteFromTo(regexp);
- s->Write<int32_t>(regexp->ptr()->num_one_byte_registers_);
- s->Write<int32_t>(regexp->ptr()->num_two_byte_registers_);
+ s->Write<int32_t>(regexp->ptr()->num_registers_);
s->Write<int8_t>(regexp->ptr()->type_flags_);
}
}
@@ -3662,8 +3661,7 @@
Deserializer::InitializeHeader(regexp, kRegExpCid,
RegExp::InstanceSize());
ReadFromTo(regexp);
- regexp->ptr()->num_one_byte_registers_ = d->Read<int32_t>();
- regexp->ptr()->num_two_byte_registers_ = d->Read<int32_t>();
+ regexp->ptr()->num_registers_ = d->Read<int32_t>();
regexp->ptr()->type_flags_ = d->Read<int8_t>();
}
}
diff --git a/runtime/vm/compiler/backend/constant_propagator.cc b/runtime/vm/compiler/backend/constant_propagator.cc
index acf4f9c..d27b6e2 100644
--- a/runtime/vm/compiler/backend/constant_propagator.cc
+++ b/runtime/vm/compiler/backend/constant_propagator.cc
@@ -1272,8 +1272,8 @@
}
}
-void ConstantPropagator::VisitCaseInsensitiveCompare(
- CaseInsensitiveCompareInstr* instr) {
+void ConstantPropagator::VisitCaseInsensitiveCompareUC16(
+ CaseInsensitiveCompareUC16Instr* instr) {
SetValue(instr, non_constant_);
}
diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc
index dda6e01..4ee0e27 100644
--- a/runtime/vm/compiler/backend/il.cc
+++ b/runtime/vm/compiler/backend/il.cc
@@ -5161,6 +5161,10 @@
return "";
}
+const RuntimeEntry& CaseInsensitiveCompareUC16Instr::TargetFunction() const {
+ return kCaseInsensitiveCompareUC16RuntimeEntry;
+}
+
TruncDivModInstr::TruncDivModInstr(Value* lhs, Value* rhs, intptr_t deopt_id)
: TemplateDefinition(deopt_id) {
SetInputAt(0, lhs);
diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h
index 4c22692..c5997de 100644
--- a/runtime/vm/compiler/backend/il.h
+++ b/runtime/vm/compiler/backend/il.h
@@ -18,7 +18,6 @@
#include "vm/native_entry.h"
#include "vm/object.h"
#include "vm/parser.h"
-#include "vm/runtime_entry.h"
#include "vm/static_type_exactness_state.h"
#include "vm/token_position.h"
@@ -423,7 +422,7 @@
M(Unbox, kNoGC) \
M(BoxInt64, _) \
M(UnboxInt64, kNoGC) \
- M(CaseInsensitiveCompare, _) \
+ M(CaseInsensitiveCompareUC16, _) \
M(BinaryInt64Op, kNoGC) \
M(ShiftInt64Op, kNoGC) \
M(SpeculativeShiftInt64Op, kNoGC) \
@@ -5955,18 +5954,18 @@
// Calls into the runtime and performs a case-insensitive comparison of the
// UTF16 strings (i.e. TwoByteString or ExternalTwoByteString) located at
// str[lhs_index:lhs_index + length] and str[rhs_index:rhs_index + length].
-// Depending on the runtime entry passed, we will treat the strings as either
-// UCS2 (no surrogate handling) or UTF16 (surrogates handled appropriately).
-class CaseInsensitiveCompareInstr
+//
+// TODO(zerny): Remove this once (if) functions inherited from unibrow
+// are moved to dart code.
+class CaseInsensitiveCompareUC16Instr
: public TemplateDefinition<4, NoThrow, Pure> {
public:
- CaseInsensitiveCompareInstr(Value* str,
- Value* lhs_index,
- Value* rhs_index,
- Value* length,
- const RuntimeEntry& entry,
- intptr_t cid)
- : entry_(entry), cid_(cid) {
+ CaseInsensitiveCompareUC16Instr(Value* str,
+ Value* lhs_index,
+ Value* rhs_index,
+ Value* length,
+ intptr_t cid)
+ : cid_(cid) {
ASSERT(cid == kTwoByteStringCid || cid == kExternalTwoByteStringCid);
ASSERT(index_scale() == 2);
SetInputAt(0, str);
@@ -5980,7 +5979,7 @@
Value* rhs_index() const { return inputs_[2]; }
Value* length() const { return inputs_[3]; }
- const RuntimeEntry& TargetFunction() const { return entry_; }
+ const RuntimeEntry& TargetFunction() const;
bool IsExternal() const { return cid_ == kExternalTwoByteStringCid; }
intptr_t class_id() const { return cid_; }
intptr_t index_scale() const { return Instance::ElementSizeFor(cid_); }
@@ -5989,18 +5988,17 @@
virtual Representation representation() const { return kTagged; }
- DECLARE_INSTRUCTION(CaseInsensitiveCompare)
+ DECLARE_INSTRUCTION(CaseInsensitiveCompareUC16)
virtual CompileType ComputeType() const;
virtual bool AttributesEqual(Instruction* other) const {
- return other->AsCaseInsensitiveCompare()->cid_ == cid_;
+ return other->AsCaseInsensitiveCompareUC16()->cid_ == cid_;
}
private:
- const RuntimeEntry& entry_;
const intptr_t cid_;
- DISALLOW_COPY_AND_ASSIGN(CaseInsensitiveCompareInstr);
+ DISALLOW_COPY_AND_ASSIGN(CaseInsensitiveCompareUC16Instr);
};
// Represents Math's static min and max functions.
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index d2c6d25..c7ce0c0 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -5137,7 +5137,7 @@
}
}
-LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
+LocationSummary* CaseInsensitiveCompareUC16Instr::MakeLocationSummary(
Zone* zone,
bool opt) const {
const intptr_t kNumTemps = 0;
@@ -5151,7 +5151,8 @@
return summary;
}
-void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+void CaseInsensitiveCompareUC16Instr::EmitNativeCode(
+ FlowGraphCompiler* compiler) {
// Call the function.
__ CallRuntime(TargetFunction(), TargetFunction().argument_count());
}
diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc
index f1ac89a..949490b 100644
--- a/runtime/vm/compiler/backend/il_arm64.cc
+++ b/runtime/vm/compiler/backend/il_arm64.cc
@@ -4361,7 +4361,7 @@
}
}
-LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
+LocationSummary* CaseInsensitiveCompareUC16Instr::MakeLocationSummary(
Zone* zone,
bool opt) const {
const intptr_t kNumTemps = 0;
@@ -4375,7 +4375,8 @@
return summary;
}
-void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+void CaseInsensitiveCompareUC16Instr::EmitNativeCode(
+ FlowGraphCompiler* compiler) {
// Call the function.
__ CallRuntime(TargetFunction(), TargetFunction().argument_count());
}
diff --git a/runtime/vm/compiler/backend/il_dbc.cc b/runtime/vm/compiler/backend/il_dbc.cc
index dedc848..14d5596 100644
--- a/runtime/vm/compiler/backend/il_dbc.cc
+++ b/runtime/vm/compiler/backend/il_dbc.cc
@@ -55,7 +55,7 @@
// - Optimized RegExps,
// - Precompilation.
#define FOR_EACH_UNREACHABLE_INSTRUCTION(M) \
- M(CaseInsensitiveCompare) \
+ M(CaseInsensitiveCompareUC16) \
M(GenericCheckBound) \
M(IndirectGoto) \
M(Int64ToDouble) \
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index 933a52d..6b8c380 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -4459,7 +4459,7 @@
}
}
-LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
+LocationSummary* CaseInsensitiveCompareUC16Instr::MakeLocationSummary(
Zone* zone,
bool opt) const {
const intptr_t kNumTemps = 0;
@@ -4473,7 +4473,8 @@
return summary;
}
-void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+void CaseInsensitiveCompareUC16Instr::EmitNativeCode(
+ FlowGraphCompiler* compiler) {
// Save ESP. EDI is chosen because it is callee saved so we do not need to
// back it up before calling into the runtime.
static const Register kSavedSPReg = EDI;
@@ -4488,7 +4489,7 @@
// Call the function.
__ CallRuntime(TargetFunction(), TargetFunction().argument_count());
- // Restore ESP and pop the old value off the stack.
+ // Restore ESP.
__ movl(ESP, kSavedSPReg);
}
diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc
index 2e055a0..2ff2880 100644
--- a/runtime/vm/compiler/backend/il_x64.cc
+++ b/runtime/vm/compiler/backend/il_x64.cc
@@ -4511,7 +4511,7 @@
}
}
-LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
+LocationSummary* CaseInsensitiveCompareUC16Instr::MakeLocationSummary(
Zone* zone,
bool opt) const {
const intptr_t kNumTemps = 0;
@@ -4525,7 +4525,8 @@
return summary;
}
-void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+void CaseInsensitiveCompareUC16Instr::EmitNativeCode(
+ FlowGraphCompiler* compiler) {
// Save RSP. R13 is chosen because it is callee saved so we do not need to
// back it up before calling into the runtime.
static const Register kSavedSPReg = R13;
diff --git a/runtime/vm/compiler/backend/type_propagator.cc b/runtime/vm/compiler/backend/type_propagator.cc
index 0352767..0dec0c9 100644
--- a/runtime/vm/compiler/backend/type_propagator.cc
+++ b/runtime/vm/compiler/backend/type_propagator.cc
@@ -1515,7 +1515,7 @@
return CompileType::FromCid(result_cid_);
}
-CompileType CaseInsensitiveCompareInstr::ComputeType() const {
+CompileType CaseInsensitiveCompareUC16Instr::ComputeType() const {
return CompileType::FromCid(kBoolCid);
}
diff --git a/runtime/vm/compiler/jit/compiler.cc b/runtime/vm/compiler/jit/compiler.cc
index d334330..db5a703 100644
--- a/runtime/vm/compiler/jit/compiler.cc
+++ b/runtime/vm/compiler/jit/compiler.cc
@@ -166,10 +166,11 @@
RegExp& regexp = RegExp::Handle(parsed_function->function().regexp());
const String& pattern = String::Handle(regexp.pattern());
+ const bool multiline = regexp.is_multi_line();
RegExpCompileData* compile_data = new (zone) RegExpCompileData();
// Parsing failures are handled in the RegExp factory constructor.
- RegExpParser::ParseRegExp(pattern, regexp.flags(), compile_data);
+ RegExpParser::ParseRegExp(pattern, multiline, compile_data);
regexp.set_num_bracket_expressions(compile_data->capture_count);
regexp.set_capture_name_map(compile_data->capture_name_map);
diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
index e25bf06..9804f69 100644
--- a/runtime/vm/object.cc
+++ b/runtime/vm/object.cc
@@ -21751,45 +21751,23 @@
NoSafepointScope no_safepoint;
result ^= raw;
result.set_type(kUninitialized);
- result.set_flags(RegExpFlags());
- result.set_num_registers(/*is_one_byte=*/false, -1);
- result.set_num_registers(/*is_one_byte=*/true, -1);
+ result.set_flags(0);
+ result.set_num_registers(-1);
}
return result.raw();
}
-const char* RegExpFlags::ToCString() const {
- switch (value_ & ~kGlobal) {
- case kIgnoreCase | kMultiLine | kDotAll | kUnicode:
- return "imsu";
- case kIgnoreCase | kMultiLine | kDotAll:
- return "ims";
- case kIgnoreCase | kMultiLine | kUnicode:
- return "imu";
- case kIgnoreCase | kUnicode | kDotAll:
- return "ius";
- case kMultiLine | kDotAll | kUnicode:
- return "msu";
+const char* RegExp::Flags() const {
+ switch (flags()) {
+ case kGlobal | kIgnoreCase | kMultiLine:
case kIgnoreCase | kMultiLine:
return "im";
- case kIgnoreCase | kDotAll:
- return "is";
- case kIgnoreCase | kUnicode:
- return "iu";
- case kMultiLine | kDotAll:
- return "ms";
- case kMultiLine | kUnicode:
- return "mu";
- case kDotAll | kUnicode:
- return "su";
+ case kGlobal | kIgnoreCase:
case kIgnoreCase:
return "i";
+ case kGlobal | kMultiLine:
case kMultiLine:
return "m";
- case kDotAll:
- return "s";
- case kUnicode:
- return "u";
default:
break;
}
@@ -21811,7 +21789,9 @@
return false;
}
// Match the flags.
- if (flags() != other_js.flags()) {
+ if ((is_global() != other_js.is_global()) ||
+ (is_ignore_case() != other_js.is_ignore_case()) ||
+ (is_multi_line() != other_js.is_multi_line())) {
return false;
}
return true;
@@ -21820,7 +21800,7 @@
const char* RegExp::ToCString() const {
const String& str = String::Handle(pattern());
return OS::SCreate(Thread::Current()->zone(), "RegExp: pattern=%s flags=%s",
- str.ToCString(), flags().ToCString());
+ str.ToCString(), Flags());
}
RawWeakProperty* WeakProperty::New(Heap::Space space) {
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index f0040cf..5b0bd63 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -9105,55 +9105,6 @@
friend class Debugger;
};
-class RegExpFlags {
- public:
- // Flags are passed to a regex object as follows:
- // 'i': ignore case, 'g': do global matches, 'm': pattern is multi line,
- // 'u': pattern is full Unicode, not just BMP, 's': '.' in pattern matches
- // all characters including line terminators.
- enum Flags {
- kNone = 0,
- kGlobal = 1,
- kIgnoreCase = 2,
- kMultiLine = 4,
- kUnicode = 8,
- kDotAll = 16,
- };
-
- static const int kDefaultFlags = 0;
-
- RegExpFlags() : value_(kDefaultFlags) {}
- explicit RegExpFlags(int value) : value_(value) {}
-
- inline bool IsGlobal() const { return (value_ & kGlobal) != 0; }
- inline bool IgnoreCase() const { return (value_ & kIgnoreCase) != 0; }
- inline bool IsMultiLine() const { return (value_ & kMultiLine) != 0; }
- inline bool IsUnicode() const { return (value_ & kUnicode) != 0; }
- inline bool IsDotAll() const { return (value_ & kDotAll) != 0; }
-
- inline bool NeedsUnicodeCaseEquivalents() {
- // Both unicode and ignore_case flags are set. We need to use ICU to find
- // the closure over case equivalents.
- return IsUnicode() && IgnoreCase();
- }
-
- void SetGlobal() { value_ |= kGlobal; }
- void SetIgnoreCase() { value_ |= kIgnoreCase; }
- void SetMultiLine() { value_ |= kMultiLine; }
- void SetUnicode() { value_ |= kUnicode; }
- void SetDotAll() { value_ |= kDotAll; }
-
- const char* ToCString() const;
-
- int value() const { return value_; }
-
- bool operator==(const RegExpFlags& other) { return value_ == other.value_; }
- bool operator!=(const RegExpFlags& other) { return value_ != other.value_; }
-
- private:
- int value_;
-};
-
// Internal JavaScript regular expression object.
class RegExp : public Instance {
public:
@@ -9167,11 +9118,20 @@
kComplex = 2,
};
+ // Flags are passed to a regex object as follows:
+ // 'i': ignore case, 'g': do global matches, 'm': pattern is multi line.
+ enum Flags {
+ kNone = 0,
+ kGlobal = 1,
+ kIgnoreCase = 2,
+ kMultiLine = 4,
+ };
+
enum {
kTypePos = 0,
kTypeSize = 2,
kFlagsPos = 2,
- kFlagsSize = 5,
+ kFlagsSize = 4,
};
class TypeBits : public BitField<int8_t, RegExType, kTypePos, kTypeSize> {};
@@ -9181,10 +9141,11 @@
bool is_simple() const { return (type() == kSimple); }
bool is_complex() const { return (type() == kComplex); }
- intptr_t num_registers(bool is_one_byte) const {
- return is_one_byte ? raw_ptr()->num_one_byte_registers_
- : raw_ptr()->num_two_byte_registers_;
- }
+ bool is_global() const { return (flags() & kGlobal); }
+ bool is_ignore_case() const { return (flags() & kIgnoreCase); }
+ bool is_multi_line() const { return (flags() & kMultiLine); }
+
+ intptr_t num_registers() const { return raw_ptr()->num_registers_; }
RawString* pattern() const { return raw_ptr()->pattern_; }
RawSmi* num_bracket_expressions() const {
@@ -9248,48 +9209,15 @@
void set_num_bracket_expressions(intptr_t value) const;
void set_capture_name_map(const Array& array) const;
- void set_is_global() const {
- RegExpFlags f = flags();
- f.SetGlobal();
- set_flags(f);
- }
- void set_is_ignore_case() const {
- RegExpFlags f = flags();
- f.SetIgnoreCase();
- set_flags(f);
- }
- void set_is_multi_line() const {
- RegExpFlags f = flags();
- f.SetMultiLine();
- set_flags(f);
- }
- void set_is_unicode() const {
- RegExpFlags f = flags();
- f.SetUnicode();
- set_flags(f);
- }
- void set_is_dot_all() const {
- RegExpFlags f = flags();
- f.SetDotAll();
- set_flags(f);
- }
+ void set_is_global() const { set_flags(flags() | kGlobal); }
+ void set_is_ignore_case() const { set_flags(flags() | kIgnoreCase); }
+ void set_is_multi_line() const { set_flags(flags() | kMultiLine); }
void set_is_simple() const { set_type(kSimple); }
void set_is_complex() const { set_type(kComplex); }
- void set_num_registers(bool is_one_byte, intptr_t value) const {
- if (is_one_byte) {
- StoreNonPointer(&raw_ptr()->num_one_byte_registers_, value);
- } else {
- StoreNonPointer(&raw_ptr()->num_two_byte_registers_, value);
- }
+ void set_num_registers(intptr_t value) const {
+ StoreNonPointer(&raw_ptr()->num_registers_, value);
}
- RegExpFlags flags() const {
- return RegExpFlags(FlagsBits::decode(raw_ptr()->type_flags_));
- }
- void set_flags(RegExpFlags flags) const {
- StoreNonPointer(&raw_ptr()->type_flags_,
- FlagsBits::update(flags.value(), raw_ptr()->type_flags_));
- }
const char* Flags() const;
virtual bool CanonicalizeEquals(const Instance& other) const;
@@ -9305,8 +9233,13 @@
StoreNonPointer(&raw_ptr()->type_flags_,
TypeBits::update(type, raw_ptr()->type_flags_));
}
+ void set_flags(intptr_t value) const {
+ StoreNonPointer(&raw_ptr()->type_flags_,
+ FlagsBits::update(value, raw_ptr()->type_flags_));
+ }
RegExType type() const { return TypeBits::decode(raw_ptr()->type_flags_); }
+ intptr_t flags() const { return FlagsBits::decode(raw_ptr()->type_flags_); }
FINAL_HEAP_OBJECT_IMPLEMENTATION(RegExp, Instance);
friend class Class;
diff --git a/runtime/vm/object_service.cc b/runtime/vm/object_service.cc
index 9895c42..0397f6e 100644
--- a/runtime/vm/object_service.cc
+++ b/runtime/vm/object_service.cc
@@ -1487,8 +1487,8 @@
return;
}
- jsobj.AddProperty("isCaseSensitive", !flags().IgnoreCase());
- jsobj.AddProperty("isMultiLine", flags().IsMultiLine());
+ jsobj.AddProperty("isCaseSensitive", !is_ignore_case());
+ jsobj.AddProperty("isMultiLine", is_multi_line());
if (!FLAG_interpret_irregexp) {
Function& func = Function::Handle();
diff --git a/runtime/vm/raw_object.h b/runtime/vm/raw_object.h
index 75e7a92..05d60dd 100644
--- a/runtime/vm/raw_object.h
+++ b/runtime/vm/raw_object.h
@@ -2428,17 +2428,11 @@
VISIT_TO(RawObject*, external_two_byte_sticky_function_)
RawObject** to_snapshot(Snapshot::Kind kind) { return to(); }
- // The same pattern may use different amount of registers if compiled
- // for a one-byte target than a two-byte target. For example, we do not
- // need to allocate registers to check whether the current position is within
- // a surrogate pair when matching a Unicode pattern against a one-byte string.
- intptr_t num_one_byte_registers_;
- intptr_t num_two_byte_registers_;
+ intptr_t num_registers_;
// A bitfield with two fields:
// type: Uninitialized, simple or complex.
- // flags: Represents global/local, case insensitive, multiline, unicode,
- // dotAll.
+ // flags: Represents global/local, case insensitive, multiline.
int8_t type_flags_;
};
diff --git a/runtime/vm/raw_object_snapshot.cc b/runtime/vm/raw_object_snapshot.cc
index 72ef288..20e3983 100644
--- a/runtime/vm/raw_object_snapshot.cc
+++ b/runtime/vm/raw_object_snapshot.cc
@@ -2172,9 +2172,7 @@
*reader->StringHandle() ^= reader->ReadObjectImpl(kAsInlinedObject);
regex.set_pattern(*reader->StringHandle());
- regex.StoreNonPointer(®ex.raw_ptr()->num_one_byte_registers_,
- reader->Read<int32_t>());
- regex.StoreNonPointer(®ex.raw_ptr()->num_two_byte_registers_,
+ regex.StoreNonPointer(®ex.raw_ptr()->num_registers_,
reader->Read<int32_t>());
regex.StoreNonPointer(®ex.raw_ptr()->type_flags_, reader->Read<int8_t>());
@@ -2204,8 +2202,7 @@
// Write out all the other fields.
writer->Write<RawObject*>(ptr()->num_bracket_expressions_);
writer->WriteObjectImpl(ptr()->pattern_, kAsInlinedObject);
- writer->Write<int32_t>(ptr()->num_one_byte_registers_);
- writer->Write<int32_t>(ptr()->num_two_byte_registers_);
+ writer->Write<int32_t>(ptr()->num_registers_);
writer->Write<int8_t>(ptr()->type_flags_);
}
diff --git a/runtime/vm/regexp.cc b/runtime/vm/regexp.cc
index 23eff8f..dd559bd 100644
--- a/runtime/vm/regexp.cc
+++ b/runtime/vm/regexp.cc
@@ -4,11 +4,7 @@
#include "vm/regexp.h"
-#include "platform/splay-tree-inl.h"
#include "platform/unicode.h"
-
-#include "unicode/uniset.h"
-
#include "vm/dart_entry.h"
#include "vm/regexp_assembler.h"
#include "vm/regexp_assembler_bytecode.h"
@@ -29,14 +25,14 @@
static const intptr_t kMaxLookaheadForBoyerMoore = 8;
ContainedInLattice AddRange(ContainedInLattice containment,
- const int32_t* ranges,
+ const intptr_t* ranges,
intptr_t ranges_length,
Interval new_range) {
ASSERT((ranges_length & 1) == 1);
- ASSERT(ranges[ranges_length - 1] == Utf::kMaxCodePoint + 1);
+ ASSERT(ranges[ranges_length - 1] == Utf16::kMaxCodeUnit + 1);
if (containment == kLatticeUnknown) return containment;
bool inside = false;
- int32_t last = 0;
+ intptr_t last = 0;
for (intptr_t i = 0; i < ranges_length;
inside = !inside, last = ranges[i], i++) {
// Consider the range from last to ranges[i].
@@ -286,26 +282,10 @@
class RegExpCompiler : public ValueObject {
public:
- RegExpCompiler(intptr_t capture_count, bool is_one_byte);
+ RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte);
intptr_t AllocateRegister() { return next_register_++; }
- // Lookarounds to match lone surrogates for unicode character class matches
- // are never nested. We can therefore reuse registers.
- intptr_t UnicodeLookaroundStackRegister() {
- if (unicode_lookaround_stack_register_ == kNoRegister) {
- unicode_lookaround_stack_register_ = AllocateRegister();
- }
- return unicode_lookaround_stack_register_;
- }
-
- intptr_t UnicodeLookaroundPositionRegister() {
- if (unicode_lookaround_position_register_ == kNoRegister) {
- unicode_lookaround_position_register_ = AllocateRegister();
- }
- return unicode_lookaround_position_register_;
- }
-
#if !defined(DART_PRECOMPILED_RUNTIME)
RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler,
RegExpNode* start,
@@ -335,6 +315,7 @@
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
+ inline bool ignore_case() { return ignore_case_; }
inline bool one_byte() const { return is_one_byte_; }
bool read_backward() { return read_backward_; }
void set_read_backward(bool value) { read_backward_ = value; }
@@ -352,11 +333,10 @@
private:
EndNode* accept_;
intptr_t next_register_;
- intptr_t unicode_lookaround_stack_register_;
- intptr_t unicode_lookaround_position_register_;
ZoneGrowableArray<RegExpNode*>* work_list_;
intptr_t recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
+ bool ignore_case_;
bool is_one_byte_;
bool reg_exp_too_big_;
bool read_backward_;
@@ -382,12 +362,13 @@
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
-RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool is_one_byte)
+RegExpCompiler::RegExpCompiler(intptr_t capture_count,
+ bool ignore_case,
+ bool is_one_byte)
: next_register_(2 * (capture_count + 1)),
- unicode_lookaround_stack_register_(kNoRegister),
- unicode_lookaround_position_register_(kNoRegister),
work_list_(NULL),
recursion_depth_(0),
+ ignore_case_(ignore_case),
is_one_byte_(is_one_byte),
reg_exp_too_big_(false),
read_backward_(false),
@@ -1018,7 +999,7 @@
}
static void EmitBoundaryTest(RegExpMacroAssembler* masm,
- uint16_t border,
+ intptr_t border,
BlockLabel* fall_through,
BlockLabel* above_or_equal,
BlockLabel* below) {
@@ -1031,8 +1012,8 @@
}
static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
- uint16_t first,
- uint16_t last,
+ intptr_t first,
+ intptr_t last,
BlockLabel* fall_through,
BlockLabel* in_range,
BlockLabel* out_of_range) {
@@ -1055,10 +1036,10 @@
// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
static void EmitUseLookupTable(RegExpMacroAssembler* masm,
- ZoneGrowableArray<uint16_t>* ranges,
+ ZoneGrowableArray<int>* ranges,
intptr_t start_index,
intptr_t end_index,
- uint16_t min_char,
+ intptr_t min_char,
BlockLabel* fall_through,
BlockLabel* even_label,
BlockLabel* odd_label) {
@@ -1112,7 +1093,7 @@
}
static void CutOutRange(RegExpMacroAssembler* masm,
- ZoneGrowableArray<uint16_t>* ranges,
+ ZoneGrowableArray<int>* ranges,
intptr_t start_index,
intptr_t end_index,
intptr_t cut_index,
@@ -1138,17 +1119,17 @@
// Unicode case. Split the search space into kSize spaces that are handled
// with recursion.
-static void SplitSearchSpace(ZoneGrowableArray<uint16_t>* ranges,
+static void SplitSearchSpace(ZoneGrowableArray<int>* ranges,
intptr_t start_index,
intptr_t end_index,
intptr_t* new_start_index,
intptr_t* new_end_index,
- uint16_t* border) {
+ intptr_t* border) {
static const intptr_t kSize = RegExpMacroAssembler::kTableSize;
static const intptr_t kMask = RegExpMacroAssembler::kTableMask;
- uint16_t first = ranges->At(start_index);
- uint16_t last = ranges->At(end_index) - 1;
+ intptr_t first = ranges->At(start_index);
+ intptr_t last = ranges->At(end_index) - 1;
*new_start_index = start_index;
*border = (ranges->At(start_index) & ~kMask) + kSize;
@@ -1208,7 +1189,7 @@
// Either label can be NULL indicating backtracking. Either label can also be
// equal to the fall_through label.
static void GenerateBranches(RegExpMacroAssembler* masm,
- ZoneGrowableArray<uint16_t>* ranges,
+ ZoneGrowableArray<int>* ranges,
intptr_t start_index,
intptr_t end_index,
uint16_t min_char,
@@ -1216,8 +1197,8 @@
BlockLabel* fall_through,
BlockLabel* even_label,
BlockLabel* odd_label) {
- uint16_t first = ranges->At(start_index);
- uint16_t last = ranges->At(end_index) - 1;
+ intptr_t first = ranges->At(start_index);
+ intptr_t last = ranges->At(end_index) - 1;
ASSERT(min_char < first);
@@ -1277,7 +1258,7 @@
intptr_t new_start_index = 0;
intptr_t new_end_index = 0;
- uint16_t border = 0;
+ intptr_t border = 0;
SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
&new_end_index, &border);
@@ -1334,7 +1315,7 @@
CharacterRange::Canonicalize(ranges);
}
- uint16_t max_char;
+ intptr_t max_char;
if (one_byte) {
max_char = Symbols::kMaxOneCharCodeSymbol;
} else {
@@ -1345,7 +1326,7 @@
intptr_t last_valid_range = range_count - 1;
while (last_valid_range >= 0) {
- const CharacterRange& range = ranges->At(last_valid_range);
+ CharacterRange& range = (*ranges)[last_valid_range];
if (range.from() <= max_char) {
break;
}
@@ -1373,6 +1354,14 @@
}
return;
}
+ if (last_valid_range == 0 && !cc->is_negated() &&
+ ranges->At(0).IsEverything(max_char)) {
+ // This is a common case hit by non-anchored expressions.
+ if (check_offset) {
+ macro_assembler->CheckPosition(cp_offset, on_failure);
+ }
+ return;
+ }
if (!preloaded) {
macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
@@ -1389,24 +1378,25 @@
// entry at zero which goes to the failure label, but if there
// was already one there we fall through for success on that entry.
// Subsequent entries have alternating meaning (success/failure).
- ZoneGrowableArray<uint16_t>* range_boundaries =
- new (zone) ZoneGrowableArray<uint16_t>(last_valid_range);
+ ZoneGrowableArray<int>* range_boundaries =
+ new (zone) ZoneGrowableArray<int>(last_valid_range);
bool zeroth_entry_is_failure = !cc->is_negated();
for (intptr_t i = 0; i <= last_valid_range; i++) {
- const CharacterRange& range = ranges->At(i);
+ CharacterRange& range = (*ranges)[i];
if (range.from() == 0) {
ASSERT(i == 0);
zeroth_entry_is_failure = !zeroth_entry_is_failure;
} else {
range_boundaries->Add(range.from());
}
- if (range.to() + 1 <= max_char) {
- range_boundaries->Add(range.to() + 1);
- }
+ range_boundaries->Add(range.to() + 1);
}
intptr_t end_index = range_boundaries->length() - 1;
+ if (range_boundaries->At(end_index) > max_char) {
+ end_index--;
+ }
BlockLabel fall_through;
GenerateBranches(macro_assembler, range_boundaries,
@@ -1704,7 +1694,7 @@
if (read_backward()) return;
ASSERT(characters_filled_in < details->characters());
intptr_t characters = details->characters();
- int32_t char_mask;
+ intptr_t char_mask;
if (compiler->one_byte()) {
char_mask = Symbols::kMaxOneCharCodeSymbol;
} else {
@@ -1729,7 +1719,7 @@
pos->determines_perfectly = false;
return;
}
- if (elm.atom()->ignore_case()) {
+ if (compiler->ignore_case()) {
int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
intptr_t length =
GetCaseIndependentLetters(c, compiler->one_byte(), chars);
@@ -1779,7 +1769,6 @@
details->positions(characters_filled_in);
RegExpCharacterClass* tree = elm.char_class();
ZoneGrowableArray<CharacterRange>* ranges = tree->ranges();
- ASSERT(!ranges->is_empty());
if (tree->is_negated()) {
// A quick check uses multi-character mask and compare. There is no
// useful way to incorporate a negative char class into this scheme
@@ -1918,16 +1907,16 @@
NodeInfo* info_;
};
-RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth) {
+RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
ASSERT(!info()->visited);
VisitMarker marker(info());
- return FilterSuccessor(depth - 1);
+ return FilterSuccessor(depth - 1, ignore_case);
}
-RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth) {
- RegExpNode* next = on_success_->FilterOneByte(depth - 1);
+RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) {
+ RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
if (next == NULL) return set_replacement(NULL);
on_success_ = next;
return set_replacement(this);
@@ -1964,7 +1953,7 @@
return 0;
}
-RegExpNode* TextNode::FilterOneByte(intptr_t depth) {
+RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
ASSERT(!info()->visited);
@@ -1977,7 +1966,7 @@
for (intptr_t j = 0; j < quarks->length(); j++) {
uint16_t c = quarks->At(j);
if (c <= Symbols::kMaxOneCharCodeSymbol) continue;
- if (!elm.atom()->ignore_case()) return set_replacement(NULL);
+ if (!ignore_case) return set_replacement(NULL);
// Here, we need to check for characters whose upper and lower cases
// are outside the Latin-1 range.
uint16_t converted = ConvertNonLatin1ToLatin1(c);
@@ -1999,44 +1988,40 @@
if (range_count != 0 && ranges->At(0).from() == 0 &&
ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) {
// This will be handled in a later filter.
- if (cc->flags().IgnoreCase() &&
- RangesContainLatin1Equivalents(ranges)) {
- continue;
- }
+ if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
return set_replacement(NULL);
}
} else {
if (range_count == 0 ||
ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) {
// This will be handled in a later filter.
- if (cc->flags().IgnoreCase() &&
- RangesContainLatin1Equivalents(ranges))
- continue;
+ if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
return set_replacement(NULL);
}
}
}
}
- return FilterSuccessor(depth - 1);
+ return FilterSuccessor(depth - 1, ignore_case);
}
-RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth) {
+RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
{
VisitMarker marker(info());
- RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth - 1);
+ RegExpNode* continue_replacement =
+ continue_node_->FilterOneByte(depth - 1, ignore_case);
// If we can't continue after the loop then there is no sense in doing the
// loop.
if (continue_replacement == NULL) return set_replacement(NULL);
}
- return ChoiceNode::FilterOneByte(depth - 1);
+ return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
}
-RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth) {
+RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2055,7 +2040,8 @@
RegExpNode* survivor = NULL;
for (intptr_t i = 0; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->At(i);
- RegExpNode* replacement = alternative.node()->FilterOneByte(depth - 1);
+ RegExpNode* replacement =
+ alternative.node()->FilterOneByte(depth - 1, ignore_case);
ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.
if (replacement != NULL) {
(*alternatives_)[i].set_node(replacement);
@@ -2075,7 +2061,7 @@
new (Z) ZoneGrowableArray<GuardedAlternative>(surviving);
for (intptr_t i = 0; i < choice_count; i++) {
RegExpNode* replacement =
- (*alternatives_)[i].node()->FilterOneByte(depth - 1);
+ (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case);
if (replacement != NULL) {
(*alternatives_)[i].set_node(replacement);
new_alternatives->Add((*alternatives_)[i]);
@@ -2085,7 +2071,8 @@
return this;
}
-RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(intptr_t depth) {
+RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(intptr_t depth,
+ bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2093,12 +2080,12 @@
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = (*alternatives_)[1].node();
- RegExpNode* replacement = node->FilterOneByte(depth - 1);
+ RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
if (replacement == NULL) return set_replacement(NULL);
(*alternatives_)[1].set_node(replacement);
RegExpNode* neg_node = (*alternatives_)[0].node();
- RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
+ RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
// If the negative lookahead is always going to fail then
// we don't need to check it.
if (neg_replacement == NULL) return set_replacement(replacement);
@@ -2396,21 +2383,13 @@
if (elm.text_type() == TextElement::ATOM) {
ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();
for (intptr_t j = preloaded ? 0 : quarks->length() - 1; j >= 0; j--) {
- if (SkipPass(pass, elm.atom()->ignore_case())) continue;
if (first_element_checked && i == 0 && j == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
EmitCharacterFunction* emit_function = NULL;
- uint16_t quark = quarks->At(j);
- if (elm.atom()->ignore_case()) {
- // Everywhere else we assume that a non-Latin-1 character cannot match
- // a Latin-1 character. Avoid the cases where this is assumption is
- // invalid by using the Latin1 equivalent instead.
- quark = Latin1::TryConvertToLatin1(quark);
- }
switch (pass) {
case NON_LATIN1_MATCH:
ASSERT(one_byte);
- if (quark > Symbols::kMaxOneCharCodeSymbol) {
+ if (quarks->At(j) > Symbols::kMaxOneCharCodeSymbol) {
assembler->GoTo(backtrack);
return;
}
@@ -2466,34 +2445,6 @@
}
}
-TextNode* TextNode::CreateForCharacterRanges(
- ZoneGrowableArray<CharacterRange>* ranges,
- bool read_backward,
- RegExpNode* on_success,
- RegExpFlags flags) {
- ASSERT(ranges != nullptr);
- ZoneGrowableArray<TextElement>* elms = new ZoneGrowableArray<TextElement>(1);
- elms->Add(TextElement::CharClass(new RegExpCharacterClass(ranges, flags)));
- return new TextNode(elms, read_backward, on_success);
-}
-
-TextNode* TextNode::CreateForSurrogatePair(CharacterRange lead,
- CharacterRange trail,
- bool read_backward,
- RegExpNode* on_success,
- RegExpFlags flags) {
- auto lead_ranges = CharacterRange::List(on_success->zone(), lead);
- auto trail_ranges = CharacterRange::List(on_success->zone(), trail);
- auto elms = new ZoneGrowableArray<TextElement>(2);
-
- elms->Add(
- TextElement::CharClass(new RegExpCharacterClass(lead_ranges, flags)));
- elms->Add(
- TextElement::CharClass(new RegExpCharacterClass(trail_ranges, flags)));
-
- return new TextNode(elms, read_backward, on_success);
-}
-
// This generates the code to match a text node. A text node can contain
// straight character sequences (possibly to be matched in a case-independent
// way) and character classes. For efficiency we do not do this in a single
@@ -2523,15 +2474,19 @@
// check that now.
if (trace->characters_preloaded() == 1) {
for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {
- TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
- false, &bound_checked_to);
+ if (!SkipPass(pass, compiler->ignore_case())) {
+ TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
+ false, &bound_checked_to);
+ }
}
first_elt_done = true;
}
for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {
- TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
- first_elt_done, &bound_checked_to);
+ if (!SkipPass(pass, compiler->ignore_case())) {
+ TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
+ first_elt_done, &bound_checked_to);
+ }
}
Trace successor_trace(*trace);
@@ -2573,13 +2528,13 @@
TextElement elm = elms_->At(i);
if (elm.text_type() == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.char_class();
- bool case_equivalents_already_added =
- cc->flags().NeedsUnicodeCaseEquivalents();
- if (cc->flags().IgnoreCase() && !case_equivalents_already_added) {
- // None of the standard character classes is different in the case
- // independent case and it slows us down if we don't know that.
- if (cc->is_standard()) continue;
- CharacterRange::AddCaseEquivalents(cc->ranges(), is_one_byte, Z);
+ // None of the standard character classes is different in the case
+ // independent case and it slows us down if we don't know that.
+ if (cc->is_standard()) continue;
+ ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
+ intptr_t range_count = ranges->length();
+ for (intptr_t j = 0; j < range_count; j++) {
+ (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, Z);
}
}
}
@@ -2619,7 +2574,7 @@
// length nodes or other complications in the way then return a sentinel
// value indicating that a greedy loop cannot be constructed.
intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative(
- const GuardedAlternative* alternative) {
+ GuardedAlternative* alternative) {
intptr_t length = 0;
RegExpNode* node = alternative->node();
// Later we will generate code for all these text nodes using recursion
@@ -2657,7 +2612,7 @@
if (trace->stop_node() == this) {
// Back edge of greedy optimized loop node graph.
intptr_t text_length =
- GreedyLoopTextLengthForAlternative(&alternatives_->At(0));
+ GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));
ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
// Update the counter-based backtracking info on the stack. This is an
// optimization for greedy loops (see below).
@@ -2738,27 +2693,25 @@
DISALLOW_ALLOCATION();
};
-static const int32_t kRangeEndMarker = Utf::kMaxCodePoint + 1;
-
// The '2' variant is inclusive from and exclusive to.
// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
// 0x180E has been removed from Unicode's Zs category and thus
// from ECMAScript's WhiteSpace category as of Unicode 6.3.
-static const int32_t kSpaceRanges[] = {
+static const intptr_t kSpaceRanges[] = {
'\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680,
0x1681, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,
- 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, kRangeEndMarker};
+ 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000};
static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
-static const int32_t kWordRanges[] = {
- '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, kRangeEndMarker};
+static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_',
+ '_' + 1, 'a', 'z' + 1, 0x10000};
static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);
-static const int32_t kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker};
+static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000};
static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
-static const int32_t kSurrogateRanges[] = {0xd800, 0xe000, kRangeEndMarker};
+static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000};
static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
-static const int32_t kLineTerminatorRanges[] = {
- 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, kRangeEndMarker};
+static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E,
+ 0x2028, 0x202A, 0x10000};
static const intptr_t kLineTerminatorRangeCount =
ARRAY_SIZE(kLineTerminatorRanges);
@@ -3104,11 +3057,6 @@
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
intptr_t choice_count = alternatives_->length();
- if (choice_count == 1 && alternatives_->At(0).guards() == nullptr) {
- alternatives_->At(0).node()->Emit(compiler, trace);
- return;
- }
-
AssertGuardsMentionRegisters(trace);
LimitResult limit_result = LimitVersions(compiler, trace);
@@ -3129,7 +3077,7 @@
GreedyLoopState greedy_loop_state(not_at_start());
intptr_t text_length =
- GreedyLoopTextLengthForAlternative(&alternatives_->At(0));
+ GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));
AlternativeGenerationList alt_gens(choice_count);
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
@@ -3515,9 +3463,9 @@
RecursionCheck rc(compiler);
ASSERT(start_reg_ + 1 == end_reg_);
- if (flags_.IgnoreCase()) {
- assembler->CheckNotBackReferenceIgnoreCase(
- start_reg_, read_backward(), flags_.IsUnicode(), trace->backtrack());
+ if (compiler->ignore_case()) {
+ assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
+ trace->backtrack());
} else {
assembler->CheckNotBackReference(start_reg_, read_backward(),
trace->backtrack());
@@ -3525,11 +3473,6 @@
// We are going to advance backward, so we may end up at the start.
if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
- // Check that the back reference does not end inside a surrogate pair.
- if (flags_.IsUnicode() && !compiler->one_byte()) {
- assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
- }
-
on_success()->Emit(compiler, trace);
}
@@ -3788,10 +3731,10 @@
}
static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges,
- const int32_t* special_class,
+ const intptr_t* special_class,
intptr_t length) {
- length--; // Remove final kRangeEndMarker.
- ASSERT(special_class[length] == kRangeEndMarker);
+ length--; // Remove final 0x10000.
+ ASSERT(special_class[length] == 0x10000);
ASSERT(ranges->length() != 0);
ASSERT(length != 0);
ASSERT(special_class[0] != 0);
@@ -3811,17 +3754,17 @@
return false;
}
}
- if (range.to() != Utf::kMaxCodePoint) {
+ if (range.to() != 0xffff) {
return false;
}
return true;
}
static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges,
- const int32_t* special_class,
+ const intptr_t* special_class,
intptr_t length) {
- length--; // Remove final kRangeEndMarker.
- ASSERT(special_class[length] == kRangeEndMarker);
+ length--; // Remove final 0x10000.
+ ASSERT(special_class[length] == 0x10000);
if (ranges->length() * 2 != length) {
return false;
}
@@ -3838,7 +3781,7 @@
bool RegExpCharacterClass::is_standard() {
// TODO(lrn): Remove need for this function, by not throwing away information
// along the way.
- if (is_negated()) {
+ if (is_negated_) {
return false;
}
if (set_.is_standard()) {
@@ -3873,307 +3816,8 @@
return false;
}
-UnicodeRangeSplitter::UnicodeRangeSplitter(
- Zone* zone,
- ZoneGrowableArray<CharacterRange>* base)
- : zone_(zone),
- table_(zone),
- bmp_(nullptr),
- lead_surrogates_(nullptr),
- trail_surrogates_(nullptr),
- non_bmp_(nullptr) {
- // The unicode range splitter categorizes given character ranges into:
- // - Code points from the BMP representable by one code unit.
- // - Code points outside the BMP that need to be split into surrogate pairs.
- // - Lone lead surrogates.
- // - Lone trail surrogates.
- // Lone surrogates are valid code points, even though no actual characters.
- // They require special matching to make sure we do not split surrogate pairs.
- // We use the dispatch table to accomplish this. The base range is split up
- // by the table by the overlay ranges, and the Call callback is used to
- // filter and collect ranges for each category.
- for (intptr_t i = 0; i < base->length(); i++) {
- table_.AddRange(base->At(i), kBase, zone_);
- }
- // Add overlay ranges.
- table_.AddRange(CharacterRange::Range(0, Utf16::kLeadSurrogateStart - 1),
- kBmpCodePoints, zone_);
- table_.AddRange(CharacterRange::Range(Utf16::kLeadSurrogateStart,
- Utf16::kLeadSurrogateEnd),
- kLeadSurrogates, zone_);
- table_.AddRange(CharacterRange::Range(Utf16::kTrailSurrogateStart,
- Utf16::kTrailSurrogateEnd),
- kTrailSurrogates, zone_);
- table_.AddRange(
- CharacterRange::Range(Utf16::kTrailSurrogateEnd + 1, Utf16::kMaxCodeUnit),
- kBmpCodePoints, zone_);
- table_.AddRange(
- CharacterRange::Range(Utf16::kMaxCodeUnit + 1, Utf::kMaxCodePoint),
- kNonBmpCodePoints, zone_);
- table_.ForEach(this);
-}
-
-void UnicodeRangeSplitter::Call(uint32_t from, DispatchTable::Entry entry) {
- OutSet* outset = entry.out_set();
- if (!outset->Get(kBase)) return;
- ZoneGrowableArray<CharacterRange>** target = nullptr;
- if (outset->Get(kBmpCodePoints)) {
- target = &bmp_;
- } else if (outset->Get(kLeadSurrogates)) {
- target = &lead_surrogates_;
- } else if (outset->Get(kTrailSurrogates)) {
- target = &trail_surrogates_;
- } else {
- ASSERT(outset->Get(kNonBmpCodePoints));
- target = &non_bmp_;
- }
- if (*target == nullptr) {
- *target = new (zone_) ZoneGrowableArray<CharacterRange>(2);
- }
- (*target)->Add(CharacterRange::Range(entry.from(), entry.to()));
-}
-
-void AddBmpCharacters(RegExpCompiler* compiler,
- ChoiceNode* result,
- RegExpNode* on_success,
- UnicodeRangeSplitter* splitter) {
- ZoneGrowableArray<CharacterRange>* bmp = splitter->bmp();
- if (bmp == nullptr) return;
- result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
- bmp, compiler->read_backward(), on_success, RegExpFlags())));
-}
-
-void AddNonBmpSurrogatePairs(RegExpCompiler* compiler,
- ChoiceNode* result,
- RegExpNode* on_success,
- UnicodeRangeSplitter* splitter) {
- ZoneGrowableArray<CharacterRange>* non_bmp = splitter->non_bmp();
- if (non_bmp == nullptr) return;
- ASSERT(!compiler->one_byte());
- CharacterRange::Canonicalize(non_bmp);
- for (int i = 0; i < non_bmp->length(); i++) {
- // Match surrogate pair.
- // E.g. [\u10005-\u11005] becomes
- // \ud800[\udc05-\udfff]|
- // [\ud801-\ud803][\udc00-\udfff]|
- // \ud804[\udc00-\udc05]
- uint32_t from = non_bmp->At(i).from();
- uint32_t to = non_bmp->At(i).to();
- uint16_t from_points[2];
- Utf16::Encode(from, from_points);
- uint16_t to_points[2];
- Utf16::Encode(to, to_points);
- if (from_points[0] == to_points[0]) {
- // The lead surrogate is the same.
- result->AddAlternative(
- GuardedAlternative(TextNode::CreateForSurrogatePair(
- CharacterRange::Singleton(from_points[0]),
- CharacterRange::Range(from_points[1], to_points[1]),
- compiler->read_backward(), on_success, RegExpFlags())));
- } else {
- if (from_points[1] != Utf16::kTrailSurrogateStart) {
- // Add [from_l][from_t-\udfff]
- result->AddAlternative(
- GuardedAlternative(TextNode::CreateForSurrogatePair(
- CharacterRange::Singleton(from_points[0]),
- CharacterRange::Range(from_points[1],
- Utf16::kTrailSurrogateEnd),
- compiler->read_backward(), on_success, RegExpFlags())));
- from_points[0]++;
- }
- if (to_points[1] != Utf16::kTrailSurrogateEnd) {
- // Add [to_l][\udc00-to_t]
- result->AddAlternative(
- GuardedAlternative(TextNode::CreateForSurrogatePair(
- CharacterRange::Singleton(to_points[0]),
- CharacterRange::Range(Utf16::kTrailSurrogateStart,
- to_points[1]),
- compiler->read_backward(), on_success, RegExpFlags())));
- to_points[0]--;
- }
- if (from_points[0] <= to_points[0]) {
- // Add [from_l-to_l][\udc00-\udfff]
- result->AddAlternative(
- GuardedAlternative(TextNode::CreateForSurrogatePair(
- CharacterRange::Range(from_points[0], to_points[0]),
- CharacterRange::Range(Utf16::kTrailSurrogateStart,
- Utf16::kTrailSurrogateEnd),
- compiler->read_backward(), on_success, RegExpFlags())));
- }
- }
- }
-}
-
-RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
- RegExpCompiler* compiler,
- ZoneGrowableArray<CharacterRange>* lookbehind,
- ZoneGrowableArray<CharacterRange>* match,
- RegExpNode* on_success,
- bool read_backward,
- RegExpFlags flags) {
- RegExpNode* match_node = TextNode::CreateForCharacterRanges(
- match, read_backward, on_success, flags);
- int stack_register = compiler->UnicodeLookaroundStackRegister();
- int position_register = compiler->UnicodeLookaroundPositionRegister();
- RegExpLookaround::Builder lookaround(false, match_node, stack_register,
- position_register);
- RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
- lookbehind, !read_backward, lookaround.on_match_success(), flags);
- return lookaround.ForMatch(negative_match);
-}
-
-RegExpNode* MatchAndNegativeLookaroundInReadDirection(
- RegExpCompiler* compiler,
- ZoneGrowableArray<CharacterRange>* match,
- ZoneGrowableArray<CharacterRange>* lookahead,
- RegExpNode* on_success,
- bool read_backward,
- RegExpFlags flags) {
- int stack_register = compiler->UnicodeLookaroundStackRegister();
- int position_register = compiler->UnicodeLookaroundPositionRegister();
- RegExpLookaround::Builder lookaround(false, on_success, stack_register,
- position_register);
- RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
- lookahead, read_backward, lookaround.on_match_success(), flags);
- return TextNode::CreateForCharacterRanges(
- match, read_backward, lookaround.ForMatch(negative_match), flags);
-}
-
-void AddLoneLeadSurrogates(RegExpCompiler* compiler,
- ChoiceNode* result,
- RegExpNode* on_success,
- UnicodeRangeSplitter* splitter) {
- auto lead_surrogates = splitter->lead_surrogates();
- if (lead_surrogates == nullptr) return;
- // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
- auto trail_surrogates = CharacterRange::List(
- on_success->zone(), CharacterRange::Range(Utf16::kTrailSurrogateStart,
- Utf16::kTrailSurrogateEnd));
-
- RegExpNode* match;
- if (compiler->read_backward()) {
- // Reading backward. Assert that reading forward, there is no trail
- // surrogate, and then backward match the lead surrogate.
- match = NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, trail_surrogates, lead_surrogates, on_success, true,
- RegExpFlags());
- } else {
- // Reading forward. Forward match the lead surrogate and assert that
- // no trail surrogate follows.
- match = MatchAndNegativeLookaroundInReadDirection(
- compiler, lead_surrogates, trail_surrogates, on_success, false,
- RegExpFlags());
- }
- result->AddAlternative(GuardedAlternative(match));
-}
-
-void AddLoneTrailSurrogates(RegExpCompiler* compiler,
- ChoiceNode* result,
- RegExpNode* on_success,
- UnicodeRangeSplitter* splitter) {
- auto trail_surrogates = splitter->trail_surrogates();
- if (trail_surrogates == nullptr) return;
- // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
- auto lead_surrogates = CharacterRange::List(
- on_success->zone(), CharacterRange::Range(Utf16::kLeadSurrogateStart,
- Utf16::kLeadSurrogateEnd));
-
- RegExpNode* match;
- if (compiler->read_backward()) {
- // Reading backward. Backward match the trail surrogate and assert that no
- // lead surrogate precedes it.
- match = MatchAndNegativeLookaroundInReadDirection(
- compiler, trail_surrogates, lead_surrogates, on_success, true,
- RegExpFlags());
- } else {
- // Reading forward. Assert that reading backward, there is no lead
- // surrogate, and then forward match the trail surrogate.
- match = NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, lead_surrogates, trail_surrogates, on_success, false,
- RegExpFlags());
- }
- result->AddAlternative(GuardedAlternative(match));
-}
-
-RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- // This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
- ASSERT(!compiler->read_backward());
- // Advance any character. If the character happens to be a lead surrogate and
- // we advanced into the middle of a surrogate pair, it will work out, as
- // nothing will match from there. We will have to advance again, consuming
- // the associated trail surrogate.
- auto range = CharacterRange::List(
- on_success->zone(), CharacterRange::Range(0, Utf16::kMaxCodeUnit));
- return TextNode::CreateForCharacterRanges(range, false, on_success,
- RegExpFlags());
-}
-
-void AddUnicodeCaseEquivalents(ZoneGrowableArray<CharacterRange>* ranges) {
- ASSERT(CharacterRange::IsCanonical(ranges));
-
- // Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
- // See also https://crbug.com/v8/6727.
- // TODO(sstrickl): This only covers the special case of the {0,0x10FFFF}
- // range, which we use frequently internally. But large ranges can also easily
- // be created by the user. We might want to have a more general caching
- // mechanism for such ranges.
- if (ranges->length() == 1 && ranges->At(0).IsEverything(Utf::kMaxCodePoint)) {
- return;
- }
-
- icu::UnicodeSet set;
- for (int i = 0; i < ranges->length(); i++) {
- set.add(ranges->At(i).from(), ranges->At(i).to());
- }
- ranges->Clear();
- set.closeOver(USET_CASE_INSENSITIVE);
- // Full case mapping map single characters to multiple characters.
- // Those are represented as strings in the set. Remove them so that
- // we end up with only simple and common case mappings.
- set.removeAllStrings();
- for (int i = 0; i < set.getRangeCount(); i++) {
- ranges->Add(
- CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)));
- }
- // No errors and everything we collected have been ranges.
- CharacterRange::Canonicalize(ranges);
-}
-
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- set_.Canonicalize();
- ZoneGrowableArray<CharacterRange>* ranges = this->ranges();
- if (flags_.NeedsUnicodeCaseEquivalents()) {
- AddUnicodeCaseEquivalents(ranges);
- }
- if (flags_.IsUnicode() && !compiler->one_byte() &&
- !contains_split_surrogate()) {
- if (is_negated()) {
- ZoneGrowableArray<CharacterRange>* negated =
- new ZoneGrowableArray<CharacterRange>(2);
- CharacterRange::Negate(ranges, negated);
- ranges = negated;
- }
- if (ranges->length() == 0) {
- RegExpCharacterClass* fail =
- new RegExpCharacterClass(ranges, RegExpFlags());
- return new TextNode(fail, compiler->read_backward(), on_success);
- }
- if (standard_type() == '*') {
- return UnanchoredAdvance(compiler, on_success);
- } else {
- ChoiceNode* result = new (OZ) ChoiceNode(2, OZ);
- UnicodeRangeSplitter splitter(OZ, ranges);
- AddBmpCharacters(compiler, result, on_success, &splitter);
- AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
- AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
- AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
- return result;
- }
- } else {
- return new TextNode(this, compiler->read_backward(), on_success);
- }
return new (OZ) TextNode(this, compiler->read_backward(), on_success);
}
@@ -4370,43 +4014,6 @@
}
}
-namespace {
-// Desugar \b to (?<=\w)(?=\W)|(?<=\W)(?=\w) and
-// \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
-RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
- RegExpNode* on_success,
- RegExpAssertion::AssertionType type,
- RegExpFlags flags) {
- ASSERT(flags.NeedsUnicodeCaseEquivalents());
- ZoneGrowableArray<CharacterRange>* word_range =
- new ZoneGrowableArray<CharacterRange>(2);
- CharacterRange::AddClassEscape('w', word_range, true);
- int stack_register = compiler->UnicodeLookaroundStackRegister();
- int position_register = compiler->UnicodeLookaroundPositionRegister();
- ChoiceNode* result = new (OZ) ChoiceNode(2, OZ);
- // Add two choices. The (non-)boundary could start with a word or
- // a non-word-character.
- for (int i = 0; i < 2; i++) {
- bool lookbehind_for_word = i == 0;
- bool lookahead_for_word =
- (type == RegExpAssertion::BOUNDARY) ^ lookbehind_for_word;
- // Look to the left.
- RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
- stack_register, position_register);
- RegExpNode* backward = TextNode::CreateForCharacterRanges(
- word_range, true, lookbehind.on_match_success(), flags);
- // Look to the right.
- RegExpLookaround::Builder lookahead(lookahead_for_word,
- lookbehind.ForMatch(backward),
- stack_register, position_register);
- RegExpNode* forward = TextNode::CreateForCharacterRanges(
- word_range, false, lookahead.on_match_success(), flags);
- result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
- }
- return result;
-}
-} // anonymous namespace
-
RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
switch (assertion_type()) {
@@ -4415,15 +4022,9 @@
case START_OF_INPUT:
return AssertionNode::AtStart(on_success);
case BOUNDARY:
- return flags_.NeedsUnicodeCaseEquivalents()
- ? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
- flags_)
- : AssertionNode::AtBoundary(on_success);
+ return AssertionNode::AtBoundary(on_success);
case NON_BOUNDARY:
- return flags_.NeedsUnicodeCaseEquivalents()
- ? BoundaryAssertionAsLookaround(compiler, on_success,
- NON_BOUNDARY, flags_)
- : AssertionNode::AtNonBoundary(on_success);
+ return AssertionNode::AtNonBoundary(on_success);
case END_OF_INPUT:
return AssertionNode::AtEnd(on_success);
case END_OF_LINE: {
@@ -4438,8 +4039,7 @@
ZoneGrowableArray<CharacterRange>* newline_ranges =
new ZoneGrowableArray<CharacterRange>(3);
CharacterRange::AddClassEscape('n', newline_ranges);
- RegExpCharacterClass* newline_atom =
- new RegExpCharacterClass('n', RegExpFlags());
+ RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');
TextNode* newline_matcher =
new TextNode(newline_atom, /*read_backwards=*/false,
ActionNode::PositiveSubmatchSuccess(
@@ -4466,7 +4066,7 @@
RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
return new (OZ) BackReferenceNode(RegExpCapture::StartRegister(index()),
- RegExpCapture::EndRegister(index()), flags_,
+ RegExpCapture::EndRegister(index()),
compiler->read_backward(), on_success);
}
@@ -4574,24 +4174,24 @@
return current;
}
-static void AddClass(const int32_t* elmv,
+static void AddClass(const intptr_t* elmv,
intptr_t elmc,
ZoneGrowableArray<CharacterRange>* ranges) {
elmc--;
- ASSERT(elmv[elmc] == kRangeEndMarker);
+ ASSERT(elmv[elmc] == 0x10000);
for (intptr_t i = 0; i < elmc; i += 2) {
ASSERT(elmv[i] < elmv[i + 1]);
ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1));
}
}
-static void AddClassNegated(const int32_t* elmv,
+static void AddClassNegated(const intptr_t* elmv,
intptr_t elmc,
ZoneGrowableArray<CharacterRange>* ranges) {
elmc--;
- ASSERT(elmv[elmc] == kRangeEndMarker);
+ ASSERT(elmv[elmc] == 0x10000);
ASSERT(elmv[0] != 0x0000);
- ASSERT(elmv[elmc - 1] != Utf::kMaxCodePoint);
+ ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit);
uint16_t last = 0x0000;
for (intptr_t i = 0; i < elmc; i += 2) {
ASSERT(last <= elmv[i] - 1);
@@ -4599,30 +4199,7 @@
ranges->Add(CharacterRange(last, elmv[i] - 1));
last = elmv[i + 1];
}
- ranges->Add(CharacterRange(last, Utf::kMaxCodePoint));
-}
-
-void CharacterRange::AddClassEscape(uint16_t type,
- ZoneGrowableArray<CharacterRange>* ranges,
- bool add_unicode_case_equivalents) {
- if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
- // See #sec-runtime-semantics-wordcharacters-abstract-operation
- // In case of unicode and ignore_case, we need to create the closure over
- // case equivalent characters before negating.
- ZoneGrowableArray<CharacterRange>* new_ranges =
- new ZoneGrowableArray<CharacterRange>(2);
- AddClass(kWordRanges, kWordRangeCount, new_ranges);
- AddUnicodeCaseEquivalents(new_ranges);
- if (type == 'W') {
- ZoneGrowableArray<CharacterRange>* negated =
- new ZoneGrowableArray<CharacterRange>(2);
- CharacterRange::Negate(new_ranges, negated);
- new_ranges = negated;
- }
- ranges->AddArray(*new_ranges);
- return;
- }
- AddClassEscape(type, ranges);
+ ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit));
}
void CharacterRange::AddClassEscape(uint16_t type,
@@ -4669,79 +4246,69 @@
ZoneGrowableArray<CharacterRange>* ranges,
bool is_one_byte,
Zone* zone) {
- CharacterRange::Canonicalize(ranges);
- int range_count = ranges->length();
- for (intptr_t i = 0; i < range_count; i++) {
- CharacterRange range = ranges->At(i);
- int32_t bottom = range.from();
- if (bottom > Utf16::kMaxCodeUnit) continue;
- int32_t top = Utils::Minimum(range.to(), Utf16::kMaxCodeUnit);
- // Nothing to be done for surrogates
- if (bottom >= Utf16::kLeadSurrogateStart &&
- top <= Utf16::kTrailSurrogateEnd) {
- continue;
+ uint16_t bottom = from();
+ uint16_t top = to();
+ if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
+ if (bottom > Symbols::kMaxOneCharCodeSymbol) return;
+ if (top > Symbols::kMaxOneCharCodeSymbol) {
+ top = Symbols::kMaxOneCharCodeSymbol;
}
- if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
- if (bottom > Symbols::kMaxOneCharCodeSymbol) continue;
- if (top > Symbols::kMaxOneCharCodeSymbol) {
- top = Symbols::kMaxOneCharCodeSymbol;
- }
- }
+ }
- unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
- unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;
- int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- if (top == bottom) {
- // If this is a singleton we just expand the one character.
- intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars);
+ unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
+ unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;
+ int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+ if (top == bottom) {
+ // If this is a singleton we just expand the one character.
+ intptr_t length =
+ jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLINT
+ for (intptr_t i = 0; i < length; i++) {
+ uint32_t chr = chars[i];
+ if (chr != bottom) {
+ ranges->Add(CharacterRange::Singleton(chars[i]));
+ }
+ }
+ } else {
+ // If this is a range we expand the characters block by block,
+ // expanding contiguous subranges (blocks) one at a time.
+ // The approach is as follows. For a given start character we
+ // look up the remainder of the block that contains it (represented
+ // by the end point), for instance we find 'z' if the character
+ // is 'c'. A block is characterized by the property
+ // that all characters uncanonicalize in the same way, except that
+ // each entry in the result is incremented by the distance from the first
+ // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
+ // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
+ // Once we've found the end point we look up its uncanonicalization
+ // and produce a range for each element. For instance for [c-f]
+ // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
+ // add a range if it is not already contained in the input, so [c-f]
+ // will be skipped but [C-F] will be added. If this range is not
+ // completely contained in a block we do this for all the blocks
+ // covered by the range (handling characters that is not in a block
+ // as a "singleton block").
+ int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+ intptr_t pos = bottom;
+ while (pos <= top) {
+ intptr_t length = jsregexp_canonrange.get(pos, '\0', range);
+ uint16_t block_end;
+ if (length == 0) {
+ block_end = pos;
+ } else {
+ ASSERT(length == 1);
+ block_end = range[0];
+ }
+ intptr_t end = (block_end > top) ? top : block_end;
+ length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT
for (intptr_t i = 0; i < length; i++) {
- int32_t chr = chars[i];
- if (chr != bottom) {
- ranges->Add(CharacterRange::Singleton(chars[i]));
+ uint32_t c = range[i];
+ uint16_t range_from = c - (block_end - pos);
+ uint16_t range_to = c - (block_end - end);
+ if (!(bottom <= range_from && range_to <= top)) {
+ ranges->Add(CharacterRange(range_from, range_to));
}
}
- } else {
- // If this is a range we expand the characters block by block,
- // expanding contiguous subranges (blocks) one at a time.
- // The approach is as follows. For a given start character we
- // look up the remainder of the block that contains it (represented
- // by the end point), for instance we find 'z' if the character
- // is 'c'. A block is characterized by the property
- // that all characters uncanonicalize in the same way, except that
- // each entry in the result is incremented by the distance from the first
- // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A']
- // and the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
- // Once we've found the end point we look up its uncanonicalization
- // and produce a range for each element. For instance for [c-f]
- // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
- // add a range if it is not already contained in the input, so [c-f]
- // will be skipped but [C-F] will be added. If this range is not
- // completely contained in a block we do this for all the blocks
- // covered by the range (handling characters that is not in a block
- // as a "singleton block").
- int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- intptr_t pos = bottom;
- while (pos <= top) {
- intptr_t length = jsregexp_canonrange.get(pos, '\0', range);
- int32_t block_end;
- if (length == 0) {
- block_end = pos;
- } else {
- ASSERT(length == 1);
- block_end = range[0];
- }
- intptr_t end = (block_end > top) ? top : block_end;
- length = jsregexp_uncanonicalize.get(block_end, '\0', range);
- for (intptr_t i = 0; i < length; i++) {
- int32_t c = range[i];
- int32_t range_from = c - (block_end - pos);
- int32_t range_to = c - (block_end - end);
- if (!(bottom <= range_from && range_to <= top)) {
- ranges->Add(CharacterRange(range_from, range_to));
- }
- }
- pos = end + 1;
- }
+ pos = end + 1;
}
}
}
@@ -4794,8 +4361,8 @@
// list[0..count] for the result. Returns the number of resulting
// canonicalized ranges. Inserting a range may collapse existing ranges into
// fewer ranges, so the return value can be anything in the range 1..count+1.
- int32_t from = insert.from();
- int32_t to = insert.to();
+ uint16_t from = insert.from();
+ uint16_t to = insert.to();
intptr_t start_pos = 0;
intptr_t end_pos = count;
for (intptr_t i = count - 1; i >= 0; i--) {
@@ -4891,7 +4458,7 @@
ASSERT(CharacterRange::IsCanonical(ranges));
ASSERT(negated_ranges->length() == 0);
intptr_t range_count = ranges->length();
- uint32_t from = 0;
+ uint16_t from = 0;
intptr_t i = 0;
if (range_count > 0 && ranges->At(0).from() == 0) {
from = ranges->At(0).to();
@@ -4903,8 +4470,8 @@
from = range.to();
i++;
}
- if (from < Utf::kMaxCodePoint) {
- negated_ranges->Add(CharacterRange(from + 1, Utf::kMaxCodePoint));
+ if (from < Utf16::kMaxCodeUnit) {
+ negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit));
}
}
@@ -4921,22 +4488,6 @@
return false;
}
-OutSet* OutSet::Extend(unsigned value, Zone* zone) {
- if (Get(value)) return this;
- if (successors() != nullptr) {
- for (int i = 0; i < successors()->length(); i++) {
- OutSet* successor = successors()->At(i);
- if (successor->Get(value)) return successor;
- }
- } else {
- successors_ = new (zone) ZoneGrowableArray<OutSet*>(2);
- }
- OutSet* result = new (zone) OutSet(first_, remaining_);
- result->Set(value, zone);
- successors()->Add(result);
- return result;
-}
-
void OutSet::Set(unsigned value, Zone* zone) {
if (value < kFirstLimit) {
first_ |= (1 << value);
@@ -4961,108 +4512,6 @@
}
}
-const int32_t DispatchTable::Config::kNoKey = Utf::kInvalidChar;
-
-void DispatchTable::AddRange(CharacterRange full_range,
- int32_t value,
- Zone* zone) {
- CharacterRange current = full_range;
- if (tree()->is_empty()) {
- // If this is the first range we just insert into the table.
- ZoneSplayTree<Config>::Locator loc;
- bool inserted = tree()->Insert(current.from(), &loc);
- ASSERT(inserted);
- USE(inserted);
- loc.set_value(
- Entry(current.from(), current.to(), empty()->Extend(value, zone)));
- return;
- }
- // First see if there is a range to the left of this one that
- // overlaps.
- ZoneSplayTree<Config>::Locator loc;
- if (tree()->FindGreatestLessThan(current.from(), &loc)) {
- Entry* entry = &loc.value();
- // If we've found a range that overlaps with this one, and it
- // starts strictly to the left of this one, we have to fix it
- // because the following code only handles ranges that start on
- // or after the start point of the range we're adding.
- if (entry->from() < current.from() && entry->to() >= current.from()) {
- // Snap the overlapping range in half around the start point of
- // the range we're adding.
- CharacterRange left =
- CharacterRange::Range(entry->from(), current.from() - 1);
- CharacterRange right = CharacterRange::Range(current.from(), entry->to());
- // The left part of the overlapping range doesn't overlap.
- // Truncate the whole entry to be just the left part.
- entry->set_to(left.to());
- // The right part is the one that overlaps. We add this part
- // to the map and let the next step deal with merging it with
- // the range we're adding.
- ZoneSplayTree<Config>::Locator loc;
- bool inserted = tree()->Insert(right.from(), &loc);
- ASSERT(inserted);
- USE(inserted);
- loc.set_value(Entry(right.from(), right.to(), entry->out_set()));
- }
- }
- while (current.is_valid()) {
- if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
- (loc.value().from() <= current.to()) &&
- (loc.value().to() >= current.from())) {
- Entry* entry = &loc.value();
- // We have overlap. If there is space between the start point of
- // the range we're adding and where the overlapping range starts
- // then we have to add a range covering just that space.
- if (current.from() < entry->from()) {
- ZoneSplayTree<Config>::Locator ins;
- bool inserted = tree()->Insert(current.from(), &ins);
- ASSERT(inserted);
- USE(inserted);
- ins.set_value(Entry(current.from(), entry->from() - 1,
- empty()->Extend(value, zone)));
- current.set_from(entry->from());
- }
- ASSERT(current.from() == entry->from());
- // If the overlapping range extends beyond the one we want to add
- // we have to snap the right part off and add it separately.
- if (entry->to() > current.to()) {
- ZoneSplayTree<Config>::Locator ins;
- bool inserted = tree()->Insert(current.to() + 1, &ins);
- ASSERT(inserted);
- USE(inserted);
- ins.set_value(Entry(current.to() + 1, entry->to(), entry->out_set()));
- entry->set_to(current.to());
- }
- ASSERT(entry->to() <= current.to());
- // The overlapping range is now completely contained by the range
- // we're adding so we can just update it and move the start point
- // of the range we're adding just past it.
- entry->AddValue(value, zone);
- ASSERT(entry->to() + 1 > current.from());
- current.set_from(entry->to() + 1);
- } else {
- // There is no overlap so we can just add the range
- ZoneSplayTree<Config>::Locator ins;
- bool inserted = tree()->Insert(current.from(), &ins);
- ASSERT(inserted);
- USE(inserted);
- ins.set_value(
- Entry(current.from(), current.to(), empty()->Extend(value, zone)));
- break;
- }
- }
-}
-
-OutSet* DispatchTable::Get(int32_t value) {
- ZoneSplayTree<Config>::Locator loc;
- if (!tree()->FindGreatestLessThan(value, &loc)) return empty();
- Entry* entry = &loc.value();
- if (value <= entry->to())
- return entry->out_set();
- else
- return empty();
-}
-
// -------------------------------------------------------------------
// Analysis
@@ -5091,7 +4540,9 @@
}
void Analysis::VisitText(TextNode* that) {
- that->MakeCaseIndependent(is_one_byte_);
+ if (ignore_case_) {
+ that->MakeCaseIndependent(is_one_byte_);
+ }
EnsureAnalyzed(that->on_success());
if (!has_failed()) {
that->CalculateOffsets();
@@ -5198,7 +4649,7 @@
return;
}
uint16_t character = atom->data()->At(j);
- if (atom->flags().IgnoreCase()) {
+ if (bm->compiler()->ignore_case()) {
int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
intptr_t length = GetCaseIndependentLetters(
character, bm->max_char() == Symbols::kMaxOneCharCodeSymbol,
@@ -5218,7 +4669,7 @@
bm->SetAll(offset);
} else {
for (intptr_t k = 0; k < ranges->length(); k++) {
- const CharacterRange& range = ranges->At(k);
+ CharacterRange& range = (*ranges)[k];
if (range.from() > max_char) continue;
intptr_t to =
Utils::Minimum(max_char, static_cast<intptr_t>(range.to()));
@@ -5237,40 +4688,6 @@
if (initial_offset == 0) set_bm_info(not_at_start, bm);
}
-RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
- RegExpNode* on_success,
- RegExpFlags flags) {
- // If the regexp matching starts within a surrogate pair, step back
- // to the lead surrogate and start matching from there.
- ASSERT(!compiler->read_backward());
- Zone* zone = compiler->zone();
-
- auto lead_surrogates = CharacterRange::List(
- on_success->zone(), CharacterRange::Range(Utf16::kLeadSurrogateStart,
- Utf16::kLeadSurrogateEnd));
- auto trail_surrogates = CharacterRange::List(
- on_success->zone(), CharacterRange::Range(Utf16::kTrailSurrogateStart,
- Utf16::kTrailSurrogateEnd));
-
- ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone);
-
- int stack_register = compiler->UnicodeLookaroundStackRegister();
- int position_register = compiler->UnicodeLookaroundPositionRegister();
- RegExpNode* step_back = TextNode::CreateForCharacterRanges(
- lead_surrogates, /*read_backward=*/true, on_success, flags);
- RegExpLookaround::Builder builder(/*is_positive=*/true, step_back,
- stack_register, position_register);
- RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
- trail_surrogates, /*read_backward=*/false, builder.on_match_success(),
- flags);
-
- optional_step_back->AddAlternative(
- GuardedAlternative(builder.ForMatch(match_trail)));
- optional_step_back->AddAlternative(GuardedAlternative(on_success));
-
- return optional_step_back;
-}
-
#if !defined(DART_PRECOMPILED_RUNTIME)
RegExpEngine::CompilationResult RegExpEngine::CompileIR(
RegExpCompileData* data,
@@ -5291,10 +4708,10 @@
ASSERT(!regexp.IsNull());
ASSERT(!pattern.IsNull());
- const bool is_global = regexp.flags().IsGlobal();
- const bool is_unicode = regexp.flags().IsUnicode();
+ const bool ignore_case = regexp.is_ignore_case();
+ const bool is_global = regexp.is_global();
- RegExpCompiler compiler(data->capture_count, is_one_byte);
+ RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte);
// TODO(zerny): Frequency sampling is currently disabled because of several
// issues. We do not want to store subject strings in the regexp object since
@@ -5317,37 +4734,34 @@
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning or is sticky.
RegExpNode* loop_node = RegExpQuantifier::ToNode(
- 0, RegExpTree::kInfinity, false,
- new (zone) RegExpCharacterClass('*', RegExpFlags()), &compiler,
- captured_body, data->contains_anchor);
+ 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
+ &compiler, captured_body, data->contains_anchor);
if (data->contains_anchor) {
// Unroll loop once, to take care of the case that might start
// at the start of input.
ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
- first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
- new (zone) RegExpCharacterClass('*', RegExpFlags()),
- /*read_backwards=*/false, loop_node)));
+ first_step_node->AddAlternative(GuardedAlternative(
+ new (zone) TextNode(new (zone) RegExpCharacterClass('*'),
+ /*read_backwards=*/false, loop_node)));
node = first_step_node;
} else {
node = loop_node;
}
}
if (is_one_byte) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
if (node != NULL) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
}
- } else if (is_unicode && (is_global || is_sticky)) {
- node = OptionallyStepBackToLeadSurrogate(&compiler, node, regexp.flags());
}
if (node == NULL) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
- Analysis analysis(is_one_byte);
+ Analysis analysis(ignore_case, is_one_byte);
analysis.EnsureAnalyzed(node);
if (analysis.has_failed()) {
const char* error_message = analysis.error_message();
@@ -5369,13 +4783,10 @@
}
if (is_global) {
- RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
- if (data->tree->min_match() > 0) {
- mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
- } else if (is_unicode) {
- mode = RegExpMacroAssembler::GLOBAL_UNICODE;
- }
- macro_assembler->set_global_mode(mode);
+ macro_assembler->set_global_mode(
+ (data->tree->min_match() > 0)
+ ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
+ : RegExpMacroAssembler::GLOBAL);
}
RegExpEngine::CompilationResult result =
@@ -5401,10 +4812,10 @@
ASSERT(!regexp.IsNull());
ASSERT(!pattern.IsNull());
- const bool is_global = regexp.flags().IsGlobal();
- const bool is_unicode = regexp.flags().IsUnicode();
+ const bool ignore_case = regexp.is_ignore_case();
+ const bool is_global = regexp.is_global();
- RegExpCompiler compiler(data->capture_count, is_one_byte);
+ RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte);
// TODO(zerny): Frequency sampling is currently disabled because of several
// issues. We do not want to store subject strings in the regexp object since
@@ -5427,37 +4838,34 @@
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning.
RegExpNode* loop_node = RegExpQuantifier::ToNode(
- 0, RegExpTree::kInfinity, false,
- new (zone) RegExpCharacterClass('*', RegExpFlags()), &compiler,
- captured_body, data->contains_anchor);
+ 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
+ &compiler, captured_body, data->contains_anchor);
if (data->contains_anchor) {
// Unroll loop once, to take care of the case that might start
// at the start of input.
ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
- first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
- new (zone) RegExpCharacterClass('*', RegExpFlags()),
- /*read_backwards=*/false, loop_node)));
+ first_step_node->AddAlternative(GuardedAlternative(
+ new (zone) TextNode(new (zone) RegExpCharacterClass('*'),
+ /*read_backwards=*/false, loop_node)));
node = first_step_node;
} else {
node = loop_node;
}
}
if (is_one_byte) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
if (node != NULL) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
}
- } else if (is_unicode && (is_global || is_sticky)) {
- node = OptionallyStepBackToLeadSurrogate(&compiler, node, regexp.flags());
}
if (node == NULL) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
- Analysis analysis(is_one_byte);
+ Analysis analysis(ignore_case, is_one_byte);
analysis.EnsureAnalyzed(node);
if (analysis.has_failed()) {
const char* error_message = analysis.error_message();
@@ -5479,13 +4887,10 @@
}
if (is_global) {
- RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
- if (data->tree->min_match() > 0) {
- mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
- } else if (is_unicode) {
- mode = RegExpMacroAssembler::GLOBAL_UNICODE;
- }
- macro_assembler->set_global_mode(mode);
+ macro_assembler->set_global_mode(
+ (data->tree->min_match() > 0)
+ ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
+ : RegExpMacroAssembler::GLOBAL);
}
RegExpEngine::CompilationResult result =
@@ -5547,12 +4952,19 @@
RawRegExp* RegExpEngine::CreateRegExp(Thread* thread,
const String& pattern,
- RegExpFlags flags) {
+ bool multi_line,
+ bool ignore_case) {
Zone* zone = thread->zone();
const RegExp& regexp = RegExp::Handle(RegExp::New());
regexp.set_pattern(pattern);
- regexp.set_flags(flags);
+
+ if (multi_line) {
+ regexp.set_is_multi_line();
+ }
+ if (ignore_case) {
+ regexp.set_is_ignore_case();
+ }
// TODO(zerny): We might want to use normal string searching algorithms
// for simple patterns.
diff --git a/runtime/vm/regexp.h b/runtime/vm/regexp.h
index febcb91..092a06a 100644
--- a/runtime/vm/regexp.h
+++ b/runtime/vm/regexp.h
@@ -5,14 +5,11 @@
#ifndef RUNTIME_VM_REGEXP_H_
#define RUNTIME_VM_REGEXP_H_
-#include "platform/unicode.h"
-
#include "vm/compiler/assembler/assembler.h"
#include "vm/compiler/backend/flow_graph_compiler.h"
#include "vm/compiler/backend/il.h"
#include "vm/object.h"
#include "vm/regexp_assembler.h"
-#include "vm/splay-tree.h"
namespace dart {
@@ -28,42 +25,32 @@
class CharacterRange {
public:
CharacterRange() : from_(0), to_(0) {}
- CharacterRange(int32_t from, int32_t to) : from_(from), to_(to) {}
+ CharacterRange(uint16_t from, uint16_t to) : from_(from), to_(to) {}
static void AddClassEscape(uint16_t type,
ZoneGrowableArray<CharacterRange>* ranges);
- // Add class escapes with case equivalent closure for \w and \W if necessary.
- static void AddClassEscape(uint16_t type,
- ZoneGrowableArray<CharacterRange>* ranges,
- bool add_unicode_case_equivalents);
static GrowableArray<const intptr_t> GetWordBounds();
- static inline CharacterRange Singleton(int32_t value) {
+ static inline CharacterRange Singleton(uint16_t value) {
return CharacterRange(value, value);
}
- static inline CharacterRange Range(int32_t from, int32_t to) {
+ static inline CharacterRange Range(uint16_t from, uint16_t to) {
ASSERT(from <= to);
return CharacterRange(from, to);
}
static inline CharacterRange Everything() {
- return CharacterRange(0, Utf::kMaxCodePoint);
+ return CharacterRange(0, 0xFFFF);
}
- static inline ZoneGrowableArray<CharacterRange>* List(Zone* zone,
- CharacterRange range) {
- auto list = new (zone) ZoneGrowableArray<CharacterRange>(1);
- list->Add(range);
- return list;
- }
- bool Contains(int32_t i) const { return from_ <= i && i <= to_; }
- int32_t from() const { return from_; }
- void set_from(int32_t value) { from_ = value; }
- int32_t to() const { return to_; }
- void set_to(int32_t value) { to_ = value; }
+ bool Contains(uint16_t i) const { return from_ <= i && i <= to_; }
+ uint16_t from() const { return from_; }
+ void set_from(uint16_t value) { from_ = value; }
+ uint16_t to() const { return to_; }
+ void set_to(uint16_t value) { to_ = value; }
bool is_valid() const { return from_ <= to_; }
- bool IsEverything(int32_t max) const { return from_ == 0 && to_ >= max; }
+ bool IsEverything(uint16_t max) const { return from_ == 0 && to_ >= max; }
bool IsSingleton() const { return (from_ == to_); }
- static void AddCaseEquivalents(ZoneGrowableArray<CharacterRange>* ranges,
- bool is_one_byte,
- Zone* zone);
+ void AddCaseEquivalents(ZoneGrowableArray<CharacterRange>* ranges,
+ bool is_one_byte,
+ Zone* zone);
static void Split(ZoneGrowableArray<CharacterRange>* base,
GrowableArray<const intptr_t> overlay,
ZoneGrowableArray<CharacterRange>** included,
@@ -84,8 +71,8 @@
static const intptr_t kPayloadMask = (1 << 24) - 1;
private:
- int32_t from_;
- int32_t to_;
+ uint16_t from_;
+ uint16_t to_;
DISALLOW_ALLOCATION();
};
@@ -118,98 +105,6 @@
friend class Trace;
};
-// A mapping from integers, specified as ranges, to a set of integers.
-// Used for mapping character ranges to choices.
-class DispatchTable : public ValueObject {
- public:
- explicit DispatchTable(Zone* zone) : tree_(zone) {}
-
- class Entry {
- public:
- Entry() : from_(0), to_(0), out_set_(nullptr) {}
- Entry(int32_t from, int32_t to, OutSet* out_set)
- : from_(from), to_(to), out_set_(out_set) {
- ASSERT(from <= to);
- }
- int32_t from() { return from_; }
- int32_t to() { return to_; }
- void set_to(int32_t value) { to_ = value; }
- void AddValue(int value, Zone* zone) {
- out_set_ = out_set_->Extend(value, zone);
- }
- OutSet* out_set() { return out_set_; }
-
- private:
- int32_t from_;
- int32_t to_;
- OutSet* out_set_;
- };
-
- class Config {
- public:
- typedef int32_t Key;
- typedef Entry Value;
- static const int32_t kNoKey;
- static const Entry NoValue() { return Value(); }
- static inline int Compare(int32_t a, int32_t b) {
- if (a == b)
- return 0;
- else if (a < b)
- return -1;
- else
- return 1;
- }
- };
-
- void AddRange(CharacterRange range, int32_t value, Zone* zone);
- OutSet* Get(int32_t value);
- void Dump();
-
- template <typename Callback>
- void ForEach(Callback* callback) {
- return tree()->ForEach(callback);
- }
-
- private:
- // There can't be a static empty set since it allocates its
- // successors in a zone and caches them.
- OutSet* empty() { return &empty_; }
- OutSet empty_;
- ZoneSplayTree<Config>* tree() { return &tree_; }
- ZoneSplayTree<Config> tree_;
-};
-
-// Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
-class UnicodeRangeSplitter : public ValueObject {
- public:
- UnicodeRangeSplitter(Zone* zone, ZoneGrowableArray<CharacterRange>* base);
- void Call(uint32_t from, DispatchTable::Entry entry);
-
- ZoneGrowableArray<CharacterRange>* bmp() { return bmp_; }
- ZoneGrowableArray<CharacterRange>* lead_surrogates() {
- return lead_surrogates_;
- }
- ZoneGrowableArray<CharacterRange>* trail_surrogates() {
- return trail_surrogates_;
- }
- ZoneGrowableArray<CharacterRange>* non_bmp() const { return non_bmp_; }
-
- private:
- static const int kBase = 0;
- // Separate ranges into
- static const int kBmpCodePoints = 1;
- static const int kLeadSurrogates = 2;
- static const int kTrailSurrogates = 3;
- static const int kNonBmpCodePoints = 4;
-
- Zone* zone_;
- DispatchTable table_;
- ZoneGrowableArray<CharacterRange>* bmp_;
- ZoneGrowableArray<CharacterRange>* lead_surrogates_;
- ZoneGrowableArray<CharacterRange>* trail_surrogates_;
- ZoneGrowableArray<CharacterRange>* non_bmp_;
-};
-
#define FOR_EACH_NODE_TYPE(VISIT) \
VISIT(End) \
VISIT(Action) \
@@ -449,7 +344,9 @@
// If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for
// itself, or NULL if the node can never match.
- virtual RegExpNode* FilterOneByte(intptr_t depth) { return this; }
+ virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case) {
+ return this;
+ }
// Helper for FilterOneByte.
RegExpNode* replacement() {
ASSERT(info()->replacement_calculated);
@@ -546,7 +443,7 @@
: RegExpNode(on_success->zone()), on_success_(on_success) {}
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
- virtual RegExpNode* FilterOneByte(intptr_t depth);
+ virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
virtual void FillInBMInfo(intptr_t offset,
intptr_t budget,
BoyerMooreLookahead* bm,
@@ -556,7 +453,7 @@
}
protected:
- RegExpNode* FilterSuccessor(intptr_t depth);
+ RegExpNode* FilterSuccessor(intptr_t depth, bool ignore_case);
private:
RegExpNode* on_success_;
@@ -664,19 +561,6 @@
read_backward_(read_backward) {
elms_->Add(TextElement::CharClass(that));
}
- // Create TextNode for a single character class for the given ranges.
- static TextNode* CreateForCharacterRanges(
- ZoneGrowableArray<CharacterRange>* ranges,
- bool read_backward,
- RegExpNode* on_success,
- RegExpFlags flags);
- // Create TextNode for a surrogate pair with a range given for the
- // lead and the trail surrogate each.
- static TextNode* CreateForSurrogatePair(CharacterRange lead,
- CharacterRange trail,
- bool read_backward,
- RegExpNode* on_success,
- RegExpFlags flags);
virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual intptr_t EatsAtLeast(intptr_t still_to_find,
@@ -697,7 +581,7 @@
BoyerMooreLookahead* bm,
bool not_at_start);
void CalculateOffsets();
- virtual RegExpNode* FilterOneByte(intptr_t depth);
+ virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
private:
enum TextEmitPassType {
@@ -775,13 +659,11 @@
public:
BackReferenceNode(intptr_t start_reg,
intptr_t end_reg,
- RegExpFlags flags,
bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
end_reg_(end_reg),
- flags_(flags),
read_backward_(read_backward) {}
virtual void Accept(NodeVisitor* visitor);
intptr_t start_register() { return start_reg_; }
@@ -805,7 +687,6 @@
private:
intptr_t start_reg_;
intptr_t end_reg_;
- RegExpFlags flags_;
bool read_backward_;
};
@@ -880,9 +761,9 @@
public:
explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) {}
void AddGuard(Guard* guard, Zone* zone);
- RegExpNode* node() const { return node_; }
+ RegExpNode* node() { return node_; }
void set_node(RegExpNode* node) { node_ = node; }
- ZoneGrowableArray<Guard*>* guards() const { return guards_; }
+ ZoneGrowableArray<Guard*>* guards() { return guards_; }
private:
RegExpNode* node_;
@@ -930,12 +811,11 @@
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true;
}
- virtual RegExpNode* FilterOneByte(intptr_t depth);
+ virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
virtual bool read_backward() { return false; }
protected:
- intptr_t GreedyLoopTextLengthForAlternative(
- const GuardedAlternative* alternative);
+ intptr_t GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
ZoneGrowableArray<GuardedAlternative>* alternatives_;
private:
@@ -1006,7 +886,7 @@
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return !is_first;
}
- virtual RegExpNode* FilterOneByte(intptr_t depth);
+ virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
};
class LoopChoiceNode : public ChoiceNode {
@@ -1038,7 +918,7 @@
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
- virtual RegExpNode* FilterOneByte(intptr_t depth);
+ virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
private:
// AddAlternative is made private for loop nodes because alternatives
@@ -1406,8 +1286,10 @@
// +-------+ +------------+
class Analysis : public NodeVisitor {
public:
- explicit Analysis(bool is_one_byte)
- : is_one_byte_(is_one_byte), error_message_(NULL) {}
+ Analysis(bool ignore_case, bool is_one_byte)
+ : ignore_case_(ignore_case),
+ is_one_byte_(is_one_byte),
+ error_message_(NULL) {}
void EnsureAnalyzed(RegExpNode* node);
#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
@@ -1423,6 +1305,7 @@
void fail(const char* error_message) { error_message_ = error_message; }
private:
+ bool ignore_case_;
bool is_one_byte_;
const char* error_message_;
@@ -1515,7 +1398,8 @@
static RawRegExp* CreateRegExp(Thread* thread,
const String& pattern,
- RegExpFlags flags);
+ bool multi_line,
+ bool ignore_case);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};
diff --git a/runtime/vm/regexp_assembler.cc b/runtime/vm/regexp_assembler.cc
index ab242c4..fb7dc74 100644
--- a/runtime/vm/regexp_assembler.cc
+++ b/runtime/vm/regexp_assembler.cc
@@ -4,10 +4,6 @@
#include "vm/regexp_assembler.h"
-#include "unicode/uchar.h"
-
-#include "platform/unicode.h"
-
#include "vm/flags.h"
#include "vm/regexp.h"
#include "vm/unibrow-inl.h"
@@ -20,10 +16,11 @@
OS::PrintErr(format, c);
}
-RawBool* CaseInsensitiveCompareUCS2(RawString* str_raw,
- RawSmi* lhs_index_raw,
- RawSmi* rhs_index_raw,
- RawSmi* length_raw) {
+
+static RawBool* CaseInsensitiveCompareUC16(RawString* str_raw,
+ RawSmi* lhs_index_raw,
+ RawSmi* rhs_index_raw,
+ RawSmi* length_raw) {
const String& str = String::Handle(str_raw);
const Smi& lhs_index = Smi::Handle(lhs_index_raw);
const Smi& rhs_index = Smi::Handle(rhs_index_raw);
@@ -51,50 +48,13 @@
return Bool::True().raw();
}
-RawBool* CaseInsensitiveCompareUTF16(RawString* str_raw,
- RawSmi* lhs_index_raw,
- RawSmi* rhs_index_raw,
- RawSmi* length_raw) {
- const String& str = String::Handle(str_raw);
- const Smi& lhs_index = Smi::Handle(lhs_index_raw);
- const Smi& rhs_index = Smi::Handle(rhs_index_raw);
- const Smi& length = Smi::Handle(length_raw);
-
- for (intptr_t i = 0; i < length.Value(); i++) {
- int32_t c1 = str.CharAt(lhs_index.Value() + i);
- int32_t c2 = str.CharAt(rhs_index.Value() + i);
- if (Utf16::IsLeadSurrogate(c1)) {
- // Non-BMP characters do not have case-equivalents in the BMP.
- // Both have to be non-BMP for them to be able to match.
- if (!Utf16::IsLeadSurrogate(c2)) return Bool::False().raw();
- if (i + 1 < length.Value()) {
- uint16_t c1t = str.CharAt(lhs_index.Value() + i + 1);
- uint16_t c2t = str.CharAt(rhs_index.Value() + i + 1);
- if (Utf16::IsTrailSurrogate(c1t) && Utf16::IsTrailSurrogate(c2t)) {
- c1 = Utf16::Decode(c1, c1t);
- c2 = Utf16::Decode(c2, c2t);
- i++;
- }
- }
- }
- c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
- c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
- if (c1 != c2) return Bool::False().raw();
- }
- return Bool::True().raw();
-}
DEFINE_RAW_LEAF_RUNTIME_ENTRY(
- CaseInsensitiveCompareUCS2,
+ CaseInsensitiveCompareUC16,
4,
false /* is_float */,
- reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUCS2));
+ reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUC16));
-DEFINE_RAW_LEAF_RUNTIME_ENTRY(
- CaseInsensitiveCompareUTF16,
- 4,
- false /* is_float */,
- reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUTF16));
BlockLabel::BlockLabel()
: block_(NULL), is_bound_(false), is_linked_(false), pos_(-1) {
@@ -112,18 +72,4 @@
RegExpMacroAssembler::~RegExpMacroAssembler() {}
-void RegExpMacroAssembler::CheckNotInSurrogatePair(intptr_t cp_offset,
- BlockLabel* on_failure) {
- BlockLabel ok;
- // Check that current character is not a trail surrogate.
- LoadCurrentCharacter(cp_offset, &ok);
- CheckCharacterNotInRange(Utf16::kTrailSurrogateStart,
- Utf16::kTrailSurrogateEnd, &ok);
- // Check that previous character is not a lead surrogate.
- LoadCurrentCharacter(cp_offset - 1, &ok);
- CheckCharacterInRange(Utf16::kLeadSurrogateStart, Utf16::kLeadSurrogateEnd,
- on_failure);
- BindBlock(&ok);
-}
-
} // namespace dart
diff --git a/runtime/vm/regexp_assembler.h b/runtime/vm/regexp_assembler.h
index 818f573..a7b087e 100644
--- a/runtime/vm/regexp_assembler.h
+++ b/runtime/vm/regexp_assembler.h
@@ -13,19 +13,6 @@
// Utility function for the DotPrinter
void PrintUtf16(uint16_t c);
-// Compares two-byte strings case insensitively as UCS2.
-// Called from generated RegExp code.
-RawBool* CaseInsensitiveCompareUCS2(RawString* str_raw,
- RawSmi* lhs_index_raw,
- RawSmi* rhs_index_raw,
- RawSmi* length_raw);
-
-// Compares two-byte strings case insensitively as UTF16.
-// Called from generated RegExp code.
-RawBool* CaseInsensitiveCompareUTF16(RawString* str_raw,
- RawSmi* lhs_index_raw,
- RawSmi* rhs_index_raw,
- RawSmi* length_raw);
/// Convenience wrapper around a BlockEntryInstr pointer.
class BlockLabel : public ValueObject {
@@ -140,7 +127,6 @@
BlockLabel* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
bool read_backward,
- bool unicode,
BlockLabel* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
@@ -229,33 +215,22 @@
virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to) = 0;
virtual void WriteStackPointerToRegister(intptr_t reg) = 0;
- // Check that we are not in the middle of a surrogate pair.
- void CheckNotInSurrogatePair(intptr_t cp_offset, BlockLabel* on_failure);
-
// Controls the generation of large inlined constants in the code.
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
bool slow_safe() { return slow_safe_compiler_; }
- enum GlobalMode {
- NOT_GLOBAL,
- GLOBAL,
- GLOBAL_NO_ZERO_LENGTH_CHECK,
- GLOBAL_UNICODE
- };
+ enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK };
// Set whether the regular expression has the global flag. Exiting due to
// a failure in a global regexp may still mean success overall.
inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
inline bool global() { return global_mode_ != NOT_GLOBAL; }
- inline bool global_with_zero_length_check() {
- return global_mode_ == GLOBAL || global_mode_ == GLOBAL_UNICODE;
- }
- inline bool global_unicode() { return global_mode_ == GLOBAL_UNICODE; }
+ inline bool global_with_zero_length_check() { return global_mode_ == GLOBAL; }
Zone* zone() const { return zone_; }
private:
bool slow_safe_compiler_;
GlobalMode global_mode_;
Zone* zone_;
};
diff --git a/runtime/vm/regexp_assembler_bytecode.cc b/runtime/vm/regexp_assembler_bytecode.cc
index 0318a27..8288e1b 100644
--- a/runtime/vm/regexp_assembler_bytecode.cc
+++ b/runtime/vm/regexp_assembler_bytecode.cc
@@ -349,14 +349,11 @@
void BytecodeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
intptr_t start_reg,
bool read_backward,
- bool unicode,
BlockLabel* on_not_equal) {
ASSERT(start_reg >= 0);
ASSERT(start_reg <= kMaxRegister);
- Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
- : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
- : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
- : BC_CHECK_NOT_BACK_REF_NO_CASE),
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
+ : BC_CHECK_NOT_BACK_REF_NO_CASE,
start_reg);
EmitOrLink(on_not_equal);
}
@@ -437,10 +434,11 @@
}
#endif // !defined(PRODUCT)
+ const bool multiline = regexp.is_multi_line();
RegExpCompileData* compile_data = new (zone) RegExpCompileData();
// Parsing failures are handled in the RegExp factory constructor.
- RegExpParser::ParseRegExp(pattern, regexp.flags(), compile_data);
+ RegExpParser::ParseRegExp(pattern, multiline, compile_data);
regexp.set_num_bracket_expressions(compile_data->capture_count);
regexp.set_capture_name_map(compile_data->capture_name_map);
@@ -453,15 +451,15 @@
RegExpEngine::CompilationResult result = RegExpEngine::CompileBytecode(
compile_data, regexp, is_one_byte, sticky, zone);
ASSERT(result.bytecode != NULL);
- ASSERT(regexp.num_registers(is_one_byte) == -1 ||
- regexp.num_registers(is_one_byte) == result.num_registers);
- regexp.set_num_registers(is_one_byte, result.num_registers);
+ ASSERT((regexp.num_registers() == -1) ||
+ (regexp.num_registers() == result.num_registers));
+ regexp.set_num_registers(result.num_registers);
regexp.set_bytecode(is_one_byte, sticky, *(result.bytecode));
}
- ASSERT(regexp.num_registers(is_one_byte) != -1);
+ ASSERT(regexp.num_registers() != -1);
- return regexp.num_registers(is_one_byte) +
+ return regexp.num_registers() +
(Smi::Value(regexp.num_bracket_expressions()) + 1) * 2;
}
diff --git a/runtime/vm/regexp_assembler_bytecode.h b/runtime/vm/regexp_assembler_bytecode.h
index 59cf68f..3e17d49 100644
--- a/runtime/vm/regexp_assembler_bytecode.h
+++ b/runtime/vm/regexp_assembler_bytecode.h
@@ -83,7 +83,6 @@
BlockLabel* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
bool read_backward,
- bool unicode,
BlockLabel* on_no_match);
virtual void IfRegisterLT(intptr_t register_index,
intptr_t comparand,
diff --git a/runtime/vm/regexp_assembler_ir.cc b/runtime/vm/regexp_assembler_ir.cc
index f7f18ac..b1efebe 100644
--- a/runtime/vm/regexp_assembler_ir.cc
+++ b/runtime/vm/regexp_assembler_ir.cc
@@ -821,7 +821,6 @@
void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
intptr_t start_reg,
bool read_backward,
- bool unicode,
BlockLabel* on_no_match) {
TAG();
ASSERT(start_reg + 1 <= registers_count_);
@@ -968,17 +967,9 @@
Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
Value* length_value = Bind(LoadLocal(capture_length_));
- Definition* is_match_def;
-
- if (unicode) {
- is_match_def = new (Z) CaseInsensitiveCompareInstr(
- string_value, lhs_index_value, rhs_index_value, length_value,
- kCaseInsensitiveCompareUTF16RuntimeEntry, specialization_cid_);
- } else {
- is_match_def = new (Z) CaseInsensitiveCompareInstr(
- string_value, lhs_index_value, rhs_index_value, length_value,
- kCaseInsensitiveCompareUCS2RuntimeEntry, specialization_cid_);
- }
+ Definition* is_match_def = new (Z) CaseInsensitiveCompareUC16Instr(
+ string_value, lhs_index_value, rhs_index_value, length_value,
+ specialization_cid_);
BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
on_no_match);
diff --git a/runtime/vm/regexp_assembler_ir.h b/runtime/vm/regexp_assembler_ir.h
index d8c222a..c4f6e1f 100644
--- a/runtime/vm/regexp_assembler_ir.h
+++ b/runtime/vm/regexp_assembler_ir.h
@@ -67,7 +67,6 @@
BlockLabel* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
bool read_backward,
- bool unicode,
BlockLabel* on_no_match);
virtual void CheckNotCharacter(uint32_t c, BlockLabel* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
diff --git a/runtime/vm/regexp_ast.h b/runtime/vm/regexp_ast.h
index 4fb64a3..17a175e 100644
--- a/runtime/vm/regexp_ast.h
+++ b/runtime/vm/regexp_ast.h
@@ -108,8 +108,7 @@
BOUNDARY,
NON_BOUNDARY
};
- RegExpAssertion(AssertionType type, RegExpFlags flags)
- : assertion_type_(type), flags_(flags) {}
+ explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
virtual RegExpAssertion* AsAssertion();
@@ -122,7 +121,6 @@
private:
AssertionType assertion_type_;
- RegExpFlags flags_;
};
class CharacterSet : public ValueObject {
@@ -152,40 +150,18 @@
class RegExpCharacterClass : public RegExpTree {
public:
- enum Flag {
- // The character class is negated and should match everything but the
- // specified ranges.
- NEGATED = 1 << 0,
- // The character class contains part of a split surrogate and should not
- // be unicode-desugared.
- CONTAINS_SPLIT_SURROGATE = 1 << 1,
- };
- using CharacterClassFlags = intptr_t;
- static inline CharacterClassFlags DefaultFlags() { return 0; }
-
- RegExpCharacterClass(
- ZoneGrowableArray<CharacterRange>* ranges,
- RegExpFlags flags,
- CharacterClassFlags character_class_flags = DefaultFlags())
- : set_(ranges),
- flags_(flags),
- character_class_flags_(character_class_flags) {
- // Convert the empty set of ranges to the negated Everything() range.
- if (ranges->is_empty()) {
- ranges->Add(CharacterRange::Everything());
- character_class_flags_ ^= NEGATED;
- }
- }
- RegExpCharacterClass(uint16_t type, RegExpFlags flags)
- : set_(type), flags_(flags), character_class_flags_(0) {}
+ RegExpCharacterClass(ZoneGrowableArray<CharacterRange>* ranges,
+ bool is_negated)
+ : set_(ranges), is_negated_(is_negated) {}
+ explicit RegExpCharacterClass(uint16_t type)
+ : set_(type), is_negated_(false) {}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass() const;
virtual bool IsTextElement() const { return true; }
virtual intptr_t min_match() const { return 1; }
- // The character class may match two code units for unicode regexps.
- virtual intptr_t max_match() const { return 2; }
+ virtual intptr_t max_match() const { return 1; }
virtual void AppendToText(RegExpText* text);
CharacterSet character_set() const { return set_; }
// TODO(lrn): Remove need for complex version if is_standard that
@@ -204,22 +180,16 @@
// * : All characters
uint16_t standard_type() const { return set_.standard_set_type(); }
ZoneGrowableArray<CharacterRange>* ranges() { return set_.ranges(); }
- bool is_negated() const { return character_class_flags_ & NEGATED; }
- RegExpFlags flags() const { return flags_; }
- bool contains_split_surrogate() const {
- return character_class_flags_ & CONTAINS_SPLIT_SURROGATE;
- }
+ bool is_negated() const { return is_negated_; }
private:
CharacterSet set_;
- RegExpFlags flags_;
- CharacterClassFlags character_class_flags_;
+ bool is_negated_;
};
class RegExpAtom : public RegExpTree {
public:
- RegExpAtom(ZoneGrowableArray<uint16_t>* data, RegExpFlags flags)
- : data_(data), flags_(flags) {}
+ explicit RegExpAtom(ZoneGrowableArray<uint16_t>* data) : data_(data) {}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
virtual RegExpAtom* AsAtom();
@@ -230,12 +200,9 @@
virtual void AppendToText(RegExpText* text);
ZoneGrowableArray<uint16_t>* data() const { return data_; }
intptr_t length() const { return data_->length(); }
- RegExpFlags flags() const { return flags_; }
- bool ignore_case() const { return flags_.IgnoreCase(); }
private:
ZoneGrowableArray<uint16_t>* data_;
- const RegExpFlags flags_;
};
class RegExpText : public RegExpTree {
@@ -403,10 +370,9 @@
class RegExpBackReference : public RegExpTree {
public:
- explicit RegExpBackReference(RegExpFlags flags)
- : capture_(nullptr), name_(nullptr), flags_(flags) {}
- RegExpBackReference(RegExpCapture* capture, RegExpFlags flags)
- : capture_(capture), name_(nullptr), flags_(flags) {}
+ RegExpBackReference() : capture_(nullptr), name_(nullptr) {}
+ explicit RegExpBackReference(RegExpCapture* capture)
+ : capture_(capture), name_(nullptr) {}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
@@ -425,7 +391,6 @@
private:
RegExpCapture* capture_;
const ZoneGrowableArray<uint16_t>* name_;
- RegExpFlags flags_;
};
class RegExpEmpty : public RegExpTree {
diff --git a/runtime/vm/regexp_bytecodes.h b/runtime/vm/regexp_bytecodes.h
index 178c950..858eef5 100644
--- a/runtime/vm/regexp_bytecodes.h
+++ b/runtime/vm/regexp_bytecodes.h
@@ -55,19 +55,17 @@
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) /*bc8 reg_idx24 addr32*/ \
-V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
-V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \
-V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \
-V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
-V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */
+V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
+V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
+V(CHECK_NOT_AT_START, 46, 8) /* bc8 offset24 addr32 */ \
+V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
+V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
+V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */
// clang-format on
diff --git a/runtime/vm/regexp_interpreter.cc b/runtime/vm/regexp_interpreter.cc
index 2921e3e..94c44ec 100644
--- a/runtime/vm/regexp_interpreter.cc
+++ b/runtime/vm/regexp_interpreter.cc
@@ -24,25 +24,27 @@
intptr_t from,
intptr_t current,
intptr_t len,
- const String& subject,
- bool unicode);
+ const String& subject);
template <>
bool BackRefMatchesNoCase<uint16_t>(Canonicalize* interp_canonicalize,
intptr_t from,
intptr_t current,
intptr_t len,
- const String& subject,
- bool unicode) {
- Bool& ret = Bool::Handle();
- if (unicode) {
- ret = CaseInsensitiveCompareUTF16(subject.raw(), Smi::New(from),
- Smi::New(current), Smi::New(len));
- } else {
- ret = CaseInsensitiveCompareUCS2(subject.raw(), Smi::New(from),
- Smi::New(current), Smi::New(len));
+ const String& subject) {
+ for (int i = 0; i < len; i++) {
+ int32_t old_char = subject.CharAt(from++);
+ int32_t new_char = subject.CharAt(current++);
+ if (old_char == new_char) continue;
+ int32_t old_string[1] = {old_char};
+ int32_t new_string[1] = {new_char};
+ interp_canonicalize->get(old_char, '\0', old_string);
+ interp_canonicalize->get(new_char, '\0', new_string);
+ if (old_string[0] != new_string[0]) {
+ return false;
+ }
}
- return ret.value();
+ return true;
}
template <>
@@ -50,9 +52,7 @@
intptr_t from,
intptr_t current,
intptr_t len,
- const String& subject,
- bool unicode) {
- // For Latin1 characters the unicode flag makes no difference.
+ const String& subject) {
for (int i = 0; i < len; i++) {
unsigned int old_char = subject.CharAt(from++);
unsigned int new_char = subject.CharAt(current++);
@@ -513,11 +513,7 @@
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
- FALL_THROUGH;
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
- const bool unicode =
- (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from < 0 || len <= 0) {
@@ -529,7 +525,7 @@
break;
} else {
if (BackRefMatchesNoCase<Char>(&canonicalize, from, current, len,
- subject, unicode)) {
+ subject)) {
current += len;
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
} else {
@@ -566,11 +562,7 @@
pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
break;
}
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
- FALL_THROUGH;
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
- bool unicode = (insn & BYTECODE_MASK) ==
- BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from < 0 || len <= 0) {
@@ -582,7 +574,7 @@
break;
} else {
if (BackRefMatchesNoCase<Char>(&canonicalize, from, current - len,
- len, subject, unicode)) {
+ len, subject)) {
current -= len;
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
} else {
diff --git a/runtime/vm/regexp_parser.cc b/runtime/vm/regexp_parser.cc
index 31aff3f..b70b945 100644
--- a/runtime/vm/regexp_parser.cc
+++ b/runtime/vm/regexp_parser.cc
@@ -3,12 +3,6 @@
// BSD-style license that can be found in the LICENSE file.
#include "vm/regexp_parser.h"
-
-#include "unicode/uchar.h"
-#include "unicode/uniset.h"
-
-#include "platform/unicode.h"
-
#include "vm/longjump.h"
#include "vm/object_store.h"
@@ -19,12 +13,10 @@
// Enables possessive quantifier syntax for testing.
static const bool FLAG_regexp_possessive_quantifier = false;
-RegExpBuilder::RegExpBuilder(RegExpFlags flags)
+RegExpBuilder::RegExpBuilder()
: zone_(Thread::Current()->zone()),
pending_empty_(false),
- flags_(flags),
characters_(NULL),
- pending_surrogate_(kNoPendingSurrogate),
terms_(),
text_(),
alternatives_()
@@ -35,49 +27,10 @@
{
}
-void RegExpBuilder::AddLeadSurrogate(uint16_t lead_surrogate) {
- ASSERT(Utf16::IsLeadSurrogate(lead_surrogate));
- FlushPendingSurrogate();
- // Hold onto the lead surrogate, waiting for a trail surrogate to follow.
- pending_surrogate_ = lead_surrogate;
-}
-
-void RegExpBuilder::AddTrailSurrogate(uint16_t trail_surrogate) {
- ASSERT(Utf16::IsTrailSurrogate(trail_surrogate));
- if (pending_surrogate_ != kNoPendingSurrogate) {
- uint16_t lead_surrogate = pending_surrogate_;
- pending_surrogate_ = kNoPendingSurrogate;
- ASSERT(Utf16::IsLeadSurrogate(lead_surrogate));
- uint32_t combined = Utf16::Decode(lead_surrogate, trail_surrogate);
- if (NeedsDesugaringForIgnoreCase(combined)) {
- AddCharacterClassForDesugaring(combined);
- } else {
- auto surrogate_pair = new (Z) ZoneGrowableArray<uint16_t>(2);
- surrogate_pair->Add(lead_surrogate);
- surrogate_pair->Add(trail_surrogate);
- RegExpAtom* atom = new (Z) RegExpAtom(surrogate_pair, flags_);
- AddAtom(atom);
- }
- } else {
- pending_surrogate_ = trail_surrogate;
- FlushPendingSurrogate();
- }
-}
-
-void RegExpBuilder::FlushPendingSurrogate() {
- if (pending_surrogate_ != kNoPendingSurrogate) {
- ASSERT(is_unicode());
- uint32_t c = pending_surrogate_;
- pending_surrogate_ = kNoPendingSurrogate;
- AddCharacterClassForDesugaring(c);
- }
-}
-
void RegExpBuilder::FlushCharacters() {
- FlushPendingSurrogate();
pending_empty_ = false;
if (characters_ != NULL) {
- RegExpTree* atom = new (Z) RegExpAtom(characters_, flags_);
+ RegExpTree* atom = new (Z) RegExpAtom(characters_);
characters_ = NULL;
text_.Add(atom);
LAST(ADD_ATOM);
@@ -101,62 +54,18 @@
}
void RegExpBuilder::AddCharacter(uint16_t c) {
- FlushPendingSurrogate();
pending_empty_ = false;
- if (NeedsDesugaringForIgnoreCase(c)) {
- AddCharacterClassForDesugaring(c);
- } else {
- if (characters_ == NULL) {
- characters_ = new (Z) ZoneGrowableArray<uint16_t>(4);
- }
- characters_->Add(c);
- LAST(ADD_CHAR);
+ if (characters_ == NULL) {
+ characters_ = new (Z) ZoneGrowableArray<uint16_t>(4);
}
-}
-
-void RegExpBuilder::AddUnicodeCharacter(uint32_t c) {
- if (c > static_cast<uint32_t>(Utf16::kMaxCodeUnit)) {
- ASSERT(is_unicode());
- uint16_t surrogates[2];
- Utf16::Encode(c, surrogates);
- AddLeadSurrogate(surrogates[0]);
- AddTrailSurrogate(surrogates[1]);
- } else if (is_unicode() && Utf16::IsLeadSurrogate(c)) {
- AddLeadSurrogate(c);
- } else if (is_unicode() && Utf16::IsTrailSurrogate(c)) {
- AddTrailSurrogate(c);
- } else {
- AddCharacter(static_cast<uint16_t>(c));
- }
-}
-
-void RegExpBuilder::AddEscapedUnicodeCharacter(uint32_t character) {
- // A lead or trail surrogate parsed via escape sequence will not
- // pair up with any preceding lead or following trail surrogate.
- FlushPendingSurrogate();
- AddUnicodeCharacter(character);
- FlushPendingSurrogate();
+ characters_->Add(c);
+ LAST(ADD_CHAR);
}
void RegExpBuilder::AddEmpty() {
pending_empty_ = true;
}
-void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
- if (NeedsDesugaringForUnicode(cc)) {
- // With /u, character class needs to be desugared, so it
- // must be a standalone term instead of being part of a RegExpText.
- AddTerm(cc);
- } else {
- AddAtom(cc);
- }
-}
-
-void RegExpBuilder::AddCharacterClassForDesugaring(uint32_t c) {
- auto ranges = CharacterRange::List(Z, CharacterRange::Singleton(c));
- AddTerm(new (Z) RegExpCharacterClass(ranges, flags_));
-}
-
void RegExpBuilder::AddAtom(RegExpTree* term) {
if (term->IsEmpty()) {
AddEmpty();
@@ -172,12 +81,6 @@
LAST(ADD_ATOM);
}
-void RegExpBuilder::AddTerm(RegExpTree* term) {
- FlushText();
- terms_.Add(term);
- LAST(ADD_ATOM);
-}
-
void RegExpBuilder::AddAssertion(RegExpTree* assert) {
FlushText();
terms_.Add(assert);
@@ -209,37 +112,6 @@
LAST(ADD_NONE);
}
-bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
- if (!is_unicode()) return false;
- // TODO(yangguo): we could be smarter than this. Case-insensitivity does not
- // necessarily mean that we need to desugar. It's probably nicer to have a
- // separate pass to figure out unicode desugarings.
- if (ignore_case()) return true;
- ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
- CharacterRange::Canonicalize(ranges);
- for (int i = ranges->length() - 1; i >= 0; i--) {
- uint32_t from = ranges->At(i).from();
- uint32_t to = ranges->At(i).to();
- // Check for non-BMP characters.
- if (to >= Utf16::kMaxCodeUnit) return true;
- // Check for lone surrogates.
- if (from <= Utf16::kTrailSurrogateEnd && to >= Utf16::kLeadSurrogateStart) {
- return true;
- }
- }
- return false;
-}
-
-bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uint32_t c) {
- if (is_unicode() && ignore_case()) {
- icu::UnicodeSet set(c, c);
- set.closeOver(USET_CASE_INSENSITIVE);
- set.removeAllStrings();
- return set.size() > 1;
- }
- return false;
-}
-
RegExpTree* RegExpBuilder::ToRegExp() {
FlushTerms();
intptr_t num_alternatives = alternatives_.length();
@@ -280,13 +152,13 @@
for (intptr_t i = 0; i < num_chars - 1; i++) {
prefix->Add(char_vector->At(i));
}
- text_.Add(new (Z) RegExpAtom(prefix, flags_));
+ text_.Add(new (Z) RegExpAtom(prefix));
ZoneGrowableArray<uint16_t>* tail = new (Z) ZoneGrowableArray<uint16_t>();
tail->Add(char_vector->At(num_chars - 1));
char_vector = tail;
}
characters_ = NULL;
- atom = new (Z) RegExpAtom(char_vector, flags_);
+ atom = new (Z) RegExpAtom(char_vector);
FlushText();
} else if (text_.length() > 0) {
DEBUG_ASSERT(last_added_ == ADD_ATOM);
@@ -296,8 +168,6 @@
DEBUG_ASSERT(last_added_ == ADD_ATOM);
atom = terms_.RemoveLast();
if (auto lookaround = atom->AsLookaround()) {
- // With /u, lookarounds are not quantifiable.
- if (is_unicode()) return false;
// Lookbehinds are not quantifiable.
if (lookaround->type() == RegExpLookaround::LOOKBEHIND) {
return false;
@@ -324,7 +194,7 @@
// ----------------------------------------------------------------------------
// Implementation of Parser
-RegExpParser::RegExpParser(const String& in, String* error, RegExpFlags flags)
+RegExpParser::RegExpParser(const String& in, String* error, bool multiline)
: zone_(Thread::Current()->zone()),
captures_(nullptr),
named_captures_(nullptr),
@@ -335,7 +205,7 @@
captures_started_(0),
capture_count_(0),
has_more_(true),
- top_level_flags_(flags),
+ multiline_(multiline),
simple_(false),
contains_anchor_(false),
is_scanned_for_captures_(false),
@@ -343,38 +213,20 @@
Advance();
}
-inline uint32_t RegExpParser::ReadNext(bool update_position) {
- intptr_t position = next_pos_;
- const uint16_t c0 = in().CharAt(position);
- uint32_t c = c0;
- position++;
- if (is_unicode() && position < in().Length() && Utf16::IsLeadSurrogate(c0)) {
- const uint16_t c1 = in().CharAt(position);
- if (Utf16::IsTrailSurrogate(c1)) {
- c = Utf16::Decode(c0, c1);
- position++;
- }
- }
- if (update_position) next_pos_ = position;
- return c;
-}
-
uint32_t RegExpParser::Next() {
if (has_next()) {
- return ReadNext(false);
+ return in().CharAt(next_pos_);
} else {
return kEndMarker;
}
}
void RegExpParser::Advance() {
- if (has_next()) {
- current_ = ReadNext(true);
+ if (next_pos_ < in().Length()) {
+ current_ = in().CharAt(next_pos_);
+ next_pos_++;
} else {
current_ = kEndMarker;
- // Advance so that position() points to 1 after the last character. This is
- // important so that Reset() to this position works correctly.
- next_pos_ = in().Length() + 1;
has_more_ = false;
}
}
@@ -394,30 +246,6 @@
return simple_;
}
-bool RegExpParser::IsSyntaxCharacterOrSlash(uint32_t c) {
- switch (c) {
- case '^':
- case '$':
- case '\\':
- case '.':
- case '*':
- case '+':
- case '?':
- case '(':
- case ')':
- case '[':
- case ']':
- case '{':
- case '}':
- case '|':
- case '/':
- return true;
- default:
- break;
- }
- return false;
-}
-
void RegExpParser::ReportError(const char* message) {
// Zip to the end to make sure the no more input is read.
current_ = kEndMarker;
@@ -446,11 +274,6 @@
return result;
}
-// Used for error messages where we would have fallen back on treating an
-// escape as the identity escape, but we are in Unicode mode.
-static const char* kUnicodeIdentity =
- "Invalid identity escape in Unicode pattern";
-
// Disjunction ::
// Alternative
// Alternative | Disjunction
@@ -464,7 +287,7 @@
RegExpTree* RegExpParser::ParseDisjunction() {
// Used to store current state while parsing subexpressions.
RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
- 0, nullptr, top_level_flags_, Z);
+ 0, nullptr, Z);
RegExpParserState* stored_state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
@@ -535,12 +358,12 @@
UNREACHABLE();
case '^': {
Advance();
- if (builder->is_multi_line()) {
- builder->AddAssertion(new (Z) RegExpAssertion(
- RegExpAssertion::START_OF_LINE, builder->flags()));
+ if (multiline_) {
+ builder->AddAssertion(
+ new (Z) RegExpAssertion(RegExpAssertion::START_OF_LINE));
} else {
- builder->AddAssertion(new (Z) RegExpAssertion(
- RegExpAssertion::START_OF_INPUT, builder->flags()));
+ builder->AddAssertion(
+ new (Z) RegExpAssertion(RegExpAssertion::START_OF_INPUT));
set_contains_anchor();
}
continue;
@@ -548,29 +371,19 @@
case '$': {
Advance();
RegExpAssertion::AssertionType assertion_type =
- builder->is_multi_line() ? RegExpAssertion::END_OF_LINE
- : RegExpAssertion::END_OF_INPUT;
- builder->AddAssertion(
- new (Z) RegExpAssertion(assertion_type, builder->flags()));
+ multiline_ ? RegExpAssertion::END_OF_LINE
+ : RegExpAssertion::END_OF_INPUT;
+ builder->AddAssertion(new RegExpAssertion(assertion_type));
continue;
}
case '.': {
Advance();
- auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
- if (builder->is_dot_all()) {
- // Everything.
- CharacterRange::AddClassEscape(
- '*', ranges,
- /*add_unicode_case_equivalents=*/false);
- } else {
- // everything except \x0a, \x0d, \u2028 and \u2029
- CharacterRange::AddClassEscape(
- '.', ranges,
- /*add_unicode_case_equivalents=*/false);
- }
- RegExpCharacterClass* cc =
- new (Z) RegExpCharacterClass(ranges, builder->flags());
- builder->AddCharacterClass(cc);
+ // everything except \x0a, \x0d, \u2028 and \u2029
+ ZoneGrowableArray<CharacterRange>* ranges =
+ new ZoneGrowableArray<CharacterRange>(2);
+ CharacterRange::AddClassEscape('.', ranges);
+ RegExpTree* atom = new RegExpCharacterClass(ranges, false);
+ builder->AddAtom(atom);
break;
}
case '(': {
@@ -579,8 +392,8 @@
continue;
}
case '[': {
- RegExpTree* atom = ParseCharacterClass(builder);
- builder->AddCharacterClass(atom->AsCharacterClass());
+ RegExpTree* atom = ParseCharacterClass();
+ builder->AddAtom(atom);
break;
}
// Atom ::
@@ -592,13 +405,13 @@
UNREACHABLE();
case 'b':
Advance(2);
- builder->AddAssertion(new (Z) RegExpAssertion(
- RegExpAssertion::BOUNDARY, builder->flags()));
+ builder->AddAssertion(
+ new RegExpAssertion(RegExpAssertion::BOUNDARY));
continue;
case 'B':
Advance(2);
- builder->AddAssertion(new (Z) RegExpAssertion(
- RegExpAssertion::NON_BOUNDARY, builder->flags()));
+ builder->AddAssertion(
+ new RegExpAssertion(RegExpAssertion::NON_BOUNDARY));
continue;
// AtomEscape ::
// CharacterClassEscape
@@ -613,36 +426,11 @@
case 'W': {
uint32_t c = Next();
Advance(2);
- auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
- CharacterRange::AddClassEscape(
- c, ranges, is_unicode() && builder->ignore_case());
- RegExpCharacterClass* cc =
- new (Z) RegExpCharacterClass(ranges, builder->flags());
- builder->AddCharacterClass(cc);
- break;
- }
- case 'p':
- case 'P': {
- uint32_t p = Next();
- Advance(2);
-
- if (is_unicode()) {
- auto name_1 = new (Z) ZoneGrowableArray<char>();
- auto name_2 = new (Z) ZoneGrowableArray<char>();
- auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
- if (ParsePropertyClassName(name_1, name_2)) {
- if (AddPropertyClassRange(ranges, p == 'P', name_1, name_2)) {
- RegExpCharacterClass* cc =
- new (Z) RegExpCharacterClass(ranges, builder->flags());
- builder->AddCharacterClass(cc);
- break;
- }
- }
- ReportError("Invalid property name");
- UNREACHABLE();
- } else {
- builder->AddCharacter(p);
- }
+ ZoneGrowableArray<CharacterRange>* ranges =
+ new ZoneGrowableArray<CharacterRange>(2);
+ CharacterRange::AddClassEscape(c, ranges);
+ RegExpTree* atom = new RegExpCharacterClass(ranges, false);
+ builder->AddAtom(atom);
break;
}
case '1':
@@ -665,20 +453,14 @@
builder->AddEmpty();
} else {
RegExpCapture* capture = GetCapture(index);
- RegExpTree* atom =
- new (Z) RegExpBackReference(capture, builder->flags());
+ RegExpTree* atom = new RegExpBackReference(capture);
builder->AddAtom(atom);
}
break;
}
- // With /u, no identity escapes except for syntax characters are
- // allowed. Otherwise, all identity escapes are allowed.
- if (is_unicode()) {
- ReportError(kUnicodeIdentity);
- UNREACHABLE();
- }
uint32_t first_digit = Next();
if (first_digit == '8' || first_digit == '9') {
+ // Treat as identity escape
builder->AddCharacter(first_digit);
Advance(2);
break;
@@ -687,11 +469,6 @@
}
case '0': {
Advance();
- if (is_unicode() && Next() >= '0' && Next() <= '9') {
- // With /u, decimal escape with leading 0 are not parsed as octal.
- ReportError("Invalid decimal escape");
- UNREACHABLE();
- }
uint32_t octal = ParseOctalLiteral();
builder->AddCharacter(octal);
break;
@@ -729,11 +506,6 @@
// This is outside the specification. We match JSC in
// reading the backslash as a literal character instead
// of as starting an escape.
- if (is_unicode()) {
- // With /u, invalid escapes are not treated as identity escapes.
- ReportError(kUnicodeIdentity);
- UNREACHABLE();
- }
builder->AddCharacter('\\');
} else {
Advance(2);
@@ -746,26 +518,18 @@
uint32_t value;
if (ParseHexEscape(2, &value)) {
builder->AddCharacter(value);
- } else if (!is_unicode()) {
- builder->AddCharacter('x');
} else {
- // With /u, invalid escapes are not treated as identity escapes.
- ReportError(kUnicodeIdentity);
- UNREACHABLE();
+ builder->AddCharacter('x');
}
break;
}
case 'u': {
Advance(2);
uint32_t value;
- if (ParseUnicodeEscape(&value)) {
- builder->AddEscapedUnicodeCharacter(value);
- } else if (!is_unicode()) {
- builder->AddCharacter('u');
+ if (ParseHexEscape(4, &value)) {
+ builder->AddCharacter(value);
} else {
- // With /u, invalid escapes are not treated as identity escapes.
- ReportError(kUnicodeIdentity);
- UNREACHABLE();
+ builder->AddCharacter('u');
}
break;
}
@@ -775,24 +539,16 @@
// an identity escape for non-Unicode patterns without named
// capture groups, and as the beginning of a named back-reference
// in all other cases.
- if (is_unicode() || HasNamedCaptures()) {
+ if (HasNamedCaptures()) {
Advance(2);
ParseNamedBackReference(builder, stored_state);
break;
}
FALL_THROUGH;
default:
- Advance();
- // With the unicode flag, no identity escapes except for syntax
- // characters are allowed. Otherwise, all identity escapes are
- // allowed.
- if (!is_unicode() || IsSyntaxCharacterOrSlash(current())) {
- builder->AddCharacter(current());
- Advance();
- } else {
- ReportError(kUnicodeIdentity);
- UNREACHABLE();
- }
+ // Identity escape.
+ builder->AddCharacter(Next());
+ Advance(2);
break;
}
break;
@@ -804,15 +560,8 @@
}
FALL_THROUGH;
}
- case '}':
- case ']':
- if (is_unicode()) {
- ReportError("Lone quantifier brackets");
- UNREACHABLE();
- }
- FALL_THROUGH;
default:
- builder->AddUnicodeCharacter(current());
+ builder->AddCharacter(current());
Advance();
break;
} // end switch(current())
@@ -944,9 +693,8 @@
}
}
// Store current state and begin new disjunction parsing.
- return new (Z)
- RegExpParserState(state, subexpr_type, lookaround_type, captures_started_,
- capture_name, state->builder()->flags(), Z);
+ return new RegExpParserState(state, subexpr_type, lookaround_type,
+ captures_started_, capture_name, Z);
}
// In order to know whether an escape is a backreference or not we have to scan
@@ -1008,6 +756,10 @@
Reset(saved_position);
}
+static inline bool IsDecimalDigit(int32_t c) {
+ return '0' <= c && c <= '9';
+}
+
bool RegExpParser::ParseBackReferenceIndex(intptr_t* index_out) {
ASSERT('\\' == current());
ASSERT('1' <= Next() && Next() <= '9');
@@ -1018,7 +770,7 @@
Advance(2);
while (true) {
uint32_t c = current();
- if (Utils::IsDecimalDigit(c)) {
+ if (IsDecimalDigit(c)) {
value = 10 * value + (c - '0');
if (value > kMaxCaptures) {
Reset(start);
@@ -1042,43 +794,17 @@
namespace {
-static inline constexpr bool IsAsciiIdentifierPart(uint32_t ch) {
- return Utils::IsAlphaNumeric(ch) || ch == '_' || ch == '$';
+inline constexpr bool IsIdentifierStart(uint16_t ch) {
+ return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' ||
+ ch == '$';
}
-// ES#sec-names-and-keywords Names and Keywords
-// UnicodeIDStart, '$', '_' and '\'
-static bool IsIdentifierStartSlow(uint32_t c) {
- // cannot use u_isIDStart because it does not work for
- // Other_ID_Start characters.
- return u_hasBinaryProperty(c, UCHAR_ID_START) ||
- (c < 0x60 && (c == '$' || c == '\\' || c == '_'));
+inline constexpr bool IsIdentifierPart(uint16_t ch) {
+ return IsIdentifierStart(ch) || (ch >= '0' && ch <= '9');
}
-// ES#sec-names-and-keywords Names and Keywords
-// UnicodeIDContinue, '$', '_', '\', ZWJ, and ZWNJ
-static bool IsIdentifierPartSlow(uint32_t c) {
- const uint32_t kZeroWidthNonJoiner = 0x200C;
- const uint32_t kZeroWidthJoiner = 0x200D;
- // Can't use u_isIDPart because it does not work for
- // Other_ID_Continue characters.
- return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) ||
- (c < 0x60 && (c == '$' || c == '\\' || c == '_')) ||
- c == kZeroWidthNonJoiner || c == kZeroWidthJoiner;
-}
-
-static inline bool IsIdentifierStart(uint32_t c) {
- if (c > 127) return IsIdentifierStartSlow(c);
- return IsAsciiIdentifierPart(c) && !Utils::IsDecimalDigit(c);
-}
-
-static inline bool IsIdentifierPart(uint32_t c) {
- if (c > 127) return IsIdentifierPartSlow(c);
- return IsAsciiIdentifierPart(c);
-}
-
-static bool IsSameName(const RegExpCaptureName* name1,
- const RegExpCaptureName* name2) {
+bool IsSameName(const RegExpCaptureName* name1,
+ const RegExpCaptureName* name2) {
if (name1->length() != name2->length()) return false;
for (intptr_t i = 0; i < name1->length(); i++) {
if (name1->At(i) != name2->At(i)) return false;
@@ -1088,34 +814,14 @@
} // end namespace
-static void PushCodeUnit(RegExpCaptureName* v, uint32_t code_unit) {
- if (code_unit <= Utf16::kMaxCodeUnit) {
- v->Add(code_unit);
- } else {
- uint16_t units[2];
- Utf16::Encode(code_unit, units);
- v->Add(units[0]);
- v->Add(units[1]);
- }
-}
-
const RegExpCaptureName* RegExpParser::ParseCaptureGroupName() {
auto name = new (Z) RegExpCaptureName();
bool at_start = true;
while (true) {
- uint32_t c = current();
+ const uint16_t c = current();
Advance();
- // Convert unicode escapes.
- if (c == '\\' && current() == 'u') {
- Advance();
- if (!ParseUnicodeEscape(&c)) {
- ReportError("Invalid Unicode escape sequence");
- UNREACHABLE();
- }
- }
-
// The backslash char is misclassified as both ID_Start and ID_Continue.
if (c == '\\') {
ReportError("Invalid capture group name");
@@ -1127,13 +833,13 @@
ReportError("Invalid capture group name");
UNREACHABLE();
}
- PushCodeUnit(name, c);
+ name->Add(c);
at_start = false;
} else {
if (c == '>') {
break;
} else if (IsIdentifierPart(c)) {
- PushCodeUnit(name, c);
+ name->Add(c);
} else {
ReportError("Invalid capture group name");
UNREACHABLE();
@@ -1190,7 +896,7 @@
if (state->IsInsideCaptureGroup(name)) {
builder->AddEmpty();
} else {
- RegExpBackReference* atom = new (Z) RegExpBackReference(builder->flags());
+ RegExpBackReference* atom = new (Z) RegExpBackReference();
atom->set_name(name);
builder->AddAtom(atom);
@@ -1309,17 +1015,17 @@
intptr_t start = position();
Advance();
intptr_t min = 0;
- if (!Utils::IsDecimalDigit(current())) {
+ if (!IsDecimalDigit(current())) {
Reset(start);
return false;
}
- while (Utils::IsDecimalDigit(current())) {
+ while (IsDecimalDigit(current())) {
intptr_t next = current() - '0';
if (min > (RegExpTree::kInfinity - next) / 10) {
// Overflow. Skip past remaining decimal digits and return -1.
do {
Advance();
- } while (Utils::IsDecimalDigit(current()));
+ } while (IsDecimalDigit(current()));
min = RegExpTree::kInfinity;
break;
}
@@ -1336,12 +1042,12 @@
max = RegExpTree::kInfinity;
Advance();
} else {
- while (Utils::IsDecimalDigit(current())) {
+ while (IsDecimalDigit(current())) {
intptr_t next = current() - '0';
if (max > (RegExpTree::kInfinity - next) / 10) {
do {
Advance();
- } while (Utils::IsDecimalDigit(current()));
+ } while (IsDecimalDigit(current()));
max = RegExpTree::kInfinity;
break;
}
@@ -1411,317 +1117,6 @@
return true;
}
-// This parses RegExpUnicodeEscapeSequence as described in ECMA262.
-bool RegExpParser::ParseUnicodeEscape(uint32_t* value) {
- // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
- // allowed). In the latter case, the number of hex digits between { } is
- // arbitrary. \ and u have already been read.
- if (current() == '{' && is_unicode()) {
- int start = position();
- Advance();
- if (ParseUnlimitedLengthHexNumber(Utf::kMaxCodePoint, value)) {
- if (current() == '}') {
- Advance();
- return true;
- }
- }
- Reset(start);
- return false;
- }
- // \u but no {, or \u{...} escapes not allowed.
- bool result = ParseHexEscape(4, value);
- if (result && is_unicode() && Utf16::IsLeadSurrogate(*value) &&
- current() == '\\') {
- // Attempt to read trail surrogate.
- int start = position();
- if (Next() == 'u') {
- Advance(2);
- uint32_t trail;
- if (ParseHexEscape(4, &trail) && Utf16::IsTrailSurrogate(trail)) {
- *value = Utf16::Decode(static_cast<uint16_t>(*value),
- static_cast<uint16_t>(trail));
- return true;
- }
- }
- Reset(start);
- }
- return result;
-}
-
-namespace {
-
-bool IsExactPropertyAlias(const char* property_name, UProperty property) {
- const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
- if (short_name != nullptr && strcmp(property_name, short_name) == 0) {
- return true;
- }
- for (int i = 0;; i++) {
- const char* long_name = u_getPropertyName(
- property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
- if (long_name == nullptr) break;
- if (strcmp(property_name, long_name) == 0) return true;
- }
- return false;
-}
-
-bool IsExactPropertyValueAlias(const char* property_value_name,
- UProperty property,
- int32_t property_value) {
- const char* short_name =
- u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
- if (short_name != nullptr && strcmp(property_value_name, short_name) == 0) {
- return true;
- }
- for (int i = 0;; i++) {
- const char* long_name = u_getPropertyValueName(
- property, property_value,
- static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
- if (long_name == nullptr) break;
- if (strcmp(property_value_name, long_name) == 0) return true;
- }
- return false;
-}
-
-bool LookupPropertyValueName(UProperty property,
- const char* property_value_name,
- bool negate,
- ZoneGrowableArray<CharacterRange>* result) {
- UProperty property_for_lookup = property;
- if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) {
- // For the property Script_Extensions, we have to do the property value
- // name lookup as if the property is Script.
- property_for_lookup = UCHAR_SCRIPT;
- }
- int32_t property_value =
- u_getPropertyValueEnum(property_for_lookup, property_value_name);
- if (property_value == UCHAR_INVALID_CODE) return false;
-
- // We require the property name to match exactly to one of the property value
- // aliases. However, u_getPropertyValueEnum uses loose matching.
- if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup,
- property_value)) {
- return false;
- }
-
- UErrorCode ec = U_ZERO_ERROR;
- icu::UnicodeSet set;
- set.applyIntPropertyValue(property, property_value, ec);
- bool success = ec == U_ZERO_ERROR && !set.isEmpty();
-
- if (success) {
- set.removeAllStrings();
- if (negate) set.complement();
- for (int i = 0; i < set.getRangeCount(); i++) {
- result->Add(
- CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)));
- }
- }
- return success;
-}
-
-template <size_t N>
-inline bool NameEquals(const char* name, const char (&literal)[N]) {
- return strncmp(name, literal, N + 1) == 0;
-}
-
-bool LookupSpecialPropertyValueName(const char* name,
- ZoneGrowableArray<CharacterRange>* result,
- bool negate) {
- if (NameEquals(name, "Any")) {
- if (negate) {
- // Leave the list of character ranges empty, since the negation of 'Any'
- // is the empty set.
- } else {
- result->Add(CharacterRange::Everything());
- }
- } else if (NameEquals(name, "ASCII")) {
- result->Add(negate ? CharacterRange::Range(0x80, Utf::kMaxCodePoint)
- : CharacterRange::Range(0x0, 0x7F));
- } else if (NameEquals(name, "Assigned")) {
- return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
- !negate, result);
- } else {
- return false;
- }
- return true;
-}
-
-// Explicitly whitelist supported binary properties. The spec forbids supporting
-// properties outside of this set to ensure interoperability.
-bool IsSupportedBinaryProperty(UProperty property) {
- switch (property) {
- case UCHAR_ALPHABETIC:
- // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName.
- // 'ASCII' is not supported by ICU. See LookupSpecialPropertyValueName.
- case UCHAR_ASCII_HEX_DIGIT:
- // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName.
- case UCHAR_BIDI_CONTROL:
- case UCHAR_BIDI_MIRRORED:
- case UCHAR_CASE_IGNORABLE:
- case UCHAR_CASED:
- case UCHAR_CHANGES_WHEN_CASEFOLDED:
- case UCHAR_CHANGES_WHEN_CASEMAPPED:
- case UCHAR_CHANGES_WHEN_LOWERCASED:
- case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
- case UCHAR_CHANGES_WHEN_TITLECASED:
- case UCHAR_CHANGES_WHEN_UPPERCASED:
- case UCHAR_DASH:
- case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
- case UCHAR_DEPRECATED:
- case UCHAR_DIACRITIC:
- case UCHAR_EMOJI:
- case UCHAR_EMOJI_COMPONENT:
- case UCHAR_EMOJI_MODIFIER_BASE:
- case UCHAR_EMOJI_MODIFIER:
- case UCHAR_EMOJI_PRESENTATION:
- case UCHAR_EXTENDED_PICTOGRAPHIC:
- case UCHAR_EXTENDER:
- case UCHAR_GRAPHEME_BASE:
- case UCHAR_GRAPHEME_EXTEND:
- case UCHAR_HEX_DIGIT:
- case UCHAR_ID_CONTINUE:
- case UCHAR_ID_START:
- case UCHAR_IDEOGRAPHIC:
- case UCHAR_IDS_BINARY_OPERATOR:
- case UCHAR_IDS_TRINARY_OPERATOR:
- case UCHAR_JOIN_CONTROL:
- case UCHAR_LOGICAL_ORDER_EXCEPTION:
- case UCHAR_LOWERCASE:
- case UCHAR_MATH:
- case UCHAR_NONCHARACTER_CODE_POINT:
- case UCHAR_PATTERN_SYNTAX:
- case UCHAR_PATTERN_WHITE_SPACE:
- case UCHAR_QUOTATION_MARK:
- case UCHAR_RADICAL:
- case UCHAR_REGIONAL_INDICATOR:
- case UCHAR_S_TERM:
- case UCHAR_SOFT_DOTTED:
- case UCHAR_TERMINAL_PUNCTUATION:
- case UCHAR_UNIFIED_IDEOGRAPH:
- case UCHAR_UPPERCASE:
- case UCHAR_VARIATION_SELECTOR:
- case UCHAR_WHITE_SPACE:
- case UCHAR_XID_CONTINUE:
- case UCHAR_XID_START:
- return true;
- default:
- break;
- }
- return false;
-}
-
-bool IsUnicodePropertyValueCharacter(char c) {
- // https://tc39.github.io/proposal-regexp-unicode-property-escapes/
- //
- // Note that using this to validate each parsed char is quite conservative.
- // A possible alternative solution would be to only ensure the parsed
- // property name/value candidate string does not contain '\0' characters and
- // let ICU lookups trigger the final failure.
- if (Utils::IsAlphaNumeric(c)) return true;
- return (c == '_');
-}
-
-} // anonymous namespace
-
-bool RegExpParser::ParsePropertyClassName(ZoneGrowableArray<char>* name_1,
- ZoneGrowableArray<char>* name_2) {
- ASSERT(name_1->is_empty());
- ASSERT(name_2->is_empty());
- // Parse the property class as follows:
- // - In \p{name}, 'name' is interpreted
- // - either as a general category property value name.
- // - or as a binary property name.
- // - In \p{name=value}, 'name' is interpreted as an enumerated property name,
- // and 'value' is interpreted as one of the available property value names.
- // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
- // - Loose matching is not applied.
- if (current() == '{') {
- // Parse \p{[PropertyName=]PropertyNameValue}
- for (Advance(); current() != '}' && current() != '='; Advance()) {
- if (!IsUnicodePropertyValueCharacter(current())) return false;
- if (!has_next()) return false;
- name_1->Add(static_cast<char>(current()));
- }
- if (current() == '=') {
- for (Advance(); current() != '}'; Advance()) {
- if (!IsUnicodePropertyValueCharacter(current())) return false;
- if (!has_next()) return false;
- name_2->Add(static_cast<char>(current()));
- }
- name_2->Add(0); // null-terminate string.
- }
- } else {
- return false;
- }
- Advance();
- name_1->Add(0); // null-terminate string.
-
- ASSERT(static_cast<size_t>(name_1->length() - 1) == strlen(name_1->data()));
- ASSERT(name_2->is_empty() ||
- static_cast<size_t>(name_2->length() - 1) == strlen(name_2->data()));
- return true;
-}
-
-bool RegExpParser::AddPropertyClassRange(
- ZoneGrowableArray<CharacterRange>* add_to,
- bool negate,
- ZoneGrowableArray<char>* name_1,
- ZoneGrowableArray<char>* name_2) {
- ASSERT(name_1->At(name_1->length() - 1) == '\0');
- ASSERT(name_2->is_empty() || name_2->At(name_2->length() - 1) == '\0');
- if (name_2->is_empty()) {
- // First attempt to interpret as general category property value name.
- const char* name = name_1->data();
- if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
- add_to)) {
- return true;
- }
- // Interpret "Any", "ASCII", and "Assigned".
- if (LookupSpecialPropertyValueName(name, add_to, negate)) {
- return true;
- }
- // Then attempt to interpret as binary property name with value name 'Y'.
- UProperty property = u_getPropertyEnum(name);
- if (!IsSupportedBinaryProperty(property)) return false;
- if (!IsExactPropertyAlias(name, property)) return false;
- return LookupPropertyValueName(property, negate ? "N" : "Y", false, add_to);
- } else {
- // Both property name and value name are specified. Attempt to interpret
- // the property name as enumerated property.
- const char* property_name = name_1->data();
- const char* value_name = name_2->data();
- UProperty property = u_getPropertyEnum(property_name);
- if (!IsExactPropertyAlias(property_name, property)) return false;
- if (property == UCHAR_GENERAL_CATEGORY) {
- // We want to allow aggregate value names such as "Letter".
- property = UCHAR_GENERAL_CATEGORY_MASK;
- } else if (property != UCHAR_SCRIPT &&
- property != UCHAR_SCRIPT_EXTENSIONS) {
- return false;
- }
- return LookupPropertyValueName(property, value_name, negate, add_to);
- }
-}
-
-bool RegExpParser::ParseUnlimitedLengthHexNumber(uint32_t max_value,
- uint32_t* value) {
- uint32_t x = 0;
- int d = HexValue(current());
- if (d < 0) {
- return false;
- }
- while (d >= 0) {
- x = x * 16 + d;
- if (x > max_value) {
- return false;
- }
- Advance();
- d = HexValue(current());
- }
- *value = x;
- return true;
-}
-
uint32_t RegExpParser::ParseClassCharacterEscape() {
ASSERT(current() == '\\');
DEBUG_ASSERT(has_next() && !IsSpecialClassEscape(Next()));
@@ -1752,47 +1147,35 @@
uint32_t letter = controlLetter & ~('A' ^ 'a');
// For compatibility with JSC, inside a character class
// we also accept digits and underscore as control characters.
- if (letter >= 'A' && letter <= 'Z') {
+ if ((controlLetter >= '0' && controlLetter <= '9') ||
+ controlLetter == '_' || (letter >= 'A' && letter <= 'Z')) {
Advance(2);
// Control letters mapped to ASCII control characters in the range
// 0x00-0x1f.
return controlLetter & 0x1f;
}
- if (is_unicode()) {
- // With /u, \c# or \c_ are invalid.
- ReportError("Invalid class escape");
- UNREACHABLE();
- }
- if (Utils::IsDecimalDigit(controlLetter) || controlLetter == '_') {
- Advance(2);
- return controlLetter & 0x1f;
- }
// We match JSC in reading the backslash as a literal
// character instead of as starting an escape.
return '\\';
}
case '0':
- // With /u, \0 is interpreted as NUL if not followed by another digit.
- if (is_unicode() && !(Next() >= '0' && Next() <= '9')) {
- Advance();
- return 0;
- }
FALL_THROUGH;
case '1':
+ FALL_THROUGH;
case '2':
+ FALL_THROUGH;
case '3':
+ FALL_THROUGH;
case '4':
+ FALL_THROUGH;
case '5':
+ FALL_THROUGH;
case '6':
+ FALL_THROUGH;
case '7':
// For compatibility, we interpret a decimal escape that isn't
// a back reference (and therefore either \0 or not valid according
// to the specification) as a 1..3 digit octal character code.
- if (is_unicode()) {
- // With \u, decimal escape is not interpreted as octal character code.
- ReportError("Invalid class escape");
- UNREACHABLE();
- }
return ParseOctalLiteral();
case 'x': {
Advance();
@@ -1800,11 +1183,6 @@
if (ParseHexEscape(2, &value)) {
return value;
}
- if (is_unicode()) {
- // With \u, invalid escapes are not treated as identity escapes.
- ReportError("Invalid escape");
- UNREACHABLE();
- }
// If \x is not followed by a two-digit hexadecimal, treat it
// as an identity escape.
return 'x';
@@ -1812,14 +1190,9 @@
case 'u': {
Advance();
uint32_t value;
- if (ParseUnicodeEscape(&value)) {
+ if (ParseHexEscape(4, &value)) {
return value;
}
- if (is_unicode()) {
- // With \u, invalid escapes are not treated as identity escapes.
- ReportError(kUnicodeIdentity);
- UNREACHABLE();
- }
// If \u is not followed by a four-digit hexadecimal, treat it
// as an identity escape.
return 'u';
@@ -1829,20 +1202,15 @@
// been matched by a more specific case, not just the subset required
// by the ECMAScript specification.
uint32_t result = current();
- if (!is_unicode() || IsSyntaxCharacterOrSlash(result) || result == '-') {
- Advance();
- return result;
- }
- ReportError(kUnicodeIdentity);
- UNREACHABLE();
+ Advance();
+ return result;
}
}
return 0;
}
-bool RegExpParser::ParseClassEscape(ZoneGrowableArray<CharacterRange>* ranges,
- bool add_unicode_case_equivalents,
- uint32_t* char_out) {
+CharacterRange RegExpParser::ParseClassAtom(uint16_t* char_class) {
+ ASSERT(0 == *char_class);
uint32_t first = current();
if (first == '\\') {
switch (Next()) {
@@ -1852,42 +1220,40 @@
case 'D':
case 's':
case 'S': {
- CharacterRange::AddClassEscape(static_cast<uint16_t>(Next()), ranges,
- add_unicode_case_equivalents);
+ *char_class = Next();
Advance(2);
- return true;
- }
- case 'p':
- case 'P': {
- if (!is_unicode()) break;
- bool negate = Next() == 'P';
- Advance(2);
- auto name_1 = new (Z) ZoneGrowableArray<char>();
- auto name_2 = new (Z) ZoneGrowableArray<char>();
- if (!ParsePropertyClassName(name_1, name_2) ||
- !AddPropertyClassRange(ranges, negate, name_1, name_2)) {
- ReportError("Invalid property name in character class");
- UNREACHABLE();
- }
- return true;
+ return CharacterRange::Singleton(0); // Return dummy value.
}
case kEndMarker:
ReportError("\\ at end of pattern");
UNREACHABLE();
default:
- break;
+ uint32_t c = ParseClassCharacterEscape();
+ return CharacterRange::Singleton(c);
}
- *char_out = ParseClassCharacterEscape();
- return false;
+ } else {
+ Advance();
+ return CharacterRange::Singleton(first);
}
- Advance();
- *char_out = first;
- return false;
}
-RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
+static const uint16_t kNoCharClass = 0;
+
+// Adds a range or a pre-defined character class to the character ranges.
+// If char_class is not kNoCharClass, it is interpreted as a class
+// escape (e.g., 's' means whitespace, from '\s').
+static inline void AddRangeOrEscape(ZoneGrowableArray<CharacterRange>* ranges,
+ uint16_t char_class,
+ CharacterRange range) {
+ if (char_class != kNoCharClass) {
+ CharacterRange::AddClassEscape(char_class, ranges);
+ } else {
+ ranges->Add(range);
+ }
+}
+
+RegExpTree* RegExpParser::ParseCharacterClass() {
static const char* kUnterminated = "Unterminated character class";
- static const char* kRangeInvalid = "Invalid character class";
static const char* kRangeOutOfOrder = "Range out of order in character class";
ASSERT(current() == '[');
@@ -1899,11 +1265,9 @@
}
ZoneGrowableArray<CharacterRange>* ranges =
new (Z) ZoneGrowableArray<CharacterRange>(2);
- bool add_unicode_case_equivalents = is_unicode() && builder->ignore_case();
while (has_more() && current() != ']') {
- uint32_t char_1;
- bool is_class_1 =
- ParseClassEscape(ranges, add_unicode_case_equivalents, &char_1);
+ uint16_t char_class = kNoCharClass;
+ CharacterRange first = ParseClassAtom(&char_class);
if (current() == '-') {
Advance();
if (current() == kEndMarker) {
@@ -1911,32 +1275,26 @@
// following code report an error.
break;
} else if (current() == ']') {
- if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1));
+ AddRangeOrEscape(ranges, char_class, first);
ranges->Add(CharacterRange::Singleton('-'));
break;
}
- uint32_t char_2;
- bool is_class_2 =
- ParseClassEscape(ranges, add_unicode_case_equivalents, &char_2);
- if (is_class_1 || is_class_2) {
+ uint16_t char_class_2 = kNoCharClass;
+ CharacterRange next = ParseClassAtom(&char_class_2);
+ if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
// Either end is an escaped character class. Treat the '-' verbatim.
- if (is_unicode()) {
- // ES2015 21.2.2.15.1 step 1.
- ReportError(kRangeInvalid);
- UNREACHABLE();
- }
- if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1));
+ AddRangeOrEscape(ranges, char_class, first);
ranges->Add(CharacterRange::Singleton('-'));
- if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2));
+ AddRangeOrEscape(ranges, char_class_2, next);
continue;
}
- if (char_1 > char_2) {
+ if (first.from() > next.to()) {
ReportError(kRangeOutOfOrder);
UNREACHABLE();
}
- ranges->Add(CharacterRange::Range(char_1, char_2));
+ ranges->Add(CharacterRange::Range(first.from(), next.to()));
} else {
- if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1));
+ AddRangeOrEscape(ranges, char_class, first);
}
}
if (!has_more()) {
@@ -1944,21 +1302,21 @@
UNREACHABLE();
}
Advance();
- RegExpCharacterClass::CharacterClassFlags character_class_flags =
- RegExpCharacterClass::DefaultFlags();
- if (is_negated) character_class_flags |= RegExpCharacterClass::NEGATED;
- return new (Z)
- RegExpCharacterClass(ranges, builder->flags(), character_class_flags);
+ if (ranges->length() == 0) {
+ ranges->Add(CharacterRange::Everything());
+ is_negated = !is_negated;
+ }
+ return new (Z) RegExpCharacterClass(ranges, is_negated);
}
// ----------------------------------------------------------------------------
// The Parser interface.
void RegExpParser::ParseRegExp(const String& input,
- RegExpFlags flags,
+ bool multiline,
RegExpCompileData* result) {
ASSERT(result != NULL);
- RegExpParser parser(input, &result->error, flags);
+ RegExpParser parser(input, &result->error, multiline);
// Throws an exception if 'input' is not valid.
RegExpTree* tree = parser.ParsePattern();
ASSERT(tree != NULL);
diff --git a/runtime/vm/regexp_parser.h b/runtime/vm/regexp_parser.h
index 63c237a..a2b626b 100644
--- a/runtime/vm/regexp_parser.h
+++ b/runtime/vm/regexp_parser.h
@@ -14,18 +14,13 @@
// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder : public ZoneAllocated {
public:
- explicit RegExpBuilder(RegExpFlags flags);
+ RegExpBuilder();
void AddCharacter(uint16_t character);
- void AddUnicodeCharacter(uint32_t character);
- void AddEscapedUnicodeCharacter(uint32_t character);
// "Adds" an empty expression. Does nothing except consume a
// following quantifier
void AddEmpty();
- void AddCharacterClass(RegExpCharacterClass* cc);
- void AddCharacterClassForDesugaring(uint32_t c);
void AddAtom(RegExpTree* tree);
- void AddTerm(RegExpTree* tree);
void AddAssertion(RegExpTree* tree);
void NewAlternative(); // '|'
// Attempt to add a quantifier to the last atom added. The return value
@@ -35,30 +30,17 @@
intptr_t max,
RegExpQuantifier::QuantifierType type);
RegExpTree* ToRegExp();
- RegExpFlags flags() const { return flags_; }
- bool ignore_case() const { return flags_.IgnoreCase(); }
- bool is_multi_line() const { return flags_.IsMultiLine(); }
- bool is_dot_all() const { return flags_.IsDotAll(); }
private:
- static const uint16_t kNoPendingSurrogate = 0;
- void AddLeadSurrogate(uint16_t lead_surrogate);
- void AddTrailSurrogate(uint16_t trail_surrogate);
- void FlushPendingSurrogate();
void FlushCharacters();
void FlushText();
void FlushTerms();
- bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
- bool NeedsDesugaringForIgnoreCase(uint32_t c);
Zone* zone() const { return zone_; }
- bool is_unicode() const { return flags_.IsUnicode(); }
Zone* zone_;
bool pending_empty_;
- RegExpFlags flags_;
ZoneGrowableArray<uint16_t>* characters_;
- uint16_t pending_surrogate_;
GrowableArray<RegExpTree*> terms_;
GrowableArray<RegExpTree*> text_;
GrowableArray<RegExpTree*> alternatives_;
@@ -74,15 +56,16 @@
class RegExpParser : public ValueObject {
public:
- RegExpParser(const String& in, String* error, RegExpFlags regexp_flags);
+ RegExpParser(const String& in, String* error, bool multiline_mode);
static void ParseRegExp(const String& input,
- RegExpFlags regexp_flags,
+ bool multiline,
RegExpCompileData* result);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
RegExpTree* ParseGroup();
+ RegExpTree* ParseCharacterClass();
// Parses a {...,...} quantifier and stores the range in the given
// out parameters.
@@ -95,24 +78,6 @@
// Checks whether the following is a length-digit hexadecimal number,
// and sets the value if it is.
bool ParseHexEscape(intptr_t length, uint32_t* value);
- bool ParseUnicodeEscape(uint32_t* value);
- bool ParseUnlimitedLengthHexNumber(uint32_t max_value, uint32_t* value);
-
- // Parses either {UNICODE_PROPERTY_NAME=UNICODE_PROPERTY_VALUE} or
- // the shorthand {UNICODE_PROPERTY_NAME_OR_VALUE} and stores the
- // result in the given out parameters. If the shorthand is used,
- // nothing will be added to name_2.
- bool ParsePropertyClassName(ZoneGrowableArray<char>* name_1,
- ZoneGrowableArray<char>* name_2);
- // Adds the specified unicode property to the provided character range.
- bool AddPropertyClassRange(ZoneGrowableArray<CharacterRange>* add_to,
- bool negate,
- ZoneGrowableArray<char>* name_1,
- ZoneGrowableArray<char>* name_2);
- // Returns a regexp node that corresponds to one of these unicode
- // property sequences: "Any", "ASCII", "Assigned".
- RegExpTree* GetPropertySequence(ZoneGrowableArray<char>* name_1);
- RegExpTree* ParseCharacterClass(const RegExpBuilder* builder);
uint32_t ParseOctalLiteral();
@@ -122,10 +87,7 @@
// can be reparsed.
bool ParseBackReferenceIndex(intptr_t* index_out);
- // Attempts to parse a possible escape within a character class.
- bool ParseClassEscape(ZoneGrowableArray<CharacterRange>* ranges,
- bool add_unicode_case_equivalents,
- uint32_t* char_out);
+ CharacterRange ParseClassAtom(uint16_t* char_class);
void ReportError(const char* message);
void Advance();
void Advance(intptr_t dist);
@@ -138,9 +100,6 @@
void set_contains_anchor() { contains_anchor_ = true; }
intptr_t captures_started() { return captures_started_; }
intptr_t position() { return next_pos_ - 1; }
- bool is_unicode() const { return top_level_flags_.IsUnicode(); }
-
- static bool IsSyntaxCharacterOrSlash(uint32_t c);
static const intptr_t kMaxCaptures = 1 << 16;
static const uint32_t kEndMarker = (1 << 21);
@@ -161,10 +120,9 @@
RegExpLookaround::Type lookaround_type,
intptr_t disjunction_capture_index,
const RegExpCaptureName* capture_name,
- RegExpFlags flags,
Zone* zone)
: previous_state_(previous_state),
- builder_(new (zone) RegExpBuilder(flags)),
+ builder_(new (zone) RegExpBuilder()),
group_type_(group_type),
lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index),
@@ -240,7 +198,6 @@
bool has_more() { return has_more_; }
bool has_next() { return next_pos_ < in().Length(); }
uint32_t Next();
- uint32_t ReadNext(bool update_position);
const String& in() { return in_; }
void ScanForCaptures();
@@ -255,7 +212,7 @@
// The capture count is only valid after we have scanned for captures.
intptr_t capture_count_;
bool has_more_;
- RegExpFlags top_level_flags_;
+ bool multiline_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;
diff --git a/runtime/vm/regexp_test.cc b/runtime/vm/regexp_test.cc
index f800931..531f12d 100644
--- a/runtime/vm/regexp_test.cc
+++ b/runtime/vm/regexp_test.cc
@@ -16,7 +16,7 @@
Thread* thread = Thread::Current();
Zone* zone = thread->zone();
const RegExp& regexp =
- RegExp::Handle(RegExpEngine::CreateRegExp(thread, pat, RegExpFlags()));
+ RegExp::Handle(RegExpEngine::CreateRegExp(thread, pat, false, false));
const Smi& idx = Smi::Handle(Smi::New(0));
return IRRegExpMacroAssembler::Execute(regexp, str, idx, /*sticky=*/false,
zone);
diff --git a/runtime/vm/runtime_entry_list.h b/runtime/vm/runtime_entry_list.h
index dd4159d..ddcd350 100644
--- a/runtime/vm/runtime_entry_list.h
+++ b/runtime/vm/runtime_entry_list.h
@@ -79,9 +79,7 @@
V(double, LibcAsin, double) \
V(double, LibcAtan, double) \
V(double, LibcAtan2, double, double) \
- V(RawBool*, CaseInsensitiveCompareUCS2, RawString*, RawSmi*, RawSmi*, \
- RawSmi*) \
- V(RawBool*, CaseInsensitiveCompareUTF16, RawString*, RawSmi*, RawSmi*, \
+ V(RawBool*, CaseInsensitiveCompareUC16, RawString*, RawSmi*, RawSmi*, \
RawSmi*) \
V(void, EnterSafepoint) \
V(void, ExitSafepoint)
diff --git a/runtime/vm/splay-tree.h b/runtime/vm/splay-tree.h
deleted file mode 100644
index 54c5d9f..0000000
--- a/runtime/vm/splay-tree.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-
-#ifndef RUNTIME_VM_SPLAY_TREE_H_
-#define RUNTIME_VM_SPLAY_TREE_H_
-
-#include "platform/splay-tree.h"
-#include "vm/zone.h"
-
-namespace dart {
-
-// A zone splay tree. The config type parameter encapsulates the
-// different configurations of a concrete splay tree (see
-// platform/splay-tree.h). The tree itself and all its elements are allocated
-// in the Zone.
-template <typename Config>
-class ZoneSplayTree final : public SplayTree<Config, ZoneAllocated, Zone> {
- public:
- explicit ZoneSplayTree(Zone* zone)
- : SplayTree<Config, ZoneAllocated, Zone>(ASSERT_NOTNULL(zone)) {}
- ~ZoneSplayTree() {
- // Reset the root to avoid unneeded iteration over all tree nodes
- // in the destructor. For a zone-allocated tree, nodes will be
- // freed by the Zone.
- SplayTree<Config, ZoneAllocated, Zone>::ResetRoot();
- }
-};
-
-} // namespace dart
-
-#endif // RUNTIME_VM_SPLAY_TREE_H_
diff --git a/runtime/vm/stub_code_arm64_test.cc b/runtime/vm/stub_code_arm64_test.cc
index 7471c7d..4e69a5a 100644
--- a/runtime/vm/stub_code_arm64_test.cc
+++ b/runtime/vm/stub_code_arm64_test.cc
@@ -83,7 +83,7 @@
__ LoadObject(R1, lhs_index);
__ LoadObject(R2, rhs_index);
__ LoadObject(R3, length);
- __ CallRuntime(kCaseInsensitiveCompareUCS2RuntimeEntry, 4);
+ __ CallRuntime(kCaseInsensitiveCompareUC16RuntimeEntry, 4);
__ LeaveDartFrame();
__ ret(); // Return value is in R0.
}
diff --git a/runtime/vm/stub_code_arm_test.cc b/runtime/vm/stub_code_arm_test.cc
index d849dc0..2a66478 100644
--- a/runtime/vm/stub_code_arm_test.cc
+++ b/runtime/vm/stub_code_arm_test.cc
@@ -82,7 +82,7 @@
__ LoadObject(R1, lhs_index);
__ LoadObject(R2, rhs_index);
__ LoadObject(R3, length);
- __ CallRuntime(kCaseInsensitiveCompareUCS2RuntimeEntry, 4);
+ __ CallRuntime(kCaseInsensitiveCompareUC16RuntimeEntry, 4);
__ LeaveDartFrameAndReturn(); // Return value is in R0.
}
diff --git a/runtime/vm/stub_code_ia32_test.cc b/runtime/vm/stub_code_ia32_test.cc
index 578afde..a4f79fc 100644
--- a/runtime/vm/stub_code_ia32_test.cc
+++ b/runtime/vm/stub_code_ia32_test.cc
@@ -86,7 +86,7 @@
__ movl(Address(ESP, 2 * kWordSize), EAX); // Push argument 3.
__ LoadObject(EAX, length);
__ movl(Address(ESP, 3 * kWordSize), EAX); // Push argument 4.
- __ CallRuntime(kCaseInsensitiveCompareUCS2RuntimeEntry, 4);
+ __ CallRuntime(kCaseInsensitiveCompareUC16RuntimeEntry, 4);
__ leave();
__ ret(); // Return value is in EAX.
}
diff --git a/runtime/vm/stub_code_x64_test.cc b/runtime/vm/stub_code_x64_test.cc
index 4cdf613..a71484a 100644
--- a/runtime/vm/stub_code_x64_test.cc
+++ b/runtime/vm/stub_code_x64_test.cc
@@ -83,7 +83,7 @@
__ LoadObject(CallingConventions::kArg2Reg, lhs_index);
__ LoadObject(CallingConventions::kArg3Reg, rhs_index);
__ LoadObject(CallingConventions::kArg4Reg, length);
- __ CallRuntime(kCaseInsensitiveCompareUCS2RuntimeEntry, 4);
+ __ CallRuntime(kCaseInsensitiveCompareUC16RuntimeEntry, 4);
__ LeaveStubFrame();
__ ret(); // Return value is in RAX.
}
diff --git a/sdk/lib/_internal/js_runtime/lib/core_patch.dart b/sdk/lib/_internal/js_runtime/lib/core_patch.dart
index 998f42b..e31fe29 100644
--- a/sdk/lib/_internal/js_runtime/lib/core_patch.dart
+++ b/sdk/lib/_internal/js_runtime/lib/core_patch.dart
@@ -528,15 +528,9 @@
@pragma('dart2js:noInline')
@patch
factory RegExp(String source,
- {bool multiLine: false,
- bool caseSensitive: true,
- bool unicode: false,
- bool dotAll: false}) =>
+ {bool multiLine: false, bool caseSensitive: true}) =>
new JSSyntaxRegExp(source,
- multiLine: multiLine,
- caseSensitive: caseSensitive,
- unicode: unicode,
- dotAll: dotAll);
+ multiLine: multiLine, caseSensitive: caseSensitive);
@patch
static String escape(String text) => quoteStringForRegExp(text);
diff --git a/sdk/lib/_internal/js_runtime/lib/regexp_helper.dart b/sdk/lib/_internal/js_runtime/lib/regexp_helper.dart
index 3dacc06..51b4bfb 100644
--- a/sdk/lib/_internal/js_runtime/lib/regexp_helper.dart
+++ b/sdk/lib/_internal/js_runtime/lib/regexp_helper.dart
@@ -42,22 +42,18 @@
var _nativeGlobalRegExp;
var _nativeAnchoredRegExp;
- String toString() =>
- 'RegExp/$pattern/' + JS('String', '#.flags', _nativeRegExp);
+ String toString() => 'RegExp/$pattern/';
JSSyntaxRegExp(String source,
- {bool multiLine: false,
- bool caseSensitive: true,
- bool unicode: false,
- bool dotAll: false})
+ {bool multiLine: false, bool caseSensitive: true})
: this.pattern = source,
- this._nativeRegExp = makeNative(
- source, multiLine, caseSensitive, unicode, dotAll, false);
+ this._nativeRegExp =
+ makeNative(source, multiLine, caseSensitive, false);
get _nativeGlobalVersion {
if (_nativeGlobalRegExp != null) return _nativeGlobalRegExp;
- return _nativeGlobalRegExp = makeNative(
- pattern, _isMultiLine, _isCaseSensitive, _isUnicode, _isDotAll, true);
+ return _nativeGlobalRegExp =
+ makeNative(pattern, _isMultiLine, _isCaseSensitive, true);
}
get _nativeAnchoredVersion {
@@ -67,22 +63,18 @@
// that it tries, and you can see if the original regexp matched, or it
// was the added zero-width match that matched, by looking at the last
// capture. If it is a String, the match participated, otherwise it didn't.
- return _nativeAnchoredRegExp = makeNative('$pattern|()', _isMultiLine,
- _isCaseSensitive, _isUnicode, _isDotAll, true);
+ return _nativeAnchoredRegExp =
+ makeNative('$pattern|()', _isMultiLine, _isCaseSensitive, true);
}
bool get _isMultiLine => JS('bool', '#.multiline', _nativeRegExp);
bool get _isCaseSensitive => JS('bool', '!#.ignoreCase', _nativeRegExp);
- bool get _isUnicode => JS('bool', '#.unicode', _nativeRegExp);
- bool get _isDotAll => JS('bool', '#.dotAll', _nativeRegExp);
- static makeNative(String source, bool multiLine, bool caseSensitive,
- bool unicode, bool dotAll, bool global) {
+ static makeNative(
+ String source, bool multiLine, bool caseSensitive, bool global) {
checkString(source);
String m = multiLine == true ? 'm' : '';
String i = caseSensitive == true ? '' : 'i';
- String u = unicode ? 'u' : '';
- String s = dotAll ? 's' : '';
String g = global ? 'g' : '';
// We're using the JavaScript's try catch instead of the Dart one to avoid
// dragging in Dart runtime support just because of using RegExp.
@@ -95,12 +87,10 @@
} catch (e) {
return e;
}
- })(#, # + # + # + # + #)''',
+ })(#, # + # + #)''',
source,
m,
i,
- u,
- s,
g);
if (JS('bool', '# instanceof RegExp', regexp)) return regexp;
// The returned value is the JavaScript exception. Turn it into a
@@ -109,7 +99,7 @@
throw new FormatException('Illegal RegExp pattern ($errorMessage)', source);
}
- RegExpMatch firstMatch(String string) {
+ Match firstMatch(String string) {
List m = JS('JSExtendableArray|Null', r'#.exec(#)', _nativeRegExp,
checkString(string));
if (m == null) return null;
@@ -126,7 +116,7 @@
return null;
}
- Iterable<RegExpMatch> allMatches(String string, [int start = 0]) {
+ Iterable<Match> allMatches(String string, [int start = 0]) {
checkString(string);
checkInt(start);
if (start < 0 || start > string.length) {
@@ -135,7 +125,7 @@
return new _AllMatchesIterable(this, string, start);
}
- RegExpMatch _execGlobal(String string, int start) {
+ Match _execGlobal(String string, int start) {
Object regexp = _nativeGlobalVersion;
JS('void', '#.lastIndex = #', regexp, start);
List match = JS('JSExtendableArray|Null', '#.exec(#)', regexp, string);
@@ -143,7 +133,7 @@
return new _MatchImplementation(this, match);
}
- RegExpMatch _execAnchored(String string, int start) {
+ Match _execAnchored(String string, int start) {
Object regexp = _nativeAnchoredVersion;
JS('void', '#.lastIndex = #', regexp, start);
List match = JS('JSExtendableArray|Null', '#.exec(#)', regexp, string);
@@ -154,7 +144,7 @@
return new _MatchImplementation(this, match);
}
- RegExpMatch matchAsPrefix(String string, [int start = 0]) {
+ Match matchAsPrefix(String string, [int start = 0]) {
if (start < 0 || start > string.length) {
throw new RangeError.range(start, 0, string.length);
}
@@ -163,8 +153,6 @@
bool get isMultiLine => _isMultiLine;
bool get isCaseSensitive => _isCaseSensitive;
- bool get isUnicode => _isUnicode;
- bool get isDotAll => _isDotAll;
}
class _MatchImplementation implements RegExpMatch {
@@ -231,34 +219,25 @@
}
}
-class _AllMatchesIterable extends IterableBase<RegExpMatch> {
+class _AllMatchesIterable extends IterableBase<Match> {
final JSSyntaxRegExp _re;
final String _string;
final int _start;
_AllMatchesIterable(this._re, this._string, this._start);
- Iterator<RegExpMatch> get iterator =>
- new _AllMatchesIterator(_re, _string, _start);
+ Iterator<Match> get iterator => new _AllMatchesIterator(_re, _string, _start);
}
-class _AllMatchesIterator implements Iterator<RegExpMatch> {
+class _AllMatchesIterator implements Iterator<Match> {
final JSSyntaxRegExp _regExp;
String _string;
int _nextIndex;
- RegExpMatch _current;
+ Match _current;
_AllMatchesIterator(this._regExp, this._string, this._nextIndex);
- RegExpMatch get current => _current;
-
- static bool _isLeadSurrogate(int c) {
- return c >= 0xd800 && c <= 0xdbff;
- }
-
- static bool _isTrailSurrogate(int c) {
- return c >= 0xdc00 && c <= 0xdfff;
- }
+ Match get current => _current;
bool moveNext() {
if (_string == null) return false;
@@ -268,15 +247,6 @@
_current = match;
int nextIndex = match.end;
if (match.start == nextIndex) {
- // Zero-width match. Advance by one more, unless the regexp
- // is in unicode mode and it would put us within a surrogate
- // pair. In that case, advance past the code point as a whole.
- if (_regExp.isUnicode &&
- _nextIndex + 1 < _string.length &&
- _isLeadSurrogate(_string.codeUnitAt(_nextIndex)) &&
- _isTrailSurrogate(_string.codeUnitAt(_nextIndex + 1))) {
- nextIndex++;
- }
nextIndex++;
}
_nextIndex = nextIndex;
@@ -290,6 +260,6 @@
}
/// Find the first match of [regExp] in [string] at or after [start].
-RegExpMatch firstMatchAfter(JSSyntaxRegExp regExp, String string, int start) {
+Match firstMatchAfter(JSSyntaxRegExp regExp, String string, int start) {
return regExp._execGlobal(string, start);
}
diff --git a/sdk/lib/core/regexp.dart b/sdk/lib/core/regexp.dart
index 62a35f0..c0bd153 100644
--- a/sdk/lib/core/regexp.dart
+++ b/sdk/lib/core/regexp.dart
@@ -16,7 +16,7 @@
* for the specification of JavaScript regular expressions.
*
* [firstMatch] is the main implementation method that applies a regular
- * expression to a string and returns the first [RegExpMatch]. All
+ * expression to a string and returns the first [Match]. All
* other methods in [RegExp] can build on it.
*
* Use [allMatches] to look for all matches of a regular expression in
@@ -27,7 +27,7 @@
* ```dart
* RegExp exp = new RegExp(r"(\w+)");
* String str = "Parse my string";
- * Iterable<RegExpMatch> matches = exp.allMatches(str);
+ * Iterable<Match> matches = exp.allMatches(str);
* ```
*
* Note the use of a _raw string_ (a string prefixed with `r`)
@@ -47,12 +47,6 @@
*
* If `caseSensitive` is disabled, then case is ignored.
*
- * If `unicode` is enabled, then the pattern is treated as a Unicode
- * pattern as described by the ECMAScript standard.
- *
- * If `dotAll` is enabled, then the `.` pattern will match _all_ characters,
- * including line terminators.
- *
* Example:
*
* ```dart
@@ -66,10 +60,7 @@
* interpolation is required.
*/
external factory RegExp(String source,
- {bool multiLine = false,
- bool caseSensitive = true,
- bool unicode = false,
- bool dotAll = false});
+ {bool multiLine = false, bool caseSensitive = true});
/**
* Returns a regular expression that matches [text].
@@ -88,14 +79,14 @@
* Searches for the first match of the regular expression
* in the string [input]. Returns `null` if there is no match.
*/
- RegExpMatch firstMatch(String input);
+ Match firstMatch(String input);
/**
* Returns an iterable of the matches of the regular expression on [input].
*
* If [start] is provided, only start looking for matches at `start`.
*/
- Iterable<RegExpMatch> allMatches(String input, [int start = 0]);
+ Iterable<Match> allMatches(String input, [int start = 0]);
/**
* Returns whether the regular expression has a match in the string [input].
@@ -129,33 +120,6 @@
* versions of the same letter.
*/
bool get isCaseSensitive;
-
- /**
- * Whether this regular expression uses full Unicode matching.
- *
- * In Unicode mode, UTF-16 surrogate pairs in the original string will be
- * treated as a single code point and will not match separately. Otherwise,
- * the target string will be treated purely as a sequence of individual code
- * units and surrogates will not be treated specially.
- *
- * In Unicode mode, the syntax of the RegExp pattern is more restricted, but
- * some pattern features, like Unicode property escapes, are only available in
- * this mode.
- */
- bool get isUnicode;
-
- /**
- * Whether "." in this regular expression matches line terminators.
- *
- * Normally, the "." character matches a single character, unless that
- * character is a line terminator. If this feature is active, then the "."
- * character will match any single character including line terminators.
- *
- * This feature is distinct from [isMultiline], as they affect the behavior
- * of different pattern characters, and so they can be used together or
- * separately.
- */
- bool get isDotAll;
}
/**
diff --git a/tests/corelib_2/regexp/dot-all_test.dart b/tests/corelib_2/regexp/dot-all_test.dart
deleted file mode 100644
index 5610e7f..0000000
--- a/tests/corelib_2/regexp/dot-all_test.dart
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2017 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- // The flags accessors.
- var re = new RegExp(r".", dotAll: true);
- assertTrue(re.isCaseSensitive);
- assertFalse(re.isMultiLine);
- assertFalse(re.isUnicode);
- assertTrue(re.isDotAll);
-
- re = new RegExp(r".",
- caseSensitive: false, multiLine: true, unicode: true, dotAll: true);
- assertFalse(re.isCaseSensitive);
- assertTrue(re.isMultiLine);
- assertTrue(re.isUnicode);
- assertTrue(re.isDotAll);
-
- re = new RegExp(r".", caseSensitive: false, multiLine: true, unicode: true);
- assertFalse(re.isCaseSensitive);
- assertTrue(re.isMultiLine);
- assertTrue(re.isUnicode);
- assertFalse(re.isDotAll);
-
- // Default '.' behavior.
- re = new RegExp(r"^.$");
- assertTrue(re.hasMatch("a"));
- assertTrue(re.hasMatch("3"));
- assertTrue(re.hasMatch("π"));
- assertTrue(re.hasMatch("\u2027"));
- assertTrue(re.hasMatch("\u0085"));
- assertTrue(re.hasMatch("\v"));
- assertTrue(re.hasMatch("\f"));
- assertTrue(re.hasMatch("\u180E"));
- assertFalse(re.hasMatch("\u{10300}")); // Supplementary plane.
- assertFalse(re.hasMatch("\n"));
- assertFalse(re.hasMatch("\r"));
- assertFalse(re.hasMatch("\u2028"));
- assertFalse(re.hasMatch("\u2029"));
-
- // Default '.' behavior (unicode).
- re = new RegExp(r"^.$", unicode: true);
- assertTrue(re.hasMatch("a"));
- assertTrue(re.hasMatch("3"));
- assertTrue(re.hasMatch("π"));
- assertTrue(re.hasMatch("\u2027"));
- assertTrue(re.hasMatch("\u0085"));
- assertTrue(re.hasMatch("\v"));
- assertTrue(re.hasMatch("\f"));
- assertTrue(re.hasMatch("\u180E"));
- assertTrue(re.hasMatch("\u{10300}")); // Supplementary plane.
- assertFalse(re.hasMatch("\n"));
- assertFalse(re.hasMatch("\r"));
- assertFalse(re.hasMatch("\u2028"));
- assertFalse(re.hasMatch("\u2029"));
-
- // DotAll '.' behavior.
- re = new RegExp(r"^.$", dotAll: true);
- assertTrue(re.hasMatch("a"));
- assertTrue(re.hasMatch("3"));
- assertTrue(re.hasMatch("π"));
- assertTrue(re.hasMatch("\u2027"));
- assertTrue(re.hasMatch("\u0085"));
- assertTrue(re.hasMatch("\v"));
- assertTrue(re.hasMatch("\f"));
- assertTrue(re.hasMatch("\u180E"));
- assertFalse(re.hasMatch("\u{10300}")); // Supplementary plane.
- assertTrue(re.hasMatch("\n"));
- assertTrue(re.hasMatch("\r"));
- assertTrue(re.hasMatch("\u2028"));
- assertTrue(re.hasMatch("\u2029"));
-
- // DotAll '.' behavior (unicode).
- re = new RegExp(r"^.$", unicode: true, dotAll: true);
- assertTrue(re.hasMatch("a"));
- assertTrue(re.hasMatch("3"));
- assertTrue(re.hasMatch("π"));
- assertTrue(re.hasMatch("\u2027"));
- assertTrue(re.hasMatch("\u0085"));
- assertTrue(re.hasMatch("\v"));
- assertTrue(re.hasMatch("\f"));
- assertTrue(re.hasMatch("\u180E"));
- assertTrue(re.hasMatch("\u{10300}")); // Supplementary plane.
- assertTrue(re.hasMatch("\n"));
- assertTrue(re.hasMatch("\r"));
- assertTrue(re.hasMatch("\u2028"));
- assertTrue(re.hasMatch("\u2029"));
-}
diff --git a/tests/corelib_2/regexp/lookbehind_test.dart b/tests/corelib_2/regexp/lookbehind_test.dart
index 3fd11af..5ccdcdb 100644
--- a/tests/corelib_2/regexp/lookbehind_test.dart
+++ b/tests/corelib_2/regexp/lookbehind_test.dart
@@ -433,6 +433,8 @@
assertThrows(() => new RegExp(r"(?<=.)?")); //# 01: ok
assertThrows(() => new RegExp(r"(?<=.)+")); //# 01: ok
- assertThrows(() => new RegExp(r"(?<=.)*", unicode: true)); //# 01: ok
- assertThrows(() => new RegExp(r"(?<=.){1,2}", unicode: true)); //# 01: ok
+ // No unicode flag (yet), so can't test these.
+ // See https://github.com/dart-lang/sdk/issues/36170.
+ // assertThrows("/(?<=.)*/u", SyntaxError);
+ // assertThrows("/(?<=.){1,2}/u", SyntaxError);
}
diff --git a/tests/corelib_2/regexp/named-captures_test.dart b/tests/corelib_2/regexp/named-captures_test.dart
index ee2d843..aff61b8 100644
--- a/tests/corelib_2/regexp/named-captures_test.dart
+++ b/tests/corelib_2/regexp/named-captures_test.dart
@@ -31,90 +31,35 @@
import 'v8_regexp_utils.dart';
void main() {
+ void testRE(RegExp re, String input, bool expectedResult) {
+ if (expectedResult) {
+ assertTrue(re.hasMatch(input));
+ } else {
+ assertFalse(re.hasMatch(input));
+ }
+ }
+
void execRE(RegExp re, String input, List<String> expectedResult) {
assertTrue(re.hasMatch(input));
shouldBe(re.firstMatch(input), expectedResult);
}
- void execString(String pattern, String input, List<String> expectedResult,
- {bool unicode = true, bool caseSensitive: false}) {
- execRE(RegExp(pattern, unicode: unicode, caseSensitive: caseSensitive),
- input, expectedResult);
- }
-
void namedRE(RegExp re, String input, Map<String, String> expectedResults) {
assertTrue(re.hasMatch(input));
- var match = re.firstMatch(input);
+ var match = re.firstMatch(input) as RegExpMatch;
for (var s in expectedResults.keys) {
assertEquals(match.namedGroup(s), expectedResults[s]);
}
}
- void execStringGroups(
- String pattern, String input, Map<String, String> expectedResults,
- {bool unicode = true, bool caseSensitive: false}) {
- namedRE(RegExp(pattern, unicode: unicode, caseSensitive: caseSensitive),
- input, expectedResults);
- }
-
void hasNames(RegExp re, String input, List<String> expectedResults) {
assertTrue(re.hasMatch(input));
- var match = re.firstMatch(input);
+ var match = re.firstMatch(input) as RegExpMatch;
for (var s in match.groupNames) {
assertTrue(expectedResults.contains(s));
}
}
- void matchesIndexEqual(String input, RegExp re1, RegExp re2) {
- var m1 = re1.firstMatch(input);
- var m2 = re2.firstMatch(input);
- if (m2 == null) {
- assertNull(m1);
- } else {
- assertTrue(m1 != null);
- assertEquals(m1.groupCount, m2.groupCount);
- for (int i = 0; i < m1.groupCount; i++) {
- assertEquals(m1.group(i), m2.group(i));
- }
- }
- }
-
- // Malformed named captures.
- // Empty name.
- assertThrows(() => RegExp(r"(?<>a)", unicode: true));
- // Unterminated name.
- assertThrows(() => RegExp(r"(?<aa)", unicode: true));
- // Name starting with digits.
- assertThrows(() => RegExp(r"(?<42a>a)", unicode: true));
- // Name starting with invalid char.
- assertThrows(() => RegExp(r"(?<:a>a)", unicode: true));
- // Name containing invalid char.
- assertThrows(() => RegExp(r"(?<a:>a)", unicode: true));
- // Duplicate name.
- assertThrows(() => RegExp(r"(?<a>a)(?<a>a)", unicode: true));
- // Duplicate name.
- assertThrows(() => RegExp(r"(?<a>a)(?<b>b)(?<a>a)", unicode: true));
- // Invalid reference.
- assertThrows(() => RegExp(r"\k<a>", unicode: true));
- // Unterminated reference.
- assertThrows(() => RegExp(r"\k<a", unicode: true));
- // Lone \k.
- assertThrows(() => RegExp(r"\k", unicode: true));
- // Lone \k.
- assertThrows(() => RegExp(r"(?<a>.)\k", unicode: true));
- // Unterminated reference.
- assertThrows(() => RegExp(r"(?<a>.)\k<a", unicode: true));
- // Invalid reference.
- assertThrows(() => RegExp(r"(?<a>.)\k<b>", unicode: true));
- // Invalid reference.
- assertThrows(() => RegExp(r"(?<a>a)\k<ab>", unicode: true));
- // Invalid reference.
- assertThrows(() => RegExp(r"(?<ab>a)\k<a>", unicode: true));
- // Invalid reference.
- assertThrows(() => RegExp(r"\k<a>(?<ab>a)", unicode: true));
- // Identity escape in capture.
- assertThrows(() => RegExp(r"(?<a>\a)", unicode: true));
-
// Behavior in non-unicode mode.
assertThrows(() => RegExp(r"(?<>a)"));
assertThrows(() => RegExp(r"(?<aa)"));
@@ -157,182 +102,6 @@
assertThrows(() => RegExp(r"\k<a(?<a>.)"));
assertThrows(() => RegExp(r"\k(?<a>.)"));
- // Basic named groups.
- execString(r"(?<a>a)", "bab", ["a", "a"]);
- execString(r"(?<a42>a)", "bab", ["a", "a"]);
- execString(r"(?<_>a)", "bab", ["a", "a"]);
- execString(r"(?<$>a)", "bab", ["a", "a"]);
- execString(r".(?<$>a).", "bab", ["bab", "a"]);
- execString(r".(?<a>a)(.)", "bab", ["bab", "a", "b"]);
- execString(r".(?<a>a)(?<b>.)", "bab", ["bab", "a", "b"]);
- execString(r".(?<a>\w\w)", "bab", ["bab", "ab"]);
- execString(r"(?<a>\w\w\w)", "bab", ["bab", "bab"]);
- execString(r"(?<a>\w\w)(?<b>\w)", "bab", ["bab", "ba", "b"]);
-
- execString(r"(?<a>a)", "bab", ["a", "a"], unicode: false);
- execString(r"(?<a42>a)", "bab", ["a", "a"], unicode: false);
- execString(r"(?<_>a)", "bab", ["a", "a"], unicode: false);
- execString(r"(?<$>a)", "bab", ["a", "a"], unicode: false);
- execString(r".(?<$>a).", "bab", ["bab", "a"], unicode: false);
- execString(r".(?<a>a)(.)", "bab", ["bab", "a", "b"], unicode: false);
- execString(r".(?<a>a)(?<b>.)", "bab", ["bab", "a", "b"], unicode: false);
- execString(r".(?<a>\w\w)", "bab", ["bab", "ab"], unicode: false);
- execString(r"(?<a>\w\w\w)", "bab", ["bab", "bab"], unicode: false);
- execString(r"(?<a>\w\w)(?<b>\w)", "bab", ["bab", "ba", "b"], unicode: false);
-
- matchesIndexEqual(
- "bab", RegExp(r"(?<a>a)", unicode: true), RegExp(r"(a)", unicode: true));
- matchesIndexEqual("bab", RegExp(r"(?<a42>a)", unicode: true),
- RegExp(r"(a)", unicode: true));
- matchesIndexEqual(
- "bab", RegExp(r"(?<_>a)", unicode: true), RegExp(r"(a)", unicode: true));
- matchesIndexEqual(
- "bab", RegExp(r"(?<$>a)", unicode: true), RegExp(r"(a)", unicode: true));
- matchesIndexEqual("bab", RegExp(r".(?<$>a).", unicode: true),
- RegExp(r".(a).", unicode: true));
- matchesIndexEqual("bab", RegExp(r".(?<a>a)(.)", unicode: true),
- RegExp(r".(a)(.)", unicode: true));
- matchesIndexEqual("bab", RegExp(r".(?<a>a)(?<b>.)", unicode: true),
- RegExp(r".(a)(.)", unicode: true));
- matchesIndexEqual("bab", RegExp(r".(?<a>\w\w)", unicode: true),
- RegExp(r".(\w\w)", unicode: true));
- matchesIndexEqual("bab", RegExp(r"(?<a>\w\w\w)", unicode: true),
- RegExp(r"(\w\w\w)", unicode: true));
- matchesIndexEqual("bab", RegExp(r"(?<a>\w\w)(?<b>\w)", unicode: true),
- RegExp(r"(\w\w)(\w)", unicode: true));
-
- execString(r"(?<b>b).\1", "bab", ["bab", "b"]);
- execString(r"(.)(?<a>a)\1\2", "baba", ["baba", "b", "a"]);
- execString(r"(.)(?<a>a)(?<b>\1)(\2)", "baba", ["baba", "b", "a", "b", "a"]);
- execString(r"(?<lt><)a", "<a", ["<a", "<"]);
- execString(r"(?<gt>>)a", ">a", [">a", ">"]);
-
- // Named references.
- var pattern = r"(?<b>.).\k<b>";
- execString(pattern, "bab", ["bab", "b"]);
- assertFalse(RegExp(pattern, unicode: true).hasMatch("baa"));
-
- // Nested groups.
- pattern = r"(?<a>.(?<b>.(?<c>.)))";
- execString(pattern, "bab", ["bab", "bab", "ab", "b"]);
- execStringGroups(pattern, "bab", {"a": "bab", "b": "ab", "c": "b"});
-
- // Reference inside group.
- pattern = r"(?<a>\k<a>\w)..";
- execString(pattern, "bab", ["bab", "b"]);
- execStringGroups(pattern, "bab", {"a": "b"});
-
- // Reference before group.
- pattern = r"\k<a>(?<a>b)\w\k<a>";
- execString(pattern, "bab", ["bab", "b"], unicode: false);
- execString(pattern, "bab", ["bab", "b"]);
- execStringGroups(pattern, "bab", {"a": "b"});
-
- pattern = r"(?<b>b)\k<a>(?<a>a)\k<b>";
- execString(pattern, "bab", ["bab", "b", "a"], unicode: false);
- execString(pattern, "bab", ["bab", "b", "a"]);
- execStringGroups(pattern, "bab", {"a": "a", "b": "b"});
-
- // Reference named groups.
- var match = RegExp(r"(?<a>a)(?<b>b)\k<a>", unicode: true).firstMatch("aba");
- assertEquals("a", match.namedGroup("a"));
- assertEquals("b", match.namedGroup("b"));
- assertFalse(match.groupNames.contains("c"));
-
- match =
- RegExp(r"(?<a>a)(?<b>b)\k<a>|(?<c>c)", unicode: true).firstMatch("aba");
- assertNull(match.namedGroup("c"));
-
- // Unicode names.
- execStringGroups(r"(?<π>a)", "bab", {"π": "a"});
- execStringGroups(r"(?<\u{03C0}>a)", "bab", {"π": "a"});
- execStringGroups(r"(?<π>a)", "bab", {"\u03C0": "a"});
- execStringGroups(r"(?<\u{03C0}>a)", "bab", {"\u03C0": "a"});
- execStringGroups(r"(?<$>a)", "bab", {"\$": "a"});
- execStringGroups(r"(?<_>a)", "bab", {"_": "a"});
- execStringGroups(r"(?<$𐒤>a)", "bab", {"\$𐒤": "a"});
- execStringGroups(r"(?<_\u200C>a)", "bab", {"_\u200C": "a"});
- execStringGroups(r"(?<_\u200D>a)", "bab", {"_\u200D": "a"});
- execStringGroups(r"(?<ಠ_ಠ>a)", "bab", {"ಠ_ಠ": "a"});
- // ID_Continue but not ID_Start.
- assertThrows(() => RegExp(r"/(?<❤>a)", unicode: true));
- assertThrows(() => RegExp(r"/(?<𐒤>a)", unicode: true));
-
- execStringGroups(r"(?<π>a)", "bab", {"π": "a"}, unicode: false);
- execStringGroups(r"(?<$>a)", "bab", {"\$": "a"}, unicode: false);
- execStringGroups(r"(?<_>a)", "bab", {"_": "a"}, unicode: false);
- assertThrows(() => RegExp(r"(?<$𐒤>a)"));
- execStringGroups(r"(?<ಠ_ಠ>a)", "bab", {"ಠ_ಠ": "a"}, unicode: false);
- // ID_Continue but not ID_Start.
- assertThrows(() => RegExp(r"/(?<❤>a)"));
- assertThrows(() => RegExp(r"/(?<𐒤>a)"));
-
- // Interaction with lookbehind assertions.
- pattern = r"(?<=(?<a>\w){3})f";
- execString(pattern, "abcdef", ["f", "c"]);
- execStringGroups(pattern, "abcdef", {"a": "c"});
-
- execStringGroups(r"(?<=(?<a>\w){4})f", "abcdef", {"a": "b"});
- execStringGroups(r"(?<=(?<a>\w)+)f", "abcdef", {"a": "a"});
- assertFalse(RegExp(r"(?<=(?<a>\w){6})f", unicode: true).hasMatch("abcdef"));
-
- execString(r"((?<=\w{3}))f", "abcdef", ["f", ""]);
- execString(r"(?<a>(?<=\w{3}))f", "abcdef", ["f", ""]);
-
- execString(r"(?<!(?<a>\d){3})f", "abcdef", ["f", null]);
- assertFalse(RegExp(r"(?<!(?<a>\D){3})f", unicode: true).hasMatch("abcdef"));
-
- execString(r"(?<!(?<a>\D){3})f|f", "abcdef", ["f", null]);
- execString(r"(?<a>(?<!\D{3}))f|f", "abcdef", ["f", null]);
-
- // Matches contain the names of named captures
- match = RegExp(r"(?<fst>.)|(?<snd>.)", unicode: true).firstMatch("abcd");
- Expect.setEquals(["fst", "snd"], match.groupNames);
-
- // Backslash as ID_Start and ID_Continue (v8:5868).
- assertThrows(() => RegExp("(?<\\>.)")); // '\' misclassified as ID_Start.
- assertThrows(() => RegExp("(?<a\\>.)")); // '\' misclassified as ID_Continue.
-
- // Backreference before the group (exercises the capture mini-parser).
- assertThrows(() => RegExp(r"/\1(?:.)", unicode: true));
- assertThrows(() => RegExp(r"/\1(?<=a).", unicode: true));
- assertThrows(() => RegExp(r"/\1(?<!a).", unicode: true));
- execString(r"\1(?<a>.)", "abcd", ["a", "a"]);
-
- // Unicode escapes in capture names. (Testing both unicode interpreted by
- // Dart string handling and also escaped unicode making it to RegExp parser.)
-
- // \u Lead \u Trail
- assertTrue(RegExp("(?<a\uD801\uDCA4>.)", unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"(?<a\uD801\uDCA4>.)", unicode: true).hasMatch("a"));
- assertThrows(() => RegExp("(?<a\uD801>.)", unicode: true)); // \u Lead
- assertThrows(() => RegExp(r"(?<a\uD801>.)", unicode: true)); // \u Lead
- assertThrows(() => RegExp("(?<a\uDCA4>.)", unicode: true)); // \u Trail
- assertThrows(() => RegExp(r"(?<a\uDCA4>.)", unicode: true)); // \u Trail
- // \u NonSurrogate
- assertTrue(RegExp("(?<\u0041>.)", unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"(?<\u0041>.)", unicode: true).hasMatch("a"));
- // \u{ Surrogate, ID_Continue }
- assertTrue(RegExp("(?<a\u{104A4}>.)", unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"(?<a\u{104A4}>.)", unicode: true).hasMatch("a"));
-
- // \u{ Out-of-bounds } -- only need to test RegExp parser for this.
- assertThrows(() => RegExp(r"(?<a\\u{110000}>.)", unicode: true));
-
- // Also checking non-unicode patterns, where surrogate pairs will not
- // be combined (so only \u0041 will have any success).
-
- assertThrows(() => RegExp("(?<a\uD801\uDCA4>.)"));
- assertThrows(() => RegExp(r"(?<a\uD801\uDCA4>.)"));
- assertThrows(() => RegExp("(?<a\uD801>.)"));
- assertThrows(() => RegExp(r"(?<a\uD801>.)"));
- assertThrows(() => RegExp("(?<a\uDCA4>.)"));
- assertThrows(() => RegExp(r"(?<a\uDCA4>.)"));
- assertTrue(RegExp("(?<\u0041>.)").hasMatch("a"));
- assertTrue(RegExp(r"(?<\u0041>.)").hasMatch("a"));
- assertThrows(() => RegExp("(?<a\u{104A4}>.)"));
- assertThrows(() => RegExp(r"(?<a\u{104A4}>.)"));
- assertThrows(() => RegExp("(?<a\u{10FFFF}>.)"));
- assertThrows(() => RegExp(r"(?<a\u{10FFFF}>.)"));
- assertThrows(() => RegExp(r"(?<a\\u{110000}>.)"));
+ // TODO(sstrickl): Add more tests when unicode flag support is in.
+ // https://github.com/dart-lang/sdk/issues/36170
}
diff --git a/tests/corelib_2/regexp/unicode-character-ranges_test.dart b/tests/corelib_2/regexp/unicode-character-ranges_test.dart
deleted file mode 100644
index 7154d0f..0000000
--- a/tests/corelib_2/regexp/unicode-character-ranges_test.dart
+++ /dev/null
@@ -1,160 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2011 the V8 project authors. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
-// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void execl(List<String> expectation, RegExp re, String subject) {
- shouldBe(re.firstMatch(subject), expectation);
-}
-
-void execs(List<String> expectation, String pattern, String subject) {
- final re = RegExp(pattern, unicode: true);
- shouldBe(re.firstMatch(subject), expectation);
-}
-
-void main() {
- // Character ranges.
- execs(["A"], r"[A-D]", "A");
- execs(["ABCD"], r"[A-D]+", "ZABCDEF");
-
- execs(["\u{12345}"], r"[\u1234-\u{12345}]", "\u{12345}");
- execs(null, r"[^\u1234-\u{12345}]", "\u{12345}");
-
- execs(["\u{1234}"], r"[\u1234-\u{12345}]", "\u{1234}");
- execs(null, r"[^\u1234-\u{12345}]", "\u{1234}");
-
- execs(null, r"[\u1234-\u{12345}]", "\u{1233}");
- execs(["\u{1233}"], r"[^\u1234-\u{12345}]", "\u{1233}");
-
- execs(["\u{12346}"], r"[^\u1234-\u{12345}]", "\u{12346}");
- execs(null, r"[\u1234-\u{12345}]", "\u{12346}");
-
- execs(["\u{12342}"], r"[\u{12340}-\u{12345}]", "\u{12342}");
- execs(["\u{12342}"], r"[\ud808\udf40-\ud808\udf45]", "\u{12342}");
- execs(null, r"[^\u{12340}-\u{12345}]", "\u{12342}");
- execs(null, r"[^\ud808\udf40-\ud808\udf45]", "\u{12342}");
-
- execs(["\u{ffff}"], r"[\u{ff80}-\u{12345}]", "\u{ffff}");
- execs(["\u{ffff}"], r"[\u{ff80}-\ud808\udf45]", "\u{ffff}");
- execs(null, r"[^\u{ff80}-\u{12345}]", "\u{ffff}");
- execs(null, r"[^\u{ff80}-\ud808\udf45]", "\u{ffff}");
-
- // Lone surrogate
- execs(["\udc00"], r"[^\u{ff80}-\u{12345}]", "\uff99\u{dc00}A");
- execs(["\udc01"], r"[\u0100-\u{10ffff}]", "A\udc01");
- execs(["\udc03"], r"[\udc01-\udc03]", "\ud801\udc02\udc03");
- execs(["\ud801"], r"[\ud801-\ud803]", "\ud802\udc01\ud801");
-
- // Paired surrogate.
- execs(null, r"[^\u{ff80}-\u{12345}]", "\u{d800}\u{dc00}");
- execs(["\ud800\udc00"], r"[\u{ff80}-\u{12345}]", "\u{d800}\u{dc00}");
- execs(["foo\u{10e6d}bar"], r"foo\ud803\ude6dbar", "foo\u{10e6d}bar");
-
- // Lone surrogates
- execs(["\ud801\ud801"], r"\ud801+", "\ud801\udc01\ud801\ud801");
- execs(["\udc01\udc01"], r"\udc01+", "\ud801\ud801\udc01\udc01\udc01");
-
- execs(["\udc02\udc03A"], r"\W\WA", "\ud801\udc01A\udc02\udc03A");
- execs(["\ud801\ud802"], r"\ud801.", "\ud801\udc01\ud801\ud802");
- execs(["\udc02\udc03A"], r"[\ud800-\udfff][\ud800-\udfff]A",
- "\ud801\udc01A\udc02\udc03A");
-
- // Character classes
- execs(null, r"\w", "\ud801\udc01");
- execl(["\ud801"], RegExp(r"[^\w]"), "\ud801\udc01");
- execs(["\ud801\udc01"], r"[^\w]", "\ud801\udc01");
- execl(["\ud801"], RegExp(r"\W"), "\ud801\udc01");
- execs(["\ud801\udc01"], r"\W", "\ud801\udc01");
-
- execs(["\ud800X"], r".X", "\ud800XaX");
- execs(["aX"], r".(?<!\ud800)X", "\ud800XaX");
- execs(["aX"], r".(?<![\ud800-\ud900])X", "\ud800XaX");
-
- execs(null, r"[]", "\u1234");
- execs(["0abc"], r"[^]abc", "0abc");
- execs(["\u1234abc"], r"[^]abc", "\u1234abc");
- execs(["\u{12345}abc"], r"[^]abc", "\u{12345}abc");
-
- execs(null, r"[\u{0}-\u{1F444}]", "\ud83d\udfff");
-
- // Backward matches of lone surrogates.
- execs(["B", "\ud803A"], r"(?<=([\ud800-\ud900]A))B",
- "\ud801\udc00AB\udc00AB\ud802\ud803AB");
- execs(["B", "\udc00A"], r"(?<=([\ud800-\u{10300}]A))B",
- "\ud801\udc00AB\udc00AB\ud802\ud803AB");
- execs(["B", "\udc11A"], r"(?<=([\udc00-\udd00]A))B",
- "\ud801\udc00AB\udc11AB\ud802\ud803AB");
- execs(["X", "\ud800C"], r"(?<=(\ud800\w))X",
- "\ud800\udc00AX\udc11BX\ud800\ud800CX");
- execs(["C", "\ud800\ud800"], r"(?<=(\ud800.))\w",
- "\ud800\udc00AX\udc11BX\ud800\ud800CX");
- execs(["X", "\udc01C"], r"(?<=(\udc01\w))X",
- "\ud800\udc01AX\udc11BX\udc01\udc01CX");
- execs(["C", "\udc01\udc01"], r"(?<=(\udc01.)).",
- "\ud800\udc01AX\udc11BX\udc01\udc01CX");
-
- const L = "\ud800";
- const T = "\udc00";
- const X = "X";
-
- // Test string contains only match.
- void testw(bool expect, String src, String subject) {
- var re = RegExp(r"^" + src + r"$", unicode: true);
- assertEquals(expect, re.hasMatch(subject));
- }
-
- // Test string starts with match.
- void tests(bool expect, String src, String subject) {
- var re = RegExp(r"^" + src, unicode: true);
- assertEquals(expect, re.hasMatch(subject));
- }
-
- testw(true, X, X);
- testw(true, L, L);
- testw(true, T, T);
- testw(true, L + T, L + T);
- testw(true, T + L, T + L);
- testw(false, T, L + T);
- testw(false, L, L + T);
- testw(true, r".(?<=" + L + r")", L);
- testw(true, r".(?<=" + T + r")", T);
- testw(true, r".(?<=" + L + T + r")", L + T);
- testw(true, r".(?<=" + L + T + r")", L + T);
- tests(true, r".(?<=" + T + r")", T + L);
- tests(false, r".(?<=" + L + r")", L + T);
- tests(false, r".(?<=" + T + r")", L + T);
- tests(true, r"..(?<=" + T + r")", T + T + L);
- tests(true, r"..(?<=" + T + r")", X + T + L);
- tests(true, r"...(?<=" + L + r")", X + T + L);
- tests(false, r"...(?<=" + T + r")", X + L + T);
- tests(true, r"..(?<=" + L + T + r")", X + L + T);
- tests(true, r"..(?<=" + L + T + r"(?<=" + L + T + r"))", X + L + T);
- tests(false, r"..(?<=" + L + r"(" + T + r"))", X + L + T);
- tests(false, r".*" + L, X + L + T);
- tests(true, r".*" + L, X + L + L + T);
- tests(false, r".*" + L, X + L + T + L + T);
- tests(false, r".*" + T, X + L + T + L + T);
- tests(true, r".*" + T, X + L + T + T + L + T);
-}
diff --git a/tests/corelib_2/regexp/unicode-escapes-in-regexps_test.dart b/tests/corelib_2/regexp/unicode-escapes-in-regexps_test.dart
deleted file mode 100644
index 7cb0894..0000000
--- a/tests/corelib_2/regexp/unicode-escapes-in-regexps_test.dart
+++ /dev/null
@@ -1,307 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2014 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void testRegExpHelper(RegExp r) {
- assertTrue(r.hasMatch("foo"));
- assertTrue(r.hasMatch("boo"));
- assertFalse(r.hasMatch("moo"));
-}
-
-void TestUnicodeEscapes() {
- testRegExpHelper(RegExp(r"(\u0066|\u0062)oo"));
- testRegExpHelper(RegExp(r"(\u0066|\u0062)oo", unicode: true));
- testRegExpHelper(RegExp(r"(\u{0066}|\u{0062})oo", unicode: true));
- testRegExpHelper(RegExp(r"(\u{66}|\u{000062})oo", unicode: true));
-
- // Note that we need \\ inside a string, otherwise it's interpreted as a
- // unicode escape inside a string.
- testRegExpHelper(RegExp("(\\u0066|\\u0062)oo"));
- testRegExpHelper(RegExp("(\\u0066|\\u0062)oo", unicode: true));
- testRegExpHelper(RegExp("(\\u{0066}|\\u{0062})oo", unicode: true));
- testRegExpHelper(RegExp("(\\u{66}|\\u{000062})oo", unicode: true));
-
- // Though, unicode escapes via strings should work too.
- testRegExpHelper(RegExp("(\u0066|\u0062)oo"));
- testRegExpHelper(RegExp("(\u0066|\u0062)oo", unicode: true));
- testRegExpHelper(RegExp("(\u{0066}|\u{0062})oo", unicode: true));
- testRegExpHelper(RegExp("(\u{66}|\u{000062})oo", unicode: true));
-}
-
-void TestUnicodeEscapesInCharacterClasses() {
- testRegExpHelper(RegExp(r"[\u0062-\u0066]oo"));
- testRegExpHelper(RegExp(r"[\u0062-\u0066]oo", unicode: true));
- testRegExpHelper(RegExp(r"[\u{0062}-\u{0066}]oo", unicode: true));
- testRegExpHelper(RegExp(r"[\u{62}-\u{000066}]oo", unicode: true));
-
- // Note that we need \\ inside a string, otherwise it's interpreted as a
- // unicode escape inside a string.
- testRegExpHelper(RegExp("[\\u0062-\\u0066]oo"));
- testRegExpHelper(RegExp("[\\u0062-\\u0066]oo", unicode: true));
- testRegExpHelper(RegExp("[\\u{0062}-\\u{0066}]oo", unicode: true));
- testRegExpHelper(RegExp("[\\u{62}-\\u{000066}]oo", unicode: true));
-
- // Though, unicode escapes via strings should work too.
- testRegExpHelper(RegExp("[\u0062-\u0066]oo"));
- testRegExpHelper(RegExp("[\u0062-\u0066]oo", unicode: true));
- testRegExpHelper(RegExp("[\u{0062}-\u{0066}]oo", unicode: true));
- testRegExpHelper(RegExp("[\u{62}-\u{000066}]oo", unicode: true));
-}
-
-void TestBraceEscapesWithoutUnicodeFlag() {
- // \u followed by illegal escape will be parsed as u. {x} will be the
- // character count.
- void helper1(RegExp r) {
- assertFalse(r.hasMatch("fbar"));
- assertFalse(r.hasMatch("fubar"));
- assertTrue(r.hasMatch("fuubar"));
- assertFalse(r.hasMatch("fuuubar"));
- }
-
- helper1(RegExp(r"f\u{2}bar"));
- helper1(RegExp("f\\u{2}bar"));
-
- void helper2(RegExp r) {
- assertFalse(r.hasMatch("fbar"));
- assertTrue(r.hasMatch("fubar"));
- assertTrue(r.hasMatch("fuubar"));
- assertFalse(r.hasMatch("fuuubar"));
- }
-
- helper2(RegExp(r"f\u{1,2}bar"));
- helper2(RegExp("f\\u{1,2}bar"));
-
- void helper3(RegExp r) {
- assertTrue(r.hasMatch("u"));
- assertTrue(r.hasMatch("{"));
- assertTrue(r.hasMatch("2"));
- assertTrue(r.hasMatch("}"));
- assertFalse(r.hasMatch("q"));
- assertFalse(r.hasMatch("("));
- assertFalse(r.hasMatch(")"));
- }
-
- helper3(RegExp(r"[\u{2}]"));
- helper3(RegExp("[\\u{2}]"));
-}
-
-void TestInvalidEscapes() {
- // Without the u flag, invalid unicode escapes and other invalid escapes are
- // treated as identity escapes.
- void helper1(RegExp r) {
- assertTrue(r.hasMatch("firstuxz89second"));
- }
-
- helper1(RegExp(r"first\u\x\z\8\9second"));
- helper1(RegExp("first\\u\\x\\z\\8\\9second"));
-
- void helper2(RegExp r) {
- assertTrue(r.hasMatch("u"));
- assertTrue(r.hasMatch("x"));
- assertTrue(r.hasMatch("z"));
- assertTrue(r.hasMatch("8"));
- assertTrue(r.hasMatch("9"));
- assertFalse(r.hasMatch("q"));
- assertFalse(r.hasMatch("7"));
- }
-
- helper2(RegExp(r"[\u\x\z\8\9]"));
- helper2(RegExp("[\\u\\x\\z\\8\\9]"));
-
- // However, with the u flag, these are treated as invalid escapes.
- assertThrows(() => RegExp(r"\u", unicode: true));
- assertThrows(() => RegExp(r"\u12", unicode: true));
- assertThrows(() => RegExp(r"\ufoo", unicode: true));
- assertThrows(() => RegExp(r"\x", unicode: true));
- assertThrows(() => RegExp(r"\xfoo", unicode: true));
- assertThrows(() => RegExp(r"\z", unicode: true));
- assertThrows(() => RegExp(r"\8", unicode: true));
- assertThrows(() => RegExp(r"\9", unicode: true));
-
- assertThrows(() => RegExp("\\u", unicode: true));
- assertThrows(() => RegExp("\\u12", unicode: true));
- assertThrows(() => RegExp("\\ufoo", unicode: true));
- assertThrows(() => RegExp("\\x", unicode: true));
- assertThrows(() => RegExp("\\xfoo", unicode: true));
- assertThrows(() => RegExp("\\z", unicode: true));
- assertThrows(() => RegExp("\\8", unicode: true));
- assertThrows(() => RegExp("\\9", unicode: true));
-}
-
-void TestTooBigHexEscape() {
- // The hex number inside \u{} has a maximum value.
- RegExp(r"\u{10ffff}", unicode: true);
- RegExp("\\u{10ffff}", unicode: true);
- assertThrows(() => RegExp(r"\u{110000}", unicode: true));
- assertThrows(() => RegExp("\\u{110000}", unicode: true));
-
- // Without the u flag, they're of course fine ({x} is the count).
- RegExp(r"\u{110000}");
- RegExp("\\u{110000}");
-}
-
-void TestSyntaxEscapes() {
- // Syntax escapes work the same with or without the u flag.
- void helper(RegExp r) {
- assertTrue(r.hasMatch("foo[bar"));
- assertFalse(r.hasMatch("foo]bar"));
- }
-
- helper(RegExp(r"foo\[bar"));
- helper(RegExp("foo\\[bar"));
- helper(RegExp(r"foo\[bar", unicode: true));
- helper(RegExp("foo\\[bar", unicode: true));
-}
-
-void TestUnicodeSurrogates() {
- // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
- void helper(RegExp r) {
- assertTrue(r.hasMatch("foo\u{10e6d}bar"));
- }
-
- helper(RegExp(r"foo\ud803\ude6dbar", unicode: true));
- helper(RegExp("foo\\ud803\\ude6dbar", unicode: true));
-}
-
-void main() {
- TestUnicodeEscapes();
- TestUnicodeEscapesInCharacterClasses();
- TestBraceEscapesWithoutUnicodeFlag();
- TestInvalidEscapes();
- TestTooBigHexEscape();
- TestSyntaxEscapes();
- TestUnicodeSurrogates();
-
- // Non-BMP patterns.
- // Single character atom.
- assertTrue(RegExp("\u{12345}", unicode: true).hasMatch("\u{12345}"));
- assertTrue(RegExp(r"\u{12345}", unicode: true).hasMatch("\u{12345}"));
- assertTrue(RegExp(r"\u{12345}", unicode: true).hasMatch("\ud808\udf45"));
- assertTrue(RegExp(r"\u{12345}", unicode: true).hasMatch("\ud808\udf45"));
- assertFalse(RegExp(r"\u{12345}", unicode: true).hasMatch("\udf45"));
- assertFalse(RegExp(r"\u{12345}", unicode: true).hasMatch("\udf45"));
-
- // Multi-character atom.
- assertTrue(RegExp(r"\u{12345}\u{23456}", unicode: true)
- .hasMatch("a\u{12345}\u{23456}b"));
- assertTrue(RegExp(r"\u{12345}\u{23456}", unicode: true)
- .hasMatch("b\u{12345}\u{23456}c"));
- assertFalse(RegExp(r"\u{12345}\u{23456}", unicode: true)
- .hasMatch("a\udf45\u{23456}b"));
- assertFalse(RegExp(r"\u{12345}\u{23456}", unicode: true)
- .hasMatch("b\udf45\u{23456}c"));
-
- // Disjunction.
- assertTrue(RegExp(r"\u{12345}(?:\u{23456})", unicode: true)
- .hasMatch("a\u{12345}\u{23456}b"));
- assertTrue(RegExp(r"\u{12345}(?:\u{23456})", unicode: true)
- .hasMatch("b\u{12345}\u{23456}c"));
- assertFalse(RegExp(r"\u{12345}(?:\u{23456})", unicode: true)
- .hasMatch("a\udf45\u{23456}b"));
- assertFalse(RegExp(r"\u{12345}(?:\u{23456})", unicode: true)
- .hasMatch("b\udf45\u{23456}c"));
-
- // Alternative.
- assertTrue(
- RegExp(r"\u{12345}|\u{23456}", unicode: true).hasMatch("a\u{12345}b"));
- assertTrue(
- RegExp(r"\u{12345}|\u{23456}", unicode: true).hasMatch("b\u{23456}c"));
- assertFalse(
- RegExp(r"\u{12345}|\u{23456}", unicode: true).hasMatch("a\udf45\ud84db"));
- assertFalse(
- RegExp(r"\u{12345}|\u{23456}", unicode: true).hasMatch("b\udf45\ud808c"));
-
- // Capture.
- assertTrue(RegExp("(\u{12345}|\u{23456}).\\1", unicode: true)
- .hasMatch("\u{12345}b\u{12345}"));
- assertTrue(RegExp(r"(\u{12345}|\u{23456}).\1", unicode: true)
- .hasMatch("\u{12345}b\u{12345}"));
- assertFalse(RegExp("(\u{12345}|\u{23456}).\\1", unicode: true)
- .hasMatch("\u{12345}b\u{23456}"));
- assertFalse(RegExp(r"(\u{12345}|\u{23456}).\1", unicode: true)
- .hasMatch("\u{12345}b\u{23456}"));
-
- // Quantifier.
- assertTrue(RegExp("\u{12345}{3}", unicode: true)
- .hasMatch("\u{12345}\u{12345}\u{12345}"));
- assertTrue(RegExp(r"\u{12345}{3}", unicode: true)
- .hasMatch("\u{12345}\u{12345}\u{12345}"));
- assertTrue(RegExp("\u{12345}{3}").hasMatch("\u{12345}\udf45\udf45"));
- assertFalse(RegExp(r"\ud808\udf45{3}", unicode: true)
- .hasMatch("\u{12345}\udf45\udf45"));
- assertTrue(RegExp(r"\ud808\udf45{3}", unicode: true)
- .hasMatch("\u{12345}\u{12345}\u{12345}"));
- assertFalse(
- RegExp("\u{12345}{3}", unicode: true).hasMatch("\u{12345}\udf45\udf45"));
- assertFalse(
- RegExp(r"\u{12345}{3}", unicode: true).hasMatch("\u{12345}\udf45\udf45"));
-
- // Literal surrogates.
- shouldBe(
- RegExp("\ud800\udc00+", unicode: true).firstMatch("\u{10000}\u{10000}"),
- ["\u{10000}\u{10000}"]);
- shouldBe(
- RegExp("\\ud800\\udc00+", unicode: true).firstMatch("\u{10000}\u{10000}"),
- ["\u{10000}\u{10000}"]);
-
- shouldBe(
- RegExp("[\\ud800\\udc03-\\ud900\\udc01\]+", unicode: true)
- .firstMatch("\u{10003}\u{50001}"),
- ["\u{10003}\u{50001}"]);
- shouldBe(
- RegExp("[\ud800\udc03-\u{50001}\]+", unicode: true)
- .firstMatch("\u{10003}\u{50001}"),
- ["\u{10003}\u{50001}"]);
-
- // Unicode escape sequences to represent a non-BMP character cannot have
- // mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
- assertThrows(() => RegExp("[\\ud800\udc03-\ud900\\udc01\]+", unicode: true));
- assertNull(
- RegExp("\\ud800\udc00+", unicode: true).firstMatch("\u{10000}\u{10000}"));
- assertNull(
- RegExp("\ud800\\udc00+", unicode: true).firstMatch("\u{10000}\u{10000}"));
-
- assertNull(RegExp("[\\ud800\udc00]", unicode: true).firstMatch("\u{10000}"));
- assertNull(
- RegExp("[\\{ud800}\udc00]", unicode: true).firstMatch("\u{10000}"));
- assertNull(RegExp("[\ud800\\udc00]", unicode: true).firstMatch("\u{10000}"));
- assertNull(
- RegExp("[\ud800\\{udc00}]", unicode: true).firstMatch("\u{10000}"));
-
- assertNull(RegExp(r"\u{d800}\u{dc00}+", unicode: true)
- .firstMatch("\ud800\udc00\udc00"));
- assertNull(RegExp(r"\ud800\u{dc00}+", unicode: true)
- .firstMatch("\ud800\udc00\udc00"));
- assertNull(RegExp(r"\u{d800}\udc00+", unicode: true)
- .firstMatch("\ud800\udc00\udc00"));
-}
diff --git a/tests/corelib_2/regexp/unicode-property-binary_test.dart b/tests/corelib_2/regexp/unicode-property-binary_test.dart
deleted file mode 100644
index b8981a7..0000000
--- a/tests/corelib_2/regexp/unicode-property-binary_test.dart
+++ /dev/null
@@ -1,201 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- void t(RegExp re, String s) {
- assertTrue(re.hasMatch(s));
- }
-
- void f(RegExp re, String s) {
- assertFalse(re.hasMatch(s));
- }
-
- assertThrows(() => RegExp("\\p{Hiragana}", unicode: true));
- assertThrows(() => RegExp("\\p{Bidi_Class}", unicode: true));
- assertThrows(() => RegExp("\\p{Bidi_C=False}", unicode: true));
- assertThrows(() => RegExp("\\P{Bidi_Control=Y}", unicode: true));
- assertThrows(() => RegExp("\\p{AHex=Yes}", unicode: true));
-
- assertThrows(() => RegExp("\\p{Composition_Exclusion}", unicode: true));
- assertThrows(() => RegExp("\\p{CE}", unicode: true));
- assertThrows(() => RegExp("\\p{Full_Composition_Exclusion}", unicode: true));
- assertThrows(() => RegExp("\\p{Comp_Ex}", unicode: true));
- assertThrows(() => RegExp("\\p{Grapheme_Link}", unicode: true));
- assertThrows(() => RegExp("\\p{Gr_Link}", unicode: true));
- assertThrows(() => RegExp("\\p{Hyphen}", unicode: true));
- assertThrows(() => RegExp("\\p{NFD_Inert}", unicode: true));
- assertThrows(() => RegExp("\\p{NFDK_Inert}", unicode: true));
- assertThrows(() => RegExp("\\p{NFC_Inert}", unicode: true));
- assertThrows(() => RegExp("\\p{NFKC_Inert}", unicode: true));
- assertThrows(() => RegExp("\\p{Segment_Starter}", unicode: true));
-
- t(RegExp(r"\p{Alphabetic}", unicode: true), "æ");
- f(RegExp(r"\p{Alpha}", unicode: true), "1");
-
- t(RegExp(r"\p{ASCII_Hex_Digit}", unicode: true), "f");
- f(RegExp(r"\p{AHex}", unicode: true), "g");
-
- t(RegExp(r"\p{Bidi_Control}", unicode: true), "\u200e");
- f(RegExp(r"\p{Bidi_C}", unicode: true), "g");
-
- t(RegExp(r"\p{Bidi_Mirrored}", unicode: true), "(");
- f(RegExp(r"\p{Bidi_M}", unicode: true), "-");
-
- t(RegExp(r"\p{Case_Ignorable}", unicode: true), "\u02b0");
- f(RegExp(r"\p{CI}", unicode: true), "a");
-
- t(RegExp(r"\p{Changes_When_Casefolded}", unicode: true), "B");
- f(RegExp(r"\p{CWCF}", unicode: true), "1");
-
- t(RegExp(r"\p{Changes_When_Casemapped}", unicode: true), "b");
- f(RegExp(r"\p{CWCM}", unicode: true), "1");
-
- t(RegExp(r"\p{Changes_When_Lowercased}", unicode: true), "B");
- f(RegExp(r"\p{CWL}", unicode: true), "1");
-
- t(RegExp(r"\p{Changes_When_Titlecased}", unicode: true), "b");
- f(RegExp(r"\p{CWT}", unicode: true), "1");
-
- t(RegExp(r"\p{Changes_When_Uppercased}", unicode: true), "b");
- f(RegExp(r"\p{CWU}", unicode: true), "1");
-
- t(RegExp(r"\p{Dash}", unicode: true), "-");
- f(RegExp(r"\p{Dash}", unicode: true), "1");
-
- t(RegExp(r"\p{Default_Ignorable_Code_Point}", unicode: true), "\u00ad");
- f(RegExp(r"\p{DI}", unicode: true), "1");
-
- t(RegExp(r"\p{Deprecated}", unicode: true), "\u17a3");
- f(RegExp(r"\p{Dep}", unicode: true), "1");
-
- t(RegExp(r"\p{Diacritic}", unicode: true), "\u0301");
- f(RegExp(r"\p{Dia}", unicode: true), "1");
-
- t(RegExp(r"\p{Emoji}", unicode: true), "\u2603");
- f(RegExp(r"\p{Emoji}", unicode: true), "x");
-
- t(RegExp(r"\p{Emoji_Component}", unicode: true), "\u{1F1E6}");
- f(RegExp(r"\p{Emoji_Component}", unicode: true), "x");
-
- t(RegExp(r"\p{Emoji_Modifier_Base}", unicode: true), "\u{1F6CC}");
- f(RegExp(r"\p{Emoji_Modifier_Base}", unicode: true), "x");
-
- t(RegExp(r"\p{Emoji_Modifier}", unicode: true), "\u{1F3FE}");
- f(RegExp(r"\p{Emoji_Modifier}", unicode: true), "x");
-
- t(RegExp(r"\p{Emoji_Presentation}", unicode: true), "\u{1F308}");
- f(RegExp(r"\p{Emoji_Presentation}", unicode: true), "x");
-
- t(RegExp(r"\p{Extender}", unicode: true), "\u3005");
- f(RegExp(r"\p{Ext}", unicode: true), "x");
-
- t(RegExp(r"\p{Grapheme_Base}", unicode: true), " ");
- f(RegExp(r"\p{Gr_Base}", unicode: true), "\u0010");
-
- t(RegExp(r"\p{Grapheme_Extend}", unicode: true), "\u0300");
- f(RegExp(r"\p{Gr_Ext}", unicode: true), "x");
-
- t(RegExp(r"\p{Hex_Digit}", unicode: true), "a");
- f(RegExp(r"\p{Hex}", unicode: true), "g");
-
- t(RegExp(r"\p{ID_Continue}", unicode: true), "1");
- f(RegExp(r"\p{IDC}", unicode: true), ".");
-
- t(RegExp(r"\p{ID_Start}", unicode: true), "a");
- f(RegExp(r"\p{IDS}", unicode: true), "1");
-
- t(RegExp(r"\p{Ideographic}", unicode: true), "漢");
- f(RegExp(r"\p{Ideo}", unicode: true), "H");
-
- t(RegExp(r"\p{IDS_Binary_Operator}", unicode: true), "\u2FF0");
- f(RegExp(r"\p{IDSB}", unicode: true), "a");
-
- t(RegExp(r"\p{IDS_Trinary_Operator}", unicode: true), "\u2FF2");
- f(RegExp(r"\p{IDST}", unicode: true), "a");
-
- t(RegExp(r"\p{Join_Control}", unicode: true), "\u200c");
- f(RegExp(r"\p{Join_C}", unicode: true), "a");
-
- t(RegExp(r"\p{Logical_Order_Exception}", unicode: true), "\u0e40");
- f(RegExp(r"\p{LOE}", unicode: true), "a");
-
- t(RegExp(r"\p{Lowercase}", unicode: true), "a");
- f(RegExp(r"\p{Lower}", unicode: true), "A");
-
- t(RegExp(r"\p{Math}", unicode: true), "=");
- f(RegExp(r"\p{Math}", unicode: true), "A");
-
- t(RegExp(r"\p{Noncharacter_Code_Point}", unicode: true), "\uFDD0");
- f(RegExp(r"\p{NChar}", unicode: true), "A");
-
- t(RegExp(r"\p{Pattern_Syntax}", unicode: true), "\u0021");
- f(RegExp(r"\p{NChar}", unicode: true), "A");
-
- t(RegExp(r"\p{Pattern_White_Space}", unicode: true), "\u0009");
- f(RegExp(r"\p{Pat_Syn}", unicode: true), "A");
-
- t(RegExp(r"\p{Quotation_Mark}", unicode: true), "'");
- f(RegExp(r"\p{QMark}", unicode: true), "A");
-
- t(RegExp(r"\p{Radical}", unicode: true), "\u2FAD");
- f(RegExp(r"\p{Radical}", unicode: true), "A");
-
- t(RegExp(r"\p{Regional_Indicator}", unicode: true), "\u{1F1E6}");
- f(RegExp(r"\p{Regional_Indicator}", unicode: true), "A");
-
- t(RegExp(r"\p{Sentence_Terminal}", unicode: true), "!");
- f(RegExp(r"\p{STerm}", unicode: true), "A");
-
- t(RegExp(r"\p{Soft_Dotted}", unicode: true), "i");
- f(RegExp(r"\p{SD}", unicode: true), "A");
-
- t(RegExp(r"\p{Terminal_Punctuation}", unicode: true), ".");
- f(RegExp(r"\p{Term}", unicode: true), "A");
-
- t(RegExp(r"\p{Unified_Ideograph}", unicode: true), "\u4e00");
- f(RegExp(r"\p{UIdeo}", unicode: true), "A");
-
- t(RegExp(r"\p{Uppercase}", unicode: true), "A");
- f(RegExp(r"\p{Upper}", unicode: true), "a");
-
- t(RegExp(r"\p{Variation_Selector}", unicode: true), "\uFE00");
- f(RegExp(r"\p{VS}", unicode: true), "A");
-
- t(RegExp(r"\p{White_Space}", unicode: true), " ");
- f(RegExp(r"\p{WSpace}", unicode: true), "A");
-
- t(RegExp(r"\p{XID_Continue}", unicode: true), "1");
- f(RegExp(r"\p{XIDC}", unicode: true), " ");
-
- t(RegExp(r"\p{XID_Start}", unicode: true), "A");
- f(RegExp(r"\p{XIDS}", unicode: true), " ");
-}
diff --git a/tests/corelib_2/regexp/unicode-property-char-class_test.dart b/tests/corelib_2/regexp/unicode-property-char-class_test.dart
deleted file mode 100644
index db87fad..0000000
--- a/tests/corelib_2/regexp/unicode-property-char-class_test.dart
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2011 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- assertThrows(() => RegExp("[\\p]", unicode: true));
- assertThrows(() => RegExp("[\\p{garbage}]", unicode: true));
- assertThrows(() => RegExp("[\\p{}]", unicode: true));
- assertThrows(() => RegExp("[\\p{]", unicode: true));
- assertThrows(() => RegExp("[\\p}]", unicode: true));
- assertThrows(() => RegExp("^[\\p{Lu}-\\p{Ll}]+\$", unicode: true));
-
- assertTrue(RegExp(r"^[\p{Lu}\p{Ll}]+$", unicode: true).hasMatch("ABCabc"));
- assertTrue(RegExp(r"^[\p{Lu}-]+$", unicode: true).hasMatch("ABC-"));
- assertFalse(RegExp(r"^[\P{Lu}\p{Ll}]+$", unicode: true).hasMatch("ABCabc"));
- assertTrue(RegExp(r"^[\P{Lu}\p{Ll}]+$", unicode: true).hasMatch("abc"));
- assertTrue(RegExp(r"^[\P{Lu}]+$", unicode: true).hasMatch("abc123"));
- assertFalse(RegExp(r"^[\P{Lu}]+$", unicode: true).hasMatch("XYZ"));
- assertTrue(RegExp(r"[\p{Math}]", unicode: true).hasMatch("+"));
- assertTrue(RegExp(r"[\P{Bidi_M}]", unicode: true).hasMatch(" "));
- assertTrue(RegExp(r"[\p{Hex}]", unicode: true).hasMatch("A"));
-
- assertTrue(RegExp(r"^[^\P{Lu}]+$", unicode: true).hasMatch("XYZ"));
- assertFalse(RegExp(r"^[^\p{Lu}\p{Ll}]+$", unicode: true).hasMatch("abc"));
- assertFalse(RegExp(r"^[^\p{Lu}\p{Ll}]+$", unicode: true).hasMatch("ABC"));
- assertTrue(RegExp(r"^[^\p{Lu}\p{Ll}]+$", unicode: true).hasMatch("123"));
- assertTrue(RegExp(r"^[^\p{Lu}\P{Ll}]+$", unicode: true).hasMatch("abc"));
-}
diff --git a/tests/corelib_2/regexp/unicode-property-enumerated_test.dart b/tests/corelib_2/regexp/unicode-property-enumerated_test.dart
deleted file mode 100644
index cfb3e02..0000000
--- a/tests/corelib_2/regexp/unicode-property-enumerated_test.dart
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- assertThrows(() => RegExp("\\p{Bidi_Class=L}+", unicode: true));
- assertThrows(() => RegExp("\\p{bc=Left_To_Right}+", unicode: true));
- assertThrows(() => RegExp("\\p{bc=AL}+", unicode: true));
- assertThrows(() => RegExp("\\p{bc=Arabic_Letter}+", unicode: true));
-
- assertThrows(() => RegExp("\\p{Line_Break=Glue}", unicode: true));
- assertThrows(() => RegExp("\\p{lb=AL}", unicode: true));
-
- assertThrows(() => RegExp("\\p{Block=}", unicode: true));
- assertThrows(() => RegExp("\\p{=}", unicode: true));
- assertThrows(() => RegExp("\\p{=L}", unicode: true));
- assertThrows(() => RegExp("\\p{=Hiragana}", unicode: true));
- assertThrows(() => RegExp("\\p{Block=CJK=}", unicode: true));
-
- assertThrows(() => RegExp("\\p{Age=V8_0}", unicode: true));
- assertDoesNotThrow(
- () => RegExp("\\p{General_Category=Letter}", unicode: true));
- assertDoesNotThrow(() => RegExp("\\p{gc=L}", unicode: true));
- assertThrows(
- () => RegExp("\\p{General_Category_Mask=Letter}", unicode: true));
- assertThrows(() => RegExp("\\p{gcm=L}", unicode: true));
-}
diff --git a/tests/corelib_2/regexp/unicode-property-exact-match_test.dart b/tests/corelib_2/regexp/unicode-property-exact-match_test.dart
deleted file mode 100644
index bbf99d0..0000000
--- a/tests/corelib_2/regexp/unicode-property-exact-match_test.dart
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- assertThrows(() => RegExp("\\p{In CJK}", unicode: true));
- assertThrows(() => RegExp("\\p{InCJKUnifiedIdeographs}", unicode: true));
- assertThrows(() => RegExp("\\p{InCJK}", unicode: true));
- assertThrows(() => RegExp("\\p{InCJK_Unified_Ideographs}", unicode: true));
-
- assertThrows(() => RegExp("\\p{InCyrillic_Sup}", unicode: true));
- assertThrows(() => RegExp("\\p{InCyrillic_Supplement}", unicode: true));
- assertThrows(() => RegExp("\\p{InCyrillic_Supplementary}", unicode: true));
- assertThrows(() => RegExp("\\p{InCyrillicSupplementary}", unicode: true));
- assertThrows(() => RegExp("\\p{InCyrillic_supplementary}", unicode: true));
-
- assertDoesNotThrow(() => RegExp("\\p{C}", unicode: true));
- assertDoesNotThrow(() => RegExp("\\p{Other}", unicode: true));
- assertDoesNotThrow(() => RegExp("\\p{Cc}", unicode: true));
- assertDoesNotThrow(() => RegExp("\\p{Control}", unicode: true));
- assertDoesNotThrow(() => RegExp("\\p{cntrl}", unicode: true));
- assertDoesNotThrow(() => RegExp("\\p{M}", unicode: true));
- assertDoesNotThrow(() => RegExp("\\p{Mark}", unicode: true));
- assertDoesNotThrow(() => RegExp("\\p{Combining_Mark}", unicode: true));
- assertThrows(() => RegExp("\\p{Combining Mark}", unicode: true));
-
- assertDoesNotThrow(() => RegExp("\\p{Script=Copt}", unicode: true));
- assertThrows(() => RegExp("\\p{Coptic}", unicode: true));
- assertThrows(() => RegExp("\\p{Qaac}", unicode: true));
- assertThrows(() => RegExp("\\p{Egyp}", unicode: true));
- assertDoesNotThrow(
- () => RegExp("\\p{Script=Egyptian_Hieroglyphs}", unicode: true));
- assertThrows(() => RegExp("\\p{EgyptianHieroglyphs}", unicode: true));
-
- assertThrows(() => RegExp("\\p{BidiClass=LeftToRight}", unicode: true));
- assertThrows(() => RegExp("\\p{BidiC=LeftToRight}", unicode: true));
- assertThrows(() => RegExp("\\p{bidi_c=Left_To_Right}", unicode: true));
-
- assertThrows(() => RegExp("\\p{Block=CJK}", unicode: true));
- assertThrows(() => RegExp("\\p{Block = CJK}", unicode: true));
- assertThrows(() => RegExp("\\p{Block=cjk}", unicode: true));
- assertThrows(() => RegExp("\\p{BLK=CJK}", unicode: true));
-}
diff --git a/tests/corelib_2/regexp/unicode-property-general-category_test.dart b/tests/corelib_2/regexp/unicode-property-general-category_test.dart
deleted file mode 100644
index cb94cac..0000000
--- a/tests/corelib_2/regexp/unicode-property-general-category_test.dart
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- assertThrows(() => RegExp("\\p", unicode: true));
- assertThrows(() => RegExp("\\p{garbage}", unicode: true));
- assertThrows(() => RegExp("\\p{}", unicode: true));
- assertThrows(() => RegExp("\\p{", unicode: true));
- assertThrows(() => RegExp("\\p}", unicode: true));
- assertThrows(() => RegExp("\\pL", unicode: true));
- assertThrows(() => RegExp("\\P", unicode: true));
- assertThrows(() => RegExp("\\P{garbage}", unicode: true));
- assertThrows(() => RegExp("\\P{}", unicode: true));
- assertThrows(() => RegExp("\\P{", unicode: true));
- assertThrows(() => RegExp("\\P}", unicode: true));
- assertThrows(() => RegExp("\\PL", unicode: true));
-
- assertTrue(RegExp(r"\p{Ll}", unicode: true).hasMatch("a"));
- assertFalse(RegExp(r"\P{Ll}", unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"\P{Ll}", unicode: true).hasMatch("A"));
- assertFalse(RegExp(r"\p{Ll}", unicode: true).hasMatch("A"));
- assertTrue(RegExp(r"\p{Ll}", unicode: true).hasMatch("\u{1D7BE}"));
- assertFalse(RegExp(r"\P{Ll}", unicode: true).hasMatch("\u{1D7BE}"));
- assertFalse(RegExp(r"\p{Ll}", unicode: true).hasMatch("\u{1D5E3}"));
- assertTrue(RegExp(r"\P{Ll}", unicode: true).hasMatch("\u{1D5E3}"));
-
- assertTrue(
- RegExp(r"\p{Ll}", caseSensitive: false, unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"\p{Ll}", caseSensitive: false, unicode: true)
- .hasMatch("\u{118D4}"));
- assertTrue(
- RegExp(r"\p{Ll}", caseSensitive: false, unicode: true).hasMatch("A"));
- assertTrue(RegExp(r"\p{Ll}", caseSensitive: false, unicode: true)
- .hasMatch("\u{118B4}"));
- assertTrue(
- RegExp(r"\P{Ll}", caseSensitive: false, unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"\P{Ll}", caseSensitive: false, unicode: true)
- .hasMatch("\u{118D4}"));
- assertTrue(
- RegExp(r"\P{Ll}", caseSensitive: false, unicode: true).hasMatch("A"));
- assertTrue(RegExp(r"\P{Ll}", caseSensitive: false, unicode: true)
- .hasMatch("\u{118B4}"));
-
- assertTrue(RegExp(r"\p{Lu}", unicode: true).hasMatch("A"));
- assertFalse(RegExp(r"\P{Lu}", unicode: true).hasMatch("A"));
- assertTrue(RegExp(r"\P{Lu}", unicode: true).hasMatch("a"));
- assertFalse(RegExp(r"\p{Lu}", unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"\p{Lu}", unicode: true).hasMatch("\u{1D5E3}"));
- assertFalse(RegExp(r"\P{Lu}", unicode: true).hasMatch("\u{1D5E3}"));
- assertFalse(RegExp(r"\p{Lu}", unicode: true).hasMatch("\u{1D7BE}"));
- assertTrue(RegExp(r"\P{Lu}", unicode: true).hasMatch("\u{1D7BE}"));
-
- assertTrue(
- RegExp(r"\p{Lu}", caseSensitive: false, unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"\p{Lu}", caseSensitive: false, unicode: true)
- .hasMatch("\u{118D4}"));
- assertTrue(
- RegExp(r"\p{Lu}", caseSensitive: false, unicode: true).hasMatch("A"));
- assertTrue(RegExp(r"\p{Lu}", caseSensitive: false, unicode: true)
- .hasMatch("\u{118B4}"));
- assertTrue(
- RegExp(r"\P{Lu}", caseSensitive: false, unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"\P{Lu}", caseSensitive: false, unicode: true)
- .hasMatch("\u{118D4}"));
- assertTrue(
- RegExp(r"\P{Lu}", caseSensitive: false, unicode: true).hasMatch("A"));
- assertTrue(RegExp(r"\P{Lu}", caseSensitive: false, unicode: true)
- .hasMatch("\u{118B4}"));
-
- assertTrue(RegExp(r"\p{Sm}", unicode: true).hasMatch("+"));
- assertFalse(RegExp(r"\P{Sm}", unicode: true).hasMatch("+"));
- assertTrue(RegExp(r"\p{Sm}", unicode: true).hasMatch("\u{1D6C1}"));
- assertFalse(RegExp(r"\P{Sm}", unicode: true).hasMatch("\u{1D6C1}"));
-
- assertFalse(RegExp(r"\p{L}", unicode: true).hasMatch("\uA6EE"));
- assertTrue(RegExp(r"\P{L}", unicode: true).hasMatch("\uA6EE"));
-
- assertTrue(RegExp(r"\p{Lowercase_Letter}", unicode: true).hasMatch("a"));
- assertTrue(RegExp(r"\p{Math_Symbol}", unicode: true).hasMatch("+"));
-
- assertTrue(RegExp(r"\p{gc=Ll}", unicode: true).hasMatch("a"));
- assertTrue(
- RegExp(r"\p{General_Category=Math_Symbol}", unicode: true).hasMatch("+"));
- assertTrue(RegExp(r"\p{General_Category=L}", unicode: true).hasMatch("X"));
-}
diff --git a/tests/corelib_2/regexp/unicode-property-invalid_test.dart b/tests/corelib_2/regexp/unicode-property-invalid_test.dart
deleted file mode 100644
index 8dddbc1..0000000
--- a/tests/corelib_2/regexp/unicode-property-invalid_test.dart
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- assertThrows(() => RegExp("\p{Block=ASCII}+", unicode: true));
- assertThrows(() => RegExp("\p{Block=ASCII}+", unicode: true));
- assertThrows(() => RegExp("\p{Block=Basic_Latin}+", unicode: true));
- assertThrows(() => RegExp("\p{Block=Basic_Latin}+", unicode: true));
-
- assertThrows(() => RegExp("\p{blk=CJK}+", unicode: true));
- assertThrows(() => RegExp("\p{blk=CJK_Unified_Ideographs}+", unicode: true));
- assertThrows(() => RegExp("\p{blk=CJK}+", unicode: true));
- assertThrows(() => RegExp("\p{blk=CJK_Unified_Ideographs}+", unicode: true));
-
- assertThrows(() => RegExp("\p{Block=ASCII}+", unicode: true));
- assertThrows(() => RegExp("\p{Block=ASCII}+", unicode: true));
- assertThrows(() => RegExp("\p{Block=Basic_Latin}+", unicode: true));
- assertThrows(() => RegExp("\p{Block=Basic_Latin}+", unicode: true));
-
- assertThrows(() => RegExp("\p{NFKD_Quick_Check=Y}+", unicode: true));
- assertThrows(() => RegExp("\p{NFKD_QC=Yes}+", unicode: true));
-
- assertThrows(() => RegExp("\p{Numeric_Type=Decimal}+", unicode: true));
- assertThrows(() => RegExp("\p{nt=De}+", unicode: true));
-
- assertThrows(() => RegExp("\p{Bidi_Class=Arabic_Letter}+", unicode: true));
- assertThrows(() => RegExp("\p{Bidi_Class=AN}+", unicode: true));
-
- assertThrows(() => RegExp("\p{ccc=OV}+", unicode: true));
-
- assertThrows(() => RegExp("\p{Sentence_Break=Format}+", unicode: true));
-
- assertThrows(() => RegExp("\\p{In}", unicode: true));
- assertThrows(() => RegExp("\\pI", unicode: true));
- assertThrows(() => RegExp("\\p{I}", unicode: true));
- assertThrows(() => RegExp("\\p{CJK}", unicode: true));
-
- assertThrows(() => RegExp("\\p{}", unicode: true));
-}
diff --git a/tests/corelib_2/regexp/unicode-property-scripts_test.dart b/tests/corelib_2/regexp/unicode-property-scripts_test.dart
deleted file mode 100644
index 5adec29..0000000
--- a/tests/corelib_2/regexp/unicode-property-scripts_test.dart
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- void t(RegExp re, String s) {
- assertTrue(re.hasMatch(s));
- }
-
- void f(RegExp re, String s) {
- assertFalse(re.hasMatch(s));
- }
-
- t(RegExp(r"\p{Script=Common}+", unicode: true), ".");
- f(RegExp(r"\p{Script=Common}+", unicode: true),
- "supercalifragilisticexpialidocious");
-
- t(RegExp(r"\p{Script=Han}+", unicode: true), "话说天下大势,分久必合,合久必分");
- t(RegExp(r"\p{Script=Hani}+", unicode: true), "吾庄后有一桃园,花开正盛");
- f(RegExp(r"\p{Script=Han}+", unicode: true), "おはようございます");
- f(RegExp(r"\p{Script=Hani}+", unicode: true),
- "Something is rotten in the state of Denmark");
-
- t(RegExp(r"\p{Script=Latin}+", unicode: true),
- "Wie froh bin ich, daß ich weg bin!");
- t(RegExp(r"\p{Script=Latn}+", unicode: true),
- "It was a bright day in April, and the clocks were striking thirteen");
- f(RegExp(r"\p{Script=Latin}+", unicode: true), "奔腾千里荡尘埃,渡水登山紫雾开");
- f(RegExp(r"\p{Script=Latn}+", unicode: true), "いただきます");
-
- t(RegExp(r"\p{sc=Hiragana}", unicode: true), "いただきます");
- t(RegExp(r"\p{sc=Hira}", unicode: true), "ありがとうございました");
- f(RegExp(r"\p{sc=Hiragana}", unicode: true),
- "Als Gregor Samsa eines Morgens aus unruhigen Träumen erwachte");
- f(RegExp(r"\p{sc=Hira}", unicode: true), "Call me Ishmael");
-
- t(RegExp(r"\p{sc=Phoenician}", unicode: true), "\u{10900}\u{1091a}");
- t(RegExp(r"\p{sc=Phnx}", unicode: true), "\u{1091f}\u{10916}");
- f(RegExp(r"\p{sc=Phoenician}", unicode: true), "Arthur est un perroquet");
- f(RegExp(r"\p{sc=Phnx}", unicode: true), "设心狠毒非良士,操卓原来一路人");
-
- t(RegExp(r"\p{sc=Grek}", unicode: true),
- "ἄνδρα μοι ἔννεπε, μοῦσα, πολύτροπον, ὃς μάλα πολλὰ");
- t(RegExp(r"\p{sc=Greek}", unicode: true),
- "μῆνιν ἄειδε θεὰ Πηληϊάδεω Ἀχιλῆος");
- f(RegExp(r"\p{sc=Greek}", unicode: true), "高贤未服英雄志,屈节偏生杰士疑");
- f(RegExp(r"\p{sc=Greek}", unicode: true),
- "Mr. Jones, of the Manor Farm, had locked the hen-houses for the night");
-}
diff --git a/tests/corelib_2/regexp/unicode-property-special_test.dart b/tests/corelib_2/regexp/unicode-property-special_test.dart
deleted file mode 100644
index 1e0d2c1..0000000
--- a/tests/corelib_2/regexp/unicode-property-special_test.dart
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- void t(RegExp re, String s) {
- assertTrue(re.hasMatch(s));
- }
-
- void f(RegExp re, String s) {
- assertFalse(re.hasMatch(s));
- }
-
- t(RegExp(r"\p{ASCII}+", unicode: true), "abc123");
- f(RegExp(r"\p{ASCII}+", unicode: true), "ⓐⓑⓒ①②③");
- f(RegExp(r"\p{ASCII}+", unicode: true), "🄰🄱🄲①②③");
- f(RegExp(r"\P{ASCII}+", unicode: true), "abcd123");
- t(RegExp(r"\P{ASCII}+", unicode: true), "ⓐⓑⓒ①②③");
- t(RegExp(r"\P{ASCII}+", unicode: true), "🄰🄱🄲①②③");
-
- f(RegExp(r"[^\p{ASCII}]+", unicode: true), "abc123");
- f(RegExp(r"[\p{ASCII}]+", unicode: true), "ⓐⓑⓒ①②③");
- f(RegExp(r"[\p{ASCII}]+", unicode: true), "🄰🄱🄲①②③");
- t(RegExp(r"[^\P{ASCII}]+", unicode: true), "abcd123");
- t(RegExp(r"[\P{ASCII}]+", unicode: true), "ⓐⓑⓒ①②③");
- f(RegExp(r"[^\P{ASCII}]+", unicode: true), "🄰🄱🄲①②③");
-
- t(RegExp(r"\p{Any}+", unicode: true), "🄰🄱🄲①②③");
-
- shouldBe(
- RegExp(r"\p{Any}", unicode: true).firstMatch("\ud800\ud801"), ["\ud800"]);
- shouldBe(
- RegExp(r"\p{Any}", unicode: true).firstMatch("\udc00\udc01"), ["\udc00"]);
- shouldBe(RegExp(r"\p{Any}", unicode: true).firstMatch("\ud800\udc01"),
- ["\ud800\udc01"]);
- shouldBe(RegExp(r"\p{Any}", unicode: true).firstMatch("\udc01"), ["\udc01"]);
-
- f(RegExp(r"\P{Any}+", unicode: true), "123");
- f(RegExp(r"[\P{Any}]+", unicode: true), "123");
- t(RegExp(r"[\P{Any}\d]+", unicode: true), "123");
- t(RegExp(r"[^\P{Any}]+", unicode: true), "123");
-
- t(RegExp(r"\p{Assigned}+", unicode: true), "123");
- t(RegExp(r"\p{Assigned}+", unicode: true), "🄰🄱🄲");
- f(RegExp(r"\p{Assigned}+", unicode: true), "\ufdd0");
- f(RegExp(r"\p{Assigned}+", unicode: true), "\u{fffff}");
-
- f(RegExp(r"\P{Assigned}+", unicode: true), "123");
- f(RegExp(r"\P{Assigned}+", unicode: true), "🄰🄱🄲");
- t(RegExp(r"\P{Assigned}+", unicode: true), "\ufdd0");
- t(RegExp(r"\P{Assigned}+", unicode: true), "\u{fffff}");
- f(RegExp(r"\P{Assigned}", unicode: true), "");
-
- t(RegExp(r"[^\P{Assigned}]+", unicode: true), "123");
- f(RegExp(r"[\P{Assigned}]+", unicode: true), "🄰🄱🄲");
- f(RegExp(r"[^\P{Assigned}]+", unicode: true), "\ufdd0");
- t(RegExp(r"[\P{Assigned}]+", unicode: true), "\u{fffff}");
- f(RegExp(r"[\P{Assigned}]", unicode: true), "");
-
- f(RegExp(r"[^\u1234\p{ASCII}]+", unicode: true), "\u1234");
- t(RegExp(r"[x\P{ASCII}]+", unicode: true), "x");
- t(RegExp(r"[\u1234\p{ASCII}]+", unicode: true), "\u1234");
-
-// Contributory binary properties are not supported.
- assertThrows(() => RegExp("\\p{Other_Alphabetic}", unicode: true));
- assertThrows(() => RegExp("\\P{OAlpha}", unicode: true));
- assertThrows(
- () => RegExp("\\p{Other_Default_Ignorable_Code_Point}", unicode: true));
- assertThrows(() => RegExp("\\P{ODI}", unicode: true));
- assertThrows(() => RegExp("\\p{Other_Grapheme_Extend}", unicode: true));
- assertThrows(() => RegExp("\\P{OGr_Ext}", unicode: true));
- assertThrows(() => RegExp("\\p{Other_ID_Continue}", unicode: true));
- assertThrows(() => RegExp("\\P{OIDC}", unicode: true));
- assertThrows(() => RegExp("\\p{Other_ID_Start}", unicode: true));
- assertThrows(() => RegExp("\\P{OIDS}", unicode: true));
- assertThrows(() => RegExp("\\p{Other_Lowercase}", unicode: true));
- assertThrows(() => RegExp("\\P{OLower}", unicode: true));
- assertThrows(() => RegExp("\\p{Other_Math}", unicode: true));
- assertThrows(() => RegExp("\\P{OMath}", unicode: true));
- assertThrows(() => RegExp("\\p{Other_Uppercase}", unicode: true));
- assertThrows(() => RegExp("\\P{OUpper}", unicode: true));
-}
diff --git a/tests/corelib_2/regexp/unicode-regexp-backrefs_test.dart b/tests/corelib_2/regexp/unicode-regexp-backrefs_test.dart
deleted file mode 100644
index 2fcb7c1..0000000
--- a/tests/corelib_2/regexp/unicode-regexp-backrefs_test.dart
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-String replace(String string) {
- return string
- .replaceAll("L", "\ud800")
- .replaceAll("l", "\ud801")
- .replaceAll("T", "\udc00")
- .replaceAll(".", "[^]");
-}
-
-void test(List<String> expectation, String regexp_source, String subject) {
- if (expectation != null) expectation = expectation.map(replace).toList();
- subject = replace(subject);
- regexp_source = replace(regexp_source);
- shouldBe(new RegExp(regexp_source, unicode: true).firstMatch(subject),
- expectation);
-}
-
-void main() {
- // Back reference does not end in the middle of a surrogate pair.
- test(null, "(L)\\1", "LLT");
- test(["LLTLl", "L", "l"], "(L).*\\1(.)", "LLTLl");
- test(null, "(aL).*\\1", "aLaLT");
- test(["aLaLTaLl", "aL", "l"], "(aL).*\\1(.)", "aLaLTaLl");
-
- var s = "TabcLxLTabcLxTabcLTyTabcLz";
- test([s, "TabcL", "z"], "([^x]+).*\\1(.)", s);
-
- // Back reference does not start in the middle of a surrogate pair.
- test(["TLTabTc", "T", "c"], "(T).*\\1(.)", "TLTabTc");
-
- // Lookbehinds.
- test(null, "(?<=\\1(T)x)", "LTTx");
- test(["", "b", "T"], "(?<=(.)\\2.*(T)x)", "bTaLTTx");
- test(null, "(?<=\\1.*(L)x)", "LTLx");
- test(["", "b", "L"], "(?<=(.)\\2.*(L)x)", "bLaLTLx");
-
- test(null, "([^x]+)x*\\1", "LxLT");
- test(null, "([^x]+)x*\\1", "TxLT");
- test(null, "([^x]+)x*\\1", "LTxL");
- test(null, "([^x]+)x*\\1", "LTxT");
- test(null, "([^x]+)x*\\1", "xLxLT");
- test(null, "([^x]+)x*\\1", "xTxLT");
- test(null, "([^x]+)x*\\1", "xLTxL");
- test(null, "([^x]+)x*\\1", "xLTxT");
- test(null, "([^x]+)x*\\1", "xxxLxxLTxx");
- test(null, "([^x]+)x*\\1", "xxxTxxLTxx");
- test(null, "([^x]+)x*\\1", "xxxLTxxLxx");
- test(null, "([^x]+)x*\\1", "xxxLTxxTxx");
- test(["LTTxxLTT", "LTT"], "([^x]+)x*\\1", "xxxLTTxxLTTxx");
-}
diff --git a/tests/corelib_2/regexp/unicode-regexp-ignore-case_test.dart b/tests/corelib_2/regexp/unicode-regexp-ignore-case_test.dart
deleted file mode 100644
index c87047c..0000000
--- a/tests/corelib_2/regexp/unicode-regexp-ignore-case_test.dart
+++ /dev/null
@@ -1,135 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- // Non-unicode use toUpperCase mappings.
- assertFalse(RegExp(r"[\u00e5]", caseSensitive: false).hasMatch("\u212b"));
- assertFalse(
- RegExp(r"[\u212b]", caseSensitive: false).hasMatch("\u00e5\u1234"));
- assertFalse(RegExp(r"[\u212b]", caseSensitive: false).hasMatch("\u00e5"));
-
- assertTrue("\u212b".toLowerCase() == "\u00e5");
- assertTrue("\u00c5".toLowerCase() == "\u00e5");
- assertTrue("\u00e5".toUpperCase() == "\u00c5");
-
- // Unicode uses case folding mappings.
- assertTrue(RegExp(r"\u00e5", caseSensitive: false, unicode: true)
- .hasMatch("\u212b"));
- assertTrue(RegExp(r"\u00e5", caseSensitive: false, unicode: true)
- .hasMatch("\u00c5"));
- assertTrue(RegExp(r"\u00e5", caseSensitive: false, unicode: true)
- .hasMatch("\u00e5"));
- assertTrue(RegExp(r"\u00e5", caseSensitive: false, unicode: true)
- .hasMatch("\u212b"));
- assertTrue(RegExp(r"\u00c5", caseSensitive: false, unicode: true)
- .hasMatch("\u00e5"));
- assertTrue(RegExp(r"\u00c5", caseSensitive: false, unicode: true)
- .hasMatch("\u212b"));
- assertTrue(RegExp(r"\u00c5", caseSensitive: false, unicode: true)
- .hasMatch("\u00c5"));
- assertTrue(RegExp(r"\u212b", caseSensitive: false, unicode: true)
- .hasMatch("\u00c5"));
- assertTrue(RegExp(r"\u212b", caseSensitive: false, unicode: true)
- .hasMatch("\u00e5"));
- assertTrue(RegExp(r"\u212b", caseSensitive: false, unicode: true)
- .hasMatch("\u212b"));
-
- // Non-BMP.
- assertFalse(RegExp(r"\u{10400}", caseSensitive: false).hasMatch("\u{10428}"));
- assertTrue(RegExp(r"\u{10400}", caseSensitive: false, unicode: true)
- .hasMatch("\u{10428}"));
- assertTrue(RegExp(r"\ud801\udc00", caseSensitive: false, unicode: true)
- .hasMatch("\u{10428}"));
- assertTrue(RegExp(r"[\u{10428}]", caseSensitive: false, unicode: true)
- .hasMatch("\u{10400}"));
- assertTrue(RegExp(r"[\ud801\udc28]", caseSensitive: false, unicode: true)
- .hasMatch("\u{10400}"));
- shouldBe(
- RegExp(r"[\uff40-\u{10428}]+", caseSensitive: false, unicode: true)
- .firstMatch("\uff21\u{10400}abc"),
- ["\uff21\u{10400}"]);
- shouldBe(
- RegExp(r"[^\uff40-\u{10428}]+", caseSensitive: false, unicode: true)
- .firstMatch("\uff21\u{10400}abc\uff23"),
- ["abc"]);
- shouldBe(
- RegExp(r"[\u24d5-\uff33]+", caseSensitive: false, unicode: true)
- .firstMatch("\uff54\uff53\u24bb\u24ba"),
- ["\uff53\u24bb"]);
-
- // Full mappings are ignored.
- assertFalse(
- RegExp(r"\u00df", caseSensitive: false, unicode: true).hasMatch("SS"));
- assertFalse(RegExp(r"\u1f8d", caseSensitive: false, unicode: true)
- .hasMatch("\u1f05\u03b9"));
-
- // Simple mappings work.
- assertTrue(RegExp(r"\u1f8d", caseSensitive: false, unicode: true)
- .hasMatch("\u1f85"));
-
- // Common mappings work.
- assertTrue(RegExp(r"\u1f6b", caseSensitive: false, unicode: true)
- .hasMatch("\u1f63"));
-
- // Back references.
- shouldBe(
- RegExp(r"(.)\1\1", caseSensitive: false, unicode: true)
- .firstMatch("\u00e5\u212b\u00c5"),
- ["\u00e5\u212b\u00c5", "\u00e5"]);
- shouldBe(
- RegExp(r"(.)\1", caseSensitive: false, unicode: true)
- .firstMatch("\u{118aa}\u{118ca}"),
- ["\u{118aa}\u{118ca}", "\u{118aa}"]);
-
- // Misc.
- assertTrue(RegExp(r"\u00e5\u00e5\u00e5", caseSensitive: false, unicode: true)
- .hasMatch("\u212b\u00e5\u00c5"));
- assertTrue(RegExp(r"AB\u{10400}", caseSensitive: false, unicode: true)
- .hasMatch("ab\u{10428}"));
-
- // Non-Latin1 maps to Latin1.
- shouldBe(
- RegExp(r"^\u017F", caseSensitive: false, unicode: true).firstMatch("s"),
- ["s"]);
- shouldBe(
- RegExp(r"^\u017F", caseSensitive: false, unicode: true)
- .firstMatch("s\u1234"),
- ["s"]);
- shouldBe(
- RegExp(r"^a[\u017F]", caseSensitive: false, unicode: true)
- .firstMatch("as"),
- ["as"]);
- shouldBe(
- RegExp(r"^a[\u017F]", caseSensitive: false, unicode: true)
- .firstMatch("as\u1234"),
- ["as"]);
-}
diff --git a/tests/corelib_2/regexp/unicode-regexp-match-index_test.dart b/tests/corelib_2/regexp/unicode-regexp-match-index_test.dart
deleted file mode 100644
index 3e69658..0000000
--- a/tests/corelib_2/regexp/unicode-regexp-match-index_test.dart
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- // Testing handling of paired and non-paired surrogates in unicode mode
- var r = new RegExp(r".", unicode: true);
-
- var m = r.matchAsPrefix("\ud800\udc00\ud801\udc01");
- shouldBe(m, ["\ud800\udc00"]);
- assertEquals(m.end, 2);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 1);
- shouldBe(m, ["\ud800\udc00"]);
- assertEquals(m.end, 2);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 2);
- shouldBe(m, ["\ud801\udc01"]);
- assertEquals(m.end, 4);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 3);
- shouldBe(m, ["\ud801\udc01"]);
- assertEquals(m.end, 4);
-
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 4));
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\ud802", 3);
- shouldBe(m, ["\ud802"]);
-
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\ud802", 4));
-
- // Testing handling of paired and non-paired surrogates in non-unicode mode
- r = new RegExp(r".");
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01");
- shouldBe(m, ["\ud800"]);
- assertEquals(m.end, 1);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 1);
- shouldBe(m, ["\udc00"]);
- assertEquals(m.end, 2);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 2);
- shouldBe(m, ["\ud801"]);
- assertEquals(m.end, 3);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 3);
- shouldBe(m, ["\udc01"]);
- assertEquals(m.end, 4);
-
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 4));
-
- // Testing same with start anchor, unicode mode.
- r = new RegExp("^.", unicode: true);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01");
- shouldBe(m, ["\ud800\udc00"]);
- assertEquals(2, m.end);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 1);
- shouldBe(m, ["\ud800\udc00"]);
- assertEquals(2, m.end);
-
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 2));
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 3));
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 4));
-
- // Testing same with start anchor, non-unicode mode.
- r = new RegExp("^.");
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01");
- shouldBe(m, ["\ud800"]);
- assertEquals(1, m.end);
-
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 1));
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 2));
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 3));
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 4));
-
- // Now with both anchored and not as alternatives (with the anchored
- // version as a captured group), unicode mode.
- r = new RegExp(r"(?:(^.)|.)", unicode: true);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01");
- shouldBe(m, ["\ud800\udc00", "\ud800\udc00"]);
- assertEquals(m.end, 2);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 1);
- shouldBe(m, ["\ud800\udc00", "\ud800\udc00"]);
- assertEquals(m.end, 2);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 2);
- shouldBe(m, ["\ud801\udc01", null]);
- assertEquals(m.end, 4);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 3);
- shouldBe(m, ["\ud801\udc01", null]);
- assertEquals(m.end, 4);
-
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 4));
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\ud802", 3);
- shouldBe(m, ["\ud802", null]);
-
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\ud802", 4));
-
- // Now with both anchored and not as alternatives (with the anchored
- // version as a captured group), non-unicode mode.
- r = new RegExp(r"(?:(^.)|.)");
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01");
- shouldBe(m, ["\ud800", "\ud800"]);
- assertEquals(m.end, 1);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 1);
- shouldBe(m, ["\udc00", null]);
- assertEquals(m.end, 2);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 2);
- shouldBe(m, ["\ud801", null]);
- assertEquals(m.end, 3);
-
- m = r.matchAsPrefix("\ud800\udc00\ud801\udc01", 3);
- shouldBe(m, ["\udc01", null]);
- assertEquals(m.end, 4);
-
- assertNull(r.matchAsPrefix("\ud800\udc00\ud801\udc01", 4));
-}
diff --git a/tests/corelib_2/regexp/unicode-regexp-restricted-syntax_test.dart b/tests/corelib_2/regexp/unicode-regexp-restricted-syntax_test.dart
deleted file mode 100644
index d894c7b..0000000
--- a/tests/corelib_2/regexp/unicode-regexp-restricted-syntax_test.dart
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- // test262/data/test/language/literals/regexp/u-dec-esc
- assertThrows(() => RegExp(r"\1", unicode: true));
- // test262/language/literals/regexp/u-invalid-char-range-a
- assertThrows(() => RegExp(r"[\w-a]", unicode: true));
- // test262/language/literals/regexp/u-invalid-char-range-b
- assertThrows(() => RegExp(r"[a-\w]", unicode: true));
- // test262/language/literals/regexp/u-invalid-char-esc
- assertThrows(() => RegExp(r"\c", unicode: true));
- assertThrows(() => RegExp(r"\c0", unicode: true));
- // test262/built-ins/RegExp/unicode_restricted_quantifiable_assertion
- assertThrows(() => RegExp(r"(?=.)*", unicode: true));
- assertThrows(() => RegExp(r"(?=.){1,2}", unicode: true));
- // test262/built-ins/RegExp/unicode_restricted_octal_escape
- assertThrows(() => RegExp(r"[\1]", unicode: true));
- assertThrows(() => RegExp(r"\00", unicode: true));
- assertThrows(() => RegExp(r"\09", unicode: true));
- // test262/built-ins/RegExp/unicode_restricted_identity_escape_alpha
- assertThrows(() => RegExp(r"[\c]", unicode: true));
- // test262/built-ins/RegExp/unicode_restricted_identity_escape_c
- assertThrows(() => RegExp(r"[\c0]", unicode: true));
- // test262/built-ins/RegExp/unicode_restricted_incomple_quantifier
- assertThrows(() => RegExp(r"a{", unicode: true));
- assertThrows(() => RegExp(r"a{1,", unicode: true));
- assertThrows(() => RegExp(r"{", unicode: true));
- assertThrows(() => RegExp(r"}", unicode: true));
- // test262/data/test/built-ins/RegExp/unicode_restricted_brackets
- assertThrows(() => RegExp(r"]", unicode: true));
- // test262/built-ins/RegExp/unicode_identity_escape
- assertDoesNotThrow(() => RegExp(r"\/", unicode: true));
-
- // escaped \0 (as NUL) is allowed inside a character class.
- shouldBe(RegExp(r"[\0]", unicode: true).firstMatch("\u0000"), ["\u0000"]);
- // unless it is followed by another digit.
- assertThrows(() => RegExp(r"[\00]", unicode: true));
- assertThrows(() => RegExp(r"[\01]", unicode: true));
- assertThrows(() => RegExp(r"[\09]", unicode: true));
- shouldBe(RegExp(r"[1\0a]+", unicode: true).firstMatch("b\u{0}1\u{0}a\u{0}2"),
- ["\u{0}1\u{0}a\u{0}"]);
- // escaped \- is allowed inside a character class.
- shouldBe(RegExp(r"[a\-z]", unicode: true).firstMatch("12-34"), ["-"]);
-}
diff --git a/tests/corelib_2/regexp/unicode-regexp-unanchored-advance_test.dart b/tests/corelib_2/regexp/unicode-regexp-unanchored-advance_test.dart
deleted file mode 100644
index e57d810..0000000
--- a/tests/corelib_2/regexp/unicode-regexp-unanchored-advance_test.dart
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2013 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- final s = "a" * 10000000 + "\u1234";
- shouldBe(
- RegExp(r"(\u1234)", unicode: true).firstMatch(s), ["\u1234", "\u1234"]);
-}
diff --git a/tests/corelib_2/regexp/unicode-regexp-zero-length_test.dart b/tests/corelib_2/regexp/unicode-regexp-zero-length_test.dart
deleted file mode 100644
index 1d07628..0000000
--- a/tests/corelib_2/regexp/unicode-regexp-zero-length_test.dart
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (c) 2019, the Dart project authors. All rights reserved.
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import 'package:expect/expect.dart';
-
-import 'v8_regexp_utils.dart';
-
-void main() {
- const L = "\ud800";
- const T = "\udc00";
- const x = "x";
-
- var r = RegExp(r"()"); // Not unicode.
- // Zero-length matches do not advance lastIndex.
- var m = r.matchAsPrefix(L + T + L + T);
- shouldBe(m, ["", ""]);
- assertEquals(0, m.end);
-
- m = r.matchAsPrefix(L + T + L + T, 1);
- shouldBe(m, ["", ""]);
- assertEquals(1, m.end);
-
- var u = RegExp(r"()", unicode: true);
-
- // Zero-length matches do not advance lastIndex (but do respect paired
- // surrogates).
- m = u.matchAsPrefix(L + T + L + T);
- shouldBe(m, ["", ""]);
- assertEquals(0, m.end);
-
- m = u.matchAsPrefix(L + T + L + T, 1);
- shouldBe(m, ["", ""]);
- assertEquals(0, m.end);
-
- // However, with repeating matches, we do advance from match to match.
- var ms = r.allMatches(L + T + L + T);
- assertEquals(5, ms.length);
- for (var i = 0; i < ms.length; i++) {
- shouldBe(ms.elementAt(i), ["", ""]);
- }
-
- // With unicode flag, we advance code point by code point.
- ms = u.allMatches(L + T + L + T);
- assertEquals(3, ms.length);
- for (var i = 0; i < ms.length; i++) {
- shouldBe(ms.elementAt(i), ["", ""]);
- }
-
- // Test with a lot of copies.
- const c = 1000;
- ms = u.allMatches((L + T) * c);
- assertEquals(c + 1, ms.length);
- for (var i = 0; i < ms.length; i++) {
- shouldBe(ms.elementAt(i), ["", ""]);
- }
-
- // Same with replaceAll().
- assertEquals(
- x + L + x + T + x + L + x + T + x, (L + T + L + T).replaceAll(r, "x"));
-
- assertEquals(x + L + T + x + L + T + x, (L + T + L + T).replaceAll(u, "x"));
-
- assertEquals((x + L + T) * c + x, ((L + T) * c).replaceAll(u, "x"));
-
- // Also test String#split.
- Expect.deepEquals(
- ["\u{12345}"], "\u{12345}".split(RegExp(r"(?:)", unicode: true)));
-}
diff --git a/tests/corelib_2/regexp/v8_regexp_utils.dart b/tests/corelib_2/regexp/v8_regexp_utils.dart
index 111ab90..cde750a 100644
--- a/tests/corelib_2/regexp/v8_regexp_utils.dart
+++ b/tests/corelib_2/regexp/v8_regexp_utils.dart
@@ -22,10 +22,6 @@
Expect.throws(fn, null, "Test $testid");
}
-void assertDoesNotThrow(fn, [num testid = null]) {
- fn();
-}
-
void assertNull(actual, [num testid = null]) {
Expect.isNull(actual, "Test $testid");
}