[dart2wasm] Initial regexp support.
Change-Id: Ia461c77979785bbc0510052a31f94bdd83babc01
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/250582
Reviewed-by: Aske Simon Christensen <askesc@google.com>
Commit-Queue: Joshua Litt <joshualitt@google.com>
diff --git a/pkg/dart2wasm/bin/run_wasm.js b/pkg/dart2wasm/bin/run_wasm.js
index 40d57a6..404e3c4 100644
--- a/pkg/dart2wasm/bin/run_wasm.js
+++ b/pkg/dart2wasm/bin/run_wasm.js
@@ -64,6 +64,15 @@
// A special symbol attached to functions that wrap Dart functions.
var jsWrappedDartFunctionSymbol = Symbol("JSWrappedDartFunction");
+// Calls a constructor with a variable number of arguments.
+function callConstructorVarArgs(constructor, args) {
+ // Apply bind to the constructor. We pass `null` as the first argument
+ // to `bind.apply` because this is `bind`'s unused context
+ // argument(`new` will explicitly create a new context).
+ var factoryFunction = constructor.bind.apply(constructor, [null, ...args]);
+ return new factoryFunction();
+}
+
// Imports for printing and event loop
var dart2wasm = {
printToConsole: function(string) {
@@ -204,6 +213,9 @@
isJSObject: function(o) {
return o instanceof Object;
},
+ // Returns true when `o` is an instance of the global `RegExp` constructor
+ // (a plain `instanceof` check).
+ isJSRegExp: function(o) {
+ return o instanceof RegExp;
+ },
roundtrip: function (o) {
// This function exists as a hook for the native JS -> Wasm type
// conversion rules. The Dart runtime will overload variants of this
@@ -229,12 +241,13 @@
callMethodVarArgs: function(object, name, args) {
return object[name].apply(object, args);
},
- callConstructorVarArgs: function(constructor, args) {
- // Apply bind to the constructor. We pass `null` as the first argument
- // to `bind.apply` because this is `bind`'s unused context
- // argument(`new` will explicitly create a new context).
- var factoryFunction = constructor.bind.apply(constructor, [null, ...args]);
- return new factoryFunction();
+ callConstructorVarArgs: callConstructorVarArgs,
+ // Non-throwing variant of `callConstructorVarArgs`: on success it returns
+ // the constructed object; if construction throws, the exception is caught
+ // and its string form (`String(e)`) is returned instead. Callers have to
+ // inspect the result to tell the two outcomes apart.
+ safeCallConstructorVarArgs: function(constructor, args) {
+ try {
+ return callConstructorVarArgs(constructor, args);
+ } catch (e) {
+ return String(e);
+ }
},
getTimeZoneNameForSeconds: function(secondsSinceEpoch) {
var date = new Date(secondsSinceEpoch * 1000);
@@ -298,6 +311,17 @@
}
return parseFloat(jsSource);
},
+ quoteStringForRegExp: function(string) {
+ // We specialize this method in the runtime to avoid the overhead of
+ // jumping back and forth between JS and Dart. This method is optimized
+ // to test before replacement, which should be much faster. This might
+ // be worth measuring in real world use cases though.
+ var jsString = stringFromDartString(string);
+ if (/[[\]{}()*+?.\\^$|]/.test(jsString)) {
+ jsString = jsString.replace(/[[\]{}()*+?.\\^$|]/g, '\\$&');
+ }
+ return stringToDartString(jsString);
+ },
};
function instantiate(filename, imports) {
diff --git a/sdk/lib/_internal/wasm/lib/core_patch.dart b/sdk/lib/_internal/wasm/lib/core_patch.dart
index 68f6517..b9e37c1 100644
--- a/sdk/lib/_internal/wasm/lib/core_patch.dart
+++ b/sdk/lib/_internal/wasm/lib/core_patch.dart
@@ -32,6 +32,8 @@
import "dart:_internal" as _internal show Symbol;
+import 'dart:_js_helper' show JSSyntaxRegExp, quoteStringForRegExp;
+
import "dart:collection"
show
HashMap,
diff --git a/sdk/lib/_internal/wasm/lib/js_helper.dart b/sdk/lib/_internal/wasm/lib/js_helper.dart
index 07cfb05..4af502a 100644
--- a/sdk/lib/_internal/wasm/lib/js_helper.dart
+++ b/sdk/lib/_internal/wasm/lib/js_helper.dart
@@ -6,16 +6,23 @@
library dart._js_helper;
import 'dart:_internal';
+import 'dart:collection';
import 'dart:typed_data';
import 'dart:wasm';
+part 'regexp_helper.dart';
+
/// [JSValue] is the root of the JS interop object hierarchy.
class JSValue {
final WasmAnyRef _ref;
JSValue(this._ref);
- static JSValue? box(WasmAnyRef? ref) => ref == null ? null : JSValue(ref);
+ // Currently we always explicitly box JS refs in [JSValue] objects. In the
+ // future, we will want to leave these values unboxed when possible, even when
+ // they are nullable.
+ static JSValue? box(WasmAnyRef? ref) =>
+ isDartNull(ref) ? null : JSValue(ref!);
WasmAnyRef toAnyRef() => _ref;
String toString() => jsStringToDartString(_ref);
@@ -39,9 +46,79 @@
}
extension ObjectToJS on Object {
- JSValue toJS() => JSValue(jsObjectFromDartObject(this));
+ /// Returns the result of passing this object to [jsObjectFromDartObject].
+ WasmAnyRef toAnyRef() => jsObjectFromDartObject(this);
+ /// Wraps the reference returned by [toAnyRef] in a [JSValue].
+ JSValue toJS() => JSValue(toAnyRef());
}
+/// Whether [ref] is to be treated as Dart `null`.
+///
+/// For now both `null` and `undefined` in JS map to `null` in Dart.
+bool isDartNull(WasmAnyRef? ref) {
+  if (ref == null) return true;
+  return isJSUndefined(ref);
+}
+
+/// Wrapper around a reference to a native JS array.
+class JSArray extends JSValue {
+  JSArray(WasmAnyRef ref) : super(ref);
+
+  /// Boxes [ref] as a [JSArray], or returns `null` when [isDartNull] reports
+  /// that [ref] maps to Dart `null`.
+  static JSArray? box(WasmAnyRef? ref) {
+    if (isDartNull(ref)) return null;
+    return JSArray(ref!);
+  }
+
+  /// Calls the array's `pop` method with no arguments and boxes the result.
+  JSValue? pop() {
+    final popped = callMethodVarArgsRaw(_ref, 'pop'.toAnyRef(), [].toAnyRef());
+    return JSValue.box(popped);
+  }
+
+  /// Reads the property keyed by [index] and boxes it.
+  JSValue? operator [](int index) {
+    final element = getPropertyRaw(_ref, intToJSNumber(index));
+    return JSValue.box(element);
+  }
+
+  /// Writes [value] (unwrapped to its raw reference, or `null`) to the
+  /// property keyed by [index].
+  void operator []=(int index, JSValue? value) {
+    setPropertyRaw(_ref, intToJSNumber(index), value?.toAnyRef());
+  }
+
+  /// The array's `length` property, floored to an [int].
+  int get length {
+    final rawLength = getPropertyRaw(_ref, 'length'.toAnyRef())!;
+    return toDartNumber(rawLength).floor();
+  }
+}
+
+/// Wrapper around a reference to any JS object literal.
+class JSObject extends JSValue {
+  JSObject(WasmAnyRef ref) : super(ref);
+
+  /// Boxes [ref] as a [JSObject], or returns `null` when [isDartNull] reports
+  /// that [ref] maps to Dart `null`.
+  static JSObject? box(WasmAnyRef? ref) {
+    if (isDartNull(ref)) return null;
+    return JSObject(ref!);
+  }
+
+  /// Reads the property named [key] and boxes it.
+  JSValue? operator [](String key) {
+    final property = getPropertyRaw(_ref, key.toAnyRef());
+    return JSValue.box(property);
+  }
+
+  /// Writes [value] (unwrapped to its raw reference, or `null`) to the
+  /// property named [key].
+  void operator []=(String key, JSValue? value) {
+    setPropertyRaw(_ref, key.toAnyRef(), value?.toAnyRef());
+  }
+}
+
+/// Iterates over the elements of a [JSArray], passing each element through
+/// [dartifyRaw] and casting the result to [T].
+class JSArrayIteratorAdapter<T> extends Iterator<T> {
+  final JSArray array;
+
+  /// Position of [current]; `-1` until [moveNext] has been called once.
+  int index = -1;
+
+  JSArrayIteratorAdapter(this.array);
+
+  /// Advances to the next element and returns whether there is one.
+  ///
+  /// Once the end of the array has been reached, further calls keep returning
+  /// `false` instead of throwing, as the [Iterator] contract requires. The
+  /// index is never moved more than one step past the last element.
+  @override
+  bool moveNext() {
+    // Read the length once per call; the getter performs a property lookup
+    // on the underlying JS array each time it is evaluated.
+    final length = array.length;
+    if (index < length) index++;
+    return index < length;
+  }
+
+  /// The element at [index], converted with [dartifyRaw] and cast to [T].
+  @override
+  T get current => dartifyRaw(array[index]?.toAnyRef()) as T;
+}
+
+/// Lazy [Iterable] view over a [JSArray].
+///
+/// Elements are only read from the array while iterating, through a fresh
+/// [JSArrayIteratorAdapter] per call to [iterator].
+class JSArrayIterableAdapter<T> extends EfficientLengthIterable<T> {
+  final JSArray array;
+
+  JSArrayIterableAdapter(this.array);
+
+  @override
+  Iterator<T> get iterator {
+    return JSArrayIteratorAdapter<T>(array);
+  }
+
+  /// The length reported by the wrapped [array].
+  @override
+  int get length {
+    return array.length;
+  }
+}
+
+/// Converts [i] to a JS number reference.
+///
+/// The value goes through `double` first to avoid converting to [BigInt] in
+/// the case of int64.
+WasmAnyRef intToJSNumber(int i) {
+  final asDouble = i.toDouble();
+  return toJSNumber(asDouble);
+}
+
WasmAnyRef? getConstructorString(String constructor) =>
getPropertyRaw(globalThisRaw(), constructor.toAnyRef());
@@ -113,6 +190,9 @@
@pragma("wasm:import", "dart2wasm.isJSObject")
external bool isJSObject(WasmAnyRef? o);
+/// Whether [object] is a JS `RegExp`.
+///
+/// Bound to the `dart2wasm.isJSRegExp` runtime import.
+@pragma("wasm:import", "dart2wasm.isJSRegExp")
+external bool isJSRegExp(WasmAnyRef object);
+
// The JS runtime will run helpful conversion routines between refs and bool /
// double. In the longer term hopefully we can find a way to avoid the round
// trip.
@@ -187,6 +267,10 @@
@pragma("wasm:import", "dart2wasm.callConstructorVarArgs")
external WasmAnyRef callConstructorVarArgsRaw(WasmAnyRef o, WasmAnyRef args);
+/// Calls the JS constructor [o] with the argument list [args].
+///
+/// Bound to the `dart2wasm.safeCallConstructorVarArgs` runtime import.
+/// NOTE(review): the JS side appears to return the stringified exception
+/// rather than throwing when construction fails, so callers must check what
+/// kind of value came back - confirm against the runtime implementation.
+@pragma("wasm:import", "dart2wasm.safeCallConstructorVarArgs")
+external WasmAnyRef safeCallConstructorVarArgsRaw(
+ WasmAnyRef o, WasmAnyRef args);
+
@pragma("wasm:import", "dart2wasm.hasProperty")
external bool hasPropertyRaw(WasmAnyRef o, WasmAnyRef name);
@@ -202,7 +286,7 @@
WasmAnyRef o, WasmAnyRef method, WasmAnyRef? args);
@pragma("wasm:import", "dart2wasm.stringify")
-external String stringifyRaw(WasmAnyRef? object);
+external String stringify(WasmAnyRef? object);
// Currently, `allowInterop` returns a Function type. This is unfortunate for
// Dart2wasm because it means arbitrary Dart functions can flow to JS util
@@ -409,6 +493,14 @@
return f;
}
+/// Looks up the property called [name] on the JS global object and returns
+/// it as the constructor reference.
+///
+/// The lookup result is asserted to be non-null.
+WasmAnyRef getConstructorRaw(String name) {
+  final constructor = getPropertyRaw(globalThisRaw(), name.toAnyRef());
+  return constructor!;
+}
+
+/// Equivalent to `Object.keys(object)`; the result is wrapped in a [JSArray].
+JSArray objectKeys(JSValue object) {
+  final objectConstructor = getConstructorRaw('Object');
+  final keys = callMethodVarArgsRaw(
+      objectConstructor, 'keys'.toAnyRef(), [object].toAnyRef());
+  return JSArray(keys!);
+}
+
/// Methods used by the wasm runtime.
@pragma("wasm:export", "\$listLength")
double _listLength(List list) => list.length.toDouble();
diff --git a/sdk/lib/_internal/wasm/lib/regexp_helper.dart b/sdk/lib/_internal/wasm/lib/regexp_helper.dart
new file mode 100644
index 0000000..e2de33d
--- /dev/null
+++ b/sdk/lib/_internal/wasm/lib/regexp_helper.dart
@@ -0,0 +1,270 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+part of dart._js_helper;
+
+// TODO(joshualitt): This is a fork of the DDC RegExp class. In the longer term,
+// with careful factoring we may be able to share this code.
+// TODO(joshualitt): We should be able to build this library off of static
+// interop.
+
+/// Returns a string for a RegExp pattern that matches [string]. This is done by
+/// escaping all RegExp metacharacters.
+///
+/// Bound to the `dart2wasm.quoteStringForRegExp` runtime import, so the
+/// escaping itself is not implemented in Dart.
+@pragma('wasm:import', 'dart2wasm.quoteStringForRegExp')
+external String quoteStringForRegExp(String string);
+
+/// Wrapper around the array-like result of a JS `RegExp.exec` call, exposing
+/// its extra `input`, `index` and `groups` properties.
+class JSNativeMatch extends JSArray {
+  JSNativeMatch(WasmAnyRef ref) : super(ref);
+
+  /// Boxes [ref] as a [JSNativeMatch], or returns `null` when [isDartNull]
+  /// reports that [ref] maps to Dart `null`.
+  static JSNativeMatch? box(WasmAnyRef? ref) {
+    if (isDartNull(ref)) return null;
+    return JSNativeMatch(ref!);
+  }
+
+  /// The match object's `input` property as a Dart string.
+  String get input {
+    final rawInput = getPropertyRaw(toAnyRef(), 'input'.toAnyRef())!;
+    return jsStringToDartString(rawInput);
+  }
+
+  /// The match object's `index` property, floored to an [int].
+  int get index {
+    final rawIndex = getPropertyRaw(toAnyRef(), 'index'.toAnyRef())!;
+    return toDartNumber(rawIndex).floor();
+  }
+
+  /// The match object's `groups` property, or `null` when it maps to Dart
+  /// `null`.
+  JSObject? get groups {
+    final rawGroups = getPropertyRaw(toAnyRef(), 'groups'.toAnyRef());
+    return JSObject.box(rawGroups);
+  }
+}
+
+/// Wrapper around a reference to a native JS `RegExp` object.
+class JSNativeRegExp extends JSValue {
+  JSNativeRegExp(WasmAnyRef ref) : super(ref);
+
+  /// Reads the property [name] from the wrapped regexp and converts it with
+  /// [toDartBool].
+  bool _boolProperty(String name) {
+    final raw = getPropertyRaw(toAnyRef(), name.toAnyRef())!;
+    return toDartBool(raw);
+  }
+
+  /// Calls the regexp's `exec` method on [string] and boxes the result.
+  JSNativeMatch? exec(String string) {
+    final result = callMethodVarArgsRaw(
+        toAnyRef(), 'exec'.toAnyRef(), [string].toAnyRef());
+    return JSNativeMatch.box(result);
+  }
+
+  /// Calls the regexp's `test` method on [string].
+  bool test(String string) {
+    final result = callMethodVarArgsRaw(
+        toAnyRef(), 'test'.toAnyRef(), [string].toAnyRef())!;
+    return toDartBool(result);
+  }
+
+  /// The regexp's `flags` property as a Dart string.
+  String get flags {
+    final rawFlags = getPropertyRaw(toAnyRef(), 'flags'.toAnyRef())!;
+    return jsStringToDartString(rawFlags);
+  }
+
+  bool get multiline => _boolProperty('multiline');
+
+  bool get ignoreCase => _boolProperty('ignoreCase');
+
+  bool get unicode => _boolProperty('unicode');
+
+  bool get dotAll => _boolProperty('dotAll');
+
+  /// Sets the regexp's `lastIndex` property to [start].
+  set lastIndex(int start) {
+    setPropertyRaw(
+        toAnyRef(), 'lastIndex'.toAnyRef(), intToJSNumber(start));
+  }
+}
+
+/// [RegExp] implementation backed by native JS `RegExp` objects.
+///
+/// Up to three native regexps are kept per instance: the plain one created in
+/// the constructor, a lazily created global (`g`) copy used by [allMatches],
+/// and a lazily created anchored copy used by [matchAsPrefix].
+class JSSyntaxRegExp implements RegExp {
+  @override
+  final String pattern;
+
+  /// Native regexp without the `g` flag; used by [firstMatch] and [hasMatch].
+  final JSNativeRegExp _nativeRegExp;
+
+  /// Cache for [_nativeGlobalVersion].
+  JSNativeRegExp? _nativeGlobalRegExp;
+
+  /// Cache for [_nativeAnchoredVersion].
+  JSNativeRegExp? _nativeAnchoredRegExp;
+
+  /// Creates a regexp for [source].
+  ///
+  /// Throws a [FormatException] if the native `RegExp` constructor rejects
+  /// [source] (see [makeNative]).
+  JSSyntaxRegExp(String source,
+      {bool multiLine = false,
+      bool caseSensitive = true,
+      bool unicode = false,
+      bool dotAll = false})
+      : pattern = source,
+        _nativeRegExp = makeNative(
+            source, multiLine, caseSensitive, unicode, dotAll, false);
+
+  @override
+  String toString() => 'RegExp/$pattern/${_nativeRegExp.flags}';
+
+  /// Copy of this regexp with the `g` flag set, created on first use.
+  JSNativeRegExp get _nativeGlobalVersion =>
+      _nativeGlobalRegExp ??= makeNative(
+          pattern, isMultiLine, isCaseSensitive, isUnicode, isDotAll, true);
+
+  /// "Anchored" copy of this regexp, created on first use.
+  ///
+  /// An anchored version is created by adding "|()" to the source. This means
+  /// that the regexp always matches at the first position that it tries, and
+  /// you can see whether the original regexp matched, or it was the added
+  /// zero-width match that matched, by looking at the last capture. If it is
+  /// a String, the added alternative participated, otherwise it didn't.
+  JSNativeRegExp get _nativeAnchoredVersion =>
+      _nativeAnchoredRegExp ??= makeNative('$pattern|()', isMultiLine,
+          isCaseSensitive, isUnicode, isDotAll, true);
+
+  @override
+  bool get isMultiLine => _nativeRegExp.multiline;
+
+  /// Whether matching is case sensitive.
+  ///
+  /// This is the negation of the native `ignoreCase` flag. It must be negated
+  /// because the value is fed back into [makeNative] when the global and
+  /// anchored copies are built; an un-negated value would flip the `i` flag
+  /// on those copies.
+  @override
+  bool get isCaseSensitive => !_nativeRegExp.ignoreCase;
+
+  @override
+  bool get isUnicode => _nativeRegExp.unicode;
+
+  @override
+  bool get isDotAll => _nativeRegExp.dotAll;
+
+  /// Constructs a native JS `RegExp` for [source] with the requested flags.
+  ///
+  /// The flags are emitted in the order `m`, `i`, `u`, `s`, `g`; `i` is added
+  /// when [caseSensitive] is false.
+  ///
+  /// Throws a [FormatException] carrying the stringified JS error when the
+  /// native constructor does not produce a `RegExp`.
+  static JSNativeRegExp makeNative(String source, bool multiLine,
+      bool caseSensitive, bool unicode, bool dotAll, bool global) {
+    final m = multiLine ? 'm' : '';
+    final i = caseSensitive ? '' : 'i';
+    final u = unicode ? 'u' : '';
+    final s = dotAll ? 's' : '';
+    final g = global ? 'g' : '';
+    final modifiers = '$m$i$u$s$g';
+    // The constructor call goes through the "safe" runtime entry point, which
+    // hands back the stringified exception instead of throwing, so that the
+    // error can be reformatted as a Dart exception below.
+    final result = safeCallConstructorVarArgsRaw(
+        getConstructorRaw('RegExp'), [source, modifiers].toAnyRef());
+    if (isJSRegExp(result)) return JSNativeRegExp(result);
+    // The returned value is the stringified JavaScript exception. Turn it
+    // into a Dart exception.
+    final errorMessage = jsStringToDartString(result);
+    throw FormatException('Illegal RegExp pattern ($errorMessage)', source);
+  }
+
+  @override
+  RegExpMatch? firstMatch(String string) {
+    final match = _nativeRegExp.exec(string);
+    if (match == null) return null;
+    return _MatchImplementation(this, match);
+  }
+
+  @override
+  bool hasMatch(String string) => _nativeRegExp.test(string);
+
+  @override
+  String? stringMatch(String string) => firstMatch(string)?.group(0);
+
+  /// Returns a lazy iterable of all matches in [string] from [start] onwards.
+  ///
+  /// Throws a [RangeError] if [start] is outside `0..string.length`.
+  @override
+  Iterable<RegExpMatch> allMatches(String string, [int start = 0]) {
+    if (start < 0 || start > string.length) {
+      throw RangeError.range(start, 0, string.length);
+    }
+    return _AllMatchesIterable(this, string, start);
+  }
+
+  /// Runs the global copy of the regexp on [string], searching from [start].
+  RegExpMatch? _execGlobal(String string, int start) {
+    final regexp = _nativeGlobalVersion;
+    regexp.lastIndex = start;
+    final match = regexp.exec(string);
+    if (match == null) return null;
+    return _MatchImplementation(this, match);
+  }
+
+  /// Runs the anchored copy of the regexp on [string] at exactly [start].
+  RegExpMatch? _execAnchored(String string, int start) {
+    final regexp = _nativeAnchoredVersion;
+    regexp.lastIndex = start;
+    final match = regexp.exec(string);
+    if (match == null) return null;
+    // If the last capture group participated, the original regexp did not
+    // match at the start position. Popping also removes the extra capture so
+    // that it is not exposed through the returned match.
+    if (match.pop() != null) return null;
+    return _MatchImplementation(this, match);
+  }
+
+  /// Matches this regexp against [string] at position [start] only.
+  ///
+  /// Throws a [RangeError] if [start] is outside `0..string.length`.
+  @override
+  RegExpMatch? matchAsPrefix(String string, [int start = 0]) {
+    if (start < 0 || start > string.length) {
+      throw RangeError.range(start, 0, string.length);
+    }
+    return _execAnchored(string, start);
+  }
+}
+
+/// [RegExpMatch] view over a native JS match object.
+class _MatchImplementation implements RegExpMatch {
+  final Pattern pattern;
+
+  // The wrapped JS RegExp match object: an Array of String values carrying
+  // extra 'index' and 'input' properties. When the regexp has named capture
+  // groups there is additionally a 'groups' property holding an object whose
+  // keys are the group names and whose values are the matched strings.
+  final JSNativeMatch _match;
+
+  _MatchImplementation(this.pattern, this._match);
+
+  String get input {
+    return _match.input;
+  }
+
+  int get start {
+    return _match.index;
+  }
+
+  /// [start] plus the length of the whole-match string (element 0).
+  int get end {
+    final matchStart = start;
+    final matchedText = _match[0].toString();
+    return matchStart + matchedText.length;
+  }
+
+  /// The string form of element [index] of the match array, or `null` when
+  /// that element maps to Dart `null`.
+  String? group(int index) {
+    final captured = _match[index];
+    return captured?.toString();
+  }
+
+  String? operator [](int index) {
+    return group(index);
+  }
+
+  /// Number of elements in the match array, not counting the whole match.
+  int get groupCount {
+    return _match.length - 1;
+  }
+
+  /// The result of [group] for every index in [groups], in order.
+  List<String?> groups(List<int> groups) {
+    return [for (final groupIndex in groups) group(groupIndex)];
+  }
+
+  /// The capture for the group called [name].
+  ///
+  /// Throws an [ArgumentError] when the match has no `groups` object, or when
+  /// that object neither yields a value for [name] nor has such a property.
+  String? namedGroup(String name) {
+    final namedGroups = _match.groups;
+    if (namedGroups != null) {
+      final captured = namedGroups[name];
+      final isKnownName = captured != null ||
+          hasPropertyRaw(namedGroups.toAnyRef(), name.toAnyRef());
+      if (isKnownName) {
+        return captured?.toString();
+      }
+    }
+    throw ArgumentError.value(name, "name", "Not a capture group name");
+  }
+
+  /// The keys of the match's `groups` object, or an empty iterable when there
+  /// is no such object.
+  Iterable<String> get groupNames {
+    final namedGroups = _match.groups;
+    if (namedGroups == null) {
+      return Iterable.empty();
+    }
+    return JSArrayIterableAdapter<String>(objectKeys(namedGroups));
+  }
+}
+
+/// Iterable over every match of a [JSSyntaxRegExp] in a string, starting the
+/// search at a given index. Each call to [iterator] starts a fresh scan.
+class _AllMatchesIterable extends IterableBase<RegExpMatch> {
+  final JSSyntaxRegExp _re;
+  final String _string;
+  final int _start;
+
+  _AllMatchesIterable(this._re, this._string, this._start);
+
+  Iterator<RegExpMatch> get iterator {
+    return _AllMatchesIterator(_re, _string, _start);
+  }
+}
+
+/// Iterator that repeatedly runs the global version of a [JSSyntaxRegExp]
+/// over a string, resuming after the previous match each time.
+class _AllMatchesIterator implements Iterator<RegExpMatch> {
+  final JSSyntaxRegExp _regExp;
+
+  /// The string being searched; set to `null` once iteration has ended.
+  String? _string;
+
+  /// Index at which the next search starts.
+  int _nextIndex;
+
+  RegExpMatch? _current;
+
+  _AllMatchesIterator(this._regExp, this._string, this._nextIndex);
+
+  RegExpMatch get current => _current as RegExpMatch;
+
+  static bool _isLeadSurrogate(int c) {
+    return c >= 0xd800 && c <= 0xdbff;
+  }
+
+  static bool _isTrailSurrogate(int c) {
+    return c >= 0xdc00 && c <= 0xdfff;
+  }
+
+  /// Finds the next match at or after [_nextIndex].
+  ///
+  /// Returns `false`, and keeps returning `false`, once no further match is
+  /// found.
+  bool moveNext() {
+    final string = _string;
+    if (string == null) return false;
+    if (_nextIndex <= string.length) {
+      final match = _regExp._execGlobal(string, _nextIndex);
+      if (match != null) {
+        _current = match;
+        var nextIndex = match.end;
+        if (match.start == nextIndex) {
+          // Zero-width match. Advance by one more, unless the regexp
+          // is in unicode mode and it would put us within a surrogate
+          // pair. In that case, advance past the code point as a whole.
+          //
+          // The code units inspected are the ones at the match position
+          // (`nextIndex`), not at the position the search started from: the
+          // match may have been found further into the string.
+          if (_regExp.isUnicode &&
+              nextIndex + 1 < string.length &&
+              _isLeadSurrogate(string.codeUnitAt(nextIndex)) &&
+              _isTrailSurrogate(string.codeUnitAt(nextIndex + 1))) {
+            nextIndex++;
+          }
+          nextIndex++;
+        }
+        _nextIndex = nextIndex;
+        return true;
+      }
+    }
+    _current = null;
+    _string = null; // Marks iteration as ended.
+    return false;
+  }
+}
diff --git a/sdk/lib/_internal/wasm/lib/regexp_patch.dart b/sdk/lib/_internal/wasm/lib/regexp_patch.dart
new file mode 100644
index 0000000..d7f063c
--- /dev/null
+++ b/sdk/lib/_internal/wasm/lib/regexp_patch.dart
@@ -0,0 +1,21 @@
+// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+@patch
+class RegExp {
+  // Every RegExp is a JSSyntaxRegExp; the flags are forwarded unchanged.
+  @patch
+  factory RegExp(String source,
+      {bool multiLine = false,
+      bool caseSensitive = true,
+      bool unicode = false,
+      bool dotAll = false}) {
+    return JSSyntaxRegExp(source,
+        multiLine: multiLine,
+        caseSensitive: caseSensitive,
+        unicode: unicode,
+        dotAll: dotAll);
+  }
+
+  // Escaping is delegated to the quoteStringForRegExp helper.
+  @patch
+  static String escape(String text) {
+    return quoteStringForRegExp(text);
+  }
+}
diff --git a/sdk/lib/libraries.json b/sdk/lib/libraries.json
index 45bea43..d45b6cb 100644
--- a/sdk/lib/libraries.json
+++ b/sdk/lib/libraries.json
@@ -214,6 +214,7 @@
"_internal/vm/lib/null_patch.dart",
"_internal/vm/lib/map_patch.dart",
"_internal/wasm/lib/object_patch.dart",
+ "_internal/wasm/lib/regexp_patch.dart",
"_internal/wasm/lib/stack_trace_patch.dart",
"_internal/wasm/lib/stopwatch_patch.dart",
"_internal/wasm/lib/string_buffer_patch.dart",
diff --git a/sdk/lib/libraries.yaml b/sdk/lib/libraries.yaml
index 04d1f2d..4641fbc 100644
--- a/sdk/lib/libraries.yaml
+++ b/sdk/lib/libraries.yaml
@@ -203,6 +203,7 @@
- _internal/vm/lib/null_patch.dart
- _internal/vm/lib/map_patch.dart
- _internal/wasm/lib/object_patch.dart
+ - _internal/wasm/lib/regexp_patch.dart
- _internal/wasm/lib/stack_trace_patch.dart
- _internal/wasm/lib/stopwatch_patch.dart
- _internal/wasm/lib/string_buffer_patch.dart