[dart2wasm] Initial regexp support.

Change-Id: Ia461c77979785bbc0510052a31f94bdd83babc01
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/250582
Reviewed-by: Aske Simon Christensen <askesc@google.com>
Commit-Queue: Joshua Litt <joshualitt@google.com>
diff --git a/pkg/dart2wasm/bin/run_wasm.js b/pkg/dart2wasm/bin/run_wasm.js
index 40d57a6..404e3c4 100644
--- a/pkg/dart2wasm/bin/run_wasm.js
+++ b/pkg/dart2wasm/bin/run_wasm.js
@@ -64,6 +64,15 @@
 // A special symbol attached to functions that wrap Dart functions.
 var jsWrappedDartFunctionSymbol = Symbol("JSWrappedDartFunction");
 
+// Calls `constructor` with `new`, passing the elements of the iterable `args`
+// as individual arguments, and returns the constructed object.
+function callConstructorVarArgs(constructor, args) {
+    // Spreading into the `new` expression constructs the same object as
+    // binding the arguments first and calling `new` on the bound function,
+    // without allocating an intermediate bound function on every call.
+    return new constructor(...args);
+}
+
 // Imports for printing and event loop
 var dart2wasm = {
     printToConsole: function(string) {
@@ -204,6 +213,9 @@
     isJSObject: function(o) {
         return o instanceof Object;
     },
+    isJSRegExp: function(o) {
+        return o instanceof RegExp;
+    },
     roundtrip: function (o) {
       // This function exists as a hook for the native JS -> Wasm type
       // conversion rules. The Dart runtime will overload variants of this
@@ -229,12 +241,13 @@
     callMethodVarArgs: function(object, name, args) {
         return object[name].apply(object, args);
     },
-    callConstructorVarArgs: function(constructor, args) {
-        // Apply bind to the constructor. We pass `null` as the first argument
-        // to `bind.apply` because this is `bind`'s unused context
-        // argument(`new` will explicitly create a new context).
-        var factoryFunction = constructor.bind.apply(constructor, [null, ...args]);
-        return new factoryFunction();
+    callConstructorVarArgs: callConstructorVarArgs,
+    safeCallConstructorVarArgs: function(constructor, args) {
+        try {
+            return callConstructorVarArgs(constructor, args);
+        } catch (e) {
+            return String(e);
+        }
     },
     getTimeZoneNameForSeconds: function(secondsSinceEpoch) {
         var date = new Date(secondsSinceEpoch * 1000);
@@ -298,6 +311,17 @@
         }
         return parseFloat(jsSource);
     },
+    quoteStringForRegExp: function(string) {
+        // Backslash-escapes every RegExp metacharacter in `string`. This is
+        // done in the JS runtime to avoid jumping back and forth between JS
+        // and Dart. Testing before replacing is meant to skip `replace` for
+        // strings with nothing to escape; the benefit is still unmeasured.
+        var jsString = stringFromDartString(string);
+        if (/[[\]{}()*+?.\\^$|]/.test(jsString)) {
+            jsString = jsString.replace(/[[\]{}()*+?.\\^$|]/g, '\\$&');
+        }
+        return stringToDartString(jsString);
+    },
 };
 
 function instantiate(filename, imports) {
diff --git a/sdk/lib/_internal/wasm/lib/core_patch.dart b/sdk/lib/_internal/wasm/lib/core_patch.dart
index 68f6517..b9e37c1 100644
--- a/sdk/lib/_internal/wasm/lib/core_patch.dart
+++ b/sdk/lib/_internal/wasm/lib/core_patch.dart
@@ -32,6 +32,8 @@
 
 import "dart:_internal" as _internal show Symbol;
 
+import 'dart:_js_helper' show JSSyntaxRegExp, quoteStringForRegExp;
+
 import "dart:collection"
     show
         HashMap,
diff --git a/sdk/lib/_internal/wasm/lib/js_helper.dart b/sdk/lib/_internal/wasm/lib/js_helper.dart
index 07cfb05..4af502a 100644
--- a/sdk/lib/_internal/wasm/lib/js_helper.dart
+++ b/sdk/lib/_internal/wasm/lib/js_helper.dart
@@ -6,16 +6,23 @@
 library dart._js_helper;
 
 import 'dart:_internal';
+import 'dart:collection';
 import 'dart:typed_data';
 import 'dart:wasm';
 
+part 'regexp_helper.dart';
+
 /// [JSValue] is the root of the JS interop object hierarchy.
 class JSValue {
   final WasmAnyRef _ref;
 
   JSValue(this._ref);
 
-  static JSValue? box(WasmAnyRef? ref) => ref == null ? null : JSValue(ref);
+  // Currently we always explicitly box JS refs in [JSValue] objects. In the
+  // future, we will want to leave these values unboxed when possible, even when
+  // they are nullable.
+  static JSValue? box(WasmAnyRef? ref) =>
+      isDartNull(ref) ? null : JSValue(ref!);
 
   WasmAnyRef toAnyRef() => _ref;
   String toString() => jsStringToDartString(_ref);
@@ -39,9 +46,79 @@
 }
 
 extension ObjectToJS on Object {
-  JSValue toJS() => JSValue(jsObjectFromDartObject(this));
+  WasmAnyRef toAnyRef() => jsObjectFromDartObject(this);
+  JSValue toJS() => JSValue(toAnyRef());
 }
 
+// For now both `null` and `undefined` in JS map to `null` in Dart.
+bool isDartNull(WasmAnyRef? ref) => ref == null || isJSUndefined(ref);
+
+/// A [JSArray] is a wrapper for a native JSArray.
+class JSArray extends JSValue {
+  JSArray(WasmAnyRef ref) : super(ref);
+
+  static JSArray? box(WasmAnyRef? ref) =>
+      isDartNull(ref) ? null : JSArray(ref!);
+
+  JSValue? pop() =>
+      JSValue.box(callMethodVarArgsRaw(_ref, 'pop'.toAnyRef(), [].toAnyRef()));
+  JSValue? operator [](int index) =>
+      JSValue.box(getPropertyRaw(_ref, intToJSNumber(index)));
+  void operator []=(int index, JSValue? value) =>
+      setPropertyRaw(_ref, intToJSNumber(index), value?.toAnyRef());
+  int get length =>
+      toDartNumber(getPropertyRaw(_ref, 'length'.toAnyRef())!).floor();
+}
+
+/// A [JSObject] is a wrapper for any JS object literal.
+class JSObject extends JSValue {
+  JSObject(WasmAnyRef ref) : super(ref);
+
+  static JSObject? box(WasmAnyRef? ref) =>
+      isDartNull(ref) ? null : JSObject(ref!);
+
+  JSValue? operator [](String key) =>
+      JSValue.box(getPropertyRaw(_ref, key.toAnyRef()));
+  void operator []=(String key, JSValue? value) =>
+      setPropertyRaw(_ref, key.toAnyRef(), value?.toAnyRef());
+}
+
+/// Iterates over a [JSArray], exposing each element via [dartifyRaw] as a [T].
+class JSArrayIteratorAdapter<T> extends Iterator<T> {
+  final JSArray array;
+  int index = -1;
+
+  JSArrayIteratorAdapter(this.array);
+
+  // Stops advancing at `array.length` so that calls past the end keep
+  // returning false, per the [Iterator] contract, instead of throwing.
+  @override
+  bool moveNext() {
+    if (index < array.length) index++;
+    return index < array.length;
+  }
+
+  @override
+  T get current => dartifyRaw(array[index]?.toAnyRef()) as T;
+}
+
+/// [JSArrayIterableAdapter] lazily adapts a [JSArray] to Dart's [Iterable]
+/// interface.
+class JSArrayIterableAdapter<T> extends EfficientLengthIterable<T> {
+  final JSArray array;
+
+  JSArrayIterableAdapter(this.array);
+
+  @override
+  Iterator<T> get iterator => JSArrayIteratorAdapter<T>(array);
+
+  @override
+  int get length => array.length;
+}
+
+// Convert to double to avoid converting to [BigInt] in the case of int64.
+WasmAnyRef intToJSNumber(int i) => toJSNumber(i.toDouble());
+
 WasmAnyRef? getConstructorString(String constructor) =>
     getPropertyRaw(globalThisRaw(), constructor.toAnyRef());
 
@@ -113,6 +190,9 @@
 @pragma("wasm:import", "dart2wasm.isJSObject")
 external bool isJSObject(WasmAnyRef? o);
 
+@pragma("wasm:import", "dart2wasm.isJSRegExp")
+external bool isJSRegExp(WasmAnyRef object);
+
 // The JS runtime will run helpful conversion routines between refs and bool /
 // double. In the longer term hopefully we can find a way to avoid the round
 // trip.
@@ -187,6 +267,10 @@
 @pragma("wasm:import", "dart2wasm.callConstructorVarArgs")
 external WasmAnyRef callConstructorVarArgsRaw(WasmAnyRef o, WasmAnyRef args);
 
+@pragma("wasm:import", "dart2wasm.safeCallConstructorVarArgs")
+external WasmAnyRef safeCallConstructorVarArgsRaw(
+    WasmAnyRef o, WasmAnyRef args);
+
 @pragma("wasm:import", "dart2wasm.hasProperty")
 external bool hasPropertyRaw(WasmAnyRef o, WasmAnyRef name);
 
@@ -202,7 +286,7 @@
     WasmAnyRef o, WasmAnyRef method, WasmAnyRef? args);
 
 @pragma("wasm:import", "dart2wasm.stringify")
-external String stringifyRaw(WasmAnyRef? object);
+external String stringify(WasmAnyRef? object);
 
 // Currently, `allowInterop` returns a Function type. This is unfortunate for
 // Dart2wasm because it means arbitrary Dart functions can flow to JS util
@@ -409,6 +493,14 @@
   return f;
 }
 
+/// Returns the JS constructor object for a given [String].
+WasmAnyRef getConstructorRaw(String name) =>
+    getPropertyRaw(globalThisRaw(), name.toAnyRef())!;
+
+/// Equivalent to `Object.keys(object)`.
+JSArray objectKeys(JSValue object) => JSArray(callMethodVarArgsRaw(
+    getConstructorRaw('Object'), 'keys'.toAnyRef(), [object].toAnyRef())!);
+
 /// Methods used by the wasm runtime.
 @pragma("wasm:export", "\$listLength")
 double _listLength(List list) => list.length.toDouble();
diff --git a/sdk/lib/_internal/wasm/lib/regexp_helper.dart b/sdk/lib/_internal/wasm/lib/regexp_helper.dart
new file mode 100644
index 0000000..e2de33d
--- /dev/null
+++ b/sdk/lib/_internal/wasm/lib/regexp_helper.dart
@@ -0,0 +1,270 @@
+// Copyright (c) 2022, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+part of dart._js_helper;
+
+// TODO(joshualitt): This is a fork of the DDC RegExp class. In the longer term,
+// with careful factoring we may be able to share this code.
+// TODO(joshualitt): We should be able to build this library off of static
+// interop.
+
+/// Returns a string for a RegExp pattern that matches [string]. This is done by
+/// escaping all RegExp metacharacters.
+@pragma('wasm:import', 'dart2wasm.quoteStringForRegExp')
+external String quoteStringForRegExp(String string);
+
+class JSNativeMatch extends JSArray {
+  JSNativeMatch(WasmAnyRef ref) : super(ref);
+
+  static JSNativeMatch? box(WasmAnyRef? ref) =>
+      isDartNull(ref) ? null : JSNativeMatch(ref!);
+
+  String get input => jsStringToDartString(
+      getPropertyRaw(this.toAnyRef(), 'input'.toAnyRef())!);
+  int get index =>
+      toDartNumber(getPropertyRaw(this.toAnyRef(), 'index'.toAnyRef())!)
+          .floor();
+  JSObject? get groups =>
+      JSObject.box(getPropertyRaw(this.toAnyRef(), 'groups'.toAnyRef()));
+}
+
+class JSNativeRegExp extends JSValue {
+  JSNativeRegExp(WasmAnyRef ref) : super(ref);
+
+  JSNativeMatch? exec(String string) => JSNativeMatch.box(callMethodVarArgsRaw(
+      this.toAnyRef(), 'exec'.toAnyRef(), [string].toAnyRef()));
+  bool test(String string) => toDartBool(callMethodVarArgsRaw(
+      this.toAnyRef(), 'test'.toAnyRef(), [string].toAnyRef())!);
+  String get flags => jsStringToDartString(
+      getPropertyRaw(this.toAnyRef(), 'flags'.toAnyRef())!);
+  bool get multiline =>
+      toDartBool(getPropertyRaw(this.toAnyRef(), 'multiline'.toAnyRef())!);
+  bool get ignoreCase =>
+      toDartBool(getPropertyRaw(this.toAnyRef(), 'ignoreCase'.toAnyRef())!);
+  bool get unicode =>
+      toDartBool(getPropertyRaw(this.toAnyRef(), 'unicode'.toAnyRef())!);
+  bool get dotAll =>
+      toDartBool(getPropertyRaw(this.toAnyRef(), 'dotAll'.toAnyRef())!);
+  set lastIndex(int start) => setPropertyRaw(
+      this.toAnyRef(), 'lastIndex'.toAnyRef(), intToJSNumber(start));
+}
+
+/// A [RegExp] implementation backed by a native JavaScript `RegExp`.
+///
+/// Global and anchored variants of the native regexp are created lazily.
+class JSSyntaxRegExp implements RegExp {
+  final String pattern;
+  final JSNativeRegExp _nativeRegExp;
+  JSNativeRegExp? _nativeGlobalRegExp;
+  JSNativeRegExp? _nativeAnchoredRegExp;
+
+  String toString() => 'RegExp/$pattern/' + _nativeRegExp.flags;
+
+  JSSyntaxRegExp(String source,
+      {bool multiLine = false,
+      bool caseSensitive = true,
+      bool unicode = false,
+      bool dotAll = false})
+      : this.pattern = source,
+        this._nativeRegExp = makeNative(
+            source, multiLine, caseSensitive, unicode, dotAll, false);
+
+  JSNativeRegExp get _nativeGlobalVersion =>
+      _nativeGlobalRegExp ??= makeNative(
+          pattern, isMultiLine, isCaseSensitive, isUnicode, isDotAll, true);
+
+  JSNativeRegExp get _nativeAnchoredVersion {
+    if (_nativeAnchoredRegExp != null) return _nativeAnchoredRegExp!;
+    // An "anchored version" of a regexp is created by adding "|()" to the
+    // source. This means that the regexp always matches at the first position
+    // that it tries, and you can see if the original regexp matched, or it
+    // was the added zero-width match that matched, by looking at the last
+    // capture. If it is a String, the match participated, otherwise it didn't.
+    return _nativeAnchoredRegExp = makeNative(
+        '$pattern|()', isMultiLine, isCaseSensitive, isUnicode, isDotAll, true);
+  }
+
+  bool get isMultiLine => _nativeRegExp.multiline;
+  // The native `ignoreCase` flag is the inverse of `caseSensitive`:
+  // [makeNative] passes 'i' only when `caseSensitive` is false.
+  bool get isCaseSensitive => !_nativeRegExp.ignoreCase;
+  bool get isUnicode => _nativeRegExp.unicode;
+  bool get isDotAll => _nativeRegExp.dotAll;
+
+  /// Creates a native JS `RegExp` for [source] with flags derived from the
+  /// boolean arguments. Throws a [FormatException] when the constructor call
+  /// does not return a JS `RegExp`.
+  static JSNativeRegExp makeNative(String source, bool multiLine,
+      bool caseSensitive, bool unicode, bool dotAll, bool global) {
+    String m = multiLine ? 'm' : '';
+    String i = caseSensitive ? '' : 'i';
+    String u = unicode ? 'u' : '';
+    String s = dotAll ? 's' : '';
+    String g = global ? 'g' : '';
+    String modifiers = '$m$i$u$s$g';
+    // The call to create the regexp is wrapped in a try catch so we can
+    // reformat the exception if need be.
+    WasmAnyRef result = safeCallConstructorVarArgsRaw(
+        getConstructorRaw('RegExp'), [source, modifiers].toAnyRef());
+    if (isJSRegExp(result)) return JSNativeRegExp(result);
+    // The returned value is the stringified JavaScript exception. Turn it into
+    // a Dart exception.
+    String errorMessage = jsStringToDartString(result);
+    throw new FormatException('Illegal RegExp pattern ($errorMessage)', source);
+  }
+
+  RegExpMatch? firstMatch(String string) {
+    JSNativeMatch? m = _nativeRegExp.exec(string);
+    if (m == null) return null;
+    return new _MatchImplementation(this, m);
+  }
+
+  bool hasMatch(String string) => _nativeRegExp.test(string);
+
+  String? stringMatch(String string) => firstMatch(string)?.group(0);
+
+  Iterable<RegExpMatch> allMatches(String string, [int start = 0]) {
+    if (start < 0 || start > string.length) {
+      throw new RangeError.range(start, 0, string.length);
+    }
+    return _AllMatchesIterable(this, string, start);
+  }
+
+  RegExpMatch? _execGlobal(String string, int start) {
+    JSNativeRegExp regexp = _nativeGlobalVersion;
+    regexp.lastIndex = start;
+    JSNativeMatch? match = regexp.exec(string);
+    if (match == null) return null;
+    return new _MatchImplementation(this, match);
+  }
+
+  RegExpMatch? _execAnchored(String string, int start) {
+    JSNativeRegExp regexp = _nativeAnchoredVersion;
+    regexp.lastIndex = start;
+    JSNativeMatch? match = regexp.exec(string);
+    if (match == null) return null;
+    // If the last capture group participated, the original regexp did not
+    // match at the start position.
+    if (match.pop() != null) return null;
+    return new _MatchImplementation(this, match);
+  }
+
+  RegExpMatch? matchAsPrefix(String string, [int start = 0]) {
+    if (start < 0 || start > string.length) {
+      throw new RangeError.range(start, 0, string.length);
+    }
+    return _execAnchored(string, start);
+  }
+}
+
+/// A [RegExpMatch] that reads its data from a native JS match object.
+class _MatchImplementation implements RegExpMatch {
+  final Pattern pattern;
+
+  // The JS RegExp match object: an Array of String values with extra 'index'
+  // and 'input' properties. If there were named capture groups, it also has
+  // a 'groups' property holding an object with capture group names as keys
+  // and matched strings as values.
+  final JSNativeMatch _match;
+
+  _MatchImplementation(this.pattern, this._match);
+
+  String get input => _match.input;
+
+  int get start => _match.index;
+
+  // The end offset is the start offset plus the length of element 0.
+  int get end => start + _match[0].toString().length;
+
+  String? group(int index) => _match[index]?.toString();
+
+  String? operator [](int index) => group(index);
+
+  // One less than the array length: element 0 is not counted as a group.
+  int get groupCount => _match.length - 1;
+
+  List<String?> groups(List<int> groups) =>
+      [for (final groupIndex in groups) group(groupIndex)];
+
+  String? namedGroup(String name) {
+    final namedCaptures = _match.groups;
+    if (namedCaptures == null) {
+      throw ArgumentError.value(name, "name", "Not a capture group name");
+    }
+    final captured = namedCaptures[name];
+    // A null value is still a valid result when the 'groups' object has
+    // the property, so check for the property itself before rejecting.
+    if (captured == null &&
+        !hasPropertyRaw(namedCaptures.toAnyRef(), name.toAnyRef())) {
+      throw ArgumentError.value(name, "name", "Not a capture group name");
+    }
+    return captured?.toString();
+  }
+
+  Iterable<String> get groupNames {
+    final namedCaptures = _match.groups;
+    return namedCaptures == null
+        ? Iterable.empty()
+        : JSArrayIterableAdapter<String>(objectKeys(namedCaptures));
+  }
+}
+
+class _AllMatchesIterable extends IterableBase<RegExpMatch> {
+  final JSSyntaxRegExp _re;
+  final String _string;
+  final int _start;
+
+  _AllMatchesIterable(this._re, this._string, this._start);
+
+  Iterator<RegExpMatch> get iterator =>
+      new _AllMatchesIterator(_re, _string, _start);
+}
+
+class _AllMatchesIterator implements Iterator<RegExpMatch> {
+  final JSSyntaxRegExp _regExp;
+  String? _string;
+  int _nextIndex;
+  RegExpMatch? _current;
+
+  _AllMatchesIterator(this._regExp, this._string, this._nextIndex);
+
+  RegExpMatch get current => _current as RegExpMatch;
+
+  static bool _isLeadSurrogate(int c) {
+    return c >= 0xd800 && c <= 0xdbff;
+  }
+
+  static bool _isTrailSurrogate(int c) {
+    return c >= 0xdc00 && c <= 0xdfff;
+  }
+
+  bool moveNext() {
+    var string = _string;
+    if (string == null) return false;
+    if (_nextIndex <= string.length) {
+      RegExpMatch? match = _regExp._execGlobal(string, _nextIndex);
+      if (match != null) {
+        _current = match;
+        int nextIndex = match.end;
+        if (match.start == nextIndex) {
+          // Zero-width match. Advance by one more, unless the regexp
+          // is in unicode mode and it would put us within a surrogate
+          // pair. In that case, advance past the code point as a whole.
+          if (_regExp.isUnicode &&
+              _nextIndex + 1 < string.length &&
+              _isLeadSurrogate(string.codeUnitAt(_nextIndex)) &&
+              _isTrailSurrogate(string.codeUnitAt(_nextIndex + 1))) {
+            nextIndex++;
+          }
+          nextIndex++;
+        }
+        _nextIndex = nextIndex;
+        return true;
+      }
+    }
+    _current = null;
+    _string = null; // Marks iteration as ended.
+    return false;
+  }
+}
diff --git a/sdk/lib/_internal/wasm/lib/regexp_patch.dart b/sdk/lib/_internal/wasm/lib/regexp_patch.dart
new file mode 100644
index 0000000..d7f063c
--- /dev/null
+++ b/sdk/lib/_internal/wasm/lib/regexp_patch.dart
@@ -0,0 +1,21 @@
+// Copyright (c) 2022, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+@patch
+class RegExp {
+  @patch
+  factory RegExp(String source,
+          {bool multiLine = false,
+          bool caseSensitive = true,
+          bool unicode = false,
+          bool dotAll = false}) =>
+      JSSyntaxRegExp(source,
+          multiLine: multiLine,
+          caseSensitive: caseSensitive,
+          unicode: unicode,
+          dotAll: dotAll);
+
+  @patch
+  static String escape(String text) => quoteStringForRegExp(text);
+}
diff --git a/sdk/lib/libraries.json b/sdk/lib/libraries.json
index 45bea43..d45b6cb 100644
--- a/sdk/lib/libraries.json
+++ b/sdk/lib/libraries.json
@@ -214,6 +214,7 @@
           "_internal/vm/lib/null_patch.dart",
           "_internal/vm/lib/map_patch.dart",
           "_internal/wasm/lib/object_patch.dart",
+          "_internal/wasm/lib/regexp_patch.dart",
           "_internal/wasm/lib/stack_trace_patch.dart",
           "_internal/wasm/lib/stopwatch_patch.dart",
           "_internal/wasm/lib/string_buffer_patch.dart",
diff --git a/sdk/lib/libraries.yaml b/sdk/lib/libraries.yaml
index 04d1f2d..4641fbc 100644
--- a/sdk/lib/libraries.yaml
+++ b/sdk/lib/libraries.yaml
@@ -203,6 +203,7 @@
       - _internal/vm/lib/null_patch.dart
       - _internal/vm/lib/map_patch.dart
       - _internal/wasm/lib/object_patch.dart
+      - _internal/wasm/lib/regexp_patch.dart
       - _internal/wasm/lib/stack_trace_patch.dart
       - _internal/wasm/lib/stopwatch_patch.dart
       - _internal/wasm/lib/string_buffer_patch.dart