fix #31393, port utf8 decoder optimizations from dart2js to ddc
The dart2js changes were made in:
https://github.com/dart-lang/sdk/commit/3836c70a8150bc33a695788deee346b940b40769
Change-Id: I5476c10e3734fa8ab0c027dd258b2fa5ef43287b
Reviewed-on: https://dart-review.googlesource.com/54529
Reviewed-by: Vijay Menon <vsm@google.com>
diff --git a/pkg/dev_compiler/lib/src/kernel/compiler.dart b/pkg/dev_compiler/lib/src/kernel/compiler.dart
index bc50ca2..e18a3be 100644
--- a/pkg/dev_compiler/lib/src/kernel/compiler.dart
+++ b/pkg/dev_compiler/lib/src/kernel/compiler.dart
@@ -5034,7 +5034,7 @@
@override
visitFunctionExpression(FunctionExpression node) {
var fn = _emitArrowFunction(node);
- if (!_reifyFunctionType(_currentFunction)) return fn;
+ if (!_reifyFunctionType(node.function)) return fn;
return _emitFunctionTagged(fn, node.getStaticType(types) as FunctionType);
}
@@ -5140,7 +5140,7 @@
isBuiltinAnnotation(a, '_js_helper', 'ReifyFunctionTypes');
while (parent != null) {
var a = findAnnotation(parent, reifyFunctionTypes);
- if (a != null && a is ConstructorInvocation) {
+ if (a is ConstructorInvocation) {
var args = a.arguments.positional;
if (args.length == 1) {
var arg = args[0];
diff --git a/pkg/dev_compiler/tool/input_sdk/patch/convert_patch.dart b/pkg/dev_compiler/tool/input_sdk/patch/convert_patch.dart
index 5ef68a4..18a077e 100644
--- a/pkg/dev_compiler/tool/input_sdk/patch/convert_patch.dart
+++ b/pkg/dev_compiler/tool/input_sdk/patch/convert_patch.dart
@@ -9,6 +9,7 @@
import 'dart:_interceptors' show JSExtendableArray;
import 'dart:_internal' show MappedIterable, ListIterable;
import 'dart:collection' show Maps, LinkedHashMap, MapBase;
+import 'dart:_native_typed_data' show NativeUint8List;
/**
* Parses [json] and builds the corresponding parsed JSON value.
@@ -399,6 +400,98 @@
@patch
static String _convertIntercepted(
bool allowMalformed, List<int> codeUnits, int start, int end) {
- return null; // This call was not intercepted.
+ // Test `codeUnits is NativeUint8List`. Dart's NativeUint8List is
+ // implemented by JavaScript's Uint8Array.
+ if (JS('bool', '# instanceof Uint8Array', codeUnits)) {
+ // JS 'cast' to avoid a downcast equivalent to the is-check we hand-coded.
+ NativeUint8List casted = JS('NativeUint8List', '#', codeUnits);
+ return _convertInterceptedUint8List(allowMalformed, casted, start, end);
+ }
}
+
+ static String _convertInterceptedUint8List(
+ bool allowMalformed, NativeUint8List codeUnits, int start, int end) {
+ if (allowMalformed) {
+ // TextDecoder with option {fatal: false} does not produce the same result
+ // as [Utf8Decoder]. It disagrees on the number of `U+FFFD` (REPLACEMENT
+ // CHARACTER) generated for some malformed sequences. We could use
+ // TextDecoder with option {fatal: true}, catch the error, and re-try
+ // without acceleration. That turns out to be extremely slow (the Error
+ // captures a stack trace).
+ // TODO(31370): Bring Utf8Decoder into alignment with TextDecoder.
+ // TODO(sra): If we can't do that, can we detect valid input fast enough
+ // to use a check like the [_unsafe] check below?
+ return null;
+ }
+
+ var decoder = _decoder;
+ if (decoder == null) return null;
+ if (0 == start && end == null) {
+ return _useTextDecoderChecked(decoder, codeUnits);
+ }
+
+ int length = codeUnits.length;
+ end = RangeError.checkValidRange(start, end, length);
+
+ if (0 == start && end == codeUnits.length) {
+ return _useTextDecoderChecked(decoder, codeUnits);
+ }
+
+ return _useTextDecoderChecked(decoder,
+ JS('NativeUint8List', '#.subarray(#, #)', codeUnits, start, end));
+ }
+
+ static String _useTextDecoderChecked(decoder, NativeUint8List codeUnits) {
+ if (_unsafe(codeUnits)) return null;
+ return _useTextDecoderUnchecked(decoder, codeUnits);
+ }
+
+ static String _useTextDecoderUnchecked(decoder, NativeUint8List codeUnits) {
+ // If the input is malformed, catch the exception and return `null` to fall
+ // back on the unintercepted decoder. The fallback will either succeed in
+ // decoding, or report the problem better than TextDecoder.
+ try {
+ return JS('String', '#.decode(#)', decoder, codeUnits);
+ } catch (e) {}
+ return null;
+ }
+
+ /// Returns `true` if [codeUnits] contains problematic encodings.
+ ///
+ /// TextDecoder behaves differently to [Utf8Decoder] when the input encodes a
+ /// surrogate (U+D800 through U+DFFF). TextDecoder considers the surrogate to
+ /// be an encoding error and, depending on the `fatal` option, either throws
+ /// an Error or encodes the surrogate as U+FFFD. [Utf8Decoder] does not
+ /// consider the surrogate to be an error and returns the code unit encoded by
+ /// the surrogate.
+ ///
+ /// Throwing an `Error` captures the stack, which makes it so expensive that
+ /// it is worth checking the input for surrogates and avoiding TextDecoder in
+ /// this case.
+ static bool _unsafe(NativeUint8List codeUnits) {
+ // Surrogates encode as (hex) ED Ax xx or ED Bx xx.
+ int limit = codeUnits.length - 2;
+ for (int i = 0; i < limit; i++) {
+ int unit1 = codeUnits[i];
+ if (unit1 == 0xED) {
+ int unit2 = JS('!', '#', codeUnits[i + 1]);
+ if ((unit2 & 0xE0) == 0xA0) return true;
+ }
+ }
+ return false;
+ }
+
+ /// TextDecoder is not defined on some browsers and on the stand-alone d8 and
+ /// jsshell engines. Use a lazy initializer to do feature detection once.
+ static final _decoder = () {
+ try {
+ // Use `{fatal: true}`. 'fatal' does not correspond exactly to
+ // `!allowMalformed`: TextDecoder rejects unpaired surrogates which
+ // [Utf8Decoder] accepts. In non-fatal mode, TextDecoder translates
+ // unpaired surrogates to REPLACEMENT CHARACTER (U+FFFD) whereas
+ // [Utf8Decoder] leaves the surrogate intact.
+ return JS('', 'new TextDecoder("utf-8", {fatal: true})');
+ } catch (e) {}
+ return null;
+ }();
}