fix #31393, port utf8 decoder optimizations from dart2js to ddc The dart2js changes were made in: https://github.com/dart-lang/sdk/commit/3836c70a8150bc33a695788deee346b940b40769 Change-Id: I5476c10e3734fa8ab0c027dd258b2fa5ef43287b Reviewed-on: https://dart-review.googlesource.com/54529 Reviewed-by: Vijay Menon <vsm@google.com>

commit: 0fde28b2dd00fc5967d3ba678b69e3f1df5eaed7 [log] [tgz]
author: Jenny Messerly <jmesserly@google.com> Thu May 10 00:06:12 2018 +0000
committer: Jenny Messerly <jmesserly@google.com> Thu May 10 00:06:12 2018 +0000
tree: 51ec20b499fbc265dd2c7eee3266935818543c26
parent: 779dc6eb85c1f2249803d017ffa785c85681ad1b [diff]
diff --git a/pkg/dev_compiler/lib/src/kernel/compiler.dart b/pkg/dev_compiler/lib/src/kernel/compiler.dart
index bc50ca2..e18a3be 100644
--- a/pkg/dev_compiler/lib/src/kernel/compiler.dart
+++ b/pkg/dev_compiler/lib/src/kernel/compiler.dart

@@ -5034,7 +5034,7 @@
   @override
   visitFunctionExpression(FunctionExpression node) {
     var fn = _emitArrowFunction(node);
-    if (!_reifyFunctionType(_currentFunction)) return fn;
+    if (!_reifyFunctionType(node.function)) return fn;
     return _emitFunctionTagged(fn, node.getStaticType(types) as FunctionType);
   }
 
@@ -5140,7 +5140,7 @@
         isBuiltinAnnotation(a, '_js_helper', 'ReifyFunctionTypes');
     while (parent != null) {
       var a = findAnnotation(parent, reifyFunctionTypes);
-      if (a != null && a is ConstructorInvocation) {
+      if (a is ConstructorInvocation) {
         var args = a.arguments.positional;
         if (args.length == 1) {
           var arg = args[0];

diff --git a/pkg/dev_compiler/tool/input_sdk/patch/convert_patch.dart b/pkg/dev_compiler/tool/input_sdk/patch/convert_patch.dart
index 5ef68a4..18a077e 100644
--- a/pkg/dev_compiler/tool/input_sdk/patch/convert_patch.dart
+++ b/pkg/dev_compiler/tool/input_sdk/patch/convert_patch.dart

@@ -9,6 +9,7 @@
 import 'dart:_interceptors' show JSExtendableArray;
 import 'dart:_internal' show MappedIterable, ListIterable;
 import 'dart:collection' show Maps, LinkedHashMap, MapBase;
+import 'dart:_native_typed_data' show NativeUint8List;
 
 /**
  * Parses [json] and builds the corresponding parsed JSON value.
@@ -399,6 +400,98 @@
   @patch
   static String _convertIntercepted(
       bool allowMalformed, List<int> codeUnits, int start, int end) {
-    return null; // This call was not intercepted.
+    // Test `codeUnits is NativeUint8List`. Dart's NativeUint8List is
+    // implemented by JavaScript's Uint8Array.
+    if (JS('bool', '# instanceof Uint8Array', codeUnits)) {
+      // JS 'cast' to avoid a downcast equivalent to the is-check we hand-coded.
+      NativeUint8List casted = JS('NativeUint8List', '#', codeUnits);
+      return _convertInterceptedUint8List(allowMalformed, casted, start, end);
+    }
   }
+
+  static String _convertInterceptedUint8List(
+      bool allowMalformed, NativeUint8List codeUnits, int start, int end) {
+    if (allowMalformed) {
+      // TextDecoder with option {fatal: false} does not produce the same result
+      // as [Utf8Decoder]. It disagrees on the number of `U+FFFD` (REPLACEMENT
+      // CHARACTER) generated for some malformed sequences. We could use
+      // TextDecoder with option {fatal: true}, catch the error, and re-try
+      // without acceleration. That turns out to be extremely slow (the Error
+      // captures a stack trace).
+      // TODO(31370): Bring Utf8Decoder into alignment with TextDecoder.
+      // TODO(sra): If we can't do that, can we detect valid input fast enough
+      // to use a check like the [_unsafe] check below?
+      return null;
+    }
+
+    var decoder = _decoder;
+    if (decoder == null) return null;
+    if (0 == start && end == null) {
+      return _useTextDecoderChecked(decoder, codeUnits);
+    }
+
+    int length = codeUnits.length;
+    end = RangeError.checkValidRange(start, end, length);
+
+    if (0 == start && end == codeUnits.length) {
+      return _useTextDecoderChecked(decoder, codeUnits);
+    }
+
+    return _useTextDecoderChecked(decoder,
+        JS('NativeUint8List', '#.subarray(#, #)', codeUnits, start, end));
+  }
+
+  static String _useTextDecoderChecked(decoder, NativeUint8List codeUnits) {
+    if (_unsafe(codeUnits)) return null;
+    return _useTextDecoderUnchecked(decoder, codeUnits);
+  }
+
+  static String _useTextDecoderUnchecked(decoder, NativeUint8List codeUnits) {
+    // If the input is malformed, catch the exception and return `null` to fall
+    // back on unintercepted decoder. The fallback will either succeed in
+    // decoding, or report the problem better than TextDecoder.
+    try {
+      return JS('String', '#.decode(#)', decoder, codeUnits);
+    } catch (e) {}
+    return null;
+  }
+
+  /// Returns `true` if [codeUnits] contains problematic encodings.
+  ///
+  /// TextDecoder behaves differently to [Utf8Encoder] when the input encodes a
+  /// surrogate (U+D800 through U+DFFF). TextDecoder considers the surrogate to
+  /// be an encoding error and, depending on the `fatal` option, either throws
+  /// and Error or encodes the surrogate as U+FFFD. [Utf8Decoder] does not
+  /// consider the surrogate to be an error and returns the code unit encoded by
+  /// the surrogate.
+  ///
+  /// Throwing an `Error` captures the stack, whoch makes it so expensive that
+  /// it is worth checking the input for surrogates and avoiding TextDecoder in
+  /// this case.
+  static bool _unsafe(NativeUint8List codeUnits) {
+    // Surrogates encode as (hex) ED Ax xx or ED Bx xx.
+    int limit = codeUnits.length - 2;
+    for (int i = 0; i < limit; i++) {
+      int unit1 = codeUnits[i];
+      if (unit1 == 0xED) {
+        int unit2 = JS('!', '#', codeUnits[i + 1]);
+        if ((unit2 & 0xE0) == 0xA0) return true;
+      }
+    }
+    return false;
+  }
+
+  //// TextDecoder is not defined on some browsers and on the stand-alone d8 and
+  /// jsshell engines. Use a lazy initializer to do feature detection once.
+  static final _decoder = () {
+    try {
+      // Use `{fatal: true}`. 'fatal' does not correspond exactly to
+      // `!allowMalformed`: TextDecoder rejects unpaired surrogates which
+      // [Utf8Decoder] accepts.  In non-fatal mode, TextDecoder translates
+      // unpaired surrogates to REPLACEMENT CHARACTER (U+FFFD) whereas
+      // [Utf8Decoder] leaves the surrogate intact.
+      return JS('', 'new TextDecoder("utf-8", {fatal: true})');
+    } catch (e) {}
+    return null;
+  }();
 }
commit	0fde28b2dd00fc5967d3ba678b69e3f1df5eaed7	[log] [tgz]
author	Jenny Messerly <jmesserly@google.com>	Thu May 10 00:06:12 2018 +0000
committer	Jenny Messerly <jmesserly@google.com>	Thu May 10 00:06:12 2018 +0000
tree	51ec20b499fbc265dd2c7eee3266935818543c26
parent	779dc6eb85c1f2249803d017ffa785c85681ad1b [diff]