Adjust JSON encoding and decoding to the WHATWG standard.

This brings JSON encoding and decoding in line with the UTF-8 changes
described at https://github.com/dart-lang/sdk/issues/41100

The fused UTF-8 / JSON decoder for the VM now uses the new UTF-8 decoder
instead of its own, separate UTF-8 decoder.

The JSON encoder now escapes lone surrogates, so it can encode JSON string
values containing lone surrogates while keeping its output valid UTF-8.

Change-Id: Ie4d4601cf84012068849e64d4670f2dcd49ea088
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/144286
Reviewed-by: Lasse R.H. Nielsen <lrn@google.com>

commit: 07d508b45fa5e284a70c429af9d2ec33c15e795b [log] [tgz]
author: Aske Simon Christensen <askesc@google.com> Mon May 04 10:48:32 2020 +0000
committer: commit-bot@chromium.org <commit-bot@chromium.org> Mon May 04 10:48:32 2020 +0000
tree: 4f23d42c21e64169827bd467172105f08d11efd8
parent: fa2fd41166db35afa4777e63f900e83d25709c5c [diff]
diff --git a/sdk/lib/_internal/vm/lib/convert_patch.dart b/sdk/lib/_internal/vm/lib/convert_patch.dart
index 973214a..33215c7 100644
--- a/sdk/lib/_internal/vm/lib/convert_patch.dart
+++ b/sdk/lib/_internal/vm/lib/convert_patch.dart

@@ -1498,267 +1498,17 @@
   }
 }
 
-class _Utf8StringBuffer {
-  static const int INITIAL_CAPACITY = 32;
-  // Partial state encoding.
-  static const int MASK_TWO_BIT = 0x03;
-  static const int MASK_SIZE = MASK_TWO_BIT;
-  static const int SHIFT_MISSING = 2;
-  static const int SHIFT_VALUE = 4;
-  static const int NO_PARTIAL = 0;
-
-  // UTF-8 encoding and limits.
-  static const int MAX_ASCII = 127;
-  static const int MAX_TWO_BYTE = 0x7ff;
-  static const int MAX_THREE_BYTE = 0xffff;
-  static const int MAX_UNICODE = 0X10ffff;
-  static const int MASK_TWO_BYTE = 0x1f;
-  static const int MASK_THREE_BYTE = 0x0f;
-  static const int MASK_FOUR_BYTE = 0x07;
-  static const int MASK_CONTINUE_TAG = 0xC0;
-  static const int MASK_CONTINUE_VALUE = 0x3f;
-  static const int CONTINUE_TAG = 0x80;
-
-  // UTF-16 surrogate encoding.
-  static const int LEAD_SURROGATE = 0xD800;
-  static const int TAIL_SURROGATE = 0xDC00;
-  static const int SHIFT_HIGH_SURROGATE = 10;
-  static const int MASK_LOW_SURROGATE = 0x3ff;
-
-  // The internal buffer starts as Uint8List, but may change to Uint16List
-  // if the string contains non-Latin-1 characters.
-  List<int> buffer = new Uint8List(INITIAL_CAPACITY);
-  // Number of elements in buffer.
-  int length = 0;
-  // Partial decoding state, for cases where an UTF-8 sequences is split
-  // between chunks.
-  int partialState = NO_PARTIAL;
-  // Whether all characters so far have been Latin-1 (and the buffer is
-  // still a Uint8List). Set to false when the first non-Latin-1 character
-  // is encountered, and the buffer is then also converted to a Uint16List.
-  bool isLatin1 = true;
-  // If allowing malformed, invalid UTF-8 sequences are converted to
-  // U+FFFD.
-  bool allowMalformed;
-
-  _Utf8StringBuffer(this.allowMalformed);
-
-  /**
-   * Parse the continuation of a multi-byte UTF-8 sequence.
-   *
-   * Parse [utf8] from [position] to [end]. If the sequence extends beyond
-   * `end`, store the partial state in [partialState], and continue from there
-   * on the next added slice.
-   *
-   * The [size] is the number of expected continuation bytes total,
-   * and [missing] is the number of remaining continuation bytes.
-   * The [size] is used to detect overlong encodings.
-   * The [value] is the value collected so far.
-   *
-   * When called after seeing the first multi-byte marker, the [size] and
-   * [missing] values are always the same, but they may differ if continuing
-   * after a partial sequence.
-   */
-  int addContinuation(
-      List<int> utf8, int position, int end, int size, int missing, int value) {
-    int codeEnd = position + missing;
-    do {
-      if (position == end) {
-        missing = codeEnd - position;
-        partialState =
-            size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE);
-        return end;
-      }
-      int char = utf8[position];
-      if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) {
-        if (allowMalformed) {
-          addCharCode(0xFFFD);
-          return position;
-        }
-        throw new FormatException(
-            "Expected UTF-8 continuation byte, "
-            "found $char",
-            utf8,
-            position);
-      }
-      value = 64 * value + (char & MASK_CONTINUE_VALUE);
-      position++;
-    } while (position < codeEnd);
-    if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) {
-      // Over-long encoding.
-      if (allowMalformed) {
-        value = 0xFFFD;
-      } else {
-        throw new FormatException(
-            "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}"
-            " encoded in ${size + 1} bytes.",
-            utf8,
-            position - 1);
-      }
-    }
-    addCharCode(value);
-    return position;
-  }
-
-  void addCharCode(int char) {
-    assert(char >= 0);
-    assert(char <= MAX_UNICODE);
-    if (partialState != NO_PARTIAL) {
-      if (allowMalformed) {
-        partialState = NO_PARTIAL;
-        addCharCode(0xFFFD);
-      } else {
-        throw new FormatException("Incomplete UTF-8 sequence");
-      }
-    }
-    if (isLatin1 && char > 0xff) {
-      _to16Bit(); // Also grows a little if close to full.
-    }
-    int length = this.length;
-    if (char <= MAX_THREE_BYTE) {
-      if (length == buffer.length) _grow();
-      buffer[length] = char;
-      this.length = length + 1;
-      return;
-    }
-    if (length + 2 > buffer.length) _grow();
-    int bits = char - 0x10000;
-    buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE);
-    buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE);
-    this.length = length + 2;
-  }
-
-  void _to16Bit() {
-    assert(isLatin1);
-    Uint16List newBuffer;
-    if ((length + INITIAL_CAPACITY) * 2 <= buffer.length) {
-      // Reuse existing buffer if it's big enough.
-      newBuffer = new Uint16List.view((buffer as Uint8List).buffer);
-    } else {
-      int newCapacity = buffer.length;
-      if (newCapacity - length < INITIAL_CAPACITY) {
-        newCapacity = length + INITIAL_CAPACITY;
-      }
-      newBuffer = new Uint16List(newCapacity);
-    }
-    newBuffer.setRange(0, length, buffer);
-    buffer = newBuffer;
-    isLatin1 = false;
-  }
-
-  void _grow() {
-    int newCapacity = buffer.length * 2;
-    List newBuffer;
-    if (isLatin1) {
-      newBuffer = new Uint8List(newCapacity);
-    } else {
-      newBuffer = new Uint16List(newCapacity);
-    }
-    newBuffer.setRange(0, length, buffer);
-    buffer = newBuffer;
-  }
-
-  void addSlice(List<int> utf8, int position, int end) {
-    assert(position < end);
-    if (partialState > 0) {
-      int continueByteCount = (partialState & MASK_TWO_BIT);
-      int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
-      int value = partialState >> SHIFT_VALUE;
-      partialState = NO_PARTIAL;
-      position = addContinuation(
-          utf8, position, end, continueByteCount, missing, value);
-      if (position == end) return;
-    }
-    // Keep index and capacity in local variables while looping over
-    // ASCII characters.
-    int index = length;
-    int capacity = buffer.length;
-    while (position < end) {
-      int char = utf8[position];
-      if (char <= MAX_ASCII) {
-        if (index == capacity) {
-          length = index;
-          _grow();
-          capacity = buffer.length;
-        }
-        buffer[index++] = char;
-        position++;
-        continue;
-      }
-      length = index;
-      if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {
-        if (allowMalformed) {
-          addCharCode(0xFFFD);
-          position++;
-        } else {
-          throw new FormatException(
-              "Unexpected UTF-8 continuation byte", utf8, position);
-        }
-      } else if (char < 0xE0) {
-        // C0-DF
-        // Two-byte.
-        position = addContinuation(
-            utf8, position + 1, end, 1, 1, char & MASK_TWO_BYTE);
-      } else if (char < 0xF0) {
-        // E0-EF
-        // Three-byte.
-        position = addContinuation(
-            utf8, position + 1, end, 2, 2, char & MASK_THREE_BYTE);
-      } else if (char < 0xF8) {
-        // F0-F7
-        // Four-byte.
-        position = addContinuation(
-            utf8, position + 1, end, 3, 3, char & MASK_FOUR_BYTE);
-      } else {
-        if (allowMalformed) {
-          addCharCode(0xFFFD);
-          position++;
-        } else {
-          throw new FormatException(
-              "Invalid UTF-8 byte: $char", utf8, position);
-        }
-      }
-      index = length;
-      capacity = buffer.length;
-    }
-    length = index;
-  }
-
-  String toString() {
-    if (partialState != NO_PARTIAL) {
-      if (allowMalformed) {
-        partialState = NO_PARTIAL;
-        addCharCode(0xFFFD);
-      } else {
-        int continueByteCount = (partialState & MASK_TWO_BIT);
-        int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
-        int value = partialState >> SHIFT_VALUE;
-        int seenByteCount = continueByteCount - missing + 1;
-        List source = new Uint8List(seenByteCount);
-        while (seenByteCount > 1) {
-          seenByteCount--;
-          source[seenByteCount] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE);
-          value >>= 6;
-        }
-        source[0] = value | (0x3c0 >> (continueByteCount - 1));
-        throw new FormatException(
-            "Incomplete UTF-8 sequence", source, source.length);
-      }
-    }
-    return new String.fromCharCodes(buffer, 0, length);
-  }
-}
-
 /**
  * Chunked JSON parser that parses UTF-8 chunks.
  */
 class _JsonUtf8Parser extends _ChunkedJsonParser<List<int>> {
-  final bool allowMalformed;
+  final _Utf8Decoder decoder;
   List<int> chunk;
   int chunkEnd;
 
-  _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)
-      : super(listener) {
+  _JsonUtf8Parser(_JsonListener listener, bool allowMalformed)
+      : decoder = new _Utf8Decoder(allowMalformed),
+        super(listener) {
     // Starts out checking for an optional BOM (KWD_BOM, count = 0).
     partialState =
         _ChunkedJsonParser.PARTIAL_KEYWORD | _ChunkedJsonParser.KWD_BOM;
@@ -1778,21 +1528,24 @@
   }
 
   void beginString() {
-    this.buffer = new _Utf8StringBuffer(allowMalformed);
+    decoder.reset();
+    this.buffer = new StringBuffer();
   }
 
   void addSliceToString(int start, int end) {
-    _Utf8StringBuffer buffer = this.buffer;
-    buffer.addSlice(chunk, start, end);
+    final StringBuffer buffer = this.buffer;
+    buffer.write(decoder.convertChunked(chunk, start, end));
   }
 
   void addCharToString(int charCode) {
-    _Utf8StringBuffer buffer = this.buffer;
-    buffer.addCharCode(charCode);
+    final StringBuffer buffer = this.buffer;
+    decoder.flush(buffer);
+    buffer.writeCharCode(charCode);
   }
 
   String endString() {
-    _Utf8StringBuffer buffer = this.buffer;
+    final StringBuffer buffer = this.buffer;
+    decoder.flush(buffer);
     this.buffer = null;
     return buffer.toString();
   }
@@ -1908,6 +1661,13 @@
       "QQQQQQQQQQQQQQQQRRRRRbbbbbbbbbbb" // E0-FF
       ;
 
+  /// Reset the decoder to a state where it is ready to decode a new string but
+  /// will not skip a leading BOM. Used by the fused UTF-8 / JSON decoder.
+  void reset() {
+    _state = initial;
+    _bomIndex = -1;
+  }
+
   // The VM decoder handles BOM explicitly instead of via the state machine.
   @patch
   _Utf8Decoder(this.allowMalformed) : _state = initial;

diff --git a/sdk/lib/convert/json.dart b/sdk/lib/convert/json.dart
index 88dee25..fc55abc 100644
--- a/sdk/lib/convert/json.dart
+++ b/sdk/lib/convert/json.dart

@@ -535,11 +535,16 @@
   static const int char_0 = 0x30;
   static const int backslash = 0x5c;
   static const int char_b = 0x62;
+  static const int char_d = 0x64;
   static const int char_f = 0x66;
   static const int char_n = 0x6e;
   static const int char_r = 0x72;
   static const int char_t = 0x74;
   static const int char_u = 0x75;
+  static const int surrogateMin = 0xd800;
+  static const int surrogateMask = 0xfc00;
+  static const int surrogateLead = 0xd800;
+  static const int surrogateTrail = 0xdc00;
 
   /// List of objects currently being traversed. Used to detect cycles.
   final List _seen = [];
@@ -573,7 +578,30 @@
     final length = s.length;
     for (var i = 0; i < length; i++) {
       var charCode = s.codeUnitAt(i);
-      if (charCode > backslash) continue;
+      if (charCode > backslash) {
+        if (charCode >= surrogateMin) {
+          // Possible surrogate. Check if it is unpaired.
+          if (((charCode & surrogateMask) == surrogateLead &&
+                  !(i + 1 < length &&
+                      (s.codeUnitAt(i + 1) & surrogateMask) ==
+                          surrogateTrail)) ||
+              ((charCode & surrogateMask) == surrogateTrail &&
+                  !(i - 1 >= 0 &&
+                      (s.codeUnitAt(i - 1) & surrogateMask) ==
+                          surrogateLead))) {
+            // Lone surrogate.
+            if (i > offset) writeStringSlice(s, offset, i);
+            offset = i + 1;
+            writeCharCode(backslash);
+            writeCharCode(char_u);
+            writeCharCode(char_d);
+            writeCharCode(hexDigit((charCode >> 8) & 0xf));
+            writeCharCode(hexDigit((charCode >> 4) & 0xf));
+            writeCharCode(hexDigit(charCode & 0xf));
+          }
+        }
+        continue;
+      }
       if (charCode < 32) {
         if (i > offset) writeStringSlice(s, offset, i);
         offset = i + 1;

diff --git a/sdk_nnbd/lib/_internal/vm/lib/convert_patch.dart b/sdk_nnbd/lib/_internal/vm/lib/convert_patch.dart
index ae3d66a..e4302cc 100644
--- a/sdk_nnbd/lib/_internal/vm/lib/convert_patch.dart
+++ b/sdk_nnbd/lib/_internal/vm/lib/convert_patch.dart

@@ -1498,269 +1498,19 @@
   }
 }
 
-class _Utf8StringBuffer {
-  static const int INITIAL_CAPACITY = 32;
-  // Partial state encoding.
-  static const int MASK_TWO_BIT = 0x03;
-  static const int MASK_SIZE = MASK_TWO_BIT;
-  static const int SHIFT_MISSING = 2;
-  static const int SHIFT_VALUE = 4;
-  static const int NO_PARTIAL = 0;
-
-  // UTF-8 encoding and limits.
-  static const int MAX_ASCII = 127;
-  static const int MAX_TWO_BYTE = 0x7ff;
-  static const int MAX_THREE_BYTE = 0xffff;
-  static const int MAX_UNICODE = 0X10ffff;
-  static const int MASK_TWO_BYTE = 0x1f;
-  static const int MASK_THREE_BYTE = 0x0f;
-  static const int MASK_FOUR_BYTE = 0x07;
-  static const int MASK_CONTINUE_TAG = 0xC0;
-  static const int MASK_CONTINUE_VALUE = 0x3f;
-  static const int CONTINUE_TAG = 0x80;
-
-  // UTF-16 surrogate encoding.
-  static const int LEAD_SURROGATE = 0xD800;
-  static const int TAIL_SURROGATE = 0xDC00;
-  static const int SHIFT_HIGH_SURROGATE = 10;
-  static const int MASK_LOW_SURROGATE = 0x3ff;
-
-  // The internal buffer starts as Uint8List, but may change to Uint16List
-  // if the string contains non-Latin-1 characters.
-  List<int> buffer = new Uint8List(INITIAL_CAPACITY);
-  // Number of elements in buffer.
-  int length = 0;
-  // Partial decoding state, for cases where an UTF-8 sequences is split
-  // between chunks.
-  int partialState = NO_PARTIAL;
-  // Whether all characters so far have been Latin-1 (and the buffer is
-  // still a Uint8List). Set to false when the first non-Latin-1 character
-  // is encountered, and the buffer is then also converted to a Uint16List.
-  bool isLatin1 = true;
-  // If allowing malformed, invalid UTF-8 sequences are converted to
-  // U+FFFD.
-  bool allowMalformed;
-
-  _Utf8StringBuffer(this.allowMalformed);
-
-  /**
-   * Parse the continuation of a multi-byte UTF-8 sequence.
-   *
-   * Parse [utf8] from [position] to [end]. If the sequence extends beyond
-   * `end`, store the partial state in [partialState], and continue from there
-   * on the next added slice.
-   *
-   * The [size] is the number of expected continuation bytes total,
-   * and [missing] is the number of remaining continuation bytes.
-   * The [size] is used to detect overlong encodings.
-   * The [value] is the value collected so far.
-   *
-   * When called after seeing the first multi-byte marker, the [size] and
-   * [missing] values are always the same, but they may differ if continuing
-   * after a partial sequence.
-   */
-  int addContinuation(
-      List<int> utf8, int position, int end, int size, int missing, int value) {
-    int codeEnd = position + missing;
-    do {
-      if (position == end) {
-        missing = codeEnd - position;
-        partialState =
-            size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE);
-        return end;
-      }
-      int char = utf8[position];
-      if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) {
-        if (allowMalformed) {
-          addCharCode(0xFFFD);
-          return position;
-        }
-        throw new FormatException(
-            "Expected UTF-8 continuation byte, "
-            "found $char",
-            utf8,
-            position);
-      }
-      value = 64 * value + (char & MASK_CONTINUE_VALUE);
-      position++;
-    } while (position < codeEnd);
-    if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) {
-      // Over-long encoding.
-      if (allowMalformed) {
-        value = 0xFFFD;
-      } else {
-        throw new FormatException(
-            "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}"
-            " encoded in ${size + 1} bytes.",
-            utf8,
-            position - 1);
-      }
-    }
-    addCharCode(value);
-    return position;
-  }
-
-  void addCharCode(int char) {
-    assert(char >= 0);
-    assert(char <= MAX_UNICODE);
-    if (partialState != NO_PARTIAL) {
-      if (allowMalformed) {
-        partialState = NO_PARTIAL;
-        addCharCode(0xFFFD);
-      } else {
-        throw new FormatException("Incomplete UTF-8 sequence");
-      }
-    }
-    if (isLatin1 && char > 0xff) {
-      _to16Bit(); // Also grows a little if close to full.
-    }
-    int length = this.length;
-    if (char <= MAX_THREE_BYTE) {
-      if (length == buffer.length) _grow();
-      buffer[length] = char;
-      this.length = length + 1;
-      return;
-    }
-    if (length + 2 > buffer.length) _grow();
-    int bits = char - 0x10000;
-    buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE);
-    buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE);
-    this.length = length + 2;
-  }
-
-  void _to16Bit() {
-    assert(isLatin1);
-    Uint16List newBuffer;
-    if ((length + INITIAL_CAPACITY) * 2 <= buffer.length) {
-      // Reuse existing buffer if it's big enough.
-      newBuffer = new Uint16List.view((buffer as Uint8List).buffer);
-    } else {
-      int newCapacity = buffer.length;
-      if (newCapacity - length < INITIAL_CAPACITY) {
-        newCapacity = length + INITIAL_CAPACITY;
-      }
-      newBuffer = new Uint16List(newCapacity);
-    }
-    newBuffer.setRange(0, length, buffer);
-    buffer = newBuffer;
-    isLatin1 = false;
-  }
-
-  void _grow() {
-    int newCapacity = buffer.length * 2;
-    List<int> newBuffer;
-    if (isLatin1) {
-      newBuffer = new Uint8List(newCapacity);
-    } else {
-      newBuffer = new Uint16List(newCapacity);
-    }
-    newBuffer.setRange(0, length, buffer);
-    buffer = newBuffer;
-  }
-
-  void addSlice(List<int> utf8, int position, int end) {
-    assert(position < end);
-    if (partialState > 0) {
-      int continueByteCount = (partialState & MASK_TWO_BIT);
-      int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
-      int value = partialState >> SHIFT_VALUE;
-      partialState = NO_PARTIAL;
-      position = addContinuation(
-          utf8, position, end, continueByteCount, missing, value);
-      if (position == end) return;
-    }
-    // Keep index and capacity in local variables while looping over
-    // ASCII characters.
-    int index = length;
-    int capacity = buffer.length;
-    while (position < end) {
-      int char = utf8[position];
-      if (char <= MAX_ASCII) {
-        if (index == capacity) {
-          length = index;
-          _grow();
-          capacity = buffer.length;
-        }
-        buffer[index++] = char;
-        position++;
-        continue;
-      }
-      length = index;
-      if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {
-        if (allowMalformed) {
-          addCharCode(0xFFFD);
-          position++;
-        } else {
-          throw new FormatException(
-              "Unexpected UTF-8 continuation byte", utf8, position);
-        }
-      } else if (char < 0xE0) {
-        // C0-DF
-        // Two-byte.
-        position = addContinuation(
-            utf8, position + 1, end, 1, 1, char & MASK_TWO_BYTE);
-      } else if (char < 0xF0) {
-        // E0-EF
-        // Three-byte.
-        position = addContinuation(
-            utf8, position + 1, end, 2, 2, char & MASK_THREE_BYTE);
-      } else if (char < 0xF8) {
-        // F0-F7
-        // Four-byte.
-        position = addContinuation(
-            utf8, position + 1, end, 3, 3, char & MASK_FOUR_BYTE);
-      } else {
-        if (allowMalformed) {
-          addCharCode(0xFFFD);
-          position++;
-        } else {
-          throw new FormatException(
-              "Invalid UTF-8 byte: $char", utf8, position);
-        }
-      }
-      index = length;
-      capacity = buffer.length;
-    }
-    length = index;
-  }
-
-  String toString() {
-    if (partialState != NO_PARTIAL) {
-      if (allowMalformed) {
-        partialState = NO_PARTIAL;
-        addCharCode(0xFFFD);
-      } else {
-        int continueByteCount = (partialState & MASK_TWO_BIT);
-        int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
-        int value = partialState >> SHIFT_VALUE;
-        int seenByteCount = continueByteCount - missing + 1;
-        List source = new Uint8List(seenByteCount);
-        while (seenByteCount > 1) {
-          seenByteCount--;
-          source[seenByteCount] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE);
-          value >>= 6;
-        }
-        source[0] = value | (0x3c0 >> (continueByteCount - 1));
-        throw new FormatException(
-            "Incomplete UTF-8 sequence", source, source.length);
-      }
-    }
-    return new String.fromCharCodes(buffer, 0, length);
-  }
-}
-
 /**
  * Chunked JSON parser that parses UTF-8 chunks.
  */
 class _JsonUtf8Parser extends _ChunkedJsonParser<List<int>> {
   static final Uint8List emptyChunk = Uint8List(0);
 
-  final bool allowMalformed;
+  final _Utf8Decoder decoder;
   List<int> chunk = emptyChunk;
   int chunkEnd = 0;
 
-  _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)
-      : super(listener) {
+  _JsonUtf8Parser(_JsonListener listener, bool allowMalformed)
+      : decoder = new _Utf8Decoder(allowMalformed),
+        super(listener) {
     // Starts out checking for an optional BOM (KWD_BOM, count = 0).
     partialState =
         _ChunkedJsonParser.PARTIAL_KEYWORD | _ChunkedJsonParser.KWD_BOM;
@@ -1780,21 +1530,24 @@
   }
 
   void beginString() {
-    this.buffer = new _Utf8StringBuffer(allowMalformed);
+    decoder.reset();
+    this.buffer = new StringBuffer();
   }
 
   void addSliceToString(int start, int end) {
-    _Utf8StringBuffer buffer = this.buffer;
-    buffer.addSlice(chunk, start, end);
+    final StringBuffer buffer = this.buffer;
+    buffer.write(decoder.convertChunked(chunk, start, end));
   }
 
   void addCharToString(int charCode) {
-    _Utf8StringBuffer buffer = this.buffer;
-    buffer.addCharCode(charCode);
+    final StringBuffer buffer = this.buffer;
+    decoder.flush(buffer);
+    buffer.writeCharCode(charCode);
   }
 
   String endString() {
-    _Utf8StringBuffer buffer = this.buffer;
+    final StringBuffer buffer = this.buffer;
+    decoder.flush(buffer);
     this.buffer = null;
     return buffer.toString();
   }
@@ -1912,6 +1665,13 @@
       "QQQQQQQQQQQQQQQQRRRRRbbbbbbbbbbb" // E0-FF
       ;
 
+  /// Reset the decoder to a state where it is ready to decode a new string but
+  /// will not skip a leading BOM. Used by the fused UTF-8 / JSON decoder.
+  void reset() {
+    _state = initial;
+    _bomIndex = -1;
+  }
+
   // The VM decoder handles BOM explicitly instead of via the state machine.
   @patch
   _Utf8Decoder(this.allowMalformed) : _state = initial;

diff --git a/sdk_nnbd/lib/convert/json.dart b/sdk_nnbd/lib/convert/json.dart
index 1d54082..6d897a0 100644
--- a/sdk_nnbd/lib/convert/json.dart
+++ b/sdk_nnbd/lib/convert/json.dart

@@ -535,11 +535,16 @@
   static const int char_0 = 0x30;
   static const int backslash = 0x5c;
   static const int char_b = 0x62;
+  static const int char_d = 0x64;
   static const int char_f = 0x66;
   static const int char_n = 0x6e;
   static const int char_r = 0x72;
   static const int char_t = 0x74;
   static const int char_u = 0x75;
+  static const int surrogateMin = 0xd800;
+  static const int surrogateMask = 0xfc00;
+  static const int surrogateLead = 0xd800;
+  static const int surrogateTrail = 0xdc00;
 
   /// List of objects currently being traversed. Used to detect cycles.
   final List _seen = [];
@@ -573,7 +578,30 @@
     final length = s.length;
     for (var i = 0; i < length; i++) {
       var charCode = s.codeUnitAt(i);
-      if (charCode > backslash) continue;
+      if (charCode > backslash) {
+        if (charCode >= surrogateMin) {
+          // Possible surrogate. Check if it is unpaired.
+          if (((charCode & surrogateMask) == surrogateLead &&
+                  !(i + 1 < length &&
+                      (s.codeUnitAt(i + 1) & surrogateMask) ==
+                          surrogateTrail)) ||
+              ((charCode & surrogateMask) == surrogateTrail &&
+                  !(i - 1 >= 0 &&
+                      (s.codeUnitAt(i - 1) & surrogateMask) ==
+                          surrogateLead))) {
+            // Lone surrogate.
+            if (i > offset) writeStringSlice(s, offset, i);
+            offset = i + 1;
+            writeCharCode(backslash);
+            writeCharCode(char_u);
+            writeCharCode(char_d);
+            writeCharCode(hexDigit((charCode >> 8) & 0xf));
+            writeCharCode(hexDigit((charCode >> 4) & 0xf));
+            writeCharCode(hexDigit(charCode & 0xf));
+          }
+        }
+        continue;
+      }
       if (charCode < 32) {
         if (i > offset) writeStringSlice(s, offset, i);
         offset = i + 1;
commit	07d508b45fa5e284a70c429af9d2ec33c15e795b	[log] [tgz]
author	Aske Simon Christensen <askesc@google.com>	Mon May 04 10:48:32 2020 +0000
committer	commit-bot@chromium.org <commit-bot@chromium.org>	Mon May 04 10:48:32 2020 +0000
tree	4f23d42c21e64169827bd467172105f08d11efd8
parent	fa2fd41166db35afa4777e63f900e83d25709c5c [diff]