[vm] Speed up JSON encoding.

TEST=ci
Change-Id: I8bfe00472f3a5e4e6680de631072cea0dacc3f55
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/283980
Reviewed-by: Ben Konyi <bkonyi@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>

commit: f56c45eb37ca336c8033c5ede121f89baf2002f2 [log] [tgz]
author: Ryan Macnak <rmacnak@google.com> Tue Feb 21 18:58:42 2023 +0000
committer: Commit Queue <dart-scoped@luci-project-accounts.iam.gserviceaccount.com> Tue Feb 21 18:58:42 2023 +0000
tree: 05a97696aed54c4eeab1d359701e93c01702f6e4
parent: 004b400dfbed92326d2972a5192449f1ff84796f [diff]
diff --git a/runtime/platform/text_buffer.cc b/runtime/platform/text_buffer.cc
index 2a3b80f..a451934 100644
--- a/runtime/platform/text_buffer.cc
+++ b/runtime/platform/text_buffer.cc

@@ -62,42 +62,186 @@
   buffer_[length_] = '\0';
 }
 
-// Write a UTF-32 code unit so it can be read by a JSON parser in a string
-// literal. Use official encoding from JSON specification. http://json.org/
-void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
+void BaseTextBuffer::AddEscapedUTF8(const char* const s, intptr_t len) {
+  const uint8_t* cursor = reinterpret_cast<const uint8_t*>(s);
+  const uint8_t* end = cursor + len;
+
+  intptr_t needed = 0;
+  while (cursor < end) {
+    uint8_t codeunit = *cursor++;
+    if (codeunit >= 0x80) {
+      needed += 1;
+    } else {
+      needed += EscapedCodeUnitLength(codeunit);
+    }
+  }
+
+  if (!EnsureCapacity(needed)) return;
+
+  cursor = reinterpret_cast<const uint8_t*>(s);
+  while (cursor < end) {
+    uint8_t codeunit = *cursor++;
+    if (codeunit >= 0x80) {
+      buffer_[length_++] = codeunit;
+    } else {
+      EscapeAndAddCodeUnit(codeunit);
+    }
+  }
+  buffer_[length_] = '\0';
+}
+
+void BaseTextBuffer::AddEscapedLatin1(const uint8_t* const s, intptr_t len) {
+  const uint8_t* cursor = s;
+  const uint8_t* end = cursor + len;
+
+  intptr_t needed = 0;
+  while (cursor < end) {
+    needed += EscapedCodeUnitLength(*cursor++);
+  }
+
+  if (!EnsureCapacity(needed)) return;
+
+  cursor = s;
+  while (cursor < end) {
+    EscapeAndAddCodeUnit(*cursor++);
+  }
+  buffer_[length_] = '\0';
+}
+
+void BaseTextBuffer::AddEscapedUTF16(const uint16_t* s, intptr_t len) {
+  for (const uint16_t* end = s + len; s < end; s++) {
+    if (!EnsureCapacity(6)) return;
+
+    uint16_t code_unit = *s;
+    if (Utf16::IsTrailSurrogate(code_unit)) {
+      EscapeAndAddUTF16CodeUnit(code_unit);
+    } else if (Utf16::IsLeadSurrogate(code_unit)) {
+      if (s + 1 == end) {
+        EscapeAndAddUTF16CodeUnit(code_unit);
+      } else {
+        uint16_t next_code_unit = *(s + 1);
+        if (Utf16::IsTrailSurrogate(next_code_unit)) {
+          uint32_t decoded = Utf16::Decode(code_unit, next_code_unit);
+          EscapeAndAddCodeUnit(decoded);
+          s++;
+        } else {
+          EscapeAndAddUTF16CodeUnit(code_unit);
+        }
+      }
+    } else {
+      EscapeAndAddCodeUnit(code_unit);
+    }
+  }
+  buffer_[length_] = '\0';
+}
+
+DART_FORCE_INLINE
+intptr_t BaseTextBuffer::EscapedCodeUnitLength(uint32_t codeunit) {
   switch (codeunit) {
     case '"':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\\""), 2);
-      break;
     case '\\':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\\\"), 2);
-      break;
     case '/':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\/"), 2);
-      break;
     case '\b':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\b"), 2);
-      break;
     case '\f':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\f"), 2);
-      break;
     case '\n':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\n"), 2);
-      break;
     case '\r':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\r"), 2);
-      break;
     case '\t':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\t"), 2);
-      break;
+      return 2;
     default:
       if (codeunit < 0x20) {
-        EscapeAndAddUTF16CodeUnit(codeunit);
+        return 6;
+      } else if (codeunit <= Utf8::kMaxOneByteChar) {
+        return 1;
+      } else if (codeunit <= Utf8::kMaxTwoByteChar) {
+        return 2;
+      } else if (codeunit <= Utf8::kMaxThreeByteChar) {
+        return 3;
       } else {
-        char encoded[6];
-        intptr_t length = Utf8::Length(codeunit);
-        Utf8::Encode(codeunit, encoded);
-        AddRaw(reinterpret_cast<uint8_t const*>(encoded), length);
+        ASSERT(codeunit <= Utf8::kMaxFourByteChar);
+        return 4;
+      }
+  }
+}
+
+static uint8_t Hex(uint8_t value) {
+  return value < 10 ? '0' + value : 'A' + value - 10;
+}
+
+// Write a UTF-32 code unit so it can be read by a JSON parser in a string
+// literal. Use official encoding from JSON specification. http://json.org/
+DART_FORCE_INLINE
+void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
+  intptr_t remaining = capacity_ - length_;
+  switch (codeunit) {
+    case '"':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = '\"';
+      break;
+    case '\\':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = '\\';
+      break;
+    case '/':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = '/';
+      break;
+    case '\b':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 'b';
+      break;
+    case '\f':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 'f';
+      break;
+    case '\n':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 'n';
+      break;
+    case '\r':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 'r';
+      break;
+    case '\t':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 't';
+      break;
+    default:
+      static constexpr int kMask = ~(1 << 6);
+      if (codeunit < 0x20) {
+        ASSERT(remaining > 6);
+        buffer_[length_++] = '\\';
+        buffer_[length_++] = 'u';
+        buffer_[length_++] = Hex((codeunit >> 12) & 0xF);
+        buffer_[length_++] = Hex((codeunit >> 8) & 0xF);
+        buffer_[length_++] = Hex((codeunit >> 4) & 0xF);
+        buffer_[length_++] = Hex((codeunit >> 0) & 0xF);
+      } else if (codeunit <= Utf8::kMaxOneByteChar) {
+        ASSERT(remaining > 1);
+        buffer_[length_++] = codeunit;
+      } else if (codeunit <= Utf8::kMaxTwoByteChar) {
+        ASSERT(remaining > 2);
+        buffer_[length_++] = 0xC0 | (codeunit >> 6);
+        buffer_[length_++] = 0x80 | (codeunit & kMask);
+      } else if (codeunit <= Utf8::kMaxThreeByteChar) {
+        ASSERT(remaining > 3);
+        buffer_[length_++] = 0xE0 | (codeunit >> 12);
+        buffer_[length_++] = 0x80 | ((codeunit >> 6) & kMask);
+        buffer_[length_++] = 0x80 | (codeunit & kMask);
+      } else {
+        ASSERT(codeunit <= Utf8::kMaxFourByteChar);
+        ASSERT(remaining > 4);
+        buffer_[length_++] = 0xF0 | (codeunit >> 18);
+        buffer_[length_++] = 0x80 | ((codeunit >> 12) & kMask);
+        buffer_[length_++] = 0x80 | ((codeunit >> 6) & kMask);
+        buffer_[length_++] = 0x80 | (codeunit & kMask);
       }
   }
 }
@@ -105,18 +249,22 @@
 // Write an incomplete UTF-16 code unit so it can be read by a JSON parser in a
 // string literal.
 void BaseTextBuffer::EscapeAndAddUTF16CodeUnit(uint16_t codeunit) {
-  Printf("\\u%04X", codeunit);
+  intptr_t remaining = capacity_ - length_;
+  ASSERT(remaining > 6);
+  buffer_[length_++] = '\\';
+  buffer_[length_++] = 'u';
+  buffer_[length_++] = Hex((codeunit >> 12) & 0xF);
+  buffer_[length_++] = Hex((codeunit >> 8) & 0xF);
+  buffer_[length_++] = Hex((codeunit >> 4) & 0xF);
+  buffer_[length_++] = Hex((codeunit >> 0) & 0xF);
 }
 
 void BaseTextBuffer::AddString(const char* s) {
-  Printf("%s", s);
+  AddRaw(reinterpret_cast<const uint8_t*>(s), strlen(s));
 }
 
 void BaseTextBuffer::AddEscapedString(const char* s) {
-  intptr_t len = strlen(s);
-  for (int i = 0; i < len; i++) {
-    EscapeAndAddCodeUnit(s[i]);
-  }
+  AddEscapedUTF8(s, strlen(s));
 }
 
 TextBuffer::TextBuffer(intptr_t buf_size) {
@@ -143,6 +291,7 @@
   intptr_t remaining = capacity_ - length_;
   if (remaining <= len) {
     intptr_t new_size = capacity_ + Utils::Maximum(capacity_, len + 1);
+    new_size = Utils::Maximum(new_size, static_cast<intptr_t>(256));
     char* new_buf = reinterpret_cast<char*>(realloc(buffer_, new_size));
     buffer_ = new_buf;
     capacity_ = new_size;

diff --git a/runtime/platform/text_buffer.h b/runtime/platform/text_buffer.h
index 78f8e4f..9743a49 100644
--- a/runtime/platform/text_buffer.h
+++ b/runtime/platform/text_buffer.h

@@ -22,12 +22,14 @@
   intptr_t Printf(const char* format, ...) PRINTF_ATTRIBUTE(2, 3);
   intptr_t VPrintf(const char* format, va_list args);
   void AddChar(char ch);
-  void EscapeAndAddUTF16CodeUnit(uint16_t cu);
-  void EscapeAndAddCodeUnit(uint32_t cu);
   void AddString(const char* s);
-  void AddEscapedString(const char* s);
   void AddRaw(const uint8_t* buffer, intptr_t buffer_length);
 
+  void AddEscapedString(const char* s);
+  void AddEscapedUTF8(const char* s, intptr_t len);
+  void AddEscapedLatin1(const uint8_t* code_units, intptr_t len);
+  void AddEscapedUTF16(const uint16_t* code_units, intptr_t len);
+
   // Returns a pointer to the current internal buffer. Whether the pointer is
   // still valid after the BaseTextBuffer dies depends on the subclass.
   char* buffer() const { return buffer_; }
@@ -37,6 +39,11 @@
   // should be assumed to invalidate the contents of previous calls to buffer().
   virtual void Clear() = 0;
 
+ private:
+  intptr_t EscapedCodeUnitLength(uint32_t cu);
+  void EscapeAndAddCodeUnit(uint32_t cu);
+  void EscapeAndAddUTF16CodeUnit(uint16_t cu);
+
  protected:
   virtual bool EnsureCapacity(intptr_t len) = 0;
 

diff --git a/runtime/vm/json_writer.cc b/runtime/vm/json_writer.cc
index 5b7ca3a..c420a7c 100644
--- a/runtime/vm/json_writer.cc
+++ b/runtime/vm/json_writer.cc

@@ -348,18 +348,7 @@
   if (s == NULL) {
     return;
   }
-  const uint8_t* s8 = reinterpret_cast<const uint8_t*>(s);
-  intptr_t i = 0;
-  for (; i < len;) {
-    // Extract next UTF8 character.
-    int32_t ch = 0;
-    int32_t ch_len = Utf8::Decode(&s8[i], len - i, &ch);
-    ASSERT(ch_len != 0);
-    buffer_.EscapeAndAddCodeUnit(ch);
-    // Move i forward.
-    i += ch_len;
-  }
-  ASSERT(i == len);
+  buffer_.AddEscapedUTF8(s, len);
 }
 
 bool JSONWriter::AddDartString(const String& s,
@@ -373,29 +362,26 @@
   if (!Utils::RangeCheck(offset, count, length)) {
     count = length - offset;
   }
-  intptr_t limit = offset + count;
-  for (intptr_t i = offset; i < limit; i++) {
-    uint16_t code_unit = s.CharAt(i);
-    if (Utf16::IsTrailSurrogate(code_unit)) {
-      buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
-    } else if (Utf16::IsLeadSurrogate(code_unit)) {
-      if (i + 1 == limit) {
-        buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
-      } else {
-        uint16_t next_code_unit = s.CharAt(i + 1);
-        if (Utf16::IsTrailSurrogate(next_code_unit)) {
-          uint32_t decoded = Utf16::Decode(code_unit, next_code_unit);
-          buffer_.EscapeAndAddCodeUnit(decoded);
-          i++;
-        } else {
-          buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
-        }
-      }
+
+  if (count > 0) {  // Avoid asserts about harmless out-of-bounds index.
+    NoSafepointScope no_safepoint;
+    if (s.IsOneByteString()) {
+      buffer_.AddEscapedLatin1(OneByteString::CharAddr(s, offset), count);
+    } else if (s.IsExternalOneByteString()) {
+      buffer_.AddEscapedLatin1(ExternalOneByteString::CharAddr(s, offset),
+                               count);
+    } else if (s.IsTwoByteString()) {
+      buffer_.AddEscapedUTF16(TwoByteString::CharAddr(s, offset), count);
+    } else if (s.IsExternalTwoByteString()) {
+      buffer_.AddEscapedUTF16(ExternalTwoByteString::CharAddr(s, offset),
+                              count);
     } else {
-      buffer_.EscapeAndAddCodeUnit(code_unit);
+      UNREACHABLE();
     }
   }
+
   // Return value indicates whether the string is truncated.
+  intptr_t limit = offset + count;
   return (offset > 0) || (limit < length);
 }
 

diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index fbc9b1a..b37ab56 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h

@@ -10235,6 +10235,7 @@
   friend class Utf8;
   friend class OneByteStringMessageSerializationCluster;
   friend class Deserializer;
+  friend class JSONWriter;
 };
 
 class TwoByteString : public AllStatic {
@@ -10353,6 +10354,7 @@
   friend class StringHasher;
   friend class Symbols;
   friend class TwoByteStringMessageSerializationCluster;
+  friend class JSONWriter;
 };
 
 class ExternalOneByteString : public AllStatic {
@@ -10445,6 +10447,7 @@
   friend class StringHasher;
   friend class Symbols;
   friend class Utf8;
+  friend class JSONWriter;
 };
 
 class ExternalTwoByteString : public AllStatic {
@@ -10532,6 +10535,7 @@
   friend class String;
   friend class StringHasher;
   friend class Symbols;
+  friend class JSONWriter;
 };
 
 // Matches null_patch.dart / bool_patch.dart.
commit	f56c45eb37ca336c8033c5ede121f89baf2002f2	[log] [tgz]
author	Ryan Macnak <rmacnak@google.com>	Tue Feb 21 18:58:42 2023 +0000
committer	Commit Queue <dart-scoped@luci-project-accounts.iam.gserviceaccount.com>	Tue Feb 21 18:58:42 2023 +0000
tree	05a97696aed54c4eeab1d359701e93c01702f6e4
parent	004b400dfbed92326d2972a5192449f1ff84796f [diff]