[vm] Speed up JSON encoding.
TEST=ci
Change-Id: I8bfe00472f3a5e4e6680de631072cea0dacc3f55
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/283980
Reviewed-by: Ben Konyi <bkonyi@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>
diff --git a/runtime/platform/text_buffer.cc b/runtime/platform/text_buffer.cc
index 2a3b80f..a451934 100644
--- a/runtime/platform/text_buffer.cc
+++ b/runtime/platform/text_buffer.cc
@@ -62,42 +62,186 @@
buffer_[length_] = '\0';
}
-// Write a UTF-32 code unit so it can be read by a JSON parser in a string
-// literal. Use official encoding from JSON specification. http://json.org/
-void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
+void BaseTextBuffer::AddEscapedUTF8(const char* const s, intptr_t len) {  // Appends the |len| bytes at |s|, JSON-escaping those below 0x80.
+ const uint8_t* cursor = reinterpret_cast<const uint8_t*>(s);
+ const uint8_t* end = cursor + len;
+ // Pass 1: add up the output size so capacity is requested a single time.
+ intptr_t needed = 0;
+ while (cursor < end) {
+ uint8_t codeunit = *cursor++;
+ if (codeunit >= 0x80) {
+ needed += 1;  // Bytes >= 0x80 are copied through unchanged in pass 2.
+ } else {
+ needed += EscapedCodeUnitLength(codeunit);
+ }
+ }
+ // No byte has been written yet when this early return is taken.
+ if (!EnsureCapacity(needed)) return;
+ // Pass 2: rewind and emit the bytes.
+ cursor = reinterpret_cast<const uint8_t*>(s);
+ while (cursor < end) {
+ uint8_t codeunit = *cursor++;
+ if (codeunit >= 0x80) {
+ buffer_[length_++] = codeunit;  // Not decoded or validated here.
+ } else {
+ EscapeAndAddCodeUnit(codeunit);
+ }
+ }
+ buffer_[length_] = '\0';  // Terminator is not counted in length_.
+}
+
+void BaseTextBuffer::AddEscapedLatin1(const uint8_t* const s, intptr_t len) {  // Appends |len| bytes from |s|, treating each byte as one code point.
+ const uint8_t* cursor = s;
+ const uint8_t* end = cursor + len;
+ // Pass 1: add up the output size so capacity is requested a single time.
+ intptr_t needed = 0;
+ while (cursor < end) {
+ needed += EscapedCodeUnitLength(*cursor++);
+ }
+ // No byte has been written yet when this early return is taken.
+ if (!EnsureCapacity(needed)) return;
+ // Pass 2: rewind and emit; EscapeAndAddCodeUnit picks escape vs. UTF-8 form.
+ cursor = s;
+ while (cursor < end) {
+ EscapeAndAddCodeUnit(*cursor++);
+ }
+ buffer_[length_] = '\0';  // Terminator is not counted in length_.
+}
+
+void BaseTextBuffer::AddEscapedUTF16(const uint16_t* s, intptr_t len) {  // Appends |len| UTF-16 code units from |s| in JSON-escaped form.
+ for (const uint16_t* end = s + len; s < end; s++) {
+ if (!EnsureCapacity(6)) break;  // 6 bytes (one \uXXXX escape) is the most a single iteration writes.
+ // Break instead of returning so already-appended text still gets its NUL below.
+ uint16_t code_unit = *s;
+ if (Utf16::IsTrailSurrogate(code_unit)) {
+ EscapeAndAddUTF16CodeUnit(code_unit);  // Unpaired: a paired trail is skipped by the s++ below.
+ } else if (Utf16::IsLeadSurrogate(code_unit)) {
+ if (s + 1 == end) {
+ EscapeAndAddUTF16CodeUnit(code_unit);  // Lead surrogate is the last unit of the input.
+ } else {
+ uint16_t next_code_unit = *(s + 1);
+ if (Utf16::IsTrailSurrogate(next_code_unit)) {
+ uint32_t decoded = Utf16::Decode(code_unit, next_code_unit);
+ EscapeAndAddCodeUnit(decoded);
+ s++;  // Consume the trail surrogate as well.
+ } else {
+ EscapeAndAddUTF16CodeUnit(code_unit);  // Lead surrogate not followed by a trail.
+ }
+ }
+ } else {
+ EscapeAndAddCodeUnit(code_unit);
+ }
+ }
+ buffer_[length_] = '\0';  // Reached on normal completion and on capacity failure.
+}
+
+DART_FORCE_INLINE
+intptr_t BaseTextBuffer::EscapedCodeUnitLength(uint32_t codeunit) {  // Returns how many bytes EscapeAndAddCodeUnit writes for |codeunit|.
switch (codeunit) {
case '"':
- AddRaw(reinterpret_cast<uint8_t const*>("\\\""), 2);
- break;
case '\\':
- AddRaw(reinterpret_cast<uint8_t const*>("\\\\"), 2);
- break;
case '/':
- AddRaw(reinterpret_cast<uint8_t const*>("\\/"), 2);
- break;
case '\b':
- AddRaw(reinterpret_cast<uint8_t const*>("\\b"), 2);
- break;
case '\f':
- AddRaw(reinterpret_cast<uint8_t const*>("\\f"), 2);
- break;
case '\n':
- AddRaw(reinterpret_cast<uint8_t const*>("\\n"), 2);
- break;
case '\r':
- AddRaw(reinterpret_cast<uint8_t const*>("\\r"), 2);
- break;
case '\t':
- AddRaw(reinterpret_cast<uint8_t const*>("\\t"), 2);
- break;
+ return 2;  // Backslash plus one character; shared by every case label above.
default:
if (codeunit < 0x20) {
- EscapeAndAddUTF16CodeUnit(codeunit);
+ return 6;  // \uXXXX form for the remaining control characters.
+ } else if (codeunit <= Utf8::kMaxOneByteChar) {
+ return 1;
+ } else if (codeunit <= Utf8::kMaxTwoByteChar) {
+ return 2;
+ } else if (codeunit <= Utf8::kMaxThreeByteChar) {
+ return 3;
} else {
- char encoded[6];
- intptr_t length = Utf8::Length(codeunit);
- Utf8::Encode(codeunit, encoded);
- AddRaw(reinterpret_cast<uint8_t const*>(encoded), length);
+ ASSERT(codeunit <= Utf8::kMaxFourByteChar);
+ return 4;
+ }
+ }
+}
+
+static uint8_t Hex(uint8_t value) {  // Maps 0-15 to '0'-'9' / 'A'-'F'; callers in this file mask |value| with 0xF first.
+ return value < 10 ? '0' + value : 'A' + value - 10;
+}
+
+// Write a UTF-32 code unit so it can be read by a JSON parser in a string
+// literal. Use official encoding from JSON specification. http://json.org/
+DART_FORCE_INLINE
+void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {  // Does not grow or NUL-terminate the buffer; callers in this file do both.
+ intptr_t remaining = capacity_ - length_;  // Only read by the ASSERTs below.
+ switch (codeunit) {
+ case '"':
+ ASSERT(remaining > 2);  // Strictly greater: leaves a byte for the caller's terminator.
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = '\"';
+ break;
+ case '\\':
+ ASSERT(remaining > 2);
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = '\\';
+ break;
+ case '/':
+ ASSERT(remaining > 2);
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = '/';
+ break;
+ case '\b':
+ ASSERT(remaining > 2);
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = 'b';
+ break;
+ case '\f':
+ ASSERT(remaining > 2);
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = 'f';
+ break;
+ case '\n':
+ ASSERT(remaining > 2);
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = 'n';
+ break;
+ case '\r':
+ ASSERT(remaining > 2);
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = 'r';
+ break;
+ case '\t':
+ ASSERT(remaining > 2);
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = 't';
+ break;
+ default:
+ static constexpr int kMask = ~(1 << 6);  // Clears bit 6 only; with `0x80 |` the low byte becomes 10xxxxxx (assumes the store narrows to 8 bits).
+ if (codeunit < 0x20) {
+ ASSERT(remaining > 6);
+ buffer_[length_++] = '\\';  // Same \uXXXX layout that EscapeAndAddUTF16CodeUnit writes.
+ buffer_[length_++] = 'u';
+ buffer_[length_++] = Hex((codeunit >> 12) & 0xF);
+ buffer_[length_++] = Hex((codeunit >> 8) & 0xF);
+ buffer_[length_++] = Hex((codeunit >> 4) & 0xF);
+ buffer_[length_++] = Hex((codeunit >> 0) & 0xF);
+ } else if (codeunit <= Utf8::kMaxOneByteChar) {
+ ASSERT(remaining > 1);
+ buffer_[length_++] = codeunit;
+ } else if (codeunit <= Utf8::kMaxTwoByteChar) {
+ ASSERT(remaining > 2);
+ buffer_[length_++] = 0xC0 | (codeunit >> 6);  // Lead byte 110xxxxx.
+ buffer_[length_++] = 0x80 | (codeunit & kMask);
+ } else if (codeunit <= Utf8::kMaxThreeByteChar) {
+ ASSERT(remaining > 3);
+ buffer_[length_++] = 0xE0 | (codeunit >> 12);  // Lead byte 1110xxxx.
+ buffer_[length_++] = 0x80 | ((codeunit >> 6) & kMask);
+ buffer_[length_++] = 0x80 | (codeunit & kMask);
+ } else {
+ ASSERT(codeunit <= Utf8::kMaxFourByteChar);
+ ASSERT(remaining > 4);
+ buffer_[length_++] = 0xF0 | (codeunit >> 18);  // Lead byte 11110xxx.
+ buffer_[length_++] = 0x80 | ((codeunit >> 12) & kMask);
+ buffer_[length_++] = 0x80 | ((codeunit >> 6) & kMask);
+ buffer_[length_++] = 0x80 | (codeunit & kMask);
}
}
}
@@ -105,18 +249,22 @@
// Write an incomplete UTF-16 code unit so it can be read by a JSON parser in a
// string literal.
void BaseTextBuffer::EscapeAndAddUTF16CodeUnit(uint16_t codeunit) {
- Printf("\\u%04X", codeunit);
+ intptr_t remaining = capacity_ - length_;  // Only read by the ASSERT below.
+ ASSERT(remaining > 6);  // Space must already be reserved; this function neither grows nor NUL-terminates the buffer.
+ buffer_[length_++] = '\\';
+ buffer_[length_++] = 'u';
+ buffer_[length_++] = Hex((codeunit >> 12) & 0xF);  // Four uppercase hex digits, most significant nibble first.
+ buffer_[length_++] = Hex((codeunit >> 8) & 0xF);
+ buffer_[length_++] = Hex((codeunit >> 4) & 0xF);
+ buffer_[length_++] = Hex((codeunit >> 0) & 0xF);
}
void BaseTextBuffer::AddString(const char* s) {
- Printf("%s", s);
+ AddRaw(reinterpret_cast<const uint8_t*>(s), strlen(s));  // Appends |s| unescaped; strlen requires a non-null, NUL-terminated string.
}
void BaseTextBuffer::AddEscapedString(const char* s) {
- intptr_t len = strlen(s);
- for (int i = 0; i < len; i++) {
- EscapeAndAddCodeUnit(s[i]);
- }
+ AddEscapedUTF8(s, strlen(s));  // |s| must be NUL-terminated; its bytes are handled as described for AddEscapedUTF8.
}
TextBuffer::TextBuffer(intptr_t buf_size) {
@@ -143,6 +291,7 @@
intptr_t remaining = capacity_ - length_;
if (remaining <= len) {
intptr_t new_size = capacity_ + Utils::Maximum(capacity_, len + 1);
+ new_size = Utils::Maximum(new_size, static_cast<intptr_t>(256));
char* new_buf = reinterpret_cast<char*>(realloc(buffer_, new_size));
buffer_ = new_buf;
capacity_ = new_size;
diff --git a/runtime/platform/text_buffer.h b/runtime/platform/text_buffer.h
index 78f8e4f..9743a49 100644
--- a/runtime/platform/text_buffer.h
+++ b/runtime/platform/text_buffer.h
@@ -22,12 +22,14 @@
intptr_t Printf(const char* format, ...) PRINTF_ATTRIBUTE(2, 3);
intptr_t VPrintf(const char* format, va_list args);
void AddChar(char ch);
- void EscapeAndAddUTF16CodeUnit(uint16_t cu);
- void EscapeAndAddCodeUnit(uint32_t cu);
void AddString(const char* s);
- void AddEscapedString(const char* s);
void AddRaw(const uint8_t* buffer, intptr_t buffer_length);
+ void AddEscapedString(const char* s);  // NUL-terminated input; forwards to AddEscapedUTF8.
+ void AddEscapedUTF8(const char* s, intptr_t len);  // Escapes bytes below 0x80; bytes >= 0x80 are copied as-is.
+ void AddEscapedLatin1(const uint8_t* code_units, intptr_t len);  // Each byte is one code point.
+ void AddEscapedUTF16(const uint16_t* code_units, intptr_t len);  // Surrogate pairs are combined; lone surrogates become \uXXXX.
+
// Returns a pointer to the current internal buffer. Whether the pointer is
// still valid after the BaseTextBuffer dies depends on the subclass.
char* buffer() const { return buffer_; }
@@ -37,6 +39,11 @@
// should be assumed to invalidate the contents of previous calls to buffer().
virtual void Clear() = 0;
+ private:
+ intptr_t EscapedCodeUnitLength(uint32_t cu);
+ void EscapeAndAddCodeUnit(uint32_t cu);
+ void EscapeAndAddUTF16CodeUnit(uint16_t cu);
+
protected:
virtual bool EnsureCapacity(intptr_t len) = 0;
diff --git a/runtime/vm/json_writer.cc b/runtime/vm/json_writer.cc
index 5b7ca3a..c420a7c 100644
--- a/runtime/vm/json_writer.cc
+++ b/runtime/vm/json_writer.cc
@@ -348,18 +348,7 @@
if (s == NULL) {
return;
}
- const uint8_t* s8 = reinterpret_cast<const uint8_t*>(s);
- intptr_t i = 0;
- for (; i < len;) {
- // Extract next UTF8 character.
- int32_t ch = 0;
- int32_t ch_len = Utf8::Decode(&s8[i], len - i, &ch);
- ASSERT(ch_len != 0);
- buffer_.EscapeAndAddCodeUnit(ch);
- // Move i forward.
- i += ch_len;
- }
- ASSERT(i == len);
+ buffer_.AddEscapedUTF8(s, len);
}
bool JSONWriter::AddDartString(const String& s,
@@ -373,29 +362,26 @@
if (!Utils::RangeCheck(offset, count, length)) {
count = length - offset;
}
- intptr_t limit = offset + count;
- for (intptr_t i = offset; i < limit; i++) {
- uint16_t code_unit = s.CharAt(i);
- if (Utf16::IsTrailSurrogate(code_unit)) {
- buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
- } else if (Utf16::IsLeadSurrogate(code_unit)) {
- if (i + 1 == limit) {
- buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
- } else {
- uint16_t next_code_unit = s.CharAt(i + 1);
- if (Utf16::IsTrailSurrogate(next_code_unit)) {
- uint32_t decoded = Utf16::Decode(code_unit, next_code_unit);
- buffer_.EscapeAndAddCodeUnit(decoded);
- i++;
- } else {
- buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
- }
- }
+
+ if (count > 0) { // Avoid asserts about harmless out-of-bounds index.
+ NoSafepointScope no_safepoint;
+ if (s.IsOneByteString()) {
+ buffer_.AddEscapedLatin1(OneByteString::CharAddr(s, offset), count);
+ } else if (s.IsExternalOneByteString()) {
+ buffer_.AddEscapedLatin1(ExternalOneByteString::CharAddr(s, offset),
+ count);
+ } else if (s.IsTwoByteString()) {
+ buffer_.AddEscapedUTF16(TwoByteString::CharAddr(s, offset), count);
+ } else if (s.IsExternalTwoByteString()) {
+ buffer_.AddEscapedUTF16(ExternalTwoByteString::CharAddr(s, offset),
+ count);
} else {
- buffer_.EscapeAndAddCodeUnit(code_unit);
+ UNREACHABLE();
}
}
+
// Return value indicates whether the string is truncated.
+ intptr_t limit = offset + count;
return (offset > 0) || (limit < length);
}
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index fbc9b1a..b37ab56 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -10235,6 +10235,7 @@
friend class Utf8;
friend class OneByteStringMessageSerializationCluster;
friend class Deserializer;
+ friend class JSONWriter;
};
class TwoByteString : public AllStatic {
@@ -10353,6 +10354,7 @@
friend class StringHasher;
friend class Symbols;
friend class TwoByteStringMessageSerializationCluster;
+ friend class JSONWriter;
};
class ExternalOneByteString : public AllStatic {
@@ -10445,6 +10447,7 @@
friend class StringHasher;
friend class Symbols;
friend class Utf8;
+ friend class JSONWriter;
};
class ExternalTwoByteString : public AllStatic {
@@ -10532,6 +10535,7 @@
friend class String;
friend class StringHasher;
friend class Symbols;
+ friend class JSONWriter;
};
// Matches null_patch.dart / bool_patch.dart.