[vm] Switch representation of line_starts to allow binary searching

Change-Id: Iaa43d3776f1dde10eefc6b951816a12abd5a3ce2
Bug: https://github.com/flutter/flutter/issues/100751
TEST=Added kernel_test.cc, and tested before and after the switch
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/268841
Commit-Queue: Liam Appelbe <liama@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
diff --git a/runtime/vm/kernel.cc b/runtime/vm/kernel.cc
index 804d8f9..b4e34e8 100644
--- a/runtime/vm/kernel.cc
+++ b/runtime/vm/kernel.cc
@@ -24,91 +24,80 @@
     dart::Zone* zone)
     : line_starts_data_(line_starts_data) {
   TypedDataElementType type = line_starts_data_.ElementType();
-  if (type == kInt8ArrayElement) {
-    helper_ = new KernelInt8LineStartsHelper();
-  } else if (type == kInt16ArrayElement) {
-    helper_ = new KernelInt16LineStartsHelper();
-  } else if (type == kInt32ArrayElement) {
-    helper_ = new KernelInt32LineStartsHelper();
+  if (type == kUint16ArrayElement) {
+    helper_ = new KernelUint16LineStartsHelper();
+  } else if (type == kUint32ArrayElement) {
+    helper_ = new KernelUint32LineStartsHelper();
   } else {
     UNREACHABLE();
   }
 }
 
-int32_t KernelLineStartsReader::MaxPosition() const {
+uint32_t KernelLineStartsReader::MaxPosition() const {
   const intptr_t line_count = line_starts_data_.Length();
-  intptr_t current_start = 0;
-  for (intptr_t i = 0; i < line_count; i++) {
-    current_start += helper_->At(line_starts_data_, i);
+  if (line_count == 0) {
+    return 0;
   }
-  return current_start;
+  return helper_->At(line_starts_data_, line_count - 1);
 }
 
 bool KernelLineStartsReader::LocationForPosition(intptr_t position,
                                                  intptr_t* line,
                                                  intptr_t* col) const {
-  intptr_t line_count = line_starts_data_.Length();
-  intptr_t current_start = 0;
-  intptr_t previous_start = 0;
-  for (intptr_t i = 0; i < line_count; ++i) {
-    current_start += helper_->At(line_starts_data_, i);
-    if (current_start > position) {
-      *line = i;
-      if (col != nullptr) {
-        *col = position - previous_start + 1;
-      }
-      return true;
-    }
-    if (current_start == position) {
-      *line = i + 1;
-      if (col != nullptr) {
-        *col = 1;
-      }
-      return true;
-    }
-    previous_start = current_start;
+  const intptr_t line_count = line_starts_data_.Length();
+  if (position < 0 || static_cast<uint32_t>(position) > MaxPosition() ||
+      line_count == 0) {
+    return false;
   }
 
-  return false;
+  intptr_t lo = 0;
+  intptr_t hi = line_count;
+  while (hi > lo + 1) {
+    const intptr_t mid = lo + (hi - lo) / 2;
+    const intptr_t mid_position = helper_->At(line_starts_data_, mid);
+    if (mid_position > position) {
+      hi = mid;
+    } else {
+      lo = mid;
+    }
+  }
+  *line = lo + 1;
+  if (col != nullptr) {
+    *col = position - helper_->At(line_starts_data_, lo) + 1;
+  }
+
+  return true;
 }
 
 bool KernelLineStartsReader::TokenRangeAtLine(
     intptr_t line_number,
     TokenPosition* first_token_index,
     TokenPosition* last_token_index) const {
-  if (line_number < 0 || line_number > line_starts_data_.Length()) {
+  const intptr_t line_count = line_starts_data_.Length();
+  if (line_number <= 0 || line_number > line_count) {
     return false;
   }
-  intptr_t cumulative = 0;
-  for (intptr_t i = 0; i < line_number; ++i) {
-    cumulative += helper_->At(line_starts_data_, i);
-  }
-  *first_token_index = dart::TokenPosition::Deserialize(cumulative);
-  if (line_number == line_starts_data_.Length()) {
+  *first_token_index = dart::TokenPosition::Deserialize(
+      helper_->At(line_starts_data_, line_number - 1));
+  if (line_number == line_count) {
     *last_token_index = *first_token_index;
   } else {
     *last_token_index = dart::TokenPosition::Deserialize(
-        cumulative + helper_->At(line_starts_data_, line_number) - 1);
+        helper_->At(line_starts_data_, line_number) - 1);
   }
   return true;
 }
 
-int32_t KernelLineStartsReader::KernelInt8LineStartsHelper::At(
+uint32_t KernelLineStartsReader::KernelUint16LineStartsHelper::At(
     const dart::TypedData& data,
     intptr_t index) const {
-  return data.GetInt8(index);
+  return data.GetUint16(index << 1);
 }
 
-int32_t KernelLineStartsReader::KernelInt16LineStartsHelper::At(
+uint32_t KernelLineStartsReader::KernelUint32LineStartsHelper::At(
     const dart::TypedData& data,
     intptr_t index) const {
-  return data.GetInt16(index << 1);
-}
-
-int32_t KernelLineStartsReader::KernelInt32LineStartsHelper::At(
-    const dart::TypedData& data,
-    intptr_t index) const {
-  return data.GetInt32(index << 2);
+  return data.GetUint32(index << 2);
 }
 
 class KernelTokenPositionCollector : public KernelReaderHelper {
diff --git a/runtime/vm/kernel.h b/runtime/vm/kernel.h
index c48c811..e7bc426 100644
--- a/runtime/vm/kernel.h
+++ b/runtime/vm/kernel.h
@@ -146,11 +146,11 @@
 
   ~KernelLineStartsReader() { delete helper_; }
 
-  int32_t DeltaAt(intptr_t index) const {
+  uint32_t At(intptr_t index) const {
     return helper_->At(line_starts_data_, index);
   }
 
-  int32_t MaxPosition() const;
+  uint32_t MaxPosition() const;
 
   // Returns whether the given offset corresponds to a valid source offset
   // If it does, then *line and *column (if column is not nullptr) are set
@@ -173,37 +173,28 @@
    public:
     KernelLineStartsHelper() {}
     virtual ~KernelLineStartsHelper() {}
-    virtual int32_t At(const dart::TypedData& data, intptr_t index) const = 0;
+    virtual uint32_t At(const dart::TypedData& data, intptr_t index) const = 0;
 
    private:
     DISALLOW_COPY_AND_ASSIGN(KernelLineStartsHelper);
   };
 
-  class KernelInt8LineStartsHelper : public KernelLineStartsHelper {
+  class KernelUint16LineStartsHelper : public KernelLineStartsHelper {
    public:
-    KernelInt8LineStartsHelper() {}
-    virtual int32_t At(const dart::TypedData& data, intptr_t index) const;
+    KernelUint16LineStartsHelper() {}
+    virtual uint32_t At(const dart::TypedData& data, intptr_t index) const;
 
    private:
-    DISALLOW_COPY_AND_ASSIGN(KernelInt8LineStartsHelper);
+    DISALLOW_COPY_AND_ASSIGN(KernelUint16LineStartsHelper);
   };
 
-  class KernelInt16LineStartsHelper : public KernelLineStartsHelper {
+  class KernelUint32LineStartsHelper : public KernelLineStartsHelper {
    public:
-    KernelInt16LineStartsHelper() {}
-    virtual int32_t At(const dart::TypedData& data, intptr_t index) const;
+    KernelUint32LineStartsHelper() {}
+    virtual uint32_t At(const dart::TypedData& data, intptr_t index) const;
 
    private:
-    DISALLOW_COPY_AND_ASSIGN(KernelInt16LineStartsHelper);
-  };
-
-  class KernelInt32LineStartsHelper : public KernelLineStartsHelper {
-   public:
-    KernelInt32LineStartsHelper() {}
-    virtual int32_t At(const dart::TypedData& data, intptr_t index) const;
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(KernelInt32LineStartsHelper);
+    DISALLOW_COPY_AND_ASSIGN(KernelUint32LineStartsHelper);
   };
 
   const dart::TypedData& line_starts_data_;
diff --git a/runtime/vm/kernel_binary.cc b/runtime/vm/kernel_binary.cc
index 4029608..2b31883 100644
--- a/runtime/vm/kernel_binary.cc
+++ b/runtime/vm/kernel_binary.cc
@@ -35,43 +35,29 @@
 }
 
 TypedDataPtr Reader::ReadLineStartsData(intptr_t line_start_count) {
-  TypedData& line_starts_data = TypedData::Handle(
-      TypedData::New(kTypedDataInt8ArrayCid, line_start_count, Heap::kOld));
-
   const intptr_t start_offset = offset();
-  intptr_t i = 0;
-  for (; i < line_start_count; ++i) {
+
+  // Choose representation between Uint16 and Uint32 typed data.
+  intptr_t max_start = 0;
+  for (intptr_t i = 0; i < line_start_count; ++i) {
     const intptr_t delta = ReadUInt();
-    if (delta > kMaxInt8) {
-      break;
-    }
-    line_starts_data.SetInt8(i, static_cast<int8_t>(delta));
+    max_start += delta;
   }
 
-  if (i < line_start_count) {
-    // Slow path: choose representation between Int16 and Int32 typed data.
-    set_offset(start_offset);
-    intptr_t max_delta = 0;
-    for (intptr_t i = 0; i < line_start_count; ++i) {
-      const intptr_t delta = ReadUInt();
-      if (delta > max_delta) {
-        max_delta = delta;
-      }
-    }
+  const intptr_t cid = (max_start <= kMaxUint16) ? kTypedDataUint16ArrayCid
+                                                 : kTypedDataUint32ArrayCid;
+  const TypedData& line_starts_data =
+      TypedData::Handle(TypedData::New(cid, line_start_count, Heap::kOld));
 
-    ASSERT(max_delta > kMaxInt8);
-    const intptr_t cid = (max_delta <= kMaxInt16) ? kTypedDataInt16ArrayCid
-                                                  : kTypedDataInt32ArrayCid;
-    line_starts_data = TypedData::New(cid, line_start_count, Heap::kOld);
-
-    set_offset(start_offset);
-    for (intptr_t i = 0; i < line_start_count; ++i) {
-      const intptr_t delta = ReadUInt();
-      if (cid == kTypedDataInt16ArrayCid) {
-        line_starts_data.SetInt16(i << 1, static_cast<int16_t>(delta));
-      } else {
-        line_starts_data.SetInt32(i << 2, delta);
-      }
+  set_offset(start_offset);
+  intptr_t current_start = 0;
+  for (intptr_t i = 0; i < line_start_count; ++i) {
+    const intptr_t delta = ReadUInt();
+    current_start += delta;
+    if (cid == kTypedDataUint16ArrayCid) {
+      line_starts_data.SetUint16(i << 1, static_cast<uint16_t>(current_start));
+    } else {
+      line_starts_data.SetUint32(i << 2, current_start);
     }
   }
 
diff --git a/runtime/vm/kernel_test.cc b/runtime/vm/kernel_test.cc
new file mode 100644
index 0000000..a86651b
--- /dev/null
+++ b/runtime/vm/kernel_test.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2022, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+#include <type_traits>
+
+#include "platform/globals.h"
+
+#include "vm/kernel.h"
+#include "vm/object.h"
+#include "vm/unit_test.h"
+
+namespace dart {
+
+const dart::TypedData& CreateLineStartsData() {
+  const intptr_t raw_line_starts_data[] = {
+      0, 8, 12, 17, 18, 20, 23, 30, 31, 33,
+  };
+  const intptr_t length = std::extent<decltype(raw_line_starts_data)>::value;
+  ASSERT(length > 0);
+  const TypedData& line_starts_data = TypedData::Handle(
+      TypedData::New(kTypedDataUint16ArrayCid, length, Heap::kOld));
+  for (intptr_t i = 0; i < length; ++i) {
+    line_starts_data.SetUint16(i << 1,
+                               static_cast<int16_t>(raw_line_starts_data[i]));
+  }
+  return line_starts_data;
+}
+
+ISOLATE_UNIT_TEST_CASE(KernelLineStartsReader_MaxPosition) {
+  kernel::KernelLineStartsReader reader(CreateLineStartsData(), thread->zone());
+  EXPECT_EQ(33u, reader.MaxPosition());
+}
+
+void ExpectLocationForPosition(const kernel::KernelLineStartsReader& reader,
+                               intptr_t position,
+                               intptr_t expected_line,
+                               intptr_t expected_col) {
+  intptr_t line;
+  intptr_t col;
+  EXPECT_EQ(true, reader.LocationForPosition(position, &line, &col));
+  EXPECT_EQ(expected_line, line);
+  EXPECT_EQ(expected_col, col);
+}
+
+ISOLATE_UNIT_TEST_CASE(KernelLineStartsReader_LocationForPosition) {
+  kernel::KernelLineStartsReader reader(CreateLineStartsData(), thread->zone());
+  ExpectLocationForPosition(reader, 0, 1, 1);
+  ExpectLocationForPosition(reader, 4, 1, 5);
+  ExpectLocationForPosition(reader, 8, 2, 1);
+  ExpectLocationForPosition(reader, 14, 3, 3);
+  ExpectLocationForPosition(reader, 17, 4, 1);
+  ExpectLocationForPosition(reader, 19, 5, 2);
+  ExpectLocationForPosition(reader, 22, 6, 3);
+  ExpectLocationForPosition(reader, 29, 7, 7);
+  ExpectLocationForPosition(reader, 30, 8, 1);
+  ExpectLocationForPosition(reader, 32, 9, 2);
+  ExpectLocationForPosition(reader, 33, 10, 1);
+
+  intptr_t line;
+  intptr_t col;
+  EXPECT_EQ(false, reader.LocationForPosition(-1, &line, &col));
+  EXPECT_EQ(false, reader.LocationForPosition(34, &line, &col));
+}
+
+void ExpectTokenRangeAtLine(const kernel::KernelLineStartsReader& reader,
+                            intptr_t line,
+                            intptr_t expected_first_token,
+                            intptr_t expected_last_token) {
+  TokenPosition first_token = TokenPosition::Synthetic(0);
+  TokenPosition last_token = TokenPosition::Synthetic(0);
+  EXPECT_EQ(true, reader.TokenRangeAtLine(line, &first_token, &last_token));
+  EXPECT_EQ(expected_first_token, first_token.Serialize());
+  EXPECT_EQ(expected_last_token, last_token.Serialize());
+}
+
+ISOLATE_UNIT_TEST_CASE(KernelLineStartsReader_TokenRangeAtLine) {
+  kernel::KernelLineStartsReader reader(CreateLineStartsData(), thread->zone());
+  ExpectTokenRangeAtLine(reader, 1, 0, 7);
+  ExpectTokenRangeAtLine(reader, 2, 8, 11);
+  ExpectTokenRangeAtLine(reader, 3, 12, 16);
+  ExpectTokenRangeAtLine(reader, 4, 17, 17);
+  ExpectTokenRangeAtLine(reader, 5, 18, 19);
+  ExpectTokenRangeAtLine(reader, 6, 20, 22);
+  ExpectTokenRangeAtLine(reader, 7, 23, 29);
+  ExpectTokenRangeAtLine(reader, 8, 30, 30);
+  ExpectTokenRangeAtLine(reader, 9, 31, 32);
+  ExpectTokenRangeAtLine(reader, 10, 33, 33);
+
+  TokenPosition first_token = TokenPosition::Synthetic(0);
+  TokenPosition last_token = TokenPosition::Synthetic(0);
+  EXPECT_EQ(false, reader.TokenRangeAtLine(0, &first_token, &last_token));
+  EXPECT_EQ(false, reader.TokenRangeAtLine(11, &first_token, &last_token));
+}
+
+}  // namespace dart
diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
index c1d4006..001c4fd 100644
--- a/runtime/vm/object.cc
+++ b/runtime/vm/object.cc
@@ -12168,13 +12168,12 @@
   int token_index = 0;
 
   kernel::KernelLineStartsReader line_starts_reader(line_starts_data, zone);
-  intptr_t previous_start = 0;
   for (int line_index = 0; line_index < line_count; ++line_index) {
-    intptr_t start = previous_start + line_starts_reader.DeltaAt(line_index);
+    intptr_t start = line_starts_reader.At(line_index);
     // Output the rest of the tokens if we have no next line.
     intptr_t end = TokenPosition::kMaxSourcePos;
     if (line_index + 1 < line_count) {
-      end = start + line_starts_reader.DeltaAt(line_index + 1);
+      end = line_starts_reader.At(line_index + 1);
     }
     bool first = true;
     while (token_index < token_count) {
@@ -12195,7 +12194,6 @@
       info.Add(value);
       ++token_index;
     }
-    previous_start = start;
   }
 #endif  // !defined(DART_PRECOMPILED_RUNTIME)
   return info.ptr();
diff --git a/runtime/vm/vm_sources.gni b/runtime/vm/vm_sources.gni
index 4af1fd1..4413fde 100644
--- a/runtime/vm/vm_sources.gni
+++ b/runtime/vm/vm_sources.gni
@@ -423,6 +423,7 @@
   "isolate_reload_test.cc",
   "isolate_test.cc",
   "json_test.cc",
+  "kernel_test.cc",
   "log_test.cc",
   "longjump_test.cc",
   "malloc_hooks_test.cc",