[VM] Bare instructions - Part 6: Get rid of CODE_REG indirection in CallThroughCode/CallThroughFunction stubs

If the --use-bare-instructions flag is enabled we will:

  * Not load CODE_REG inside CallThroughCode (polymorphic case)

  * Not load CODE_REG inside CallThroughFunction (megamorphic case)

  * Store the EntryPoint of the function in the MegamorphicCache tables
    (instead of a Function pointer)

Issue https://github.com/dart-lang/sdk/issues/33274

Change-Id: I8aca689dc4e92ece897972da4ae202338527ac62
Reviewed-on: https://dart-review.googlesource.com/c/86928
Commit-Queue: Martin Kustermann <kustermann@google.com>
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/clustered_snapshot.cc b/runtime/vm/clustered_snapshot.cc
index 6b68265..27536c5 100644
--- a/runtime/vm/clustered_snapshot.cc
+++ b/runtime/vm/clustered_snapshot.cc
@@ -2340,6 +2340,32 @@
       cache->ptr()->filled_entry_count_ = d->Read<int32_t>();
     }
   }
+
+  void PostLoad(const Array& refs, Snapshot::Kind kind, Zone* zone) {
+#if defined(DART_PRECOMPILED_RUNTIME)
+    if (FLAG_use_bare_instructions) {
+      // By default, every megamorphic call site will load the target
+      // [Function] from the hash table and call indirectly via loading the
+      // entrypoint from the function.
+      //
+      // In --use-bare-instruction we reduce the extra indirection via the
+      // [Function] object by storing the entry point directly into the hashmap.
+      //
+      // Currently our AOT compiler will emit megamorphic calls in certain
+      // situations (namely in slow-path code of CheckedSmi* instructions).
+      //
+      // TODO(compiler-team): Change the CheckedSmi* slow path code to use
+      // normal switchable calls instead of megamorphic calls. (This is also a
+      // memory balance beause [MegamorphicCache]s are per-selector while
+      // [ICData] are per-callsite.)
+      auto& cache = MegamorphicCache::Handle(zone);
+      for (intptr_t i = start_index_; i < stop_index_; ++i) {
+        cache ^= refs.At(i);
+        cache.SwitchToBareInstructions();
+      }
+    }
+#endif  // defined(DART_PRECOMPILED_RUNTIME)
+  }
 };
 
 #if !defined(DART_PRECOMPILED_RUNTIME)
diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
index e2f3264..e08de19 100644
--- a/runtime/vm/object.cc
+++ b/runtime/vm/object.cc
@@ -15259,8 +15259,8 @@
     const Array& new_buckets =
         Array::Handle(Array::New(kEntryLength * new_capacity));
 
-    Function& target = Function::Handle(
-        MegamorphicCacheTable::miss_handler(Isolate::Current()));
+    auto& target =
+        Object::Handle(MegamorphicCacheTable::miss_handler(Isolate::Current()));
     for (intptr_t i = 0; i < new_capacity; ++i) {
       SetEntry(new_buckets, i, smi_illegal_cid(), target);
     }
@@ -15273,15 +15273,14 @@
     for (intptr_t i = 0; i < old_capacity; ++i) {
       class_id ^= GetClassId(old_buckets, i);
       if (class_id.Value() != kIllegalCid) {
-        target ^= GetTargetFunction(old_buckets, i);
+        target = GetTargetFunction(old_buckets, i);
         Insert(class_id, target);
       }
     }
   }
 }
 
-void MegamorphicCache::Insert(const Smi& class_id,
-                              const Function& target) const {
+void MegamorphicCache::Insert(const Smi& class_id, const Object& target) const {
   ASSERT(static_cast<double>(filled_entry_count() + 1) <=
          (kLoadFactor * static_cast<double>(mask() + 1)));
   const Array& backing_array = Array::Handle(buckets());
@@ -15305,6 +15304,23 @@
                      name.ToCString());
 }
 
+void MegamorphicCache::SwitchToBareInstructions() {
+  NoSafepointScope no_safepoint_scope;
+
+  intptr_t capacity = mask() + 1;
+  for (intptr_t i = 0; i < capacity; ++i) {
+    const intptr_t target_index = i * kEntryLength + kTargetFunctionIndex;
+    RawObject** slot = &Array::DataOf(buckets())[target_index];
+    const intptr_t cid = (*slot)->GetClassIdMayBeSmi();
+    if (cid == kFunctionCid) {
+      RawCode* code = Function::CurrentCodeOf(Function::RawCast(*slot));
+      *slot = Smi::FromAlignedAddress(Code::EntryPoint(code));
+    } else {
+      ASSERT(cid == kSmiCid);
+    }
+  }
+}
+
 RawSubtypeTestCache* SubtypeTestCache::New() {
   ASSERT(Object::subtypetestcache_class() != Class::null());
   SubtypeTestCache& result = SubtypeTestCache::Handle();
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index 5cda55f..ee91913 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -2273,7 +2273,11 @@
 
   // Return the most recently compiled and installed code for this function.
   // It is not the only Code object that points to this function.
-  RawCode* CurrentCode() const { return raw_ptr()->code_; }
+  RawCode* CurrentCode() const { return CurrentCodeOf(raw()); }
+
+  static RawCode* CurrentCodeOf(const RawFunction* function) {
+    return function->ptr()->code_;
+  }
 
   RawCode* unoptimized_code() const {
 #if defined(DART_PRECOMPILED_RUNTIME)
@@ -5541,7 +5545,9 @@
 
   void EnsureCapacity() const;
 
-  void Insert(const Smi& class_id, const Function& target) const;
+  void Insert(const Smi& class_id, const Object& target) const;
+
+  void SwitchToBareInstructions();
 
   static intptr_t InstanceSize() {
     return RoundedAllocationSize(sizeof(RawMegamorphicCache));
@@ -5566,7 +5572,7 @@
   static inline void SetEntry(const Array& array,
                               intptr_t index,
                               const Smi& class_id,
-                              const Function& target);
+                              const Object& target);
 
   static inline RawObject* GetClassId(const Array& array, intptr_t index);
   static inline RawObject* GetTargetFunction(const Array& array,
@@ -9257,8 +9263,20 @@
 void MegamorphicCache::SetEntry(const Array& array,
                                 intptr_t index,
                                 const Smi& class_id,
-                                const Function& target) {
+                                const Object& target) {
+  ASSERT(target.IsFunction() || target.IsSmi());
   array.SetAt((index * kEntryLength) + kClassIdIndex, class_id);
+#if defined(DART_PRECOMPILED_RUNTIME)
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    if (target.IsFunction()) {
+      const auto& function = Function::Cast(target);
+      const auto& entry_point = Smi::Handle(
+          Smi::FromAlignedAddress(Code::EntryPoint(function.CurrentCode())));
+      array.SetAt((index * kEntryLength) + kTargetFunctionIndex, entry_point);
+      return;
+    }
+  }
+#endif  // defined(DART_PRECOMPILED_RUNTIME)
   array.SetAt((index * kEntryLength) + kTargetFunctionIndex, target);
 }
 
diff --git a/runtime/vm/stub_code_arm.cc b/runtime/vm/stub_code_arm.cc
index 7c66ac2..71a8559 100644
--- a/runtime/vm/stub_code_arm.cc
+++ b/runtime/vm/stub_code_arm.cc
@@ -2680,10 +2680,18 @@
   // proper target for the given name and arguments descriptor.  If the
   // illegal class id was found, the target is a cache miss handler that can
   // be invoked as a normal Dart function.
-  __ ldr(R0, FieldAddress(IP, base + kWordSize));
-  __ ldr(R4, FieldAddress(R9, MegamorphicCache::arguments_descriptor_offset()));
-  __ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
-  __ Branch(FieldAddress(R0, Function::entry_point_offset()));
+  const auto target_address = FieldAddress(IP, base + kWordSize);
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    __ ldr(ARGS_DESC_REG,
+           FieldAddress(R9, MegamorphicCache::arguments_descriptor_offset()));
+    __ Branch(target_address);
+  } else {
+    __ ldr(R0, target_address);
+    __ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
+    __ ldr(ARGS_DESC_REG,
+           FieldAddress(R9, MegamorphicCache::arguments_descriptor_offset()));
+    __ Branch(FieldAddress(R0, Function::entry_point_offset()));
+  }
 
   // Probe failed, check if it is a miss.
   __ Bind(&probe_failed);
@@ -2704,7 +2712,8 @@
 //  R4: arguments descriptor
 void StubCode::GenerateICCallThroughFunctionStub(Assembler* assembler) {
   Label loop, found, miss;
-  __ ldr(R4, FieldAddress(R9, ICData::arguments_descriptor_offset()));
+  __ ldr(ARGS_DESC_REG,
+         FieldAddress(R9, ICData::arguments_descriptor_offset()));
   __ ldr(R8, FieldAddress(R9, ICData::ic_data_offset()));
   __ AddImmediate(R8, Array::data_offset() - kHeapObjectTag);
   // R8: first IC entry
@@ -2737,8 +2746,8 @@
 
 void StubCode::GenerateICCallThroughCodeStub(Assembler* assembler) {
   Label loop, found, miss;
-  __ ldr(R4, FieldAddress(R9, ICData::arguments_descriptor_offset()));
   __ ldr(R8, FieldAddress(R9, ICData::ic_data_offset()));
+  __ ldr(R4, FieldAddress(R9, ICData::arguments_descriptor_offset()));
   __ AddImmediate(R8, Array::data_offset() - kHeapObjectTag);
   // R8: first IC entry
   __ LoadTaggedClassIdMayBeSmi(R1, R0);
@@ -2759,7 +2768,9 @@
   __ Bind(&found);
   const intptr_t code_offset = ICData::CodeIndexFor(1) * kWordSize;
   const intptr_t entry_offset = ICData::EntryPointIndexFor(1) * kWordSize;
-  __ ldr(CODE_REG, Address(R8, code_offset));
+  if (!(FLAG_precompiled_mode && FLAG_use_bare_instructions)) {
+    __ ldr(CODE_REG, Address(R8, code_offset));
+  }
   __ Branch(Address(R8, entry_offset));
 
   __ Bind(&miss);
diff --git a/runtime/vm/stub_code_arm64.cc b/runtime/vm/stub_code_arm64.cc
index 7a2b85a..91996d9 100644
--- a/runtime/vm/stub_code_arm64.cc
+++ b/runtime/vm/stub_code_arm64.cc
@@ -2945,10 +2945,18 @@
   // proper target for the given name and arguments descriptor.  If the
   // illegal class id was found, the target is a cache miss handler that can
   // be invoked as a normal Dart function.
-  __ ldr(R0, FieldAddress(TMP, base + kWordSize));
-  __ ldr(R4, FieldAddress(R5, MegamorphicCache::arguments_descriptor_offset()));
-  __ ldr(R1, FieldAddress(R0, Function::entry_point_offset()));
-  __ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
+  const auto target_address = FieldAddress(TMP, base + kWordSize);
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    __ ldr(R1, target_address);
+    __ ldr(ARGS_DESC_REG,
+           FieldAddress(R5, MegamorphicCache::arguments_descriptor_offset()));
+  } else {
+    __ ldr(R0, target_address);
+    __ ldr(R1, FieldAddress(R0, Function::entry_point_offset()));
+    __ ldr(ARGS_DESC_REG,
+           FieldAddress(R5, MegamorphicCache::arguments_descriptor_offset()));
+    __ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
+  }
   __ br(R1);
 
   // Probe failed, check if it is a miss.
@@ -2975,7 +2983,8 @@
 //  R4: arguments descriptor
 void StubCode::GenerateICCallThroughFunctionStub(Assembler* assembler) {
   Label loop, found, miss;
-  __ ldr(R4, FieldAddress(R5, ICData::arguments_descriptor_offset()));
+  __ ldr(ARGS_DESC_REG,
+         FieldAddress(R5, ICData::arguments_descriptor_offset()));
   __ ldr(R8, FieldAddress(R5, ICData::ic_data_offset()));
   __ AddImmediate(R8, Array::data_offset() - kHeapObjectTag);
   // R8: first IC entry
@@ -3010,8 +3019,8 @@
 
 void StubCode::GenerateICCallThroughCodeStub(Assembler* assembler) {
   Label loop, found, miss;
-  __ ldr(R4, FieldAddress(R5, ICData::arguments_descriptor_offset()));
   __ ldr(R8, FieldAddress(R5, ICData::ic_data_offset()));
+  __ ldr(R4, FieldAddress(R5, ICData::arguments_descriptor_offset()));
   __ AddImmediate(R8, Array::data_offset() - kHeapObjectTag);
   // R8: first IC entry
   __ LoadTaggedClassIdMayBeSmi(R1, R0);
@@ -3033,7 +3042,9 @@
   const intptr_t code_offset = ICData::CodeIndexFor(1) * kWordSize;
   const intptr_t entry_offset = ICData::EntryPointIndexFor(1) * kWordSize;
   __ ldr(R1, Address(R8, entry_offset));
-  __ ldr(CODE_REG, Address(R8, code_offset));
+  if (!(FLAG_precompiled_mode && FLAG_use_bare_instructions)) {
+    __ ldr(CODE_REG, Address(R8, code_offset));
+  }
   __ br(R1);
 
   __ Bind(&miss);
diff --git a/runtime/vm/stub_code_x64.cc b/runtime/vm/stub_code_x64.cc
index 42b6b21..7184e25 100644
--- a/runtime/vm/stub_code_x64.cc
+++ b/runtime/vm/stub_code_x64.cc
@@ -2952,12 +2952,19 @@
   // proper target for the given name and arguments descriptor.  If the
   // illegal class id was found, the target is a cache miss handler that can
   // be invoked as a normal Dart function.
-  __ movq(RAX, FieldAddress(RDI, RCX, TIMES_8, base + kWordSize));
-  __ movq(R10,
-          FieldAddress(RBX, MegamorphicCache::arguments_descriptor_offset()));
-  __ movq(RCX, FieldAddress(RAX, Function::entry_point_offset()));
-  __ movq(CODE_REG, FieldAddress(RAX, Function::code_offset()));
-  __ jmp(RCX);
+  const auto target_address = FieldAddress(RDI, RCX, TIMES_8, base + kWordSize);
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    __ movq(R10,
+            FieldAddress(RBX, MegamorphicCache::arguments_descriptor_offset()));
+    __ jmp(target_address);
+  } else {
+    __ movq(RAX, target_address);
+    __ movq(R10,
+            FieldAddress(RBX, MegamorphicCache::arguments_descriptor_offset()));
+    __ movq(RCX, FieldAddress(RAX, Function::entry_point_offset()));
+    __ movq(CODE_REG, FieldAddress(RAX, Function::code_offset()));
+    __ jmp(RCX);
+  }
 
   // Probe failed, check if it is a miss.
   __ Bind(&probe_failed);
@@ -3044,9 +3051,10 @@
   __ Bind(&found);
   const intptr_t code_offset = ICData::CodeIndexFor(1) * kWordSize;
   const intptr_t entry_offset = ICData::EntryPointIndexFor(1) * kWordSize;
-  __ movq(RCX, Address(R13, entry_offset));
-  __ movq(CODE_REG, Address(R13, code_offset));
-  __ jmp(RCX);
+  if (!(FLAG_precompiled_mode && FLAG_use_bare_instructions)) {
+    __ movq(CODE_REG, Address(R13, code_offset));
+  }
+  __ jmp(Address(R13, entry_offset));
 
   __ Bind(&miss);
   __ LoadIsolate(RAX);