[VM] Bare instructions - Part 5: Get rid of CODE_REG indirection in SwitchableCalls

If the --use-bare-instructions flag is enabled, we will:

  * Make call sites load the target entry address directly from the pool
    (instead of indirectly via the code object) - this saves one
    instruction and an indirect load (see the sketch after this list)

  * Ensure the object pool will have direct entry addresses by:

     - Letting the clustered snapshot reader replace any StubCode::UnlinkedCall()
       in the object pool with its monomorphic entry point
     - Changing the code patcher to patch SwitchableCalls by writing the
       monomorphic entry point into the pool (instead of the code object)
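
A minimal sketch of the indirection this removes at each call site (plain
C++, not VM code; FakeCode and the pool variables are hypothetical
stand-ins):

    #include <cstdint>
    #include <cstdio>

    // Stand-in for a Code object whose entry point the old scheme loads.
    struct FakeCode {
      uintptr_t monomorphic_entry_point;
    };

    int main() {
      FakeCode stub{0x1000};

      // Old scheme: the pool slot holds the Code object, so the call site
      // issues two dependent loads:
      //   movq CODE_REG, [PP + offs]        // load the Code object
      //   movq RCX, [CODE_REG + entry_off]  // load its entry point
      //   call RCX
      FakeCode* old_slot = &stub;
      uintptr_t old_target = old_slot->monomorphic_entry_point;

      // Bare scheme: the pool slot holds the entry address itself:
      //   movq RCX, [PP + offs]
      //   call RCX
      uintptr_t new_slot = stub.monomorphic_entry_point;

      std::printf("same target: %d\n", old_target == new_slot);
      return 0;
    }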

Issue https://github.com/dart-lang/sdk/issues/33274

Change-Id: I4e41fc8e4461bde477cc559a6a4fccaaf3a350b5
Reviewed-on: https://dart-review.googlesource.com/c/86160
Commit-Queue: Martin Kustermann <kustermann@google.com>
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/clustered_snapshot.cc b/runtime/vm/clustered_snapshot.cc
index 2e86c2b..6b68265 100644
--- a/runtime/vm/clustered_snapshot.cc
+++ b/runtime/vm/clustered_snapshot.cc
@@ -5618,7 +5618,38 @@
     }
   }
 
-  deserializer.ReadIsolateSnapshot(thread_->isolate()->object_store());
+  auto object_store = thread_->isolate()->object_store();
+  deserializer.ReadIsolateSnapshot(object_store);
+
+#if defined(DART_PRECOMPILED_RUNTIME)
+  if (FLAG_use_bare_instructions) {
+    // By default, every switchable call site will put (ic_data, code) into the
+    // object pool.  The [code] is initialized (at AOT compile-time) to be a
+    // [StubCode::UnlinkedCall].
+    //
+    // In --use-bare-instructions mode we remove this extra indirection via
+    // the [code] object and instead store (ic_data, entrypoint) in the pool.
+    //
+    // Since the actual [entrypoint] is only known at AOT runtime, we rewrite
+    // all existing UnlinkedCall pool entries to the stub's monomorphic entry.
+    auto zone = thread_->zone();
+    const auto& pool = ObjectPool::Handle(
+        zone, ObjectPool::RawCast(object_store->global_object_pool()));
+    auto& entry = Object::Handle(zone);
+    auto& smi = Smi::Handle(zone);
+    for (intptr_t i = 0; i < pool.Length(); i++) {
+      if (pool.TypeAt(i) == ObjectPool::kTaggedObject) {
+        entry = pool.ObjectAt(i);
+        if (entry.raw() == StubCode::UnlinkedCall().raw()) {
+          smi = Smi::FromAlignedAddress(
+              StubCode::UnlinkedCall().MonomorphicEntryPoint());
+          pool.SetTypeAt(i, ObjectPool::kImmediate, ObjectPool::kPatchable);
+          pool.SetObjectAt(i, smi);
+        }
+      }
+    }
+  }
+#endif  // defined(DART_PRECOMPILED_RUNTIME)
 
   return ApiError::null();
 }
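
Why storing a raw entry address in the pool is GC-safe - a standalone
sketch (not VM code; the constants mirror the VM's Smi tagging scheme):

    #include <cassert>
    #include <cstdint>

    constexpr uintptr_t kSmiTag = 0;
    constexpr uintptr_t kSmiTagMask = 1;

    int main() {
      // A 2-byte-aligned entry address (hypothetical value) has a zero low
      // bit - exactly the Smi tag - so the GC treats the pool entry as an
      // immediate and never tries to dereference it.
      uintptr_t entry_point = 0x100020;
      assert((entry_point & kSmiTagMask) == kSmiTag);
      return 0;
    }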
diff --git a/runtime/vm/code_patcher.cc b/runtime/vm/code_patcher.cc
index 3546b60..4926926 100644
--- a/runtime/vm/code_patcher.cc
+++ b/runtime/vm/code_patcher.cc
@@ -28,7 +28,7 @@
   }
 }
 
-bool MatchesPattern(uword end, int16_t* pattern, intptr_t size) {
+bool MatchesPattern(uword end, const int16_t* pattern, intptr_t size) {
   // When breaking within generated code in GDB, it may overwrite individual
   // instructions with trap instructions, which can cause this test to fail.
   //
diff --git a/runtime/vm/code_patcher.h b/runtime/vm/code_patcher.h
index 8b69986..9b31dcd 100644
--- a/runtime/vm/code_patcher.h
+++ b/runtime/vm/code_patcher.h
@@ -108,7 +108,7 @@
 // [0..255] values in [pattern] have to match, negative values are skipped.
 //
 // Example pattern: `[0x3d, 0x8b, -1, -1]`.
-bool MatchesPattern(uword end, int16_t* pattern, intptr_t size);
+bool MatchesPattern(uword end, const int16_t* pattern, intptr_t size);
 
 class KBCPatcher : public AllStatic {
  public:
diff --git a/runtime/vm/code_patcher_arm.cc b/runtime/vm/code_patcher_arm.cc
index f7204fa..258b035 100644
--- a/runtime/vm/code_patcher_arm.cc
+++ b/runtime/vm/code_patcher_arm.cc
@@ -61,23 +61,39 @@
                                         const Object& data,
                                         const Code& target) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCallPattern call(return_address, caller_code);
-  call.SetData(data);
-  call.SetTarget(target);
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCallPattern call(return_address, caller_code);
+    call.SetData(data);
+    call.SetTarget(target);
+  } else {
+    SwitchableCallPattern call(return_address, caller_code);
+    call.SetData(data);
+    call.SetTarget(target);
+  }
 }
 
 RawCode* CodePatcher::GetSwitchableCallTargetAt(uword return_address,
                                                 const Code& caller_code) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCallPattern call(return_address, caller_code);
-  return call.target();
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCallPattern call(return_address, caller_code);
+    return call.target();
+  } else {
+    SwitchableCallPattern call(return_address, caller_code);
+    return call.target();
+  }
 }
 
 RawObject* CodePatcher::GetSwitchableCallDataAt(uword return_address,
                                                 const Code& caller_code) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCallPattern call(return_address, caller_code);
-  return call.data();
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCallPattern call(return_address, caller_code);
+    return call.data();
+  } else {
+    SwitchableCallPattern call(return_address, caller_code);
+    return call.data();
+  }
 }
 
 void CodePatcher::PatchNativeCallAt(uword return_address,
diff --git a/runtime/vm/code_patcher_arm64.cc b/runtime/vm/code_patcher_arm64.cc
index d084ac1..7716090 100644
--- a/runtime/vm/code_patcher_arm64.cc
+++ b/runtime/vm/code_patcher_arm64.cc
@@ -96,23 +96,39 @@
                                         const Object& data,
                                         const Code& target) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCallPattern call(return_address, caller_code);
-  call.SetData(data);
-  call.SetTarget(target);
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCallPattern call(return_address, caller_code);
+    call.SetData(data);
+    call.SetTarget(target);
+  } else {
+    SwitchableCallPattern call(return_address, caller_code);
+    call.SetData(data);
+    call.SetTarget(target);
+  }
 }
 
 RawCode* CodePatcher::GetSwitchableCallTargetAt(uword return_address,
                                                 const Code& caller_code) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCallPattern call(return_address, caller_code);
-  return call.target();
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCallPattern call(return_address, caller_code);
+    return call.target();
+  } else {
+    SwitchableCallPattern call(return_address, caller_code);
+    return call.target();
+  }
 }
 
 RawObject* CodePatcher::GetSwitchableCallDataAt(uword return_address,
                                                 const Code& caller_code) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCallPattern call(return_address, caller_code);
-  return call.data();
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCallPattern call(return_address, caller_code);
+    return call.data();
+  } else {
+    SwitchableCallPattern call(return_address, caller_code);
+    return call.data();
+  }
 }
 
 void CodePatcher::PatchNativeCallAt(uword return_address,
diff --git a/runtime/vm/code_patcher_x64.cc b/runtime/vm/code_patcher_x64.cc
index bffab80..1794125 100644
--- a/runtime/vm/code_patcher_x64.cc
+++ b/runtime/vm/code_patcher_x64.cc
@@ -12,7 +12,9 @@
 #include "vm/dart_entry.h"
 #include "vm/instructions.h"
 #include "vm/object.h"
+#include "vm/object_store.h"
 #include "vm/raw_object.h"
+#include "vm/reverse_pc_lookup_cache.h"
 
 namespace dart {
 
@@ -209,12 +211,41 @@
 //   load guarded cid            load ICData             load MegamorphicCache
 //   load monomorphic target <-> load ICLookup stub  ->  load MMLookup stub
 //   call target.entry           call stub.entry         call stub.entry
-class SwitchableCall : public ValueObject {
+class SwitchableCallBase : public ValueObject {
  public:
-  SwitchableCall(uword return_address, const Code& code)
+  explicit SwitchableCallBase(const Code& code)
       : object_pool_(ObjectPool::Handle(code.GetObjectPool())),
         target_index_(-1),
-        data_index_(-1) {
+        data_index_(-1) {}
+
+  intptr_t data_index() const { return data_index_; }
+  intptr_t target_index() const { return target_index_; }
+
+  RawObject* data() const { return object_pool_.ObjectAt(data_index()); }
+
+  void SetData(const Object& data) const {
+    ASSERT(!Object::Handle(object_pool_.ObjectAt(data_index())).IsCode());
+    object_pool_.SetObjectAt(data_index(), data);
+    // No need to flush the instruction cache, since the code is not modified.
+  }
+
+ protected:
+  ObjectPool& object_pool_;
+  intptr_t target_index_;
+  intptr_t data_index_;
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(SwitchableCallBase);
+};
+
+// See [SwitchableCallBase] for switchable calls in general.
+//
+// The target slot is always a [Code] object: either the code of the
+// monomorphic function or a stub's code.
+class SwitchableCall : public SwitchableCallBase {
+ public:
+  SwitchableCall(uword return_address, const Code& code)
+      : SwitchableCallBase(code) {
     uword pc = return_address;
 
     // callq RCX
@@ -277,33 +308,96 @@
     ASSERT(Object::Handle(object_pool_.ObjectAt(target_index_)).IsCode());
   }
 
-  intptr_t data_index() const { return data_index_; }
-  intptr_t target_index() const { return target_index_; }
-
-  RawObject* data() const { return object_pool_.ObjectAt(data_index()); }
-  RawCode* target() const {
-    return reinterpret_cast<RawCode*>(object_pool_.ObjectAt(target_index()));
-  }
-
-  void SetData(const Object& data) const {
-    ASSERT(!Object::Handle(object_pool_.ObjectAt(data_index())).IsCode());
-    object_pool_.SetObjectAt(data_index(), data);
-    // No need to flush the instruction cache, since the code is not modified.
-  }
-
   void SetTarget(const Code& target) const {
     ASSERT(Object::Handle(object_pool_.ObjectAt(target_index())).IsCode());
     object_pool_.SetObjectAt(target_index(), target);
     // No need to flush the instruction cache, since the code is not modified.
   }
 
- protected:
-  const ObjectPool& object_pool_;
-  intptr_t target_index_;
-  intptr_t data_index_;
+  RawCode* target() const {
+    return reinterpret_cast<RawCode*>(object_pool_.ObjectAt(target_index()));
+  }
+};
 
- private:
-  DISALLOW_IMPLICIT_CONSTRUCTORS(SwitchableCall);
+// See [SwitchableCallBase] for switchable calls in general.
+//
+// The target slot is always a direct entry point address: either the entry
+// point of the monomorphic function or a stub's entry point.
+class BareSwitchableCall : public SwitchableCallBase {
+ public:
+  BareSwitchableCall(uword return_address, const Code& code)
+      : SwitchableCallBase(code) {
+    object_pool_ = ObjectPool::RawCast(
+        Isolate::Current()->object_store()->global_object_pool());
+
+    uword pc = return_address;
+
+    // callq RCX
+    static int16_t call_pattern[] = {
+        0xff, 0xd1,  //
+    };
+    if (MatchesPattern(pc, call_pattern, ARRAY_SIZE(call_pattern))) {
+      pc -= ARRAY_SIZE(call_pattern);
+    } else {
+      FATAL1("Failed to decode at %" Px, pc);
+    }
+
+    // movq RBX, [PP + offset]
+    static int16_t load_data_disp8[] = {
+        0x49, 0x8b, 0x5f, -1,  //
+    };
+    static int16_t load_data_disp32[] = {
+        0x49, 0x8b, 0x9f, -1, -1, -1, -1,
+    };
+    if (MatchesPattern(pc, load_data_disp8, ARRAY_SIZE(load_data_disp8))) {
+      pc -= ARRAY_SIZE(load_data_disp8);
+      data_index_ = IndexFromPPLoadDisp8(pc + 3);
+    } else if (MatchesPattern(pc, load_data_disp32,
+                              ARRAY_SIZE(load_data_disp32))) {
+      pc -= ARRAY_SIZE(load_data_disp32);
+      data_index_ = IndexFromPPLoadDisp32(pc + 3);
+    } else {
+      FATAL1("Failed to decode at %" Px, pc);
+    }
+    ASSERT(!Object::Handle(object_pool_.ObjectAt(data_index_)).IsCode());
+
+    // movq RCX, [PP + offset]
+    static int16_t load_code_disp8[] = {
+        0x49, 0x8b, 0x4f, -1,  //
+    };
+    static int16_t load_code_disp32[] = {
+        0x49, 0x8b, 0x8f, -1, -1, -1, -1,
+    };
+    if (MatchesPattern(pc, load_code_disp8, ARRAY_SIZE(load_code_disp8))) {
+      pc -= ARRAY_SIZE(load_code_disp8);
+      target_index_ = IndexFromPPLoadDisp8(pc + 3);
+    } else if (MatchesPattern(pc, load_code_disp32,
+                              ARRAY_SIZE(load_code_disp32))) {
+      pc -= ARRAY_SIZE(load_code_disp32);
+      target_index_ = IndexFromPPLoadDisp32(pc + 3);
+    } else {
+      FATAL1("Failed to decode at %" Px, pc);
+    }
+    ASSERT(object_pool_.TypeAt(target_index_) == ObjectPool::kImmediate);
+  }
+
+  void SetTarget(const Code& target) const {
+    ASSERT(object_pool_.TypeAt(target_index()) == ObjectPool::kImmediate);
+    object_pool_.SetRawValueAt(target_index(), target.MonomorphicEntryPoint());
+  }
+
+  RawCode* target() const {
+    const uword pc = object_pool_.RawValueAt(target_index());
+    auto rct = Isolate::Current()->reverse_pc_lookup_cache();
+    if (rct->Contains(pc)) {
+      return rct->Lookup(pc);
+    }
+    rct = Dart::vm_isolate()->reverse_pc_lookup_cache();
+    if (rct->Contains(pc)) {
+      return rct->Lookup(pc);
+    }
+    UNREACHABLE();
+  }
 };
 
 RawCode* CodePatcher::GetStaticCallTargetAt(uword return_address,
@@ -360,23 +454,39 @@
                                         const Object& data,
                                         const Code& target) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCall call(return_address, caller_code);
-  call.SetData(data);
-  call.SetTarget(target);
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCall call(return_address, caller_code);
+    call.SetData(data);
+    call.SetTarget(target);
+  } else {
+    SwitchableCall call(return_address, caller_code);
+    call.SetData(data);
+    call.SetTarget(target);
+  }
 }
 
 RawCode* CodePatcher::GetSwitchableCallTargetAt(uword return_address,
                                                 const Code& caller_code) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCall call(return_address, caller_code);
-  return call.target();
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCall call(return_address, caller_code);
+    return call.target();
+  } else {
+    SwitchableCall call(return_address, caller_code);
+    return call.target();
+  }
 }
 
 RawObject* CodePatcher::GetSwitchableCallDataAt(uword return_address,
                                                 const Code& caller_code) {
   ASSERT(caller_code.ContainsInstructionAt(return_address));
-  SwitchableCall call(return_address, caller_code);
-  return call.data();
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    BareSwitchableCall call(return_address, caller_code);
+    return call.data();
+  } else {
+    SwitchableCall call(return_address, caller_code);
+    return call.data();
+  }
 }
 
 void CodePatcher::PatchNativeCallAt(uword return_address,
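
The decoders above walk backwards from the return address, matching byte
patterns with wildcards. A self-contained sketch of that matching scheme
(not the VM's MatchesPattern itself, but it follows the documented
contract: values in [0..255] must match, negative entries are skipped):

    #include <cstdint>
    #include <cstdio>

    static bool Matches(const uint8_t* end, const int16_t* pattern,
                        intptr_t size) {
      const uint8_t* bytes = end - size;  // patterns end at `end`
      for (intptr_t i = 0; i < size; i++) {
        if (pattern[i] >= 0 && bytes[i] != pattern[i]) return false;
      }
      return true;
    }

    int main() {
      // x64 bytes for "movq RBX, [PP + disp8]" followed by "callq RCX".
      const uint8_t code[] = {0x49, 0x8b, 0x5f, 0x20, 0xff, 0xd1};
      const uint8_t* pc = code + sizeof(code);
      const int16_t call_pattern[] = {0xff, 0xd1};
      const int16_t load_disp8[] = {0x49, 0x8b, 0x5f, -1};  // -1: any byte
      if (Matches(pc, call_pattern, 2)) pc -= 2;
      std::printf("load matched: %d\n", Matches(pc, load_disp8, 4));
      return 0;
    }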
diff --git a/runtime/vm/compiler/assembler/assembler_x64.cc b/runtime/vm/compiler/assembler/assembler_x64.cc
index 1d6be01..74f20d2 100644
--- a/runtime/vm/compiler/assembler/assembler_x64.cc
+++ b/runtime/vm/compiler/assembler/assembler_x64.cc
@@ -1668,11 +1668,18 @@
   movq(TMP, Immediate(kSmiCid));
   jmp(&have_cid, kNearJump);
 
+  // Ensure the monomorphic entry is 2-byte aligned (so the GC can treat such
+  // entry addresses as Smis when stored in ICData / MegamorphicCache arrays).
+  nop(1);
+
   Comment("MonomorphicCheckedEntry");
   ASSERT(CodeSize() - start == Instructions::kPolymorphicEntryOffset);
+  ASSERT((CodeSize() & kSmiTagMask) == kSmiTag);
+
   SmiUntag(RBX);
   testq(RDI, Immediate(kSmiTagMask));
   j(ZERO, &immediate, kNearJump);
+  nop(1);
 
   LoadClassId(TMP, RDI);
 
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
index ac3b643..b61411d 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
@@ -1097,12 +1097,18 @@
   __ Comment("SwitchableCall");
   __ LoadFromOffset(kWord, R0, SP,
                     (ic_data.CountWithoutTypeArgs() - 1) * kWordSize);
-  __ LoadUniqueObject(CODE_REG, initial_stub);
-  intptr_t entry_point_offset =
-      entry_kind == Code::EntryKind::kNormal
-          ? Code::entry_point_offset(Code::EntryKind::kMonomorphic)
-          : Code::entry_point_offset(Code::EntryKind::kMonomorphicUnchecked);
-  __ ldr(LR, FieldAddress(CODE_REG, entry_point_offset));
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    // The AOT runtime will replace the slot in the object pool with the
+    // entrypoint address - see clustered_snapshot.cc.
+    __ LoadUniqueObject(LR, initial_stub);
+  } else {
+    __ LoadUniqueObject(CODE_REG, initial_stub);
+    const intptr_t entry_point_offset =
+        entry_kind == Code::EntryKind::kNormal
+            ? Code::entry_point_offset(Code::EntryKind::kMonomorphic)
+            : Code::entry_point_offset(Code::EntryKind::kMonomorphicUnchecked);
+    __ ldr(LR, FieldAddress(CODE_REG, entry_point_offset));
+  }
   __ LoadUniqueObject(R9, ic_data);
   __ blx(LR);
 
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
index 60cec6f..39fcb9e 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
@@ -1096,11 +1096,18 @@
       op.AddObject(initial_stub, ObjectPool::Patchability::kPatchable);
   ASSERT((ic_data_index + 1) == initial_stub_index);
 
-  __ LoadDoubleWordFromPoolOffset(R5, CODE_REG,
-                                  ObjectPool::element_offset(ic_data_index));
-  __ ldr(TMP, FieldAddress(CODE_REG, Code::entry_point_offset(
-                                         Code::EntryKind::kMonomorphic)));
-  __ blr(TMP);
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    // The AOT runtime will replace the slot in the object pool with the
+    // entrypoint address - see clustered_snapshot.cc.
+    __ LoadDoubleWordFromPoolOffset(R5, LR,
+                                    ObjectPool::element_offset(ic_data_index));
+  } else {
+    __ LoadDoubleWordFromPoolOffset(R5, CODE_REG,
+                                    ObjectPool::element_offset(ic_data_index));
+    __ ldr(LR, FieldAddress(CODE_REG, Code::entry_point_offset(
+                                          Code::EntryKind::kMonomorphic)));
+  }
+  __ blr(LR);
 
   EmitCallsiteMetadata(token_pos, DeoptId::kNone, RawPcDescriptors::kOther,
                        locs);
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc b/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
index 6c32aab..5e0389c 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
@@ -1089,12 +1089,18 @@
 
   __ Comment("SwitchableCall");
   __ movq(RDI, Address(RSP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize));
-  __ LoadUniqueObject(CODE_REG, initial_stub);
-  intptr_t entry_point_offset =
-      entry_kind == Code::EntryKind::kNormal
-          ? Code::entry_point_offset(Code::EntryKind::kMonomorphic)
-          : Code::entry_point_offset(Code::EntryKind::kMonomorphicUnchecked);
-  __ movq(RCX, FieldAddress(CODE_REG, entry_point_offset));
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    // The AOT runtime will replace the slot in the object pool with the
+    // entrypoint address - see clustered_snapshot.cc.
+    __ LoadUniqueObject(RCX, initial_stub);
+  } else {
+    intptr_t entry_point_offset =
+        entry_kind == Code::EntryKind::kNormal
+            ? Code::entry_point_offset(Code::EntryKind::kMonomorphic)
+            : Code::entry_point_offset(Code::EntryKind::kMonomorphicUnchecked);
+    __ LoadUniqueObject(CODE_REG, initial_stub);
+    __ movq(RCX, FieldAddress(CODE_REG, entry_point_offset));
+  }
   __ LoadUniqueObject(RBX, ic_data);
   __ call(RCX);
 
diff --git a/runtime/vm/instructions_arm.cc b/runtime/vm/instructions_arm.cc
index d7cc494..f9137ac 100644
--- a/runtime/vm/instructions_arm.cc
+++ b/runtime/vm/instructions_arm.cc
@@ -12,6 +12,7 @@
 #include "vm/constants_arm.h"
 #include "vm/cpu.h"
 #include "vm/object.h"
+#include "vm/reverse_pc_lookup_cache.h"
 
 namespace dart {
 
@@ -232,10 +233,22 @@
   object_pool_.SetObjectAt(target_code_pool_index_, target_code);
 }
 
-SwitchableCallPattern::SwitchableCallPattern(uword pc, const Code& code)
+SwitchableCallPatternBase::SwitchableCallPatternBase(const Code& code)
     : object_pool_(ObjectPool::Handle(code.GetObjectPool())),
       data_pool_index_(-1),
-      target_pool_index_(-1) {
+      target_pool_index_(-1) {}
+
+RawObject* SwitchableCallPatternBase::data() const {
+  return object_pool_.ObjectAt(data_pool_index_);
+}
+
+void SwitchableCallPatternBase::SetData(const Object& data) const {
+  ASSERT(!Object::Handle(object_pool_.ObjectAt(data_pool_index_)).IsCode());
+  object_pool_.SetObjectAt(data_pool_index_, data);
+}
+
+SwitchableCallPattern::SwitchableCallPattern(uword pc, const Code& code)
+    : SwitchableCallPatternBase(code) {
   ASSERT(code.ContainsInstructionAt(pc));
   // Last instruction: blx lr.
   ASSERT(*(reinterpret_cast<uword*>(pc) - 1) == 0xe12fff3e);
@@ -249,24 +262,49 @@
   ASSERT(reg == CODE_REG);
 }
 
-RawObject* SwitchableCallPattern::data() const {
-  return object_pool_.ObjectAt(data_pool_index_);
-}
-
 RawCode* SwitchableCallPattern::target() const {
   return reinterpret_cast<RawCode*>(object_pool_.ObjectAt(target_pool_index_));
 }
-
-void SwitchableCallPattern::SetData(const Object& data) const {
-  ASSERT(!Object::Handle(object_pool_.ObjectAt(data_pool_index_)).IsCode());
-  object_pool_.SetObjectAt(data_pool_index_, data);
-}
-
 void SwitchableCallPattern::SetTarget(const Code& target) const {
   ASSERT(Object::Handle(object_pool_.ObjectAt(target_pool_index_)).IsCode());
   object_pool_.SetObjectAt(target_pool_index_, target);
 }
 
+BareSwitchableCallPattern::BareSwitchableCallPattern(uword pc, const Code& code)
+    : SwitchableCallPatternBase(code) {
+  ASSERT(code.ContainsInstructionAt(pc));
+  // Last instruction: blx lr.
+  ASSERT(*(reinterpret_cast<uword*>(pc) - 1) == 0xe12fff3e);
+
+  Register reg;
+  uword data_load_end = InstructionPattern::DecodeLoadWordFromPool(
+      pc - Instr::kInstrSize, &reg, &data_pool_index_);
+  ASSERT(reg == R9);
+
+  InstructionPattern::DecodeLoadWordFromPool(data_load_end, &reg,
+                                             &target_pool_index_);
+  ASSERT(reg == LR);
+}
+
+RawCode* BareSwitchableCallPattern::target() const {
+  const uword pc = object_pool_.RawValueAt(target_pool_index_);
+  auto rct = Isolate::Current()->reverse_pc_lookup_cache();
+  if (rct->Contains(pc)) {
+    return rct->Lookup(pc);
+  }
+  rct = Dart::vm_isolate()->reverse_pc_lookup_cache();
+  if (rct->Contains(pc)) {
+    return rct->Lookup(pc);
+  }
+  UNREACHABLE();
+}
+
+void BareSwitchableCallPattern::SetTarget(const Code& target) const {
+  ASSERT(object_pool_.TypeAt(target_pool_index_) == ObjectPool::kImmediate);
+  object_pool_.SetRawValueAt(target_pool_index_,
+                             target.MonomorphicEntryPoint());
+}
+
 ReturnPattern::ReturnPattern(uword pc) : pc_(pc) {}
 
 bool ReturnPattern::IsValid() const {
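
BareSwitchableCallPattern::target() must map a raw entry address back to
its Code object, checking the current isolate's reverse-PC cache first and
falling back to the vm isolate (which owns the stubs). A sketch of that
two-level lookup (the Cache type is a hypothetical stand-in for the VM's
reverse-PC lookup cache):

    #include <cstdint>
    #include <map>

    struct Cache {
      std::map<uintptr_t, const char*> code_by_pc;  // stand-in for RawCode*
      bool Contains(uintptr_t pc) const { return code_by_pc.count(pc) != 0; }
      const char* Lookup(uintptr_t pc) const { return code_by_pc.at(pc); }
    };

    const char* TargetFor(uintptr_t pc, const Cache& isolate_cache,
                          const Cache& vm_cache) {
      if (isolate_cache.Contains(pc)) return isolate_cache.Lookup(pc);
      if (vm_cache.Contains(pc)) return vm_cache.Lookup(pc);
      return nullptr;  // the VM hits UNREACHABLE() here instead
    }

    int main() {
      Cache isolate_cache, vm_cache;
      vm_cache.code_by_pc[0x1000] = "UnlinkedCall stub";
      return TargetFor(0x1000, isolate_cache, vm_cache) != nullptr ? 0 : 1;
    }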
diff --git a/runtime/vm/instructions_arm.h b/runtime/vm/instructions_arm.h
index ceb65e5..70a7636 100644
--- a/runtime/vm/instructions_arm.h
+++ b/runtime/vm/instructions_arm.h
@@ -97,23 +97,52 @@
 //   load guarded cid            load ICData             load MegamorphicCache
 //   load monomorphic target <-> load ICLookup stub  ->  load MMLookup stub
 //   call target.entry           call stub.entry         call stub.entry
-class SwitchableCallPattern : public ValueObject {
+class SwitchableCallPatternBase : public ValueObject {
  public:
-  SwitchableCallPattern(uword pc, const Code& code);
+  explicit SwitchableCallPatternBase(const Code& code);
 
   RawObject* data() const;
-  RawCode* target() const;
   void SetData(const Object& data) const;
-  void SetTarget(const Code& target) const;
 
- private:
+ protected:
   const ObjectPool& object_pool_;
   intptr_t data_pool_index_;
   intptr_t target_pool_index_;
 
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SwitchableCallPatternBase);
+};
+
+// See [SwitchableCallPatternBase] for switchable calls in general.
+//
+// The target slot is always a [Code] object: either the code of the
+// monomorphic function or a stub's code.
+class SwitchableCallPattern : public SwitchableCallPatternBase {
+ public:
+  SwitchableCallPattern(uword pc, const Code& code);
+
+  RawCode* target() const;
+  void SetTarget(const Code& target) const;
+
+ private:
   DISALLOW_COPY_AND_ASSIGN(SwitchableCallPattern);
 };
 
+// See [SwitchableCallPatternBase] for switchable calls in general.
+//
+// The target slot is always a direct entry point address: either the entry
+// point of the monomorphic function or a stub's entry point.
+class BareSwitchableCallPattern : public SwitchableCallPatternBase {
+ public:
+  BareSwitchableCallPattern(uword pc, const Code& code);
+
+  RawCode* target() const;
+  void SetTarget(const Code& target) const;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(BareSwitchableCallPattern);
+};
+
 class ReturnPattern : public ValueObject {
  public:
   explicit ReturnPattern(uword pc);
diff --git a/runtime/vm/instructions_arm64.cc b/runtime/vm/instructions_arm64.cc
index 314ee86..2bf7c37 100644
--- a/runtime/vm/instructions_arm64.cc
+++ b/runtime/vm/instructions_arm64.cc
@@ -260,8 +260,7 @@
   Instr* ldr_instr = Instr::At(start);
 
   // Last instruction is always an ldp into two 64-bit X registers.
-  RELEASE_ASSERT(ldr_instr->IsLoadStoreRegPairOp() &&
-                 (ldr_instr->Bit(22) == 1));
+  ASSERT(ldr_instr->IsLoadStoreRegPairOp() && (ldr_instr->Bit(22) == 1));
 
   // Grab the destination register from the ldp instruction.
   *reg1 = ldr_instr->RtField();
@@ -276,30 +275,30 @@
     pool_offset = base_offset;
   } else {
     // Case 2 & 3.
-    RELEASE_ASSERT(base_reg == TMP);
+    ASSERT(base_reg == TMP);
 
     pool_offset = base_offset;
 
     start -= Instr::kInstrSize;
     Instr* add_instr = Instr::At(start);
-    RELEASE_ASSERT(add_instr->IsAddSubImmOp());
-    RELEASE_ASSERT(add_instr->RdField() == TMP);
+    ASSERT(add_instr->IsAddSubImmOp());
+    ASSERT(add_instr->RdField() == TMP);
 
     const auto shift = add_instr->Imm12ShiftField();
-    RELEASE_ASSERT(shift == 0 || shift == 1);
+    ASSERT(shift == 0 || shift == 1);
     pool_offset += (add_instr->Imm12Field() << (shift == 1 ? 12 : 0));
 
     if (add_instr->RnField() == TMP) {
       start -= Instr::kInstrSize;
       Instr* prev_add_instr = Instr::At(start);
-      RELEASE_ASSERT(prev_add_instr->IsAddSubImmOp());
-      RELEASE_ASSERT(prev_add_instr->RnField() == PP);
+      ASSERT(prev_add_instr->IsAddSubImmOp());
+      ASSERT(prev_add_instr->RnField() == PP);
 
       const auto shift = prev_add_instr->Imm12ShiftField();
-      RELEASE_ASSERT(shift == 0 || shift == 1);
+      ASSERT(shift == 0 || shift == 1);
       pool_offset += (prev_add_instr->Imm12Field() << (shift == 1 ? 12 : 0));
     } else {
-      RELEASE_ASSERT(add_instr->RnField() == PP);
+      ASSERT(add_instr->RnField() == PP);
     }
   }
   *index = ObjectPool::IndexFromOffset(pool_offset - kHeapObjectTag);
@@ -373,10 +372,22 @@
   // No need to flush the instruction cache, since the code is not modified.
 }
 
-SwitchableCallPattern::SwitchableCallPattern(uword pc, const Code& code)
+SwitchableCallPatternBase::SwitchableCallPatternBase(const Code& code)
     : object_pool_(ObjectPool::Handle(code.GetObjectPool())),
       data_pool_index_(-1),
-      target_pool_index_(-1) {
+      target_pool_index_(-1) {}
+
+RawObject* SwitchableCallPatternBase::data() const {
+  return object_pool_.ObjectAt(data_pool_index_);
+}
+
+void SwitchableCallPatternBase::SetData(const Object& data) const {
+  ASSERT(!Object::Handle(object_pool_.ObjectAt(data_pool_index_)).IsCode());
+  object_pool_.SetObjectAt(data_pool_index_, data);
+}
+
+SwitchableCallPattern::SwitchableCallPattern(uword pc, const Code& code)
+    : SwitchableCallPatternBase(code) {
   ASSERT(code.ContainsInstructionAt(pc));
   // Last instruction: blr ip0.
   ASSERT(*(reinterpret_cast<uint32_t*>(pc) - 1) == 0xd63f0200);
@@ -385,31 +396,58 @@
   intptr_t pool_index;
   InstructionPattern::DecodeLoadDoubleWordFromPool(
       pc - 2 * Instr::kInstrSize, &ic_data_reg, &code_reg, &pool_index);
-  RELEASE_ASSERT(ic_data_reg == R5);
-  RELEASE_ASSERT(code_reg == CODE_REG);
+  ASSERT(ic_data_reg == R5);
+  ASSERT(code_reg == CODE_REG);
 
   data_pool_index_ = pool_index;
   target_pool_index_ = pool_index + 1;
 }
 
-RawObject* SwitchableCallPattern::data() const {
-  return object_pool_.ObjectAt(data_pool_index_);
-}
-
 RawCode* SwitchableCallPattern::target() const {
   return reinterpret_cast<RawCode*>(object_pool_.ObjectAt(target_pool_index_));
 }
 
-void SwitchableCallPattern::SetData(const Object& data) const {
-  ASSERT(!Object::Handle(object_pool_.ObjectAt(data_pool_index_)).IsCode());
-  object_pool_.SetObjectAt(data_pool_index_, data);
-}
-
 void SwitchableCallPattern::SetTarget(const Code& target) const {
   ASSERT(Object::Handle(object_pool_.ObjectAt(target_pool_index_)).IsCode());
   object_pool_.SetObjectAt(target_pool_index_, target);
 }
 
+BareSwitchableCallPattern::BareSwitchableCallPattern(uword pc, const Code& code)
+    : SwitchableCallPatternBase(code) {
+  ASSERT(code.ContainsInstructionAt(pc));
+  // Last instruction: blr ip0.
+  ASSERT(*(reinterpret_cast<uint32_t*>(pc) - 1) == 0xd63f0200);
+
+  Register ic_data_reg, code_reg;
+  intptr_t pool_index;
+  InstructionPattern::DecodeLoadDoubleWordFromPool(
+      pc - Instr::kInstrSize, &ic_data_reg, &code_reg, &pool_index);
+  ASSERT(ic_data_reg == R5);
+  ASSERT(code_reg == TMP);
+
+  data_pool_index_ = pool_index;
+  target_pool_index_ = pool_index + 1;
+}
+
+RawCode* BareSwitchableCallPattern::target() const {
+  const uword pc = object_pool_.RawValueAt(target_pool_index_);
+  auto rct = Isolate::Current()->reverse_pc_lookup_cache();
+  if (rct->Contains(pc)) {
+    return rct->Lookup(pc);
+  }
+  rct = Dart::vm_isolate()->reverse_pc_lookup_cache();
+  if (rct->Contains(pc)) {
+    return rct->Lookup(pc);
+  }
+  UNREACHABLE();
+}
+
+void BareSwitchableCallPattern::SetTarget(const Code& target) const {
+  ASSERT(object_pool_.TypeAt(target_pool_index_) == ObjectPool::kImmediate);
+  object_pool_.SetRawValueAt(target_pool_index_,
+                             target.MonomorphicEntryPoint());
+}
+
 ReturnPattern::ReturnPattern(uword pc) : pc_(pc) {}
 
 bool ReturnPattern::IsValid() const {
diff --git a/runtime/vm/instructions_arm64.h b/runtime/vm/instructions_arm64.h
index 3107db6..18a8cc3 100644
--- a/runtime/vm/instructions_arm64.h
+++ b/runtime/vm/instructions_arm64.h
@@ -13,6 +13,7 @@
 #include "vm/constants_arm64.h"
 #include "vm/native_entry.h"
 #include "vm/object.h"
+#include "vm/reverse_pc_lookup_cache.h"
 
 namespace dart {
 
@@ -116,23 +117,52 @@
 //   load guarded cid            load ICData             load MegamorphicCache
 //   load monomorphic target <-> load ICLookup stub  ->  load MMLookup stub
 //   call target.entry           call stub.entry         call stub.entry
-class SwitchableCallPattern : public ValueObject {
+class SwitchableCallPatternBase : public ValueObject {
  public:
-  SwitchableCallPattern(uword pc, const Code& code);
+  explicit SwitchableCallPatternBase(const Code& code);
 
   RawObject* data() const;
-  RawCode* target() const;
   void SetData(const Object& data) const;
-  void SetTarget(const Code& target) const;
 
- private:
+ protected:
   const ObjectPool& object_pool_;
   intptr_t data_pool_index_;
   intptr_t target_pool_index_;
 
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SwitchableCallPatternBase);
+};
+
+// See [SwitchableCallPatternBase] for switchable calls in general.
+//
+// The target slot is always a [Code] object: either the code of the
+// monomorphic function or a stub's code.
+class SwitchableCallPattern : public SwitchableCallPatternBase {
+ public:
+  SwitchableCallPattern(uword pc, const Code& code);
+
+  RawCode* target() const;
+  void SetTarget(const Code& target) const;
+
+ private:
   DISALLOW_COPY_AND_ASSIGN(SwitchableCallPattern);
 };
 
+// See [SwitchableCallPatternBase] for switchable calls in general.
+//
+// The target slot is always a direct entry point address: either the entry
+// point of the monomorphic function or a stub's entry point.
+class BareSwitchableCallPattern : public SwitchableCallPatternBase {
+ public:
+  BareSwitchableCallPattern(uword pc, const Code& code);
+
+  RawCode* target() const;
+  void SetTarget(const Code& target) const;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(BareSwitchableCallPattern);
+};
+
 class ReturnPattern : public ValueObject {
  public:
   explicit ReturnPattern(uword pc);
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index 4acfb6f..5cda55f 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -4288,12 +4288,14 @@
     return reinterpret_cast<uword>(instr->ptr()) + HeaderSize();
   }
 
+  // Note: We keep the checked entrypoint offsets even (emitting NOPs if
+  // necessary) to allow them to be seen as Smis by the GC.
 #if defined(TARGET_ARCH_IA32)
   static const intptr_t kPolymorphicEntryOffset = 0;
   static const intptr_t kMonomorphicEntryOffset = 0;
 #elif defined(TARGET_ARCH_X64)
-  static const intptr_t kPolymorphicEntryOffset = 15;
-  static const intptr_t kMonomorphicEntryOffset = 34;
+  static const intptr_t kPolymorphicEntryOffset = 16;
+  static const intptr_t kMonomorphicEntryOffset = 36;
 #elif defined(TARGET_ARCH_ARM)
   static const intptr_t kPolymorphicEntryOffset = 0;
   static const intptr_t kMonomorphicEntryOffset = 20;
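
A quick parity check (a sketch, not VM code) that the new x64 offsets keep
both checked entry points 2-byte aligned, i.e. Smi-compatible as the note
above requires:

    #include <cstdio>

    int main() {
      const int kSmiTag = 0, kSmiTagMask = 1;
      const int kPolymorphicEntryOffset = 16;   // was 15 (odd - not a Smi)
      const int kMonomorphicEntryOffset = 36;   // was 34
      std::printf("poly even: %d, mono even: %d\n",
                  (kPolymorphicEntryOffset & kSmiTagMask) == kSmiTag,
                  (kMonomorphicEntryOffset & kSmiTagMask) == kSmiTag);
      return 0;
    }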