[vm, jit] Make unoptimized instance calls compatible with megamorphic calls.

This is preparation for switching a call site's stub to the megamorphic stub when its polymorphism gets too high.
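
Illustrative sketch of the now-shared call-site shape on x64 (argc,
cache_object, and stub are placeholder names, not identifiers from this
CL): the receiver and the cache object are placed in fixed registers
before the call, so either stub can be installed at the same call site.

  __ movq(RDX, Address(RSP, (argc - 1) * kWordSize));  // Receiver.
  __ LoadUniqueObject(RBX, cache_object);  // ICData or MegamorphicCache.
  __ call(stub);  // IC stub or megamorphic checked entry.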

Bug: https://github.com/dart-lang/sdk/issues/26780
Bug: https://github.com/dart-lang/sdk/issues/29294
Bug: https://github.com/dart-lang/sdk/issues/36409
Change-Id: If601ea97a1942a567bee0847afeb3b4e9c11481a
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/99705
Commit-Queue: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_x64.cc b/runtime/vm/compiler/assembler/assembler_x64.cc
index 64f3d7e..97ff005 100644
--- a/runtime/vm/compiler/assembler/assembler_x64.cc
+++ b/runtime/vm/compiler/assembler/assembler_x64.cc
@@ -1732,7 +1732,7 @@
   LeaveDartFrame();
 }
 
-// RDI receiver, RBX guarded cid as Smi.
+// RDX receiver, RBX guarded cid as Smi.
 // Preserve R10 (ARGS_DESC_REG), not required today, but maybe later.
 void Assembler::MonomorphicCheckedEntry() {
   has_single_entry_point_ = false;
@@ -1754,11 +1754,10 @@
   ASSERT((CodeSize() & kSmiTagMask) == kSmiTag);
 
   SmiUntag(RBX);
-  testq(RDI, Immediate(kSmiTagMask));
+  testq(RDX, Immediate(kSmiTagMask));
   j(ZERO, &immediate, kNearJump);
-  nop(1);
 
-  LoadClassId(TMP, RDI);
+  LoadClassId(TMP, RDX);
 
   Bind(&have_cid);
   cmpq(TMP, RBX);
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
index 04a4a85..c53c257 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
@@ -1054,6 +1054,8 @@
   // Pass the function explicitly, it is used in IC stub.
 
   __ LoadObject(R8, parsed_function().function());
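+  // Load receiver into R0.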
+  __ LoadFromOffset(kWord, R0, SP,
+                    (ic_data.CountWithoutTypeArgs() - 1) * kWordSize);
   __ LoadUniqueObject(R9, ic_data);
   GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs,
                    entry_kind);
@@ -1066,6 +1068,8 @@
                                          TokenPosition token_pos,
                                          LocationSummary* locs) {
   ASSERT(Array::Handle(zone(), ic_data.arguments_descriptor()).Length() > 0);
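+  // Load receiver into R0.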
+  __ LoadFromOffset(kWord, R0, SP,
+                    (ic_data.CountWithoutTypeArgs() - 1) * kWordSize);
   __ LoadUniqueObject(R9, ic_data);
   GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs);
   __ Drop(ic_data.CountWithTypeArgs());
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
index 943aaec..8e76737 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
@@ -1040,6 +1040,7 @@
   // Pass the function explicitly, it is used in IC stub.
 
   __ LoadObject(R6, parsed_function().function());
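+  // Load receiver into R0.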
+  __ LoadFromOffset(R0, SP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize);
   __ LoadUniqueObject(R5, ic_data);
   GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs);
   __ Drop(ic_data.CountWithTypeArgs());
@@ -1051,6 +1052,7 @@
                                          TokenPosition token_pos,
                                          LocationSummary* locs) {
   ASSERT(Array::Handle(zone(), ic_data.arguments_descriptor()).Length() > 0);
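+  // Load receiver into R0.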
+  __ LoadFromOffset(R0, SP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize);
   __ LoadUniqueObject(R5, ic_data);
   GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs);
   __ Drop(ic_data.CountWithTypeArgs());
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_ia32.cc b/runtime/vm/compiler/backend/flow_graph_compiler_ia32.cc
index 4281752..5fdf8f9 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_ia32.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_ia32.cc
@@ -911,7 +911,9 @@
   // top-level function (parsed_function().function()) which could be
   // reoptimized and which counter needs to be incremented.
   // Pass the function explicitly, it is used in IC stub.
-  __ LoadObject(EBX, parsed_function().function());
+  __ LoadObject(EAX, parsed_function().function());
+  // Load receiver into EBX.
+  __ movl(EBX, Address(ESP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize));
   __ LoadObject(ECX, ic_data);
   GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs);
   __ Drop(ic_data.CountWithTypeArgs());
@@ -923,6 +925,8 @@
                                          TokenPosition token_pos,
                                          LocationSummary* locs) {
   ASSERT(Array::Handle(ic_data.arguments_descriptor()).Length() > 0);
+  // Load receiver into EBX.
+  __ movl(EBX, Address(ESP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize));
   __ LoadObject(ECX, ic_data);
   GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs);
   __ Drop(ic_data.CountWithTypeArgs());
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc b/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
index 5e296e6..46e9f35 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
@@ -1046,6 +1046,8 @@
   // reoptimized and which counter needs to be incremented.
   // Pass the function explicitly, it is used in IC stub.
   __ LoadObject(RDI, parsed_function().function());
+  // Load receiver into RDX.
+  __ movq(RDX, Address(RSP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize));
   __ LoadUniqueObject(RBX, ic_data);
   GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs,
                    entry_kind);
@@ -1058,6 +1060,8 @@
                                          TokenPosition token_pos,
                                          LocationSummary* locs) {
   ASSERT(Array::Handle(zone(), ic_data.arguments_descriptor()).Length() > 0);
+  // Load receiver into RDX.
+  __ movq(RDX, Address(RSP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize));
   __ LoadUniqueObject(RBX, ic_data);
   GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs);
   __ Drop(ic_data.CountWithTypeArgs(), RCX);
@@ -1077,8 +1081,8 @@
       zone(),
       MegamorphicCacheTable::Lookup(isolate(), name, arguments_descriptor));
   __ Comment("MegamorphicCall");
-  // Load receiver into RDI.
-  __ movq(RDI, Address(RSP, (args_desc.Count() - 1) * kWordSize));
+  // Load receiver into RDX.
+  __ movq(RDX, Address(RSP, (args_desc.Count() - 1) * kWordSize));
   __ LoadObject(RBX, cache);
   __ call(Address(THR, Thread::megamorphic_call_checked_entry_offset()));
 
@@ -1116,7 +1120,7 @@
   const Code& initial_stub = StubCode::ICCallThroughFunction();
 
   __ Comment("SwitchableCall");
-  __ movq(RDI, Address(RSP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize));
+  __ movq(RDX, Address(RSP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize));
   if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
     // The AOT runtime will replace the slot in the object pool with the
     // entrypoint address - see clustered_snapshot.cc.
diff --git a/runtime/vm/compiler/stub_code_compiler.h b/runtime/vm/compiler/stub_code_compiler.h
index c227889..b7d0332 100644
--- a/runtime/vm/compiler/stub_code_compiler.h
+++ b/runtime/vm/compiler/stub_code_compiler.h
@@ -41,13 +41,27 @@
   static void GenerateMegamorphicMissStub(Assembler* assembler);
   static void GenerateAllocationStubForClass(Assembler* assembler,
                                              const Class& cls);
+
+  enum Optimized {
+    kUnoptimized,
+    kOptimized,
+  };
+  enum CallType {
+    kInstanceCall,
+    kStaticCall,
+  };
+  enum Exactness {
+    kCheckExactness,
+    kIgnoreExactness,
+  };
   static void GenerateNArgsCheckInlineCacheStub(
       Assembler* assembler,
       intptr_t num_args,
       const RuntimeEntry& handle_ic_miss,
       Token::Kind kind,
-      bool optimized = false,
-      bool exactness_check = false);
+      Optimized optimized,
+      CallType type,
+      Exactness exactness);
   static void GenerateUsageCounterIncrement(Assembler* assembler,
                                             Register temp_reg);
   static void GenerateOptimizedUsageCounterIncrement(Assembler* assembler);
diff --git a/runtime/vm/compiler/stub_code_compiler_arm.cc b/runtime/vm/compiler/stub_code_compiler_arm.cc
index efd4d67..5d9aad0 100644
--- a/runtime/vm/compiler/stub_code_compiler_arm.cc
+++ b/runtime/vm/compiler/stub_code_compiler_arm.cc
@@ -1901,8 +1901,9 @@
 }
 
 // Generate inline cache check for 'num_args'.
-//  LR: return address.
-//  R9: inline cache data object.
+//  R0: receiver (if instance call)
+//  R9: ICData
+//  LR: return address
 // Control flow:
 // - If receiver is null -> jump to IC miss.
 // - If receiver is Smi -> load Smi class.
@@ -1915,9 +1916,10 @@
     intptr_t num_args,
     const RuntimeEntry& handle_ic_miss,
     Token::Kind kind,
-    bool optimized,
-    bool exactness_check /* = false */) {
-  ASSERT(!exactness_check);
+    Optimized optimized,
+    CallType type,
+    Exactness exactness) {
+  ASSERT(exactness == kIgnoreExactness);  // Unimplemented.
   __ CheckCodePointer();
   ASSERT(num_args == 1 || num_args == 2);
 #if defined(DEBUG)
@@ -1937,7 +1939,7 @@
 
 #if !defined(PRODUCT)
   Label stepping, done_stepping;
-  if (!optimized) {
+  if (optimized == kUnoptimized) {
     __ Comment("Check single stepping");
     __ LoadIsolate(R8);
     __ ldrb(R8, Address(R8, target::Isolate::single_step_offset()));
@@ -1954,31 +1956,50 @@
   __ Bind(&not_smi_or_overflow);
 
   __ Comment("Extract ICData initial values and receiver cid");
-  // Load arguments descriptor into R4.
-  __ ldr(R4, FieldAddress(R9, target::ICData::arguments_descriptor_offset()));
-  // Loop that checks if there is an IC data match.
-  Label loop, found, miss;
   // R9: IC data object (preserved).
   __ ldr(R8, FieldAddress(R9, target::ICData::entries_offset()));
   // R8: ic_data_array with check entries: classes and target functions.
   const int kIcDataOffset = target::Array::data_offset() - kHeapObjectTag;
   // R8: points at the IC data array.
 
-  // Get the receiver's class ID (first read number of arguments from
-  // arguments descriptor array and then access the receiver from the stack).
-  __ ldr(NOTFP, FieldAddress(R4, target::ArgumentsDescriptor::count_offset()));
-  __ sub(NOTFP, NOTFP, Operand(target::ToRawSmi(1)));
-  // NOTFP: argument_count - 1 (smi).
+  if (type == kInstanceCall) {
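+    // Instance call: the caller already loaded the receiver into R0.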
+    __ LoadTaggedClassIdMayBeSmi(R0, R0);
+    __ ldr(R4, FieldAddress(R9, target::ICData::arguments_descriptor_offset()));
+    if (num_args == 2) {
+      __ ldr(NOTFP,
+             FieldAddress(R4, target::ArgumentsDescriptor::count_offset()));
+      __ sub(R1, NOTFP, Operand(target::ToRawSmi(2)));
+      __ ldr(R1, Address(SP, R1, LSL, 1));  // R1 (argument_count - 2) is Smi.
+      __ LoadTaggedClassIdMayBeSmi(R1, R1);
+    }
+  } else {
+    // Load arguments descriptor into R4.
+    __ ldr(R4, FieldAddress(R9, target::ICData::arguments_descriptor_offset()));
 
-  __ Comment("ICData loop");
+    // Get the receiver's class ID (first read number of arguments from
+    // arguments descriptor array and then access the receiver from the stack).
+    __ ldr(NOTFP,
+           FieldAddress(R4, target::ArgumentsDescriptor::count_offset()));
+    __ sub(NOTFP, NOTFP, Operand(target::ToRawSmi(1)));
+    // NOTFP: argument_count - 1 (smi).
 
-  __ ldr(R0, Address(SP, NOTFP, LSL, 1));  // NOTFP (argument_count - 1) is Smi.
-  __ LoadTaggedClassIdMayBeSmi(R0, R0);
-  if (num_args == 2) {
-    __ sub(R1, NOTFP, Operand(target::ToRawSmi(1)));
-    __ ldr(R1, Address(SP, R1, LSL, 1));  // R1 (argument_count - 2) is Smi.
-    __ LoadTaggedClassIdMayBeSmi(R1, R1);
+    __ ldr(R0,
+           Address(SP, NOTFP, LSL, 1));  // NOTFP (argument_count - 1) is Smi.
+    __ LoadTaggedClassIdMayBeSmi(R0, R0);
+
+    if (num_args == 2) {
+      __ sub(R1, NOTFP, Operand(target::ToRawSmi(1)));
+      __ ldr(R1, Address(SP, R1, LSL, 1));  // R1 (argument_count - 2) is Smi.
+      __ LoadTaggedClassIdMayBeSmi(R1, R1);
+    }
   }
+  // R0: first argument class ID as Smi.
+  // R1: second argument class ID as Smi, if num_args == 2.
+  // R4: args descriptor
+
+  // Loop that checks if there is an IC data match.
+  Label loop, found, miss;
+  __ Comment("ICData loop");
 
   // We unroll the generic one that is generated once more than the others.
   const bool optimize = kind == Token::kILLEGAL;
@@ -1998,9 +2019,9 @@
 
     __ Bind(&update);
 
-    const intptr_t entry_size =
-        target::ICData::TestEntryLengthFor(num_args, exactness_check) *
-        target::kWordSize;
+    const intptr_t entry_size = target::ICData::TestEntryLengthFor(
+                                    num_args, exactness == kCheckExactness) *
+                                target::kWordSize;
     __ AddImmediate(R8, entry_size);  // Next entry.
 
     __ CompareImmediate(R2, target::ToRawSmi(kIllegalCid));  // Done?
@@ -2014,6 +2035,8 @@
   __ Bind(&miss);
   __ Comment("IC miss");
   // Compute address of arguments.
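+  // The argument count is not live here on all paths, so reload it.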
+  __ ldr(NOTFP, FieldAddress(R4, target::ArgumentsDescriptor::count_offset()));
+  __ sub(NOTFP, NOTFP, Operand(target::ToRawSmi(1)));
   // NOTFP: argument_count - 1 (smi).
   __ add(NOTFP, SP, Operand(NOTFP, LSL, 1));  // NOTFP is Smi.
   // NOTFP: address of receiver.
@@ -2069,12 +2092,18 @@
   __ Branch(FieldAddress(R0, target::Function::entry_point_offset()));
 
 #if !defined(PRODUCT)
-  if (!optimized) {
+  if (optimized == kUnoptimized) {
     __ Bind(&stepping);
     __ EnterStubFrame();
+    if (type == kInstanceCall) {
+      __ Push(R0);  // Preserve receiver.
+    }
     __ Push(R9);  // Preserve IC data.
     __ CallRuntime(kSingleStepHandlerRuntimeEntry, 0);
     __ Pop(R9);
+    if (type == kInstanceCall) {
+      __ Pop(R0);
+    }
     __ RestoreCodePointer();
     __ LeaveStubFrame();
     __ b(&done_stepping);
@@ -2082,82 +2111,105 @@
 #endif
 }
 
-// Use inline cache data array to invoke the target or continue in inline
-// cache miss handler. Stub for 1-argument check (receiver class).
-//  LR: return address.
-//  R9: inline cache data object.
-// Inline cache data object structure:
-// 0: function-name
-// 1: N, number of arguments checked.
-// 2 .. (length - 1): group of checks, each check containing:
-//   - N classes.
-//   - 1 target function.
+//  R0: receiver
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateOneArgCheckInlineCacheStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R8);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R8);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL);
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  R0: receiver
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateOneArgCheckInlineCacheWithExactnessCheckStub(
     Assembler* assembler) {
   __ Stop("Unimplemented");
 }
 
+//  R0: receiver
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateTwoArgsCheckInlineCacheStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R8);
-  GenerateNArgsCheckInlineCacheStub(assembler, 2,
-                                    kInlineCacheMissHandlerTwoArgsRuntimeEntry,
-                                    Token::kILLEGAL);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R8);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  R0: receiver
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateSmiAddInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R8);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R8);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  R0: receiver
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R8);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R8);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  R0: receiver
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R8);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R8);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  R0: receiver
+//  R9: ICData
+//  R8: Function
+//  LR: return address
 void StubCodeCompiler::GenerateOneArgOptimizedCheckInlineCacheStub(
     Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 1,
-                                    kInlineCacheMissHandlerOneArgRuntimeEntry,
-                                    Token::kILLEGAL, true /* optimized */);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  R0: receiver
+//  R9: ICData
+//  R8: Function
+//  LR: return address
 void StubCodeCompiler::
     GenerateOneArgOptimizedCheckInlineCacheWithExactnessCheckStub(
         Assembler* assembler) {
   __ Stop("Unimplemented");
 }
 
+//  R0: receiver
+//  R9: ICData
+//  R8: Function
+//  LR: return address
 void StubCodeCompiler::GenerateTwoArgsOptimizedCheckInlineCacheStub(
     Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 2,
-                                    kInlineCacheMissHandlerTwoArgsRuntimeEntry,
-                                    Token::kILLEGAL, true /* optimized */);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kIgnoreExactness);
 }
 
-// Intermediary stub between a static call and its target. ICData contains
-// the target function and the call count.
-// R9: ICData
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateZeroArgsUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R8);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R8);
 #if defined(DEBUG)
   {
     Label ok;
@@ -2220,18 +2272,24 @@
 #endif
 }
 
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateOneArgUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R8);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R8);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 1, kStaticCallMissHandlerOneArgRuntimeEntry, Token::kILLEGAL);
+      assembler, 1, kStaticCallMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kStaticCall, kIgnoreExactness);
 }
 
+//  R9: ICData
+//  LR: return address
 void StubCodeCompiler::GenerateTwoArgsUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R8);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R8);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kStaticCallMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL);
+      assembler, 2, kStaticCallMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kStaticCall, kIgnoreExactness);
 }
 
 // Stub for compiling a function and jumping to the compiled code.
@@ -2331,13 +2389,14 @@
   __ Stop("No debugging in PRODUCT mode");
 #else
   __ EnterStubFrame();
-  __ LoadImmediate(R0, 0);
-  // Preserve arguments descriptor and make room for result.
-  __ PushList((1 << R0) | (1 << R9));
+  __ Push(R0);          // Preserve receiver.
+  __ Push(R9);          // Preserve IC data.
+  __ PushImmediate(0);  // Space for result.
   __ CallRuntime(kBreakpointRuntimeHandlerRuntimeEntry, 0);
-  __ PopList((1 << R0) | (1 << R9));
+  __ Pop(CODE_REG);  // Original stub.
+  __ Pop(R9);        // Restore IC data.
+  __ Pop(R0);        // Restore receiver.
   __ LeaveStubFrame();
-  __ mov(CODE_REG, Operand(R0));
   __ Branch(FieldAddress(CODE_REG, target::Code::entry_point_offset()));
 #endif  // defined(PRODUCT)
 }
diff --git a/runtime/vm/compiler/stub_code_compiler_arm64.cc b/runtime/vm/compiler/stub_code_compiler_arm64.cc
index cfc7edc..211a6c4 100644
--- a/runtime/vm/compiler/stub_code_compiler_arm64.cc
+++ b/runtime/vm/compiler/stub_code_compiler_arm64.cc
@@ -1964,8 +1964,9 @@
 }
 
 // Generate inline cache check for 'num_args'.
-//  LR: return address.
-//  R5: inline cache data object.
+//  R0: receiver (if instance call)
+//  R5: ICData
+//  LR: return address
 // Control flow:
 // - If receiver is null -> jump to IC miss.
 // - If receiver is Smi -> load Smi class.
@@ -1978,9 +1979,10 @@
     intptr_t num_args,
     const RuntimeEntry& handle_ic_miss,
     Token::Kind kind,
-    bool optimized,
-    bool exactness_check /* = false */) {
-  ASSERT(!exactness_check);
+    Optimized optimized,
+    CallType type,
+    Exactness exactness) {
+  ASSERT(exactness == kIgnoreExactness);  // Unimplemented.
   ASSERT(num_args == 1 || num_args == 2);
 #if defined(DEBUG)
   {
@@ -2001,7 +2003,7 @@
 
 #if !defined(PRODUCT)
   Label stepping, done_stepping;
-  if (!optimized) {
+  if (optimized == kUnoptimized) {
     __ Comment("Check single stepping");
     __ LoadIsolate(R6);
     __ LoadFromOffset(R6, R6, target::Isolate::single_step_offset(),
@@ -2019,37 +2021,54 @@
   __ Bind(&not_smi_or_overflow);
 
   __ Comment("Extract ICData initial values and receiver cid");
-  // Load arguments descriptor into R4.
-  __ LoadFieldFromOffset(R4, R5, target::ICData::arguments_descriptor_offset());
-  // Loop that checks if there is an IC data match.
-  Label loop, found, miss;
   // R5: IC data object (preserved).
   __ LoadFieldFromOffset(R6, R5, target::ICData::entries_offset());
   // R6: ic_data_array with check entries: classes and target functions.
   __ AddImmediate(R6, target::Array::data_offset() - kHeapObjectTag);
   // R6: points directly to the first ic data array element.
 
-  // Get the receiver's class ID (first read number of arguments from
-  // arguments descriptor array and then access the receiver from the stack).
-  __ LoadFieldFromOffset(R7, R4, target::ArgumentsDescriptor::count_offset());
-  __ SmiUntag(R7);  // Untag so we can use the LSL 3 addressing mode.
-  __ sub(R7, R7, Operand(1));
-
-  // R0 <- [SP + (R7 << 3)]
-  __ ldr(R0, Address(SP, R7, UXTX, Address::Scaled));
-  __ LoadTaggedClassIdMayBeSmi(R0, R0);
-
-  if (num_args == 2) {
-    __ AddImmediate(R1, R7, -1);
-    // R1 <- [SP + (R1 << 3)]
-    __ ldr(R1, Address(SP, R1, UXTX, Address::Scaled));
-    __ LoadTaggedClassIdMayBeSmi(R1, R1);
+  if (type == kInstanceCall) {
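+    // Instance call: the caller already loaded the receiver into R0.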
+    __ LoadTaggedClassIdMayBeSmi(R0, R0);
+    __ LoadFieldFromOffset(R4, R5,
+                           target::ICData::arguments_descriptor_offset());
+    if (num_args == 2) {
+      __ LoadFieldFromOffset(R7, R4,
+                             target::ArgumentsDescriptor::count_offset());
+      __ SmiUntag(R7);  // Untag so we can use the LSL 3 addressing mode.
+      __ sub(R7, R7, Operand(2));
+      // R1 <- [SP + (R7 << 3)]
+      __ ldr(R1, Address(SP, R7, UXTX, Address::Scaled));
+      __ LoadTaggedClassIdMayBeSmi(R1, R1);
+    }
+  } else {
+    __ LoadFieldFromOffset(R4, R5,
+                           target::ICData::arguments_descriptor_offset());
+    // Get the receiver's class ID (first read number of arguments from
+    // arguments descriptor array and then access the receiver from the stack).
+    __ LoadFieldFromOffset(R7, R4, target::ArgumentsDescriptor::count_offset());
+    __ SmiUntag(R7);  // Untag so we can use the LSL 3 addressing mode.
+    __ sub(R7, R7, Operand(1));
+    // R0 <- [SP + (R7 << 3)]
+    __ ldr(R0, Address(SP, R7, UXTX, Address::Scaled));
+    __ LoadTaggedClassIdMayBeSmi(R0, R0);
+    if (num_args == 2) {
+      __ AddImmediate(R1, R7, -1);
+      // R1 <- [SP + (R1 << 3)]
+      __ ldr(R1, Address(SP, R1, UXTX, Address::Scaled));
+      __ LoadTaggedClassIdMayBeSmi(R1, R1);
+    }
   }
+  // R0: first argument class ID as Smi.
+  // R1: second argument class ID as Smi, if num_args == 2.
+  // R4: args descriptor
 
   // We unroll the generic one that is generated once more than the others.
   const bool optimize = kind == Token::kILLEGAL;
 
+  // Loop that checks if there is an IC data match.
+  Label loop, found, miss;
   __ Comment("ICData loop");
+
   __ Bind(&loop);
   for (int unroll = optimize ? 4 : 2; unroll >= 0; unroll--) {
     Label update;
@@ -2065,9 +2084,9 @@
 
     __ Bind(&update);
 
-    const intptr_t entry_size =
-        target::ICData::TestEntryLengthFor(num_args, exactness_check) *
-        target::kWordSize;
+    const intptr_t entry_size = target::ICData::TestEntryLengthFor(
+                                    num_args, exactness == kCheckExactness) *
+                                target::kWordSize;
     __ AddImmediate(R6, entry_size);  // Next entry.
 
     __ CompareImmediate(R2, target::ToRawSmi(kIllegalCid));  // Done?
@@ -2080,7 +2099,11 @@
 
   __ Bind(&miss);
   __ Comment("IC miss");
+
   // Compute address of arguments.
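+  // The argument count is not live here on all paths, so reload it.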
+  __ LoadFieldFromOffset(R7, R4, target::ArgumentsDescriptor::count_offset());
+  __ SmiUntag(R7);  // Untag so we can use the LSL 3 addressing mode.
+  __ sub(R7, R7, Operand(1));
   // R7: argument_count - 1 (untagged).
   // R7 <- SP + (R7 << 3)
   __ add(R7, SP, Operand(R7, UXTX, 3));  // R7 is Untagged.
@@ -2145,9 +2168,15 @@
-  if (!optimized) {
+  if (optimized == kUnoptimized) {
     __ Bind(&stepping);
     __ EnterStubFrame();
+    if (type == kInstanceCall) {
+      __ Push(R0);  // Preserve receiver.
+    }
     __ Push(R5);  // Preserve IC data.
     __ CallRuntime(kSingleStepHandlerRuntimeEntry, 0);
     __ Pop(R5);
+    if (type == kInstanceCall) {
+      __ Pop(R0);
+    }
     __ RestoreCodePointer();
     __ LeaveStubFrame();
     __ b(&done_stepping);
@@ -2155,79 +2184,105 @@
 #endif
 }
 
-// Use inline cache data array to invoke the target or continue in inline
-// cache miss handler. Stub for 1-argument check (receiver class).
-//  LR: return address.
-//  R5: inline cache data object.
-// Inline cache data object structure:
-// 0: function-name
-// 1: N, number of arguments checked.
-// 2 .. (length - 1): group of checks, each check containing:
-//   - N classes.
-//   - 1 target function.
+// R0: receiver
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateOneArgCheckInlineCacheStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R6);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R6);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL);
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// R0: receiver
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateOneArgCheckInlineCacheWithExactnessCheckStub(
     Assembler* assembler) {
   __ Stop("Unimplemented");
 }
 
+// R0: receiver
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateTwoArgsCheckInlineCacheStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R6);
-  GenerateNArgsCheckInlineCacheStub(assembler, 2,
-                                    kInlineCacheMissHandlerTwoArgsRuntimeEntry,
-                                    Token::kILLEGAL);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R6);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// R0: receiver
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateSmiAddInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R6);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R6);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// R0: receiver
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R6);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R6);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// R0: receiver
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R6);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R6);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// R0: receiver
+// R5: ICData
+// R6: Function
+// LR: return address
 void StubCodeCompiler::GenerateOneArgOptimizedCheckInlineCacheStub(
     Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 1,
-                                    kInlineCacheMissHandlerOneArgRuntimeEntry,
-                                    Token::kILLEGAL, true /* optimized */);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// R0: receiver
+// R5: ICData
+// R6: Function
+// LR: return address
 void StubCodeCompiler::
     GenerateOneArgOptimizedCheckInlineCacheWithExactnessCheckStub(
         Assembler* assembler) {
   __ Stop("Unimplemented");
 }
 
+// R0: receiver
+// R5: ICData
+// R6: Function
+// LR: return address
 void StubCodeCompiler::GenerateTwoArgsOptimizedCheckInlineCacheStub(
     Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 2,
-                                    kInlineCacheMissHandlerTwoArgsRuntimeEntry,
-                                    Token::kILLEGAL, true /* optimized */);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateZeroArgsUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R6);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R6);
 #if defined(DEBUG)
   {
     Label ok;
@@ -2294,18 +2349,24 @@
 #endif
 }
 
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateOneArgUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R6);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R6);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 1, kStaticCallMissHandlerOneArgRuntimeEntry, Token::kILLEGAL);
+      assembler, 1, kStaticCallMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kStaticCall, kIgnoreExactness);
 }
 
+// R5: ICData
+// LR: return address
 void StubCodeCompiler::GenerateTwoArgsUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, R6);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ R6);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kStaticCallMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL);
+      assembler, 2, kStaticCallMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kStaticCall, kIgnoreExactness);
 }
 
 // Stub for compiling a function and jumping to the compiled code.
@@ -2420,14 +2481,16 @@
   __ Stop("No debugging in PRODUCT mode");
 #else
   __ EnterStubFrame();
-  __ Push(R5);
+  __ Push(R0);  // Preserve receiver.
+  __ Push(R5);  // Preserve IC data.
   __ Push(ZR);  // Space for result.
   __ CallRuntime(kBreakpointRuntimeHandlerRuntimeEntry, 0);
-  __ Pop(CODE_REG);
-  __ Pop(R5);
+  __ Pop(CODE_REG);  // Original stub.
+  __ Pop(R5);        // Restore IC data.
+  __ Pop(R0);        // Restore receiver.
   __ LeaveStubFrame();
-  __ LoadFieldFromOffset(R0, CODE_REG, target::Code::entry_point_offset());
-  __ br(R0);
+  __ LoadFieldFromOffset(TMP, CODE_REG, target::Code::entry_point_offset());
+  __ br(TMP);
 #endif  // defined(PRODUCT)
 }
 
diff --git a/runtime/vm/compiler/stub_code_compiler_ia32.cc b/runtime/vm/compiler/stub_code_compiler_ia32.cc
index c899725..657ec31 100644
--- a/runtime/vm/compiler/stub_code_compiler_ia32.cc
+++ b/runtime/vm/compiler/stub_code_compiler_ia32.cc
@@ -1503,7 +1503,7 @@
 void StubCodeCompiler::GenerateOptimizedUsageCounterIncrement(
     Assembler* assembler) {
   Register ic_reg = ECX;
-  Register func_reg = EBX;
+  Register func_reg = EAX;
   if (FLAG_trace_optimized_ic_calls) {
     __ EnterStubFrame();
     __ pushl(func_reg);  // Preserve
@@ -1607,8 +1607,9 @@
 }
 
 // Generate inline cache check for 'num_args'.
-//  ECX: Inline cache data object.
-//  TOS(0): return address
+//  EBX: receiver (if instance call)
+//  ECX: ICData
+//  ESP[0]: return address
 // Control flow:
 // - If receiver is null -> jump to IC miss.
 // - If receiver is Smi -> load Smi class.
@@ -1621,19 +1622,20 @@
     intptr_t num_args,
     const RuntimeEntry& handle_ic_miss,
     Token::Kind kind,
-    bool optimized,
-    bool exactness_check /* = false */) {
-  ASSERT(!exactness_check);  // Not supported.
+    Optimized optimized,
+    CallType type,
+    Exactness exactness) {
+  ASSERT(exactness == kIgnoreExactness);  // Unimplemented.
   ASSERT(num_args == 1 || num_args == 2);
 #if defined(DEBUG)
   {
     Label ok;
     // Check that the IC data array has NumArgsTested() == num_args.
     // 'NumArgsTested' is stored in the least significant bits of 'state_bits'.
-    __ movl(EBX, FieldAddress(ECX, target::ICData::state_bits_offset()));
+    __ movl(EAX, FieldAddress(ECX, target::ICData::state_bits_offset()));
     ASSERT(target::ICData::NumArgsTestedShift() == 0);  // No shift needed.
-    __ andl(EBX, Immediate(target::ICData::NumArgsTestedMask()));
-    __ cmpl(EBX, Immediate(num_args));
+    __ andl(EAX, Immediate(target::ICData::NumArgsTestedMask()));
+    __ cmpl(EAX, Immediate(num_args));
     __ j(EQUAL, &ok, Assembler::kNearJump);
     __ Stop("Incorrect stub for IC data");
     __ Bind(&ok);
@@ -1642,7 +1644,7 @@
 
 #if !defined(PRODUCT)
   Label stepping, done_stepping;
-  if (!optimized) {
+  if (optimized == kUnoptimized) {
     __ Comment("Check single stepping");
     __ LoadIsolate(EAX);
     __ cmpb(Address(EAX, target::Isolate::single_step_offset()), Immediate(0));
@@ -1690,9 +1692,9 @@
       target::ICData::TargetIndexFor(num_args) * target::kWordSize;
   const intptr_t count_offset =
       target::ICData::CountIndexFor(num_args) * target::kWordSize;
-  const intptr_t entry_size =
-      target::ICData::TestEntryLengthFor(num_args, exactness_check) *
-      target::kWordSize;
+  const intptr_t entry_size = target::ICData::TestEntryLengthFor(
+                                  num_args, exactness == kCheckExactness) *
+                              target::kWordSize;
 
   __ Bind(&loop);
   for (int unroll = optimize ? 4 : 2; unroll >= 0; unroll--) {
@@ -1781,105 +1783,119 @@
   __ jmp(EBX);
 
 #if !defined(PRODUCT)
-  if (!optimized) {
+  if (optimized == kUnoptimized) {
     __ Bind(&stepping);
     __ EnterStubFrame();
-    __ pushl(ECX);
+    __ pushl(EBX);  // Preserve receiver.
+    __ pushl(ECX);  // Preserve ICData.
     __ CallRuntime(kSingleStepHandlerRuntimeEntry, 0);
-    __ popl(ECX);
+    __ popl(ECX);  // Restore ICData.
+    __ popl(EBX);  // Restore receiver.
     __ LeaveFrame();
     __ jmp(&done_stepping);
   }
 #endif
 }
 
-// Use inline cache data array to invoke the target or continue in inline
-// cache miss handler. Stub for 1-argument check (receiver class).
-//  ECX: Inline cache data object.
-//  TOS(0): Return address.
-// Inline cache data object structure:
-// 0: function-name
-// 1: N, number of arguments checked.
-// 2 .. (length - 1): group of checks, each check containing:
-//   - N classes.
-//   - 1 target function.
+// EBX: receiver
+// ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateOneArgCheckInlineCacheStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, EBX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ EAX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL);
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// EBX: receiver
+// ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateOneArgCheckInlineCacheWithExactnessCheckStub(
     Assembler* assembler) {
   __ Stop("Unimplemented");
 }
 
+// EBX: receiver
+// ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateTwoArgsCheckInlineCacheStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, EBX);
-  GenerateNArgsCheckInlineCacheStub(assembler, 2,
-                                    kInlineCacheMissHandlerTwoArgsRuntimeEntry,
-                                    Token::kILLEGAL);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ EAX);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// EBX: receiver
+// ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateSmiAddInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, EBX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ EAX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// EBX: receiver
+// ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, EBX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ EAX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// EBX: receiver
+// ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, EBX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ EAX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
-// Use inline cache data array to invoke the target or continue in inline
-// cache miss handler. Stub for 1-argument check (receiver class).
-//  EDI: function which counter needs to be incremented.
-//  ECX: Inline cache data object.
-//  TOS(0): Return address.
-// Inline cache data object structure:
-// 0: function-name
-// 1: N, number of arguments checked.
-// 2 .. (length - 1): group of checks, each check containing:
-//   - N classes.
-//   - 1 target function.
+// EBX: receiver
+// ECX: ICData
+// EAX: Function
+// ESP[0]: return address
 void StubCodeCompiler::GenerateOneArgOptimizedCheckInlineCacheStub(
     Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 1,
-                                    kInlineCacheMissHandlerOneArgRuntimeEntry,
-                                    Token::kILLEGAL, true /* optimized */);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kIgnoreExactness);
 }
 
+// EBX: receiver
+// ECX: ICData
+// EAX: Function
+// ESP[0]: return address
 void StubCodeCompiler::
     GenerateOneArgOptimizedCheckInlineCacheWithExactnessCheckStub(
         Assembler* assembler) {
   __ Stop("Unimplemented");
 }
 
+// EBX: receiver
+// ECX: ICData
+// EAX: Function
+// ESP[0]: return address
 void StubCodeCompiler::GenerateTwoArgsOptimizedCheckInlineCacheStub(
     Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 2,
-                                    kInlineCacheMissHandlerTwoArgsRuntimeEntry,
-                                    Token::kILLEGAL, true /* optimized */);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kIgnoreExactness);
 }
 
-// Intermediary stub between a static call and its target. ICData contains
-// the target function and the call count.
 // ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateZeroArgsUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, EBX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ EAX);
 
 #if defined(DEBUG)
   {
@@ -1940,18 +1956,24 @@
 #endif
 }
 
+// ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateOneArgUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, EBX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ EAX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 1, kStaticCallMissHandlerOneArgRuntimeEntry, Token::kILLEGAL);
+      assembler, 1, kStaticCallMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kStaticCall, kIgnoreExactness);
 }
 
+// ECX: ICData
+// ESP[0]: return address
 void StubCodeCompiler::GenerateTwoArgsUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, EBX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ EAX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kStaticCallMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL);
+      assembler, 2, kStaticCallMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kStaticCall, kIgnoreExactness);
 }
 
 // Stub for compiling a function and jumping to the compiled code.
@@ -2044,14 +2066,13 @@
   __ Stop("No debugging in PRODUCT mode");
 #else
   __ EnterStubFrame();
-  // Save IC data.
-  __ pushl(ECX);
-  // Room for result. Debugger stub returns address of the
-  // unpatched runtime stub.
+  __ pushl(EBX);           // Preserve receiver.
+  __ pushl(ECX);           // Preserve ICData.
   __ pushl(Immediate(0));  // Room for result.
   __ CallRuntime(kBreakpointRuntimeHandlerRuntimeEntry, 0);
   __ popl(EAX);  // Code of original stub.
-  __ popl(ECX);  // Restore IC data.
+  __ popl(ECX);  // Restore ICData.
+  __ popl(EBX);  // Restore receiver.
   __ LeaveFrame();
   // Jump to original stub.
   __ movl(EAX, FieldAddress(EAX, target::Code::entry_point_offset()));
@@ -2587,32 +2608,32 @@
 // Passed to target:
 //  EDX: arguments descriptor
 void StubCodeCompiler::GenerateICCallThroughFunctionStub(Assembler* assembler) {
-  __ int3();
+  __ int3();  // AOT only.
 }
 
 void StubCodeCompiler::GenerateICCallThroughCodeStub(Assembler* assembler) {
-  __ int3();
+  __ int3();  // AOT only.
 }
 
 void StubCodeCompiler::GenerateUnlinkedCallStub(Assembler* assembler) {
-  __ int3();
+  __ int3();  // AOT only.
 }
 
 void StubCodeCompiler::GenerateSingleTargetCallStub(Assembler* assembler) {
-  __ int3();
+  __ int3();  // AOT only.
 }
 
 void StubCodeCompiler::GenerateMonomorphicMissStub(Assembler* assembler) {
-  __ int3();
+  __ int3();  // AOT only.
 }
 
 void StubCodeCompiler::GenerateFrameAwaitingMaterializationStub(
     Assembler* assembler) {
-  __ int3();
+  __ int3();  // Marker stub.
 }
 
 void StubCodeCompiler::GenerateAsynchronousGapMarkerStub(Assembler* assembler) {
-  __ int3();
+  __ int3();  // Marker stub.
 }
 
 }  // namespace compiler
diff --git a/runtime/vm/compiler/stub_code_compiler_x64.cc b/runtime/vm/compiler/stub_code_compiler_x64.cc
index 1d14115..6179c7d 100644
--- a/runtime/vm/compiler/stub_code_compiler_x64.cc
+++ b/runtime/vm/compiler/stub_code_compiler_x64.cc
@@ -870,7 +870,7 @@
 
   // Space for the result of the runtime call.
   __ pushq(Immediate(0));
-  __ pushq(RAX);  // Receiver.
+  __ pushq(RDX);  // Receiver.
   __ pushq(RBX);  // IC data.
   __ pushq(R10);  // Arguments descriptor.
   __ CallRuntime(kMegamorphicCacheMissHandlerRuntimeEntry, 3);
@@ -1941,8 +1941,9 @@
 }
 
 // Generate inline cache check for 'num_args'.
-//  RBX: Inline cache data object.
-//  TOS(0): return address
+//  RDX: receiver (if instance call)
+//  RBX: ICData
+//  RSP[0]: return address
 // Control flow:
 // - If receiver is null -> jump to IC miss.
 // - If receiver is Smi -> load Smi class.
@@ -1955,8 +1956,9 @@
     intptr_t num_args,
     const RuntimeEntry& handle_ic_miss,
     Token::Kind kind,
-    bool optimized,
-    bool exactness_check) {
+    Optimized optimized,
+    CallType type,
+    Exactness exactness) {
   ASSERT(num_args == 1 || num_args == 2);
 #if defined(DEBUG)
   {
@@ -1975,7 +1977,7 @@
 
 #if !defined(PRODUCT)
   Label stepping, done_stepping;
-  if (!optimized) {
+  if (optimized == kUnoptimized) {
     __ Comment("Check single stepping");
     __ LoadIsolate(RAX);
     __ cmpb(Address(RAX, target::Isolate::single_step_offset()), Immediate(0));
@@ -1991,30 +1993,40 @@
   __ Bind(&not_smi_or_overflow);
 
   __ Comment("Extract ICData initial values and receiver cid");
-  // Load arguments descriptor into R10.
-  __ movq(R10,
-          FieldAddress(RBX, target::ICData::arguments_descriptor_offset()));
-  // Loop that checks if there is an IC data match.
-  Label loop, found, miss;
   // RBX: IC data object (preserved).
   __ movq(R13, FieldAddress(RBX, target::ICData::entries_offset()));
   // R13: ic_data_array with check entries: classes and target functions.
   __ leaq(R13, FieldAddress(R13, target::Array::data_offset()));
   // R13: points directly to the first ic data array element.
 
-  // Get argument count as Smi into RCX.
-  __ movq(RCX, FieldAddress(R10, target::ArgumentsDescriptor::count_offset()));
-  // Load first argument into RDX.
-  __ movq(RDX, Address(RSP, RCX, TIMES_4, 0));
-  __ LoadTaggedClassIdMayBeSmi(RAX, RDX);
-  // RAX: first argument class ID as Smi.
-  if (num_args == 2) {
-    // Load second argument into R9.
-    __ movq(R9, Address(RSP, RCX, TIMES_4, -target::kWordSize));
-    __ LoadTaggedClassIdMayBeSmi(RCX, R9);
-    // RCX: second argument class ID (smi).
+  if (type == kInstanceCall) {
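+    // Instance call: the caller already loaded the receiver into RDX.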
+    __ LoadTaggedClassIdMayBeSmi(RAX, RDX);
+    __ movq(R10,
+            FieldAddress(RBX, target::ICData::arguments_descriptor_offset()));
+    if (num_args == 2) {
+      __ movq(RCX,
+              FieldAddress(R10, target::ArgumentsDescriptor::count_offset()));
+      __ movq(R9, Address(RSP, RCX, TIMES_4, -target::kWordSize));
+      __ LoadTaggedClassIdMayBeSmi(RCX, R9);
+    }
+  } else {
+    __ movq(R10,
+            FieldAddress(RBX, target::ICData::arguments_descriptor_offset()));
+    __ movq(RCX,
+            FieldAddress(R10, target::ArgumentsDescriptor::count_offset()));
+    __ movq(RDX, Address(RSP, RCX, TIMES_4, 0));
+    __ LoadTaggedClassIdMayBeSmi(RAX, RDX);
+    if (num_args == 2) {
+      __ movq(R9, Address(RSP, RCX, TIMES_4, -target::kWordSize));
+      __ LoadTaggedClassIdMayBeSmi(RCX, R9);
+    }
   }
+  // RAX: first argument class ID as Smi.
+  // RCX: second argument class ID as Smi, if num_args == 2.
+  // R10: args descriptor
 
+  // Loop that checks if there is an IC data match.
+  Label loop, found, miss;
   __ Comment("ICData loop");
 
   // We unroll the generic one that is generated once more than the others.
@@ -2041,9 +2053,9 @@
 
     __ Bind(&update);
 
-    const intptr_t entry_size =
-        target::ICData::TestEntryLengthFor(num_args, exactness_check) *
-        target::kWordSize;
+    const intptr_t entry_size = target::ICData::TestEntryLengthFor(
+                                    num_args, exactness == kCheckExactness) *
+                                target::kWordSize;
     __ addq(R13, Immediate(entry_size));  // Next entry.
 
     __ cmpq(R9, Immediate(target::ToRawSmi(kIllegalCid)));  // Done?
@@ -2090,7 +2102,7 @@
   __ Bind(&found);
   // R13: Pointer to an IC data check group.
   Label call_target_function_through_unchecked_entry;
-  if (exactness_check) {
+  if (exactness == kCheckExactness) {
     Label exactness_ok;
     ASSERT(num_args == 1);
     __ movq(RAX, Address(R13, exactness_offset));
@@ -2129,10 +2141,9 @@
   __ Bind(&call_target_function);
   // RAX: Target function.
   __ movq(CODE_REG, FieldAddress(RAX, target::Function::code_offset()));
-  __ movq(RCX, FieldAddress(RAX, target::Function::entry_point_offset()));
-  __ jmp(RCX);
+  __ jmp(FieldAddress(RAX, target::Function::entry_point_offset()));
 
-  if (exactness_check) {
+  if (exactness == kCheckExactness) {
     __ Bind(&call_target_function_through_unchecked_entry);
     if (FLAG_optimization_counter_threshold >= 0) {
       __ Comment("Update ICData counter");
@@ -2142,18 +2153,22 @@
     __ Comment("Call target (via unchecked entry point)");
     __ movq(RAX, Address(R13, target_offset));
     __ movq(CODE_REG, FieldAddress(RAX, target::Function::code_offset()));
-    __ movq(RCX, FieldAddress(
-                     RAX, target::Function::unchecked_entry_point_offset()));
-    __ jmp(RCX);
+    __ jmp(FieldAddress(RAX, target::Function::unchecked_entry_point_offset()));
   }
 
 #if !defined(PRODUCT)
-  if (!optimized) {
+  if (optimized == kUnoptimized) {
     __ Bind(&stepping);
     __ EnterStubFrame();
-    __ pushq(RBX);
+    if (type == kInstanceCall) {
+      __ pushq(RDX);  // Preserve receiver.
+    }
+    __ pushq(RBX);  // Preserve ICData.
     __ CallRuntime(kSingleStepHandlerRuntimeEntry, 0);
-    __ popq(RBX);
+    __ popq(RBX);  // Restore ICData.
+    if (type == kInstanceCall) {
+      __ popq(RDX);  // Restore receiver.
+    }
     __ RestoreCodePointer();
     __ LeaveStubFrame();
     __ jmp(&done_stepping);
@@ -2161,100 +2176,111 @@
 #endif
 }
 
-// Use inline cache data array to invoke the target or continue in inline
-// cache miss handler. Stub for 1-argument check (receiver class).
-//  RBX: Inline cache data object.
-//  TOS(0): Return address.
-// Inline cache data object structure:
-// 0: function-name
-// 1: N, number of arguments checked.
-// 2 .. (length - 1): group of checks, each check containing:
-//   - N classes.
-//   - 1 target function.
+//  RDX: receiver
+//  RBX: ICData
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateOneArgCheckInlineCacheStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
-  GenerateNArgsCheckInlineCacheStub(
-      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL);
-}
-
-void StubCodeCompiler::GenerateOneArgCheckInlineCacheWithExactnessCheckStub(
-    Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
   GenerateNArgsCheckInlineCacheStub(
       assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
-      /*optimized=*/false, /*exactness_check=*/true);
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  RDX: receiver
+//  RBX: ICData
+//  RSP[0]: return address
+void StubCodeCompiler::GenerateOneArgCheckInlineCacheWithExactnessCheckStub(
+    Assembler* assembler) {
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kInstanceCall, kCheckExactness);
+}
+
+//  RDX: receiver
+//  RBX: ICData
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateTwoArgsCheckInlineCacheStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
-  GenerateNArgsCheckInlineCacheStub(assembler, 2,
-                                    kInlineCacheMissHandlerTwoArgsRuntimeEntry,
-                                    Token::kILLEGAL);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  RDX: receiver
+//  RBX: ICData
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateSmiAddInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  RDX: receiver
+//  RBX: ICData
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  RDX: receiver
+//  RBX: ICData
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ,
+      kUnoptimized, kInstanceCall, kIgnoreExactness);
 }
 
-// Use inline cache data array to invoke the target or continue in inline
-// cache miss handler. Stub for 1-argument check (receiver class).
-//  RDI: function which counter needs to be incremented.
-//  RBX: Inline cache data object.
-//  TOS(0): Return address.
-// Inline cache data object structure:
-// 0: function-name
-// 1: N, number of arguments checked.
-// 2 .. (length - 1): group of checks, each check containing:
-//   - N classes.
-//   - 1 target function.
+//  RDX: receiver
+//  RBX: ICData
+//  RDI: Function
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateOneArgOptimizedCheckInlineCacheStub(
     Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 1,
-                                    kInlineCacheMissHandlerOneArgRuntimeEntry,
-                                    Token::kILLEGAL, /*optimized=*/true);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kIgnoreExactness);
 }
 
+//  RDX: receiver
+//  RBX: ICData
+//  RDI: Function
+//  RSP[0]: return address
 void StubCodeCompiler::
     GenerateOneArgOptimizedCheckInlineCacheWithExactnessCheckStub(
         Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 1,
-                                    kInlineCacheMissHandlerOneArgRuntimeEntry,
-                                    Token::kILLEGAL, /*optimized=*/true,
-                                    /*exactness_check=*/true);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kCheckExactness);
 }
 
+//  RDX: receiver
+//  RBX: ICData
+//  RDI: Function
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateTwoArgsOptimizedCheckInlineCacheStub(
     Assembler* assembler) {
   GenerateOptimizedUsageCounterIncrement(assembler);
-  GenerateNArgsCheckInlineCacheStub(assembler, 2,
-                                    kInlineCacheMissHandlerTwoArgsRuntimeEntry,
-                                    Token::kILLEGAL, /*optimized=*/true);
+  GenerateNArgsCheckInlineCacheStub(
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kOptimized, kInstanceCall, kIgnoreExactness);
 }
 
-// Intermediary stub between a static call and its target. ICData contains
-// the target function and the call count.
-// RBX: ICData
+//  RBX: ICData
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateZeroArgsUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
 #if defined(DEBUG)
   {
     Label ok;
@@ -2322,18 +2348,24 @@
 #endif
 }
 
+//  RBX: ICData
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateOneArgUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 1, kStaticCallMissHandlerOneArgRuntimeEntry, Token::kILLEGAL);
+      assembler, 1, kStaticCallMissHandlerOneArgRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kStaticCall, kIgnoreExactness);
 }
 
+//  RBX: ICData
+//  RSP[0]: return address
 void StubCodeCompiler::GenerateTwoArgsUnoptimizedStaticCallStub(
     Assembler* assembler) {
-  GenerateUsageCounterIncrement(assembler, RCX);
+  GenerateUsageCounterIncrement(assembler, /* scratch */ RCX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kStaticCallMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL);
+      assembler, 2, kStaticCallMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL,
+      kUnoptimized, kStaticCall, kIgnoreExactness);
 }
 
 // Stub for compiling a function and jumping to the compiled code.
@@ -2441,11 +2473,13 @@
   __ Stop("No debugging in PRODUCT mode");
 #else
   __ EnterStubFrame();
+  __ pushq(RDX);           // Preserve receiver.
   __ pushq(RBX);           // Preserve IC data.
   __ pushq(Immediate(0));  // Result slot.
   __ CallRuntime(kBreakpointRuntimeHandlerRuntimeEntry, 0);
   __ popq(CODE_REG);  // Original stub.
   __ popq(RBX);       // Restore IC data.
+  __ popq(RDX);       // Restore receiver.
   __ LeaveStubFrame();
 
   __ movq(RAX, FieldAddress(CODE_REG, target::Code::entry_point_offset()));
@@ -3061,7 +3095,7 @@
 }
 
 // Called from megamorphic calls.
-//  RDI: receiver
+//  RDX: receiver
 //  RBX: target::MegamorphicCache (preserved)
 // Passed to target:
 //  CODE_REG: target Code
@@ -3069,12 +3103,12 @@
 void StubCodeCompiler::GenerateMegamorphicCallStub(Assembler* assembler) {
   // Jump if receiver is a smi.
   Label smi_case;
-  __ testq(RDI, Immediate(kSmiTagMask));
+  __ testq(RDX, Immediate(kSmiTagMask));
   // Jump out of line for smi case.
   __ j(ZERO, &smi_case, Assembler::kNearJump);
 
   // Loads the cid of the object.
-  __ LoadClassId(RAX, RDI);
+  __ LoadClassId(RAX, RDX);
 
   Label cid_loaded;
   __ Bind(&cid_loaded);
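
The rest of this stub (elided between hunks) hashes the cid in RAX into the cache's backing array and probes until it hits or finds a free slot. A simplified C++ model of that lookup — the VM's actual hash mixing, sentinel values, and entry layout differ, so treat this as shape only:

#include <cstddef>
#include <cstdint>

struct MegaEntry {
  intptr_t cid;  // Receiver class id, or kEmptySlot when the slot is free.
  void* target;  // Entry point to tail-call on a hit.
};

constexpr intptr_t kEmptySlot = -1;

// Open addressing over a power-of-two table: mask == table_size - 1.
void* MegaLookup(const MegaEntry* table, size_t mask, intptr_t cid) {
  size_t i = static_cast<size_t>(cid) & mask;
  while (true) {
    if (table[i].cid == cid) return table[i].target;  // Hit.
    if (table[i].cid == kEmptySlot) return nullptr;   // Miss: go to runtime.
    i = (i + 1) & mask;                               // Linear probe.
  }
}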
@@ -3142,7 +3176,7 @@
 }
 
 // Called from switchable IC calls.
-//  RDI: receiver
+//  RDX: receiver
 //  RBX: ICData (preserved)
 // Passed to target:
 //  CODE_REG: target Code object
@@ -3154,7 +3188,7 @@
           FieldAddress(RBX, target::ICData::arguments_descriptor_offset()));
   __ leaq(R13, FieldAddress(R13, target::Array::data_offset()));
   // R13: first IC entry
-  __ LoadTaggedClassIdMayBeSmi(RAX, RDI);
+  __ LoadTaggedClassIdMayBeSmi(RAX, RDX);
   // RAX: receiver cid as Smi
 
   __ Bind(&loop);
@@ -3194,7 +3228,7 @@
           FieldAddress(RBX, target::ICData::arguments_descriptor_offset()));
   __ leaq(R13, FieldAddress(R13, target::Array::data_offset()));
   // R13: first IC entry
-  __ LoadTaggedClassIdMayBeSmi(RAX, RDI);
+  __ LoadTaggedClassIdMayBeSmi(RAX, RDX);
   // RAX: receiver cid as Smi
 
   __ Bind(&loop);
@@ -3229,21 +3263,21 @@
   __ jmp(RCX);
 }
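
Both loops above walk the ICData entries array linearly, comparing the receiver's cid (loaded as a Smi into RAX) against each check. A standalone C++ model of the one-argument scan, with the entry layout simplified to (cid, target) pairs plus an illegal-cid sentinel:

#include <cstdint>

constexpr intptr_t kIllegalCid = -1;  // Stand-in for the array's terminator.

struct ICEntry {
  intptr_t cid;
  void* target;
};

void* ICScan(const ICEntry* entries, intptr_t receiver_cid) {
  for (const ICEntry* e = entries; e->cid != kIllegalCid; ++e) {
    if (e->cid == receiver_cid) return e->target;  // Hit: jump to target.
  }
  return nullptr;  // Sentinel reached: fall back to the miss handler.
}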
 
-//  RDI: receiver
+//  RDX: receiver
 //  RBX: UnlinkedCall
 void StubCodeCompiler::GenerateUnlinkedCallStub(Assembler* assembler) {
   __ EnterStubFrame();
-  __ pushq(RDI);  // Preserve receiver.
+  __ pushq(RDX);  // Preserve receiver.
 
   __ pushq(Immediate(0));  // Result slot.
-  __ pushq(RDI);           // Arg0: Receiver
+  __ pushq(RDX);           // Arg0: Receiver
   __ pushq(RBX);           // Arg1: UnlinkedCall
   __ CallRuntime(kUnlinkedCallRuntimeEntry, 2);
   __ popq(RBX);
   __ popq(RBX);
   __ popq(RBX);  // result = IC
 
-  __ popq(RDI);  // Restore receiver.
+  __ popq(RDX);  // Restore receiver.
   __ LeaveStubFrame();
 
   __ movq(CODE_REG,
@@ -3254,13 +3288,13 @@
 }
 
 // Called from switchable IC calls.
-//  RDI: receiver
+//  RDX: receiver
 //  RBX: SingleTargetCache
 //  Passed to target:
 //  CODE_REG: target Code object
 void StubCodeCompiler::GenerateSingleTargetCallStub(Assembler* assembler) {
   Label miss;
-  __ LoadClassIdMayBeSmi(RAX, RDI);
+  __ LoadClassIdMayBeSmi(RAX, RDX);
   __ movzxw(R9,
             FieldAddress(RBX, target::SingleTargetCache::lower_limit_offset()));
   __ movzxw(R10,
@@ -3277,15 +3311,15 @@
 
   __ Bind(&miss);
   __ EnterStubFrame();
-  __ pushq(RDI);  // Preserve receiver.
+  __ pushq(RDX);  // Preserve receiver.
 
   __ pushq(Immediate(0));  // Result slot.
-  __ pushq(RDI);           // Arg0: Receiver
+  __ pushq(RDX);           // Arg0: Receiver
   __ CallRuntime(kSingleTargetMissRuntimeEntry, 1);
   __ popq(RBX);
   __ popq(RBX);  // result = IC
 
-  __ popq(RDI);  // Restore receiver.
+  __ popq(RDX);  // Restore receiver.
   __ LeaveStubFrame();
 
   __ movq(CODE_REG,
@@ -3296,20 +3330,20 @@
 }
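
Stripped of the register plumbing, the fast path above is one range check of the receiver's cid against the cache's 16-bit limits; in plain C++:

#include <cstdint>

// Equivalent of the two movzxw loads plus the compares around &miss: the
// cached target is valid iff lower_limit <= cid <= upper_limit.
bool SingleTargetHit(uint16_t lower_limit, uint16_t upper_limit, intptr_t cid) {
  return lower_limit <= cid && cid <= upper_limit;
}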
 
 // Called from the monomorphic checked entry.
-//  RDI: receiver
+//  RDX: receiver
 void StubCodeCompiler::GenerateMonomorphicMissStub(Assembler* assembler) {
   __ movq(CODE_REG,
           Address(THR, target::Thread::monomorphic_miss_stub_offset()));
   __ EnterStubFrame();
-  __ pushq(RDI);  // Preserve receiver.
+  __ pushq(RDX);  // Preserve receiver.
 
   __ pushq(Immediate(0));  // Result slot.
-  __ pushq(RDI);           // Arg0: Receiver
+  __ pushq(RDX);           // Arg0: Receiver
   __ CallRuntime(kMonomorphicMissRuntimeEntry, 1);
   __ popq(RBX);
   __ popq(RBX);  // result = IC
 
-  __ popq(RDI);  // Restore receiver.
+  __ popq(RDX);  // Restore receiver.
   __ LeaveStubFrame();
 
   __ movq(CODE_REG,
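
That is the last of the x64 stubs: every switchable-call flavor now agrees that the receiver lives in RDX and the cache object in RBX, which is exactly what lets a hot call site trade one stub for another without re-plumbing registers. A sketch of the progression this enables (state names here are illustrative; the transition policy lives in the runtime, not in these stubs):

// Illustrative lifecycle of a JIT instance-call site as it sees more
// receiver classes. With a uniform register contract, each transition is
// just swapping the call site's stub and cache object.
enum class CallSiteState {
  kUnlinked,      // Never run: UnlinkedCallStub resolves the first call.
  kMonomorphic,   // One cid: the monomorphic checked entry compares it inline.
  kSingleTarget,  // A cid range with one target: SingleTargetCallStub.
  kInlineCache,   // A few cids: the linear ICData scan above.
  kMegamorphic,   // Too polymorphic: the hashed MegamorphicCache lookup.
};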
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index a82caf4..79ab050 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -4417,7 +4417,7 @@
   static const intptr_t kMonomorphicEntryOffset = 0;
 #elif defined(TARGET_ARCH_X64)
   static const intptr_t kPolymorphicEntryOffset = 16;
-  static const intptr_t kMonomorphicEntryOffset = 36;
+  static const intptr_t kMonomorphicEntryOffset = 34;
 #elif defined(TARGET_ARCH_ARM)
   static const intptr_t kPolymorphicEntryOffset = 0;
   static const intptr_t kMonomorphicEntryOffset = 20;