[VM/AOT] Fix shared spilling stubs on arm/arm64 and enable it
This reduces flutter_gallery_total_size by slightly less than 1%.
Change-Id: I44f670f693ccc97d1140673ad8559f70e460c6d2
Reviewed-on: https://dart-review.googlesource.com/c/89146
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
Commit-Queue: Martin Kustermann <kustermann@google.com>
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index 3bc424f..94c7519 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -2988,8 +2988,12 @@
bool opt) const {
const intptr_t kNumInputs = 0;
const intptr_t kNumTemps = 1;
- LocationSummary* summary = new (zone) LocationSummary(
- zone, kNumInputs, kNumTemps, LocationSummary::kCallOnSlowPath);
+ const bool using_shared_stub = UseSharedSlowPathStub(opt);
+ ASSERT((kReservedCpuRegisters & (1 << LR)) != 0);
+ LocationSummary* summary = new (zone)
+ LocationSummary(zone, kNumInputs, kNumTemps,
+ using_shared_stub ? LocationSummary::kCallOnSharedSlowPath
+ : LocationSummary::kCallOnSlowPath);
summary->set_temp(0, Location::RequiresRegister());
return summary;
}
@@ -3030,7 +3034,6 @@
? Thread::stack_overflow_shared_with_fpu_regs_entry_point_offset()
: Thread::
stack_overflow_shared_without_fpu_regs_entry_point_offset();
- ASSERT(kReservedCpuRegisters & (1 << LR));
__ ldr(LR, Address(THR, entry_point_offset));
__ blx(LR);
compiler->RecordSafepoint(instruction()->locs(), kNumSlowPathArgs);
diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc
index a3a7868..9c03a18 100644
--- a/runtime/vm/compiler/backend/il_arm64.cc
+++ b/runtime/vm/compiler/backend/il_arm64.cc
@@ -2676,8 +2676,12 @@
bool opt) const {
const intptr_t kNumInputs = 0;
const intptr_t kNumTemps = 1;
- LocationSummary* summary = new (zone) LocationSummary(
- zone, kNumInputs, kNumTemps, LocationSummary::kCallOnSlowPath);
+ const bool using_shared_stub = UseSharedSlowPathStub(opt);
+ ASSERT((kReservedCpuRegisters & (1 << LR)) != 0);
+ LocationSummary* summary = new (zone)
+ LocationSummary(zone, kNumInputs, kNumTemps,
+ using_shared_stub ? LocationSummary::kCallOnSharedSlowPath
+ : LocationSummary::kCallOnSlowPath);
summary->set_temp(0, Location::RequiresRegister());
return summary;
}
@@ -2718,8 +2722,6 @@
? Thread::stack_overflow_shared_with_fpu_regs_entry_point_offset()
: Thread::
stack_overflow_shared_without_fpu_regs_entry_point_offset();
- ASSERT(instruction()->locs()->temp(1).IsRegister() &&
- instruction()->locs()->temp(1).reg() == LR);
__ ldr(LR, Address(THR, entry_point_offset));
__ blr(LR);
compiler->RecordSafepoint(instruction()->locs(), kNumSlowPathArgs);
diff --git a/runtime/vm/stub_code_arm.cc b/runtime/vm/stub_code_arm.cc
index 5a4ce97..6709e31 100644
--- a/runtime/vm/stub_code_arm.cc
+++ b/runtime/vm/stub_code_arm.cc
@@ -124,30 +124,17 @@
const RuntimeEntry* target,
intptr_t self_code_stub_offset_from_thread,
bool allow_return) {
- __ Push(LR);
-
// We want the saved registers to appear like part of the caller's frame, so
// we push them before calling EnterStubFrame.
RegisterSet all_registers;
all_registers.AddAllNonReservedRegisters(save_fpu_registers);
+
+ // To make the stack map calculation architecture independent we do the same
+ // as on intel.
+ __ Push(LR);
+
__ PushRegisters(all_registers);
-
- const intptr_t kSavedCpuRegisterSlots =
- Utils::CountOneBitsWord(kDartAvailableCpuRegs);
-
- const intptr_t kSavedFpuRegisterSlots =
- save_fpu_registers ? kNumberOfFpuRegisters * kFpuRegisterSize / kWordSize
- : 0;
-
- const intptr_t kAllSavedRegistersSlots =
- kSavedCpuRegisterSlots + kSavedFpuRegisterSlots;
-
- // Copy down the return address so the stack layout is correct.
- __ ldr(TMP, Address(SPREG, kAllSavedRegistersSlots * kWordSize));
- __ Push(TMP);
-
__ ldr(CODE_REG, Address(THR, self_code_stub_offset_from_thread));
-
__ EnterStubFrame();
__ CallRuntime(*target, /*argument_count=*/0);
if (!allow_return) {
@@ -155,13 +142,7 @@
return;
}
__ LeaveStubFrame();
-
- // Drop "official" return address -- we can just use the one stored above the
- // saved registers.
- __ Drop(1);
-
__ PopRegisters(all_registers);
-
__ Pop(LR);
__ bx(LR);
}
diff --git a/runtime/vm/stub_code_arm64.cc b/runtime/vm/stub_code_arm64.cc
index 3e5928f..182a355f 100644
--- a/runtime/vm/stub_code_arm64.cc
+++ b/runtime/vm/stub_code_arm64.cc
@@ -148,30 +148,16 @@
const RuntimeEntry* target,
intptr_t self_code_stub_offset_from_thread,
bool allow_return) {
- __ Push(LR);
-
// We want the saved registers to appear like part of the caller's frame, so
// we push them before calling EnterStubFrame.
RegisterSet all_registers;
all_registers.AddAllNonReservedRegisters(save_fpu_registers);
+
+ // To make the stack map calculation architecture independent we do the same
+ // as on intel.
+ __ Push(LR);
__ PushRegisters(all_registers);
-
- const intptr_t kSavedCpuRegisterSlots =
- Utils::CountOneBitsWord(kDartAvailableCpuRegs);
-
- const intptr_t kSavedFpuRegisterSlots =
- save_fpu_registers ? kNumberOfFpuRegisters * kFpuRegisterSize / kWordSize
- : 0;
-
- const intptr_t kAllSavedRegistersSlots =
- kSavedCpuRegisterSlots + kSavedFpuRegisterSlots;
-
- // Copy down the return address so the stack layout is correct.
- __ ldr(TMP, Address(SPREG, kAllSavedRegistersSlots * kWordSize));
- __ Push(TMP);
-
__ ldr(CODE_REG, Address(THR, self_code_stub_offset_from_thread));
-
__ EnterStubFrame();
__ CallRuntime(*target, /*argument_count=*/0);
if (!allow_return) {
@@ -179,13 +165,7 @@
return;
}
__ LeaveStubFrame();
-
- // Drop "official" return address -- we can just use the one stored above the
- // saved registers.
- __ Drop(1);
-
__ PopRegisters(all_registers);
-
__ Pop(LR);
__ ret(LR);
}