[vm, arm] Globally block LR for register allocation.
Reserving LR globally means the register allocator never sees a live LR, so the
ARM-specific workarounds for preserving LR across calls (deopt sequence, store
barriers, shared slow paths) can be removed.

Code size impact on Flutter Gallery (release mode):
Instructions(CodeSize): 5668368 -> 5670880 (+0.044%)
Fixes #34411
Bug: https://github.com/dart-lang/sdk/issues/34411
Change-Id: I5281ac9609ec5953cdaa881226f7034a5697d0ce
Reviewed-on: https://dart-review.googlesource.com/74923
Commit-Queue: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc
index bb41360..6c918ea 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm.cc
@@ -3068,7 +3068,7 @@
void Assembler::EnterCallRuntimeFrame(intptr_t frame_space) {
Comment("EnterCallRuntimeFrame");
// Preserve volatile CPU registers and PP.
- EnterFrame(kDartVolatileCpuRegs | (1 << PP) | (1 << FP), 0);
+ EnterFrame(kDartVolatileCpuRegs | (1 << PP) | (1 << FP) | (1 << LR), 0);
COMPILE_ASSERT((kDartVolatileCpuRegs & (1 << PP)) == 0);
// Preserve all volatile FPU registers.
@@ -3120,7 +3120,7 @@
}
// Restore volatile CPU registers.
- LeaveFrame(kDartVolatileCpuRegs | (1 << PP) | (1 << FP));
+ LeaveFrame(kDartVolatileCpuRegs | (1 << PP) | (1 << FP) | (1 << LR));
}
void Assembler::CallRuntime(const RuntimeEntry& entry,
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
index f53c438..6c3cb3a 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
@@ -169,11 +169,8 @@
ASSERT(deopt_env() != NULL);
- // LR may be live. It will be clobbered by BranchLink, so cache it in IP.
- // It will be restored at the top of the deoptimization stub, specifically in
- // GenerateDeoptimizationSequence in stub_code_arm.cc.
__ Push(CODE_REG);
- __ mov(IP, Operand(LR));
+ ASSERT(kReservedCpuRegisters & (1 << LR));
__ BranchLink(*StubCode::Deoptimize_entry());
set_pc_offset(assembler->CodeSize());
#undef __
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index 99b85ad..77503a6 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -1492,9 +1492,6 @@
bool needs_base = false;
intptr_t kNumTemps = 0;
- if (ShouldEmitStoreBarrier()) {
- kNumTemps += 1; // Block LR for the store barrier.
- }
if (CanBeImmediateIndex(index(), class_id(), IsExternal(),
false, // Store.
&needs_base)) {
@@ -1562,10 +1559,6 @@
return NULL;
}
- if (ShouldEmitStoreBarrier()) {
- locs->set_temp(kNumTemps - 1, Location::RegisterLocation(LR));
- }
-
return locs;
}
@@ -2196,8 +2189,7 @@
bool opt) const {
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps =
- ((IsUnboxedStore() && opt) ? 2 : ((IsPotentialUnboxedStore()) ? 3 : 0)) +
- (ShouldEmitStoreBarrier() ? 1 : 0); // Block LR for the store barrier.
+ ((IsUnboxedStore() && opt) ? 2 : ((IsPotentialUnboxedStore()) ? 3 : 0));
LocationSummary* summary = new (zone)
LocationSummary(zone, kNumInputs, kNumTemps,
((IsUnboxedStore() && opt && is_initialization()) ||
@@ -2228,9 +2220,6 @@
: Location::RegisterOrConstant(value()));
#endif
}
- if (ShouldEmitStoreBarrier()) {
- summary->set_temp(kNumTemps - 1, Location::RegisterLocation(LR));
- }
return summary;
}
@@ -2433,8 +2422,7 @@
LocationSummary* StoreStaticFieldInstr::MakeLocationSummary(Zone* zone,
bool opt) const {
const intptr_t kNumInputs = 1;
- const intptr_t kNumTemps =
- value()->NeedsWriteBarrier() ? 2 : 1; // Block LR for the store barrier.
+ const intptr_t kNumTemps = 1;
LocationSummary* locs = new (zone)
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
#if defined(CONCURRENT_MARKING)
@@ -2444,9 +2432,6 @@
: Location::RequiresRegister());
#endif
locs->set_temp(0, Location::RequiresRegister());
- if (value()->NeedsWriteBarrier()) {
- locs->set_temp(kNumTemps - 1, Location::RegisterLocation(LR));
- }
return locs;
}
@@ -3063,8 +3048,7 @@
? Thread::stack_overflow_shared_with_fpu_regs_entry_point_offset()
: Thread::
stack_overflow_shared_without_fpu_regs_entry_point_offset();
- ASSERT(instruction()->locs()->temp(1).IsRegister() &&
- instruction()->locs()->temp(1).reg() == LR);
+ ASSERT(kReservedCpuRegisters & (1 << LR));
__ ldr(LR, Address(THR, entry_point_offset));
__ blx(LR);
compiler->RecordSafepoint(instruction()->locs(), kNumSlowPathArgs);
diff --git a/runtime/vm/compiler/backend/locations.cc b/runtime/vm/compiler/backend/locations.cc
index 22ea322..569be42 100644
--- a/runtime/vm/compiler/backend/locations.cc
+++ b/runtime/vm/compiler/backend/locations.cc
@@ -28,11 +28,7 @@
intptr_t temp_count,
LocationSummary::ContainsCall contains_call)
: num_inputs_(input_count),
-#if defined(TARGET_ARCH_ARM)
- num_temps_(temp_count + (contains_call == kCallOnSharedSlowPath ? 1 : 0)),
-#else
num_temps_(temp_count),
-#endif
stack_bitmap_(NULL),
contains_call_(contains_call),
live_registers_() {
@@ -41,14 +37,6 @@
#endif
input_locations_ = zone->Alloc<Location>(num_inputs_);
temp_locations_ = zone->Alloc<Location>(num_temps_);
-
-#if defined(TARGET_ARCH_ARM)
- if (contains_call == kCallOnSharedSlowPath) {
- // TODO(sjindel): Mitigate the negative effect on the fast-path of blocking
- // LR.
- set_temp(temp_count, Location::RegisterLocation(LR));
- }
-#endif
}
LocationSummary* LocationSummary::Make(
diff --git a/runtime/vm/constants_arm.h b/runtime/vm/constants_arm.h
index c9973c3..67a2739 100644
--- a/runtime/vm/constants_arm.h
+++ b/runtime/vm/constants_arm.h
@@ -306,11 +306,12 @@
const int kAbiPreservedFpuRegCount = 4;
const RegList kReservedCpuRegisters = (1 << SPREG) | (1 << FPREG) | (1 << TMP) |
- (1 << PP) | (1 << THR) | (1 << PC);
+ (1 << PP) | (1 << THR) | (1 << LR) |
+ (1 << PC);
// CPU registers available to Dart allocator.
constexpr RegList kDartAvailableCpuRegs =
kAllCpuRegistersList & ~kReservedCpuRegisters;
-constexpr int kNumberOfDartAvailableCpuRegs = kNumberOfCpuRegisters - 6;
+constexpr int kNumberOfDartAvailableCpuRegs = kNumberOfCpuRegisters - 7;
const intptr_t kStoreBufferWrapperSize = 24;
// Registers available to Dart that are not preserved by runtime calls.
const RegList kDartVolatileCpuRegs =
diff --git a/runtime/vm/stub_code_arm.cc b/runtime/vm/stub_code_arm.cc
index 3ba9531..d00c0f6 100644
--- a/runtime/vm/stub_code_arm.cc
+++ b/runtime/vm/stub_code_arm.cc
@@ -121,11 +121,6 @@
// We want the saved registers to appear like part of the caller's frame, so
// we push them before calling EnterStubFrame.
- //
- // TODO(sjindel): We could skip saving LR (and thus remove one bit from the
- // stackmap of the callsite), but this would add ARM-specific complexity to
- // FlowGraphCompiler::RecordSafepoint and
- // FlowGraphCompiler::SlowPathEnvironmentFor.
RegisterSet all_registers;
all_registers.AddAllNonReservedRegisters(save_fpu_registers);
__ PushRegisters(all_registers);
@@ -506,18 +501,7 @@
DeoptStubKind kind) {
// DeoptimizeCopyFrame expects a Dart frame, i.e. EnterDartFrame(0), but there
// is no need to set the correct PC marker or load PP, since they get patched.
-
- // IP has the potentially live LR value. LR was clobbered by the call with
- // the return address, so move it into IP to set up the Dart frame.
- __ eor(IP, IP, Operand(LR));
- __ eor(LR, IP, Operand(LR));
- __ eor(IP, IP, Operand(LR));
-
- // Set up the frame manually with return address now stored in IP.
- COMPILE_ASSERT(PP < CODE_REG);
- COMPILE_ASSERT(CODE_REG < FP);
- COMPILE_ASSERT(FP < IP);
- __ EnterFrame((1 << PP) | (1 << CODE_REG) | (1 << FP) | (1 << IP), 0);
+ __ EnterDartFrame(0);
__ LoadPoolPointer();
// The code in this frame may not cause GC. kDeoptimizeCopyFrameRuntimeEntry