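[vm, arm] Globally block LR for register allocation.

LR is added to kReservedCpuRegisters, so the register allocator never
assigns it. This removes the per-instruction temps that pinned LR for
store barriers and shared slow paths, and the IP/LR shuffle that kept a
possibly-live LR across the call into the deoptimization stub.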

Flutter Gallery release:
Instructions(CodeSize): 5668368 -> 5670880 (+0.044%)

Fixes #34411

Bug: https://github.com/dart-lang/sdk/issues/34411
Change-Id: I5281ac9609ec5953cdaa881226f7034a5697d0ce
Reviewed-on: https://dart-review.googlesource.com/74923
Commit-Queue: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc
index bb41360..6c918ea 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm.cc
@@ -3068,7 +3068,7 @@
 void Assembler::EnterCallRuntimeFrame(intptr_t frame_space) {
   Comment("EnterCallRuntimeFrame");
   // Preserve volatile CPU registers and PP.
-  EnterFrame(kDartVolatileCpuRegs | (1 << PP) | (1 << FP), 0);
+  EnterFrame(kDartVolatileCpuRegs | (1 << PP) | (1 << FP) | (1 << LR), 0);
   COMPILE_ASSERT((kDartVolatileCpuRegs & (1 << PP)) == 0);
 
   // Preserve all volatile FPU registers.
@@ -3120,7 +3120,7 @@
   }
 
   // Restore volatile CPU registers.
-  LeaveFrame(kDartVolatileCpuRegs | (1 << PP) | (1 << FP));
+  LeaveFrame(kDartVolatileCpuRegs | (1 << PP) | (1 << FP) | (1 << LR));
 }
 
 void Assembler::CallRuntime(const RuntimeEntry& entry,
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
index f53c438..6c3cb3a 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
@@ -169,11 +169,8 @@
 
   ASSERT(deopt_env() != NULL);
 
-  // LR may be live. It will be clobbered by BranchLink, so cache it in IP.
-  // It will be restored at the top of the deoptimization stub, specifically in
-  // GenerateDeoptimizationSequence in stub_code_arm.cc.
   __ Push(CODE_REG);
-  __ mov(IP, Operand(LR));
+  ASSERT(kReservedCpuRegisters & (1 << LR));
   __ BranchLink(*StubCode::Deoptimize_entry());
   set_pc_offset(assembler->CodeSize());
 #undef __
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index 99b85ad..77503a6 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -1492,9 +1492,6 @@
 
   bool needs_base = false;
   intptr_t kNumTemps = 0;
-  if (ShouldEmitStoreBarrier()) {
-    kNumTemps += 1;  // Block LR for the store barrier.
-  }
   if (CanBeImmediateIndex(index(), class_id(), IsExternal(),
                           false,  // Store.
                           &needs_base)) {
@@ -1562,10 +1559,6 @@
       return NULL;
   }
 
-  if (ShouldEmitStoreBarrier()) {
-    locs->set_temp(kNumTemps - 1, Location::RegisterLocation(LR));
-  }
-
   return locs;
 }
 
@@ -2196,8 +2189,7 @@
                                                               bool opt) const {
   const intptr_t kNumInputs = 2;
   const intptr_t kNumTemps =
-      ((IsUnboxedStore() && opt) ? 2 : ((IsPotentialUnboxedStore()) ? 3 : 0)) +
-      (ShouldEmitStoreBarrier() ? 1 : 0);  // Block LR for the store barrier.
+      ((IsUnboxedStore() && opt) ? 2 : ((IsPotentialUnboxedStore()) ? 3 : 0));
   LocationSummary* summary = new (zone)
       LocationSummary(zone, kNumInputs, kNumTemps,
                       ((IsUnboxedStore() && opt && is_initialization()) ||
@@ -2228,9 +2220,6 @@
                            : Location::RegisterOrConstant(value()));
 #endif
   }
-  if (ShouldEmitStoreBarrier()) {
-    summary->set_temp(kNumTemps - 1, Location::RegisterLocation(LR));
-  }
   return summary;
 }
 
@@ -2433,8 +2422,7 @@
 LocationSummary* StoreStaticFieldInstr::MakeLocationSummary(Zone* zone,
                                                             bool opt) const {
   const intptr_t kNumInputs = 1;
-  const intptr_t kNumTemps =
-      value()->NeedsWriteBarrier() ? 2 : 1;  // Block LR for the store barrier.
+  const intptr_t kNumTemps = 1;
   LocationSummary* locs = new (zone)
       LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
 #if defined(CONCURRENT_MARKING)
@@ -2444,9 +2432,6 @@
                                                : Location::RequiresRegister());
 #endif
   locs->set_temp(0, Location::RequiresRegister());
-  if (value()->NeedsWriteBarrier()) {
-    locs->set_temp(kNumTemps - 1, Location::RegisterLocation(LR));
-  }
   return locs;
 }
 
@@ -3063,8 +3048,7 @@
               ? Thread::stack_overflow_shared_with_fpu_regs_entry_point_offset()
               : Thread::
                     stack_overflow_shared_without_fpu_regs_entry_point_offset();
-      ASSERT(instruction()->locs()->temp(1).IsRegister() &&
-             instruction()->locs()->temp(1).reg() == LR);
+      ASSERT(kReservedCpuRegisters & (1 << LR));
       __ ldr(LR, Address(THR, entry_point_offset));
       __ blx(LR);
       compiler->RecordSafepoint(instruction()->locs(), kNumSlowPathArgs);
diff --git a/runtime/vm/compiler/backend/locations.cc b/runtime/vm/compiler/backend/locations.cc
index 22ea322..569be42 100644
--- a/runtime/vm/compiler/backend/locations.cc
+++ b/runtime/vm/compiler/backend/locations.cc
@@ -28,11 +28,7 @@
                                  intptr_t temp_count,
                                  LocationSummary::ContainsCall contains_call)
     : num_inputs_(input_count),
-#if defined(TARGET_ARCH_ARM)
-      num_temps_(temp_count + (contains_call == kCallOnSharedSlowPath ? 1 : 0)),
-#else
       num_temps_(temp_count),
-#endif
       stack_bitmap_(NULL),
       contains_call_(contains_call),
       live_registers_() {
@@ -41,14 +37,6 @@
 #endif
   input_locations_ = zone->Alloc<Location>(num_inputs_);
   temp_locations_ = zone->Alloc<Location>(num_temps_);
-
-#if defined(TARGET_ARCH_ARM)
-  if (contains_call == kCallOnSharedSlowPath) {
-    // TODO(sjindel): Mitigate the negative effect on the fast-path of blocking
-    // LR.
-    set_temp(temp_count, Location::RegisterLocation(LR));
-  }
-#endif
 }
 
 LocationSummary* LocationSummary::Make(
diff --git a/runtime/vm/constants_arm.h b/runtime/vm/constants_arm.h
index c9973c3..67a2739 100644
--- a/runtime/vm/constants_arm.h
+++ b/runtime/vm/constants_arm.h
@@ -306,11 +306,12 @@
 const int kAbiPreservedFpuRegCount = 4;
 
 const RegList kReservedCpuRegisters = (1 << SPREG) | (1 << FPREG) | (1 << TMP) |
-                                      (1 << PP) | (1 << THR) | (1 << PC);
+                                      (1 << PP) | (1 << THR) | (1 << LR) |
+                                      (1 << PC);
 // CPU registers available to Dart allocator.
 constexpr RegList kDartAvailableCpuRegs =
     kAllCpuRegistersList & ~kReservedCpuRegisters;
-constexpr int kNumberOfDartAvailableCpuRegs = kNumberOfCpuRegisters - 6;
+constexpr int kNumberOfDartAvailableCpuRegs = kNumberOfCpuRegisters - 7;
 const intptr_t kStoreBufferWrapperSize = 24;
 // Registers available to Dart that are not preserved by runtime calls.
 const RegList kDartVolatileCpuRegs =
diff --git a/runtime/vm/stub_code_arm.cc b/runtime/vm/stub_code_arm.cc
index 3ba9531..d00c0f6 100644
--- a/runtime/vm/stub_code_arm.cc
+++ b/runtime/vm/stub_code_arm.cc
@@ -121,11 +121,6 @@
 
   // We want the saved registers to appear like part of the caller's frame, so
   // we push them before calling EnterStubFrame.
-  //
-  // TODO(sjindel): We could skip saving LR (and thus remove one bit from the
-  // stackmap of the callsite), but this would add ARM-specific complexity to
-  // FlowGraphCompiler::RecordSafepoint and
-  // FlowGraphCompiler::SlowPathEnvironmentFor.
   RegisterSet all_registers;
   all_registers.AddAllNonReservedRegisters(save_fpu_registers);
   __ PushRegisters(all_registers);
@@ -506,18 +501,7 @@
                                            DeoptStubKind kind) {
   // DeoptimizeCopyFrame expects a Dart frame, i.e. EnterDartFrame(0), but there
   // is no need to set the correct PC marker or load PP, since they get patched.
-
-  // IP has the potentially live LR value. LR was clobbered by the call with
-  // the return address, so move it into IP to set up the Dart frame.
-  __ eor(IP, IP, Operand(LR));
-  __ eor(LR, IP, Operand(LR));
-  __ eor(IP, IP, Operand(LR));
-
-  // Set up the frame manually with return address now stored in IP.
-  COMPILE_ASSERT(PP < CODE_REG);
-  COMPILE_ASSERT(CODE_REG < FP);
-  COMPILE_ASSERT(FP < IP);
-  __ EnterFrame((1 << PP) | (1 << CODE_REG) | (1 << FP) | (1 << IP), 0);
+  __ EnterDartFrame(0);
   __ LoadPoolPointer();
 
   // The code in this frame may not cause GC. kDeoptimizeCopyFrameRuntimeEntry