[vm/precomp] Re-land support for slow-path sharing of CheckStackOverflow with status file updates.

Patchset 1 contains the original revision.

Cq-Include-Trybots: luci.dart.try:vm-kernel-reload-linux-release-x64-try;luci.dart.try:vm-kernel-reload-rollback-linux-release-x64-try
Change-Id: Ic2601a268e1cf909183283e9418c03dc6559e809
Reviewed-on: https://dart-review.googlesource.com/62902
Commit-Queue: Samir Jindel <sjindel@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
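
A note on the mechanism, since the diffs below are spread across several files: a failing stack-overflow check jumps to a slow path, and before this change every optimized function embedded its own save-registers/call-runtime/restore sequence at each such check. With sharing enabled, the slow path collapses to a single indirect call through an entry point cached on the Thread. The following is a minimal C++ model of that idea; the Thread fields and function names are illustrative, not the VM's actual types:

    #include <cstdio>

    // Illustrative model: each compiled function's slow path performs one
    // indirect call through an entry point cached on the current Thread,
    // instead of embedding a save/call/restore sequence per call site.
    struct Thread {
      void (*stack_overflow_shared_without_fpu_regs)();
      void (*stack_overflow_shared_with_fpu_regs)();
    };

    static void SharedStubNoFpu() { std::puts("handle overflow (no FPU save)"); }
    static void SharedStubFpu() { std::puts("handle overflow (FPU save)"); }

    static void SlowPath(Thread* thr, bool live_fpu_regs) {
      (live_fpu_regs ? thr->stack_overflow_shared_with_fpu_regs
                     : thr->stack_overflow_shared_without_fpu_regs)();
    }

    int main() {
      Thread thr = {&SharedStubNoFpu, &SharedStubFpu};
      SlowPath(&thr, /*live_fpu_regs=*/false);
      SlowPath(&thr, /*live_fpu_regs=*/true);
      return 0;
    }
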
diff --git a/runtime/tests/vm/dart/stack_overflow_shared_test.dart b/runtime/tests/vm/dart/stack_overflow_shared_test.dart
new file mode 100644
index 0000000..56fa08c
--- /dev/null
+++ b/runtime/tests/vm/dart/stack_overflow_shared_test.dart
@@ -0,0 +1,36 @@
+// Copyright (c) 2018, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+// VMOptions=--optimization_counter_threshold=10 --no-background-compilation --shared-slow-path-triggers-gc --stacktrace_filter=filter_me
+
+// This tests the stackmaps and environments for safepoints corresponding to
+// slow-path stack overflow checks that use shared runtime stubs.
+
+import 'package:expect/expect.dart';
+import 'dart:math';
+
+filter_me() {
+  int s = 0;
+  for (int i = 0; i < 100; ++i) {
+    if (i % 2 == 0) {
+      s += i;
+    } else {
+      s -= i;
+    }
+  }
+  Expect.equals(-50, s);
+  double x = 0.0;
+  for (int i = 0; i < 100; ++i) {
+    if (i % 2 == 0) {
+      x = x / 3;
+    } else {
+      x = x * 2 + 1;
+    }
+  }
+  Expect.isTrue(x - 0.00001 < 3 && x + 0.00001 > 3);
+}
+
+main() {
+  filter_me();
+}
diff --git a/runtime/tests/vm/vm.status b/runtime/tests/vm/vm.status
index d52b4c8..4dd7760 100644
--- a/runtime/tests/vm/vm.status
+++ b/runtime/tests/vm/vm.status
@@ -360,5 +360,7 @@
 [ $hot_reload || $hot_reload_rollback ]
 dart/appjit_determinism_test: Skip # Reload affects determinism
 dart/script_determinism_test: Skip # We can shutdown an isolate before it reloads.
+dart/slow_path_shared_stub_test: Skip # Too slow with the --shared-slow-path-triggers-gc flag and not relevant outside precompiled mode.
 dart/spawn_infinite_loop_test: Skip # We can shutdown an isolate before it reloads.
 dart/spawn_shutdown_test: Skip # We can shutdown an isolate before it reloads.
+dart/stack_overflow_shared_test: Skip # Too slow with the --shared-slow-path-triggers-gc flag and not relevant outside precompiled mode.
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.cc b/runtime/vm/compiler/backend/flow_graph_compiler.cc
index 4f48c7b..5b941df 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler.cc
@@ -779,7 +779,8 @@
         if ((kReservedCpuRegisters & (1 << i)) != 0) continue;
         const Register reg = static_cast<Register>(i);
         bitmap->Set(bitmap->Length(),
-                    locs->live_registers()->ContainsRegister(reg));
+                    locs->live_registers()->ContainsRegister(reg) &&
+                        locs->live_registers()->IsTagged(reg));
       }
     }
 
@@ -2192,7 +2193,9 @@
         compiler->SlowPathEnvironmentFor(instruction(), num_args_);
     compiler->EmitCatchEntryState(env, try_index_);
   }
-  __ Breakpoint();
+  if (!use_shared_stub) {
+    __ Breakpoint();
+  }
 }
 
 #undef __
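
The hunk above fixes which registers the GC may scan at a safepoint: a register's stackmap bit is now set only when the register is both live and tagged. A hedged sketch of that rule with simplified types (the real code walks the non-reserved CPU registers of a LocationSummary):

    #include <bitset>
    #include <cstdio>

    struct RegisterSet {
      std::bitset<16> live;    // registers live across the safepoint
      std::bitset<16> tagged;  // registers known to hold tagged pointers
    };

    // A stackmap bit is set only for registers that are live AND tagged;
    // marking a live-but-untagged register (e.g. one holding an unboxed
    // integer) would make the GC misread raw bits as a heap pointer.
    static std::bitset<16> StackmapBits(const RegisterSet& regs) {
      return regs.live & regs.tagged;
    }

    int main() {
      RegisterSet regs;
      regs.live.set(3);    // live, but untagged: must stay out of the map
      regs.live.set(5);
      regs.tagged.set(5);  // live and tagged: the only surviving bit
      std::printf("%s\n", StackmapBits(regs).to_string().c_str());
      return 0;
    }
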
diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h
index 656e856..3c2a601 100644
--- a/runtime/vm/compiler/backend/il.h
+++ b/runtime/vm/compiler/backend/il.h
@@ -1010,6 +1010,15 @@
 
   void Unsupported(FlowGraphCompiler* compiler);
 
+  static bool SlowPathSharingSupported(bool is_optimizing) {
+#if defined(TARGET_ARCH_X64)
+    return FLAG_enable_slow_path_sharing && FLAG_precompiled_mode &&
+           is_optimizing;
+#else
+    return false;
+#endif
+  }
+
   virtual bool UseSharedSlowPathStub(bool is_optimizing) const { return false; }
 
  protected:
@@ -6470,6 +6479,10 @@
 
   virtual bool HasUnknownSideEffects() const { return false; }
 
+  virtual bool UseSharedSlowPathStub(bool is_optimizing) const {
+    return SlowPathSharingSupported(is_optimizing);
+  }
+
   PRINT_OPERANDS_TO_SUPPORT
 
  private:
@@ -6993,12 +7006,7 @@
   const String& function_name() const { return function_name_; }
 
   bool UseSharedSlowPathStub(bool is_optimizing) const {
-#if defined(TARGET_ARCH_X64)
-    return FLAG_enable_slow_path_sharing && FLAG_precompiled_mode &&
-           is_optimizing;
-#else
-    return false;
-#endif
+    return SlowPathSharingSupported(is_optimizing);
   }
 
   DECLARE_INSTRUCTION(CheckNull)
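
The il.h hunks hoist the sharing predicate into a static helper on the instruction base class, so CheckStackOverflow and CheckNull share one definition instead of duplicating the #ifdef. A simplified sketch of the pattern (the VM flags are elided; TARGET_ARCH_X64 stands in for the real build macro):

    #include <cstdio>

    struct Instruction {
      // One shared predicate instead of a copy per instruction class.
      static bool SlowPathSharingSupported(bool is_optimizing) {
    #if defined(TARGET_ARCH_X64)
        return is_optimizing;  // the real check also tests two VM flags
    #else
        return false;
    #endif
      }
      virtual bool UseSharedSlowPathStub(bool is_optimizing) const {
        return false;  // default: instructions keep a private slow path
      }
      virtual ~Instruction() {}
    };

    struct CheckStackOverflowInstr : Instruction {
      bool UseSharedSlowPathStub(bool is_optimizing) const override {
        return SlowPathSharingSupported(is_optimizing);
      }
    };

    int main() {
      CheckStackOverflowInstr instr;
      std::printf("%d\n", instr.UseSharedSlowPathStub(true));
      return 0;
    }
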
diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc
index c520bfc..92ba643 100644
--- a/runtime/vm/compiler/backend/il_x64.cc
+++ b/runtime/vm/compiler/backend/il_x64.cc
@@ -2608,8 +2608,11 @@
                                                               bool opt) const {
   const intptr_t kNumInputs = 0;
   const intptr_t kNumTemps = 1;
-  LocationSummary* summary = new (zone) LocationSummary(
-      zone, kNumInputs, kNumTemps, LocationSummary::kCallOnSlowPath);
+  const bool using_shared_stub = UseSharedSlowPathStub(opt);
+  LocationSummary* summary = new (zone)
+      LocationSummary(zone, kNumInputs, kNumTemps,
+                      using_shared_stub ? LocationSummary::kCallOnSharedSlowPath
+                                        : LocationSummary::kCallOnSlowPath);
   summary->set_temp(0, Location::RequiresRegister());
   return summary;
 }
@@ -2631,16 +2634,38 @@
     }
     __ Comment("CheckStackOverflowSlowPath");
     __ Bind(entry_label());
-    compiler->SaveLiveRegisters(instruction()->locs());
+    const bool using_shared_stub =
+        instruction()->locs()->call_on_shared_slow_path();
+    const bool live_fpu_regs =
+        instruction()->locs()->live_registers()->FpuRegisterCount() > 0;
+    if (!using_shared_stub) {
+      compiler->SaveLiveRegisters(instruction()->locs());
+    }
     // pending_deoptimization_env_ is needed to generate a runtime call that
     // may throw an exception.
     ASSERT(compiler->pending_deoptimization_env_ == NULL);
     Environment* env =
         compiler->SlowPathEnvironmentFor(instruction(), kNumSlowPathArgs);
     compiler->pending_deoptimization_env_ = env;
-    compiler->GenerateRuntimeCall(
-        instruction()->token_pos(), instruction()->deopt_id(),
-        kStackOverflowRuntimeEntry, kNumSlowPathArgs, instruction()->locs());
+
+    if (using_shared_stub) {
+      uword entry_point_offset =
+          live_fpu_regs
+              ? Thread::stack_overflow_shared_with_fpu_regs_entry_point_offset()
+              : Thread::
+                    stack_overflow_shared_without_fpu_regs_entry_point_offset();
+      __ call(Address(THR, entry_point_offset));
+      compiler->RecordSafepoint(instruction()->locs(), kNumSlowPathArgs);
+      compiler->EmitCatchEntryState();
+      compiler->AddDescriptor(
+          RawPcDescriptors::kOther, compiler->assembler()->CodeSize(),
+          instruction()->deopt_id(), instruction()->token_pos(),
+          compiler->CurrentTryIndex());
+    } else {
+      compiler->GenerateRuntimeCall(
+          instruction()->token_pos(), instruction()->deopt_id(),
+          kStackOverflowRuntimeEntry, kNumSlowPathArgs, instruction()->locs());
+    }
 
     if (compiler->isolate()->use_osr() && !compiler->is_optimizing() &&
         instruction()->in_loop()) {
@@ -2650,7 +2675,9 @@
                                      TokenPosition::kNoSource);
     }
     compiler->pending_deoptimization_env_ = NULL;
-    compiler->RestoreLiveRegisters(instruction()->locs());
+    if (!using_shared_stub) {
+      compiler->RestoreLiveRegisters(instruction()->locs());
+    }
     __ jmp(exit_label());
   }
 
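
In the hunk above, the shared-stub path drops the per-site SaveLiveRegisters/RestoreLiveRegisters pair because the stub spills and reloads the registers itself, and it selects the with- or without-FPU entry point by whether any FPU register is live at the safepoint. A hypothetical trace of the two shapes (printed strings, not emitted machine code):

    #include <cstdio>

    // Prints the sequence the compiler emits for each slow-path flavor.
    static void TraceSlowPath(bool using_shared_stub, bool live_fpu_regs) {
      if (!using_shared_stub) std::puts("save live registers inline");
      if (using_shared_stub) {
        std::printf("call [THR + %s_entry_point_offset]\n",
                    live_fpu_regs
                        ? "stack_overflow_shared_with_fpu_regs"
                        : "stack_overflow_shared_without_fpu_regs");
        std::puts("record safepoint; emit catch-entry state; add descriptor");
      } else {
        std::puts("GenerateRuntimeCall(kStackOverflowRuntimeEntry)");
      }
      if (!using_shared_stub) std::puts("restore live registers inline");
      std::puts("jmp exit_label");
    }

    int main() {
      TraceSlowPath(/*using_shared_stub=*/true, /*live_fpu_regs=*/false);
      std::puts("--");
      TraceSlowPath(/*using_shared_stub=*/false, /*live_fpu_regs=*/false);
      return 0;
    }
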
diff --git a/runtime/vm/runtime_entry.cc b/runtime/vm/runtime_entry.cc
index bcbb625..b298c05 100644
--- a/runtime/vm/runtime_entry.cc
+++ b/runtime/vm/runtime_entry.cc
@@ -181,7 +181,7 @@
   const uword pc_offset = caller_frame->pc() - code.PayloadStart();
 
   if (FLAG_shared_slow_path_triggers_gc) {
-    Isolate::Current()->heap()->CollectAllGarbage();
+    isolate->heap()->CollectAllGarbage();
   }
 
   const CodeSourceMap& map =
@@ -1976,6 +1976,10 @@
   // persist.
   uword stack_overflow_flags = thread->GetAndClearStackOverflowFlags();
 
+  if (FLAG_shared_slow_path_triggers_gc) {
+    isolate->heap()->CollectAllGarbage();
+  }
+
   bool interpreter_stack_overflow = false;
 #if defined(DART_USE_INTERPRETER)
   // Do not allocate an interpreter, if none is allocated yet.
diff --git a/runtime/vm/stub_code.h b/runtime/vm/stub_code.h
index 99dd71b..90567b6 100644
--- a/runtime/vm/stub_code.h
+++ b/runtime/vm/stub_code.h
@@ -79,7 +79,9 @@
   V(FrameAwaitingMaterialization)                                              \
   V(AsynchronousGapMarker)                                                     \
   V(NullErrorSharedWithFPURegs)                                                \
-  V(NullErrorSharedWithoutFPURegs)
+  V(NullErrorSharedWithoutFPURegs)                                             \
+  V(StackOverflowSharedWithFPURegs)                                            \
+  V(StackOverflowSharedWithoutFPURegs)
 
 #else
 #define VM_STUB_CODE_LIST(V)                                                   \
@@ -202,8 +204,11 @@
   static RawCode* Generate(const char* name,
                            void (*GenerateStub)(Assembler* assembler));
 
-  static void GenerateNullErrorShared(Assembler* assembler,
-                                      bool save_fpu_registers);
+  static void GenerateSharedStub(Assembler* assembler,
+                                 bool save_fpu_registers,
+                                 const RuntimeEntry* target,
+                                 intptr_t self_code_stub_offset_from_thread,
+                                 bool allow_return);
 
   static void GenerateMegamorphicMissStub(Assembler* assembler);
   static void GenerateAllocationStubForClass(Assembler* assembler,
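
VM_STUB_CODE_LIST is an X-macro: each V(Name) entry expands into stub declarations, generator hooks, and table entries, so the two new stubs are registered everywhere from the two added lines. A self-contained sketch of the mechanism (demo macro names, not the VM's):

    #include <cstdio>

    #define DEMO_STUB_LIST(V)                                                  \
      V(NullErrorSharedWithFPURegs)                                            \
      V(NullErrorSharedWithoutFPURegs)                                         \
      V(StackOverflowSharedWithFPURegs)                                        \
      V(StackOverflowSharedWithoutFPURegs)

    // One expansion of the list produces a name table...
    #define STUB_NAME(name) #name,
    static const char* const kStubNames[] = {DEMO_STUB_LIST(STUB_NAME)};
    #undef STUB_NAME

    // ...other expansions elsewhere would produce Generate<Name>Stub
    // declarations, cached entry points, and so on, all kept in sync by
    // the single list.
    int main() {
      for (const char* name : kStubNames) std::puts(name);
      return 0;
    }
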
diff --git a/runtime/vm/stub_code_arm.cc b/runtime/vm/stub_code_arm.cc
index 493329f..d9fede6 100644
--- a/runtime/vm/stub_code_arm.cc
+++ b/runtime/vm/stub_code_arm.cc
@@ -122,6 +122,18 @@
   __ Breakpoint();
 }
 
+void StubCode::GenerateStackOverflowSharedWithoutFPURegsStub(
+    Assembler* assembler) {
+  // TODO(sjindel): implement.
+  __ Breakpoint();
+}
+
+void StubCode::GenerateStackOverflowSharedWithFPURegsStub(
+    Assembler* assembler) {
+  // TODO(sjindel): implement.
+  __ Breakpoint();
+}
+
 // Input parameters:
 //   R0 : stop message (const char*).
 // Must preserve all registers.
diff --git a/runtime/vm/stub_code_arm64.cc b/runtime/vm/stub_code_arm64.cc
index c38de9a..7610428 100644
--- a/runtime/vm/stub_code_arm64.cc
+++ b/runtime/vm/stub_code_arm64.cc
@@ -139,6 +139,18 @@
   __ Breakpoint();
 }
 
+void StubCode::GenerateStackOverflowSharedWithoutFPURegsStub(
+    Assembler* assembler) {
+  // TODO(sjindel): implement.
+  __ Breakpoint();
+}
+
+void StubCode::GenerateStackOverflowSharedWithFPURegsStub(
+    Assembler* assembler) {
+  // TODO(sjindel): implement.
+  __ Breakpoint();
+}
+
 void StubCode::GeneratePrintStopMessageStub(Assembler* assembler) {
   __ Stop("GeneratePrintStopMessageStub");
 }
diff --git a/runtime/vm/stub_code_ia32.cc b/runtime/vm/stub_code_ia32.cc
index 60a107b..6b91241 100644
--- a/runtime/vm/stub_code_ia32.cc
+++ b/runtime/vm/stub_code_ia32.cc
@@ -107,6 +107,18 @@
   __ Breakpoint();
 }
 
+void StubCode::GenerateStackOverflowSharedWithoutFPURegsStub(
+    Assembler* assembler) {
+  // TODO(sjindel): implement.
+  __ Breakpoint();
+}
+
+void StubCode::GenerateStackOverflowSharedWithFPURegsStub(
+    Assembler* assembler) {
+  // TODO(sjindel): implement.
+  __ Breakpoint();
+}
+
 // Input parameters:
 //   ESP : points to return address.
 //   EAX : stop message (const char*).
diff --git a/runtime/vm/stub_code_x64.cc b/runtime/vm/stub_code_x64.cc
index 22da3fe..f6b0b26 100644
--- a/runtime/vm/stub_code_x64.cc
+++ b/runtime/vm/stub_code_x64.cc
@@ -108,8 +108,11 @@
   __ ret();
 }
 
-void StubCode::GenerateNullErrorShared(Assembler* assembler,
-                                       bool save_fpu_registers) {
+void StubCode::GenerateSharedStub(Assembler* assembler,
+                                  bool save_fpu_registers,
+                                  const RuntimeEntry* target,
+                                  intptr_t self_code_stub_offset_from_thread,
+                                  bool allow_return) {
   // We want the saved registers to appear like part of the caller's frame, so
   // we push them before calling EnterStubFrame.
   __ PushRegisters(kDartAvailableCpuRegs,
@@ -128,27 +131,59 @@
   // Copy down the return address so the stack layout is correct.
   __ pushq(Address(RSP, kAllSavedRegistersSlots * kWordSize));
 
-  const intptr_t stub_offset =
-      save_fpu_registers
-          ? Thread::null_error_shared_with_fpu_regs_stub_offset()
-          : Thread::null_error_shared_without_fpu_regs_stub_offset();
-  __ movq(CODE_REG, Address(THR, stub_offset));
+  __ movq(CODE_REG, Address(THR, self_code_stub_offset_from_thread));
 
   __ EnterStubFrame();
 
   __ movq(CODE_REG, Address(THR, Thread::call_to_runtime_stub_offset()));
-  __ movq(RBX, Address(THR, Thread::OffsetFromThread(&kNullErrorRuntimeEntry)));
+  __ movq(RBX, Address(THR, Thread::OffsetFromThread(target)));
   __ movq(R10, Immediate(/*argument_count=*/0));
   __ call(Address(THR, Thread::call_to_runtime_entry_point_offset()));
-  __ Breakpoint();
+
+  if (!allow_return) {
+    __ Breakpoint();
+    return;
+  }
+  __ LeaveStubFrame();
+
+  // Drop "official" return address -- we can just use the one stored above the
+  // saved registers.
+  __ Drop(1);
+
+  __ PopRegisters(kDartAvailableCpuRegs,
+                  save_fpu_registers ? kAllFpuRegistersList : 0);
+
+  __ ret();
 }
 
 void StubCode::GenerateNullErrorSharedWithoutFPURegsStub(Assembler* assembler) {
-  GenerateNullErrorShared(assembler, false);
+  GenerateSharedStub(assembler, /*save_fpu_registers=*/false,
+                     &kNullErrorRuntimeEntry,
+                     Thread::null_error_shared_without_fpu_regs_stub_offset(),
+                     /*allow_return=*/false);
 }
 
 void StubCode::GenerateNullErrorSharedWithFPURegsStub(Assembler* assembler) {
-  GenerateNullErrorShared(assembler, true);
+  GenerateSharedStub(assembler, /*save_fpu_registers=*/true,
+                     &kNullErrorRuntimeEntry,
+                     Thread::null_error_shared_with_fpu_regs_stub_offset(),
+                     /*allow_return=*/false);
+}
+
+void StubCode::GenerateStackOverflowSharedWithoutFPURegsStub(
+    Assembler* assembler) {
+  GenerateSharedStub(
+      assembler, /*save_fpu_registers=*/false, &kStackOverflowRuntimeEntry,
+      Thread::stack_overflow_shared_without_fpu_regs_stub_offset(),
+      /*allow_return=*/true);
+}
+
+void StubCode::GenerateStackOverflowSharedWithFPURegsStub(
+    Assembler* assembler) {
+  GenerateSharedStub(assembler, /*save_fpu_registers=*/true,
+                     &kStackOverflowRuntimeEntry,
+                     Thread::stack_overflow_shared_with_fpu_regs_stub_offset(),
+                     /*allow_return=*/true);
 }
 
 // Input parameters:
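
GenerateSharedStub's allow_return flag captures the one behavioral difference between the two stub families: a null-error runtime entry throws and never returns, so the stub plants a breakpoint after the call, while a stack-overflow entry returns normally, so the stub must tear down the frame, drop the duplicated return address, and pop the saved registers. A hedged trace of that structure (printouts standing in for the real assembler):

    #include <cstdio>

    static void TraceSharedStub(const char* target, bool allow_return) {
      std::puts("push allocatable CPU (and optionally FPU) registers");
      std::puts("copy down return address; enter stub frame");
      std::printf("call runtime entry %s\n", target);
      if (!allow_return) {
        // The runtime entry must have thrown; reaching here is a bug.
        std::puts("breakpoint");
        return;
      }
      std::puts("leave stub frame");
      std::puts("drop duplicated return address");
      std::puts("pop saved registers");
      std::puts("ret");
    }

    int main() {
      TraceSharedStub("kNullErrorRuntimeEntry", /*allow_return=*/false);
      std::puts("--");
      TraceSharedStub("kStackOverflowRuntimeEntry", /*allow_return=*/true);
      return 0;
    }
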
diff --git a/runtime/vm/thread.h b/runtime/vm/thread.h
index d8f2792..8864071 100644
--- a/runtime/vm/thread.h
+++ b/runtime/vm/thread.h
@@ -98,6 +98,10 @@
     StubCode::NullErrorSharedWithoutFPURegs_entry()->code(), NULL)             \
   V(RawCode*, null_error_shared_with_fpu_regs_stub_,                           \
     StubCode::NullErrorSharedWithFPURegs_entry()->code(), NULL)                \
+  V(RawCode*, stack_overflow_shared_without_fpu_regs_stub_,                    \
+    StubCode::StackOverflowSharedWithoutFPURegs_entry()->code(), NULL)         \
+  V(RawCode*, stack_overflow_shared_with_fpu_regs_stub_,                       \
+    StubCode::StackOverflowSharedWithFPURegs_entry()->code(), NULL)            \
   V(RawCode*, monomorphic_miss_stub_,                                          \
     StubCode::MonomorphicMiss_entry()->code(), NULL)                           \
   V(RawCode*, ic_lookup_through_code_stub_,                                    \
@@ -142,6 +146,10 @@
     StubCode::NullErrorSharedWithoutFPURegs_entry()->EntryPoint(), 0)          \
   V(uword, null_error_shared_with_fpu_regs_entry_point_,                       \
     StubCode::NullErrorSharedWithFPURegs_entry()->EntryPoint(), 0)             \
+  V(uword, stack_overflow_shared_without_fpu_regs_entry_point_,                \
+    StubCode::StackOverflowSharedWithoutFPURegs_entry()->EntryPoint(), 0)      \
+  V(uword, stack_overflow_shared_with_fpu_regs_entry_point_,                   \
+    StubCode::StackOverflowSharedWithFPURegs_entry()->EntryPoint(), 0)         \
   V(uword, megamorphic_call_checked_entry_,                                    \
     StubCode::MegamorphicCall_entry()->EntryPoint(), 0)                        \
   V(uword, monomorphic_miss_entry_,                                            \
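
The thread.h entries cache both the stub Code objects and their raw entry points on the Thread, which is what lets the compiled slow path above reach a shared stub with a single call [THR + offset] and no CODE_REG setup. A sketch of the field/offset relationship (a hypothetical hand-written struct; the real fields and accessors are generated from the list above):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct Thread {
      uintptr_t stack_overflow_shared_without_fpu_regs_entry_point_;
      uintptr_t stack_overflow_shared_with_fpu_regs_entry_point_;

      // Accessors like this let the compiler emit `call [THR + offset]`
      // without first materializing a Code object in CODE_REG.
      static size_t stack_overflow_shared_without_fpu_regs_entry_point_offset() {
        return offsetof(
            Thread, stack_overflow_shared_without_fpu_regs_entry_point_);
      }
    };

    int main() {
      std::printf(
          "offset = %zu\n",
          Thread::stack_overflow_shared_without_fpu_regs_entry_point_offset());
      return 0;
    }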