[VM/Compiler/AOT] Bare instructions 8: Improve AOT code by using pc-relative calls

This CL improves AOT code for CheckStackOverflowInstr/CheckNullInstr:

  * On ARM we can emit a conditional pc-relative call for the stack overflow
    check, getting rid of the slow path entirely (see the sketch below).

  * On ARM64 we can emit a pc-relative call to the shared stub on the slow
    path, avoiding an extra load of the stub entry point from the thread.

Flutter gallery size impact (in bare instructions mode):
  * ARM: -3.7% RX
  * ARM64: -1.4% RX
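
A small standalone C++ sketch (not part of this CL; helper names are made up
for illustration) of how a conditional ARM "bl<cond> <offset>" is encoded and
how the relaxed PcRelativeCallPattern::IsValid check recognizes it. The
type-field constants (shift 25, width 3) are the architectural ARM branch
encoding and are assumed to match the VM's kTypeShift/kTypeBits:

  #include <cstdint>
  #include <cstdio>

  // ARM condition codes (subset): EQ is what the null check uses,
  // LS (unsigned lower or same) is what the stack overflow check uses,
  // AL means "always" (the old, unconditional pattern).
  constexpr uint32_t kCondEQ = 0x0;
  constexpr uint32_t kCondLS = 0x9;
  constexpr uint32_t kCondAL = 0xE;

  // Encode "bl<cond> <offset>": cond[31:28] | 101[27:25] | L=1[24] | imm24[23:0].
  // The 24-bit immediate is the word offset relative to PC+8.
  uint32_t EncodeBranchLink(uint32_t cond, int32_t offset_from_pc_plus_8) {
    const uint32_t imm24 =
        (static_cast<uint32_t>(offset_from_pc_plus_8) >> 2) & 0x00FFFFFF;
    return (cond << 28) | (0x5u << 25) | (1u << 24) | imm24;
  }

  // Analogue of the new PcRelativeCallPattern::IsValid: only the branch type
  // bits [27:25] are tested, so any condition code is accepted.
  bool LooksLikePcRelativeCall(uint32_t word) {
    constexpr uint32_t kTypeShift = 25;  // assumed to match the VM constant
    constexpr uint32_t kTypeBits = 3;    // assumed to match the VM constant
    return ((word >> kTypeShift) & ((1u << kTypeBits) - 1)) == 0x5;
  }

  int main() {
    const uint32_t bl_al = EncodeBranchLink(kCondAL, 64);  // plain "bl"
    const uint32_t bl_ls = EncodeBranchLink(kCondLS, 64);  // "bl.ls" (stack check)
    std::printf("bl    %08x valid=%d\n", static_cast<unsigned>(bl_al),
                LooksLikePcRelativeCall(bl_al));
    std::printf("bl.ls %08x valid=%d\n", static_cast<unsigned>(bl_ls),
                LooksLikePcRelativeCall(bl_ls));
    return 0;
  }

Because the old check required the AL condition plus the link bit, only
unconditional calls matched; checking just the type bits lets the ARM stack
overflow check call the shared stub conditionally and drop its slow path.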

Issue https://github.com/dart-lang/sdk/issues/33274

Change-Id: Ia1acd76ac6efa26642f99e1ce3e417100aa357f3
Reviewed-on: https://dart-review.googlesource.com/c/89620
Commit-Queue: Martin Kustermann <kustermann@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/compiler/aot/precompiler.cc b/runtime/vm/compiler/aot/precompiler.cc
index 1fc69d6..d559f8b 100644
--- a/runtime/vm/compiler/aot/precompiler.cc
+++ b/runtime/vm/compiler/aot/precompiler.cc
@@ -247,9 +247,34 @@
         //   - build method extractor code
         MegamorphicCacheTable::ReInitMissHandlerCode(
             isolate_, global_object_pool_wrapper());
-        I->object_store()->set_build_method_extractor_code(
-            Code::Handle(StubCode::GetBuildMethodExtractorStub(
-                global_object_pool_wrapper())));
+
+        auto& stub_code = Code::Handle();
+
+        stub_code =
+            StubCode::GetBuildMethodExtractorStub(global_object_pool_wrapper());
+        I->object_store()->set_build_method_extractor_code(stub_code);
+
+        stub_code =
+            StubCode::BuildIsolateSpecificNullErrorSharedWithFPURegsStub(
+                global_object_pool_wrapper());
+        I->object_store()->set_null_error_stub_with_fpu_regs_stub(stub_code);
+
+        stub_code =
+            StubCode::BuildIsolateSpecificNullErrorSharedWithoutFPURegsStub(
+                global_object_pool_wrapper());
+        I->object_store()->set_null_error_stub_without_fpu_regs_stub(stub_code);
+
+        stub_code =
+            StubCode::BuildIsolateSpecificStackOverflowSharedWithFPURegsStub(
+                global_object_pool_wrapper());
+        I->object_store()->set_stack_overflow_stub_with_fpu_regs_stub(
+            stub_code);
+
+        stub_code =
+            StubCode::BuildIsolateSpecificStackOverflowSharedWithoutFPURegsStub(
+                global_object_pool_wrapper());
+        I->object_store()->set_stack_overflow_stub_without_fpu_regs_stub(
+            stub_code);
       }
 
       CollectDynamicFunctionNames();
diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc
index 53de88b..1998531 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm.cc
@@ -3406,9 +3406,9 @@
   }
 }
 
-void Assembler::GenerateUnRelocatedPcRelativeCall() {
-  // Emit "blr <offset>".
-  EmitType5(AL, 0x686868, /*link=*/true);
+void Assembler::GenerateUnRelocatedPcRelativeCall(Condition cond) {
+  // Emit "blr.cond <offset>".
+  EmitType5(cond, 0x686868, /*link=*/true);
 }
 
 void Assembler::Stop(const char* message) {
diff --git a/runtime/vm/compiler/assembler/assembler_arm.h b/runtime/vm/compiler/assembler/assembler_arm.h
index b4c35c9..6ca0bb7 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.h
+++ b/runtime/vm/compiler/assembler/assembler_arm.h
@@ -1106,7 +1106,7 @@
   //   (Code::kPcRelativeCall & pc_offset, <target-code>, <target-function>)
   //
   // will be used during relocation to fix the offset.
-  void GenerateUnRelocatedPcRelativeCall();
+  void GenerateUnRelocatedPcRelativeCall(Condition cond = AL);
 
   // Emit data (e.g encoded instruction or immediate) in instruction stream.
   void Emit(int32_t value);
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.cc b/runtime/vm/compiler/backend/flow_graph_compiler.cc
index 9b4f5f1..a22c092 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler.cc
@@ -471,10 +471,11 @@
 void FlowGraphCompiler::EmitCallsiteMetadata(TokenPosition token_pos,
                                              intptr_t deopt_id,
                                              RawPcDescriptors::Kind kind,
-                                             LocationSummary* locs) {
+                                             LocationSummary* locs,
+                                             Environment* env) {
   AddCurrentDescriptor(kind, deopt_id, token_pos);
   RecordSafepoint(locs);
-  RecordCatchEntryMoves();
+  RecordCatchEntryMoves(env);
   if (deopt_id != DeoptId::kNone) {
     // Marks either the continuation point in unoptimized code or the
     // deoptimization point in optimized code, after call.
@@ -2278,7 +2279,7 @@
     __ PushRegister(locs->in(i).reg());
   }
   if (use_shared_stub) {
-    EmitSharedStubCall(compiler->assembler(), live_fpu_registers);
+    EmitSharedStubCall(compiler, live_fpu_registers);
   } else {
     __ CallRuntime(runtime_entry_, num_args_);
   }
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.h b/runtime/vm/compiler/backend/flow_graph_compiler.h
index f0c0af9..44d8158 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler.h
+++ b/runtime/vm/compiler/backend/flow_graph_compiler.h
@@ -262,7 +262,7 @@
   virtual void EmitCodeAtSlowPathEntry(FlowGraphCompiler* compiler) {}
   virtual void AddMetadataForRuntimeCall(FlowGraphCompiler* compiler) {}
 
-  virtual void EmitSharedStubCall(Assembler* assembler,
+  virtual void EmitSharedStubCall(FlowGraphCompiler* compiler,
                                   bool save_fpu_registers) {
     UNREACHABLE();
   }
@@ -275,6 +275,27 @@
   const intptr_t try_index_;
 };
 
+class NullErrorSlowPath : public ThrowErrorSlowPathCode {
+ public:
+  static const intptr_t kNumberOfArguments = 0;
+
+  NullErrorSlowPath(CheckNullInstr* instruction, intptr_t try_index)
+      : ThrowErrorSlowPathCode(instruction,
+                               kNullErrorRuntimeEntry,
+                               kNumberOfArguments,
+                               try_index) {}
+
+  const char* name() override { return "check null"; }
+
+  void EmitSharedStubCall(FlowGraphCompiler* compiler,
+                          bool save_fpu_registers) override;
+
+  void AddMetadataForRuntimeCall(FlowGraphCompiler* compiler) override {
+    CheckNullInstr::AddMetadataForRuntimeCall(instruction()->AsCheckNull(),
+                                              compiler);
+  }
+};
+
 #endif  // !defined(TARGET_ARCH_DBC)
 
 class FlowGraphCompiler : public ValueObject {
@@ -597,10 +618,20 @@
   void RecordCatchEntryMoves(Environment* env = NULL,
                              intptr_t try_index = kInvalidTryIndex);
 
+  // Emits the following metadata for the current PC:
+  //
+  //   * Attaches current try index
+  //   * Attaches stackmaps
+  //   * Attaches catch entry moves (in AOT)
+  //   * Deoptimization information (in JIT)
+  //
+  // If [env] is not `nullptr` it will be used instead of the
+  // `pending_deoptimization_env`.
   void EmitCallsiteMetadata(TokenPosition token_pos,
                             intptr_t deopt_id,
                             RawPcDescriptors::Kind kind,
-                            LocationSummary* locs);
+                            LocationSummary* locs,
+                            Environment* env = nullptr);
 
   void EmitComment(Instruction* instr);
 
@@ -767,6 +798,9 @@
   bool IsEmptyBlock(BlockEntryInstr* block) const;
 
  private:
+  friend class CheckNullInstr;           // For AddPcRelativeCallStubTarget().
+  friend class NullErrorSlowPath;        // For AddPcRelativeCallStubTarget().
+  friend class CheckStackOverflowInstr;  // For AddPcRelativeCallStubTarget().
   friend class CheckStackOverflowSlowPath;  // For pending_deoptimization_env_.
   friend class CheckedSmiSlowPath;          // Same.
   friend class CheckedSmiComparisonSlowPath;  // Same.
diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc
index 0e381f8..8ea643a 100644
--- a/runtime/vm/compiler/backend/il.cc
+++ b/runtime/vm/compiler/backend/il.cc
@@ -4532,43 +4532,16 @@
   return locs;
 }
 
-class NullErrorSlowPath : public ThrowErrorSlowPathCode {
- public:
-  static const intptr_t kNumberOfArguments = 0;
-
-  NullErrorSlowPath(CheckNullInstr* instruction, intptr_t try_index)
-      : ThrowErrorSlowPathCode(instruction,
-                               kNullErrorRuntimeEntry,
-                               kNumberOfArguments,
-                               try_index) {}
-
-  const char* name() override { return "check null"; }
-
-  void EmitSharedStubCall(Assembler* assembler,
-                          bool save_fpu_registers) override {
-    assembler->CallNullErrorShared(save_fpu_registers);
-  }
-
-  void AddMetadataForRuntimeCall(FlowGraphCompiler* compiler) override {
-    const String& function_name = instruction()->AsCheckNull()->function_name();
-    const intptr_t name_index =
-        compiler->assembler()->object_pool_wrapper().FindObject(function_name);
-    compiler->AddNullCheck(compiler->assembler()->CodeSize(),
-                           instruction()->token_pos(), name_index);
-  }
-};
-
-void CheckNullInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
-  NullErrorSlowPath* slow_path =
-      new NullErrorSlowPath(this, compiler->CurrentTryIndex());
-  compiler->AddSlowPathCode(slow_path);
-
-  Register value_reg = locs()->in(0).reg();
-  // TODO(dartbug.com/30480): Consider passing `null` literal as an argument
-  // in order to be able to allocate it on register.
-  __ CompareObject(value_reg, Object::null_object());
-  __ BranchIf(EQUAL, slow_path->entry_label());
+#if !defined(TARGET_ARCH_DBC)
+void CheckNullInstr::AddMetadataForRuntimeCall(CheckNullInstr* check_null,
+                                               FlowGraphCompiler* compiler) {
+  const String& function_name = check_null->function_name();
+  const intptr_t name_index =
+      compiler->assembler()->object_pool_wrapper().FindObject(function_name);
+  compiler->AddNullCheck(compiler->assembler()->CodeSize(),
+                         check_null->token_pos(), name_index);
 }
+#endif  // !defined(TARGET_ARCH_DBC)
 
 void UnboxInstr::EmitLoadFromBoxWithDeopt(FlowGraphCompiler* compiler) {
   const intptr_t box_cid = BoxCid();
diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h
index a1550b2..91c2795 100644
--- a/runtime/vm/compiler/backend/il.h
+++ b/runtime/vm/compiler/backend/il.h
@@ -7175,6 +7175,9 @@
 
   virtual bool AttributesEqual(Instruction* other) const { return true; }
 
+  static void AddMetadataForRuntimeCall(CheckNullInstr* check_null,
+                                        FlowGraphCompiler* compiler);
+
  private:
   const TokenPosition token_pos_;
   const String& function_name_;
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index 94c7519..ae182a4 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -3072,11 +3072,34 @@
 };
 
 void CheckStackOverflowInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
-  CheckStackOverflowSlowPath* slow_path = new CheckStackOverflowSlowPath(this);
-  compiler->AddSlowPathCode(slow_path);
-
   __ ldr(IP, Address(THR, Thread::stack_limit_offset()));
   __ cmp(SP, Operand(IP));
+
+  auto object_store = compiler->isolate()->object_store();
+  const bool live_fpu_regs = locs()->live_registers()->FpuRegisterCount() > 0;
+  const auto& stub = Code::ZoneHandle(
+      compiler->zone(),
+      live_fpu_regs
+          ? object_store->stack_overflow_stub_with_fpu_regs_stub()
+          : object_store->stack_overflow_stub_without_fpu_regs_stub());
+  const bool using_shared_stub = locs()->call_on_shared_slow_path();
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions &&
+      using_shared_stub && !stub.InVMHeap()) {
+    compiler->AddPcRelativeCallStubTarget(stub);
+    __ GenerateUnRelocatedPcRelativeCall(LS);
+
+    // We use the "extended" environment which has the locations updated to
+    // reflect live registers being saved in the shared spilling stubs (see
+    // the stub above).
+    auto extended_env = compiler->SlowPathEnvironmentFor(this, 0);
+    compiler->EmitCallsiteMetadata(token_pos(), deopt_id(),
+                                   RawPcDescriptors::kOther, locs(),
+                                   extended_env);
+    return;
+  }
+
+  CheckStackOverflowSlowPath* slow_path = new CheckStackOverflowSlowPath(this);
+  compiler->AddSlowPathCode(slow_path);
   __ b(slow_path->entry_label(), LS);
   if (compiler->CanOSRFunction() && in_loop()) {
     const Register temp = locs()->temp(0).reg();
@@ -5742,6 +5765,47 @@
   __ BranchIfNotSmi(value, deopt);
 }
 
+void CheckNullInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  Register value_reg = locs()->in(0).reg();
+  // TODO(dartbug.com/30480): Consider passing `null` literal as an argument
+  // in order to be able to allocate it on register.
+  __ CompareObject(value_reg, Object::null_object());
+
+  auto object_store = compiler->isolate()->object_store();
+  const bool live_fpu_regs = locs()->live_registers()->FpuRegisterCount() > 0;
+  const auto& stub = Code::ZoneHandle(
+      compiler->zone(),
+      live_fpu_regs ? object_store->null_error_stub_with_fpu_regs_stub()
+                    : object_store->null_error_stub_without_fpu_regs_stub());
+  const bool using_shared_stub = locs()->call_on_shared_slow_path();
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions &&
+      using_shared_stub && !stub.InVMHeap()) {
+    compiler->AddPcRelativeCallStubTarget(stub);
+    __ GenerateUnRelocatedPcRelativeCall(EQUAL);
+
+    // We use the "extended" environment which has the locations updated to
+    // reflect live registers being saved in the shared spilling stubs (see
+    // the stub above).
+    auto extended_env = compiler->SlowPathEnvironmentFor(this, 0);
+    compiler->EmitCallsiteMetadata(token_pos(), deopt_id(),
+                                   RawPcDescriptors::kOther, locs(),
+                                   extended_env);
+    CheckNullInstr::AddMetadataForRuntimeCall(this, compiler);
+    return;
+  }
+
+  NullErrorSlowPath* slow_path =
+      new NullErrorSlowPath(this, compiler->CurrentTryIndex());
+  compiler->AddSlowPathCode(slow_path);
+
+  __ BranchIf(EQUAL, slow_path->entry_label());
+}
+
+void NullErrorSlowPath::EmitSharedStubCall(FlowGraphCompiler* compiler,
+                                           bool save_fpu_registers) {
+  compiler->assembler()->CallNullErrorShared(save_fpu_registers);
+}
+
 LocationSummary* CheckClassIdInstr::MakeLocationSummary(Zone* zone,
                                                         bool opt) const {
   const intptr_t kNumInputs = 1;
diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc
index 9c03a18..d8ca855 100644
--- a/runtime/vm/compiler/backend/il_arm64.cc
+++ b/runtime/vm/compiler/backend/il_arm64.cc
@@ -2695,8 +2695,9 @@
       : TemplateSlowPathCode(instruction) {}
 
   virtual void EmitNativeCode(FlowGraphCompiler* compiler) {
+    auto locs = instruction()->locs();
     if (compiler->isolate()->use_osr() && osr_entry_label()->IsLinked()) {
-      const Register value = instruction()->locs()->temp(0).reg();
+      const Register value = locs->temp(0).reg();
       __ Comment("CheckStackOverflowSlowPathOsr");
       __ Bind(osr_entry_label());
       __ LoadImmediate(value, Thread::kOsrRequest);
@@ -2704,10 +2705,9 @@
     }
     __ Comment("CheckStackOverflowSlowPath");
     __ Bind(entry_label());
-    const bool using_shared_stub =
-        instruction()->locs()->call_on_shared_slow_path();
+    const bool using_shared_stub = locs->call_on_shared_slow_path();
     if (!using_shared_stub) {
-      compiler->SaveLiveRegisters(instruction()->locs());
+      compiler->SaveLiveRegisters(locs);
     }
     // pending_deoptimization_env_ is needed to generate a runtime call that
     // may throw an exception.
@@ -2717,14 +2717,30 @@
     compiler->pending_deoptimization_env_ = env;
 
     if (using_shared_stub) {
-      uword entry_point_offset =
-          instruction()->locs()->live_registers()->FpuRegisterCount() > 0
-              ? Thread::stack_overflow_shared_with_fpu_regs_entry_point_offset()
-              : Thread::
-                    stack_overflow_shared_without_fpu_regs_entry_point_offset();
-      __ ldr(LR, Address(THR, entry_point_offset));
-      __ blr(LR);
-      compiler->RecordSafepoint(instruction()->locs(), kNumSlowPathArgs);
+      auto object_store = compiler->isolate()->object_store();
+      const bool live_fpu_regs = locs->live_registers()->FpuRegisterCount() > 0;
+      const auto& stub = Code::Handle(
+          compiler->zone(),
+          live_fpu_regs
+              ? object_store->stack_overflow_stub_with_fpu_regs_stub()
+              : object_store->stack_overflow_stub_without_fpu_regs_stub());
+
+      if (FLAG_precompiled_mode && FLAG_use_bare_instructions &&
+          using_shared_stub && !stub.InVMHeap()) {
+        compiler->AddPcRelativeCallStubTarget(stub);
+        __ GenerateUnRelocatedPcRelativeCall();
+
+      } else {
+        uword entry_point_offset =
+            locs->live_registers()->FpuRegisterCount() > 0
+                ? Thread::
+                      stack_overflow_shared_with_fpu_regs_entry_point_offset()
+                : Thread::
+                      stack_overflow_shared_without_fpu_regs_entry_point_offset();
+        __ ldr(LR, Address(THR, entry_point_offset));
+        __ blr(LR);
+      }
+      compiler->RecordSafepoint(locs, kNumSlowPathArgs);
       compiler->RecordCatchEntryMoves();
       compiler->AddDescriptor(
           RawPcDescriptors::kOther, compiler->assembler()->CodeSize(),
@@ -2733,7 +2749,7 @@
     } else {
       compiler->GenerateRuntimeCall(
           instruction()->token_pos(), instruction()->deopt_id(),
-          kStackOverflowRuntimeEntry, kNumSlowPathArgs, instruction()->locs());
+          kStackOverflowRuntimeEntry, kNumSlowPathArgs, locs);
     }
 
     if (compiler->isolate()->use_osr() && !compiler->is_optimizing() &&
@@ -2745,7 +2761,7 @@
     }
     compiler->pending_deoptimization_env_ = NULL;
     if (!using_shared_stub) {
-      compiler->RestoreLiveRegisters(instruction()->locs());
+      compiler->RestoreLiveRegisters(locs);
     }
     __ b(exit_label());
   }
@@ -4956,6 +4972,40 @@
   __ BranchIfNotSmi(value, deopt);
 }
 
+void CheckNullInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  NullErrorSlowPath* slow_path =
+      new NullErrorSlowPath(this, compiler->CurrentTryIndex());
+  compiler->AddSlowPathCode(slow_path);
+
+  Register value_reg = locs()->in(0).reg();
+  // TODO(dartbug.com/30480): Consider passing `null` literal as an argument
+  // in order to be able to allocate it on register.
+  __ CompareObject(value_reg, Object::null_object());
+  __ BranchIf(EQUAL, slow_path->entry_label());
+}
+
+void NullErrorSlowPath::EmitSharedStubCall(FlowGraphCompiler* compiler,
+                                           bool save_fpu_registers) {
+  auto check_null = instruction()->AsCheckNull();
+  auto locs = check_null->locs();
+  const bool using_shared_stub = locs->call_on_shared_slow_path();
+
+  const bool live_fpu_regs = locs->live_registers()->FpuRegisterCount() > 0;
+  auto object_store = compiler->isolate()->object_store();
+  const auto& stub = Code::Handle(
+      compiler->zone(),
+      live_fpu_regs ? object_store->null_error_stub_with_fpu_regs_stub()
+                    : object_store->null_error_stub_without_fpu_regs_stub());
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions &&
+      using_shared_stub && !stub.InVMHeap()) {
+    compiler->AddPcRelativeCallStubTarget(stub);
+    compiler->assembler()->GenerateUnRelocatedPcRelativeCall();
+    return;
+  }
+
+  compiler->assembler()->CallNullErrorShared(save_fpu_registers);
+}
+
 LocationSummary* CheckArrayBoundInstr::MakeLocationSummary(Zone* zone,
                                                            bool opt) const {
   const intptr_t kNumInputs = 2;
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index ce677dc..fdd36ae 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -5102,6 +5102,24 @@
   __ BranchIfNotSmi(value, deopt);
 }
 
+void CheckNullInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  NullErrorSlowPath* slow_path =
+      new NullErrorSlowPath(this, compiler->CurrentTryIndex());
+  compiler->AddSlowPathCode(slow_path);
+
+  Register value_reg = locs()->in(0).reg();
+  // TODO(dartbug.com/30480): Consider passing `null` literal as an argument
+  // in order to be able to allocate it on register.
+  __ CompareObject(value_reg, Object::null_object());
+  __ BranchIf(EQUAL, slow_path->entry_label());
+}
+
+void NullErrorSlowPath::EmitSharedStubCall(FlowGraphCompiler* compiler,
+                                           bool save_fpu_registers) {
+  // We only generate shared spilling stub calls for AOT configurations.
+  UNREACHABLE();
+}
+
 LocationSummary* CheckClassIdInstr::MakeLocationSummary(Zone* zone,
                                                         bool opt) const {
   const intptr_t kNumInputs = 1;
diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc
index 4ae5a22..796fbf5 100644
--- a/runtime/vm/compiler/backend/il_x64.cc
+++ b/runtime/vm/compiler/backend/il_x64.cc
@@ -5193,6 +5193,23 @@
   __ BranchIfNotSmi(value, deopt);
 }
 
+void CheckNullInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  NullErrorSlowPath* slow_path =
+      new NullErrorSlowPath(this, compiler->CurrentTryIndex());
+  compiler->AddSlowPathCode(slow_path);
+
+  Register value_reg = locs()->in(0).reg();
+  // TODO(dartbug.com/30480): Consider passing `null` literal as an argument
+  // in order to be able to allocate it on register.
+  __ CompareObject(value_reg, Object::null_object());
+  __ BranchIf(EQUAL, slow_path->entry_label());
+}
+
+void NullErrorSlowPath::EmitSharedStubCall(FlowGraphCompiler* compiler,
+                                           bool save_fpu_registers) {
+  compiler->assembler()->CallNullErrorShared(save_fpu_registers);
+}
+
 LocationSummary* CheckClassIdInstr::MakeLocationSummary(Zone* zone,
                                                         bool opt) const {
   const intptr_t kNumInputs = 1;
diff --git a/runtime/vm/image_snapshot.cc b/runtime/vm/image_snapshot.cc
index bf7dada..303ca6e 100644
--- a/runtime/vm/image_snapshot.cc
+++ b/runtime/vm/image_snapshot.cc
@@ -386,6 +386,25 @@
   }
 }
 
+const char* NameOfStubIsolateSpecificStub(ObjectStore* object_store,
+                                          const Code& code) {
+  if (code.raw() == object_store->build_method_extractor_code()) {
+    return "_iso_stub_BuildMethodExtractorStub";
+  } else if (code.raw() == object_store->null_error_stub_with_fpu_regs_stub()) {
+    return "_iso_stub_NullErrorSharedWithFPURegsStub";
+  } else if (code.raw() ==
+             object_store->null_error_stub_without_fpu_regs_stub()) {
+    return "_iso_stub_NullErrorSharedWithoutFPURegsStub";
+  } else if (code.raw() ==
+             object_store->stack_overflow_stub_with_fpu_regs_stub()) {
+    return "_iso_stub_StackOverflowStubWithFPURegsStub";
+  } else if (code.raw() ==
+             object_store->stack_overflow_stub_without_fpu_regs_stub()) {
+    return "_iso_stub_StackOverflowStubWithoutFPURegsStub";
+  }
+  return nullptr;
+}
+
 void AssemblyImageWriter::WriteText(WriteStream* clustered_stream, bool vm) {
   Zone* zone = Thread::Current()->zone();
 
@@ -496,14 +515,15 @@
       owner = code.owner();
       if (owner.IsNull()) {
         const char* name = StubCode::NameOfStub(insns.EntryPoint());
-        if (name == nullptr &&
-            code.raw() == object_store->build_method_extractor_code()) {
-          name = "BuildMethodExtractor";
-        }
-        if (name != NULL) {
+        if (name != nullptr) {
           assembly_stream_.Print("Precompiled_Stub_%s:\n", name);
         } else {
-          const char* name = tts.StubNameFromAddresss(insns.EntryPoint());
+          if (name == nullptr) {
+            name = NameOfStubIsolateSpecificStub(object_store, code);
+          }
+          if (name == nullptr) {
+            name = tts.StubNameFromAddresss(insns.EntryPoint());
+          }
           assembly_stream_.Print("Precompiled__%s:\n", name);
         }
       } else if (owner.IsClass()) {
diff --git a/runtime/vm/instructions_arm.cc b/runtime/vm/instructions_arm.cc
index 1ba7633..2f8152a 100644
--- a/runtime/vm/instructions_arm.cc
+++ b/runtime/vm/instructions_arm.cc
@@ -359,9 +359,8 @@
 bool PcRelativeCallPattern::IsValid() const {
   // bl.<cond> <offset>
   const uint32_t word = *reinterpret_cast<uint32_t*>(pc_);
-  const uint32_t cond_all = 0xe0;
-  const uint32_t branch_link = 0x0b;
-  return (word >> 24) == (cond_all | branch_link);
+  const uint32_t branch_link = 0x05;
+  return ((word >> kTypeShift) & ((1 << kTypeBits) - 1)) == branch_link;
 }
 
 void PcRelativeTrampolineJumpPattern::Initialize() {
diff --git a/runtime/vm/object_store.h b/runtime/vm/object_store.h
index 77c99ee..20a5565 100644
--- a/runtime/vm/object_store.h
+++ b/runtime/vm/object_store.h
@@ -125,6 +125,10 @@
   RW(Array, unique_dynamic_targets)                                            \
   RW(GrowableObjectArray, megamorphic_cache_table)                             \
   RW(Code, build_method_extractor_code)                                        \
+  RW(Code, null_error_stub_with_fpu_regs_stub)                                 \
+  RW(Code, null_error_stub_without_fpu_regs_stub)                              \
+  RW(Code, stack_overflow_stub_with_fpu_regs_stub)                             \
+  RW(Code, stack_overflow_stub_without_fpu_regs_stub)                          \
   R_(Code, megamorphic_miss_code)                                              \
   R_(Function, megamorphic_miss_function)                                      \
   RW(Array, code_order_table)                                                  \
diff --git a/runtime/vm/stub_code.cc b/runtime/vm/stub_code.cc
index 9f48a07..57a718c 100644
--- a/runtime/vm/stub_code.cc
+++ b/runtime/vm/stub_code.cc
@@ -298,6 +298,7 @@
 const char* StubCode::NameOfStub(uword entry_point) {
 #define VM_STUB_CODE_TESTER(name)                                              \
   if (entries_[k##name##Index] != nullptr &&                                   \
+      !entries_[k##name##Index]->IsNull() &&                                   \
       entries_[k##name##Index]->EntryPoint() == entry_point) {                 \
     return "" #name;                                                           \
   }
diff --git a/runtime/vm/stub_code.h b/runtime/vm/stub_code.h
index fe2fcbe..f551484 100644
--- a/runtime/vm/stub_code.h
+++ b/runtime/vm/stub_code.h
@@ -169,6 +169,16 @@
   }
   static intptr_t NumEntries() { return kNumStubEntries; }
 
+#if !defined(DART_PRECOMPILED_RUNTIME)
+#define GENERATE_STUB(name)                                                    \
+  static RawCode* BuildIsolateSpecific##name##Stub(ObjectPoolWrapper* opw) {   \
+    return StubCode::Generate("_iso_stub_" #name, opw,                         \
+                              StubCode::Generate##name##Stub);                 \
+  }
+  VM_STUB_CODE_LIST(GENERATE_STUB);
+#undef GENERATE_STUB
+#endif  // !defined(DART_PRECOMPILED_RUNTIME)
+
  private:
   friend class MegamorphicCacheTable;