[vm] Unify function prologue generation across all backends.

Previously we would generate the prologue in FunctionEntryInstr or OsrEntryInstr
on X64/ARM, and in CompileGraph on ARM64/IA32/DBC. This caused a confusing asymmetry
in the code.

Now we generate it in FunctionEntryInstr/OsrEntryInstr on all backends.
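
The shared flow is easiest to see as a small standalone sketch (the helper
names below are placeholders, not the VM's real APIs) of the emission order
each entry instruction's EmitNativeCode now follows:

  #include <cstdio>

  // Stand-ins for the real compiler/assembler state.
  void EmitMonomorphicCheckedEntry() { std::puts("checked entry"); }
  void EmitPrologue() { std::puts("frame entry + stack slot init"); }
  void EmitBody() { std::puts("function body"); }
  bool TryIntrinsify() { return false; }  // pretend: not fully intrinsified

  void EmitFunctionEntry(bool precompiled_mode,
                         bool is_dynamic_function,
                         bool is_optimizing) {
    // AOT only: dynamic functions get the class-id check ahead of the entry.
    if (precompiled_mode && is_dynamic_function) {
      EmitMonomorphicCheckedEntry();
    }
    // Fully intrinsified *optimized* code never reaches the body; in JIT we
    // keep going so the body's instance calls still get compiled.
    if (TryIntrinsify() && is_optimizing) {
      return;
    }
    // Every backend now emits the prologue right here, not in CompileGraph.
    EmitPrologue();
    EmitBody();
  }

  int main() {
    EmitFunctionEntry(/*precompiled_mode=*/true,
                      /*is_dynamic_function=*/true,
                      /*is_optimizing=*/true);
  }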

Partially addresses dartbug.com/34162.

This also fixes a regression on a protobuf benchmark caused by ICDatas
not being saved: when a function was fully intrinsified we used to skip
compiling the unoptimized body, so the instance calls that populate the
function's ic_data_array_ were never compiled.
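
The new skip-body predicate therefore only fires for optimized code. A
simplified, standalone restatement (not the VM's actual declaration) of
the FlowGraphCompiler::skip_body_compilation() helper added below:

  // Optimized code: the intrinsic covers all paths, so the body is dead code.
  // Unoptimized code: keep compiling the body so that each instance call
  // records its ICData into the function's ic_data_array_.
  bool SkipBodyCompilation(bool fully_intrinsified, bool is_optimizing) {
    return fully_intrinsified && is_optimizing;
  }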

Change-Id: Ie270d759234b197141025dde07a27546d221f6c8
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/101297
Commit-Queue: Samir Jindel <sjindel@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm64.cc b/runtime/vm/compiler/assembler/assembler_arm64.cc
index 7a1aaf9..63775f1 100644
--- a/runtime/vm/compiler/assembler/assembler_arm64.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm64.cc
@@ -1442,19 +1442,21 @@
   bool saved_use_far_branches = use_far_branches();
   set_use_far_branches(false);
 
+  const intptr_t start = CodeSize();
+
   Label immediate, miss;
   Bind(&miss);
   ldr(IP0, Address(THR, Thread::monomorphic_miss_entry_offset()));
   br(IP0);
 
   Comment("MonomorphicCheckedEntry");
-  ASSERT(CodeSize() == Instructions::kPolymorphicEntryOffset);
+  ASSERT(CodeSize() - start == Instructions::kPolymorphicEntryOffset);
   LoadClassIdMayBeSmi(IP0, R0);
   cmp(R5, Operand(IP0, LSL, 1));
   b(&miss, NE);
 
   // Fall through to unchecked entry.
-  ASSERT(CodeSize() == Instructions::kMonomorphicEntryOffset);
+  ASSERT(CodeSize() - start == Instructions::kMonomorphicEntryOffset);
 
   set_use_far_branches(saved_use_far_branches);
 }
diff --git a/runtime/vm/compiler/backend/block_scheduler.cc b/runtime/vm/compiler/backend/block_scheduler.cc
index af57dfd..46b26a2 100644
--- a/runtime/vm/compiler/backend/block_scheduler.cc
+++ b/runtime/vm/compiler/backend/block_scheduler.cc
@@ -274,20 +274,20 @@
     }
   }
 
-  // Emit code in reverse postorder but move any throwing blocks to the very
-  // end.
+  // Emit code in reverse postorder but move any throwing blocks (except the
+  // function entry, which needs to come first) to the very end.
   auto& codegen_order = *flow_graph()->CodegenBlockOrder(true);
   for (intptr_t i = 0; i < block_count; ++i) {
     auto block = reverse_postorder[i];
     const intptr_t preorder_nr = block->preorder_number();
-    if (!is_terminating[preorder_nr]) {
+    if (!is_terminating[preorder_nr] || block->IsFunctionEntry()) {
       codegen_order.Add(block);
     }
   }
   for (intptr_t i = 0; i < block_count; ++i) {
     auto block = reverse_postorder[i];
     const intptr_t preorder_nr = block->preorder_number();
-    if (is_terminating[preorder_nr]) {
+    if (is_terminating[preorder_nr] && !block->IsFunctionEntry()) {
       codegen_order.Add(block);
     }
   }
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.cc b/runtime/vm/compiler/backend/flow_graph_compiler.cc
index 05a3d92..00f1bdf 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler.cc
@@ -321,16 +321,6 @@
 }
 
 intptr_t FlowGraphCompiler::UncheckedEntryOffset() const {
-// On ARM64 we cannot use the position of the label bound in the
-// FunctionEntryInstr, because `FunctionEntryInstr::EmitNativeCode` does not
-// emit the monomorphic entry and frame entry (instead on ARM64 this is done
-// in FlowGraphCompiler::CompileGraph()).
-//
-// See http://dartbug.com/34162
-#if defined(TARGET_ARCH_ARM64)
-  return 0;
-#endif
-
   BlockEntryInstr* entry = flow_graph().graph_entry()->unchecked_entry();
   if (entry == nullptr) {
     entry = flow_graph().graph_entry()->normal_entry();
@@ -529,6 +519,13 @@
     flow_graph().ComputeLoops();
   }
 
+  // In precompiled mode, we require the function entry to come first (after the
+  // graph entry), since the polymorphic check is performed in the function
+  // entry (see Instructions::EntryPoint).
+  if (FLAG_precompiled_mode) {
+    ASSERT(block_order()[1] == flow_graph().graph_entry()->normal_entry());
+  }
+
   for (intptr_t i = 0; i < block_order().length(); ++i) {
     // Compile the block entry.
     BlockEntryInstr* entry = block_order()[i];
@@ -561,9 +558,7 @@
     pending_deoptimization_env_ = NULL;
     EndCodeSourceRange(entry->token_pos());
 
-    // The function was fully intrinsified, so there's no need to generate any
-    // more code.
-    if (fully_intrinsified_) {
+    if (skip_body_compilation()) {
       ASSERT(entry == flow_graph().graph_entry()->normal_entry());
       break;
     }
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.h b/runtime/vm/compiler/backend/flow_graph_compiler.h
index 7f5c1a3..8795377 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler.h
+++ b/runtime/vm/compiler/backend/flow_graph_compiler.h
@@ -418,6 +418,15 @@
   bool CanOSRFunction() const;
   bool is_optimizing() const { return is_optimizing_; }
 
+  // Returns true if compiling the body can be skipped: the function was fully
+  // intrinsified, so the body is unreachable in optimized code.
+  //
+  // We still need to compile the body in unoptimized mode because the ICDatas
+  // are added to the function's 'ic_data_array_' when instance calls are
+  // compiled.
+  bool skip_body_compilation() const {
+    return fully_intrinsified_ && is_optimizing();
+  }
+
   void EnterIntrinsicMode();
   void ExitIntrinsicMode();
   bool intrinsic_mode() const { return intrinsic_mode_; }
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
index 4debc98..7f1bfb9 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm.cc
@@ -934,7 +934,7 @@
 
   __ bkpt(0);
 
-  if (!fully_intrinsified_) {
+  if (!skip_body_compilation()) {
     ASSERT(assembler()->constant_pool_allowed());
     GenerateDeferredCode();
   }
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
index 0fee423..0c6be28 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc
@@ -871,34 +871,7 @@
   }
 }
 
-// Input parameters:
-//   LR: return address.
-//   SP: address of last argument.
-//   FP: caller's frame pointer.
-//   PP: caller's pool pointer.
-//   R4: arguments descriptor array.
-void FlowGraphCompiler::CompileGraph() {
-  InitCompiler();
-
-  if (FLAG_precompiled_mode) {
-    const Function& function = parsed_function().function();
-    if (function.IsDynamicFunction()) {
-      SpecialStatsBegin(CombinedCodeStatistics::kTagCheckedEntry);
-      __ MonomorphicCheckedEntry();
-      SpecialStatsEnd(CombinedCodeStatistics::kTagCheckedEntry);
-    }
-  }
-
-  // For JIT we have multiple entrypoints functionality which moved the
-  // intrinsification as well as the setup of the frame to the
-  // [TargetEntryInstr::EmitNativeCode].
-  //
-  // Though this has not been implemented on ARM64, which is why this code here
-  // is outside the "ifdef DART_PRECOMPILER".
-  if (TryIntrinsify()) {
-    // Skip regular code generation.
-    return;
-  }
+void FlowGraphCompiler::EmitPrologue() {
   EmitFrameEntry();
   ASSERT(assembler()->constant_pool_allowed());
 
@@ -925,11 +898,32 @@
   }
 
   EndCodeSourceRange(TokenPosition::kDartCodePrologue);
+}
+
+// Input parameters:
+//   LR: return address.
+//   SP: address of last argument.
+//   FP: caller's frame pointer.
+//   PP: caller's pool pointer.
+//   R4: arguments descriptor array.
+void FlowGraphCompiler::CompileGraph() {
+  InitCompiler();
+
+  // For JIT we have the multiple-entry-points functionality, which moved the
+  // frame setup into [FunctionEntryInstr] (it sets the constant pool allowed
+  // bit to true).  Despite this we still have to set the constant pool
+  // allowed bit to true here as well, because we can generate code for
+  // [CatchBlockEntryInstr]s, which need the pool.
+  __ set_constant_pool_allowed(true);
+
   VisitBlocks();
 
   __ brk(0);
-  ASSERT(assembler()->constant_pool_allowed());
-  GenerateDeferredCode();
+
+  if (!skip_body_compilation()) {
+    ASSERT(assembler()->constant_pool_allowed());
+    GenerateDeferredCode();
+  }
 }
 
 void FlowGraphCompiler::GenerateCall(TokenPosition token_pos,
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_dbc.cc b/runtime/vm/compiler/backend/flow_graph_compiler_dbc.cc
index 892cdb5..e3729c7 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_dbc.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_dbc.cc
@@ -340,15 +340,12 @@
   }
 }
 
+void FlowGraphCompiler::EmitPrologue() {
+  EmitFrameEntry();
+}
+
 void FlowGraphCompiler::CompileGraph() {
   InitCompiler();
-
-  if (TryIntrinsify()) {
-    // Skip regular code generation.
-    return;
-  }
-
-  EmitFrameEntry();
   VisitBlocks();
 }
 
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_ia32.cc b/runtime/vm/compiler/backend/flow_graph_compiler_ia32.cc
index c0509b9..5629335 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_ia32.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_ia32.cc
@@ -791,14 +791,7 @@
   }
 }
 
-void FlowGraphCompiler::CompileGraph() {
-  InitCompiler();
-
-  if (TryIntrinsify()) {
-    // Skip regular code generation.
-    return;
-  }
-
+void FlowGraphCompiler::EmitPrologue() {
   EmitFrameEntry();
 
   // In unoptimized code, initialize (non-argument) stack allocated slots.
@@ -826,11 +819,18 @@
   }
 
   EndCodeSourceRange(TokenPosition::kDartCodePrologue);
+}
+
+void FlowGraphCompiler::CompileGraph() {
+  InitCompiler();
+
   ASSERT(!block_order().is_empty());
   VisitBlocks();
 
-  __ int3();
-  GenerateDeferredCode();
+  if (!skip_body_compilation()) {
+    __ int3();
+    GenerateDeferredCode();
+  }
 }
 
 void FlowGraphCompiler::GenerateCall(TokenPosition token_pos,
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc b/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
index 15d3061..996f030 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler_x64.cc
@@ -930,7 +930,7 @@
 
   __ int3();
 
-  if (!fully_intrinsified_) {
+  if (!skip_body_compilation()) {
     ASSERT(assembler()->constant_pool_allowed());
     GenerateDeferredCode();
   }
diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc
index cbffc1a..5c840ec 100644
--- a/runtime/vm/compiler/backend/il.cc
+++ b/runtime/vm/compiler/backend/il.cc
@@ -3831,7 +3831,7 @@
 // fall-through code in [FlowGraphCompiler::CompileGraph()].
 // (As opposed to here where we don't check for the return value of
 // [Intrinsify]).
-#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM)
+#if defined(DART_SUPPORT_PRECOMPILATION)
   if (FLAG_precompiled_mode) {
     const Function& function = compiler->parsed_function().function();
     if (function.IsDynamicFunction()) {
@@ -3840,12 +3840,21 @@
       compiler->SpecialStatsEnd(CombinedCodeStatistics::kTagCheckedEntry);
     }
   }
-  // NOTE: Because in X64/ARM mode the graph can have multiple entrypoints, we
-  // generate several times the same intrinsification & frame setup. That's why
-  // we cannot rely on the constant pool being `false` when we come in here.
+#endif
+
+  // NOTE: Because the graph can have multiple entry points, we may emit the
+  // same intrinsification & frame setup several times. That's why we cannot
+  // rely on the constant pool allowed bit being `false` when we come in here.
+#if defined(TARGET_USES_OBJECT_POOL)
   __ set_constant_pool_allowed(false);
-  if (compiler->TryIntrinsify()) return;
+#endif
+
+  if (compiler->TryIntrinsify() && compiler->skip_body_compilation()) {
+    return;
+  }
   compiler->EmitPrologue();
+
+#if defined(TARGET_USES_OBJECT_POOL)
   ASSERT(__ constant_pool_allowed());
 #endif
 
@@ -3883,13 +3892,16 @@
   ASSERT(compiler->is_optimizing());
   __ Bind(compiler->GetJumpLabel(this));
 
-#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM)
-  // NOTE: Because in JIT X64/ARM mode the graph can have multiple
-  // entrypoints, so we generate several times the same intrinsification &
-  // frame setup.  That's why we cannot rely on the constant pool being
-  // `false` when we come in here.
+  // NOTE: Because the graph can have multiple entry points, we may emit the
+  // same intrinsification & frame setup several times. That's why we cannot
+  // rely on the constant pool allowed bit being `false` when we come in here.
+#if defined(TARGET_USES_OBJECT_POOL)
   __ set_constant_pool_allowed(false);
+#endif
+
   compiler->EmitPrologue();
+
+#if defined(TARGET_USES_OBJECT_POOL)
   ASSERT(__ constant_pool_allowed());
 #endif
 
diff --git a/runtime/vm/globals.h b/runtime/vm/globals.h
index ad33a42..7336f1f 100644
--- a/runtime/vm/globals.h
+++ b/runtime/vm/globals.h
@@ -161,6 +161,17 @@
 
 #endif  // !defined(HOST_OS_WINDOWS))
 
+#if defined(TARGET_ARCH_ARM) || defined(TARGET_ARCH_ARM64) ||                  \
+    defined(TARGET_ARCH_X64)
+#define TARGET_USES_OBJECT_POOL 1
+#endif
+
+#if defined(DART_PRECOMPILER) &&                                               \
+    (defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM) ||                   \
+     defined(TARGET_ARCH_ARM64))
+#define DART_SUPPORT_PRECOMPILATION 1
+#endif
+
 }  // namespace dart
 
 #endif  // RUNTIME_VM_GLOBALS_H_