Reland "[vm/compiler] AOT inline heuristics improvements"

This reverts commit 43a96d49afb69eba6179fa083a97f8bec581f4d8.

Reason for revert: relands the original change (revert of the revert).

Original change's description:
> Revert "[vm/compiler] AOT inline heuristics improvements"
> 
> This reverts commit 2908e61f2a84601efcdf0e3bf7df4a6f526fb6c6.
> 
> Reason for revert: regress_32322_test crashes
> 
> Original change's description:
> > [vm/compiler] AOT inline heuristics improvements
> > 
> > Rationale:
> > Yields substantial improvements on various benchmarks
> > (1.8x on HMAC stand-alone, around 5x on TypedData setters and getters),
> > with only a moderate increase in code size (3.2% on Flutter gallery).
> > 
> > https://github.com/dart-lang/sdk/issues/34473
> > https://github.com/dart-lang/sdk/issues/32167
> > 
> > Change-Id: I0909efd7afc72229524ff8edb7322ce025a14af4
> > Reviewed-on: https://dart-review.googlesource.com/c/89162
> > Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
> > Reviewed-by: Alexander Markov <alexmarkov@google.com>
> > Commit-Queue: Aart Bik <ajcbik@google.com>
> 
> TBR=vegorov@google.com,alexmarkov@google.com,ajcbik@google.com
> 
> Change-Id: I9c7dadb18935ad32f4d4cd72872838e8ac9cc288
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Reviewed-on: https://dart-review.googlesource.com/c/89740
> Reviewed-by: Aart Bik <ajcbik@google.com>
> Commit-Queue: Aart Bik <ajcbik@google.com>

TBR=vegorov@google.com,alexmarkov@google.com,ajcbik@google.com

# Not skipping CQ checks because original CL landed > 1 day ago.

Change-Id: Iace9857654b63af2fbcd2808d19802fb60305973
Reviewed-on: https://dart-review.googlesource.com/c/90141
Reviewed-by: Aart Bik <ajcbik@google.com>
Commit-Queue: Aart Bik <ajcbik@google.com>
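
For reviewers skimming the patch: the sketch below is illustrative only
(it is not code from this CL, and the helper names are invented). It
models how the two halves of the change interact under AOT: depth-0 call
sites now carry weight 1 instead of 0, but unless the callee is marked
always-inline they must additionally pass the new small-leaf size check.

    #include <cstdio>

    // Hypothetical stand-ins for the heuristics in
    // runtime/vm/compiler/backend/inliner.cc.
    static int CallCountApproximation(int nesting_depth) {
      switch (nesting_depth) {
        case 0:  return 1;             // straight-line code: considered, but gated
        case 1:  return 10;
        case 2:  return 10 * 10;
        default: return 10 * 10 * 10;  // deeply nested loops dominate the budget
      }
    }

    static bool PassesAotInliningGate(int nesting_depth, bool always_inline,
                                      bool is_small_leaf) {
      // Call sites inside loops, and always-inline callees, keep the old
      // behavior; only depth-0 sites face the extra small-leaf check.
      if (nesting_depth > 0 || always_inline) return true;
      return is_small_leaf;
    }

    int main() {
      std::printf("weight at depth 2: %d\n", CallCountApproximation(2));  // 100
      std::printf("small leaf at depth 0: %d\n",
                  PassesAotInliningGate(0, false, true));                 // 1
      std::printf("large callee at depth 0: %d\n",
                  PassesAotInliningGate(0, false, false));                // 0
      return 0;
    }
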
diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc
index 3b48fc5..bfd4395 100644
--- a/runtime/vm/compiler/backend/inliner.cc
+++ b/runtime/vm/compiler/backend/inliner.cc
@@ -56,6 +56,10 @@
             80,
             "Do not inline callees larger than threshold");
 DEFINE_FLAG(int,
+            inlining_small_leaf_size_threshold,
+            50,
+            "Do not inline leaf callees larger than threshold");
+DEFINE_FLAG(int,
             inlining_caller_size_threshold,
             50000,
             "Stop inlining once caller reaches the threshold.");
@@ -332,19 +336,16 @@
   static intptr_t AotCallCountApproximation(intptr_t nesting_depth) {
     switch (nesting_depth) {
       case 0:
-        // Note that we use value 0, and not 1, i.e. any straightline code
-        // outside a loop is assumed to be very cold. With value 1, inlining
-        // inside loops is still favored over inlining inside straightline
-        // code, but for a method without loops, *all* call sites are inlined
-        // (potentially more performance, at the expense of larger code size).
-        // TODO(ajcbik): use 1 and fine tune other heuristics
-        return 0;
+        // The value 1 makes most sense, but it may give a high ratio to call
+        // sites outside loops. Therefore, such call sites are subject to a
+        // subsequent, stricter heuristic that limits the code size increase.
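+        // Example: with these weights, a call site in a doubly nested loop
+        // is valued at 10 * 10 = 100x a straight-line call site, so the
+        // inlining budget still favors loops first.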
+        return 1;
       case 1:
         return 10;
       case 2:
-        return 100;
+        return 10 * 10;
       default:
-        return 1000;
+        return 10 * 10 * 10;
     }
   }
 
@@ -512,6 +513,36 @@
   DISALLOW_COPY_AND_ASSIGN(CallSites);
 };
 
+// Determines if inlining this graph yields a small leaf node.
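+// A small leaf makes no instance, polymorphic, or closure calls, and its
+// instruction count (plus the cached or estimated size of any always-inline
+// or recognized static callees) stays within
+// FLAG_inlining_small_leaf_size_threshold. For example, a view getter of
+// 8 instructions with one recognized callee cached at 12 counts as 20 <= 50.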
+static bool IsSmallLeaf(FlowGraph* graph) {
+  intptr_t instruction_count = 0;
+  for (BlockIterator block_it = graph->postorder_iterator(); !block_it.Done();
+       block_it.Advance()) {
+    BlockEntryInstr* entry = block_it.Current();
+    for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) {
+      Instruction* current = it.Current();
+      ++instruction_count;
+      if (current->IsInstanceCall() || current->IsPolymorphicInstanceCall() ||
+          current->IsClosureCall()) {
+        return false;
+      } else if (current->IsStaticCall()) {
+        const Function& function = current->AsStaticCall()->function();
+        const intptr_t inl_size = function.optimized_instruction_count();
+        // Accept a static call only if it is always inlined in some way
+        // (always-inline or recognized), and add its cached size to the
+        // total instruction count. A reasonable guess is made if the count
+        // has not been collected yet (listed methods are never very large).
+        if (!function.always_inline() && !function.IsRecognized()) {
+          return false;
+        }
+        static constexpr intptr_t kAvgListedMethodSize = 20;
+        instruction_count += (inl_size == 0 ? kAvgListedMethodSize : inl_size);
+      }
+    }
+  }
+  return instruction_count <= FLAG_inlining_small_leaf_size_threshold;
+}
+
 struct InlinedCallData {
   InlinedCallData(Definition* call,
                   const Array& arguments_descriptor,
@@ -863,7 +894,8 @@
 
   bool TryInlining(const Function& function,
                    const Array& argument_names,
-                   InlinedCallData* call_data) {
+                   InlinedCallData* call_data,
+                   bool stricter_heuristic) {
     if (trace_inlining()) {
       String& name = String::Handle(function.QualifiedUserVisibleName());
       THR_Print("  => %s (deopt count %d)\n", name.ToCString(),
@@ -1174,7 +1206,7 @@
 
         if (FLAG_support_il_printer && trace_inlining() &&
             (FLAG_print_flow_graph || FLAG_print_flow_graph_optimized)) {
-          THR_Print("Callee graph for inlining %s\n",
+          THR_Print("Callee graph for inlining %s (optimized)\n",
                     function.ToFullyQualifiedCString());
           FlowGraphPrinter printer(*callee_graph);
           printer.PrintBlocks();
@@ -1215,6 +1247,19 @@
           return false;
         }
 
+        // If requested, a stricter heuristic is applied to this inlining. This
+        // heuristic always scans the method (rather than possibly reusing
+        // cached results) to make sure all specializations are accounted for.
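+        // For example, a callee whose graph shrank after specializing to
+        // the call site's actual argument types may pass the small-leaf
+        // check here even though its generic cached size is above the
+        // threshold.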
+        if (stricter_heuristic) {
+          if (!IsSmallLeaf(callee_graph)) {
+            TRACE_INLINING(
+                THR_Print("     Bailout: heuristics (no small leaf)\n"));
+            PRINT_INLINING_TREE("Heuristic fail (no small leaf)",
+                                &call_data->caller, &function, call_data->call);
+            return false;
+          }
+        }
+
         // Inline dispatcher methods regardless of the current depth.
         const intptr_t depth =
             function.IsDispatcherOrImplicitAccessor() ? 0 : inlining_depth_;
@@ -1436,7 +1481,13 @@
           call, Array::ZoneHandle(Z, call->GetArgumentsDescriptor()),
           call->FirstArgIndex(), &arguments, call_info[call_idx].caller(),
           call_info[call_idx].caller_graph->inlining_id());
-      if (TryInlining(call->function(), call->argument_names(), &call_data)) {
+
+      // Calls outside loops are subject to a stricter heuristic under AOT.
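+      // E.g., a call to a TypedData view getter from straight-line code
+      // (depth 0) must be a small leaf to be inlined, while the same call
+      // inside a loop bypasses the extra check.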
+      bool stricter_heuristic = FLAG_precompiled_mode &&
+                                !inliner_->AlwaysInline(target) &&
+                                call_info[call_idx].nesting_depth == 0;
+      if (TryInlining(call->function(), call->argument_names(), &call_data,
+                      stricter_heuristic)) {
         InlineCall(&call_data);
         inlined = true;
       }
@@ -1489,7 +1540,7 @@
           call, arguments_descriptor, call->FirstArgIndex(), &arguments,
           call_info[call_idx].caller(),
           call_info[call_idx].caller_graph->inlining_id());
-      if (TryInlining(target, call->argument_names(), &call_data)) {
+      if (TryInlining(target, call->argument_names(), &call_data, false)) {
         InlineCall(&call_data);
         inlined = true;
       }
@@ -1751,7 +1802,7 @@
                             caller_function_, caller_inlining_id_);
   Function& target = Function::ZoneHandle(zone(), target_info.target->raw());
   if (!owner_->TryInlining(target, call_->instance_call()->argument_names(),
-                           &call_data)) {
+                           &call_data, false)) {
     return false;
   }
 
diff --git a/runtime/vm/compiler/method_recognizer.h b/runtime/vm/compiler/method_recognizer.h
index e901118..75ceb07 100644
--- a/runtime/vm/compiler/method_recognizer.h
+++ b/runtime/vm/compiler/method_recognizer.h
@@ -373,10 +373,28 @@
   V(_List, _slice, ObjectArraySlice, 0x4c865d1d)                               \
   V(_ImmutableList, get:iterator, ImmutableArrayIterator, 0x6c851c55)          \
   V(_ImmutableList, forEach, ImmutableArrayForEach, 0x11406b13)                \
-  V(_Uint8ArrayView, [], Uint8ArrayViewGetIndexed, 0x7d308247)                 \
-  V(_Uint8ArrayView, []=, Uint8ArrayViewSetIndexed, 0x65ba546e)                \
   V(_Int8ArrayView, [], Int8ArrayViewGetIndexed, 0x7e5a8458)                   \
   V(_Int8ArrayView, []=, Int8ArrayViewSetIndexed, 0x62f615e4)                  \
+  V(_Uint8ArrayView, [], Uint8ArrayViewGetIndexed, 0x7d308247)                 \
+  V(_Uint8ArrayView, []=, Uint8ArrayViewSetIndexed, 0x65ba546e)                \
+  V(_Uint8ClampedArrayView, [], Uint8ClampedArrayViewGetIndexed, 0x7d308247)   \
+  V(_Uint8ClampedArrayView, []=, Uint8ClampedArrayViewSetIndexed, 0x65ba546e)  \
+  V(_Uint16ArrayView, [], Uint16ArrayViewGetIndexed, 0xe96836dd)               \
+  V(_Uint16ArrayView, []=, Uint16ArrayViewSetIndexed, 0x15b02947)              \
+  V(_Int16ArrayView, [], Int16ArrayViewGetIndexed, 0x1b24a48b)                 \
+  V(_Int16ArrayView, []=, Int16ArrayViewSetIndexed, 0xb91ec2e6)                \
+  V(_Uint32ArrayView, [], Uint32ArrayViewGetIndexed, 0x8a4f93b3)               \
+  V(_Uint32ArrayView, []=, Uint32ArrayViewSetIndexed, 0xf54918b5)              \
+  V(_Int32ArrayView, [], Int32ArrayViewGetIndexed, 0x85040819)                 \
+  V(_Int32ArrayView, []=, Int32ArrayViewSetIndexed, 0xaec8c6f5)                \
+  V(_Uint64ArrayView, [], Uint64ArrayViewGetIndexed, 0xd0c44fe7)               \
+  V(_Uint64ArrayView, []=, Uint64ArrayViewSetIndexed, 0x402712b7)              \
+  V(_Int64ArrayView, [], Int64ArrayViewGetIndexed, 0xf3090b95)                 \
+  V(_Int64ArrayView, []=, Int64ArrayViewSetIndexed, 0xca07e497)                \
+  V(_Float32ArrayView, [], Float32ArrayViewGetIndexed, 0xef967533)             \
+  V(_Float32ArrayView, []=, Float32ArrayViewSetIndexed, 0xc9b691bd)            \
+  V(_Float64ArrayView, [], Float64ArrayViewGetIndexed, 0x9d83f585)             \
+  V(_Float64ArrayView, []=, Float64ArrayViewSetIndexed, 0x3c1adabd)            \
   V(_ByteDataView, setInt8, ByteDataViewSetInt8, 0x6395293e)                   \
   V(_ByteDataView, setUint8, ByteDataViewSetUint8, 0x79979d1f)                 \
   V(_ByteDataView, setInt16, ByteDataViewSetInt16, 0x525ec534)                 \