[vm/inliner] Tweak inlining heuristic, improve List.of inlining.

Builds on top of https://dart-review.googlesource.com/c/sdk/+/228660, introduces vm:vm:always-consider-inlining that tell inliner not to give up inlining of a function.

HashMap constructor got prefer-inline pragma to accommodate shift in inlining away from where it used to be inlined due to large size of inlined HashMap, greater than FLAG_inline_getters_setters_smaller_than heuristic.

Notable performance changes:

JIT (x64)
===
ListCopy.List.of.fixed.100 27.37%
ListCopy.List.of.fixed.2 16.31%

AOT (x64)
===
Empty  -9.978%
InstantiateTypeArgs.Instantiate1  -8.262%
...
ListCopy.List.of.fixed.2 13.73%
ListCopy.spread.num.2 15.51%
ListCopy.List.of.2 15.08%
MapCopy.Map.String.of.Map.100 55.62%
MapCopy.Map.Thing.of.Map.100 56.06%

flutter release
===
flutter_gallery_apk_size (Pixel 2)
-0.0074% (1.0 noise)41809568.00 41812676.00


Addresses https://github.com/dart-lang/sdk/issues/49408

TEST=Inliner_always_consider_inlining, Inliner_List_of_inlined
Change-Id: I7f8fc7cb0ac4a69310c108cf519518c384dc0164
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/253740
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Alexander Aprelev <aam@google.com>
diff --git a/runtime/docs/compiler/pragmas_recognized_by_compiler.md b/runtime/docs/compiler/pragmas_recognized_by_compiler.md
index 8c910f8..640fd3c 100644
--- a/runtime/docs/compiler/pragmas_recognized_by_compiler.md
+++ b/runtime/docs/compiler/pragmas_recognized_by_compiler.md
@@ -25,6 +25,14 @@
 Here, the VM will not inline the annotated function. In this case, the pragma
 is always respected.
 
+####
+
+```dart
+@pragma("vm:always-consider-inlining")
+```
+
+VM will keep trying to inline the function in new contexts, not giving up after encountered contexts where inlining wasn't effective. With this some compile time is traded for expectation that the function has signficant type specialization, resulting in highly efficient inlined results in contexts where arguments types are known to the compiler. Example of this is `_List.of` constructor in dart core library.
+
 ## Annotations for return types and field types
 
 The VM is not able to see across method calls (apart from inlining) and
diff --git a/runtime/docs/pragmas.md b/runtime/docs/pragmas.md
index 90eb126..4ca6c30 100644
--- a/runtime/docs/pragmas.md
+++ b/runtime/docs/pragmas.md
@@ -12,6 +12,7 @@
 | `vm:notify-debugger-on-exception` | Marks a function that catches exceptions, making the VM treat any caught exception as if they were uncaught. This can be used to notify an attached debugger during debugging, without pausing the app during regular execution. |
 | `vm:external-name` | Allows to specify an external (native) name for an `external` function. This name is used to lookup native implementation via native resolver associated with the current library through embedding APIs. This is a replacement for legacy VM specific `native "name"` syntax. |
 | `vm:invisible` | Allows to mark a function as invisible so it will not appear on stack traces. |
+| `vm:always-consider-inlining` | Marks a function which particularly benefits from inlining and specialization in context of the caller (for example, when concrete types of arguments are known). Inliner will not give up after one failed inlining attempt and will continue trying to inline this function.
 
 ## Unsafe pragmas for general use
 
diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc
index ef1de04..7aa88d6 100644
--- a/runtime/vm/compiler/backend/inliner.cc
+++ b/runtime/vm/compiler/backend/inliner.cc
@@ -524,38 +524,51 @@
   DISALLOW_COPY_AND_ASSIGN(CallSites);
 };
 
-// Determines if inlining this graph yields a small leaf node.
-static bool IsSmallLeaf(FlowGraph* graph) {
+// Determines if inlining this graph yields a small leaf node, or a sequence of
+// static calls that is no larger than the call site it will replace.
+static bool IsSmallLeafOrReduction(int inlining_depth,
+                                   intptr_t call_site_instructions,
+                                   FlowGraph* graph) {
   intptr_t instruction_count = 0;
+  intptr_t call_count = 0;
   for (BlockIterator block_it = graph->postorder_iterator(); !block_it.Done();
        block_it.Advance()) {
     BlockEntryInstr* entry = block_it.Current();
     for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) {
       Instruction* current = it.Current();
+      if (current->IsReturn()) continue;
       ++instruction_count;
       if (current->IsInstanceCall() || current->IsPolymorphicInstanceCall() ||
           current->IsClosureCall()) {
         return false;
-      } else if (current->IsStaticCall()) {
+      }
+      if (current->IsStaticCall()) {
         const Function& function = current->AsStaticCall()->function();
         const intptr_t inl_size = function.optimized_instruction_count();
         const bool always_inline =
             FlowGraphInliner::FunctionHasPreferInlinePragma(function);
-        // Accept a static call is always inlined in some way and add the
-        // cached size to the total instruction count. A reasonable guess
-        // is made if the count has not been collected yet (listed methods
-        // are never very large).
-        if (!always_inline && !function.IsRecognized()) {
-          return false;
+        // Accept a static call that is always inlined in some way and add the
+        // cached size to the total instruction count. A reasonable guess is
+        // made if the count has not been collected yet (listed methods are
+        // never very large).
+        if (always_inline || function.IsRecognized()) {
+          if (!always_inline) {
+            static constexpr intptr_t kAvgListedMethodSize = 20;
+            instruction_count +=
+                (inl_size == 0 ? kAvgListedMethodSize : inl_size);
+          }
+        } else {
+          ++call_count;
+          instruction_count += current->AsStaticCall()->ArgumentCount();
+          instruction_count += 1;  // pop the call frame.
         }
-        if (!always_inline) {
-          static constexpr intptr_t kAvgListedMethodSize = 20;
-          instruction_count +=
-              (inl_size == 0 ? kAvgListedMethodSize : inl_size);
-        }
+        continue;
       }
     }
   }
+  if (call_count > 0) {
+    return instruction_count <= call_site_instructions;
+  }
   return instruction_count <= FLAG_inlining_small_leaf_size_threshold;
 }
 
@@ -1036,6 +1049,8 @@
 
     if (FlowGraphInliner::FunctionHasNeverInlinePragma(function)) {
       TRACE_INLINING(THR_Print("     Bailout: vm:never-inline pragma\n"));
+      PRINT_INLINING_TREE("vm:never-inline", &call_data->caller, &function,
+                          call_data->call);
       return false;
     }
 
@@ -1156,6 +1171,8 @@
             TRACE_INLINING(
                 THR_Print("     Bailout: not inlinable due to "
                           "!function.CanBeInlined()\n"));
+            PRINT_INLINING_TREE("Not inlinable", &call_data->caller, &function,
+                                call_data->call);
             return false;
           }
         }
@@ -1352,9 +1369,19 @@
               ShouldWeInline(function, instruction_count, call_site_count);
           if (!decision.value) {
             // If size is larger than all thresholds, don't consider it again.
+
+            // TODO(dartbug.com/49665): Make compiler smart enough so it itself
+            // can identify highly-specialized functions that should always
+            // be considered for inlining, without relying on a pragma.
             if ((instruction_count > FLAG_inlining_size_threshold) &&
                 (call_site_count > FLAG_inlining_callee_call_sites_threshold)) {
-              function.set_is_inlinable(false);
+              // Will keep trying to inline the function if it can be
+              // specialized based on argument types.
+              if (!FlowGraphInliner::FunctionHasAlwaysConsiderInliningPragma(
+                      function)) {
+                function.set_is_inlinable(false);
+                TRACE_INLINING(THR_Print("     Mark not inlinable\n"));
+              }
             }
             TRACE_INLINING(
                 THR_Print("     Bailout: heuristics (%s) with "
@@ -1376,7 +1403,13 @@
           // TODO(ajcbik): with the now better bookkeeping, explore removing
           // this
           if (stricter_heuristic) {
-            if (!IsSmallLeaf(callee_graph)) {
+            intptr_t call_site_instructions = 0;
+            if (auto static_call = call->AsStaticCall()) {
+              // Push all the arguments, do the call, drop arguments.
+              call_site_instructions = static_call->ArgumentCount() + 1 + 1;
+            }
+            if (!IsSmallLeafOrReduction(inlining_depth_, call_site_instructions,
+                                        callee_graph)) {
               TRACE_INLINING(
                   THR_Print("     Bailout: heuristics (no small leaf)\n"));
               PRINT_INLINING_TREE("Heuristic fail (no small leaf)",
@@ -2440,6 +2473,19 @@
                              /*multiple=*/false, &options);
 }
 
+bool FlowGraphInliner::FunctionHasAlwaysConsiderInliningPragma(
+    const Function& function) {
+  if (!function.has_pragma()) {
+    return false;
+  }
+  Thread* thread = dart::Thread::Current();
+  COMPILER_TIMINGS_TIMER_SCOPE(thread, CheckForPragma);
+  Object& options = Object::Handle();
+  return Library::FindPragma(thread, /*only_core=*/false, function,
+                             Symbols::vm_always_consider_inlining(),
+                             /*multiple=*/false, &options);
+}
+
 bool FlowGraphInliner::AlwaysInline(const Function& function) {
   if (FunctionHasPreferInlinePragma(function)) {
     TRACE_INLINING(
diff --git a/runtime/vm/compiler/backend/inliner.h b/runtime/vm/compiler/backend/inliner.h
index 404654b..fe115c9 100644
--- a/runtime/vm/compiler/backend/inliner.h
+++ b/runtime/vm/compiler/backend/inliner.h
@@ -123,6 +123,7 @@
 
   static bool FunctionHasPreferInlinePragma(const Function& function);
   static bool FunctionHasNeverInlinePragma(const Function& function);
+  static bool FunctionHasAlwaysConsiderInliningPragma(const Function& function);
 
   FlowGraph* flow_graph() const { return flow_graph_; }
   intptr_t NextInlineId(const Function& function,
diff --git a/runtime/vm/compiler/backend/inliner_test.cc b/runtime/vm/compiler/backend/inliner_test.cc
index 168018d..a1b18fa 100644
--- a/runtime/vm/compiler/backend/inliner_test.cc
+++ b/runtime/vm/compiler/backend/inliner_test.cc
@@ -246,6 +246,132 @@
   }));
 }
 
+// Verifies that pragma-decorated call gets inlined.
+ISOLATE_UNIT_TEST_CASE(Inliner_always_consider_inlining) {
+  const char* kScript = R"(
+    choice() {
+      dynamic x;
+      return x == 123;
+    }
+
+    @pragma("vm:always-consider-inlining")
+    bar(baz) {
+      if (baz is String) {
+        return 1;
+      }
+      if (baz is num) {
+        return 2;
+      }
+      if (baz is bool) {
+        dynamic j = 0;
+        for (int i = 0; i < 1024; i++) {
+          j += "i: $i".length;
+        }
+        return j;
+      }
+      return 4;
+    }
+
+    bbar(bbaz, something) {
+      if (bbaz == null) {
+        return "null";
+      }
+      return bar(bbaz);
+    }
+
+    main(args) {
+      print(bbar(42, "something"));
+      print(bbar(choice() ? "abc": 42, "something"));
+      print(bbar("abc", "something"));
+    }
+  )";
+
+  const auto& root_library = Library::Handle(LoadTestScript(kScript));
+  const auto& function = Function::Handle(GetFunction(root_library, "main"));
+
+  TestPipeline pipeline(function, CompilerPass::kAOT);
+  FlowGraph* flow_graph = pipeline.RunPasses({});
+
+  auto entry = flow_graph->graph_entry()->normal_entry();
+  ILMatcher cursor(flow_graph, entry, /*trace=*/true);
+
+  StaticCallInstr* call_print1;
+  StaticCallInstr* call_print2;
+  StaticCallInstr* call_print3;
+  StaticCallInstr* call_bar;
+  RELEASE_ASSERT(cursor.TryMatch({
+      kMoveGlob,
+      {kMatchAndMoveStaticCall, &call_print1},
+      kMoveGlob,
+      {kMatchAndMoveStaticCall, &call_bar},
+      kMoveGlob,
+      {kMatchAndMoveStaticCall, &call_print2},
+      kMoveGlob,
+      {kMatchAndMoveStaticCall, &call_print3},
+      kMoveGlob,
+      kMatchReturn,
+  }));
+  EXPECT(strcmp(call_print1->function().UserVisibleNameCString(), "print") ==
+         0);
+  EXPECT(strcmp(call_print2->function().UserVisibleNameCString(), "print") ==
+         0);
+  EXPECT(strcmp(call_print3->function().UserVisibleNameCString(), "print") ==
+         0);
+  EXPECT(strcmp(call_bar->function().UserVisibleNameCString(), "bar") == 0);
+}
+
+// Verifies that List.of gets inlined.
+ISOLATE_UNIT_TEST_CASE(Inliner_List_of_inlined) {
+  const char* kScript = R"(
+    main() {
+      final foo = List<String>.filled(100, "bar");
+      final the_copy1 = List.of(foo, growable: false);
+      print('${the_copy1.length}');
+    }
+  )";
+
+  const auto& root_library = Library::Handle(LoadTestScript(kScript));
+  const auto& function = Function::Handle(GetFunction(root_library, "main"));
+
+  TestPipeline pipeline(function, CompilerPass::kAOT);
+  FlowGraph* flow_graph = pipeline.RunPasses({});
+
+  auto entry = flow_graph->graph_entry()->normal_entry();
+  ILMatcher cursor(flow_graph, entry, /*trace=*/true);
+
+  StaticCallInstr* call_get_length;
+  StaticCallInstr* call_interpolate;
+  StaticCallInstr* call_print;
+  RELEASE_ASSERT(cursor.TryMatch({
+      kMoveGlob,
+      kMatchAndMoveJoinEntry,
+      kMoveGlob,
+      kMatchAndMoveBranchTrue,
+      kMoveGlob,
+      kMatchAndMoveJoinEntry,
+      kMatchAndMoveCheckStackOverflow,
+      kMatchAndMoveBranchFalse,
+      kMatchAndMoveTargetEntry,
+      kMoveGlob,
+      kMatchAndMoveJoinEntry,
+      kMoveGlob,
+      kMatchAndMoveBranchFalse,
+      kMoveGlob,
+      {kMatchAndMoveStaticCall, &call_get_length},
+      kMoveGlob,
+      {kMatchAndMoveStaticCall, &call_interpolate},
+      kMoveGlob,
+      {kMatchAndMoveStaticCall, &call_print},
+      kMoveGlob,
+      kMatchReturn,
+  }));
+  EXPECT(strcmp(call_get_length->function().UserVisibleNameCString(),
+                "length") == 0);
+  EXPECT(strcmp(call_interpolate->function().UserVisibleNameCString(),
+                "_interpolateSingle") == 0);
+  EXPECT(strcmp(call_print->function().UserVisibleNameCString(), "print") == 0);
+}
+
 #endif  // defined(DART_PRECOMPILER)
 
 }  // namespace dart
diff --git a/runtime/vm/symbols.h b/runtime/vm/symbols.h
index 9c778e2..6bcd970 100644
--- a/runtime/vm/symbols.h
+++ b/runtime/vm/symbols.h
@@ -496,20 +496,21 @@
   V(state, "state")                                                            \
   V(string_param, ":string_param")                                             \
   V(string_param_length, ":string_param_length")                               \
-  V(vm_prefer_inline, "vm:prefer-inline")                                      \
+  V(vm_always_consider_inlining, "vm:always-consider-inlining")                \
   V(vm_entry_point, "vm:entry-point")                                          \
   V(vm_exact_result_type, "vm:exact-result-type")                              \
+  V(vm_external_name, "vm:external-name")                                      \
+  V(vm_ffi_abi_specific_mapping, "vm:ffi:abi-specific-mapping")                \
+  V(vm_ffi_struct_fields, "vm:ffi:struct-fields")                              \
+  V(vm_invisible, "vm:invisible")                                              \
   V(vm_never_inline, "vm:never-inline")                                        \
   V(vm_non_nullable_result_type, "vm:non-nullable-result-type")                \
   V(vm_notify_debugger_on_exception, "vm:notify-debugger-on-exception")        \
+  V(vm_prefer_inline, "vm:prefer-inline")                                      \
   V(vm_recognized, "vm:recognized")                                            \
+  V(vm_testing_print_flow_graph, "vm:testing:print-flow-graph")                \
   V(vm_trace_entrypoints, "vm:testing.unsafe.trace-entrypoints-fn")            \
-  V(vm_ffi_abi_specific_mapping, "vm:ffi:abi-specific-mapping")                \
-  V(vm_ffi_struct_fields, "vm:ffi:struct-fields")                              \
-  V(vm_unsafe_no_interrupts, "vm:unsafe:no-interrupts")                        \
-  V(vm_external_name, "vm:external-name")                                      \
-  V(vm_invisible, "vm:invisible")                                              \
-  V(vm_testing_print_flow_graph, "vm:testing:print-flow-graph")
+  V(vm_unsafe_no_interrupts, "vm:unsafe:no-interrupts")
 
 // Contains a list of frequently used strings in a canonicalized form. This
 // list is kept in the vm_isolate in order to share the copy across isolates
diff --git a/sdk/lib/_internal/vm/lib/array.dart b/sdk/lib/_internal/vm/lib/array.dart
index be6f0dd..6829fb7 100644
--- a/sdk/lib/_internal/vm/lib/array.dart
+++ b/sdk/lib/_internal/vm/lib/array.dart
@@ -115,6 +115,7 @@
   }
 
   // Specialization of List.of constructor for growable == false.
+  @pragma("vm:always-consider-inlining")
   factory _List.of(Iterable<E> elements) {
     if (elements is _GrowableList) {
       return _List._ofGrowableList(unsafeCast(elements));
diff --git a/sdk/lib/_internal/vm/lib/collection_patch.dart b/sdk/lib/_internal/vm/lib/collection_patch.dart
index 0647441..7306739 100644
--- a/sdk/lib/_internal/vm/lib/collection_patch.dart
+++ b/sdk/lib/_internal/vm/lib/collection_patch.dart
@@ -25,6 +25,7 @@
 @patch
 class HashMap<K, V> {
   @patch
+  @pragma("vm:prefer-inline")
   factory HashMap(
       {bool equals(K key1, K key2)?,
       int hashCode(K key)?,