Reland "[vm/compiler] AOT inline heuristics improvements"
This reverts commit 43a96d49afb69eba6179fa083a97f8bec581f4d8.
Reason for revert: Relanding the original change; the regress_32322_test crash that prompted the revert has been addressed.
Original change's description:
> Revert "[vm/compiler] AOT inline heuristics improvements"
>
> This reverts commit 2908e61f2a84601efcdf0e3bf7df4a6f526fb6c6.
>
> Reason for revert: regress_32322_test crashes
>
> Original change's description:
> > [vm/compiler] AOT inline heuristics improvements
> >
> > Rationale:
> > Yields substantial improvements on various benchmarks
> > (1.8x on HMAC stand-alone, around 5x on TypedData setters and getters),
> > with only moderate increase in code size (3.2% on Flutter gallery).
> >
> > https://github.com/dart-lang/sdk/issues/34473
> > https://github.com/dart-lang/sdk/issues/32167
> >
> > Change-Id: I0909efd7afc72229524ff8edb7322ce025a14af4
> > Reviewed-on: https://dart-review.googlesource.com/c/89162
> > Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
> > Reviewed-by: Alexander Markov <alexmarkov@google.com>
> > Commit-Queue: Aart Bik <ajcbik@google.com>
>
> TBR=vegorov@google.com,alexmarkov@google.com,ajcbik@google.com
>
> Change-Id: I9c7dadb18935ad32f4d4cd72872838e8ac9cc288
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Reviewed-on: https://dart-review.googlesource.com/c/89740
> Reviewed-by: Aart Bik <ajcbik@google.com>
> Commit-Queue: Aart Bik <ajcbik@google.com>
TBR=vegorov@google.com,alexmarkov@google.com,ajcbik@google.com
# Not skipping CQ checks because original CL landed > 1 day ago.
Change-Id: Iace9857654b63af2fbcd2808d19802fb60305973
Reviewed-on: https://dart-review.googlesource.com/c/90141
Reviewed-by: Aart Bik <ajcbik@google.com>
Commit-Queue: Aart Bik <ajcbik@google.com>
diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc
index 3b48fc5..bfd4395 100644
--- a/runtime/vm/compiler/backend/inliner.cc
+++ b/runtime/vm/compiler/backend/inliner.cc
@@ -56,6 +56,10 @@
80,
"Do not inline callees larger than threshold");
DEFINE_FLAG(int,
+ inlining_small_leaf_size_threshold,
+ 50,
+ "Do not inline leaf callees larger than threshold");
+DEFINE_FLAG(int,
inlining_caller_size_threshold,
50000,
"Stop inlining once caller reaches the threshold.");
@@ -332,19 +336,16 @@
static intptr_t AotCallCountApproximation(intptr_t nesting_depth) {
switch (nesting_depth) {
case 0:
- // Note that we use value 0, and not 1, i.e. any straightline code
- // outside a loop is assumed to be very cold. With value 1, inlining
- // inside loops is still favored over inlining inside straightline
- // code, but for a method without loops, *all* call sites are inlined
- // (potentially more performance, at the expense of larger code size).
- // TODO(ajcbik): use 1 and fine tune other heuristics
- return 0;
+ // The value 1 makes most sense, but it may give a high ratio to call
+ // sites outside loops. Therefore, such call sites are subject to
+ // subsequent stricter heuristic to limit code size increase.
+ return 1;
case 1:
return 10;
case 2:
- return 100;
+ return 10 * 10;
default:
- return 1000;
+ return 10 * 10 * 10;
}
}
@@ -512,6 +513,36 @@
DISALLOW_COPY_AND_ASSIGN(CallSites);
};
+// Determines if inlining this graph yields a small leaf node.
+static bool IsSmallLeaf(FlowGraph* graph) {
+ intptr_t instruction_count = 0;
+ for (BlockIterator block_it = graph->postorder_iterator(); !block_it.Done();
+ block_it.Advance()) {
+ BlockEntryInstr* entry = block_it.Current();
+ for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) {
+ Instruction* current = it.Current();
+ ++instruction_count;
+ if (current->IsInstanceCall() || current->IsPolymorphicInstanceCall() ||
+ current->IsClosureCall()) {
+ return false;
+ } else if (current->IsStaticCall()) {
+ const Function& function = current->AsStaticCall()->function();
+ const intptr_t inl_size = function.optimized_instruction_count();
+      // Assume a static call is always inlined in some way and add the
+ // cached size to the total instruction count. A reasonable guess
+ // is made if the count has not been collected yet (listed methods
+ // are never very large).
+ if (!function.always_inline() && !function.IsRecognized()) {
+ return false;
+ }
+ static constexpr intptr_t kAvgListedMethodSize = 20;
+ instruction_count += (inl_size == 0 ? kAvgListedMethodSize : inl_size);
+ }
+ }
+ }
+ return instruction_count <= FLAG_inlining_small_leaf_size_threshold;
+}
+
struct InlinedCallData {
InlinedCallData(Definition* call,
const Array& arguments_descriptor,
@@ -863,7 +894,8 @@
bool TryInlining(const Function& function,
const Array& argument_names,
- InlinedCallData* call_data) {
+ InlinedCallData* call_data,
+ bool stricter_heuristic) {
if (trace_inlining()) {
String& name = String::Handle(function.QualifiedUserVisibleName());
THR_Print(" => %s (deopt count %d)\n", name.ToCString(),
@@ -1174,7 +1206,7 @@
if (FLAG_support_il_printer && trace_inlining() &&
(FLAG_print_flow_graph || FLAG_print_flow_graph_optimized)) {
- THR_Print("Callee graph for inlining %s\n",
+ THR_Print("Callee graph for inlining %s (optimized)\n",
function.ToFullyQualifiedCString());
FlowGraphPrinter printer(*callee_graph);
printer.PrintBlocks();
@@ -1215,6 +1247,19 @@
return false;
}
+ // If requested, a stricter heuristic is applied to this inlining. This
+ // heuristic always scans the method (rather than possibly reusing
+ // cached results) to make sure all specializations are accounted for.
+ if (stricter_heuristic) {
+ if (!IsSmallLeaf(callee_graph)) {
+ TRACE_INLINING(
+ THR_Print(" Bailout: heuristics (no small leaf)\n"));
+ PRINT_INLINING_TREE("Heuristic fail (no small leaf)",
+ &call_data->caller, &function, call_data->call);
+ return false;
+ }
+ }
+
// Inline dispatcher methods regardless of the current depth.
const intptr_t depth =
function.IsDispatcherOrImplicitAccessor() ? 0 : inlining_depth_;
@@ -1436,7 +1481,13 @@
call, Array::ZoneHandle(Z, call->GetArgumentsDescriptor()),
call->FirstArgIndex(), &arguments, call_info[call_idx].caller(),
call_info[call_idx].caller_graph->inlining_id());
- if (TryInlining(call->function(), call->argument_names(), &call_data)) {
+
+ // Calls outside loops are subject to stricter heuristics under AOT.
+ bool stricter_heuristic = FLAG_precompiled_mode &&
+ !inliner_->AlwaysInline(target) &&
+ call_info[call_idx].nesting_depth == 0;
+ if (TryInlining(call->function(), call->argument_names(), &call_data,
+ stricter_heuristic)) {
InlineCall(&call_data);
inlined = true;
}
@@ -1489,7 +1540,7 @@
call, arguments_descriptor, call->FirstArgIndex(), &arguments,
call_info[call_idx].caller(),
call_info[call_idx].caller_graph->inlining_id());
- if (TryInlining(target, call->argument_names(), &call_data)) {
+ if (TryInlining(target, call->argument_names(), &call_data, false)) {
InlineCall(&call_data);
inlined = true;
}
@@ -1751,7 +1802,7 @@
caller_function_, caller_inlining_id_);
Function& target = Function::ZoneHandle(zone(), target_info.target->raw());
if (!owner_->TryInlining(target, call_->instance_call()->argument_names(),
- &call_data)) {
+ &call_data, false)) {
return false;
}
diff --git a/runtime/vm/compiler/method_recognizer.h b/runtime/vm/compiler/method_recognizer.h
index e901118..75ceb07 100644
--- a/runtime/vm/compiler/method_recognizer.h
+++ b/runtime/vm/compiler/method_recognizer.h
@@ -373,10 +373,28 @@
V(_List, _slice, ObjectArraySlice, 0x4c865d1d) \
V(_ImmutableList, get:iterator, ImmutableArrayIterator, 0x6c851c55) \
V(_ImmutableList, forEach, ImmutableArrayForEach, 0x11406b13) \
- V(_Uint8ArrayView, [], Uint8ArrayViewGetIndexed, 0x7d308247) \
- V(_Uint8ArrayView, []=, Uint8ArrayViewSetIndexed, 0x65ba546e) \
V(_Int8ArrayView, [], Int8ArrayViewGetIndexed, 0x7e5a8458) \
V(_Int8ArrayView, []=, Int8ArrayViewSetIndexed, 0x62f615e4) \
+ V(_Uint8ArrayView, [], Uint8ArrayViewGetIndexed, 0x7d308247) \
+ V(_Uint8ArrayView, []=, Uint8ArrayViewSetIndexed, 0x65ba546e) \
+ V(_Uint8ClampedArrayView, [], Uint8ClampedArrayViewGetIndexed, 0x7d308247) \
+ V(_Uint8ClampedArrayView, []=, Uint8ClampedArrayViewSetIndexed, 0x65ba546e) \
+ V(_Uint16ArrayView, [], Uint16ArrayViewGetIndexed, 0xe96836dd) \
+ V(_Uint16ArrayView, []=, Uint16ArrayViewSetIndexed, 0x15b02947) \
+ V(_Int16ArrayView, [], Int16ArrayViewGetIndexed, 0x1b24a48b) \
+ V(_Int16ArrayView, []=, Int16ArrayViewSetIndexed, 0xb91ec2e6) \
+ V(_Uint32ArrayView, [], Uint32ArrayViewGetIndexed, 0x8a4f93b3) \
+ V(_Uint32ArrayView, []=, Uint32ArrayViewSetIndexed, 0xf54918b5) \
+ V(_Int32ArrayView, [], Int32ArrayViewGetIndexed, 0x85040819) \
+ V(_Int32ArrayView, []=, Int32ArrayViewSetIndexed, 0xaec8c6f5) \
+ V(_Uint64ArrayView, [], Uint64ArrayViewGetIndexed, 0xd0c44fe7) \
+ V(_Uint64ArrayView, []=, Uint64ArrayViewSetIndexed, 0x402712b7) \
+ V(_Int64ArrayView, [], Int64ArrayViewGetIndexed, 0xf3090b95) \
+ V(_Int64ArrayView, []=, Int64ArrayViewSetIndexed, 0xca07e497) \
+ V(_Float32ArrayView, [], Float32ArrayViewGetIndexed, 0xef967533) \
+ V(_Float32ArrayView, []=, Float32ArrayViewSetIndexed, 0xc9b691bd) \
+ V(_Float64ArrayView, [], Float64ArrayViewGetIndexed, 0x9d83f585) \
+ V(_Float64ArrayView, []=, Float64ArrayViewSetIndexed, 0x3c1adabd) \
V(_ByteDataView, setInt8, ByteDataViewSetInt8, 0x6395293e) \
V(_ByteDataView, setUint8, ByteDataViewSetUint8, 0x79979d1f) \
V(_ByteDataView, setInt16, ByteDataViewSetInt16, 0x525ec534) \