[vm/compiler] Improve AOT block scheduler
Try to keep loop blocks together, previously AOT compiler would produce
bad block order for loops like these:
while (true) {
if (smth) {
// (*)
return;
}
}
And put `(*)` blocks in between loop header and other loop blocks.
TEST=ci
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-x64-try,vm-aot-mac-release-arm64-try,vm-aot-linux-release-x64-try
Change-Id: I52304810b61ff9288fe9df648b21c0258299d61e
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/358447
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Slava Egorov <vegorov@google.com>
diff --git a/pkg/vm/bin/compare_il.dart b/pkg/vm/bin/compare_il.dart
index 28a0f3d..f32d01e 100644
--- a/pkg/vm/bin/compare_il.dart
+++ b/pkg/vm/bin/compare_il.dart
@@ -195,8 +195,13 @@
for (var graph in File(ilFile).readAsLinesSync()) {
final m = jsonDecode(graph) as Map<String, dynamic>;
- graphs.putIfAbsent(m['f'], () => {})[m['p']] =
- FlowGraph(m['b'], m['desc'], m['flags'], rename: rename);
+ graphs.putIfAbsent(m['f'], () => {})[m['p']] = FlowGraph(
+ m['b'],
+ m['desc'],
+ m['flags'],
+ rename: rename,
+ codegenBlockOrder: m['cbo'],
+ );
}
return graphs;
diff --git a/pkg/vm/lib/testing/il_matchers.dart b/pkg/vm/lib/testing/il_matchers.dart
index 9cb5c00..3b316df 100644
--- a/pkg/vm/lib/testing/il_matchers.dart
+++ b/pkg/vm/lib/testing/il_matchers.dart
@@ -13,22 +13,29 @@
/// Flow graph parsed from --print-flow-graph-as-json output.
class FlowGraph {
- final List<dynamic> blocks;
+ final List<dynamic> _blocks;
final Map<String, InstructionDescriptor> descriptors;
final Map<String, dynamic> flags;
final Renamer rename;
+ final List<dynamic>? codegenBlockOrder;
- FlowGraph(this.blocks, Map<String, dynamic> desc, this.flags,
- {required this.rename})
+ FlowGraph(this._blocks, Map<String, dynamic> desc, this.flags,
+ {required this.rename, List<dynamic>? codegenBlockOrder})
: descriptors = {
for (var e in desc.entries)
e.key: InstructionDescriptor.fromJson(e.value)
- };
+ },
+ codegenBlockOrder =
+ codegenBlockOrder?.map((idx) => _blocks[idx as int]).toList();
bool get soundNullSafety => flags['nnbd'];
PrettyPrinter get printer => PrettyPrinter(descriptors);
+ List<dynamic> blocks({bool inCodegenBlockOrder = false}) {
+ return inCodegenBlockOrder ? codegenBlockOrder! : _blocks;
+ }
+
/// Match the sequence of blocks in this flow graph against the given
/// sequence of matchers: `expected[i]` is expected to match `blocks[i]`,
/// but there can be more blocks in the graph than matchers (the suffix is
@@ -38,14 +45,19 @@
/// a fresh instance of [Env] will be created and used.
///
/// This function returns the populated matching environment.
- Env match(List<Matcher> expected, {Env? env}) {
+ Env match(
+ List<Matcher> expected, {
+ Env? env,
+ bool inCodegenBlockOrder = false,
+ }) {
env ??= Env(rename: rename, descriptors: descriptors);
+ final got = blocks(inCodegenBlockOrder: inCodegenBlockOrder);
for (var i = 0; i < expected.length; i++) {
- final result = expected[i].match(env, blocks[i]);
+ final result = expected[i].match(env, got[i]);
if (result.isFail) {
print('Failed to match: ${result.message}');
- dump();
+ dump(inCodegenBlockOrder: inCodegenBlockOrder);
throw 'Failed to match';
}
}
@@ -63,11 +75,11 @@
return {for (final e in attrs.entries) e.key: instr['d'][e.value]};
}
- void dump() {
+ void dump({bool inCodegenBlockOrder = false}) {
final printer = PrettyPrinter(descriptors);
final buffer = StringBuffer();
- for (var block in blocks) {
+ for (var block in blocks(inCodegenBlockOrder: inCodegenBlockOrder)) {
printer._formatBlock(buffer, block);
}
print(buffer);
@@ -676,7 +688,7 @@
);
// ignore: non_constant_identifier_names
- InstructionMatcher Branch(InstructionMatcher compare,
+ InstructionMatcher Branch(Matcher compare,
{String? ifTrue, String? ifFalse, bool skipUntilMatched = true}) =>
InstructionMatcher._(
op: 'Branch',
diff --git a/runtime/tests/vm/dart/block_ordering_il_test.dart b/runtime/tests/vm/dart/block_ordering_il_test.dart
new file mode 100644
index 0000000..6a89638
--- /dev/null
+++ b/runtime/tests/vm/dart/block_ordering_il_test.dart
@@ -0,0 +1,184 @@
+// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+import 'dart:typed_data';
+
+// This test checks that AOT compiler tries to keep loop blocks together and
+// sinks cold code to the end of the function.
+
+import 'package:expect/expect.dart';
+import 'package:vm/testing/il_matchers.dart';
+
+@pragma('vm:never-inline')
+@pragma('vm:testing:print-flow-graph', 'ReorderBlocks')
+int loop(Uint8List list) {
+ for (var i = 0; i < list.length; i++) {
+ if (list[i] == 1) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+@pragma('vm:never-inline')
+@pragma('vm:testing:print-flow-graph', 'ReorderBlocks')
+int loop2(Uint8List list) {
+ int sum = 0;
+ for (var i = 0; i < list.length; i++) {
+ for (var j = 0; j < list.length; j++) {
+ if (list[j] == 1) {
+ break;
+ }
+ sum += list[j];
+ }
+ }
+ return sum;
+}
+
+@pragma('vm:never-inline')
+@pragma('vm:testing:print-flow-graph', 'ReorderBlocks')
+int bodyAlwaysThrows(Uint8List list) {
+ try {
+ throw 'Yay';
+ } catch (e) {
+ return list.length;
+ }
+}
+
+@pragma('vm:never-inline')
+@pragma('vm:testing:print-flow-graph', 'ReorderBlocks')
+int throwInALoop(Uint8List list) {
+ for (var i = 0; i < list.length; i++) {
+ if (list[i] == 1) {
+ return i;
+ }
+ if (list[i] == 2) {
+ throw 'Unexpected value';
+ }
+ }
+ return -1;
+}
+
+void main(List<String> args) {
+ final input = args.contains('foo')
+ ? (Uint8List(10)..[5] = 1)
+ : (Uint8List(100)..[50] = 1);
+
+ Expect.equals(50, loop(input));
+ Expect.equals(0, loop2(input));
+ Expect.equals(100, bodyAlwaysThrows(input));
+ Expect.equals(50, throwInALoop(input));
+}
+
+void matchIL$loop(FlowGraph graph) {
+ graph.match(inCodegenBlockOrder: true, [
+ match.block('Graph'),
+ match.block('Function', [
+ match.Goto('loop_header'),
+ ]),
+ 'loop_header' <<
+ match.block('Join', [
+ match.Branch(match.any, ifTrue: 'loop_body'),
+ ]),
+ 'loop_body' <<
+ match.block('Target', [
+ match.Branch(match.any, ifFalse: 'loop_inc'),
+ ]),
+ 'loop_inc' <<
+ match.block('Target', [
+ match.Goto('loop_header'),
+ ]),
+ ]);
+}
+
+void matchIL$loop2(FlowGraph graph) {
+ graph.match(inCodegenBlockOrder: true, [
+ match.block('Graph'),
+ match.block('Function', [
+ match.Goto('loop_header_1'),
+ ]),
+ 'loop_header_1' <<
+ match.block('Join', [
+ match.Branch(match.any, ifTrue: 'loop_body_1'),
+ ]),
+ 'loop_body_1' <<
+ match.block('Target', [
+ match.Goto('loop_header_2'),
+ ]),
+ 'loop_header_2' <<
+ match.block('Join', [
+ match.Branch(match.any,
+ ifTrue: 'loop_body_2', ifFalse: 'loop_body_2_exit_1'),
+ ]),
+ 'loop_body_2' <<
+ match.block('Target', [
+ match.Branch(match.any,
+ ifTrue: 'loop_body_2_exit_2', ifFalse: 'loop_inc_2'),
+ ]),
+ 'loop_inc_2' <<
+ match.block('Target', [
+ match.Goto('loop_header_2'),
+ ]),
+ 'loop_body_2_exit_2' <<
+ match.block('Target', [
+ match.Goto('loop_inc_1'),
+ ]),
+ 'loop_body_2_exit_1' <<
+ match.block('Target', [
+ match.Goto('loop_inc_1'),
+ ]),
+ 'loop_inc_1' <<
+ match.block('Join', [
+ match.Goto('loop_header_1'),
+ ]),
+ ]);
+}
+
+void matchIL$bodyAlwaysThrows(FlowGraph graph) {
+ graph.match([
+ match.block('Graph'),
+ match.block('Function'),
+ match.block('CatchBlock'),
+ match.block('Join'),
+ ], inCodegenBlockOrder: true);
+}
+
+void matchIL$throwInALoop(FlowGraph graph) {
+ graph.match(inCodegenBlockOrder: true, [
+ match.block('Graph'),
+ match.block('Function', [
+ match.Goto('loop_header'),
+ ]),
+ 'loop_header' <<
+ match.block('Join', [
+ 'i' << match.Phi(match.any, 'inc_i'),
+ match.Branch(match.any, ifTrue: 'loop_body', ifFalse: 'return_fail'),
+ ]),
+ 'loop_body' <<
+ match.block('Target', [
+ match.Branch(match.any,
+ ifTrue: 'return_found', ifFalse: 'loop_body_cont'),
+ ]),
+ 'loop_body_cont' <<
+ match.block('Target', [
+ match.Branch(match.any, ifTrue: 'throw', ifFalse: 'loop_inc'),
+ ]),
+ 'loop_inc' <<
+ match.block('Target', [
+ 'inc_i' << match.BinaryInt64Op('i', match.any),
+ match.Goto('loop_header'),
+ ]),
+ 'return_found' <<
+ match.block('Target', [
+ match.Return('i'),
+ ]),
+ 'return_fail' <<
+ match.block('Target', [
+ match.Return(match.any),
+ ]),
+ 'throw' <<
+ match.block('Target', [
+ match.Throw(match.any),
+ ]),
+ ]);
+}
diff --git a/runtime/tests/vm/dart/inline_TypedList_getUint32_il_test.dart b/runtime/tests/vm/dart/inline_TypedList_getUint32_il_test.dart
index 053b6ca..f1a647a 100644
--- a/runtime/tests/vm/dart/inline_TypedList_getUint32_il_test.dart
+++ b/runtime/tests/vm/dart/inline_TypedList_getUint32_il_test.dart
@@ -43,7 +43,7 @@
void matchIL$calculateRetainers(FlowGraph graph) {
graph.dump();
final descriptors = graph.descriptors;
- for (var block in graph.blocks) {
+ for (var block in graph.blocks()) {
for (var instr in [...?block['is']]) {
if (instr['o'] != 'StaticCall') continue;
final function = graph.attributesFor(instr)?['function'];
diff --git a/runtime/vm/compiler/backend/block_scheduler.cc b/runtime/vm/compiler/backend/block_scheduler.cc
index 2065f58..cb29964 100644
--- a/runtime/vm/compiler/backend/block_scheduler.cc
+++ b/runtime/vm/compiler/backend/block_scheduler.cc
@@ -238,62 +238,153 @@
}
}
-// Moves blocks ending in a throw/rethrow, as well as any block post-dominated
-// by such a throwing block, to the end.
-void BlockScheduler::ReorderBlocksAOT(FlowGraph* flow_graph) {
- auto& reverse_postorder = flow_graph->reverse_postorder();
- const intptr_t block_count = reverse_postorder.length();
- GrowableArray<bool> is_terminating(block_count);
- is_terminating.FillWith(false, 0, block_count);
+// AOT block order is based on reverse post order but with two changes:
+//
+// - Blocks which always throw and their direct predecessors are considered
+// *cold* and moved to the end of the order.
+// - Blocks which belong to the same loop are kept together (where possible)
+// and not interspersed with other blocks.
+//
+namespace {
+class AOTBlockScheduler {
+ public:
+ explicit AOTBlockScheduler(FlowGraph* flow_graph)
+ : flow_graph_(flow_graph),
+ block_count_(flow_graph->reverse_postorder().length()),
+ marks_(block_count_),
+ postorder_(block_count_),
+ cold_postorder_(10) {
+ marks_.FillWith(0, 0, block_count_);
+ }
- // Any block in the worklist is marked and any of its unconditional
- // predecessors need to be marked as well.
- GrowableArray<BlockEntryInstr*> worklist;
+ void ComputeOrder() {
+ ComputeOrderImpl();
- // Add all throwing blocks to the worklist.
- for (intptr_t i = 0; i < block_count; ++i) {
- auto block = reverse_postorder[i];
- auto last = block->last_instruction();
- if (last->IsThrow() || last->IsReThrow()) {
- const intptr_t preorder_nr = block->preorder_number();
- is_terminating[preorder_nr] = true;
- worklist.Add(block);
+ const auto codegen_order = flow_graph_->CodegenBlockOrder();
+ for (intptr_t i = postorder_.length() - 1; i >= 0; --i) {
+ codegen_order->Add(postorder_[i]);
+ }
+ for (intptr_t i = cold_postorder_.length() - 1; i >= 0; --i) {
+ codegen_order->Add(cold_postorder_[i]);
}
}
- // Follow all indirect predecessors which unconditionally will end up in a
- // throwing block.
- while (worklist.length() > 0) {
- auto block = worklist.RemoveLast();
- for (intptr_t i = 0; i < block->PredecessorCount(); ++i) {
- auto predecessor = block->PredecessorAt(i);
- if (predecessor->last_instruction()->IsGoto()) {
- const intptr_t preorder_nr = predecessor->preorder_number();
- if (!is_terminating[preorder_nr]) {
- is_terminating[preorder_nr] = true;
- worklist.Add(predecessor);
+ private:
+ // The algorithm below is almost identical to |FlowGraph::DiscoverBlocks|, but
+ // with few tweaks which guarantee improved scheduling for cold code and
+ // loops.
+ void ComputeOrderImpl() {
+ PushBlock(flow_graph_->graph_entry());
+ while (!block_stack_.is_empty()) {
+ BlockEntryInstr* block = block_stack_.Last();
+ auto& marks = MarksOf(block);
+ auto last = block->last_instruction();
+ const auto successor_count = last->SuccessorCount();
+
+ if ((marks & kVisitedMark) == 0) {
+ marks |= kVisitedMark;
+
+ if (last->IsThrow() || last->IsReThrow()) {
+ marks |= kColdMark;
+ } else {
+ // When visiting a block inside a loop with two successors
+ // push the successor with lesser nesting *last*, so that it is
+ // visited first. This helps to keep blocks which belong to the
+ // same loop together.
+ //
+ // This is the main difference from |DiscoverBlocks| which always
+ // visits successors in reverse order.
+ if (successor_count == 2 && block->loop_info() != nullptr) {
+ auto succ0 = last->SuccessorAt(0);
+ auto succ1 = last->SuccessorAt(1);
+
+ if (succ0->NestingDepth() < succ1->NestingDepth()) {
+ PushBlock(succ1);
+ PushBlock(succ0);
+ } else {
+ PushBlock(succ0);
+ PushBlock(succ1);
+ }
+ } else {
+ for (intptr_t i = 0; i < successor_count; i++) {
+ PushBlock(last->SuccessorAt(i));
+ }
+ }
+
+ // We have pushed some successors to the stack. Process them first.
+ if (block_stack_.Last() != block) {
+ continue;
+ }
+
+ // No successors added, fall through.
}
}
+
+ // All successors of this block were visited, which means we are
+ // done with this block.
+ block_stack_.RemoveLast();
+
+ // Propagate cold mark from the successors: if all successors are
+ // cold then this block is cold as well.
+ if (successor_count > 0) {
+ uint8_t cold_mark = kColdMark;
+ for (intptr_t i = 0; i < successor_count; i++) {
+ cold_mark &= MarksOf(last->SuccessorAt(i));
+ }
+ marks |= cold_mark;
+ }
+
+ if ((marks & (kColdMark | kPinnedMark)) == kColdMark) {
+ // This block is cold and not pinned: move it to cold section at
+ // the end.
+ cold_postorder_.Add(block);
+ } else {
+ postorder_.Add(block);
+ }
}
}
- // Emit code in reverse postorder but move any throwing blocks (except the
- // function entry, which needs to come first) to the very end.
- auto codegen_order = flow_graph->CodegenBlockOrder();
- for (intptr_t i = 0; i < block_count; ++i) {
- auto block = reverse_postorder[i];
- const intptr_t preorder_nr = block->preorder_number();
- if (!is_terminating[preorder_nr] || block->IsFunctionEntry()) {
- codegen_order->Add(block);
+ // The block was added to the stack.
+ static constexpr uint8_t kSeenMark = 1 << 0;
+ // The block was visited and all of its successors were added to the stack.
+ static constexpr uint8_t kVisitedMark = 1 << 1;
+ // The block terminates with unconditional throw or rethrow.
+ static constexpr uint8_t kColdMark = 1 << 2;
+ // The block should not move to cold section.
+ static constexpr uint8_t kPinnedMark = 1 << 3;
+
+ uint8_t& MarksOf(BlockEntryInstr* block) {
+ return marks_[block->preorder_number()];
+ }
+
+ void PushBlock(BlockEntryInstr* block) {
+ auto& marks = MarksOf(block);
+ if ((marks & kSeenMark) == 0) {
+ marks |= kSeenMark;
+ block_stack_.Add(block);
+
+ if (block->IsFunctionEntry() || block->IsGraphEntry()) {
+ marks |= kPinnedMark;
+ }
}
}
- for (intptr_t i = 0; i < block_count; ++i) {
- auto block = reverse_postorder[i];
- const intptr_t preorder_nr = block->preorder_number();
- if (is_terminating[preorder_nr] && !block->IsFunctionEntry()) {
- codegen_order->Add(block);
- }
- }
+
+ FlowGraph* const flow_graph_;
+ const intptr_t block_count_;
+
+ // Block marks for each block indexed by block preorder number.
+ GrowableArray<uint8_t> marks_;
+
+ // Stack of blocks to process.
+ GrowableArray<BlockEntryInstr*> block_stack_;
+
+ GrowableArray<BlockEntryInstr*> postorder_;
+ GrowableArray<BlockEntryInstr*> cold_postorder_;
+};
+} // namespace
+
+void BlockScheduler::ReorderBlocksAOT(FlowGraph* flow_graph) {
+ AOTBlockScheduler(flow_graph).ComputeOrder();
}
} // namespace dart
diff --git a/runtime/vm/compiler/backend/il_printer.cc b/runtime/vm/compiler/backend/il_printer.cc
index 36f0b83..8d17729 100644
--- a/runtime/vm/compiler/backend/il_printer.cc
+++ b/runtime/vm/compiler/backend/il_printer.cc
@@ -57,6 +57,16 @@
PrintBlock(&writer, block);
}
writer.CloseArray();
+ const auto& codegen_order = *flow_graph->CodegenBlockOrder();
+ if (!codegen_order.is_empty() &&
+ (&codegen_order != &flow_graph->reverse_postorder())) {
+ writer.OpenArray("cbo");
+ const auto block_count = flow_graph->reverse_postorder().length();
+ for (auto block : codegen_order) {
+ writer.PrintValue64((block_count - 1) - block->postorder_number());
+ }
+ writer.CloseArray();
+ }
writer.OpenObject("desc");
AttributesSerializer(&writer).WriteDescriptors();
writer.CloseObject();