Balance tests equally across shards.

The sharded test runner invocations are now passed the previous results
which contains the test timing, which are used to simulate how long each
shard would take to run. The shards are now balanced as evenly as
possible on a test level, taking multiple cores into account.

Sharded tests are now run starting with the slowest test first, such
that extremely long running tests finish as early as possible. This
behavior ensures the cores are saturated and can be padded with fast
tests near the end, rather than waiting for a few slow tests to complete
while the rest of the system is idle.

The algorithm works very well whenever it's able to accurately predict
the time to run shards. In a number of cases, the model doesn't quite
reflect reality and the data, which makes it fairly imperfect but still
reasonably good. I think a second order feedback loop might kick in once
it reorders the tests across shards and the test timing data reflects
the new test timings.

Multitests are no longer always sent to the same shard, since the data
isn't available at the moment, and the change as-is speeds up the test
running considerably.

The front end unit test suites currently ignore the feature as there are
no benefits yet to improving those quick shards.

Upgrade the language version to 3.0.0 so patterns can be used and fix
a mixin not being a mixin.

Fixes: b/291585137
Change-Id: I3cc1b1d96038d5b46e836b091e299097717c226c
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/314081
Reviewed-by: William Hesse <whesse@google.com>
Commit-Queue: Jonas Termansen <sortie@google.com>
diff --git a/pkg/front_end/test/unit_test_suites.dart b/pkg/front_end/test/unit_test_suites.dart
index d8902bc..8783a89 100644
--- a/pkg/front_end/test/unit_test_suites.dart
+++ b/pkg/front_end/test/unit_test_suites.dart
@@ -93,6 +93,8 @@
           defaultsTo: "${getDefaultThreads()}")
       ..addOption("shards", help: "Number of shards", defaultsTo: "1")
       ..addOption("shard", help: "Which shard to run", defaultsTo: "1")
+      ..addOption("previous-results",
+          help: "An earlier results.json for balancing tests across shards.")
       ..addFlag("skipTestsThatRequireGit",
           help: "Whether to skip tests that require git to run",
           defaultsTo: false)
diff --git a/pkg/test_runner/lib/src/compiler_configuration.dart b/pkg/test_runner/lib/src/compiler_configuration.dart
index da61615..28767a9 100644
--- a/pkg/test_runner/lib/src/compiler_configuration.dart
+++ b/pkg/test_runner/lib/src/compiler_configuration.dart
@@ -1337,7 +1337,7 @@
   }
 }
 
-abstract class VMKernelCompilerMixin {
+mixin VMKernelCompilerMixin {
   TestConfiguration get _configuration;
 
   bool get _useSdk;
diff --git a/pkg/test_runner/lib/src/configuration.dart b/pkg/test_runner/lib/src/configuration.dart
index 6e733c0..7fc309c 100644
--- a/pkg/test_runner/lib/src/configuration.dart
+++ b/pkg/test_runner/lib/src/configuration.dart
@@ -59,6 +59,8 @@
       this.taskCount = 1,
       this.shardCount = 1,
       this.shard = 1,
+      this.shardOfTests = const {},
+      this.testTimes = const {},
       this.stepName,
       this.testServerPort = 0,
       this.testServerCrossOriginPort = 0,
@@ -78,7 +80,7 @@
                 .resolve('.dart_tool/package_config.json')
                 .toFilePath();
 
-  final Map<String, RegExp?> selectors;
+  final Map<String, RegExp> selectors;
   final Progress progress;
   // The test configuration read from the -n option and the test matrix
   // or else computed from the test options.
@@ -143,6 +145,8 @@
   final int taskCount;
   final int shardCount;
   final int shard;
+  final Map<String, int> shardOfTests;
+  final Map<String, int> testTimes;
   final int repeat;
   final String? stepName;
 
diff --git a/pkg/test_runner/lib/src/options.dart b/pkg/test_runner/lib/src/options.dart
index bb619c1..6afeec2 100644
--- a/pkg/test_runner/lib/src/options.dart
+++ b/pkg/test_runner/lib/src/options.dart
@@ -12,6 +12,7 @@
 import 'configuration.dart';
 import 'path.dart';
 import 'repository.dart';
+import 'shard.dart';
 import 'test_configurations.dart';
 import 'utils.dart';
 
@@ -226,6 +227,9 @@
         defaultsTo: '1',
         hide: true,
         help: 'The index of this instance when running in sharded mode.')
+    ..addOption('previous-results',
+        hide: true,
+        help: '''An earlier results.json for balancing tests across shards.''')
     ..addFlag('help', abbr: 'h', help: 'Print list of options.')
     ..addIntegerOption('repeat',
         defaultsTo: '1', help: 'How many times each test is run')
@@ -656,10 +660,30 @@
 
     void addConfiguration(Configuration innerConfiguration,
         [String? namedConfiguration]) {
+      final selectors = _expandSelectors(data, innerConfiguration.nnbdMode);
+      final shard = int.parse(data["shard"] as String);
+      final shardCount = int.parse(data["shards"] as String);
+      final tasks = int.parse(data["tasks"] as String);
+      var shardOfTests = <String, int>{};
+      var testTimes = <String, int>{};
+      if (namedConfiguration != null &&
+          data["previous-results"] != null &&
+          1 < shardCount) {
+        final previousResults = data["previous-results"] as String;
+        testTimes =
+            loadTestTimes(previousResults, namedConfiguration, selectors);
+        final stopwatch = Stopwatch()..start();
+        List<Duration> shardDurations;
+        (shardOfTests, shardDurations) =
+            balanceShards(testTimes, shardCount, tasks);
+        print("Balancing ${testTimes.length} tests across $shardCount shards "
+            "with $tasks tasks took ${stopwatch.elapsed}");
+        print("Shard $shard has workload ${shardDurations[shard - 1]}");
+      }
       var configuration = TestConfiguration(
           configuration: innerConfiguration,
           progress: progress,
-          selectors: _expandSelectors(data, innerConfiguration.nnbdMode),
+          selectors: selectors,
           build: data["build"] as bool,
           testList: data["test-list-contents"] as List<String>?,
           repeat: int.parse(data["repeat"] as String),
@@ -687,9 +711,11 @@
           dartPrecompiledPath: data["dart-precompiled"] as String?,
           genSnapshotPath: data["gen-snapshot"] as String?,
           keepGeneratedFiles: data["keep-generated-files"] as bool,
-          taskCount: int.parse(data["tasks"] as String),
-          shardCount: int.parse(data["shards"] as String),
-          shard: int.parse(data["shard"] as String),
+          taskCount: tasks,
+          shardCount: shardCount,
+          shard: shard,
+          shardOfTests: shardOfTests,
+          testTimes: testTimes,
           stepName: data["step-name"] as String?,
           testServerPort: int.parse(data['test-server-port'] as String),
           testServerCrossOriginPort:
diff --git a/pkg/test_runner/lib/src/process_queue.dart b/pkg/test_runner/lib/src/process_queue.dart
index 8b3c738..9ae152b 100644
--- a/pkg/test_runner/lib/src/process_queue.dart
+++ b/pkg/test_runner/lib/src/process_queue.dart
@@ -229,9 +229,18 @@
     // configurations there is no need to repeatedly search the file
     // system, generate tests, and search test files for options.
     var testCache = <String, List<TestFile>>{};
+    var testCases = <TestCase>[];
 
     for (var suite in testSuites) {
-      suite.findTestCases(_add, testCache);
+      suite.findTestCases(
+          (TestCase testCase) => testCases.add(testCase), testCache);
+    }
+
+    testCases.sort((a, b) => (b.configuration.testTimes[b.displayName] ?? 0)
+        .compareTo(a.configuration.testTimes[a.displayName] ?? 0));
+
+    for (final testCase in testCases) {
+      _add(testCase);
     }
 
     // We're finished with building the dependency graph.
diff --git a/pkg/test_runner/lib/src/shard.dart b/pkg/test_runner/lib/src/shard.dart
new file mode 100644
index 0000000..465a302
--- /dev/null
+++ b/pkg/test_runner/lib/src/shard.dart
@@ -0,0 +1,72 @@
+// Copyright (c) 2023, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:convert';
+import 'dart:io';
+import 'dart:math';
+
+/// Loads the times it takes to run each test from the previous results.json in
+/// [path] on the [configuration] for the tests matching the regular expression
+/// for each suite in [selectors].
+Map<String, int> loadTestTimes(
+    String path, String configuration, Map<String, RegExp> selectors) {
+  final times = <String, int>{};
+  for (final line in File(path).readAsLinesSync()) {
+    final result = jsonDecode(line) as Map<String, dynamic>;
+    final suite = result['suite'] as String;
+    final name = result['name'] as String;
+    if (result['configuration'] == configuration &&
+        selectors.containsKey(suite) &&
+        selectors[suite]!.hasMatch(name)) {
+      times[name] = result['time_ms'] as int;
+    }
+  }
+  return times;
+}
+
+/// Balances the shards to have equal run time using the [testTimes] timings for
+/// each test, across [shardCount] shards each able to run [taskCount] tasks
+/// concurrently.
+///
+/// The previous test results are used for the timings, if a test is new and
+/// doesn't have a timing, then it isn't allocated a shard. Instead the test
+/// falls back on the traditional behavior of assigning it to a shard based on
+/// the hash of the file path.
+///
+/// Returns a map of test name to its shard (zero based) and a list of how long
+/// each shard is expected to take.
+(Map<String, int>, List<Duration>) balanceShards(
+    Map<String, int> testTimes, int shardCount, int taskCount) {
+  final shardOfTests = <String, int>{};
+  // Predict N shards with M cores each.
+  final cores = shardCount * taskCount;
+  final coreDurations = List.filled(cores, 0);
+  // Greedily assign the longest running tests first.
+  final sorted = testTimes.keys.toList()
+    ..sort((a, b) => testTimes[b]!.compareTo(testTimes[a]!));
+  for (final test in sorted) {
+    final timeMs = testTimes[test]!;
+    var minMs = 0;
+    var minCore = 0;
+    // Assign the test to the core that would finish the earliest with the test.
+    for (var n = 0; n < cores; n++) {
+      if (n == 0 || coreDurations[n] + timeMs < minMs) {
+        minMs = coreDurations[n] + timeMs;
+        minCore = n;
+      }
+    }
+    // Assign the test to the shard associated with the core.
+    coreDurations[minCore] += timeMs;
+    shardOfTests[test] = minCore ~/ taskCount;
+  }
+  // Calculate how long each shard would run as its longest running core.
+  final shardDurations = [
+    for (var i = 0; i < shardCount; i++)
+      Duration(
+          milliseconds: coreDurations
+              .sublist(i * taskCount, (i + 1) * taskCount)
+              .reduce(max))
+  ];
+  return (shardOfTests, shardDurations);
+}
diff --git a/pkg/test_runner/lib/src/test_suite.dart b/pkg/test_runner/lib/src/test_suite.dart
index 1060d6b..6644eec 100644
--- a/pkg/test_runner/lib/src/test_suite.dart
+++ b/pkg/test_runner/lib/src/test_suite.dart
@@ -150,10 +150,11 @@
       return false;
     }
 
-    // Handle sharding based on the original test path. All multitests of a
-    // given original test belong to the same shard.
+    // Route tests to their allocated shard and fall back on hashing the test
+    // test for new tests without timing data.
     if (configuration.shardCount > 1 &&
-        testFile.shardHash % configuration.shardCount !=
+        (configuration.shardOfTests[displayName] ??
+                testFile.shardHash % configuration.shardCount) !=
             configuration.shard - 1) {
       return false;
     }
diff --git a/pkg/test_runner/pubspec.yaml b/pkg/test_runner/pubspec.yaml
index 713ec19..4bcd636 100644
--- a/pkg/test_runner/pubspec.yaml
+++ b/pkg/test_runner/pubspec.yaml
@@ -7,7 +7,7 @@
 publish_to: none
 
 environment:
-  sdk: '>=2.16.0 <3.0.0'
+  sdk: '>=3.0.0 <4.0.0'
 
 # Use 'any' constraints here; we get our versions from the DEPS file.
 dependencies:
diff --git a/tools/bots/test_matrix.json b/tools/bots/test_matrix.json
index b3c7d5d..466fcda 100644
--- a/tools/bots/test_matrix.json
+++ b/tools/bots/test_matrix.json
@@ -4,6 +4,7 @@
   ],
   "filesets": {
     "analyzer_unit_tests": [
+      "LATEST/results.json",
       ".dart_tool/package_config.json",
       "pkg/",
       "third_party/pkg/",
@@ -12,6 +13,7 @@
       "xcodebuild/ReleaseX64/dart-sdk/"
     ],
     "js_platform_2": [
+      "LATEST/results.json",
       ".dart_tool/package_config.json",
       "out/ReleaseX64/dart-sdk/",
       "out/ReleaseX64/dart2js_platform.dill",
@@ -41,6 +43,7 @@
       "tools/"
     ],
     "js_platform": [
+      "LATEST/results.json",
       ".dart_tool/package_config.json",
       "out/ReleaseX64/dart-sdk/",
       "out/ReleaseX64/ddc_outline_unsound.dill",
@@ -68,6 +71,7 @@
       "xcodebuild/ReleaseARM64/gen/utils/dartdevc/"
     ],
     "js_platform_hostasserts_2": [
+      "LATEST/results.json",
       ".dart_tool/package_config.json",
       "out/ReleaseX64/dart",
       "out/ReleaseX64/dart2js_platform.dill",
@@ -93,6 +97,7 @@
       "tools/"
     ],
     "js_platform_hostasserts": [
+      "LATEST/results.json",
       ".dart_tool/package_config.json",
       "out/ReleaseX64/dart",
       "out/ReleaseX64/dart2js_platform.dill",
@@ -117,6 +122,7 @@
       "tools/"
     ],
     "dart2wasm_hostasserts": [
+      "LATEST/results.json",
       ".dart_tool/package_config.json",
       "out/ReleaseX64/dart",
       "out/ReleaseX64/dart_precompiled_runtime",
@@ -155,6 +161,7 @@
       "tools/"
     ],
     "front-end": [
+      "LATEST/results.json",
       ".dart_tool/package_config.json",
       "out/ReleaseIA32/",
       "out/ReleaseX64/",
@@ -199,6 +206,7 @@
       "xcodebuild/ReleaseX64/"
     ],
     "fuzzer": [
+      "LATEST/results.json",
       "runtime/tools/dartfuzz/",
       "out/",
       "third_party/pkg/",
@@ -218,6 +226,7 @@
       ".dart_tool/package_config.json"
     ],
     "vm": [
+      "LATEST/results.json",
       "benchmarks/",
       "out/",
       "xcodebuild/",