Use a small pool of code completion model isolates

This change lets the model spread its load across a handful of
isolates when a user is typing very quickly, instead of relying
on a single isolate. I played around with
this manually while looking at the diagnostics graphs and found
that model response latency grows with consecutive requests using
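a single isolate. Four isolates kept latency constant even as I
typed very quickly. This change also raises the timeout for awaiting
the model's probabilities from 500 milliseconds to 1 second.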

Change-Id: Id03afc67cb7d6a9e13605393a8658835bd88e4b5
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/113000
Reviewed-by: Brian Wilkerson <brianwilkerson@google.com>
diff --git a/pkg/analysis_server/lib/src/services/completion/dart/completion_ranking.dart b/pkg/analysis_server/lib/src/services/completion/dart/completion_ranking.dart
index cfb01cc..5c7fde6 100644
--- a/pkg/analysis_server/lib/src/services/completion/dart/completion_ranking.dart
+++ b/pkg/analysis_server/lib/src/services/completion/dart/completion_ranking.dart
@@ -17,6 +17,9 @@
 /// Minimum probability to prioritize model-only suggestion.
 const double _MODEL_RELEVANCE_CUTOFF = 0.5;
 
+/// Number of code completion isolates.
+const int _ISOLATE_COUNT = 4;
+
 /// Prediction service run by the model isolate.
 void entrypoint(SendPort sendPort) {
   LanguageModel model;
@@ -44,8 +47,11 @@
   /// Filesystem location of model files.
   final String _directory;
 
-  /// Port to communicate from main to model isolate.
-  SendPort _write;
+  /// Ports to communicate from main to model isolates.
+  List<SendPort> _writes;
+
+  /// Pointer for round robin load balancing over isolates.
+  int _index;
 
   CompletionRanking(this._directory);
 
@@ -53,11 +59,12 @@
   Future<Map<String, Map<String, double>>> makeRequest(
       String method, List<String> args) async {
     final port = ReceivePort();
-    _write.send({
+    _writes[_index].send({
       'method': method,
       'args': args,
       'port': port.sendPort,
     });
+    this._index = (_index + 1) % _ISOLATE_COUNT;
     return await port.first;
   }
 
@@ -83,7 +90,7 @@
       DartCompletionRequest request,
       FeatureSet featureSet) async {
     final probability = await probabilityFuture
-        .timeout(const Duration(milliseconds: 500), onTimeout: () => null);
+        .timeout(const Duration(seconds: 1), onTimeout: () => null);
     if (probability == null || probability.isEmpty) {
       // Failed to compute probability distribution, don't rerank.
       return suggestions;
@@ -177,9 +184,20 @@
 
   /// Spins up the model isolate and tells it to load the tflite model.
   Future<void> start() async {
+    this._writes = [];
+    this._index = 0;
+    final initializations = <Future<void>>[];
+    for (var i = 0; i < _ISOLATE_COUNT; i++) {
+      initializations.add(_startIsolate());
+    }
+
+    await Future.wait(initializations);
+  }
+
+  Future<void> _startIsolate() async {
     final port = ReceivePort();
     await Isolate.spawn(entrypoint, port.sendPort);
-    this._write = await port.first;
+    this._writes.add(await port.first);
     await makeRequest('load', [_directory]);
   }
 }