Use a small pool of code completion model isolates
This change makes it so that the model can parallelize load over
a handful of isolates when a user is typing very quickly
instead of relying on a single isolate. I played around with
this manually while looking at the diagnostics graphs and found
that model response latency grows with consecutive requests using
a single isolate. Four isolates kept latency constant even as I
typed very quickly.
Change-Id: Id03afc67cb7d6a9e13605393a8658835bd88e4b5
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/113000
Reviewed-by: Brian Wilkerson <brianwilkerson@google.com>
diff --git a/pkg/analysis_server/lib/src/services/completion/dart/completion_ranking.dart b/pkg/analysis_server/lib/src/services/completion/dart/completion_ranking.dart
index cfb01cc..5c7fde6 100644
--- a/pkg/analysis_server/lib/src/services/completion/dart/completion_ranking.dart
+++ b/pkg/analysis_server/lib/src/services/completion/dart/completion_ranking.dart
@@ -17,6 +17,9 @@
/// Minimum probability to prioritize model-only suggestion.
const double _MODEL_RELEVANCE_CUTOFF = 0.5;
+/// Number of code completion isolates.
+const int _ISOLATE_COUNT = 4;
+
/// Prediction service run by the model isolate.
void entrypoint(SendPort sendPort) {
LanguageModel model;
@@ -44,8 +47,11 @@
/// Filesystem location of model files.
final String _directory;
- /// Port to communicate from main to model isolate.
- SendPort _write;
+ /// Ports to communicate from main to model isolates.
+ List<SendPort> _writes;
+
+ /// Pointer for round robin load balancing over isolates.
+ int _index;
CompletionRanking(this._directory);
@@ -53,11 +59,12 @@
Future<Map<String, Map<String, double>>> makeRequest(
String method, List<String> args) async {
final port = ReceivePort();
- _write.send({
+ _writes[_index].send({
'method': method,
'args': args,
'port': port.sendPort,
});
+ this._index = (_index + 1) % _ISOLATE_COUNT;
return await port.first;
}
@@ -83,7 +90,7 @@
DartCompletionRequest request,
FeatureSet featureSet) async {
final probability = await probabilityFuture
- .timeout(const Duration(milliseconds: 500), onTimeout: () => null);
+ .timeout(const Duration(seconds: 1), onTimeout: () => null);
if (probability == null || probability.isEmpty) {
// Failed to compute probability distribution, don't rerank.
return suggestions;
@@ -177,9 +184,20 @@
/// Spins up the model isolate and tells it to load the tflite model.
Future<void> start() async {
+ this._writes = [];
+ this._index = 0;
+ final initializations = <Future<void>>[];
+ for (var i = 0; i < _ISOLATE_COUNT; i++) {
+ initializations.add(_startIsolate());
+ }
+
+ await Future.wait(initializations);
+ }
+
+ Future<void> _startIsolate() async {
final port = ReceivePort();
await Isolate.spawn(entrypoint, port.sendPort);
- this._write = await port.first;
+ this._writes.add(await port.first);
await makeRequest('load', [_directory]);
}
}