Add median, p90, and p95 metrics to completion_metrics This adds a section to the output like this: ``` ### Percentile metrics p50 p90 p95 count > 2s ms per completion 4 6 7 0 ``` Bug: https://github.com/dart-lang/sdk/issues/48788 Change-Id: I868580324b3bc83605aa6466ffd5799625d1a9f3 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/240941 Commit-Queue: Samuel Rawlins <srawlins@google.com> Reviewed-by: Brian Wilkerson <brianwilkerson@google.com>

commit: 6660e3c9a0dd4e35aaea4b43f786a91f31b2b9f6 [log] [tgz]
author: Sam Rawlins <srawlins@google.com> Wed Apr 13 20:48:03 2022 +0000
committer: Commit Bot <commit-bot@chromium.org> Wed Apr 13 20:48:03 2022 +0000
tree: 532109fcf68d5e4f62f70d53c09afa9814b6a688
parent: 3514a3ed930f3dd51c3953d51db406db6b3f7c81 [diff]
diff --git a/pkg/analysis_server/test/tool/completion_metrics/metrics_util_test.dart b/pkg/analysis_server/test/tool/completion_metrics/metrics_util_test.dart
index 639b3eb..9702d0e 100644
--- a/pkg/analysis_server/test/tool/completion_metrics/metrics_util_test.dart
+++ b/pkg/analysis_server/test/tool/completion_metrics/metrics_util_test.dart

@@ -161,6 +161,57 @@
     });
   });
 
+  group('PercentileComputer', () {
+    test('empty', () {
+      var computer = PercentileComputer('empty', valueLimit: 2000);
+      expect(computer.median, equals(0));
+      expect(computer.valueCount, equals(0));
+    });
+
+    test('clear', () {
+      var computer = PercentileComputer('name', valueLimit: 2000);
+      computer.addValue(4);
+      computer.addValue(5);
+      computer.addValue(6);
+      computer.addValue(3000);
+
+      expect(computer.median, equals(5));
+      expect(computer.valueCount, equals(3));
+      expect(computer.aboveValueMaxCount, equals(1));
+      computer.clear();
+
+      expect(computer.median, equals(0));
+      expect(computer.valueCount, equals(0));
+      expect(computer.aboveValueMaxCount, equals(0));
+      expect(computer.aboveValueMaxSamples, isEmpty);
+    });
+
+    test('percentiles', () {
+      var computer = PercentileComputer('name', valueLimit: 2000);
+      for (var i = 0; i < 100; i++) {
+        computer.addValue(i);
+      }
+
+      expect(computer.median, equals(50));
+      expect(computer.p90, equals(90));
+      expect(computer.p95, equals(95));
+    });
+
+    test('values above maxValue', () {
+      var computer = PercentileComputer('name', valueLimit: 2000);
+
+      computer.addValue(1);
+      computer.addValue(2);
+      computer.addValue(3);
+      computer.addValue(2500);
+      computer.addValue(3000);
+
+      expect(computer.median, equals(2));
+      expect(computer.aboveValueMaxCount, equals(2));
+      expect(computer.aboveValueMaxSamples, equals([2500, 3000]));
+    });
+  });
+
   group('Place', () {
     test('none', () {
       var place = Place.none();

diff --git a/pkg/analysis_server/tool/code_completion/completion_metrics.dart b/pkg/analysis_server/tool/code_completion/completion_metrics.dart
index fcc4650..ed88f0c 100644
--- a/pkg/analysis_server/tool/code_completion/completion_metrics.dart
+++ b/pkg/analysis_server/tool/code_completion/completion_metrics.dart

@@ -66,10 +66,13 @@
 ///
 /// This approach has several drawbacks:
 ///
-/// - The AST is always complete and correct, and that's rarely the case for
-///   real completion requests. Usually the tree is incomplete and often has a
-///   completely different structure because of the way recovery works. We
-///   currently have no way of measuring completions under realistic conditions.
+/// - The options for creating an "in-progress" file are limited. In the default
+///   'overlay' mode, the AST is always complete and correct, rarely the case
+///   for real completion requests. The other 'overlay' modes generate
+///   incomplete ASTs with error recovery nodes, but neither of these quite
+///   properly emulate the act of editing the middle of a file, perhaps the
+///   middle of an expression, or the middle of an argument list. We currently
+///   have no way of measuring completions under realistic conditions.
 ///
 /// - We can't measure completions for several keywords because the presence of
 ///   the keyword in the AST causes it to not be suggested.
@@ -77,7 +80,7 @@
 /// - The time it takes to compute the suggestions doesn't include the time
 ///   required to finish analyzing the file if the analysis hasn't been
 ///   completed before suggestions are requested. While the times are accurate
-///   (within the accuracy of the `Stopwatch` class) they are the minimum
+///   (within the accuracy of the [Stopwatch] class) they are the minimum
 ///   possible time. This doesn't give us a measure of how completion will
 ///   perform in production, but does give us an optimistic approximation.
 ///
@@ -193,7 +196,7 @@
     ..addFlag(CompletionMetricsOptions.PRINT_SHADOWED_COMPLETION_DETAILS,
         defaultsTo: false,
         help: 'Print detailed information every time a completion request '
-            'produces a suggestions whose name matches the expected suggestion '
+            'produces a suggestion whose name matches the expected suggestion '
             'but that is referencing a different element',
         negatable: false)
     ..addFlag(CompletionMetricsOptions.PRINT_SLOWEST_RESULTS,
@@ -332,6 +335,11 @@
   final ArithmeticMeanComputer meanCompletionMS =
       ArithmeticMeanComputer('ms per completion');
 
+  /// A percentile computer for the ms per completion request, using 2.000
+  /// seconds as the max value to use in percentile calculations.
+  final PercentileComputer percentileCompletionMS =
+      PercentileComputer('ms per completion', valueLimit: 2000);
+
   final DistributionComputer distributionCompletionMS = DistributionComputer();
 
   final MeanReciprocalRankComputer mrrComputer =
@@ -398,6 +406,8 @@
         .fromJson(map['completionElementKindCounter'] as Map<String, dynamic>);
     metrics.meanCompletionMS
         .fromJson(map['meanCompletionMS'] as Map<String, dynamic>);
+    metrics.percentileCompletionMS
+        .fromJson(map['percentileMS'] as Map<String, dynamic>);
     metrics.distributionCompletionMS
         .fromJson(map['distributionCompletionMS'] as Map<String, dynamic>);
     metrics.mrrComputer.fromJson(map['mrrComputer'] as Map<String, dynamic>);
@@ -453,6 +463,7 @@
     completionKindCounter.addData(metrics.completionKindCounter);
     completionElementKindCounter.addData(metrics.completionElementKindCounter);
     meanCompletionMS.addData(metrics.meanCompletionMS);
+    percentileCompletionMS.addData(metrics.percentileCompletionMS);
     distributionCompletionMS.addData(metrics.distributionCompletionMS);
     mrrComputer.addData(metrics.mrrComputer);
     successfulMrrComputer.addData(metrics.successfulMrrComputer);
@@ -545,6 +556,7 @@
       'completionKindCounter': completionKindCounter.toJson(),
       'completionElementKindCounter': completionElementKindCounter.toJson(),
       'meanCompletionMS': meanCompletionMS.toJson(),
+      'percentileCompletionMS': percentileCompletionMS.toJson(),
       'distributionCompletionMS': distributionCompletionMS.toJson(),
       'mrrComputer': mrrComputer.toJson(),
       'successfulMrrComputer': successfulMrrComputer.toJson(),
@@ -617,6 +629,7 @@
   /// Record this elapsed ms count for the average ms count.
   void _recordTime(CompletionResult result) {
     meanCompletionMS.addValue(result.elapsedMS);
+    percentileCompletionMS.addValue(result.elapsedMS);
     distributionCompletionMS.addValue(result.elapsedMS);
   }
 
@@ -815,7 +828,7 @@
 
   void printComparisonOfCompletionCounts() {
     String toString(int count, int totalCount) {
-      return '$count (${printPercentage(count / totalCount, 2)})';
+      return '$count (${(count / totalCount).asPercentage(2)})';
     }
 
     var counters = targetMetrics.map((metrics) => metrics.completionCounter);
@@ -1093,25 +1106,45 @@
   }
 
   void printOtherMetrics(CompletionMetrics metrics) {
-    List<String> toRow(ArithmeticMeanComputer computer) {
-      var min = computer.min;
-      var mean = computer.mean.toStringAsFixed(6);
-      var max = computer.max;
-      return [computer.name, '$min, $mean, $max'];
+    List<String> meanComputingRow(ArithmeticMeanComputer computer) {
+      return [
+        computer.name,
+        computer.min!.toStringAsFixed(3),
+        computer.mean.toStringAsFixed(3),
+        computer.max!.toStringAsFixed(3),
+      ];
     }
 
     var table = [
-      toRow(metrics.meanCompletionMS),
-      toRow(metrics.charsBeforeTop),
-      toRow(metrics.charsBeforeTopFive),
-      toRow(metrics.insertionLengthTheoretical),
+      ['', 'min', 'mean', 'max'],
+      meanComputingRow(metrics.meanCompletionMS),
+      meanComputingRow(metrics.charsBeforeTop),
+      meanComputingRow(metrics.charsBeforeTopFive),
+      meanComputingRow(metrics.insertionLengthTheoretical),
     ];
     rightJustifyColumns(table, range(1, table[0].length));
 
     printHeading(2, 'Other metrics');
     printTable(table);
 
+    var percentileTable = [
+      ['', 'p50', 'p90', 'p95', 'count > 2s', 'max'],
+      [
+        metrics.percentileCompletionMS.name,
+        metrics.percentileCompletionMS.median.toString(),
+        metrics.percentileCompletionMS.p90.toString(),
+        metrics.percentileCompletionMS.p95.toString(),
+        metrics.percentileCompletionMS.aboveValueMaxCount.toString(),
+        metrics.percentileCompletionMS.maxValue.toString(),
+      ],
+    ];
+    rightJustifyColumns(percentileTable, range(1, percentileTable[1].length));
+
+    printHeading(3, 'Percentile metrics');
+    printTable(percentileTable);
+
     var distribution = metrics.distributionCompletionMS.displayString();
+    printHeading(3, 'Completion ms distribution');
     print('${metrics.name}: $distribution');
     print('');
   }
@@ -2083,6 +2116,12 @@
   }
 }
 
+extension on num {
+  String asPercentage([int fractionDigits = 1]) =>
+      '${(this * 100).toStringAsFixed(fractionDigits)}%'
+          .padLeft(4 + fractionDigits);
+}
+
 extension AvailableSuggestionsExtension on protocol.AvailableSuggestion {
   // TODO(jwren) I am not sure if we want CompletionSuggestionKind.INVOCATION in
   // call cases here, to iterate I need to figure out why this algorithm is

diff --git a/pkg/analysis_server/tool/code_completion/metrics_util.dart b/pkg/analysis_server/tool/code_completion/metrics_util.dart
index 673f1bb..8899842 100644
--- a/pkg/analysis_server/tool/code_completion/metrics_util.dart
+++ b/pkg/analysis_server/tool/code_completion/metrics_util.dart

@@ -3,6 +3,7 @@
 // BSD-style license that can be found in the LICENSE file.
 
 import 'dart:math' as math;
+import 'dart:typed_data';
 
 import 'package:analysis_server/src/status/pages.dart';
 
@@ -295,6 +296,136 @@
   }
 }
 
+/// A computer for calculating percentile-based metrics on a data set.
+///
+/// Specifically this tracks p50 (the median), p90, and p95.
+///
+/// See https://en.wikipedia.org/wiki/Percentile.
+class PercentileComputer {
+  final String name;
+
+  /// The value limit allowed by this computer.
+  ///
+  /// The computer can calculate percentile values for all data that range
+  /// between 0 and [valueLimit], exclusive.
+  int valueLimit;
+
+  /// An array of counts; the value at each index _i_ is the number of
+  /// occurrences of value _i_.
+  ///
+  /// Any values larger than [valueLimit] are not counted here.
+  Uint32List _counts;
+
+  /// The number of values which are less than [valueLimit].
+  int valueCount = 0;
+
+  /// The number of values greater than [valueLimit].
+  int aboveValueMaxCount = 0;
+
+  List<int> aboveValueMaxSamples = [];
+
+  int maxValue = 0;
+
+  PercentileComputer(this.name, {required this.valueLimit})
+      : _counts = Uint32List(valueLimit);
+
+  /// Calculates the median (p50) value.
+  int get median => kthPercentile(50);
+
+  /// Calculates the p90 value; the value at the 90th percentile of the data.
+  int get p90 => kthPercentile(90);
+
+  /// Calculates the p95 value; the value at the 95th percentile of the data.
+  int get p95 => kthPercentile(95);
+
+  /// Add the data from the given [computer] to this computer.
+  void addData(PercentileComputer computer) {
+    if (computer.valueLimit != valueLimit) {
+      throw UnsupportedError(
+          'Cannot combine two PercentileComputers with different valueLimit '
+          'values');
+    }
+    for (var i = 0; i < _counts.length; i++) {
+      _counts[i] += computer._counts[i];
+    }
+    valueCount += computer.valueCount;
+    aboveValueMaxCount += computer.aboveValueMaxCount;
+    for (var val in computer.aboveValueMaxSamples) {
+      if (aboveValueMaxSamples.length < 10) {
+        aboveValueMaxSamples.add(val);
+      }
+    }
+    maxValue = math.max(maxValue, computer.maxValue);
+  }
+
+  void addValue(int val) {
+    if (val > valueLimit) {
+      aboveValueMaxCount++;
+      if (aboveValueMaxSamples.length < 10) {
+        aboveValueMaxSamples.add(val);
+      }
+    } else {
+      _counts[val]++;
+      valueCount++;
+    }
+    maxValue = math.max(maxValue, val);
+  }
+
+  void clear() {
+    _counts = Uint32List(0);
+    valueCount = 0;
+    aboveValueMaxCount = 0;
+    aboveValueMaxSamples = [];
+    maxValue = 0;
+  }
+
+  /// Set the state of this computer to the state recorded in the decoded JSON
+  /// [map].
+  void fromJson(Map<String, dynamic> map) {
+    valueLimit = map['valueLimit'] as int;
+    _counts = Uint32List.fromList((map['counts'] as List<dynamic>).cast<int>());
+    valueCount = map['valueCount'] as int;
+    aboveValueMaxCount = map['aboveValueMaxCount'] as int;
+    aboveValueMaxSamples =
+        (map['aboveValueMaxSamples'] as List<dynamic>).cast<int>();
+    maxValue = map['maxValue'] as int;
+  }
+
+  /// Calculates the value at the _k_th percentile of the data.
+  int kthPercentile(int percentile) {
+    if (valueCount == 0) {
+      return 0;
+    }
+    // Linear walk through the data takes O([maxValue]) time. If this is too
+    // slow, a binary search can be implemented.
+    var targetIndex = valueCount * percentile / 100;
+    // The number of values represented by walking the counts.
+    var accumulation = 0;
+    for (var i = 0; i < _counts.length; i++) {
+      accumulation += _counts[i];
+      if (accumulation > targetIndex) {
+        // We've now accounted for [targetIndex] values, which includes the
+        // median value.
+        return i;
+      }
+    }
+    // The median value is in the very highest expected possible value.
+    return valueLimit;
+  }
+
+  /// Return a map used to represent this computer in a JSON structure.
+  Map<String, dynamic> toJson() {
+    return {
+      'counts': _counts,
+      'valueLimit': valueLimit,
+      'valueCount': valueCount,
+      'aboveValueMaxCount': aboveValueMaxCount,
+      'aboveValueMaxSamples': aboveValueMaxSamples,
+      'maxValue': maxValue,
+    };
+  }
+}
+
 /// An immutable class to represent the placement in some list, for example '2nd
 /// place out of 5'.
 class Place {
commit	6660e3c9a0dd4e35aaea4b43f786a91f31b2b9f6	[log] [tgz]
author	Sam Rawlins <srawlins@google.com>	Wed Apr 13 20:48:03 2022 +0000
committer	Commit Bot <commit-bot@chromium.org>	Wed Apr 13 20:48:03 2022 +0000
tree	532109fcf68d5e4f62f70d53c09afa9814b6a688
parent	3514a3ed930f3dd51c3953d51db406db6b3f7c81 [diff]