[benchmarks] Add MemoryCopy benchmark suite.
The MemoryCopy benchmark suite measures the overhead of copying
data between compatible TypedData or Pointer values.
Change-Id: Iaf5ea27b7f9177f4800880da36234afd2b908db2
Bug: https://github.com/dart-lang/sdk/issues/42072
Bug: b/294114694
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/318661
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
diff --git a/benchmarks/MemoryCopy/dart/MemoryCopy.dart b/benchmarks/MemoryCopy/dart/MemoryCopy.dart
new file mode 100644
index 0000000..68ca255
--- /dev/null
+++ b/benchmarks/MemoryCopy/dart/MemoryCopy.dart
@@ -0,0 +1,427 @@
+// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+// Micro-benchmarks for copying typed data lists.
+
+import 'dart:ffi';
+import 'dart:math';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:ffi/ffi.dart';
+
+const maxSizeInBytes = 10 * 1024 * 1024;
+
+// A modified version of BenchmarkBase from package:benchmark_harness where
+// - the run() method takes a number of rounds, so that there is only one run()
+// call per measurement and thus the overhead of calling the run() method is
+// the same across subclass results.
+// - the measureFor() method returns the number of bytes transfered per second,
+// not the number of microseconds per iteration (round).
+abstract class MemoryCopyBenchmark {
+ final String name;
+ final int bytes;
+
+ MemoryCopyBenchmark(String name, this.bytes) : name = 'MemoryCopy.$name';
+
+ static const targetBatchSizeInBytes = 32 * 1024;
+
+ // Returns the number of bytes copied per second.
+ double measureFor(Duration minDuration) {
+ // The logic below is based off of BenchmarkBase._measureForImpl.
+ // We can't use BenchmarkBase.measureFor directly, because
+ // * it calls the function in a loop instead of passing the number of
+ // desired iterations to the function being called. Here, method
+ // invocation would dominate the actual body for small byte counts.
+ // * it doesn't provide the caller with the number of iterations performed,
+ // which we need to calculate the number of bytes transferred.
+
+ // Start off with enough rounds to ensure a minimum number of bytes copied
+ // per run() invocation.
+ int rounds = max(targetBatchSizeInBytes ~/ bytes, 1);
+
+ // If running a long measurement permit some amount of measurement jitter
+ // to avoid discarding results that almost, but not quite, reach the minimum
+ // duration requested.
+ final allowedJitter = Duration(
+ microseconds: minDuration.inSeconds > 0
+ ? (minDuration.inMicroseconds * 0.1).floor()
+ : 0);
+
+ final watch = Stopwatch()..start();
+ while (true) {
+ // Try running for the current number of rounds and see if that reaches
+ // the minimum duration requested, so we only get the elapsed time from
+ // the StopWatch once for the final results used.
+ watch.reset();
+ run(rounds);
+ final elapsed = watch.elapsed;
+ final numberOfBytesCopied = rounds * bytes;
+ if (elapsed >= (minDuration - allowedJitter)) {
+ return (numberOfBytesCopied / elapsed.inMicroseconds) *
+ Duration.microsecondsPerSecond;
+ }
+ // If not, then adjust our estimate of how many iterations are needed to
+ // reach the minimum and try again.
+ rounds *= (minDuration.inMicroseconds / elapsed.inMicroseconds).ceil();
+ }
+ }
+
+ static final kValueRegexp = RegExp(r'^([0-9]+)');
+ static final kMaxLabelLength =
+ 'MemoryCopy.1048576.setRange.TypedData.Double(NanosecondsPerChar)'.length;
+ // Maximum expected number of digits on either side of the decimal point.
+ static final kMaxDigits = 16;
+
+ double measure() {
+ setup();
+
+ // Warmup for 100 ms.
+ measureFor(const Duration(milliseconds: 100));
+
+ // Run benchmark for 1 second.
+ final double result = measureFor(const Duration(seconds: 1));
+
+ teardown();
+ return result;
+ }
+
+ void report({bool verbose = false, bool aligned = false}) {
+ final bytesPerSecond = measure();
+
+ void printLine(String label, String content) {
+ String contentPadding = ' ';
+ if (aligned) {
+ final matches = kValueRegexp.firstMatch(content)!;
+ final contentPaddingLength = 1 +
+ (kMaxLabelLength - label.length) +
+ max<int>(kMaxDigits - matches[1]!.length, 0);
+ contentPadding = ' ' * contentPaddingLength;
+ }
+ print('$label:$contentPadding$content');
+ }
+
+ printLine('$name(BytesPerSecond)', '$bytesPerSecond');
+ if (verbose) {
+ const nanoSecondsPerSecond = 1000 * 1000 * 1000;
+ final nanosecondsPerByte = nanoSecondsPerSecond / bytesPerSecond;
+ printLine('$name(NanosecondsPerChar)', '$nanosecondsPerByte');
+ const bytesPerMebibyte = 1024 * 1024;
+ final mibPerSecond = bytesPerSecond / bytesPerMebibyte;
+ printLine('$name(MebibytesPerSecond)', '$mibPerSecond');
+ }
+ }
+
+ void setup();
+ void teardown();
+ void run(int rounds);
+}
+
+abstract class Uint8ListCopyBenchmark extends MemoryCopyBenchmark {
+ final int count;
+ late Uint8List input;
+ late Uint8List result;
+
+ Uint8ListCopyBenchmark(String method, int bytes)
+ : count = bytes,
+ super('$bytes.$method.TypedData.Uint8', bytes);
+
+ @override
+ void setup() {
+ input = Uint8List(count);
+ for (int i = 0; i < count; ++i) {
+ input[i] = (i + 3) & 0xff;
+ }
+ result = Uint8List(maxSizeInBytes);
+ }
+
+ @override
+ void teardown() {
+ for (int i = 0; i < count; ++i) {
+ final expected = (i + 3) & 0xff;
+ if (result[i] != expected) {
+ throw 'Expected result[$i] = $expected, got ${result[i]}';
+ }
+ }
+ final expected = 0;
+ for (int i = count; i < maxSizeInBytes; ++i) {
+ if (result[i] != expected) {
+ throw 'Expected result[$i] = $expected, got ${result[i]}';
+ }
+ }
+ }
+}
+
+class Uint8ListCopyViaLoopBenchmark extends Uint8ListCopyBenchmark {
+ Uint8ListCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
+
+ @override
+ void run(int rounds) {
+ final count = this.count;
+ final input = this.input;
+ final result = this.result;
+ for (int r = 0; r < rounds; r++) {
+ for (int i = 0; i < count; i++) {
+ result[i] = input[i];
+ }
+ }
+ }
+}
+
+class Uint8ListCopyViaSetRangeBenchmark extends Uint8ListCopyBenchmark {
+ Uint8ListCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
+
+ @override
+ void run(int rounds) {
+ for (int r = 0; r < rounds; r++) {
+ result.setRange(0, count, input);
+ }
+ }
+}
+
+abstract class Float64ListCopyBenchmark extends MemoryCopyBenchmark {
+ final int count;
+ late Float64List input;
+ late Float64List result;
+
+ Float64ListCopyBenchmark(String method, int bytes)
+ : count = bytes ~/ 8,
+ super('$bytes.$method.TypedData.Double', bytes);
+
+ static const maxSizeInElements = maxSizeInBytes ~/ 8;
+
+ @override
+ void setup() {
+ input = Float64List(count);
+ for (int i = 0; i < count; ++i) {
+ input[i] = (i - 7).toDouble();
+ }
+ result = Float64List(maxSizeInElements);
+ }
+
+ @override
+ void teardown() {
+ for (int i = 0; i < count; ++i) {
+ final expected = (i - 7).toDouble();
+ if (result[i] != expected) {
+ throw 'Expected result[$i] = $expected, got ${result[i]}';
+ }
+ }
+ final expected = 0.0;
+ for (int i = count; i < maxSizeInElements; ++i) {
+ if (result[i] != expected) {
+ throw 'Expected result[$i] = $expected, got ${result[i]}';
+ }
+ }
+ }
+}
+
+class Float64ListCopyViaLoopBenchmark extends Float64ListCopyBenchmark {
+ Float64ListCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
+
+ @override
+ void run(int rounds) {
+ final count = this.count;
+ final input = this.input;
+ final result = this.result;
+ for (int r = 0; r < rounds; r++) {
+ for (int i = 0; i < count; i++) {
+ result[i] = input[i];
+ }
+ }
+ }
+}
+
+class Float64ListCopyViaSetRangeBenchmark extends Float64ListCopyBenchmark {
+ Float64ListCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
+
+ @override
+ void run(int rounds) {
+ for (int r = 0; r < rounds; r++) {
+ result.setRange(0, count, input);
+ }
+ }
+}
+
+abstract class PointerUint8CopyBenchmark extends MemoryCopyBenchmark {
+ final int count;
+ late Pointer<Uint8> input;
+ late Pointer<Uint8> result;
+
+ PointerUint8CopyBenchmark(String method, int bytes)
+ : count = bytes,
+ super('$bytes.$method.Pointer.Uint8', bytes);
+
+ @override
+ void setup() {
+ input = malloc<Uint8>(count);
+ for (var i = 0; i < count; ++i) {
+ input[i] = (i + 3) & 0xff;
+ }
+ result = calloc<Uint8>(maxSizeInBytes);
+ }
+
+ @override
+ void teardown() {
+ malloc.free(input);
+ for (var i = 0; i < count; ++i) {
+ final expected = (i + 3) & 0xff;
+ if (result[i] != expected) {
+ throw 'Expected result[$i] = $expected, got ${result[i]}';
+ }
+ }
+ final expected = 0;
+ for (var i = count; i < maxSizeInBytes; ++i) {
+ if (result[i] != expected) {
+ throw 'Expected result[$i] = $expected, got ${result[i]}';
+ }
+ }
+ calloc.free(result);
+ }
+}
+
+class PointerUint8CopyViaLoopBenchmark extends PointerUint8CopyBenchmark {
+ PointerUint8CopyViaLoopBenchmark(int bytes) : super('loop', bytes);
+
+ @override
+ void run(int rounds) {
+ // Compare the setRange version to looping using Pointer.[]/Pointer.[]=.
+ final count = this.count;
+ final input = this.input;
+ final result = this.result;
+ for (int r = 0; r < rounds; r++) {
+ for (int i = 0; i < count; i++) {
+ result[i] = input[i];
+ }
+ }
+ }
+}
+
+class PointerUint8CopyViaSetRangeBenchmark extends PointerUint8CopyBenchmark {
+ PointerUint8CopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
+
+ @override
+ void run(int rounds) {
+ for (int r = 0; r < rounds; r++) {
+ result
+ .asTypedList(maxSizeInBytes)
+ .setRange(0, count, input.asTypedList(count));
+ }
+ }
+}
+
+@Native<Void Function(Pointer<Void>, Pointer<Void>, Size)>(isLeaf: true)
+external void memcpy(Pointer<Void> to, Pointer<Void> from, int size);
+
+class PointerUint8CopyViaMemcpyBenchmark extends PointerUint8CopyBenchmark {
+ PointerUint8CopyViaMemcpyBenchmark(int bytes) : super('memcpy', bytes);
+
+ @override
+ void run(int rounds) {
+ for (int r = 0; r < rounds; r++) {
+ memcpy(result.cast(), input.cast(), count);
+ }
+ }
+}
+
+abstract class PointerDoubleCopyBenchmark extends MemoryCopyBenchmark {
+ final int count;
+ late Pointer<Double> input;
+ late Pointer<Double> result;
+
+ PointerDoubleCopyBenchmark(String method, int bytes)
+ : count = bytes ~/ 8,
+ super('$bytes.$method.Pointer.Double', bytes);
+
+ static const maxSizeInElements = maxSizeInBytes ~/ 8;
+
+ @override
+ void setup() {
+ input = malloc<Double>(count);
+ for (var i = 0; i < count; ++i) {
+ input[i] = (i - 7).toDouble();
+ }
+ result = calloc<Double>(maxSizeInElements);
+ }
+
+ @override
+ void teardown() {
+ malloc.free(input);
+ for (var i = 0; i < count; ++i) {
+ final expected = (i - 7).toDouble();
+ if (result[i] != expected) {
+ throw 'Expected result[$i] = $expected, got ${result[i]}';
+ }
+ }
+ final expected = 0.0;
+ for (var i = count; i < maxSizeInElements; ++i) {
+ if (result[i] != expected) {
+ throw 'Expected result[$i] = $expected, got ${result[i]}';
+ }
+ }
+ calloc.free(result);
+ }
+}
+
+class PointerDoubleCopyViaLoopBenchmark extends PointerDoubleCopyBenchmark {
+ PointerDoubleCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
+
+ @override
+ void run(int rounds) {
+ // Compare the setRange version to looping using Pointer.[]/Pointer.[]=.
+ final count = this.count;
+ final input = this.input;
+ final result = this.result;
+ for (int r = 0; r < rounds; r++) {
+ for (int i = 0; i < count; i++) {
+ result[i] = input[i];
+ }
+ }
+ }
+}
+
+class PointerDoubleCopyViaSetRangeBenchmark extends PointerDoubleCopyBenchmark {
+ PointerDoubleCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
+
+ @override
+ void run(int rounds) {
+ for (int r = 0; r < rounds; r++) {
+ result
+ .asTypedList(PointerDoubleCopyBenchmark.maxSizeInElements)
+ .setRange(0, count, input.asTypedList(count));
+ }
+ }
+}
+
+final argParser = ArgParser()
+ ..addFlag('verbose', abbr: 'v', help: 'Verbose output', defaultsTo: false)
+ ..addFlag('aligned',
+ abbr: 'a', help: 'Align results on initial numbers', defaultsTo: false);
+
+final defaultLengthsInBytes = [8, 64, 512, 4 * 1024, 1024 * 1024];
+
+void main(List<String> args) {
+ final results = argParser.parse(args);
+ List<int> lengthsInBytes = defaultLengthsInBytes;
+ if (results.rest.isNotEmpty) {
+ lengthsInBytes =
+ results.rest.map(int.parse).where((i) => i <= maxSizeInBytes).toList();
+ }
+ final benchmarks = [
+ for (int bytes in lengthsInBytes) ...[
+ PointerUint8CopyViaMemcpyBenchmark(bytes),
+ PointerUint8CopyViaLoopBenchmark(bytes),
+ PointerDoubleCopyViaLoopBenchmark(bytes),
+ Uint8ListCopyViaLoopBenchmark(bytes),
+ Float64ListCopyViaLoopBenchmark(bytes),
+ PointerUint8CopyViaSetRangeBenchmark(bytes),
+ PointerDoubleCopyViaSetRangeBenchmark(bytes),
+ Uint8ListCopyViaSetRangeBenchmark(bytes),
+ Float64ListCopyViaSetRangeBenchmark(bytes),
+ ],
+ ];
+ for (var bench in benchmarks) {
+ bench.report(verbose: results['verbose'], aligned: results['aligned']);
+ }
+}