blob: c55520b10a97f3b2b9c1f5401c08a42526ab8498 [file] [log] [blame]
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Micro-benchmarks for copying typed data lists.
import 'dart:ffi';
import 'dart:math';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:ffi/ffi.dart';
const maxSizeInBytes = 10 * 1024 * 1024;
final argParser = ArgParser()
..addMultiOption('length',
abbr: 'l',
help: 'Byte length to benchmark',
valueHelp: 'INT',
defaultsTo: const [])
..addFlag('mebibytes-per-second',
abbr: 'm', help: 'Show MiB/s', defaultsTo: false)
..addFlag('nanoseconds-per-byte',
abbr: 'n', help: 'Show ns/byte', defaultsTo: false)
..addFlag('bytes-per-second',
abbr: 'b', help: 'Show byte/s', defaultsTo: true)
..addFlag('verbose', abbr: 'v', help: 'Verbose output', defaultsTo: false)
..addFlag('aligned',
abbr: 'a', help: 'Align results on initial numbers', defaultsTo: false);
class Emitter {
final bool bytesPerSecond;
final bool nanosecondsPerByte;
final bool mebibytesPerSecond;
final bool _alignedOutput;
Emitter(ArgResults results)
: bytesPerSecond = results['bytes-per-second'] || results['verbose'],
nanosecondsPerByte =
results['nanoseconds-per-byte'] || results['verbose'],
mebibytesPerSecond =
results['mebibytes-per-second'] || results['verbose'],
_alignedOutput = results['aligned'];
static final kValueRegexp = RegExp(r'^([0-9]+)');
static final kMaxLabelLength =
'MemoryCopy.1048576.setRange.TypedData.Double(NanosecondsPerChar)'.length;
// Maximum expected number of digits on either side of the decimal point.
static final kMaxDigits = 16;
void printLabeledValue(String label, double value) {
final valueString = value.toString();
final buffer = StringBuffer();
buffer
..write(label)
..write(': ');
if (_alignedOutput) {
final matches = kValueRegexp.firstMatch(valueString)!;
final valuePadding = (kMaxLabelLength - label.length) +
max<int>(kMaxDigits - matches[1]!.length, 0);
buffer..write(' ' * valuePadding);
}
buffer.write(valueString);
print(buffer.toString());
}
}
// A modified version of BenchmarkBase from package:benchmark_harness where
// - the run() method takes a number of rounds, so that there is only one run()
// call per measurement and thus the overhead of calling the run() method is
// the same across subclass results.
// - the measureFor() method returns the number of bytes transfered per second,
// not the number of microseconds per iteration (round).
abstract class MemoryCopyBenchmark {
final String name;
final int bytes;
MemoryCopyBenchmark(String name, this.bytes) : name = 'MemoryCopy.$name';
static const targetBatchSizeInBytes = 32 * 1024;
// Returns the number of bytes copied per second.
double measureFor(Duration minDuration) {
// The logic below is based off of BenchmarkBase._measureForImpl.
// We can't use BenchmarkBase.measureFor directly, because
// * it calls the function in a loop instead of passing the number of
// desired iterations to the function being called. Here, method
// invocation would dominate the actual body for small byte counts.
// * it doesn't provide the caller with the number of iterations performed,
// which we need to calculate the number of bytes transferred.
// Start off with enough rounds to ensure a minimum number of bytes copied
// per run() invocation.
int rounds = max(targetBatchSizeInBytes ~/ bytes, 1);
// If running a long measurement permit some amount of measurement jitter
// to avoid discarding results that almost, but not quite, reach the minimum
// duration requested.
final allowedJitter = Duration(
microseconds: minDuration.inSeconds > 0
? (minDuration.inMicroseconds * 0.1).floor()
: 0);
final watch = Stopwatch()..start();
while (true) {
// Try running for the current number of rounds and see if that reaches
// the minimum duration requested, so we only get the elapsed time from
// the StopWatch once for the final results used.
watch.reset();
run(rounds);
final elapsed = watch.elapsed;
final numberOfBytesCopied = rounds * bytes;
if (elapsed >= (minDuration - allowedJitter)) {
return (numberOfBytesCopied / elapsed.inMicroseconds) *
Duration.microsecondsPerSecond;
}
// If not, then adjust our estimate of how many iterations are needed to
// reach the minimum and try again.
if (elapsed.inMilliseconds == 0) {
rounds *= 1000;
} else {
rounds *= (minDuration.inMicroseconds / elapsed.inMicroseconds).ceil();
}
}
}
double measure() {
setup();
// Warmup for 100 ms.
measureFor(const Duration(milliseconds: 100));
// Run benchmark for 1 second.
final double result = measureFor(const Duration(seconds: 1));
teardown();
return result;
}
void report(Emitter emitter) {
final bytesPerSecond = measure();
if (emitter.bytesPerSecond) {
emitter.printLabeledValue('$name(BytesPerSecond)', bytesPerSecond);
}
if (emitter.nanosecondsPerByte) {
const nanoSecondsPerSecond = 1000 * 1000 * 1000;
final nanosecondsPerByte = nanoSecondsPerSecond / bytesPerSecond;
emitter.printLabeledValue(
'$name(NanosecondsPerChar)', nanosecondsPerByte);
}
if (emitter.mebibytesPerSecond) {
const bytesPerMebibyte = 1024 * 1024;
final mibPerSecond = bytesPerSecond / bytesPerMebibyte;
emitter.printLabeledValue('$name(MebibytesPerSecond)', mibPerSecond);
}
}
void setup();
void teardown();
void run(int rounds);
}
abstract class Uint8ListCopyBenchmark extends MemoryCopyBenchmark {
final int count;
late Uint8List input;
late Uint8List result;
Uint8ListCopyBenchmark(String method, int bytes)
: count = bytes,
super('$bytes.$method.TypedData.Uint8', bytes);
@override
void setup() {
input = Uint8List(count);
for (int i = 0; i < count; ++i) {
input[i] = (i + 3) & 0xff;
}
result = Uint8List(maxSizeInBytes);
}
@override
void teardown() {
for (int i = 0; i < count; ++i) {
final expected = (i + 3) & 0xff;
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
final expected = 0;
for (int i = count; i < maxSizeInBytes; ++i) {
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
}
}
class Uint8ListCopyViaLoopBenchmark extends Uint8ListCopyBenchmark {
Uint8ListCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
@override
void run(int rounds) {
final count = this.count;
final input = this.input;
final result = this.result;
for (int r = 0; r < rounds; r++) {
for (int i = 0; i < count; i++) {
result[i] = input[i];
}
}
}
}
class Uint8ListCopyViaSetRangeBenchmark extends Uint8ListCopyBenchmark {
Uint8ListCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
result.setRange(0, count, input);
}
}
}
abstract class Float64ListCopyBenchmark extends MemoryCopyBenchmark {
final int count;
late Float64List input;
late Float64List result;
Float64ListCopyBenchmark(String method, int bytes)
: count = bytes ~/ 8,
super('$bytes.$method.TypedData.Double', bytes);
static const maxSizeInElements = maxSizeInBytes ~/ 8;
@override
void setup() {
input = Float64List(count);
for (int i = 0; i < count; ++i) {
input[i] = (i - 7).toDouble();
}
result = Float64List(maxSizeInElements);
}
@override
void teardown() {
for (int i = 0; i < count; ++i) {
final expected = (i - 7).toDouble();
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
final expected = 0.0;
for (int i = count; i < maxSizeInElements; ++i) {
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
}
}
class Float64ListCopyViaLoopBenchmark extends Float64ListCopyBenchmark {
Float64ListCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
@override
void run(int rounds) {
final count = this.count;
final input = this.input;
final result = this.result;
for (int r = 0; r < rounds; r++) {
for (int i = 0; i < count; i++) {
result[i] = input[i];
}
}
}
}
class Float64ListCopyViaSetRangeBenchmark extends Float64ListCopyBenchmark {
Float64ListCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
result.setRange(0, count, input);
}
}
}
abstract class PointerUint8CopyBenchmark extends MemoryCopyBenchmark {
final int count;
late Pointer<Uint8> input;
late Pointer<Uint8> result;
PointerUint8CopyBenchmark(String method, int bytes)
: count = bytes,
super('$bytes.$method.Pointer.Uint8', bytes);
@override
void setup() {
input = malloc<Uint8>(count);
for (var i = 0; i < count; ++i) {
input[i] = (i + 3) & 0xff;
}
result = calloc<Uint8>(maxSizeInBytes);
}
@override
void teardown() {
malloc.free(input);
for (var i = 0; i < count; ++i) {
final expected = (i + 3) & 0xff;
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
final expected = 0;
for (var i = count; i < maxSizeInBytes; ++i) {
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
calloc.free(result);
}
}
class PointerUint8CopyViaLoopBenchmark extends PointerUint8CopyBenchmark {
PointerUint8CopyViaLoopBenchmark(int bytes) : super('loop', bytes);
@override
void run(int rounds) {
// Compare the setRange version to looping using Pointer.[]/Pointer.[]=.
final count = this.count;
final input = this.input;
final result = this.result;
for (int r = 0; r < rounds; r++) {
for (int i = 0; i < count; i++) {
result[i] = input[i];
}
}
}
}
class PointerUint8CopyViaSetRangeBenchmark extends PointerUint8CopyBenchmark {
PointerUint8CopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
result
.asTypedList(maxSizeInBytes)
.setRange(0, count, input.asTypedList(count));
}
}
}
@Native<Void Function(Pointer<Void>, Pointer<Void>, Size)>(isLeaf: true)
external void memmove(Pointer<Void> to, Pointer<Void> from, int size);
class PointerUint8CopyViaMemmoveBenchmark extends PointerUint8CopyBenchmark {
// This particular benchmark was originally written using memcpy, but a
// better comparison is against memmove. While our benchmarks don't use
// to and from memory that overlaps, in general this case must be handled.
//
// In order to not have to change the benchmark suite in golem, we keep the
// old name for this result.
PointerUint8CopyViaMemmoveBenchmark(int bytes) : super('memcpy', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
memmove(result.cast(), input.cast(), count);
}
}
}
abstract class PointerDoubleCopyBenchmark extends MemoryCopyBenchmark {
final int count;
late Pointer<Double> input;
late Pointer<Double> result;
PointerDoubleCopyBenchmark(String method, int bytes)
: count = bytes ~/ 8,
super('$bytes.$method.Pointer.Double', bytes);
static const maxSizeInElements = maxSizeInBytes ~/ 8;
@override
void setup() {
input = malloc<Double>(count);
for (var i = 0; i < count; ++i) {
input[i] = (i - 7).toDouble();
}
result = calloc<Double>(maxSizeInElements);
}
@override
void teardown() {
malloc.free(input);
for (var i = 0; i < count; ++i) {
final expected = (i - 7).toDouble();
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
final expected = 0.0;
for (var i = count; i < maxSizeInElements; ++i) {
if (result[i] != expected) {
throw 'Expected result[$i] = $expected, got ${result[i]}';
}
}
calloc.free(result);
}
}
class PointerDoubleCopyViaLoopBenchmark extends PointerDoubleCopyBenchmark {
PointerDoubleCopyViaLoopBenchmark(int bytes) : super('loop', bytes);
@override
void run(int rounds) {
// Compare the setRange version to looping using Pointer.[]/Pointer.[]=.
final count = this.count;
final input = this.input;
final result = this.result;
for (int r = 0; r < rounds; r++) {
for (int i = 0; i < count; i++) {
result[i] = input[i];
}
}
}
}
class PointerDoubleCopyViaSetRangeBenchmark extends PointerDoubleCopyBenchmark {
PointerDoubleCopyViaSetRangeBenchmark(int bytes) : super('setRange', bytes);
@override
void run(int rounds) {
for (int r = 0; r < rounds; r++) {
result
.asTypedList(PointerDoubleCopyBenchmark.maxSizeInElements)
.setRange(0, count, input.asTypedList(count));
}
}
}
final defaultLengthsInBytes = [8, 64, 512, 4 * 1024, 1024 * 1024];
void main(List<String> args) {
final results = argParser.parse(args);
List<int> lengthsInBytes = defaultLengthsInBytes;
final emitter = Emitter(results);
if (results['length'].isNotEmpty) {
lengthsInBytes = (results['length'] as List<String>)
.map(int.parse)
.where((i) => i <= maxSizeInBytes)
.toList();
}
final filter = results.rest.firstOrNull;
final benchmarks = [
for (int bytes in lengthsInBytes) ...[
PointerUint8CopyViaMemmoveBenchmark(bytes),
PointerUint8CopyViaLoopBenchmark(bytes),
PointerDoubleCopyViaLoopBenchmark(bytes),
Uint8ListCopyViaLoopBenchmark(bytes),
Float64ListCopyViaLoopBenchmark(bytes),
PointerUint8CopyViaSetRangeBenchmark(bytes),
PointerDoubleCopyViaSetRangeBenchmark(bytes),
Uint8ListCopyViaSetRangeBenchmark(bytes),
Float64ListCopyViaSetRangeBenchmark(bytes),
],
];
for (var bench in benchmarks) {
if (filter == null || bench.name.contains(filter)) {
bench.report(emitter);
}
}
}