// Copyright (c) 2026, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Regression benchmark for https://github.com/dart-lang/sdk/issues/63217 and
// https://github.com/dart-lang/sdk/issues/53662: the five binary operators on
// Int32x4 (+, -, |, &, ^) were not specialized in AOT mode, so `Int32x4List`
// loops fell back to boxed runtime calls that ran 10-70x slower than the
// hand-written scalar version or than the JIT.
//
// For every operator there is a scalar variant and a SIMD variant over the
// same Uint32List buffer, so the benchmark suite exposes both the absolute
// cost of each SIMD op and its speedup over the scalar baseline.
import 'dart:typed_data';
import 'package:benchmark_harness/benchmark_harness.dart';
/// Number of 32-bit words in each operand buffer.
///
/// The SIMD variants view the buffers as `Int32x4List`s holding `length >> 2`
/// elements, so a value that is not a multiple of 4 would leave the trailing
/// words untouched by them while the scalar variants still process them.
const words = 2048;
/// Common fixture for the `SimdInt32x4.*` benchmarks.
///
/// Owns the two operand buffers of [words] 32-bit elements each and fills
/// them with deterministic bit patterns in [setup]. Subclasses supply the
/// measured kernel by overriding `run()`.
abstract class SimdBench extends BenchmarkBase {
  /// Creates a benchmark reported under the name `SimdInt32x4.<name>`.
  SimdBench(String name) : super('SimdInt32x4.$name');

  /// Left-hand operand; the kernels in this file write their result back
  /// into it.
  ///
  /// Allocated eagerly (zero-filled) rather than as a `late final` field
  /// assigned in [setup], so that calling [setup] more than once on the same
  /// instance refills the data instead of throwing a
  /// `LateInitializationError`.
  final Uint32List a = Uint32List(words);

  /// Right-hand operand.
  final Uint32List b = Uint32List(words);

  /// Resets [a] and [b] to their initial, index-dependent bit patterns.
  ///
  /// Safe to call repeatedly: the buffers are reused and every element is
  /// overwritten.
  @override
  void setup() {
    for (var i = 0; i < words; i++) {
      a[i] = 0xA5A5A5A5 ^ i;
      b[i] = 0x5A5A5A5A ^ (i * 31);
    }
  }
}
/// Scalar baseline for `|`: ORs `b` into `a` one 32-bit word at a time.
class OrScalar extends SimdBench {
  OrScalar() : super('orScalar');

  /// Performs a single pass of `a[i] |= b[i]` over the whole buffer.
  @override
  void run() {
    final wordCount = a.length;
    var index = 0;
    while (index < wordCount) {
      a[index] |= b[index];
      index++;
    }
  }
}
/// SIMD variant of the `|` benchmark, combining four 32-bit lanes per step.
class OrSimd extends SimdBench {
  OrSimd() : super('orSimd');

  /// Views both buffers as [Int32x4List]s and ORs them element by element,
  /// storing each result back through the view of `a`.
  @override
  void run() {
    // `length >> 2` turns the 32-bit word count into a count of 4-lane
    // vectors.
    final dst = Int32x4List.view(a.buffer, a.offsetInBytes, a.length >> 2);
    final src = Int32x4List.view(b.buffer, b.offsetInBytes, b.length >> 2);
    for (var k = 0; k < dst.length; k += 1) {
      dst[k] = dst[k] | src[k];
    }
  }
}
/// Scalar baseline for `&`: ANDs `b` into `a` one 32-bit word at a time.
class AndScalar extends SimdBench {
  AndScalar() : super('andScalar');

  /// Performs a single pass of `a[i] &= b[i]` over the whole buffer.
  @override
  void run() {
    final wordCount = a.length;
    var index = 0;
    while (index < wordCount) {
      a[index] &= b[index];
      index++;
    }
  }
}
/// SIMD variant of the `&` benchmark, combining four 32-bit lanes per step.
class AndSimd extends SimdBench {
  AndSimd() : super('andSimd');

  /// Views both buffers as [Int32x4List]s and ANDs them element by element,
  /// storing each result back through the view of `a`.
  @override
  void run() {
    // `length >> 2` turns the 32-bit word count into a count of 4-lane
    // vectors.
    final dst = Int32x4List.view(a.buffer, a.offsetInBytes, a.length >> 2);
    final src = Int32x4List.view(b.buffer, b.offsetInBytes, b.length >> 2);
    for (var k = 0; k < dst.length; k += 1) {
      dst[k] = dst[k] & src[k];
    }
  }
}
/// Scalar baseline for `^`: XORs `b` into `a` one 32-bit word at a time.
class XorScalar extends SimdBench {
  XorScalar() : super('xorScalar');

  /// Performs a single pass of `a[i] ^= b[i]` over the whole buffer.
  @override
  void run() {
    final wordCount = a.length;
    var index = 0;
    while (index < wordCount) {
      a[index] ^= b[index];
      index++;
    }
  }
}
/// SIMD variant of the `^` benchmark, combining four 32-bit lanes per step.
class XorSimd extends SimdBench {
  XorSimd() : super('xorSimd');

  /// Views both buffers as [Int32x4List]s and XORs them element by element,
  /// storing each result back through the view of `a`.
  @override
  void run() {
    // `length >> 2` turns the 32-bit word count into a count of 4-lane
    // vectors.
    final dst = Int32x4List.view(a.buffer, a.offsetInBytes, a.length >> 2);
    final src = Int32x4List.view(b.buffer, b.offsetInBytes, b.length >> 2);
    for (var k = 0; k < dst.length; k += 1) {
      dst[k] = dst[k] ^ src[k];
    }
  }
}
/// Scalar baseline for `+`: adds `b` to `a` one 32-bit word at a time.
class AddScalar extends SimdBench {
  AddScalar() : super('addScalar');

  /// Performs a single pass of `a[i] = a[i] + b[i]` over the whole buffer.
  ///
  /// The sum is stored into a [Uint32List], which keeps only its low 32 bits.
  @override
  void run() {
    final wordCount = a.length;
    var index = 0;
    while (index < wordCount) {
      a[index] = a[index] + b[index];
      index++;
    }
  }
}
/// SIMD variant of the `+` benchmark, combining four 32-bit lanes per step.
class AddSimd extends SimdBench {
  AddSimd() : super('addSimd');

  /// Views both buffers as [Int32x4List]s and adds them element by element,
  /// storing each result back through the view of `a`.
  @override
  void run() {
    // `length >> 2` turns the 32-bit word count into a count of 4-lane
    // vectors.
    final dst = Int32x4List.view(a.buffer, a.offsetInBytes, a.length >> 2);
    final src = Int32x4List.view(b.buffer, b.offsetInBytes, b.length >> 2);
    for (var k = 0; k < dst.length; k += 1) {
      dst[k] = dst[k] + src[k];
    }
  }
}
/// Scalar baseline for `-`: subtracts `b` from `a` one 32-bit word at a time.
class SubScalar extends SimdBench {
  SubScalar() : super('subScalar');

  /// Performs a single pass of `a[i] = a[i] - b[i]` over the whole buffer.
  ///
  /// The difference is stored into a [Uint32List], which keeps only its low
  /// 32 bits.
  @override
  void run() {
    final wordCount = a.length;
    var index = 0;
    while (index < wordCount) {
      a[index] = a[index] - b[index];
      index++;
    }
  }
}
/// SIMD variant of the `-` benchmark, combining four 32-bit lanes per step.
class SubSimd extends SimdBench {
  SubSimd() : super('subSimd');

  /// Views both buffers as [Int32x4List]s and subtracts them element by
  /// element, storing each result back through the view of `a`.
  @override
  void run() {
    // `length >> 2` turns the 32-bit word count into a count of 4-lane
    // vectors.
    final dst = Int32x4List.view(a.buffer, a.offsetInBytes, a.length >> 2);
    final src = Int32x4List.view(b.buffer, b.offsetInBytes, b.length >> 2);
    for (var k = 0; k < dst.length; k += 1) {
      dst[k] = dst[k] - src[k];
    }
  }
}
/// Exercises every benchmark variant once, then reports each one in order.
void main() {
  // Constructor tear-offs, so each pass below builds its own instances.
  final factories = <BenchmarkBase Function()>[
    OrScalar.new,
    OrSimd.new,
    AndScalar.new,
    AndSimd.new,
    XorScalar.new,
    XorSimd.new,
    AddScalar.new,
    AddSimd.new,
    SubScalar.new,
    SubSimd.new,
  ];

  // First pass: set up a throwaway instance of each benchmark and execute
  // its kernel twice. Presumably this ensures every `run()` body has been
  // executed before any of them is timed.
  for (final create in factories) {
    final warm = create();
    warm.setup();
    warm.run();
    warm.run();
  }

  // Second pass: measure and print each benchmark on a fresh instance.
  for (final create in factories) {
    create().report();
  }
}