[vm/simd] Add Float64x2 clamp simd implementation
Fixes https://github.com/dart-lang/sdk/issues/40427
TEST=ci, float64x2_clamp_test
Change-Id: I12618c37135feecffb115ce4aca02af1ecb03167
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/243848
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Alexander Aprelev <aam@google.com>
diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h
index 6b6c9019..55899ac 100644
--- a/runtime/vm/compiler/backend/il.h
+++ b/runtime/vm/compiler/backend/il.h
@@ -9433,6 +9433,7 @@
M(1, _, Float32x4Abs, (Float32x4), Float32x4) \
M(1, _, Float64x2Abs, (Float64x2), Float64x2) \
M(3, _, Float32x4Clamp, (Float32x4, Float32x4, Float32x4), Float32x4) \
+ M(3, _, Float64x2Clamp, (Float64x2, Float64x2, Float64x2), Float64x2) \
M(1, _, Float64x2GetX, (Float64x2), Double) \
M(1, _, Float64x2GetY, (Float64x2), Double) \
M(2, _, Float64x2WithX, (Float64x2, Double), Float64x2) \
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index fad4204..0f1784c 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -5259,6 +5259,36 @@
__ vmaxqs(result, result, lower);
}
+DEFINE_EMIT(Float64x2Clamp,
+ (QRegisterView result,
+ QRegisterView left,
+ QRegisterView lower,
+ QRegisterView upper)) {
+ compiler::Label done0, done1;
+ // Per lane: result = max(min(left, upper), lower) for ordered operands.
+ // NOTE(review): when left or upper is NaN, LT selects left and VS skips the
+ // lower-bound step; a NaN lower is resolved by the LE move. Confirm intent.
+ __ vcmpd(left.d(0), upper.d(0)); // lane 0: left vs upper
+ __ vmstat();
+ __ vmovd(result.d(0), upper.d(0), GE); // left >= upper: cap at upper
+ __ vmovd(result.d(0), left.d(0), LT); // less than or unordered(NaN)
+ __ b(&done0, VS); // at least one argument was NaN
+ __ vcmpd(result.d(0), lower.d(0)); // lane 0: capped value vs lower
+ __ vmstat();
+ __ vmovd(result.d(0), lower.d(0), LE); // at or below lower: raise to lower
+ __ Bind(&done0);
+
+ __ vcmpd(left.d(1), upper.d(1)); // lane 1: left vs upper
+ __ vmstat();
+ __ vmovd(result.d(1), upper.d(1), GE); // left >= upper: cap at upper
+ __ vmovd(result.d(1), left.d(1), LT); // less than or unordered(NaN)
+ __ b(&done1, VS); // at least one argument was NaN
+ __ vcmpd(result.d(1), lower.d(1)); // lane 1: capped value vs lower
+ __ vmstat();
+ __ vmovd(result.d(1), lower.d(1), LE); // at or below lower: raise to lower
+ __ Bind(&done1);
+}
+
// Low (< 7) Q registers are needed for the vmovs instruction.
// TODO(dartbug.com/30953) support register range constraints in the regalloc.
DEFINE_EMIT(Float32x4With,
@@ -5564,6 +5594,7 @@
CASE(Int32x4ToFloat32x4) \
____(Simd32x4ToSimd32x4Convertion) \
SIMPLE(Float32x4Clamp) \
+ SIMPLE(Float64x2Clamp) \
CASE(Float32x4WithX) \
CASE(Float32x4WithY) \
CASE(Float32x4WithZ) \
diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc
index fd52cc1..ac442ee 100644
--- a/runtime/vm/compiler/backend/il_arm64.cc
+++ b/runtime/vm/compiler/backend/il_arm64.cc
@@ -4427,6 +4427,13 @@
__ vmaxs(result, result, lower);
}
+DEFINE_EMIT(
+ Float64x2Clamp,
+ (VRegister result, VRegister value, VRegister lower, VRegister upper)) {
+ __ vmind(result, value, upper); // cap at upper: result = min(value, upper)
+ __ vmaxd(result, result, lower); // raise to lower: result = max(result, lower)
+}
+
DEFINE_EMIT(Float32x4With,
(VRegister result, VRegister replacement, VRegister value)) {
__ fcvtsd(VTMP, replacement);
@@ -4619,6 +4626,8 @@
____(SimdZero) \
CASE(Float32x4Clamp) \
____(Float32x4Clamp) \
+ CASE(Float64x2Clamp) \
+ ____(Float64x2Clamp) \
CASE(Float32x4WithX) \
CASE(Float32x4WithY) \
CASE(Float32x4WithZ) \
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index 8a2ec5a..1915491 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -4578,6 +4578,15 @@
__ maxps(left, lower);
}
+DEFINE_EMIT(Float64x2Clamp,
+ (SameAsFirstInput,
+ XmmRegister left,
+ XmmRegister lower,
+ XmmRegister upper)) {
+ __ minpd(left, upper); // cap at upper, in place: left = min(left, upper)
+ __ maxpd(left, lower); // raise to lower, in place: left = max(left, lower)
+}
+
DEFINE_EMIT(Int32x4FromInts,
(XmmRegister result, Register, Register, Register, Register)) {
// TODO(dartbug.com/30949) avoid transfer through memory.
@@ -4727,6 +4736,7 @@
SIMPLE(Float32x4Zero) \
SIMPLE(Float64x2Zero) \
SIMPLE(Float32x4Clamp) \
+ SIMPLE(Float64x2Clamp) \
CASE(Int32x4GetFlagX) \
CASE(Int32x4GetFlagY) \
CASE(Int32x4GetFlagZ) \
diff --git a/runtime/vm/compiler/backend/il_riscv.cc b/runtime/vm/compiler/backend/il_riscv.cc
index 3355081..674370e 100644
--- a/runtime/vm/compiler/backend/il_riscv.cc
+++ b/runtime/vm/compiler/backend/il_riscv.cc
@@ -4722,6 +4722,12 @@
UNIMPLEMENTED();
}
+DEFINE_EMIT(
+ Float64x2Clamp,
+ (FRegister result, FRegister value, FRegister lower, FRegister upper)) {
+ UNIMPLEMENTED(); // stub: no RISC-V code is emitted for Float64x2Clamp here
+}
+
DEFINE_EMIT(Float32x4With,
(FRegister result, FRegister replacement, FRegister value)) {
UNIMPLEMENTED();
@@ -4821,6 +4827,8 @@
____(SimdZero) \
CASE(Float32x4Clamp) \
____(Float32x4Clamp) \
+ CASE(Float64x2Clamp) \
+ ____(Float64x2Clamp) \
CASE(Float32x4WithX) \
CASE(Float32x4WithY) \
CASE(Float32x4WithZ) \
diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc
index 236ff1a..b6b58ce 100644
--- a/runtime/vm/compiler/backend/il_x64.cc
+++ b/runtime/vm/compiler/backend/il_x64.cc
@@ -4806,6 +4806,15 @@
__ maxps(value, lower);
}
+DEFINE_EMIT(Float64x2Clamp,
+ (SameAsFirstInput,
+ XmmRegister value,
+ XmmRegister lower,
+ XmmRegister upper)) {
+ __ minpd(value, upper); // cap at upper, in place: value = min(value, upper)
+ __ maxpd(value, lower); // raise to lower, in place: value = max(value, lower)
+}
+
DEFINE_EMIT(Int32x4FromInts,
(XmmRegister result, Register, Register, Register, Register)) {
// TODO(dartbug.com/30949) avoid transfer through memory.
@@ -4953,6 +4962,7 @@
SIMPLE(Float32x4Zero) \
SIMPLE(Float64x2Zero) \
SIMPLE(Float32x4Clamp) \
+ SIMPLE(Float64x2Clamp) \
CASE(Int32x4GetFlagX) \
CASE(Int32x4GetFlagY) \
____(Int32x4GetFlagXorY) \
diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc
index be5af29..575121f 100644
--- a/runtime/vm/compiler/backend/inliner.cc
+++ b/runtime/vm/compiler/backend/inliner.cc
@@ -4072,6 +4072,7 @@
case MethodRecognizer::kFloat32x4WithZ:
case MethodRecognizer::kFloat32x4Zero:
case MethodRecognizer::kFloat64x2Abs:
+ case MethodRecognizer::kFloat64x2Clamp:
case MethodRecognizer::kFloat64x2FromDoubles:
case MethodRecognizer::kFloat64x2GetSignMask:
case MethodRecognizer::kFloat64x2GetX:
diff --git a/runtime/vm/compiler/recognized_methods_list.h b/runtime/vm/compiler/recognized_methods_list.h
index e887d24..98e305e 100644
--- a/runtime/vm/compiler/recognized_methods_list.h
+++ b/runtime/vm/compiler/recognized_methods_list.h
@@ -185,6 +185,7 @@
V(_Float64x2, get:y, Float64x2GetY, 0x27cae053) \
V(_Float64x2, unary-, Float64x2Negate, 0x958a0d28) \
V(_Float64x2, abs, Float64x2Abs, 0x9a24c75e) \
+ V(_Float64x2, clamp, Float64x2Clamp, 0xfddc1533) \
V(_Float64x2, sqrt, Float64x2Sqrt, 0x93d543c8) \
V(_Float64x2, get:signMask, Float64x2GetSignMask, 0x7c6b11ea) \
V(_Float64x2, scale, Float64x2Scale, 0x52959118) \
diff --git a/sdk/lib/_internal/vm/lib/typed_data_patch.dart b/sdk/lib/_internal/vm/lib/typed_data_patch.dart
index 68c63b1..2522ca4 100644
--- a/sdk/lib/_internal/vm/lib/typed_data_patch.dart
+++ b/sdk/lib/_internal/vm/lib/typed_data_patch.dart
@@ -4031,6 +4031,8 @@
@pragma("vm:exact-result-type", _Float64x2)
@pragma("vm:external-name", "Float64x2_abs")
external Float64x2 abs();
+ @pragma("vm:recognized", "other")
+ @pragma("vm:exact-result-type", _Float64x2)
@pragma("vm:external-name", "Float64x2_clamp")
external Float64x2 clamp(Float64x2 lowerLimit, Float64x2 upperLimit);
@pragma("vm:recognized", "other")
diff --git a/tests/lib/typed_data/float64x2_clamp_test.dart b/tests/lib/typed_data/float64x2_clamp_test.dart
new file mode 100644
index 0000000..24636f3
--- /dev/null
+++ b/tests/lib/typed_data/float64x2_clamp_test.dart
@@ -0,0 +1,74 @@
+// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+// VMOptions=--intrinsify --optimization-counter-threshold=10 --no-background-compilation
+// VMOptions=--no-intrinsify --optimization-counter-threshold=10 --no-background-compilation
+
+// Library tag to be able to run in html test framework.
+library float64x2_clamp_test;
+
+import 'dart:math';
+import 'dart:typed_data';
+import 'package:expect/expect.dart';
+
+void testClampLowerGreaterThanUpper() {
+  final lower = Float64x2(1.0, 1.0); // deliberately greater than `upper`
+  final upper = Float64x2(-1.0, -1.0);
+  final zero = Float64x2.zero();
+  final clamped = zero.clamp(lower, upper);
+  Expect.equals(1.0, clamped.x); // (expected, actual): lower limit wins
+  Expect.equals(1.0, clamped.y);
+}
+
+void testClamp() {
+  final lower = Float64x2(-1.0, -1.0);
+  final upper = Float64x2(1.0, 1.0);
+  final zero = Float64x2.zero(); // already inside [lower, upper]
+  final clamped = zero.clamp(lower, upper);
+  Expect.equals(0.0, clamped.x); // (expected, actual): value is unchanged
+  Expect.equals(0.0, clamped.y);
+}
+
+void testNonZeroClamp() {
+  final xLimit = pow(123456.789, 12.1) as double; // finite; 123.1 overflowed
+  final lower = Float64x2(-xLimit, -234567.89);
+  final upper = Float64x2(xLimit, 234567.89);
+  final value = Float64x2(-pow(123456789.123, 12.1) as double, 234567890.123);
+  final clamped = value.clamp(lower, upper);
+  Expect.equals(-xLimit, clamped.x); // below lower.x, so raised to it
+  Expect.equals(234567.89, clamped.y); // above upper.y, so capped at it
+}
+
+Float64x2 negativeZeroClamp() {
+  final Float64x2 minusZero = -Float64x2.zero(); // negated zero vector
+  return minusZero.clamp(minusZero, Float64x2.zero()); // limits: [-0.0, 0.0]
+}
+
+Float64x2 zeroClamp() {
+  final Float64x2 lowerLimit = -Float64x2(1.0, 1.0); // negated (1.0, 1.0)
+  return Float64x2.zero().clamp(lowerLimit, -Float64x2.zero()); // upper: -zero
+}
+
+void testNegativeZeroClamp(Float64x2 unopt) {
+  final res = negativeZeroClamp(); // compared lane by lane against `unopt`
+  Expect.equals(0, res.x.compareTo(unopt.x)); // (expected, actual)
+  Expect.equals(0, res.y.compareTo(unopt.y));
+}
+
+void testZeroClamp(Float64x2 unopt) {
+  final res = zeroClamp(); // compared lane by lane against `unopt`
+  Expect.equals(0, res.x.compareTo(unopt.x)); // (expected, actual)
+  Expect.equals(0, res.y.compareTo(unopt.y));
+}
+
+main() {
+  final negZeroBaseline = negativeZeroClamp(); // captured once, before the loop
+  final zeroBaseline = zeroClamp();
+  for (var round = 0; round < 2000; ++round) {
+    testClampLowerGreaterThanUpper();
+    testClamp();
+    testNonZeroClamp();
+    testNegativeZeroClamp(negZeroBaseline); // later runs must match baseline
+    testZeroClamp(zeroBaseline);
+  }
+}
diff --git a/tests/lib_2/typed_data/float64x2_clamp_test.dart b/tests/lib_2/typed_data/float64x2_clamp_test.dart
new file mode 100644
index 0000000..3bbe32f
--- /dev/null
+++ b/tests/lib_2/typed_data/float64x2_clamp_test.dart
@@ -0,0 +1,75 @@
+// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+// VMOptions=--optimization-counter-threshold=10 --no-background-compilation
+
+// @dart = 2.9
+
+// Library tag to be able to run in html test framework.
+library float64x2_clamp_test;
+
+import 'dart:math';
+import 'dart:typed_data';
+import 'package:expect/expect.dart';
+
+void testClampLowerGreaterThanUpper() {
+  final lower = Float64x2(1.0, 1.0); // deliberately greater than `upper`
+  final upper = Float64x2(-1.0, -1.0);
+  final zero = Float64x2.zero();
+  final clamped = zero.clamp(lower, upper);
+  Expect.equals(1.0, clamped.x); // (expected, actual): lower limit wins
+  Expect.equals(1.0, clamped.y);
+}
+
+void testClamp() {
+  final lower = Float64x2(-1.0, -1.0);
+  final upper = Float64x2(1.0, 1.0);
+  final zero = Float64x2.zero(); // already inside [lower, upper]
+  final clamped = zero.clamp(lower, upper);
+  Expect.equals(0.0, clamped.x); // (expected, actual): value is unchanged
+  Expect.equals(0.0, clamped.y);
+}
+
+void testNonZeroClamp() {
+  final xLimit = pow(123456.789, 12.1) as double; // finite; 123.1 overflowed
+  final lower = Float64x2(-xLimit, -234567.89);
+  final upper = Float64x2(xLimit, 234567.89);
+  final value = Float64x2(-pow(123456789.123, 12.1) as double, 234567890.123);
+  final clamped = value.clamp(lower, upper);
+  Expect.equals(-xLimit, clamped.x); // below lower.x, so raised to it
+  Expect.equals(234567.89, clamped.y); // above upper.y, so capped at it
+}
+
+Float64x2 negativeZeroClamp() {
+  final Float64x2 minusZero = -Float64x2.zero(); // negated zero vector
+  return minusZero.clamp(minusZero, Float64x2.zero()); // limits: [-0.0, 0.0]
+}
+
+Float64x2 zeroClamp() {
+  final Float64x2 lowerLimit = -Float64x2(1.0, 1.0); // negated (1.0, 1.0)
+  return Float64x2.zero().clamp(lowerLimit, -Float64x2.zero()); // upper: -zero
+}
+
+void testNegativeZeroClamp(Float64x2 unopt) {
+  final res = negativeZeroClamp(); // compared lane by lane against `unopt`
+  Expect.equals(0, res.x.compareTo(unopt.x)); // (expected, actual)
+  Expect.equals(0, res.y.compareTo(unopt.y));
+}
+
+void testZeroClamp(Float64x2 unopt) {
+  final res = zeroClamp(); // compared lane by lane against `unopt`
+  Expect.equals(0, res.x.compareTo(unopt.x)); // (expected, actual)
+  Expect.equals(0, res.y.compareTo(unopt.y));
+}
+
+main() {
+  final negZeroBaseline = negativeZeroClamp(); // captured once, before the loop
+  final zeroBaseline = zeroClamp();
+  for (var round = 0; round < 2000; ++round) {
+    testClampLowerGreaterThanUpper();
+    testClamp();
+    testNonZeroClamp();
+    testNegativeZeroClamp(negZeroBaseline); // later runs must match baseline
+    testZeroClamp(zeroBaseline);
+  }
+}