[vm/simd] Add Float64x2 clamp simd implementation

Fixes https://github.com/dart-lang/sdk/issues/40427

TEST=ci, float64x2_clamp_test

Change-Id: I12618c37135feecffb115ce4aca02af1ecb03167
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/243848
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Alexander Aprelev <aam@google.com>
diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h
index 6b6c9019..55899ac 100644
--- a/runtime/vm/compiler/backend/il.h
+++ b/runtime/vm/compiler/backend/il.h
@@ -9433,6 +9433,7 @@
   M(1, _, Float32x4Abs, (Float32x4), Float32x4)                                \
   M(1, _, Float64x2Abs, (Float64x2), Float64x2)                                \
   M(3, _, Float32x4Clamp, (Float32x4, Float32x4, Float32x4), Float32x4)        \
+  M(3, _, Float64x2Clamp, (Float64x2, Float64x2, Float64x2), Float64x2)        \
   M(1, _, Float64x2GetX, (Float64x2), Double)                                  \
   M(1, _, Float64x2GetY, (Float64x2), Double)                                  \
   M(2, _, Float64x2WithX, (Float64x2, Double), Float64x2)                      \
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index fad4204..0f1784c 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -5259,6 +5259,36 @@
   __ vmaxqs(result, result, lower);
 }
 
+DEFINE_EMIT(Float64x2Clamp,
+            (QRegisterView result,
+             QRegisterView left,
+             QRegisterView lower,
+             QRegisterView upper)) {
+  compiler::Label done0, done1;
+  // Per lane: result = max(min(left, upper), lower), except that
+  //   result = left  if left or upper is NaN (lower bound is then skipped),
+  //   result = lower if lower is NaN (LE also holds for unordered).
+  __ vcmpd(left.d(0), upper.d(0));
+  __ vmstat();
+  __ vmovd(result.d(0), upper.d(0), GE);
+  __ vmovd(result.d(0), left.d(0), LT);  // less than or unordered(NaN)
+  __ b(&done0, VS);                      // at least one argument was NaN
+  __ vcmpd(result.d(0), lower.d(0));
+  __ vmstat();
+  __ vmovd(result.d(0), lower.d(0), LE);  // less, equal or unordered(NaN)
+  __ Bind(&done0);
+
+  __ vcmpd(left.d(1), upper.d(1));
+  __ vmstat();
+  __ vmovd(result.d(1), upper.d(1), GE);
+  __ vmovd(result.d(1), left.d(1), LT);  // less than or unordered(NaN)
+  __ b(&done1, VS);                      // at least one argument was NaN
+  __ vcmpd(result.d(1), lower.d(1));
+  __ vmstat();
+  __ vmovd(result.d(1), lower.d(1), LE);  // less, equal or unordered(NaN)
+  __ Bind(&done1);
+}
+
 // Low (< 7) Q registers are needed for the vmovs instruction.
 // TODO(dartbug.com/30953) support register range constraints in the regalloc.
 DEFINE_EMIT(Float32x4With,
@@ -5564,6 +5594,7 @@
   CASE(Int32x4ToFloat32x4)                                                     \
   ____(Simd32x4ToSimd32x4Convertion)                                           \
   SIMPLE(Float32x4Clamp)                                                       \
+  SIMPLE(Float64x2Clamp)                                                       \
   CASE(Float32x4WithX)                                                         \
   CASE(Float32x4WithY)                                                         \
   CASE(Float32x4WithZ)                                                         \
diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc
index fd52cc1..ac442ee 100644
--- a/runtime/vm/compiler/backend/il_arm64.cc
+++ b/runtime/vm/compiler/backend/il_arm64.cc
@@ -4427,6 +4427,13 @@
   __ vmaxs(result, result, lower);
 }
 
+DEFINE_EMIT(
+    Float64x2Clamp,
+    (VRegister result, VRegister value, VRegister lower, VRegister upper)) {
+  __ vmind(result, value, upper);
+  __ vmaxd(result, result, lower);
+}
+
 DEFINE_EMIT(Float32x4With,
             (VRegister result, VRegister replacement, VRegister value)) {
   __ fcvtsd(VTMP, replacement);
@@ -4619,6 +4626,8 @@
   ____(SimdZero)                                                               \
   CASE(Float32x4Clamp)                                                         \
   ____(Float32x4Clamp)                                                         \
+  CASE(Float64x2Clamp)                                                         \
+  ____(Float64x2Clamp)                                                         \
   CASE(Float32x4WithX)                                                         \
   CASE(Float32x4WithY)                                                         \
   CASE(Float32x4WithZ)                                                         \
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index 8a2ec5a..1915491 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -4578,6 +4578,15 @@
   __ maxps(left, lower);
 }
 
+DEFINE_EMIT(Float64x2Clamp,
+            (SameAsFirstInput,
+             XmmRegister left,
+             XmmRegister lower,
+             XmmRegister upper)) {
+  __ minpd(left, upper);
+  __ maxpd(left, lower);
+}
+
 DEFINE_EMIT(Int32x4FromInts,
             (XmmRegister result, Register, Register, Register, Register)) {
   // TODO(dartbug.com/30949) avoid transfer through memory.
@@ -4727,6 +4736,7 @@
   SIMPLE(Float32x4Zero)                                                        \
   SIMPLE(Float64x2Zero)                                                        \
   SIMPLE(Float32x4Clamp)                                                       \
+  SIMPLE(Float64x2Clamp)                                                       \
   CASE(Int32x4GetFlagX)                                                        \
   CASE(Int32x4GetFlagY)                                                        \
   CASE(Int32x4GetFlagZ)                                                        \
diff --git a/runtime/vm/compiler/backend/il_riscv.cc b/runtime/vm/compiler/backend/il_riscv.cc
index 3355081..674370e 100644
--- a/runtime/vm/compiler/backend/il_riscv.cc
+++ b/runtime/vm/compiler/backend/il_riscv.cc
@@ -4722,6 +4722,12 @@
   UNIMPLEMENTED();
 }
 
+DEFINE_EMIT(
+    Float64x2Clamp,
+    (FRegister result, FRegister value, FRegister lower, FRegister upper)) {
+  UNIMPLEMENTED();
+}
+
 DEFINE_EMIT(Float32x4With,
             (FRegister result, FRegister replacement, FRegister value)) {
   UNIMPLEMENTED();
@@ -4821,6 +4827,8 @@
   ____(SimdZero)                                                               \
   CASE(Float32x4Clamp)                                                         \
   ____(Float32x4Clamp)                                                         \
+  CASE(Float64x2Clamp)                                                         \
+  ____(Float64x2Clamp)                                                         \
   CASE(Float32x4WithX)                                                         \
   CASE(Float32x4WithY)                                                         \
   CASE(Float32x4WithZ)                                                         \
diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc
index 236ff1a..b6b58ce 100644
--- a/runtime/vm/compiler/backend/il_x64.cc
+++ b/runtime/vm/compiler/backend/il_x64.cc
@@ -4806,6 +4806,15 @@
   __ maxps(value, lower);
 }
 
+DEFINE_EMIT(Float64x2Clamp,
+            (SameAsFirstInput,
+             XmmRegister value,
+             XmmRegister lower,
+             XmmRegister upper)) {
+  __ minpd(value, upper);
+  __ maxpd(value, lower);
+}
+
 DEFINE_EMIT(Int32x4FromInts,
             (XmmRegister result, Register, Register, Register, Register)) {
   // TODO(dartbug.com/30949) avoid transfer through memory.
@@ -4953,6 +4962,7 @@
   SIMPLE(Float32x4Zero)                                                        \
   SIMPLE(Float64x2Zero)                                                        \
   SIMPLE(Float32x4Clamp)                                                       \
+  SIMPLE(Float64x2Clamp)                                                       \
   CASE(Int32x4GetFlagX)                                                        \
   CASE(Int32x4GetFlagY)                                                        \
   ____(Int32x4GetFlagXorY)                                                     \
diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc
index be5af29..575121f 100644
--- a/runtime/vm/compiler/backend/inliner.cc
+++ b/runtime/vm/compiler/backend/inliner.cc
@@ -4072,6 +4072,7 @@
     case MethodRecognizer::kFloat32x4WithZ:
     case MethodRecognizer::kFloat32x4Zero:
     case MethodRecognizer::kFloat64x2Abs:
+    case MethodRecognizer::kFloat64x2Clamp:
     case MethodRecognizer::kFloat64x2FromDoubles:
     case MethodRecognizer::kFloat64x2GetSignMask:
     case MethodRecognizer::kFloat64x2GetX:
diff --git a/runtime/vm/compiler/recognized_methods_list.h b/runtime/vm/compiler/recognized_methods_list.h
index e887d24..98e305e 100644
--- a/runtime/vm/compiler/recognized_methods_list.h
+++ b/runtime/vm/compiler/recognized_methods_list.h
@@ -185,6 +185,7 @@
   V(_Float64x2, get:y, Float64x2GetY, 0x27cae053)                              \
   V(_Float64x2, unary-, Float64x2Negate, 0x958a0d28)                           \
   V(_Float64x2, abs, Float64x2Abs, 0x9a24c75e)                                 \
+  V(_Float64x2, clamp, Float64x2Clamp, 0xfddc1533)                             \
   V(_Float64x2, sqrt, Float64x2Sqrt, 0x93d543c8)                               \
   V(_Float64x2, get:signMask, Float64x2GetSignMask, 0x7c6b11ea)                \
   V(_Float64x2, scale, Float64x2Scale, 0x52959118)                             \
diff --git a/sdk/lib/_internal/vm/lib/typed_data_patch.dart b/sdk/lib/_internal/vm/lib/typed_data_patch.dart
index 68c63b1..2522ca4 100644
--- a/sdk/lib/_internal/vm/lib/typed_data_patch.dart
+++ b/sdk/lib/_internal/vm/lib/typed_data_patch.dart
@@ -4031,6 +4031,8 @@
   @pragma("vm:exact-result-type", _Float64x2)
   @pragma("vm:external-name", "Float64x2_abs")
   external Float64x2 abs();
+  @pragma("vm:recognized", "other")
+  @pragma("vm:exact-result-type", _Float64x2)
   @pragma("vm:external-name", "Float64x2_clamp")
   external Float64x2 clamp(Float64x2 lowerLimit, Float64x2 upperLimit);
   @pragma("vm:recognized", "other")
diff --git a/tests/lib/typed_data/float64x2_clamp_test.dart b/tests/lib/typed_data/float64x2_clamp_test.dart
new file mode 100644
index 0000000..24636f3
--- /dev/null
+++ b/tests/lib/typed_data/float64x2_clamp_test.dart
@@ -0,0 +1,74 @@
+// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+// VMOptions=--intrinsify --optimization-counter-threshold=10 --no-background-compilation
+// VMOptions=--no-intrinsify --optimization-counter-threshold=10 --no-background-compilation
+
+// Library tag to be able to run in html test framework.
+library float64x2_clamp_test;
+
+import 'dart:math';
+import 'dart:typed_data';
+import 'package:expect/expect.dart';
+
+void testClampLowerGreaterThanUpper() {
+  Float64x2 l = new Float64x2(1.0, 1.0);
+  Float64x2 u = new Float64x2(-1.0, -1.0);
+  Float64x2 z = new Float64x2.zero();
+  Float64x2 a = z.clamp(l, u);
+  Expect.equals(a.x, 1.0);
+  Expect.equals(a.y, 1.0);
+}
+
+void testClamp() {
+  Float64x2 l = new Float64x2(-1.0, -1.0);
+  Float64x2 u = new Float64x2(1.0, 1.0);
+  Float64x2 z = new Float64x2.zero();
+  Float64x2 a = z.clamp(l, u);
+  Expect.equals(a.x, 0.0);
+  Expect.equals(a.y, 0.0);
+}
+
+void testNonZeroClamp() {
+  // Finite magnitudes: 123456.789^123.1 overflows to infinity in a double.
+  final bound = pow(123456.789, 12.1) as double;
+  final l = Float64x2(-bound, -234567.89);
+  final u = Float64x2(bound, 234567.89);
+  final a = Float64x2(-bound * 1e6, 234567890.123).clamp(l, u);
+  Expect.equals(a.x, -bound);
+  Expect.equals(a.y, 234567.89);
+}
+
+Float64x2 negativeZeroClamp() {
+  final negZero = -Float64x2.zero();
+  return negZero.clamp(negZero, Float64x2.zero());
+}
+
+Float64x2 zeroClamp() {
+  final negOne = -Float64x2(1.0, 1.0);
+  return Float64x2.zero().clamp(negOne, -Float64x2.zero());
+}
+
+void testNegativeZeroClamp(Float64x2 unopt) {
+  final res = negativeZeroClamp();
+  Expect.equals(res.x.compareTo(unopt.x), 0);
+  Expect.equals(res.y.compareTo(unopt.y), 0);
+}
+
+void testZeroClamp(Float64x2 unopt) {
+  final res = zeroClamp();
+  Expect.equals(res.x.compareTo(unopt.x), 0);
+  Expect.equals(res.y.compareTo(unopt.y), 0);
+}
+
+main() {
+  final unoptNegZeroClamp = negativeZeroClamp();
+  final unoptZeroClamp = zeroClamp();
+  for (int i = 0; i < 2000; i++) {
+    testClampLowerGreaterThanUpper();
+    testClamp();
+    testNonZeroClamp();
+    testNegativeZeroClamp(unoptNegZeroClamp);
+    testZeroClamp(unoptZeroClamp);
+  }
+}
diff --git a/tests/lib_2/typed_data/float64x2_clamp_test.dart b/tests/lib_2/typed_data/float64x2_clamp_test.dart
new file mode 100644
index 0000000..3bbe32f
--- /dev/null
+++ b/tests/lib_2/typed_data/float64x2_clamp_test.dart
@@ -0,0 +1,75 @@
+// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+// VMOptions=--optimization-counter-threshold=10 --no-background-compilation
+
+// @dart = 2.9
+
+// Library tag to be able to run in html test framework.
+library float64x2_clamp_test;
+
+import 'dart:math';
+import 'dart:typed_data';
+import 'package:expect/expect.dart';
+
+void testClampLowerGreaterThanUpper() {
+  Float64x2 l = new Float64x2(1.0, 1.0);
+  Float64x2 u = new Float64x2(-1.0, -1.0);
+  Float64x2 z = new Float64x2.zero();
+  Float64x2 a = z.clamp(l, u);
+  Expect.equals(a.x, 1.0);
+  Expect.equals(a.y, 1.0);
+}
+
+void testClamp() {
+  Float64x2 l = new Float64x2(-1.0, -1.0);
+  Float64x2 u = new Float64x2(1.0, 1.0);
+  Float64x2 z = new Float64x2.zero();
+  Float64x2 a = z.clamp(l, u);
+  Expect.equals(a.x, 0.0);
+  Expect.equals(a.y, 0.0);
+}
+
+void testNonZeroClamp() {
+  // Finite magnitudes: 123456.789^123.1 overflows to infinity in a double.
+  final bound = pow(123456.789, 12.1) as double;
+  final l = Float64x2(-bound, -234567.89);
+  final u = Float64x2(bound, 234567.89);
+  final a = Float64x2(-bound * 1e6, 234567890.123).clamp(l, u);
+  Expect.equals(a.x, -bound);
+  Expect.equals(a.y, 234567.89);
+}
+
+Float64x2 negativeZeroClamp() {
+  final negZero = -Float64x2.zero();
+  return negZero.clamp(negZero, Float64x2.zero());
+}
+
+Float64x2 zeroClamp() {
+  final negOne = -Float64x2(1.0, 1.0);
+  return Float64x2.zero().clamp(negOne, -Float64x2.zero());
+}
+
+void testNegativeZeroClamp(Float64x2 unopt) {
+  final res = negativeZeroClamp();
+  Expect.equals(res.x.compareTo(unopt.x), 0);
+  Expect.equals(res.y.compareTo(unopt.y), 0);
+}
+
+void testZeroClamp(Float64x2 unopt) {
+  final res = zeroClamp();
+  Expect.equals(res.x.compareTo(unopt.x), 0);
+  Expect.equals(res.y.compareTo(unopt.y), 0);
+}
+
+main() {
+  final unoptNegZeroClamp = negativeZeroClamp();
+  final unoptZeroClamp = zeroClamp();
+  for (int i = 0; i < 2000; i++) {
+    testClampLowerGreaterThanUpper();
+    testClamp();
+    testNonZeroClamp();
+    testNegativeZeroClamp(unoptNegZeroClamp);
+    testZeroClamp(unoptZeroClamp);
+  }
+}