[vm/compiler] Let SIMD values become constants.

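With this change the flow graph folds Float32x4/Int32x4/Float64x2
values built entirely from constant inputs: SimdOpInstr::Canonicalize
replaces kFloat64x2FromDoubles / kFloat32x4FromDoubles /
kInt32x4FromInts nodes whose inputs all bind to constants with
canonicalized constant vectors, Box instructions forward such
constants, the app snapshot gains Simd128 (de)serialization clusters,
and each backend materializes the 128-bit immediates via
LoadQImmediate.

A toy sketch of the folding rule (plain C++, not VM types;
FoldFromDoubles and Float64x2C are illustrative names only):

  #include <cassert>
  #include <optional>

  struct Float64x2C { double x, y; };

  // Mirrors kFloat64x2FromDoubles: fold only when both inputs bind to
  // constants; otherwise keep the runtime instruction (nullopt here).
  std::optional<Float64x2C> FoldFromDoubles(std::optional<double> x,
                                            std::optional<double> y) {
    if (x.has_value() && y.has_value()) return Float64x2C{*x, *y};
    return std::nullopt;
  }

  int main() {
    assert(FoldFromDoubles(1.0, 2.0).has_value());  // folds to a constant
    assert(!FoldFromDoubles(1.0, std::nullopt));    // stays a runtime op
    return 0;
  }
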
Reland of https://github.com/dart-lang/sdk/commit/bdacbb8decbab527a43ff1fbe289c8e55b0f6ba6

Patchset 2 fixes the tests that were failing in the reland.

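One fix visible below (assumption: this masking is among the reland
fixes): when packing the four signed 32-bit lanes of a kImmediate128
pool entry into two 64-bit words, the low lane must be masked with
0xffffffff, because casting a negative int32_t to uword sign-extends
and would smear 1-bits across the upper half. A standalone
demonstration, using uint64_t in place of uword:

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t lo_lane = -1, hi_lane = 0x12345678;
    // Unmasked: static_cast<uint64_t>(-1) is 0xffffffffffffffff, so
    // the OR clobbers the high lane.
    uint64_t bad = static_cast<uint64_t>(lo_lane) |
                   (static_cast<uint64_t>(hi_lane) << 32);
    // Masked, as in the new assembler_base.cc code: low 32 bits only.
    uint64_t good = (static_cast<uint64_t>(lo_lane) & 0xffffffff) |
                    (static_cast<uint64_t>(hi_lane) << 32);
    assert(bad == 0xffffffffffffffffull);
    assert(good == 0x12345678ffffffffull);
    return 0;
  }
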
TEST=ci

Change-Id: I1053ef653d4112d3998070cc4fc098d19d8ff264
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/357440
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Alexander Aprelev <aam@google.com>
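
The new CanonicalizeEquals/CanonicalizeHash overrides in object.cc
compare and hash the raw 16-byte payload, so canonicalization dedupes
bit-identical vectors (and, being bitwise, treats +0.0 and -0.0 as
distinct). A minimal sketch of that contract, using a stand-in struct
rather than VM types:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  struct simd128 { int32_t lanes[4]; };  // stand-in for simd128_value_t

  // Mirrors the memcmp-based CanonicalizeEquals over the full payload.
  bool CanonicalEquals(const simd128& a, const simd128& b) {
    return std::memcmp(&a, &b, sizeof(simd128)) == 0;
  }

  int main() {
    simd128 a = {{1, 2, 3, 4}}, b = {{1, 2, 3, 4}}, c = {{1, 2, 3, 5}};
    assert(CanonicalEquals(a, b) && !CanonicalEquals(a, c));
    return 0;
  }
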
diff --git a/runtime/vm/app_snapshot.cc b/runtime/vm/app_snapshot.cc
index 597b7d4..27a087d 100644
--- a/runtime/vm/app_snapshot.cc
+++ b/runtime/vm/app_snapshot.cc
@@ -419,7 +419,7 @@
     stream_->WriteWordWith32BitWrites(value);
   }
 
-  void WriteBytes(const uint8_t* addr, intptr_t len) {
+  void WriteBytes(const void* addr, intptr_t len) {
     stream_->WriteBytes(addr, len);
   }
   void Align(intptr_t alignment, intptr_t offset = 0) {
@@ -5436,6 +5436,80 @@
 };
 
 #if !defined(DART_PRECOMPILED_RUNTIME)
+class Simd128SerializationCluster : public SerializationCluster {
+ public:
+  explicit Simd128SerializationCluster(intptr_t cid, bool is_canonical)
+      : SerializationCluster("Simd128",
+                             cid,
+                             compiler::target::Int32x4::InstanceSize(),
+                             is_canonical) {
+    ASSERT_EQUAL(compiler::target::Int32x4::InstanceSize(),
+                 compiler::target::Float32x4::InstanceSize());
+    ASSERT_EQUAL(compiler::target::Int32x4::InstanceSize(),
+                 compiler::target::Float64x2::InstanceSize());
+  }
+  ~Simd128SerializationCluster() {}
+
+  void Trace(Serializer* s, ObjectPtr object) { objects_.Add(object); }
+
+  void WriteAlloc(Serializer* s) {
+    const intptr_t count = objects_.length();
+    s->WriteUnsigned(count);
+    for (intptr_t i = 0; i < count; i++) {
+      ObjectPtr vector = objects_[i];
+      s->AssignRef(vector);
+    }
+  }
+
+  void WriteFill(Serializer* s) {
+    const intptr_t count = objects_.length();
+    for (intptr_t i = 0; i < count; i++) {
+      ObjectPtr vector = objects_[i];
+      AutoTraceObject(vector);
+      ASSERT_EQUAL(Int32x4::value_offset(), Float32x4::value_offset());
+      ASSERT_EQUAL(Int32x4::value_offset(), Float64x2::value_offset());
+      s->WriteBytes(&(static_cast<Int32x4Ptr>(vector)->untag()->value_),
+                    sizeof(simd128_value_t));
+    }
+  }
+
+ private:
+  GrowableArray<ObjectPtr> objects_;
+};
+#endif  // !DART_PRECOMPILED_RUNTIME
+
+class Simd128DeserializationCluster
+    : public AbstractInstanceDeserializationCluster {
+ public:
+  explicit Simd128DeserializationCluster(intptr_t cid, bool is_canonical)
+      : AbstractInstanceDeserializationCluster("Simd128", is_canonical),
+        cid_(cid) {}
+  ~Simd128DeserializationCluster() {}
+
+  void ReadAlloc(Deserializer* d) {
+    ASSERT_EQUAL(Int32x4::InstanceSize(), Float32x4::InstanceSize());
+    ASSERT_EQUAL(Int32x4::InstanceSize(), Float64x2::InstanceSize());
+    ReadAllocFixedSize(d, Int32x4::InstanceSize());
+  }
+
+  void ReadFill(Deserializer* d_, bool primary) {
+    Deserializer::Local d(d_);
+    const intptr_t cid = cid_;
+    const bool mark_canonical = primary && is_canonical();
+    for (intptr_t id = start_index_, n = stop_index_; id < n; id++) {
+      ObjectPtr vector = d.Ref(id);
+      Deserializer::InitializeHeader(vector, cid, Int32x4::InstanceSize(),
+                                     mark_canonical);
+      d.ReadBytes(&(static_cast<Int32x4Ptr>(vector)->untag()->value_),
+                  sizeof(simd128_value_t));
+    }
+  }
+
+ private:
+  intptr_t cid_;
+};
+
+#if !defined(DART_PRECOMPILED_RUNTIME)
 class GrowableObjectArraySerializationCluster : public SerializationCluster {
  public:
   GrowableObjectArraySerializationCluster()
@@ -7824,6 +7898,10 @@
       return new (Z) MintSerializationCluster(is_canonical);
     case kDoubleCid:
       return new (Z) DoubleSerializationCluster(is_canonical);
+    case kInt32x4Cid:
+    case kFloat32x4Cid:
+    case kFloat64x2Cid:
+      return new (Z) Simd128SerializationCluster(cid, is_canonical);
     case kGrowableObjectArrayCid:
       return new (Z) GrowableObjectArraySerializationCluster();
     case kRecordCid:
@@ -9005,6 +9083,10 @@
       return new (Z) MintDeserializationCluster(is_canonical);
     case kDoubleCid:
       return new (Z) DoubleDeserializationCluster(is_canonical);
+    case kInt32x4Cid:
+    case kFloat32x4Cid:
+    case kFloat64x2Cid:
+      return new (Z) Simd128DeserializationCluster(cid, is_canonical);
     case kGrowableObjectArrayCid:
       ASSERT(!is_canonical);
       return new (Z) GrowableObjectArrayDeserializationCluster();
diff --git a/runtime/vm/compiler/assembler/assembler_base.cc b/runtime/vm/compiler/assembler/assembler_base.cc
index ac16935..083f58a 100644
--- a/runtime/vm/compiler/assembler/assembler_base.cc
+++ b/runtime/vm/compiler/assembler/assembler_base.cc
@@ -508,10 +508,12 @@
 #else
   if (entry.type() == ObjectPoolBuilderEntry::kImmediate128) {
     ASSERT(entry.patchable() == ObjectPoolBuilderEntry::kNotPatchable);
-    uword lo64 = static_cast<uword>(entry.imm128_.int_storage[0]) |
-                 (static_cast<uword>(entry.imm128_.int_storage[1]) << 32);
-    uword hi64 = static_cast<uword>(entry.imm128_.int_storage[2]) |
-                 (static_cast<uword>(entry.imm128_.int_storage[3]) << 32);
+    uword lo64 =
+        (static_cast<uword>(entry.imm128_.int_storage[0]) & 0xffffffff) |
+        (static_cast<uword>(entry.imm128_.int_storage[1]) << 32);
+    uword hi64 =
+        (static_cast<uword>(entry.imm128_.int_storage[2]) & 0xffffffff) |
+        (static_cast<uword>(entry.imm128_.int_storage[3]) << 32);
     intptr_t idx = AddImmediate(lo64);
     AddImmediate(hi64);
     object_pool_index_table_.Insert(ObjIndexPair(entry, idx));
diff --git a/runtime/vm/compiler/assembler/assembler_ia32.cc b/runtime/vm/compiler/assembler/assembler_ia32.cc
index 4fb4519..01eb209 100644
--- a/runtime/vm/compiler/assembler/assembler_ia32.cc
+++ b/runtime/vm/compiler/assembler/assembler_ia32.cc
@@ -2309,6 +2309,16 @@
   addl(ESP, Immediate(2 * target::kWordSize));
 }
 
+void Assembler::LoadQImmediate(XmmRegister dst, simd128_value_t value) {
+  // TODO(5410843): Need to have a code constants table.
+  pushl(Immediate(value.int_storage[3]));
+  pushl(Immediate(value.int_storage[2]));
+  pushl(Immediate(value.int_storage[1]));
+  pushl(Immediate(value.int_storage[0]));
+  movups(dst, Address(ESP, 0));
+  addl(ESP, Immediate(4 * target::kWordSize));
+}
+
 void Assembler::FloatNegate(XmmRegister f) {
   static const struct ALIGN16 {
     uint32_t a;
diff --git a/runtime/vm/compiler/assembler/assembler_ia32.h b/runtime/vm/compiler/assembler/assembler_ia32.h
index dd7c856..52ba1af 100644
--- a/runtime/vm/compiler/assembler/assembler_ia32.h
+++ b/runtime/vm/compiler/assembler/assembler_ia32.h
@@ -826,6 +826,7 @@
 
   void LoadSImmediate(XmmRegister dst, float value);
   void LoadDImmediate(XmmRegister dst, double value);
+  void LoadQImmediate(XmmRegister dst, simd128_value_t value);
 
   void Drop(intptr_t stack_elements);
 
diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc
index 61d0f61..c745516 100644
--- a/runtime/vm/compiler/backend/il.cc
+++ b/runtime/vm/compiler/backend/il.cc
@@ -3207,6 +3207,22 @@
     return unbox_defn->value()->definition();
   }
 
+  if (value()->BindsToConstant()) {
+    switch (representation()) {
+      case kUnboxedFloat64x2:
+        ASSERT(value()->BoundConstant().IsFloat64x2());
+        return flow_graph->GetConstant(value()->BoundConstant(), kTagged);
+      case kUnboxedFloat32x4:
+        ASSERT(value()->BoundConstant().IsFloat32x4());
+        return flow_graph->GetConstant(value()->BoundConstant(), kTagged);
+      case kUnboxedInt32x4:
+        ASSERT(value()->BoundConstant().IsInt32x4());
+        return flow_graph->GetConstant(value()->BoundConstant(), kTagged);
+      default:
+        return this;
+    }
+  }
+
   return this;
 }
 
@@ -8322,6 +8338,53 @@
   return simd_op_information[kind()].has_mask;
 }
 
+Definition* SimdOpInstr::Canonicalize(FlowGraph* flow_graph) {
+  if ((kind() == SimdOpInstr::kFloat64x2FromDoubles) &&
+      InputAt(0)->BindsToConstant() && InputAt(1)->BindsToConstant()) {
+    const Object& x = InputAt(0)->BoundConstant();
+    const Object& y = InputAt(1)->BoundConstant();
+    if (x.IsDouble() && y.IsDouble()) {
+      Float64x2& result = Float64x2::ZoneHandle(Float64x2::New(
+          Double::Cast(x).value(), Double::Cast(y).value(), Heap::kOld));
+      result ^= result.Canonicalize(Thread::Current());
+      return flow_graph->GetConstant(result, kUnboxedFloat64x2);
+    }
+  }
+  if ((kind() == SimdOpInstr::kFloat32x4FromDoubles) &&
+      InputAt(0)->BindsToConstant() && InputAt(1)->BindsToConstant() &&
+      InputAt(2)->BindsToConstant() && InputAt(3)->BindsToConstant()) {
+    const Object& x = InputAt(0)->BoundConstant();
+    const Object& y = InputAt(1)->BoundConstant();
+    const Object& z = InputAt(2)->BoundConstant();
+    const Object& w = InputAt(3)->BoundConstant();
+    if (x.IsDouble() && y.IsDouble() && z.IsDouble() && w.IsDouble()) {
+      Float32x4& result = Float32x4::Handle(Float32x4::New(
+          Double::Cast(x).value(), Double::Cast(y).value(),
+          Double::Cast(z).value(), Double::Cast(w).value(), Heap::kOld));
+      result ^= result.Canonicalize(Thread::Current());
+      return flow_graph->GetConstant(result, kUnboxedFloat32x4);
+    }
+  }
+  if ((kind() == SimdOpInstr::kInt32x4FromInts) &&
+      InputAt(0)->BindsToConstant() && InputAt(1)->BindsToConstant() &&
+      InputAt(2)->BindsToConstant() && InputAt(3)->BindsToConstant()) {
+    const Object& x = InputAt(0)->BoundConstant();
+    const Object& y = InputAt(1)->BoundConstant();
+    const Object& z = InputAt(2)->BoundConstant();
+    const Object& w = InputAt(3)->BoundConstant();
+    if (x.IsInteger() && y.IsInteger() && z.IsInteger() && w.IsInteger()) {
+      Int32x4& result = Int32x4::Handle(Int32x4::New(
+          Integer::Cast(x).AsInt64Value(), Integer::Cast(y).AsInt64Value(),
+          Integer::Cast(z).AsInt64Value(), Integer::Cast(w).AsInt64Value(),
+          Heap::kOld));
+      result ^= result.Canonicalize(Thread::Current());
+      return flow_graph->GetConstant(result, kUnboxedInt32x4);
+    }
+  }
+
+  return this;
+}
+
 LocationSummary* Call1ArgStubInstr::MakeLocationSummary(Zone* zone,
                                                         bool opt) const {
   const intptr_t kNumInputs = 1;
diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h
index c28636d..ed517bd 100644
--- a/runtime/vm/compiler/backend/il.h
+++ b/runtime/vm/compiler/backend/il.h
@@ -11265,6 +11265,8 @@
            (!HasMask() || mask() == other_op->mask());
   }
 
+  virtual Definition* Canonicalize(FlowGraph* flow_graph);
+
   DECLARE_INSTRUCTION(SimdOp)
   PRINT_OPERANDS_TO_SUPPORT
 
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index 2422597..8054633 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -953,24 +953,53 @@
       __ LoadObject(destination.reg(), value_);
     }
   } else if (destination.IsFpuRegister()) {
-    const DRegister dst = EvenDRegisterOf(destination.fpu_reg());
-    if (representation() == kUnboxedFloat) {
-      __ LoadSImmediate(EvenSRegisterOf(dst), Double::Cast(value_).value());
-    } else {
-      ASSERT(representation() == kUnboxedDouble);
-      ASSERT(tmp != kNoRegister);
-      __ LoadDImmediate(dst, Double::Cast(value_).value(), tmp);
+    switch (representation()) {
+      case kUnboxedFloat:
+        __ LoadSImmediate(
+            EvenSRegisterOf(EvenDRegisterOf(destination.fpu_reg())),
+            Double::Cast(value_).value());
+        break;
+      case kUnboxedDouble:
+        ASSERT(tmp != kNoRegister);
+        __ LoadDImmediate(EvenDRegisterOf(destination.fpu_reg()),
+                          Double::Cast(value_).value(), tmp);
+        break;
+      case kUnboxedFloat64x2:
+        __ LoadQImmediate(destination.fpu_reg(),
+                          Float64x2::Cast(value_).value());
+        break;
+      case kUnboxedFloat32x4:
+        __ LoadQImmediate(destination.fpu_reg(),
+                          Float32x4::Cast(value_).value());
+        break;
+      case kUnboxedInt32x4:
+        __ LoadQImmediate(destination.fpu_reg(), Int32x4::Cast(value_).value());
+        break;
+      default:
+        UNREACHABLE();
     }
   } else if (destination.IsDoubleStackSlot()) {
-    if (Utils::DoublesBitEqual(Double::Cast(value_).value(), 0.0) &&
-        TargetCPUFeatures::neon_supported()) {
-      __ veorq(QTMP, QTMP, QTMP);
-    } else {
-      ASSERT(tmp != kNoRegister);
-      __ LoadDImmediate(DTMP, Double::Cast(value_).value(), tmp);
-    }
+    ASSERT(tmp != kNoRegister);
+    __ LoadDImmediate(DTMP, Double::Cast(value_).value(), tmp);
     const intptr_t dest_offset = destination.ToStackSlotOffset();
     __ StoreDToOffset(DTMP, destination.base_reg(), dest_offset);
+  } else if (destination.IsQuadStackSlot()) {
+    switch (representation()) {
+      case kUnboxedFloat64x2:
+        __ LoadQImmediate(QTMP, Float64x2::Cast(value_).value());
+        break;
+      case kUnboxedFloat32x4:
+        __ LoadQImmediate(QTMP, Float32x4::Cast(value_).value());
+        break;
+      case kUnboxedInt32x4:
+        __ LoadQImmediate(QTMP, Int32x4::Cast(value_).value());
+        break;
+      default:
+        UNREACHABLE();
+    }
+    const intptr_t dest_offset = destination.ToStackSlotOffset();
+    __ StoreMultipleDToOffset(EvenDRegisterOf(QTMP), 2, destination.base_reg(),
+                              dest_offset);
   } else {
     ASSERT(destination.IsStackSlot());
     ASSERT(tmp != kNoRegister);
diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc
index 8f3fcd9..84712ae 100644
--- a/runtime/vm/compiler/backend/il_arm64.cc
+++ b/runtime/vm/compiler/backend/il_arm64.cc
@@ -788,20 +788,45 @@
       __ LoadObject(destination.reg(), value_);
     }
   } else if (destination.IsFpuRegister()) {
-    const VRegister dst = destination.fpu_reg();
-    if (representation() == kUnboxedFloat) {
-      __ LoadSImmediate(dst, Double::Cast(value_).value());
-    } else {
-      ASSERT(representation() == kUnboxedDouble);
-      __ LoadDImmediate(dst, Double::Cast(value_).value());
+    switch (representation()) {
+      case kUnboxedFloat:
+        __ LoadSImmediate(destination.fpu_reg(), Double::Cast(value_).value());
+        break;
+      case kUnboxedDouble:
+        __ LoadDImmediate(destination.fpu_reg(), Double::Cast(value_).value());
+        break;
+      case kUnboxedFloat64x2:
+        __ LoadQImmediate(destination.fpu_reg(),
+                          Float64x2::Cast(value_).value());
+        break;
+      case kUnboxedFloat32x4:
+        __ LoadQImmediate(destination.fpu_reg(),
+                          Float32x4::Cast(value_).value());
+        break;
+      case kUnboxedInt32x4:
+        __ LoadQImmediate(destination.fpu_reg(), Int32x4::Cast(value_).value());
+        break;
+      default:
+        UNREACHABLE();
     }
   } else if (destination.IsDoubleStackSlot()) {
+    ASSERT(representation() == kUnboxedDouble);
+    __ LoadDImmediate(VTMP, Double::Cast(value_).value());
     const intptr_t dest_offset = destination.ToStackSlotOffset();
-    if (Utils::DoublesBitEqual(Double::Cast(value_).value(), 0.0)) {
-      __ StoreToOffset(ZR, destination.base_reg(), dest_offset);
-    } else {
-      __ LoadDImmediate(VTMP, Double::Cast(value_).value());
-      __ StoreDToOffset(VTMP, destination.base_reg(), dest_offset);
+    __ StoreDToOffset(VTMP, destination.base_reg(), dest_offset);
+  } else if (destination.IsQuadStackSlot()) {
+    switch (representation()) {
+      case kUnboxedFloat64x2:
+        __ LoadQImmediate(VTMP, Float64x2::Cast(value_).value());
+        break;
+      case kUnboxedFloat32x4:
+        __ LoadQImmediate(VTMP, Float32x4::Cast(value_).value());
+        break;
+      case kUnboxedInt32x4:
+        __ LoadQImmediate(VTMP, Int32x4::Cast(value_).value());
+        break;
+      default:
+        UNREACHABLE();
     }
   } else {
     ASSERT(destination.IsStackSlot());
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index 1edd9dc..7f12fc2 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -546,23 +546,40 @@
       __ LoadObjectSafely(destination.reg(), value_);
     }
   } else if (destination.IsFpuRegister()) {
-    if (representation() == kUnboxedFloat) {
-      __ LoadSImmediate(destination.fpu_reg(),
-                        static_cast<float>(Double::Cast(value_).value()));
-    } else {
-      const double value_as_double = Double::Cast(value_).value();
-      uword addr = FindDoubleConstant(value_as_double);
-      if (addr == 0) {
-        __ pushl(EAX);
-        __ LoadObject(EAX, value_);
-        __ movsd(destination.fpu_reg(),
-                 compiler::FieldAddress(EAX, Double::value_offset()));
-        __ popl(EAX);
-      } else if (Utils::DoublesBitEqual(value_as_double, 0.0)) {
-        __ xorps(destination.fpu_reg(), destination.fpu_reg());
-      } else {
-        __ movsd(destination.fpu_reg(), compiler::Address::Absolute(addr));
+    switch (representation()) {
+      case kUnboxedFloat:
+        __ LoadSImmediate(destination.fpu_reg(),
+                          static_cast<float>(Double::Cast(value_).value()));
+        break;
+      case kUnboxedDouble: {
+        const double value_as_double = Double::Cast(value_).value();
+        uword addr = FindDoubleConstant(value_as_double);
+        if (addr == 0) {
+          __ pushl(EAX);
+          __ LoadObject(EAX, value_);
+          __ movsd(destination.fpu_reg(),
+                   compiler::FieldAddress(EAX, Double::value_offset()));
+          __ popl(EAX);
+        } else if (Utils::DoublesBitEqual(value_as_double, 0.0)) {
+          __ xorps(destination.fpu_reg(), destination.fpu_reg());
+        } else {
+          __ movsd(destination.fpu_reg(), compiler::Address::Absolute(addr));
+        }
+        break;
       }
+      case kUnboxedFloat64x2:
+        __ LoadQImmediate(destination.fpu_reg(),
+                          Float64x2::Cast(value_).value());
+        break;
+      case kUnboxedFloat32x4:
+        __ LoadQImmediate(destination.fpu_reg(),
+                          Float32x4::Cast(value_).value());
+        break;
+      case kUnboxedInt32x4:
+        __ LoadQImmediate(destination.fpu_reg(), Int32x4::Cast(value_).value());
+        break;
+      default:
+        UNREACHABLE();
     }
   } else if (destination.IsDoubleStackSlot()) {
     const double value_as_double = Double::Cast(value_).value();
@@ -578,6 +595,21 @@
       __ movsd(FpuTMP, compiler::Address::Absolute(addr));
     }
     __ movsd(LocationToStackSlotAddress(destination), FpuTMP);
+  } else if (destination.IsQuadStackSlot()) {
+    switch (representation()) {
+      case kUnboxedFloat64x2:
+        __ LoadQImmediate(FpuTMP, Float64x2::Cast(value_).value());
+        break;
+      case kUnboxedFloat32x4:
+        __ LoadQImmediate(FpuTMP, Float32x4::Cast(value_).value());
+        break;
+      case kUnboxedInt32x4:
+        __ LoadQImmediate(FpuTMP, Int32x4::Cast(value_).value());
+        break;
+      default:
+        UNREACHABLE();
+    }
+    __ movups(LocationToStackSlotAddress(destination), FpuTMP);
   } else {
     ASSERT(destination.IsStackSlot());
     if (RepresentationUtils::IsUnboxedInteger(representation())) {
diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc
index 6fcba58..81ccd1c 100644
--- a/runtime/vm/compiler/backend/il_x64.cc
+++ b/runtime/vm/compiler/backend/il_x64.cc
@@ -719,15 +719,46 @@
       __ LoadObject(destination.reg(), value_);
     }
   } else if (destination.IsFpuRegister()) {
-    if (representation() == kUnboxedFloat) {
-      __ LoadSImmediate(destination.fpu_reg(), Double::Cast(value_).value());
-    } else {
-      ASSERT(representation() == kUnboxedDouble);
-      __ LoadDImmediate(destination.fpu_reg(), Double::Cast(value_).value());
+    switch (representation()) {
+      case kUnboxedFloat:
+        __ LoadSImmediate(destination.fpu_reg(), Double::Cast(value_).value());
+        break;
+      case kUnboxedDouble:
+        __ LoadDImmediate(destination.fpu_reg(), Double::Cast(value_).value());
+        break;
+      case kUnboxedFloat64x2:
+        __ LoadQImmediate(destination.fpu_reg(),
+                          Float64x2::Cast(value_).value());
+        break;
+      case kUnboxedFloat32x4:
+        __ LoadQImmediate(destination.fpu_reg(),
+                          Float32x4::Cast(value_).value());
+        break;
+      case kUnboxedInt32x4:
+        __ LoadQImmediate(destination.fpu_reg(), Int32x4::Cast(value_).value());
+        break;
+      default:
+        UNREACHABLE();
     }
   } else if (destination.IsDoubleStackSlot()) {
+    ASSERT(representation() == kUnboxedDouble);
     __ LoadDImmediate(FpuTMP, Double::Cast(value_).value());
     __ movsd(LocationToStackSlotAddress(destination), FpuTMP);
+  } else if (destination.IsQuadStackSlot()) {
+    switch (representation()) {
+      case kUnboxedFloat64x2:
+        __ LoadQImmediate(FpuTMP, Float64x2::Cast(value_).value());
+        break;
+      case kUnboxedFloat32x4:
+        __ LoadQImmediate(FpuTMP, Float32x4::Cast(value_).value());
+        break;
+      case kUnboxedInt32x4:
+        __ LoadQImmediate(FpuTMP, Int32x4::Cast(value_).value());
+        break;
+      default:
+        UNREACHABLE();
+    }
+    __ movups(LocationToStackSlotAddress(destination), FpuTMP);
   } else {
     ASSERT(destination.IsStackSlot());
     if (RepresentationUtils::IsUnboxedInteger(representation())) {
diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
index c40b622..c23ba8e 100644
--- a/runtime/vm/object.cc
+++ b/runtime/vm/object.cc
@@ -25589,6 +25589,16 @@
   return untag()->value_[3];
 }
 
+bool Float32x4::CanonicalizeEquals(const Instance& other) const {
+  return memcmp(&untag()->value_, Float32x4::Cast(other).untag()->value_,
+                sizeof(simd128_value_t)) == 0;
+}
+
+uint32_t Float32x4::CanonicalizeHash() const {
+  return HashBytes(reinterpret_cast<const uint8_t*>(&untag()->value_),
+                   sizeof(simd128_value_t));
+}
+
 const char* Float32x4::ToCString() const {
   float _x = x();
   float _y = y();
@@ -25663,6 +25673,16 @@
                  value);
 }
 
+bool Int32x4::CanonicalizeEquals(const Instance& other) const {
+  return memcmp(&untag()->value_, Int32x4::Cast(other).untag()->value_,
+                sizeof(simd128_value_t)) == 0;
+}
+
+uint32_t Int32x4::CanonicalizeHash() const {
+  return HashBytes(reinterpret_cast<const uint8_t*>(&untag()->value_),
+                   sizeof(simd128_value_t));
+}
+
 const char* Int32x4::ToCString() const {
   int32_t _x = x();
   int32_t _y = y();
@@ -25713,6 +25733,16 @@
   StoreSimd128(&untag()->value_[0], value);
 }
 
+bool Float64x2::CanonicalizeEquals(const Instance& other) const {
+  return memcmp(&untag()->value_, Float64x2::Cast(other).untag()->value_,
+                sizeof(simd128_value_t)) == 0;
+}
+
+uint32_t Float64x2::CanonicalizeHash() const {
+  return HashBytes(reinterpret_cast<const uint8_t*>(&untag()->value_),
+                   sizeof(simd128_value_t));
+}
+
 const char* Float64x2::ToCString() const {
   double _x = x();
   double _y = y();
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index bf88dbb..0c3a576 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -11165,6 +11165,9 @@
     return OFFSET_OF(UntaggedFloat32x4, value_);
   }
 
+  virtual bool CanonicalizeEquals(const Instance& other) const;
+  virtual uint32_t CanonicalizeHash() const;
+
  private:
   FINAL_HEAP_OBJECT_IMPLEMENTATION(Float32x4, Instance);
   friend class Class;
@@ -11198,6 +11201,9 @@
 
   static intptr_t value_offset() { return OFFSET_OF(UntaggedInt32x4, value_); }
 
+  virtual bool CanonicalizeEquals(const Instance& other) const;
+  virtual uint32_t CanonicalizeHash() const;
+
  private:
   FINAL_HEAP_OBJECT_IMPLEMENTATION(Int32x4, Instance);
   friend class Class;
@@ -11228,6 +11234,9 @@
     return OFFSET_OF(UntaggedFloat64x2, value_);
   }
 
+  virtual bool CanonicalizeEquals(const Instance& other) const;
+  virtual uint32_t CanonicalizeHash() const;
+
  private:
   FINAL_HEAP_OBJECT_IMPLEMENTATION(Float64x2, Instance);
   friend class Class;
diff --git a/runtime/vm/raw_object.h b/runtime/vm/raw_object.h
index 95e5299..3aff087 100644
--- a/runtime/vm/raw_object.h
+++ b/runtime/vm/raw_object.h
@@ -3300,8 +3300,10 @@
 
   ALIGN8 int32_t value_[4];
 
-  friend class Simd128MessageSerializationCluster;
+  friend class Simd128DeserializationCluster;
   friend class Simd128MessageDeserializationCluster;
+  friend class Simd128MessageSerializationCluster;
+  friend class Simd128SerializationCluster;
 
  public:
   int32_t x() const { return value_[0]; }