[VM] Reland: Support some 32-bit instructions in 64-bit assemblers.

These instructions will soon be used for smaller (32-bit) Smis.
Also fix the broken ubfiz instruction in the ARM64 assembler.
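
For reference, ubfiz(rd, rn, low_bit, width) should take the low width
bits of rn, place them at bit position low_bit, and zero the rest of
the destination. A minimal C++ sketch of that intended semantics
(UbfizModel is a hypothetical helper for illustration, not the
assembler API):

  #include <cassert>
  #include <cstdint>

  // Hypothetical model of the 64-bit ubfiz semantics: insert the low
  // `width` bits of rn at low_bit, zeroing all other result bits.
  uint64_t UbfizModel(uint64_t rn, int low_bit, int width) {
    assert(width > 0 && low_bit + width <= 64);
    uint64_t mask = (width == 64) ? ~0ull : ((1ull << width) - 1);
    return (rn & mask) << low_bit;
  }

  // Matches the new Ubfiz assembler test:
  // UbfizModel(0xff1248ff, 1, 30) == 0x7e2491fe.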

This reland fixes the disassembler test that used Stop
instructions, which disassemble differently on Mac because
constants have higher addresses there.

Originally reviewed at
https://dart-review.googlesource.com/c/sdk/+/43668

Fix assembler test on Mac

R=kustermann@google.com

Change-Id: I61f8626184af495f18a74b5fef07bad02b6615e9
Reviewed-on: https://dart-review.googlesource.com/45240
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Erik Corry <erikcorry@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm64.cc b/runtime/vm/compiler/assembler/assembler_arm64.cc
index 6d5c531..6eba840 100644
--- a/runtime/vm/compiler/assembler/assembler_arm64.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm64.cc
@@ -425,6 +425,7 @@
   // TODO(zra, kmillikin): Also load other large immediates from the object
   // pool
   if (object.IsSmi()) {
+    ASSERT(Smi::IsValid(Smi::Value(reinterpret_cast<RawSmi*>(object.raw()))));
     // If the raw smi does not fit into a 32-bit signed int, then we'll keep
     // the raw value in the object pool.
     return !Utils::IsInt(32, reinterpret_cast<int64_t>(object.raw()));
@@ -680,37 +681,69 @@
   }
 }
 
-void Assembler::AddImmediateSetFlags(Register dest, Register rn, int64_t imm) {
+void Assembler::AddImmediateSetFlags(Register dest,
+                                     Register rn,
+                                     int64_t imm,
+                                     OperandSize sz) {
+  ASSERT(sz == kDoubleWord || sz == kWord);
   Operand op;
   if (Operand::CanHold(imm, kXRegSizeInBits, &op) == Operand::Immediate) {
     // Handles imm == kMinInt64.
-    adds(dest, rn, op);
+    if (sz == kDoubleWord) {
+      adds(dest, rn, op);
+    } else {
+      addsw(dest, rn, op);
+    }
   } else if (Operand::CanHold(-imm, kXRegSizeInBits, &op) ==
              Operand::Immediate) {
     ASSERT(imm != kMinInt64);  // Would cause erroneous overflow detection.
-    subs(dest, rn, op);
+    if (sz == kDoubleWord) {
+      subs(dest, rn, op);
+    } else {
+      subsw(dest, rn, op);
+    }
   } else {
     // TODO(zra): Try adding top 12 bits, then bottom 12 bits.
     ASSERT(rn != TMP2);
     LoadImmediate(TMP2, imm);
-    adds(dest, rn, Operand(TMP2));
+    if (sz == kDoubleWord) {
+      adds(dest, rn, Operand(TMP2));
+    } else {
+      addsw(dest, rn, Operand(TMP2));
+    }
   }
 }
 
-void Assembler::SubImmediateSetFlags(Register dest, Register rn, int64_t imm) {
+void Assembler::SubImmediateSetFlags(Register dest,
+                                     Register rn,
+                                     int64_t imm,
+                                     OperandSize sz) {
   Operand op;
+  ASSERT(sz == kDoubleWord || sz == kWord);
   if (Operand::CanHold(imm, kXRegSizeInBits, &op) == Operand::Immediate) {
     // Handles imm == kMinInt64.
-    subs(dest, rn, op);
+    if (sz == kDoubleWord) {
+      subs(dest, rn, op);
+    } else {
+      subsw(dest, rn, op);
+    }
   } else if (Operand::CanHold(-imm, kXRegSizeInBits, &op) ==
              Operand::Immediate) {
     ASSERT(imm != kMinInt64);  // Would cause erroneous overflow detection.
-    adds(dest, rn, op);
+    if (sz == kDoubleWord) {
+      adds(dest, rn, op);
+    } else {
+      addsw(dest, rn, op);
+    }
   } else {
     // TODO(zra): Try subtracting top 12 bits, then bottom 12 bits.
     ASSERT(rn != TMP2);
     LoadImmediate(TMP2, imm);
-    subs(dest, rn, Operand(TMP2));
+    if (sz == kDoubleWord) {
+      subs(dest, rn, Operand(TMP2));
+    } else {
+      subsw(dest, rn, Operand(TMP2));
+    }
   }
 }
 
diff --git a/runtime/vm/compiler/assembler/assembler_arm64.h b/runtime/vm/compiler/assembler/assembler_arm64.h
index 4a9c450..3b7e289 100644
--- a/runtime/vm/compiler/assembler/assembler_arm64.h
+++ b/runtime/vm/compiler/assembler/assembler_arm64.h
@@ -649,8 +649,9 @@
              int width,
              OperandSize size = kDoubleWord) {
     int wordsize = size == kDoubleWord ? 64 : 32;
-    EmitBitfieldOp(UBFM, rd, rn, (width - low_bit) & (wordsize - 1),
-                   wordsize - 1, size);
+    ASSERT(width > 0);
+    ASSERT(low_bit < wordsize);
+    EmitBitfieldOp(UBFM, rd, rn, (-low_bit) & (wordsize - 1), width - 1, size);
   }
 
   // Unsigned bitfield extract.  Takes the width bits, starting at low_bit and
@@ -688,6 +689,11 @@
     EmitBitfieldOp(UBFM, rd, rn, 0, 15, kDoubleWord);
   }
 
+  // Zero/unsigned extend word->64 bit.
+  void uxtw(Register rd, Register rn) {
+    EmitBitfieldOp(UBFM, rd, rn, 0, 31, kDoubleWord);
+  }
+
   // Logical immediate operations.
   void andi(Register rd, Register rn, const Immediate& imm) {
     Operand imm_op;
@@ -737,6 +743,9 @@
   void orn(Register rd, Register rn, Operand o) {
     EmitLogicalShiftOp(ORN, rd, rn, o, kDoubleWord);
   }
+  void ornw(Register rd, Register rn, Operand o) {
+    EmitLogicalShiftOp(ORN, rd, rn, o, kWord);
+  }
   void eor(Register rd, Register rn, Operand o) {
     EmitLogicalShiftOp(EOR, rd, rn, o, kDoubleWord);
   }
@@ -774,20 +783,66 @@
   void asrv(Register rd, Register rn, Register rm) {
     EmitMiscDP2Source(ASRV, rd, rn, rm, kDoubleWord);
   }
-  void madd(Register rd, Register rn, Register rm, Register ra) {
-    EmitMiscDP3Source(MADD, rd, rn, rm, ra, kDoubleWord);
+  void lslvw(Register rd, Register rn, Register rm) {
+    EmitMiscDP2Source(LSLV, rd, rn, rm, kWord);
   }
-  void msub(Register rd, Register rn, Register rm, Register ra) {
-    EmitMiscDP3Source(MSUB, rd, rn, rm, ra, kDoubleWord);
+  void lsrvw(Register rd, Register rn, Register rm) {
+    EmitMiscDP2Source(LSRV, rd, rn, rm, kWord);
   }
-  void smulh(Register rd, Register rn, Register rm) {
-    EmitMiscDP3Source(SMULH, rd, rn, rm, R31, kDoubleWord);
+  void asrvw(Register rd, Register rn, Register rm) {
+    EmitMiscDP2Source(ASRV, rd, rn, rm, kWord);
   }
-  void umulh(Register rd, Register rn, Register rm) {
-    EmitMiscDP3Source(UMULH, rd, rn, rm, R31, kDoubleWord);
+  void madd(Register rd,
+            Register rn,
+            Register rm,
+            Register ra,
+            OperandSize sz = kDoubleWord) {
+    EmitMiscDP3Source(MADD, rd, rn, rm, ra, sz);
   }
-  void umaddl(Register rd, Register rn, Register rm, Register ra) {
-    EmitMiscDP3Source(UMADDL, rd, rn, rm, ra, kDoubleWord);
+  void msub(Register rd,
+            Register rn,
+            Register rm,
+            Register ra,
+            OperandSize sz = kDoubleWord) {
+    EmitMiscDP3Source(MSUB, rd, rn, rm, ra, sz);
+  }
+  void smulh(Register rd,
+             Register rn,
+             Register rm,
+             OperandSize sz = kDoubleWord) {
+    EmitMiscDP3Source(SMULH, rd, rn, rm, R31, sz);
+  }
+  void umulh(Register rd,
+             Register rn,
+             Register rm,
+             OperandSize sz = kDoubleWord) {
+    EmitMiscDP3Source(UMULH, rd, rn, rm, R31, sz);
+  }
+  void umaddl(Register rd,
+              Register rn,
+              Register rm,
+              Register ra,
+              OperandSize sz = kDoubleWord) {
+    EmitMiscDP3Source(UMADDL, rd, rn, rm, ra, sz);
+  }
+  void umull(Register rd,
+             Register rn,
+             Register rm,
+             OperandSize sz = kDoubleWord) {
+    EmitMiscDP3Source(UMADDL, rd, rn, rm, ZR, sz);
+  }
+  void smaddl(Register rd,
+              Register rn,
+              Register rm,
+              Register ra,
+              OperandSize sz = kDoubleWord) {
+    EmitMiscDP3Source(SMADDL, rd, rn, rm, ra, sz);
+  }
+  void smull(Register rd,
+             Register rn,
+             Register rm,
+             OperandSize sz = kDoubleWord) {
+    EmitMiscDP3Source(SMADDL, rd, rn, rm, ZR, sz);
   }
 
   // Move wide immediate.
@@ -903,6 +958,7 @@
   // For add and sub, to use CSP for rn, o must be of type Operand::Extend.
   // For an unmodified rm in this case, use Operand(rm, UXTX, 0);
   void cmp(Register rn, Operand o) { subs(ZR, rn, o); }
+  void cmpw(Register rn, Operand o) { subsw(ZR, rn, o); }
   // rn cmp -o.
   void cmn(Register rn, Operand o) { adds(ZR, rn, o); }
 
@@ -1199,9 +1255,16 @@
   }
   void vmov(VRegister vd, VRegister vn) { vorr(vd, vn, vn); }
   void mvn(Register rd, Register rm) { orn(rd, ZR, Operand(rm)); }
+  void mvnw(Register rd, Register rm) { ornw(rd, ZR, Operand(rm)); }
   void neg(Register rd, Register rm) { sub(rd, ZR, Operand(rm)); }
   void negs(Register rd, Register rm) { subs(rd, ZR, Operand(rm)); }
-  void mul(Register rd, Register rn, Register rm) { madd(rd, rn, rm, ZR); }
+  void negsw(Register rd, Register rm) { subsw(rd, ZR, Operand(rm)); }
+  void mul(Register rd, Register rn, Register rm) {
+    madd(rd, rn, rm, ZR, kDoubleWord);
+  }
+  void mulw(Register rd, Register rn, Register rm) {
+    madd(rd, rn, rm, ZR, kWord);
+  }
   void Push(Register reg) {
     ASSERT(reg != PP);  // Only push PP with TagAndPushPP().
     str(reg, Address(SP, -1 * kWordSize, Address::PreIndex));
@@ -1257,8 +1320,15 @@
   void tsti(Register rn, const Immediate& imm) { andis(ZR, rn, imm); }
 
   // We use an alias of add, where ARM recommends an alias of ubfm.
-  void LslImmediate(Register rd, Register rn, int shift) {
-    add(rd, ZR, Operand(rn, LSL, shift));
+  void LslImmediate(Register rd,
+                    Register rn,
+                    int shift,
+                    OperandSize sz = kDoubleWord) {
+    if (sz == kDoubleWord) {
+      add(rd, ZR, Operand(rn, LSL, shift));
+    } else {
+      addw(rd, ZR, Operand(rn, LSL, shift));
+    }
   }
   // We use an alias of add, where ARM recommends an alias of ubfm.
   void LsrImmediate(Register rd, Register rn, int shift) {
@@ -1310,8 +1380,14 @@
   // pool pointer is in another register, or that it is not available at all,
   // PP should be passed for pp.
   void AddImmediate(Register dest, Register rn, int64_t imm);
-  void AddImmediateSetFlags(Register dest, Register rn, int64_t imm);
-  void SubImmediateSetFlags(Register dest, Register rn, int64_t imm);
+  void AddImmediateSetFlags(Register dest,
+                            Register rn,
+                            int64_t imm,
+                            OperandSize sz = kDoubleWord);
+  void SubImmediateSetFlags(Register dest,
+                            Register rn,
+                            int64_t imm,
+                            OperandSize sz = kDoubleWord);
   void AndImmediate(Register rd, Register rn, int64_t imm);
   void OrImmediate(Register rd, Register rn, int64_t imm);
   void XorImmediate(Register rd, Register rn, int64_t imm);
diff --git a/runtime/vm/compiler/assembler/assembler_arm64_test.cc b/runtime/vm/compiler/assembler/assembler_arm64_test.cc
index 8bf00c9..fbfb286 100644
--- a/runtime/vm/compiler/assembler/assembler_arm64_test.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm64_test.cc
@@ -1654,6 +1654,154 @@
   EXPECT_EQ(0x700000001, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
 }
 
+ASSEMBLER_TEST_GENERATE(Smaddl, assembler) {
+  __ movn(R1, Immediate(1), 0);   // W1 = -2.
+  __ movz(R2, Immediate(7), 0);   // W2 = 7.
+  __ movz(R3, Immediate(20), 0);  // X3 = 20.
+  __ smaddl(R0, R1, R2, R3);      // X0 = W1*W2 + X3 = 6.
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(Smaddl, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(6, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
+ASSEMBLER_TEST_GENERATE(Smaddl2, assembler) {
+  __ movn(R1, Immediate(1), 0);  // W1 = -2.
+  __ movn(R2, Immediate(0), 0);  // W2 = -1.
+  __ smull(R0, R1, R2);          // X0 = W1*W2 = 2, alias of smaddl.
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(Smaddl2, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(2, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
+ASSEMBLER_TEST_GENERATE(Smaddl3, assembler) {
+  __ movz(R1, Immediate(0xffff), 0);  // W1 = 0xffff.
+  __ movz(R2, Immediate(0xffff), 0);  // W2 = 0xffff.
+  __ smull(R0, R1, R2);               // X0 = W1*W2, alias of smaddl.
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(Smaddl3, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(0xffffl * 0xffffl,
+            EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
+ASSEMBLER_TEST_GENERATE(SmaddlOverflow, assembler) {
+  Label return_ltuae;
+  __ movz(R1, Immediate(0xffff), 0);  // W1 = 0xffff.
+  __ AddImmediate(R1, 4);             // W1 = 0x10003.
+  __ movz(R2, Immediate(0x7fff), 0);  // W2 = 0x7fff.
+  __ smull(R0, R1, R2);               // X0 = W1*W2, alias of smaddl.
+  __ AsrImmediate(R3, R0, 31);
+  __ cmp(R3, Operand(R0, ASR, 63));  // Detect signed 32 bit overflow.
+  __ b(&return_ltuae, NE);
+  __ ret();
+  __ Bind(&return_ltuae);
+  __ movz(R0, Immediate(42), 0);
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(SmaddlOverflow, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(42, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
+ASSEMBLER_TEST_GENERATE(SmaddlOverflow2, assembler) {
+  Label return_ltuae;
+  __ movz(R1, Immediate(0xffff), 0);  // W1 = 0xffff.
+  __ movn(R2, Immediate(0xffff), 0);  // W2 = -0x10000.
+  __ AddImmediate(R2, -3);            // W2 = -0x10003.
+  __ smull(R0, R1, R2);               // X0 = W1*W2, alias of smaddl.
+  __ AsrImmediate(R3, R0, 31);
+  __ cmp(R3, Operand(R0, ASR, 63));  // Detect signed 32 bit overflow.
+  __ b(&return_ltuae, NE);
+  __ ret();
+  __ Bind(&return_ltuae);
+  __ movz(R0, Immediate(42), 0);
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(SmaddlOverflow2, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(42, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
+ASSEMBLER_TEST_GENERATE(SmaddlOverflow3, assembler) {
+  Label return_ltuae;
+  __ LoadImmediate(R1, 0x01007fff);
+  __ LoadImmediate(R2, 0x01007fff);
+  __ smull(R0, R1, R2);  // X0 = W1*W2, alias of smaddl.
+  __ AsrImmediate(R3, R0, 31);
+  __ cmp(R3, Operand(R0, ASR, 63));  // Detect signed 32 bit overflow.
+  __ b(&return_ltuae, NE);
+  __ ret();
+  __ Bind(&return_ltuae);
+  __ movz(R0, Immediate(42), 0);
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(SmaddlOverflow3, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(42, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
+ASSEMBLER_TEST_GENERATE(NegNoOverflow, assembler) {
+  Label return_ltuae;
+  __ LoadImmediate(R1, 0x7fffffff);
+  __ negsw(R0, R1);  // W0 = -W1, setting flags.
+  __ sxtw(R0, R0);
+  __ b(&return_ltuae, VS);  // Branch on overflow set.
+  __ ret();
+  __ Bind(&return_ltuae);
+  __ movz(R0, Immediate(42), 0);
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(NegNoOverflow, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(-0x7fffffff, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
+ASSEMBLER_TEST_GENERATE(NegNoOverflow2, assembler) {
+  Label return_ltuae;
+  __ LoadImmediate(R1, 0x7123);
+  __ negsw(R0, R1);  // W0 = -W1, setting flags.
+  __ sxtw(R0, R0);
+  __ b(&return_ltuae, VS);  // Branch on overflow set.
+  __ ret();
+  __ Bind(&return_ltuae);
+  __ movz(R0, Immediate(42), 0);
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(NegNoOverflow2, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(-0x7123, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
+ASSEMBLER_TEST_GENERATE(NegOverflow, assembler) {
+  Label return_ltuae;
+  __ LoadImmediate(R1, -0x80000000ll);
+  __ negsw(R0, R1);  // W0 = -W1, setting flags.
+  __ sxtw(R0, R0);
+  __ b(&return_ltuae, VS);  // Branch on overflow set.
+  __ ret();
+  __ Bind(&return_ltuae);
+  __ movz(R0, Immediate(42), 0);
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(NegOverflow, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(42, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
 // Loading immediate values without the object pool.
 ASSEMBLER_TEST_GENERATE(LoadImmediateSmall, assembler) {
   __ LoadImmediate(R0, 42);
@@ -2145,6 +2293,19 @@
   EXPECT_EQ(0x5a5b9a5a, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
 }
 
+ASSEMBLER_TEST_GENERATE(Ubfiz, assembler) {
+  __ LoadImmediate(R1, 0xff1248ff);
+  __ LoadImmediate(R0, 0x5a5a5a5a);
+  // Take 30 low bits and place at position 1 in R0, zeroing the rest.
+  __ ubfiz(R0, R1, 1, 30);
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(Ubfiz, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(0x7e2491fe, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
 ASSEMBLER_TEST_GENERATE(Bfxil, assembler) {
   __ LoadImmediate(R1, 0x819);
   __ LoadImmediate(R0, 0x5a5a5a5a);
@@ -2217,6 +2378,23 @@
   EXPECT_EQ(0x29, EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
 }
 
+ASSEMBLER_TEST_GENERATE(Uxtw, assembler) {
+  __ LoadImmediate(R1, 0xffffffffll);
+  __ LoadImmediate(R0, 0x5a5a5a5a);  // Overwritten.
+  __ ubfiz(R0, R1, 0, 32);           // Zero extend word.
+  __ LoadImmediate(R2, 0x10000002all);
+  __ LoadImmediate(R1, 0x5a5a5a5a);  // Overwritten.
+  __ ubfiz(R1, R2, 0, 32);           // Zero extend word.
+  __ add(R0, R0, Operand(R1));
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(Uxtw, test) {
+  typedef int64_t (*Int64Return)() DART_UNUSED;
+  EXPECT_EQ(0xffffffffll + 42,
+            EXECUTE_TEST_CODE_INT64(Int64Return, test->entry()));
+}
+
 ASSEMBLER_TEST_GENERATE(Uxtb, assembler) {
   __ LoadImmediate(R1, -1);
   __ LoadImmediate(R0, 0x5a5a5a5a);  // Overwritten.
diff --git a/runtime/vm/compiler/assembler/assembler_x64.cc b/runtime/vm/compiler/assembler/assembler_x64.cc
index e9451d0..b13e48a 100644
--- a/runtime/vm/compiler/assembler/assembler_x64.cc
+++ b/runtime/vm/compiler/assembler/assembler_x64.cc
@@ -683,12 +683,19 @@
   }
 }
 
-void Assembler::MulImmediate(Register reg, const Immediate& imm) {
+void Assembler::MulImmediate(Register reg,
+                             const Immediate& imm,
+                             OperandWidth width) {
   if (imm.is_int32()) {
-    imulq(reg, imm);
+    if (width == k32Bit) {
+      imull(reg, imm);
+    } else {
+      imulq(reg, imm);
+    }
   } else {
     ASSERT(reg != TMP);
-    LoadImmediate(TMP, imm);
+    ASSERT(width != k32Bit);
+    movq(TMP, imm);
     imulq(reg, TMP);
   }
 }
@@ -943,25 +950,36 @@
   popq(r);
 }
 
-void Assembler::AddImmediate(Register reg, const Immediate& imm) {
+void Assembler::AddImmediate(Register reg,
+                             const Immediate& imm,
+                             OperandWidth width) {
   const int64_t value = imm.value();
   if (value == 0) {
     return;
   }
   if ((value > 0) || (value == kMinInt64)) {
     if (value == 1) {
-      incq(reg);
+      if (width == k32Bit) {
+        incl(reg);
+      } else {
+        incq(reg);
+      }
     } else {
-      if (imm.is_int32()) {
-        addq(reg, imm);
+      if (imm.is_int32() || (width == k32Bit && imm.is_uint32())) {
+        if (width == k32Bit) {
+          addl(reg, imm);
+        } else {
+          addq(reg, imm);
+        }
       } else {
         ASSERT(reg != TMP);
+        ASSERT(width != k32Bit);
         LoadImmediate(TMP, imm);
         addq(reg, TMP);
       }
     }
   } else {
-    SubImmediate(reg, Immediate(-value));
+    SubImmediate(reg, Immediate(-value), width);
   }
 }
 
@@ -986,25 +1004,37 @@
   }
 }
 
-void Assembler::SubImmediate(Register reg, const Immediate& imm) {
+void Assembler::SubImmediate(Register reg,
+                             const Immediate& imm,
+                             OperandWidth width) {
   const int64_t value = imm.value();
   if (value == 0) {
     return;
   }
-  if ((value > 0) || (value == kMinInt64)) {
+  if ((value > 0) || (value == kMinInt64) ||
+      (value == kMinInt32 && width == k32Bit)) {
     if (value == 1) {
-      decq(reg);
+      if (width == k32Bit) {
+        decl(reg);
+      } else {
+        decq(reg);
+      }
     } else {
       if (imm.is_int32()) {
-        subq(reg, imm);
+        if (width == k32Bit) {
+          subl(reg, imm);
+        } else {
+          subq(reg, imm);
+        }
       } else {
         ASSERT(reg != TMP);
+        ASSERT(width != k32Bit);
         LoadImmediate(TMP, imm);
         subq(reg, TMP);
       }
     }
   } else {
-    AddImmediate(reg, Immediate(-value));
+    AddImmediate(reg, Immediate(-value), width);
   }
 }
 
diff --git a/runtime/vm/compiler/assembler/assembler_x64.h b/runtime/vm/compiler/assembler/assembler_x64.h
index c6117d1..116ff36 100644
--- a/runtime/vm/compiler/assembler/assembler_x64.h
+++ b/runtime/vm/compiler/assembler/assembler_x64.h
@@ -484,6 +484,9 @@
   void cvttsd2siq(Register dst, XmmRegister src) {
     EmitQ(dst, src, 0x2C, 0x0F, 0xF2);
   }
+  void cvttsd2sil(Register dst, XmmRegister src) {
+    EmitL(dst, src, 0x2C, 0x0F, 0xF2);
+  }
   void movmskpd(Register dst, XmmRegister src) {
     EmitL(dst, src, 0x50, 0x0F, 0x66);
   }
@@ -579,10 +582,16 @@
   REGULAR_UNARY(dec, 0xFF, 1)
 #undef REGULAR_UNARY
 
+  // We could use kWord, kDoubleWord, and kQuadWord here, but it is rather
+  // confusing since the same sizes mean something different on ARM.
+  enum OperandWidth { k32Bit, k64Bit };
+
   void imull(Register reg, const Immediate& imm);
 
   void imulq(Register dst, const Immediate& imm);
-  void MulImmediate(Register reg, const Immediate& imm);
+  void MulImmediate(Register reg,
+                    const Immediate& imm,
+                    OperandWidth width = k64Bit);
 
   void shll(Register reg, const Immediate& imm);
   void shll(Register operand, Register shifter);
@@ -653,9 +662,13 @@
   // Methods for adding/subtracting an immediate value that may be loaded from
   // the constant pool.
   // TODO(koda): Assert that these are not used for heap objects.
-  void AddImmediate(Register reg, const Immediate& imm);
+  void AddImmediate(Register reg,
+                    const Immediate& imm,
+                    OperandWidth width = k64Bit);
   void AddImmediate(const Address& address, const Immediate& imm);
-  void SubImmediate(Register reg, const Immediate& imm);
+  void SubImmediate(Register reg,
+                    const Immediate& imm,
+                    OperandWidth width = k64Bit);
   void SubImmediate(const Address& address, const Immediate& imm);
 
   void Drop(intptr_t stack_elements, Register tmp = TMP);
diff --git a/runtime/vm/compiler/assembler/assembler_x64_test.cc b/runtime/vm/compiler/assembler/assembler_x64_test.cc
index 73ec3b7..a18453c 100644
--- a/runtime/vm/compiler/assembler/assembler_x64_test.cc
+++ b/runtime/vm/compiler/assembler/assembler_x64_test.cc
@@ -838,7 +838,7 @@
   __ movq(RAX, Immediate(kLargeConstant));
   __ movq(RCX, Immediate(kAnotherLargeConstant));
   __ imulq(RAX, RCX);
-  __ imulq(RCX, Immediate(kLargeConstant));
+  __ MulImmediate(RCX, Immediate(kLargeConstant));
   __ cmpq(RAX, RCX);
   __ j(EQUAL, &done);
   __ int3();
@@ -4628,7 +4628,17 @@
 }
 
 ASSEMBLER_TEST_GENERATE(DoubleToInt64Conversion, assembler) {
-  __ movq(RAX, Immediate(bit_cast<int64_t, double>(12.3)));
+  __ movq(RAX, Immediate(bit_cast<int64_t, double>(4.2e22)));
+  __ pushq(RAX);
+  __ movsd(XMM9, Address(RSP, 0));
+  __ popq(RAX);
+  __ cvttsd2siq(RAX, XMM9);
+  __ CompareImmediate(RAX, Immediate(0x8000000000000000ll));
+  Label ok;
+  __ j(EQUAL, &ok);
+  __ int3();  // cvttsd2siq overflow not detected
+  __ Bind(&ok);
+  __ movq(RAX, Immediate(bit_cast<int64_t, double>(4.2e11)));
   __ pushq(RAX);
   __ movsd(XMM9, Address(RSP, 0));
   __ movsd(XMM6, Address(RSP, 0));
@@ -4638,6 +4648,8 @@
   __ cvttsd2siq(R10, XMM9);
   __ cvttsd2siq(RDX, XMM9);
   __ subq(RDX, R10);
+  __ addq(RDX, RDX);
+  __ addq(RDX, R10);
   __ movq(RAX, RDX);
   __ ret();
 }
@@ -4645,11 +4657,20 @@
 ASSEMBLER_TEST_RUN(DoubleToInt64Conversion, test) {
   typedef int64_t (*DoubleToInt64ConversionCode)();
   int64_t res = reinterpret_cast<DoubleToInt64ConversionCode>(test->entry())();
-  EXPECT_EQ(0, res);
+  EXPECT_EQ(420000000000l, res);
   EXPECT_DISASSEMBLY(
       "movq rax,0x................\n"
       "push rax\n"
       "movsd xmm9,[rsp]\n"
+      "pop rax\n"
+      "cvttsd2siq rax,xmm9\n"
+      "movq r11,0x................\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movq rax,0x................\n"
+      "push rax\n"
+      "movsd xmm9,[rsp]\n"
       "movsd xmm6,[rsp]\n"
       "pop rax\n"
       "cvttsd2siq r10,xmm6\n"
@@ -4657,6 +4678,99 @@
       "cvttsd2siq r10,xmm9\n"
       "cvttsd2siq rdx,xmm9\n"
       "subq rdx,r10\n"
+      "addq rdx,rdx\n"
+      "addq rdx,r10\n"
+      "movq rax,rdx\n"
+      "ret\n");
+}
+
+ASSEMBLER_TEST_GENERATE(DoubleToInt32Conversion, assembler) {
+  // Check that a too big double results in the overflow value for a conversion
+  // to signed 32 bit.
+  __ movq(RAX, Immediate(bit_cast<int64_t, double>(4.2e11)));
+  __ pushq(RAX);
+  __ movsd(XMM9, Address(RSP, 0));
+  __ popq(RAX);
+  __ cvttsd2sil(RAX, XMM9);
+  __ CompareImmediate(RAX, Immediate(0x80000000ll));
+  {
+    Label ok;
+    __ j(EQUAL, &ok);
+    __ int3();  // cvttsd2sil overflow not detected.
+    __ Bind(&ok);
+  }
+
+  // Check that negative floats result in signed 32 bit results with the top
+  // bits zeroed.
+  __ movq(RAX, Immediate(bit_cast<int64_t, double>(-42.0)));
+  __ pushq(RAX);
+  __ movsd(XMM9, Address(RSP, 0));
+  __ popq(RAX);
+  // These high 1-bits will be zeroed in the next insn.
+  __ movq(R10, Immediate(-1));
+  // Put -42 in r10d, zeroing the high bits of r10.
+  __ cvttsd2sil(R10, XMM9);
+  __ CompareImmediate(R10, Immediate(-42 & 0xffffffffll));
+  {
+    Label ok;
+    __ j(EQUAL, &ok);
+    __ int3();  // cvttsd2sil negative result error
+    __ Bind(&ok);
+  }
+
+  // Check for correct result for positive in-range input.
+  __ movq(RAX, Immediate(bit_cast<int64_t, double>(42.0)));
+  __ pushq(RAX);
+  __ movsd(XMM9, Address(RSP, 0));
+  __ movsd(XMM6, Address(RSP, 0));
+  __ popq(RAX);
+  __ cvttsd2sil(R10, XMM6);
+  __ cvttsd2sil(RDX, XMM6);
+  __ cvttsd2sil(R10, XMM9);
+  __ cvttsd2sil(RDX, XMM9);
+  __ subq(RDX, R10);
+  __ addq(RDX, RDX);
+  __ addq(RDX, R10);
+  __ movq(RAX, RDX);
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(DoubleToInt32Conversion, test) {
+  typedef int64_t (*DoubleToInt32ConversionCode)();
+  int64_t res = reinterpret_cast<DoubleToInt32ConversionCode>(test->entry())();
+  EXPECT_EQ(42, res);
+  EXPECT_DISASSEMBLY(
+      "movq rax,0x................\n"
+      "push rax\n"
+      "movsd xmm9,[rsp]\n"
+      "pop rax\n"
+      "cvttsd2sil rax,xmm9\n"
+      "movl r11,0x........\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movq rax,0x................\n"
+      "push rax\n"
+      "movsd xmm9,[rsp]\n"
+      "pop rax\n"
+      "movq r10,-1\n"
+      "cvttsd2sil r10,xmm9\n"
+      "movl r11,0x........\n"
+      "cmpq r10,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movq rax,0x................\n"
+      "push rax\n"
+      "movsd xmm9,[rsp]\n"
+      "movsd xmm6,[rsp]\n"
+      "pop rax\n"
+      "cvttsd2sil r10,xmm6\n"
+      "cvttsd2sil rdx,xmm6\n"
+      "cvttsd2sil r10,xmm9\n"
+      "cvttsd2sil rdx,xmm9\n"
+      "subq rdx,r10\n"
+      "addq rdx,rdx\n"
+      "addq rdx,r10\n"
       "movq rax,rdx\n"
       "ret\n");
 }
@@ -5435,6 +5549,160 @@
       "ret\n");
 }
 
+ASSEMBLER_TEST_GENERATE(ImmediateMacros, assembler) {
+  const intptr_t kBillion = 1000 * 1000 * 1000;
+  {
+    __ LoadImmediate(RAX, Immediate(42));
+    __ MulImmediate(RAX, Immediate(kBillion));
+    Label ok;
+    __ CompareImmediate(RAX, Immediate(42 * kBillion));
+    __ j(EQUAL, &ok);
+    __ int3();  // MulImmediate 64 bit.
+    __ Bind(&ok);
+  }
+  {
+    __ LoadImmediate(RAX, Immediate(42));
+    __ MulImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    Label ok;
+    __ CompareImmediate(RAX, Immediate((42 * kBillion) & 0xffffffffll));
+    __ j(EQUAL, &ok);
+    __ int3();  // MulImmediate 32 bit.
+    __ Bind(&ok);
+  }
+  {
+    __ LoadImmediate(RAX, Immediate(kBillion));
+    __ AddImmediate(RAX, Immediate(41 * kBillion));
+    Label ok;
+    __ CompareImmediate(RAX, Immediate(42 * kBillion));
+    __ j(EQUAL, &ok);
+    __ int3();  // AddImmediate 64 bit.
+    __ Bind(&ok);
+  }
+  {
+    __ LoadImmediate(RAX, Immediate(kBillion));
+    __ AddImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    __ AddImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    __ AddImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    Label ok;
+    __ CompareImmediate(RAX, Immediate((4 * kBillion) & 0xffffffffll));
+    __ j(EQUAL, &ok);
+    __ int3();  // AddImmediate 32 bit.
+    __ Bind(&ok);
+  }
+  {
+    __ LoadImmediate(RAX, Immediate(kBillion));
+    __ AddImmediate(RAX, Immediate(static_cast<int32_t>(3 * kBillion)),
+                    Assembler::k32Bit);
+    __ AddImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    __ AddImmediate(RAX, Immediate(-kBillion), Assembler::k32Bit);
+    Label ok;
+    __ CompareImmediate(RAX, Immediate((4 * kBillion) & 0xffffffffll));
+    __ j(EQUAL, &ok);
+    __ int3();  // AddImmediate negative 32 bit.
+    __ Bind(&ok);
+  }
+  {
+    __ LoadImmediate(RAX, Immediate(kBillion));
+    __ SubImmediate(RAX, Immediate(43 * kBillion));
+    Label ok;
+    __ CompareImmediate(RAX, Immediate(-42 * kBillion));
+    __ j(EQUAL, &ok);
+    __ int3();  // SubImmediate 64 bit.
+    __ Bind(&ok);
+  }
+  {
+    __ LoadImmediate(RAX, Immediate(-kBillion));
+    __ SubImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    __ SubImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    __ SubImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    Label ok;
+    __ CompareImmediate(RAX, Immediate((-4 * kBillion) & 0xffffffffll));
+    __ j(EQUAL, &ok);
+    __ int3();  // SubImmediate 32 bit.
+    __ Bind(&ok);
+  }
+  {
+    __ LoadImmediate(RAX, Immediate(kBillion));
+    __ SubImmediate(RAX, Immediate((-3 * kBillion) & 0xffffffffll),
+                    Assembler::k32Bit);
+    __ SubImmediate(RAX, Immediate(kBillion), Assembler::k32Bit);
+    __ SubImmediate(RAX, Immediate(-kBillion), Assembler::k32Bit);
+    Label ok;
+    __ CompareImmediate(RAX, Immediate((4 * kBillion) & 0xffffffffll));
+    __ j(EQUAL, &ok);
+    __ int3();  // SubImmediate 32 bit.
+    __ Bind(&ok);
+  }
+  __ LoadImmediate(RAX, Immediate(42));
+  __ ret();
+}
+
+ASSEMBLER_TEST_RUN(ImmediateMacros, test) {
+  typedef int (*ImmediateMacrosCode)();
+  int res = reinterpret_cast<ImmediateMacrosCode>(test->entry())();
+  EXPECT_EQ(42, res);
+  EXPECT_DISASSEMBLY(
+      "movl rax,0x2a\n"
+      "imulq rax,rax,0x........\n"
+      "movq r11,0x................\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movl rax,0x2a\n"
+      "imull rax,rax,0x........\n"
+      "movl r11,0x........\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movl rax,0x........\n"
+      "movq r11,0x................\n"
+      "addq rax,r11\n"
+      "movq r11,0x................\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movl rax,0x........\n"
+      "addl rax,0x........\n"
+      "addl rax,0x........\n"
+      "addl rax,0x........\n"
+      "movl r11,0x........\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movl rax,0x........\n"
+      "subl rax,0x........\n"
+      "addl rax,0x........\n"
+      "subl rax,0x........\n"
+      "movl r11,0x........\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movl rax,0x........\n"
+      "movq r11,0x................\n"
+      "subq rax,r11\n"
+      "movq r11,0x................\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movq rax,-0x........\n"
+      "subl rax,0x........\n"
+      "subl rax,0x........\n"
+      "subl rax,0x........\n"
+      "cmpq rax,0x........\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movl rax,0x........\n"
+      "subl rax,0x........\n"
+      "subl rax,0x........\n"
+      "addl rax,0x........\n"
+      "movl r11,0x........\n"
+      "cmpq rax,r11\n"
+      "jz 0x................\n"
+      "int3\n"
+      "movl rax,0x2a\n"
+      "ret\n");
+}
+
 // clang-format off
 #define ALU_TEST(NAME, WIDTH, INTRO, LHS, RHS, OUTRO)                          \
   ASSEMBLER_TEST_GENERATE(NAME, assembler) {                                   \
diff --git a/runtime/vm/compiler/assembler/disassembler_arm64.cc b/runtime/vm/compiler/assembler/disassembler_arm64.cc
index 06b9ad7..ad102433 100644
--- a/runtime/vm/compiler/assembler/disassembler_arm64.cc
+++ b/runtime/vm/compiler/assembler/disassembler_arm64.cc
@@ -1061,8 +1061,12 @@
       break;
     case 2: {
       if ((instr->RnField() == R31) && (instr->IsShift()) &&
-          (instr->Imm16Field() == 0) && (instr->ShiftTypeField() == LSL)) {
-        Format(instr, "mov'sf 'rd, 'rm");
+          (instr->ShiftTypeField() == LSL)) {
+        if (instr->ShiftAmountField() == 0) {
+          Format(instr, "mov'sf 'rd, 'rm");
+        } else {
+          Format(instr, "lsl'sf 'rd, 'rm, 'imms");
+        }
       } else {
         Format(instr, "orr'sf 'rd, 'rn, 'shift_op");
       }
@@ -1134,25 +1138,50 @@
 }
 
 void ARM64Decoder::DecodeMiscDP3Source(Instr* instr) {
-  if ((instr->Bits(29, 2) == 0) && (instr->Bits(21, 3) == 0) &&
-      (instr->Bit(15) == 0)) {
-    if (instr->RaField() == R31) {
+  bool zero_operand = instr->RaField() == R31;
+  int32_t mask = B31 | B30 | B29 | B23 | B22 | B21 | B15 | MiscDP3SourceMask;
+  int32_t bits = instr->InstructionBits() & mask;
+
+  if (bits == MADD) {
+    if (zero_operand) {
       Format(instr, "mul'sf 'rd, 'rn, 'rm");
     } else {
       Format(instr, "madd'sf 'rd, 'rn, 'rm, 'ra");
     }
-  } else if ((instr->Bits(29, 2) == 0) && (instr->Bits(21, 3) == 0) &&
-             (instr->Bit(15) == 1)) {
-    Format(instr, "msub'sf 'rd, 'rn, 'rm, 'ra");
-  } else if ((instr->Bits(29, 2) == 0) && (instr->Bits(21, 3) == 2) &&
-             (instr->Bit(15) == 0)) {
+  } else if (bits == MSUB) {
+    if (zero_operand) {
+      Format(instr, "mneg'sf 'rd, 'rn, 'rm");
+    } else {
+      Format(instr, "msub'sf 'rd, 'rn, 'rm, 'ra");
+    }
+  } else if (bits == SMULH) {
     Format(instr, "smulh 'rd, 'rn, 'rm");
-  } else if ((instr->Bits(29, 2) == 0) && (instr->Bits(21, 3) == 6) &&
-             (instr->Bit(15) == 0)) {
+  } else if (bits == UMULH) {
     Format(instr, "umulh 'rd, 'rn, 'rm");
-  } else if ((instr->Bits(29, 3) == 4) && (instr->Bits(21, 3) == 5) &&
-             (instr->Bit(15) == 0)) {
-    Format(instr, "umaddl 'rd, 'rn, 'rm, 'ra");
+  } else if (bits == UMADDL) {
+    if (zero_operand) {
+      Format(instr, "umull 'rd, 'rn, 'rm");
+    } else {
+      Format(instr, "umaddl 'rd, 'rn, 'rm, 'ra");
+    }
+  } else if (bits == SMADDL) {
+    if (zero_operand) {
+      Format(instr, "smull 'rd, 'rn, 'rm");
+    } else {
+      Format(instr, "smaddl 'rd, 'rn, 'rm, 'ra");
+    }
+  } else if (bits == SMSUBL) {
+    if (zero_operand) {
+      Format(instr, "smnegl 'rd, 'rn, 'rm");
+    } else {
+      Format(instr, "smsubl 'rd, 'rn, 'rm, 'ra");
+    }
+  } else if (bits == UMSUBL) {
+    if (zero_operand) {
+      Format(instr, "umnegl 'rd, 'rn, 'rm");
+    } else {
+      Format(instr, "umsubl 'rd, 'rn, 'rm, 'ra");
+    }
   } else {
     Unknown(instr);
   }
diff --git a/runtime/vm/compiler/assembler/disassembler_x86.cc b/runtime/vm/compiler/assembler/disassembler_x86.cc
index bd9b2de..56b3878 100644
--- a/runtime/vm/compiler/assembler/disassembler_x86.cc
+++ b/runtime/vm/compiler/assembler/disassembler_x86.cc
@@ -1559,7 +1559,7 @@
       Print(",");
       current += PrintImmediate(current, BYTE_SIZE);
     }
-  } else if (opcode == 0xBA && (*current & 0xE0) == 0xE0) {
+  } else if (opcode == 0xBA && (*current & 0x60) == 0x60) {
     // bt? immediate instruction
     int r = (*current >> 3) & 7;
     static const char* const names[4] = {"bt", "bts", "btr", "btc"};
diff --git a/runtime/vm/constants_arm64.h b/runtime/vm/constants_arm64.h
index 1665a3a..829e3d9 100644
--- a/runtime/vm/constants_arm64.h
+++ b/runtime/vm/constants_arm64.h
@@ -525,7 +525,10 @@
   MSUB = MiscDP3SourceFixed | B15,
   SMULH = MiscDP3SourceFixed | B31 | B22,
   UMULH = MiscDP3SourceFixed | B31 | B23 | B22,
+  SMADDL = MiscDP3SourceFixed | B31 | B21,
   UMADDL = MiscDP3SourceFixed | B31 | B23 | B21,
+  SMSUBL = MiscDP3SourceFixed | B31 | B21 | B15,
+  UMSUBL = MiscDP3SourceFixed | B31 | B23 | B21 | B15,
 };
 
 // C3.5.10
diff --git a/runtime/vm/simulator_arm64.cc b/runtime/vm/simulator_arm64.cc
index a97c62c..4c9495a 100644
--- a/runtime/vm/simulator_arm64.cc
+++ b/runtime/vm/simulator_arm64.cc
@@ -2573,14 +2573,22 @@
     const uint64_t alu_out = static_cast<uint64_t>(res >> 64);
 #endif  // HOST_OS_WINDOWS
     set_register(instr, rd, alu_out, R31IsZR);
-  } else if ((instr->Bits(29, 3) == 4) && (instr->Bits(21, 3) == 5) &&
-             (instr->Bit(15) == 0)) {
-    // Format(instr, "umaddl 'rd, 'rn, 'rm, 'ra");
-    const uint64_t rn_val = static_cast<uint32_t>(get_wregister(rn, R31IsZR));
-    const uint64_t rm_val = static_cast<uint32_t>(get_wregister(rm, R31IsZR));
-    const uint64_t ra_val = get_register(ra, R31IsZR);
-    const uint64_t alu_out = ra_val + (rn_val * rm_val);
-    set_register(instr, rd, alu_out, R31IsZR);
+  } else if ((instr->Bits(29, 3) == 4) && (instr->Bit(15) == 0)) {
+    if (instr->Bits(21, 3) == 5) {
+      // Format(instr, "umaddl 'rd, 'rn, 'rm, 'ra");
+      const uint64_t rn_val = static_cast<uint32_t>(get_wregister(rn, R31IsZR));
+      const uint64_t rm_val = static_cast<uint32_t>(get_wregister(rm, R31IsZR));
+      const uint64_t ra_val = get_register(ra, R31IsZR);
+      const uint64_t alu_out = ra_val + (rn_val * rm_val);
+      set_register(instr, rd, alu_out, R31IsZR);
+    } else {
+      // Format(instr, "smaddl 'rd, 'rn, 'rm, 'ra");
+      const int64_t rn_val = static_cast<int32_t>(get_wregister(rn, R31IsZR));
+      const int64_t rm_val = static_cast<int32_t>(get_wregister(rm, R31IsZR));
+      const int64_t ra_val = get_register(ra, R31IsZR);
+      const int64_t alu_out = ra_val + (rn_val * rm_val);
+      set_register(instr, rd, alu_out, R31IsZR);
+    }
   } else {
     UnimplementedInstruction(instr);
   }