Implements ARM SIMD comparison instructions (vceq, vcge, vcgt) and vorn.

R=regis@google.com

Review URL: https://codereview.chromium.org//19678020

git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart@25189 260f80e4-7a28-3924-810f-c04153c831b5
diff --git a/runtime/vm/assembler_arm.cc b/runtime/vm/assembler_arm.cc
index 91015c3..5e4bfe6 100644
--- a/runtime/vm/assembler_arm.cc
+++ b/runtime/vm/assembler_arm.cc
@@ -1327,6 +1327,11 @@
 }
 
 
+void Assembler::vornq(QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B21 | B20 | B8 | B4, kByte, qd, qn, qm);  // qd = qn | ~qm
+}
+
+
 void Assembler::vdup(OperandSize sz, QRegister qd, DRegister dm, int idx) {
   ASSERT((sz != kDWord) && (sz != kSWord) && (sz != kWordPair));
   int code = 0;
@@ -1368,6 +1373,51 @@
 }
 
 
+void Assembler::vceqqi(OperandSize sz,
+                      QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B24 | B11 | B4, sz, qd, qn, qm);  // Lane: qn == qm ? ~0 : 0
+}
+
+
+void Assembler::vceqqs(QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B11 | B10 | B9, kSWord, qd, qn, qm);  // Float qn == qm mask
+}
+
+
+void Assembler::vcgeqi(OperandSize sz,
+                      QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B9 | B8 | B4, sz, qd, qn, qm);  // Signed lane: qn >= qm
+}
+
+
+void Assembler::vcugeqi(OperandSize sz,
+                      QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B24 | B9 | B8 | B4, sz, qd, qn, qm);  // Unsigned: qn >= qm
+}
+
+
+void Assembler::vcgeqs(QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B24 | B11 | B10 | B9, kSWord, qd, qn, qm);  // Float qn >= qm
+}
+
+
+void Assembler::vcgtqi(OperandSize sz,
+                      QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B9 | B8, sz, qd, qn, qm);  // Signed lane: qn > qm
+}
+
+
+void Assembler::vcugtqi(OperandSize sz,
+                      QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B24 | B9 | B8, sz, qd, qn, qm);  // Unsigned lane: qn > qm
+}
+
+
+void Assembler::vcgtqs(QRegister qd, QRegister qn, QRegister qm) {
+  EmitSIMDqqq(B24 | B21 | B11 | B10 | B9, kSWord, qd, qn, qm);  // Float qn > qm
+}
+
+
 void Assembler::svc(uint32_t imm24, Condition cond) {
   ASSERT(cond != kNoCondition);
   ASSERT(imm24 < (1 << 24));
diff --git a/runtime/vm/assembler_arm.h b/runtime/vm/assembler_arm.h
index 66e87e8..ece3212 100644
--- a/runtime/vm/assembler_arm.h
+++ b/runtime/vm/assembler_arm.h
@@ -485,6 +485,16 @@
   void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
   void veorq(QRegister qd, QRegister qn, QRegister qm);
   void vorrq(QRegister qd, QRegister qn, QRegister qm);
+  void vornq(QRegister qd, QRegister qn, QRegister qm);
+
+  void vceqqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
+  void vceqqs(QRegister qd, QRegister qn, QRegister qm);
+  void vcgeqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
+  void vcugeqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
+  void vcgeqs(QRegister qd, QRegister qn, QRegister qm);
+  void vcgtqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
+  void vcugtqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
+  void vcgtqs(QRegister qd, QRegister qn, QRegister qm);
 
   void vabss(SRegister sd, SRegister sm, Condition cond = AL);
   void vabsd(DRegister dd, DRegister dm, Condition cond = AL);
diff --git a/runtime/vm/assembler_arm_test.cc b/runtime/vm/assembler_arm_test.cc
index c29ca70..f44a564 100644
--- a/runtime/vm/assembler_arm_test.cc
+++ b/runtime/vm/assembler_arm_test.cc
@@ -2375,6 +2375,48 @@
 }
 
 
+ASSEMBLER_TEST_GENERATE(Vornq, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    // Q0 = 0xfffffff0 in each of S0..S3.
+    __ LoadImmediate(R0, 0xfffffff0);
+    __ vmovsr(S0, R0);
+    __ vmovsr(S1, R0);
+    __ vmovsr(S2, R0);
+    __ vmovsr(S3, R0);
+
+    // Q1 = 0 in each of S4..S7.
+    __ LoadImmediate(R0, 0);
+    __ vmovsr(S4, R0);
+    __ vmovsr(S5, R0);
+    __ vmovsr(S6, R0);
+    __ vmovsr(S7, R0);
+
+    // Q2 = Q1 | ~Q0 = 15 15 15 15
+    __ vornq(Q2, Q1, Q0);
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = 60
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, 60);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vornq, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(60, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // 4 * 15
+}
+
+
 ASSEMBLER_TEST_GENERATE(Vorrq, assembler) {
   if (CPUFeatures::neon_supported()) {
     // Q0
@@ -2554,6 +2596,334 @@
 }
 
 
+ASSEMBLER_TEST_GENERATE(Vceqqi32, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S0, R0);
+    __ mov(R0, ShifterOperand(2));
+    __ vmovsr(S1, R0);
+    __ mov(R0, ShifterOperand(3));
+    __ vmovsr(S2, R0);
+    __ mov(R0, ShifterOperand(4));
+    __ vmovsr(S3, R0);  // Q0 = 1 2 3 4
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S4, R0);
+    __ mov(R0, ShifterOperand(20));
+    __ vmovsr(S5, R0);
+    __ mov(R0, ShifterOperand(3));
+    __ vmovsr(S6, R0);
+    __ mov(R0, ShifterOperand(40));
+    __ vmovsr(S7, R0);  // Q1 = 1 20 3 40
+
+    __ vceqqi(kWord, Q2, Q1, Q0);  // Q2 = (Q1 == Q0) = -1 0 -1 0
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = -2
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, -2);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vceqqi32, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(-2, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // Two -1 lanes
+}
+
+
+ASSEMBLER_TEST_GENERATE(Vceqqs, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    __ LoadSImmediate(S0, 1.0);
+    __ LoadSImmediate(S1, 2.0);
+    __ LoadSImmediate(S2, 3.0);
+    __ LoadSImmediate(S3, 4.0);  // Q0 = 1.0 2.0 3.0 4.0
+    __ LoadSImmediate(S4, 1.0);
+    __ LoadSImmediate(S5, 4.0);
+    __ LoadSImmediate(S6, 3.0);
+    __ LoadSImmediate(S7, 8.0);  // Q1 = 1.0 4.0 3.0 8.0
+
+    __ vceqqs(Q2, Q1, Q0);  // Q2 = (Q1 == Q0) = -1 0 -1 0
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = -2
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, -2);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vceqqs, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(-2, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // Two -1 lanes
+}
+
+
+ASSEMBLER_TEST_GENERATE(Vcgeqi32, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S0, R0);
+    __ mov(R0, ShifterOperand(2));
+    __ vmovsr(S1, R0);
+    __ mov(R0, ShifterOperand(3));
+    __ vmovsr(S2, R0);
+    __ mov(R0, ShifterOperand(4));
+    __ vmovsr(S3, R0);  // Q0 = 1 2 3 4
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S4, R0);
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S5, R0);
+    __ mov(R0, ShifterOperand(3));
+    __ vmovsr(S6, R0);
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S7, R0);  // Q1 = 1 1 3 1
+
+    __ vcgeqi(kWord, Q2, Q1, Q0);  // Q2 = (Q1 >= Q0) = -1 0 -1 0
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = -2
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, -2);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vcgeqi32, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(-2, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // Two -1 lanes
+}
+
+
+ASSEMBLER_TEST_GENERATE(Vcugeqi32, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S0, R0);
+    __ mov(R0, ShifterOperand(2));
+    __ vmovsr(S1, R0);
+    __ mov(R0, ShifterOperand(3));
+    __ vmovsr(S2, R0);
+    __ mov(R0, ShifterOperand(4));
+    __ vmovsr(S3, R0);  // Q0 = 1 2 3 4
+    __ LoadImmediate(R0, -1);
+    __ vmovsr(S4, R0);
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S5, R0);
+    __ LoadImmediate(R0, -3);
+    __ vmovsr(S6, R0);
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S7, R0);  // Q1 = 0xffffffff 1 0xfffffffd 1
+
+    __ vcugeqi(kWord, Q2, Q1, Q0);  // Unsigned Q1 >= Q0: -1 0 -1 0
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = -2
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, -2);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vcugeqi32, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(-2, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // Two -1 lanes
+}
+
+
+ASSEMBLER_TEST_GENERATE(Vcgeqs, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    __ LoadSImmediate(S0, 1.0);
+    __ LoadSImmediate(S1, 2.0);
+    __ LoadSImmediate(S2, 3.0);
+    __ LoadSImmediate(S3, 4.0);  // Q0 = 1.0 2.0 3.0 4.0
+    __ LoadSImmediate(S4, 1.0);
+    __ LoadSImmediate(S5, 1.0);
+    __ LoadSImmediate(S6, 3.0);
+    __ LoadSImmediate(S7, 1.0);  // Q1 = 1.0 1.0 3.0 1.0
+
+    __ vcgeqs(Q2, Q1, Q0);  // Q2 = (Q1 >= Q0) = -1 0 -1 0
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = -2
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, -2);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vcgeqs, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(-2, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // Two -1 lanes
+}
+
+
+ASSEMBLER_TEST_GENERATE(Vcgtqi32, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S0, R0);
+    __ mov(R0, ShifterOperand(2));
+    __ vmovsr(S1, R0);
+    __ mov(R0, ShifterOperand(3));
+    __ vmovsr(S2, R0);
+    __ mov(R0, ShifterOperand(4));
+    __ vmovsr(S3, R0);  // Q0 = 1 2 3 4
+    __ mov(R0, ShifterOperand(2));
+    __ vmovsr(S4, R0);
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S5, R0);
+    __ mov(R0, ShifterOperand(4));
+    __ vmovsr(S6, R0);
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S7, R0);  // Q1 = 2 1 4 1
+
+    __ vcgtqi(kWord, Q2, Q1, Q0);  // Q2 = (Q1 > Q0) = -1 0 -1 0
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = -2
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, -2);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vcgtqi32, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(-2, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // Two -1 lanes
+}
+
+
+ASSEMBLER_TEST_GENERATE(Vcugtqi32, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S0, R0);
+    __ mov(R0, ShifterOperand(2));
+    __ vmovsr(S1, R0);
+    __ mov(R0, ShifterOperand(3));
+    __ vmovsr(S2, R0);
+    __ mov(R0, ShifterOperand(4));
+    __ vmovsr(S3, R0);  // Q0 = 1 2 3 4
+    __ LoadImmediate(R0, -1);
+    __ vmovsr(S4, R0);
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S5, R0);
+    __ LoadImmediate(R0, -3);
+    __ vmovsr(S6, R0);
+    __ mov(R0, ShifterOperand(1));
+    __ vmovsr(S7, R0);  // Q1 = 0xffffffff 1 0xfffffffd 1
+
+    __ vcugtqi(kWord, Q2, Q1, Q0);  // Unsigned Q1 > Q0: -1 0 -1 0
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = -2
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, -2);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vcugtqi32, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(-2, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // Two -1 lanes
+}
+
+
+ASSEMBLER_TEST_GENERATE(Vcgtqs, assembler) {
+  if (CPUFeatures::neon_supported()) {
+    __ LoadSImmediate(S0, 1.0);
+    __ LoadSImmediate(S1, 2.0);
+    __ LoadSImmediate(S2, 3.0);
+    __ LoadSImmediate(S3, 4.0);  // Q0 = 1.0 2.0 3.0 4.0
+    __ LoadSImmediate(S4, 2.0);
+    __ LoadSImmediate(S5, 1.0);
+    __ LoadSImmediate(S6, 4.0);
+    __ LoadSImmediate(S7, 1.0);  // Q1 = 2.0 1.0 4.0 1.0
+
+    __ vcgtqs(Q2, Q1, Q0);  // Q2 = (Q1 > Q0) = -1 0 -1 0
+
+    __ vmovrs(R0, S8);  // Read the Q2 lanes back through S8..S11.
+    __ vmovrs(R1, S9);
+    __ vmovrs(R2, S10);
+    __ vmovrs(R3, S11);
+
+    __ add(R0, R0, ShifterOperand(R1));
+    __ add(R0, R0, ShifterOperand(R2));
+    __ add(R0, R0, ShifterOperand(R3));  // R0 = sum of Q2 lanes = -2
+    __ bx(LR);
+  } else {
+    __ LoadImmediate(R0, -2);  // No NEON: return the expected sum.
+    __ bx(LR);
+  }
+}
+
+
+ASSEMBLER_TEST_RUN(Vcgtqs, test) {
+  EXPECT(test != NULL);
+  typedef int (*Tst)();
+  EXPECT_EQ(-2, EXECUTE_TEST_CODE_INT32(Tst, test->entry()));  // Two -1 lanes
+}
+
+
 // Called from assembler_test.cc.
 // LR: return address.
 // R0: context.
diff --git a/runtime/vm/disassembler_arm.cc b/runtime/vm/disassembler_arm.cc
index f07b97f..078d0a3 100644
--- a/runtime/vm/disassembler_arm.cc
+++ b/runtime/vm/disassembler_arm.cc
@@ -1326,6 +1326,9 @@
                (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 2)) {
       Format(instr, "veorq 'qd, 'qn, 'qm");
     } else if ((instr->Bits(8, 4) == 1) && (instr->Bit(4) == 1) &&
+               (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 0)) {
+      Format(instr, "vornq 'qd, 'qn, 'qm");
+    } else if ((instr->Bits(8, 4) == 1) && (instr->Bit(4) == 1) &&
                (instr->Bits(20, 2) == 2) && (instr->Bits(23, 2) == 0)) {
       if (instr->QmField() == instr->QnField()) {
         Format(instr, "vmovq 'qd, 'qm");
@@ -1345,6 +1348,30 @@
       } else {
         Unknown(instr);
       }
+    } else if ((instr->Bits(8, 4) == 8) && (instr->Bit(4) == 1) &&
+               (instr->Bits(23, 2) == 2)) {
+      Format(instr, "vceqq'sz 'qd, 'qn, 'qm");
+    } else if ((instr->Bits(8, 4) == 14) && (instr->Bit(4) == 0) &&
+               (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 0)) {
+      Format(instr, "vceqqs 'qd, 'qn, 'qm");
+    } else if ((instr->Bits(8, 4) == 3) && (instr->Bit(4) == 1) &&
+               (instr->Bits(23, 2) == 0)) {
+      Format(instr, "vcgeq'sz 'qd, 'qn, 'qm");
+    } else if ((instr->Bits(8, 4) == 3) && (instr->Bit(4) == 1) &&
+               (instr->Bits(23, 2) == 2)) {
+      Format(instr, "vcugeq'sz 'qd, 'qn, 'qm");
+    } else if ((instr->Bits(8, 4) == 14) && (instr->Bit(4) == 0) &&
+               (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 2)) {
+      Format(instr, "vcgeqs 'qd, 'qn, 'qm");
+    } else if ((instr->Bits(8, 4) == 3) && (instr->Bit(4) == 0) &&
+               (instr->Bits(23, 2) == 0)) {
+      Format(instr, "vcgtq'sz 'qd, 'qn, 'qm");
+    } else if ((instr->Bits(8, 4) == 3) && (instr->Bit(4) == 0) &&
+               (instr->Bits(23, 2) == 2)) {
+      Format(instr, "vcugtq'sz 'qd, 'qn, 'qm");
+    } else if ((instr->Bits(8, 4) == 14) && (instr->Bit(4) == 0) &&
+               (instr->Bits(20, 2) == 2) && (instr->Bits(23, 2) == 2)) {
+      Format(instr, "vcgtqs 'qd, 'qn, 'qm");
     } else {
       Unknown(instr);
     }
diff --git a/runtime/vm/simulator_arm.cc b/runtime/vm/simulator_arm.cc
index 442a542..ad2fc20 100644
--- a/runtime/vm/simulator_arm.cc
+++ b/runtime/vm/simulator_arm.cc
@@ -2914,9 +2914,15 @@
     int8_t* s8d_8 = reinterpret_cast<int8_t*>(&s8d);
     int8_t* s8n_8 = reinterpret_cast<int8_t*>(&s8n);
     int8_t* s8m_8 = reinterpret_cast<int8_t*>(&s8m);
+    uint8_t* s8n_u8 = reinterpret_cast<uint8_t*>(&s8n);
+    uint8_t* s8m_u8 = reinterpret_cast<uint8_t*>(&s8m);
     int16_t* s8d_16 = reinterpret_cast<int16_t*>(&s8d);
     int16_t* s8n_16 = reinterpret_cast<int16_t*>(&s8n);
     int16_t* s8m_16 = reinterpret_cast<int16_t*>(&s8m);
+    uint16_t* s8n_u16 = reinterpret_cast<uint16_t*>(&s8n);
+    uint16_t* s8m_u16 = reinterpret_cast<uint16_t*>(&s8m);
+    int32_t* s8n_32 = reinterpret_cast<int32_t*>(&s8n);
+    int32_t* s8m_32 = reinterpret_cast<int32_t*>(&s8m);
     int64_t* s8d_64 = reinterpret_cast<int64_t*>(&s8d);
     int64_t* s8n_64 = reinterpret_cast<int64_t*>(&s8n);
     int64_t* s8m_64 = reinterpret_cast<int64_t*>(&s8m);
@@ -3014,6 +3020,12 @@
         s8d.data_[i].u = s8n.data_[i].u ^ s8m.data_[i].u;
       }
     } else if ((instr->Bits(8, 4) == 1) && (instr->Bit(4) == 1) &&
+               (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 0)) {
+      // Format(instr, "vornq 'qd, 'qn, 'qm");
+      for (int i = 0; i < 4; i++) {
+        s8d.data_[i].u = s8n.data_[i].u | ~s8m.data_[i].u;
+      }
+    } else if ((instr->Bits(8, 4) == 1) && (instr->Bit(4) == 1) &&
                (instr->Bits(20, 2) == 2) && (instr->Bits(23, 2) == 0)) {
       if (qm == qn) {
         // Format(instr, "vmovq 'qd, 'qm");
@@ -3037,26 +3049,152 @@
         // Format(instr, "vdupb 'qd, 'dm['imm4_vdup]");
         int8_t* dm_b = reinterpret_cast<int8_t*>(&dm_value);
         idx = imm4 >> 1;
+        int8_t val = dm_b[idx];
         for (int i = 0; i < 16; i++) {
-          s8d_8[i] = dm_b[idx];
+          s8d_8[i] = val;
         }
       } else if ((imm4 & 2) != 0) {
         // Format(instr, "vduph 'qd, 'dm['imm4_vdup]");
         int16_t* dm_h = reinterpret_cast<int16_t*>(&dm_value);
         idx = imm4 >> 2;
+        int16_t val = dm_h[idx];
         for (int i = 0; i < 8; i++) {
-          s8d_16[i] = dm_h[idx];
+          s8d_16[i] = val;
         }
       } else if ((imm4 & 4) != 0) {
         // Format(instr, "vdupw 'qd, 'dm['imm4_vdup]");
         int32_t* dm_w = reinterpret_cast<int32_t*>(&dm_value);
         idx = imm4 >> 3;
+        int32_t val = dm_w[idx];
         for (int i = 0; i < 4; i++) {
-          s8d.data_[i].u = dm_w[idx];
+          s8d.data_[i].u = val;
         }
       } else {
         UnimplementedInstruction(instr);
       }
+    } else if ((instr->Bits(8, 4) == 8) && (instr->Bit(4) == 1) &&
+               (instr->Bits(23, 2) == 2)) {
+      // Format(instr, "vceqq'sz 'qd, 'qn, 'qm");
+      const int size = instr->Bits(20, 2);
+      if (size == 0) {
+        for (int i = 0; i < 16; i++) {
+          s8d_8[i] = s8n_8[i] == s8m_8[i] ? 0xff : 0;
+        }
+      } else if (size == 1) {
+        for (int i = 0; i < 8; i++) {
+          s8d_16[i] = s8n_16[i] == s8m_16[i] ? 0xffff : 0;
+        }
+      } else if (size == 2) {
+        for (int i = 0; i < 4; i++) {
+          s8d.data_[i].u = s8n.data_[i].u == s8m.data_[i].u ? 0xffffffff : 0;
+        }
+      } else if (size == 3) {
+        UnimplementedInstruction(instr);
+      } else {
+        UNREACHABLE();
+      }
+    } else if ((instr->Bits(8, 4) == 14) && (instr->Bit(4) == 0) &&
+               (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 0)) {
+      // Format(instr, "vceqqs 'qd, 'qn, 'qm");
+      for (int i = 0; i < 4; i++) {
+        s8d.data_[i].u = s8n.data_[i].f == s8m.data_[i].f ? 0xffffffff : 0;
+      }
+    } else if ((instr->Bits(8, 4) == 3) && (instr->Bit(4) == 1) &&
+               (instr->Bits(23, 2) == 0)) {
+      // Format(instr, "vcgeq'sz 'qd, 'qn, 'qm");
+      const int size = instr->Bits(20, 2);
+      if (size == 0) {
+        for (int i = 0; i < 16; i++) {
+          s8d_8[i] = s8n_8[i] >= s8m_8[i] ? 0xff : 0;
+        }
+      } else if (size == 1) {
+        for (int i = 0; i < 8; i++) {
+          s8d_16[i] = s8n_16[i] >= s8m_16[i] ? 0xffff : 0;
+        }
+      } else if (size == 2) {
+        for (int i = 0; i < 4; i++) {
+          s8d.data_[i].u = s8n_32[i] >= s8m_32[i] ? 0xffffffff : 0;
+        }
+      } else if (size == 3) {
+        UnimplementedInstruction(instr);
+      } else {
+        UNREACHABLE();
+      }
+    } else if ((instr->Bits(8, 4) == 3) && (instr->Bit(4) == 1) &&
+               (instr->Bits(23, 2) == 2)) {
+      // Format(instr, "vcugeq'sz 'qd, 'qn, 'qm");
+      const int size = instr->Bits(20, 2);
+      if (size == 0) {
+        for (int i = 0; i < 16; i++) {
+          s8d_8[i] = s8n_u8[i] >= s8m_u8[i] ? 0xff : 0;
+        }
+      } else if (size == 1) {
+        for (int i = 0; i < 8; i++) {
+          s8d_16[i] = s8n_u16[i] >= s8m_u16[i] ? 0xffff : 0;
+        }
+      } else if (size == 2) {
+        for (int i = 0; i < 4; i++) {
+          s8d.data_[i].u = s8n.data_[i].u >= s8m.data_[i].u ? 0xffffffff : 0;
+        }
+      } else if (size == 3) {
+        UnimplementedInstruction(instr);
+      } else {
+        UNREACHABLE();
+      }
+    } else if ((instr->Bits(8, 4) == 14) && (instr->Bit(4) == 0) &&
+               (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 2)) {
+      // Format(instr, "vcgeqs 'qd, 'qn, 'qm");
+      for (int i = 0; i < 4; i++) {
+        s8d.data_[i].u = s8n.data_[i].f >= s8m.data_[i].f ? 0xffffffff : 0;
+      }
+    } else if ((instr->Bits(8, 4) == 3) && (instr->Bit(4) == 0) &&
+               (instr->Bits(23, 2) == 0)) {
+      // Format(instr, "vcgtq'sz 'qd, 'qn, 'qm");
+      const int size = instr->Bits(20, 2);
+      if (size == 0) {
+        for (int i = 0; i < 16; i++) {
+          s8d_8[i] = s8n_8[i] > s8m_8[i] ? 0xff : 0;
+        }
+      } else if (size == 1) {
+        for (int i = 0; i < 8; i++) {
+          s8d_16[i] = s8n_16[i] > s8m_16[i] ? 0xffff : 0;
+        }
+      } else if (size == 2) {
+        for (int i = 0; i < 4; i++) {
+          s8d.data_[i].u = s8n_32[i] > s8m_32[i] ? 0xffffffff : 0;
+        }
+      } else if (size == 3) {
+        UnimplementedInstruction(instr);
+      } else {
+        UNREACHABLE();
+      }
+    } else if ((instr->Bits(8, 4) == 3) && (instr->Bit(4) == 0) &&
+               (instr->Bits(23, 2) == 2)) {
+      // Format(instr, "vcugtq'sz 'qd, 'qn, 'qm");
+      const int size = instr->Bits(20, 2);
+      if (size == 0) {
+        for (int i = 0; i < 16; i++) {
+          s8d_8[i] = s8n_u8[i] > s8m_u8[i] ? 0xff : 0;
+        }
+      } else if (size == 1) {
+        for (int i = 0; i < 8; i++) {
+          s8d_16[i] = s8n_u16[i] > s8m_u16[i] ? 0xffff : 0;
+        }
+      } else if (size == 2) {
+        for (int i = 0; i < 4; i++) {
+          s8d.data_[i].u = s8n.data_[i].u > s8m.data_[i].u ? 0xffffffff : 0;
+        }
+      } else if (size == 3) {
+        UnimplementedInstruction(instr);
+      } else {
+        UNREACHABLE();
+      }
+    } else if ((instr->Bits(8, 4) == 14) && (instr->Bit(4) == 0) &&
+               (instr->Bits(20, 2) == 2) && (instr->Bits(23, 2) == 2)) {
+      // Format(instr, "vcgtqs 'qd, 'qn, 'qm");
+      for (int i = 0; i < 4; i++) {
+        s8d.data_[i].u = s8n.data_[i].f > s8m.data_[i].f ? 0xffffffff : 0;
+      }
     } else {
       UnimplementedInstruction(instr);
     }