[vm, jit] The hottest Smi ops are + < ==, not + - ==.

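Replace the Smi subtraction fast path and the SmiSubInlineCache stub with a
less-than fast path and a SmiLessInlineCache stub.

A counting loop is the canonical hot path: each iteration performs a Smi +
and a Smi <, but no -. A minimal illustration (hypothetical example, not one
of the benchmarks behind this change):

  int sum(List<int> xs) {
    var total = 0;
    for (var i = 0; i < xs.length; i++) {  // Smi < and Smi + every iteration.
      total += xs[i];
    }
    return total;
  }
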
Bug: https://github.com/dart-lang/sdk/issues/36409
Change-Id: Id1a6a65b26c95fb3f56bf844943aa09cc4f13a2a
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/99728
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Reviewed-by: Aart Bik <ajcbik@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>
diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc
index 307c7ad..4ee0e27 100644
--- a/runtime/vm/compiler/backend/il.cc
+++ b/runtime/vm/compiler/backend/il.cc
@@ -4048,8 +4048,8 @@
   switch (kind) {
     case Token::kADD:
       return StubCode::SmiAddInlineCache().raw();
-    case Token::kSUB:
-      return StubCode::SmiSubInlineCache().raw();
+    case Token::kLT:
+      return StubCode::SmiLessInlineCache().raw();
     case Token::kEQ:
       return StubCode::SmiEqualInlineCache().raw();
     default:
diff --git a/runtime/vm/compiler/stub_code_compiler_arm.cc b/runtime/vm/compiler/stub_code_compiler_arm.cc
index 7024fd8..8038fbd 100644
--- a/runtime/vm/compiler/stub_code_compiler_arm.cc
+++ b/runtime/vm/compiler/stub_code_compiler_arm.cc
@@ -1841,8 +1841,8 @@
                           intptr_t num_args,
                           Label* not_smi_or_overflow) {
   __ Comment("Fast Smi op");
-  __ ldr(R0, Address(SP, 0 * target::kWordSize));
-  __ ldr(R1, Address(SP, 1 * target::kWordSize));
+  __ ldr(R0, Address(SP, 1 * target::kWordSize));  // Left.
+  __ ldr(R1, Address(SP, 0 * target::kWordSize));  // Right.
   __ orr(TMP, R0, Operand(R1));
   __ tst(TMP, Operand(kSmiTagMask));
   __ b(not_smi_or_overflow, NE);
@@ -1852,9 +1852,10 @@
       __ b(not_smi_or_overflow, VS);  // Branch if overflow.
       break;
     }
-    case Token::kSUB: {
-      __ subs(R0, R1, Operand(R0));   // Subtract.
-      __ b(not_smi_or_overflow, VS);  // Branch if overflow.
+    case Token::kLT: {
+      __ cmp(R0, Operand(R1));
+      __ LoadObject(R0, CastHandle<Object>(TrueObject()), LT);
+      __ LoadObject(R0, CastHandle<Object>(FalseObject()), GE);
       break;
     }
     case Token::kEQ: {
@@ -2114,10 +2115,10 @@
       assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD);
 }
 
-void StubCodeCompiler::GenerateSmiSubInlineCacheStub(Assembler* assembler) {
+void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) {
   GenerateUsageCounterIncrement(assembler, R8);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kSUB);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT);
 }
 
 void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) {
diff --git a/runtime/vm/compiler/stub_code_compiler_arm64.cc b/runtime/vm/compiler/stub_code_compiler_arm64.cc
index 2205c2b..01a0e20 100644
--- a/runtime/vm/compiler/stub_code_compiler_arm64.cc
+++ b/runtime/vm/compiler/stub_code_compiler_arm64.cc
@@ -1903,8 +1903,8 @@
                           intptr_t num_args,
                           Label* not_smi_or_overflow) {
   __ Comment("Fast Smi op");
-  __ ldr(R0, Address(SP, +0 * target::kWordSize));  // Right.
-  __ ldr(R1, Address(SP, +1 * target::kWordSize));  // Left.
+  __ ldr(R0, Address(SP, +1 * target::kWordSize));  // Left.
+  __ ldr(R1, Address(SP, +0 * target::kWordSize));  // Right.
   __ orr(TMP, R0, Operand(R1));
   __ BranchIfNotSmi(TMP, not_smi_or_overflow);
   switch (kind) {
@@ -1913,16 +1913,18 @@
       __ b(not_smi_or_overflow, VS);  // Branch if overflow.
       break;
     }
-    case Token::kSUB: {
-      __ subs(R0, R1, Operand(R0));   // Subtract.
-      __ b(not_smi_or_overflow, VS);  // Branch if overflow.
+    case Token::kLT: {
+      __ CompareRegisters(R0, R1);
+      __ LoadObject(R0, CastHandle<Object>(TrueObject()));
+      __ LoadObject(R1, CastHandle<Object>(FalseObject()));
+      __ csel(R0, R0, R1, LT);
       break;
     }
     case Token::kEQ: {
       __ CompareRegisters(R0, R1);
       __ LoadObject(R0, CastHandle<Object>(TrueObject()));
       __ LoadObject(R1, CastHandle<Object>(FalseObject()));
-      __ csel(R0, R1, R0, NE);
+      __ csel(R0, R0, R1, EQ);
       break;
     }
     default:
@@ -2188,10 +2190,10 @@
       assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD);
 }
 
-void StubCodeCompiler::GenerateSmiSubInlineCacheStub(Assembler* assembler) {
+void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) {
   GenerateUsageCounterIncrement(assembler, R6);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kSUB);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT);
 }
 
 void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) {
diff --git a/runtime/vm/compiler/stub_code_compiler_ia32.cc b/runtime/vm/compiler/stub_code_compiler_ia32.cc
index 12259cc..570d2f8 100644
--- a/runtime/vm/compiler/stub_code_compiler_ia32.cc
+++ b/runtime/vm/compiler/stub_code_compiler_ia32.cc
@@ -1539,8 +1539,8 @@
                           Label* not_smi_or_overflow) {
   __ Comment("Fast Smi op");
   ASSERT(num_args == 2);
-  __ movl(EDI, Address(ESP, +1 * target::kWordSize));  // Right
   __ movl(EAX, Address(ESP, +2 * target::kWordSize));  // Left
+  __ movl(EDI, Address(ESP, +1 * target::kWordSize));  // Right
   __ movl(EBX, EDI);
   __ orl(EBX, EAX);
   __ testl(EBX, Immediate(kSmiTagMask));
@@ -1551,26 +1551,26 @@
       __ j(OVERFLOW, not_smi_or_overflow, Assembler::kNearJump);
       break;
     }
-    case Token::kSUB: {
-      __ subl(EAX, EDI);
-      __ j(OVERFLOW, not_smi_or_overflow, Assembler::kNearJump);
-      break;
-    }
-    case Token::kMUL: {
-      __ SmiUntag(EAX);
-      __ imull(EAX, EDI);
-      __ j(OVERFLOW, not_smi_or_overflow, Assembler::kNearJump);
+    case Token::kLT: {
+      Label done, is_true;
+      __ cmpl(EAX, EDI);
+      __ setcc(GREATER_EQUAL, AL);
+      __ movzxb(EAX, AL);  // EAX := EAX < EDI ? 0 : 1
+      __ movl(EAX,
+              Address(THR, EAX, TIMES_4, target::Thread::bool_true_offset()));
+      ASSERT(target::Thread::bool_true_offset() + 4 ==
+             target::Thread::bool_false_offset());
       break;
     }
     case Token::kEQ: {
       Label done, is_true;
       __ cmpl(EAX, EDI);
-      __ j(EQUAL, &is_true, Assembler::kNearJump);
-      __ LoadObject(EAX, CastHandle<Object>(FalseObject()));
-      __ jmp(&done, Assembler::kNearJump);
-      __ Bind(&is_true);
-      __ LoadObject(EAX, CastHandle<Object>(TrueObject()));
-      __ Bind(&done);
+      __ setcc(NOT_EQUAL, AL);
+      __ movzxb(EAX, AL);  // EAX := EAX == EDI ? 0 : 1
+      __ movl(EAX,
+              Address(THR, EAX, TIMES_4, target::Thread::bool_true_offset()));
+      ASSERT(target::Thread::bool_true_offset() + 4 ==
+             target::Thread::bool_false_offset());
       break;
     }
     default:
@@ -1825,10 +1825,10 @@
       assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD);
 }
 
-void StubCodeCompiler::GenerateSmiSubInlineCacheStub(Assembler* assembler) {
+void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) {
   GenerateUsageCounterIncrement(assembler, EBX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kSUB);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT);
 }
 
 void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) {
diff --git a/runtime/vm/compiler/stub_code_compiler_x64.cc b/runtime/vm/compiler/stub_code_compiler_x64.cc
index 0df3f7a..8c8e0be 100644
--- a/runtime/vm/compiler/stub_code_compiler_x64.cc
+++ b/runtime/vm/compiler/stub_code_compiler_x64.cc
@@ -1872,8 +1872,8 @@
                           Label* not_smi_or_overflow) {
   __ Comment("Fast Smi op");
   ASSERT(num_args == 2);
-  __ movq(RCX, Address(RSP, +1 * target::kWordSize));  // Right
   __ movq(RAX, Address(RSP, +2 * target::kWordSize));  // Left.
+  __ movq(RCX, Address(RSP, +1 * target::kWordSize));  // Right
   __ movq(R13, RCX);
   __ orq(R13, RAX);
   __ testq(R13, Immediate(kSmiTagMask));
@@ -1884,20 +1884,24 @@
       __ j(OVERFLOW, not_smi_or_overflow);
       break;
     }
-    case Token::kSUB: {
-      __ subq(RAX, RCX);
-      __ j(OVERFLOW, not_smi_or_overflow);
+    case Token::kLT: {
+      __ cmpq(RAX, RCX);
+      __ setcc(GREATER_EQUAL, ByteRegisterOf(RAX));
+      __ movzxb(RAX, RAX);  // RAX := RAX < RCX ? 0 : 1
+      __ movq(RAX,
+              Address(THR, RAX, TIMES_8, target::Thread::bool_true_offset()));
+      ASSERT(target::Thread::bool_true_offset() + 8 ==
+             target::Thread::bool_false_offset());
       break;
     }
     case Token::kEQ: {
-      Label done, is_true;
       __ cmpq(RAX, RCX);
-      __ j(EQUAL, &is_true, Assembler::kNearJump);
-      __ LoadObject(RAX, CastHandle<Object>(FalseObject()));
-      __ jmp(&done, Assembler::kNearJump);
-      __ Bind(&is_true);
-      __ LoadObject(RAX, CastHandle<Object>(TrueObject()));
-      __ Bind(&done);
+      __ setcc(NOT_EQUAL, ByteRegisterOf(RAX));
+      __ movzxb(RAX, RAX);  // RAX := RAX == RCX ? 0 : 1
+      __ movq(RAX,
+              Address(THR, RAX, TIMES_8, target::Thread::bool_true_offset()));
+      ASSERT(target::Thread::bool_true_offset() + 8 ==
+             target::Thread::bool_false_offset());
       break;
     }
     default:
@@ -2192,10 +2196,10 @@
       assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD);
 }
 
-void StubCodeCompiler::GenerateSmiSubInlineCacheStub(Assembler* assembler) {
+void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) {
   GenerateUsageCounterIncrement(assembler, RCX);
   GenerateNArgsCheckInlineCacheStub(
-      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kSUB);
+      assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT);
 }
 
 void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) {
diff --git a/runtime/vm/stub_code_list.h b/runtime/vm/stub_code_list.h
index 602ec4d..7f93f5b8 100644
--- a/runtime/vm/stub_code_list.h
+++ b/runtime/vm/stub_code_list.h
@@ -50,7 +50,7 @@
   V(OneArgCheckInlineCache)                                                    \
   V(TwoArgsCheckInlineCache)                                                   \
   V(SmiAddInlineCache)                                                         \
-  V(SmiSubInlineCache)                                                         \
+  V(SmiLessInlineCache)                                                        \
   V(SmiEqualInlineCache)                                                       \
   V(OneArgOptimizedCheckInlineCache)                                           \
   V(TwoArgsOptimizedCheckInlineCache)                                          \