[vm, compiler] Use RISC-V's compare-and-branch for a shorter write barrier sequence.

dart2js.aot.rv64 25753840 -> 25721032 (-0.13%)
dart2js.aot.rv32 24891160 -> 24858368 (-0.13%)

TEST=ci
Change-Id: I252e0477f11b9198fe043f105a8745bc283aaaa8
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/249062
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm64.h b/runtime/vm/compiler/assembler/assembler_arm64.h
index a13f06a7..ea47f52 100644
--- a/runtime/vm/compiler/assembler/assembler_arm64.h
+++ b/runtime/vm/compiler/assembler/assembler_arm64.h
@@ -2144,7 +2144,7 @@
   void RestoreCodePointer();
 
   // Restores the values of the registers that are blocked to cache some values
-  // e.g. BARRIER_MASK and NULL_REG.
+  // e.g. HEAP_BITS and NULL_REG.
   void RestorePinnedRegisters();
 
   void SetupGlobalPoolAndDispatchTable();
diff --git a/runtime/vm/compiler/assembler/assembler_riscv.cc b/runtime/vm/compiler/assembler/assembler_riscv.cc
index cdfa87a..862bf92 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv.cc
+++ b/runtime/vm/compiler/assembler/assembler_riscv.cc
@@ -3019,6 +3019,7 @@
   //    in progress
   // If so, call the WriteBarrier stub, which will either add object to the
   // store buffer (case 1) or add value to the marking stack (case 2).
+  // See RestorePinnedRegisters for why this can be `ble`.
   // Compare UntaggedObject::StorePointer.
   Label done;
   if (can_value_be_smi == kValueCanBeSmi) {
@@ -3028,8 +3029,7 @@
   lbu(TMP2, FieldAddress(value, target::Object::tags_offset()));
   srli(TMP, TMP, target::UntaggedObject::kBarrierOverlapShift);
   and_(TMP, TMP, TMP2);
-  and_(TMP, TMP, WRITE_BARRIER_MASK);
-  beqz(TMP, &done, kNearJump);
+  ble(TMP, WRITE_BARRIER_STATE, &done, kNearJump);
 
   Register objectForCall = object;
   if (value != kWriteBarrierValueReg) {
@@ -3091,6 +3091,7 @@
   //    in progress
   // If so, call the WriteBarrier stub, which will either add object to the
   // store buffer (case 1) or add value to the marking stack (case 2).
+  // See RestorePinnedRegisters for why this can be `ble`.
   // Compare UntaggedObject::StorePointer.
   Label done;
   if (can_value_be_smi == kValueCanBeSmi) {
@@ -3100,8 +3101,7 @@
   lbu(TMP2, FieldAddress(value, target::Object::tags_offset()));
   srli(TMP, TMP, target::UntaggedObject::kBarrierOverlapShift);
   and_(TMP, TMP, TMP2);
-  and_(TMP, TMP, WRITE_BARRIER_MASK);
-  beqz(TMP, &done, kNearJump);
+  ble(TMP, WRITE_BARRIER_STATE, &done, kNearJump);
   if (spill_lr) {
     PushRegister(RA);
   }
@@ -3672,12 +3672,46 @@
   subi(PP, PP, kHeapObjectTag);  // Pool in PP is untagged!
 }
 
-// Restores the values of the registers that are blocked to cache some values
-// e.g. BARRIER_MASK and NULL_REG.
 void Assembler::RestorePinnedRegisters() {
-  lx(WRITE_BARRIER_MASK,
+  lx(WRITE_BARRIER_STATE,
      Address(THR, target::Thread::write_barrier_mask_offset()));
   lx(NULL_REG, Address(THR, target::Thread::object_null_offset()));
+
+  // Our write barrier usually uses mask-and-test,
+  //   01b6f6b3  and tmp, tmp, mask
+  //       c689  beqz tmp, +10
+  // but on RISC-V compare-and-branch is shorter,
+  //   00ddd663  ble tmp, wbs, +12
+  //
+  // TMP bit 4+ = 0
+  // TMP bit 3  = object is old-and-not-remembered AND value is new (genr bit)
+  // TMP bit 2  = object is old AND value is old-and-not-marked     (incr bit)
+  // TMP bit 1  = garbage
+  // TMP bit 0  = garbage
+  //
+  // Thread::wbm | WRITE_BARRIER_STATE | TMP/combined headers | result
+  // generational only
+  // 0b1000        0b0111                0b11xx                 impossible
+  //                                     0b10xx                 call stub
+  //                                     0b01xx                 skip
+  //                                     0b00xx                 skip
+  // generational and incremental
+  // 0b1100        0b0011                0b11xx                 impossible
+  //                                     0b10xx                 call stub
+  //                                     0b01xx                 call stub
+  //                                     0b00xx                 skip
+  xori(WRITE_BARRIER_STATE, WRITE_BARRIER_STATE,
+       (target::UntaggedObject::kGenerationalBarrierMask << 1) - 1);
+
+  // Generational bit must be higher than incremental bit, with no other bits
+  // between.
+  ASSERT(target::UntaggedObject::kGenerationalBarrierMask ==
+         (target::UntaggedObject::kIncrementalBarrierMask << 1));
+  // Other header bits must be lower; compare as masks, not bit positions.
+  ASSERT(target::UntaggedObject::kIncrementalBarrierMask >
+         (1 << target::UntaggedObject::kCanonicalBit));
+  ASSERT(target::UntaggedObject::kIncrementalBarrierMask >
+         (1 << target::UntaggedObject::kCardRememberedBit));
 }
 
 void Assembler::SetupGlobalPoolAndDispatchTable() {
@@ -3816,7 +3850,7 @@
     // Or would need to save above.
     COMPILE_ASSERT(IsCalleeSavedRegister(THR));
     COMPILE_ASSERT(IsCalleeSavedRegister(NULL_REG));
-    COMPILE_ASSERT(IsCalleeSavedRegister(WRITE_BARRIER_MASK));
+    COMPILE_ASSERT(IsCalleeSavedRegister(WRITE_BARRIER_STATE));
     COMPILE_ASSERT(IsCalleeSavedRegister(DISPATCH_TABLE_REG));
   }
 
@@ -3859,7 +3893,7 @@
   // Already saved.
   COMPILE_ASSERT(IsCalleeSavedRegister(THR));
   COMPILE_ASSERT(IsCalleeSavedRegister(NULL_REG));
-  COMPILE_ASSERT(IsCalleeSavedRegister(WRITE_BARRIER_MASK));
+  COMPILE_ASSERT(IsCalleeSavedRegister(WRITE_BARRIER_STATE));
   COMPILE_ASSERT(IsCalleeSavedRegister(DISPATCH_TABLE_REG));
   // Need to save.
   COMPILE_ASSERT(!IsCalleeSavedRegister(PP));
diff --git a/runtime/vm/compiler/assembler/assembler_riscv.h b/runtime/vm/compiler/assembler/assembler_riscv.h
index f62f545..cefa220 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv.h
+++ b/runtime/vm/compiler/assembler/assembler_riscv.h
@@ -1257,7 +1257,7 @@
   void RestorePoolPointer();
 
   // Restores the values of the registers that are blocked to cache some values
-  // e.g. BARRIER_MASK and NULL_REG.
+  // e.g. WRITE_BARRIER_STATE and NULL_REG.
   void RestorePinnedRegisters();
 
   void SetupGlobalPoolAndDispatchTable();
diff --git a/runtime/vm/compiler/assembler/assembler_riscv_test.cc b/runtime/vm/compiler/assembler/assembler_riscv_test.cc
index ff2bfb3..8616410 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv_test.cc
+++ b/runtime/vm/compiler/assembler/assembler_riscv_test.cc
@@ -32,7 +32,7 @@
   __ PushNativeCalleeSavedRegisters();
 
   __ mv(THR, A2);
-  __ lx(WRITE_BARRIER_MASK, Address(THR, Thread::write_barrier_mask_offset()));
+  __ RestorePinnedRegisters();  // Setup WRITE_BARRIER_STATE.
 
   __ StoreIntoObject(A1, FieldAddress(A1, GrowableObjectArray::data_offset()),
                      A0);
diff --git a/runtime/vm/compiler/runtime_api.cc b/runtime/vm/compiler/runtime_api.cc
index abf5464..2490c68 100644
--- a/runtime/vm/compiler/runtime_api.cc
+++ b/runtime/vm/compiler/runtime_api.cc
@@ -355,6 +355,8 @@
 const word UntaggedObject::kCardRememberedBit =
     dart::UntaggedObject::kCardRememberedBit;
 
+const word UntaggedObject::kCanonicalBit = dart::UntaggedObject::kCanonicalBit;
+
 const word UntaggedObject::kOldAndNotRememberedBit =
     dart::UntaggedObject::kOldAndNotRememberedBit;
 
@@ -397,6 +399,9 @@
 const word UntaggedObject::kGenerationalBarrierMask =
     dart::UntaggedObject::kGenerationalBarrierMask;
 
+const word UntaggedObject::kIncrementalBarrierMask =
+    dart::UntaggedObject::kIncrementalBarrierMask;
+
 bool IsTypedDataClassId(intptr_t cid) {
   return dart::IsTypedDataClassId(cid);
 }
diff --git a/runtime/vm/compiler/runtime_api.h b/runtime/vm/compiler/runtime_api.h
index 723e277..5f8ec14 100644
--- a/runtime/vm/compiler/runtime_api.h
+++ b/runtime/vm/compiler/runtime_api.h
@@ -410,6 +410,7 @@
 class UntaggedObject : public AllStatic {
  public:
   static const word kCardRememberedBit;
+  static const word kCanonicalBit;
   static const word kOldAndNotRememberedBit;
   static const word kOldAndNotMarkedBit;
   static const word kSizeTagPos;
@@ -422,6 +423,7 @@
   static const word kTagBitsSizeTagPos;
   static const word kBarrierOverlapShift;
   static const word kGenerationalBarrierMask;
+  static const word kIncrementalBarrierMask;
 
   static bool IsTypedDataClassId(intptr_t cid);
 };
diff --git a/runtime/vm/constants_riscv.cc b/runtime/vm/constants_riscv.cc
index 7c9b6ed..481d83f 100644
--- a/runtime/vm/constants_riscv.cc
+++ b/runtime/vm/constants_riscv.cc
@@ -18,9 +18,9 @@
 #endif
 
 const char* const cpu_reg_names[kNumberOfCpuRegisters] = {
-    "zero", "ra", "sp",  "gp",   "tp",   "t0",   "t1", "t2", "fp", "thr", "a0",
-    "a1",   "a2", "tmp", "tmp2", "pp",   "a6",   "a7", "s2", "s3", "s4",  "s5",
-    "s6",   "s7", "s8",  "s9",   "null", "mask", "t3", "t4", "t5", "t6",
+    "zero", "ra", "sp",  "gp",   "tp",   "t0",  "t1", "t2", "fp", "thr", "a0",
+    "a1",   "a2", "tmp", "tmp2", "pp",   "a6",  "a7", "s2", "s3", "s4",  "s5",
+    "s6",   "s7", "s8",  "s9",   "null", "wbs", "t3", "t4", "t5", "t6",
 };
 
 const char* const cpu_reg_abi_names[kNumberOfCpuRegisters] = {
diff --git a/runtime/vm/constants_riscv.h b/runtime/vm/constants_riscv.h
index 7c2a2b9..e073037 100644
--- a/runtime/vm/constants_riscv.h
+++ b/runtime/vm/constants_riscv.h
@@ -70,7 +70,7 @@
   S8 = 24,   // CALLEE_SAVED_TEMP / FAR_TMP
   S9 = 25,   // DISPATCH_TABLE_REG
   S10 = 26,  // NULL
-  S11 = 27,  // WRITE_BARRIER_MASK
+  S11 = 27,  // WRITE_BARRIER_STATE
   T3 = 28,
   T4 = 29,
   T5 = 30,
@@ -162,7 +162,7 @@
 constexpr Register THR = S1;  // Caches current thread in generated code.
 constexpr Register CALLEE_SAVED_TEMP = S8;
 constexpr Register CALLEE_SAVED_TEMP2 = S7;
-constexpr Register WRITE_BARRIER_MASK = S11;
+constexpr Register WRITE_BARRIER_STATE = S11;
 constexpr Register NULL_REG = S10;  // Caches NullObject() value.
 
 // ABI for catch-clause entry point.
@@ -450,13 +450,13 @@
 // We rely on that any calls into C++ also preserve X18.
 constexpr intptr_t kReservedCpuRegisters =
     R(ZR) | R(TP) | R(GP) | R(SP) | R(FP) | R(TMP) | R(TMP2) | R(PP) | R(THR) |
-    R(RA) | R(WRITE_BARRIER_MASK) | R(NULL_REG) | R(DISPATCH_TABLE_REG) |
+    R(RA) | R(WRITE_BARRIER_STATE) | R(NULL_REG) | R(DISPATCH_TABLE_REG) |
     R(FAR_TMP) | R(18);
 constexpr intptr_t kNumberOfReservedCpuRegisters = 15;
 #else
 constexpr intptr_t kReservedCpuRegisters =
     R(ZR) | R(TP) | R(GP) | R(SP) | R(FP) | R(TMP) | R(TMP2) | R(PP) | R(THR) |
-    R(RA) | R(WRITE_BARRIER_MASK) | R(NULL_REG) | R(DISPATCH_TABLE_REG) |
+    R(RA) | R(WRITE_BARRIER_STATE) | R(NULL_REG) | R(DISPATCH_TABLE_REG) |
     R(FAR_TMP);
 constexpr intptr_t kNumberOfReservedCpuRegisters = 14;
 #endif
diff --git a/runtime/vm/raw_object.h b/runtime/vm/raw_object.h
index abc979e..2e4ad32 100644
--- a/runtime/vm/raw_object.h
+++ b/runtime/vm/raw_object.h
@@ -158,11 +158,11 @@
   // bit fields for storing tags.
   enum TagBits {
     kCardRememberedBit = 0,
-    kOldAndNotMarkedBit = 1,      // Incremental barrier target.
-    kNewBit = 2,                  // Generational barrier target.
-    kOldBit = 3,                  // Incremental barrier source.
-    kOldAndNotRememberedBit = 4,  // Generational barrier source.
-    kCanonicalBit = 5,
+    kCanonicalBit = 1,
+    kOldAndNotMarkedBit = 2,      // Incremental barrier target.
+    kNewBit = 3,                  // Generational barrier target.
+    kOldBit = 4,                  // Incremental barrier source.
+    kOldAndNotRememberedBit = 5,  // Generational barrier source.
     kReservedTagPos = 6,
     kReservedTagSize = 2,
 
diff --git a/runtime/vm/simulator_riscv.cc b/runtime/vm/simulator_riscv.cc
index 3b2034f..937c314 100644
--- a/runtime/vm/simulator_riscv.cc
+++ b/runtime/vm/simulator_riscv.cc
@@ -463,7 +463,9 @@
   pp -= kHeapObjectTag;  // In the PP register, the pool pointer is untagged.
   set_xreg(CODE_REG, code);
   set_xreg(PP, pp);
-  set_xreg(WRITE_BARRIER_MASK, thread->write_barrier_mask());
+  set_xreg(WRITE_BARRIER_STATE,
+           thread->write_barrier_mask() ^
+               ((UntaggedObject::kGenerationalBarrierMask << 1) - 1));
   set_xreg(NULL_REG, static_cast<uintx_t>(Object::null()));
   if (FLAG_precompiled_mode) {
     set_xreg(DISPATCH_TABLE_REG,