[vm, compiler] Use RISC-V's compare-and-branch for a shorter write barrier sequence.
dart2js.aot.rv64 25753840 -> 25721032 (-0.13%)
dart2js.aot.rv32 24891160 -> 24858368 (-0.13%)
TEST=ci
Change-Id: I252e0477f11b9198fe043f105a8745bc283aaaa8
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/249062
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm64.h b/runtime/vm/compiler/assembler/assembler_arm64.h
index a13f06a7..ea47f52 100644
--- a/runtime/vm/compiler/assembler/assembler_arm64.h
+++ b/runtime/vm/compiler/assembler/assembler_arm64.h
@@ -2144,7 +2144,7 @@
void RestoreCodePointer();
// Restores the values of the registers that are blocked to cache some values
- // e.g. BARRIER_MASK and NULL_REG.
+ // e.g. HEAP_BITS and NULL_REG.
void RestorePinnedRegisters();
void SetupGlobalPoolAndDispatchTable();
diff --git a/runtime/vm/compiler/assembler/assembler_riscv.cc b/runtime/vm/compiler/assembler/assembler_riscv.cc
index cdfa87a..862bf92 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv.cc
+++ b/runtime/vm/compiler/assembler/assembler_riscv.cc
@@ -3019,6 +3019,7 @@
// in progress
// If so, call the WriteBarrier stub, which will either add object to the
// store buffer (case 1) or add value to the marking stack (case 2).
+ // See RestorePinnedRegisters for why this can be `ble`.
// Compare UntaggedObject::StorePointer.
Label done;
if (can_value_be_smi == kValueCanBeSmi) {
@@ -3028,8 +3029,7 @@
lbu(TMP2, FieldAddress(value, target::Object::tags_offset()));
srli(TMP, TMP, target::UntaggedObject::kBarrierOverlapShift);
and_(TMP, TMP, TMP2);
- and_(TMP, TMP, WRITE_BARRIER_MASK);
- beqz(TMP, &done, kNearJump);
+ ble(TMP, WRITE_BARRIER_STATE, &done, kNearJump);
Register objectForCall = object;
if (value != kWriteBarrierValueReg) {
@@ -3091,6 +3091,7 @@
// in progress
// If so, call the WriteBarrier stub, which will either add object to the
// store buffer (case 1) or add value to the marking stack (case 2).
+ // See RestorePinnedRegisters for why this can be `ble`.
// Compare UntaggedObject::StorePointer.
Label done;
if (can_value_be_smi == kValueCanBeSmi) {
@@ -3100,8 +3101,7 @@
lbu(TMP2, FieldAddress(value, target::Object::tags_offset()));
srli(TMP, TMP, target::UntaggedObject::kBarrierOverlapShift);
and_(TMP, TMP, TMP2);
- and_(TMP, TMP, WRITE_BARRIER_MASK);
- beqz(TMP, &done, kNearJump);
+ ble(TMP, WRITE_BARRIER_STATE, &done, kNearJump);
if (spill_lr) {
PushRegister(RA);
}
@@ -3672,12 +3672,46 @@
subi(PP, PP, kHeapObjectTag); // Pool in PP is untagged!
}
-// Restores the values of the registers that are blocked to cache some values
-// e.g. BARRIER_MASK and NULL_REG.
void Assembler::RestorePinnedRegisters() {
- lx(WRITE_BARRIER_MASK,
+ lx(WRITE_BARRIER_STATE,
Address(THR, target::Thread::write_barrier_mask_offset()));
lx(NULL_REG, Address(THR, target::Thread::object_null_offset()));
+
+ // Our write barrier usually uses mask-and-test,
+ // 01b6f6b3 and tmp, tmp, mask
+ // c689 beqz tmp, +10
+ // but on RISC-V compare-and-branch is shorter,
+ // 00ddd663 ble tmp, wbs, +12
+ //
+ // TMP bit 4+ = 0
+ // TMP bit 3 = object is old-and-not-remembered AND value is new (genr bit)
+ // TMP bit 2 = object is old AND value is old-and-not-marked (incr bit)
+ // TMP bit 1 = garbage
+ // TMP bit 0 = garbage
+ //
+ // Thread::wbm | WRITE_BARRIER_STATE | TMP/combined headers | result
+ // generational only
+ // 0b1000 0b0111 0b11xx impossible
+ // 0b10xx call stub
+ // 0b01xx skip
+ // 0b00xx skip
+ // generational and incremental
+ // 0b1100 0b0011 0b11xx impossible
+ // 0b10xx call stub
+ // 0b01xx call stub
+ // 0b00xx skip
xori(WRITE_BARRIER_STATE, WRITE_BARRIER_STATE,
(target::UntaggedObject::kGenerationalBarrierMask << 1) - 1);

+ // Generational bit must be higher than incremental bit, with no other bits
+ // between.
ASSERT(target::UntaggedObject::kGenerationalBarrierMask ==
(target::UntaggedObject::kIncrementalBarrierMask << 1));
+ // Other header bits must be lower (compare mask to mask, not to bit index).
ASSERT(target::UntaggedObject::kIncrementalBarrierMask >
(1 << target::UntaggedObject::kCanonicalBit));
ASSERT(target::UntaggedObject::kIncrementalBarrierMask >
(1 << target::UntaggedObject::kCardRememberedBit));
}
void Assembler::SetupGlobalPoolAndDispatchTable() {
@@ -3816,7 +3850,7 @@
// Or would need to save above.
COMPILE_ASSERT(IsCalleeSavedRegister(THR));
COMPILE_ASSERT(IsCalleeSavedRegister(NULL_REG));
- COMPILE_ASSERT(IsCalleeSavedRegister(WRITE_BARRIER_MASK));
+ COMPILE_ASSERT(IsCalleeSavedRegister(WRITE_BARRIER_STATE));
COMPILE_ASSERT(IsCalleeSavedRegister(DISPATCH_TABLE_REG));
}
@@ -3859,7 +3893,7 @@
// Already saved.
COMPILE_ASSERT(IsCalleeSavedRegister(THR));
COMPILE_ASSERT(IsCalleeSavedRegister(NULL_REG));
- COMPILE_ASSERT(IsCalleeSavedRegister(WRITE_BARRIER_MASK));
+ COMPILE_ASSERT(IsCalleeSavedRegister(WRITE_BARRIER_STATE));
COMPILE_ASSERT(IsCalleeSavedRegister(DISPATCH_TABLE_REG));
// Need to save.
COMPILE_ASSERT(!IsCalleeSavedRegister(PP));
diff --git a/runtime/vm/compiler/assembler/assembler_riscv.h b/runtime/vm/compiler/assembler/assembler_riscv.h
index f62f545..cefa220 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv.h
+++ b/runtime/vm/compiler/assembler/assembler_riscv.h
@@ -1257,7 +1257,7 @@
void RestorePoolPointer();
// Restores the values of the registers that are blocked to cache some values
- // e.g. BARRIER_MASK and NULL_REG.
+ // e.g. WRITE_BARRIER_STATE and NULL_REG.
void RestorePinnedRegisters();
void SetupGlobalPoolAndDispatchTable();
diff --git a/runtime/vm/compiler/assembler/assembler_riscv_test.cc b/runtime/vm/compiler/assembler/assembler_riscv_test.cc
index ff2bfb3..8616410 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv_test.cc
+++ b/runtime/vm/compiler/assembler/assembler_riscv_test.cc
@@ -32,7 +32,7 @@
__ PushNativeCalleeSavedRegisters();
__ mv(THR, A2);
- __ lx(WRITE_BARRIER_MASK, Address(THR, Thread::write_barrier_mask_offset()));
+ __ RestorePinnedRegisters(); // Setup WRITE_BARRIER_STATE.
__ StoreIntoObject(A1, FieldAddress(A1, GrowableObjectArray::data_offset()),
A0);
diff --git a/runtime/vm/compiler/runtime_api.cc b/runtime/vm/compiler/runtime_api.cc
index abf5464..2490c68 100644
--- a/runtime/vm/compiler/runtime_api.cc
+++ b/runtime/vm/compiler/runtime_api.cc
@@ -355,6 +355,8 @@
const word UntaggedObject::kCardRememberedBit =
dart::UntaggedObject::kCardRememberedBit;
+const word UntaggedObject::kCanonicalBit = dart::UntaggedObject::kCanonicalBit;
+
const word UntaggedObject::kOldAndNotRememberedBit =
dart::UntaggedObject::kOldAndNotRememberedBit;
@@ -397,6 +399,9 @@
const word UntaggedObject::kGenerationalBarrierMask =
dart::UntaggedObject::kGenerationalBarrierMask;
+const word UntaggedObject::kIncrementalBarrierMask =
+ dart::UntaggedObject::kIncrementalBarrierMask;
+
bool IsTypedDataClassId(intptr_t cid) {
return dart::IsTypedDataClassId(cid);
}
diff --git a/runtime/vm/compiler/runtime_api.h b/runtime/vm/compiler/runtime_api.h
index 723e277..5f8ec14 100644
--- a/runtime/vm/compiler/runtime_api.h
+++ b/runtime/vm/compiler/runtime_api.h
@@ -410,6 +410,7 @@
class UntaggedObject : public AllStatic {
public:
static const word kCardRememberedBit;
+ static const word kCanonicalBit;
static const word kOldAndNotRememberedBit;
static const word kOldAndNotMarkedBit;
static const word kSizeTagPos;
@@ -422,6 +423,7 @@
static const word kTagBitsSizeTagPos;
static const word kBarrierOverlapShift;
static const word kGenerationalBarrierMask;
+ static const word kIncrementalBarrierMask;
static bool IsTypedDataClassId(intptr_t cid);
};
diff --git a/runtime/vm/constants_riscv.cc b/runtime/vm/constants_riscv.cc
index 7c9b6ed..481d83f 100644
--- a/runtime/vm/constants_riscv.cc
+++ b/runtime/vm/constants_riscv.cc
@@ -18,9 +18,9 @@
#endif
const char* const cpu_reg_names[kNumberOfCpuRegisters] = {
- "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "thr", "a0",
- "a1", "a2", "tmp", "tmp2", "pp", "a6", "a7", "s2", "s3", "s4", "s5",
- "s6", "s7", "s8", "s9", "null", "mask", "t3", "t4", "t5", "t6",
+ "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "thr", "a0",
+ "a1", "a2", "tmp", "tmp2", "pp", "a6", "a7", "s2", "s3", "s4", "s5",
+ "s6", "s7", "s8", "s9", "null", "wbs", "t3", "t4", "t5", "t6",
};
const char* const cpu_reg_abi_names[kNumberOfCpuRegisters] = {
diff --git a/runtime/vm/constants_riscv.h b/runtime/vm/constants_riscv.h
index 7c2a2b9..e073037 100644
--- a/runtime/vm/constants_riscv.h
+++ b/runtime/vm/constants_riscv.h
@@ -70,7 +70,7 @@
S8 = 24, // CALLEE_SAVED_TEMP / FAR_TMP
S9 = 25, // DISPATCH_TABLE_REG
S10 = 26, // NULL
- S11 = 27, // WRITE_BARRIER_MASK
+ S11 = 27, // WRITE_BARRIER_STATE
T3 = 28,
T4 = 29,
T5 = 30,
@@ -162,7 +162,7 @@
constexpr Register THR = S1; // Caches current thread in generated code.
constexpr Register CALLEE_SAVED_TEMP = S8;
constexpr Register CALLEE_SAVED_TEMP2 = S7;
-constexpr Register WRITE_BARRIER_MASK = S11;
+constexpr Register WRITE_BARRIER_STATE = S11;
constexpr Register NULL_REG = S10; // Caches NullObject() value.
// ABI for catch-clause entry point.
@@ -450,13 +450,13 @@
// We rely on that any calls into C++ also preserve X18.
constexpr intptr_t kReservedCpuRegisters =
R(ZR) | R(TP) | R(GP) | R(SP) | R(FP) | R(TMP) | R(TMP2) | R(PP) | R(THR) |
- R(RA) | R(WRITE_BARRIER_MASK) | R(NULL_REG) | R(DISPATCH_TABLE_REG) |
+ R(RA) | R(WRITE_BARRIER_STATE) | R(NULL_REG) | R(DISPATCH_TABLE_REG) |
R(FAR_TMP) | R(18);
constexpr intptr_t kNumberOfReservedCpuRegisters = 15;
#else
constexpr intptr_t kReservedCpuRegisters =
R(ZR) | R(TP) | R(GP) | R(SP) | R(FP) | R(TMP) | R(TMP2) | R(PP) | R(THR) |
- R(RA) | R(WRITE_BARRIER_MASK) | R(NULL_REG) | R(DISPATCH_TABLE_REG) |
+ R(RA) | R(WRITE_BARRIER_STATE) | R(NULL_REG) | R(DISPATCH_TABLE_REG) |
R(FAR_TMP);
constexpr intptr_t kNumberOfReservedCpuRegisters = 14;
#endif
diff --git a/runtime/vm/raw_object.h b/runtime/vm/raw_object.h
index abc979e..2e4ad32 100644
--- a/runtime/vm/raw_object.h
+++ b/runtime/vm/raw_object.h
@@ -158,11 +158,11 @@
// bit fields for storing tags.
enum TagBits {
kCardRememberedBit = 0,
- kOldAndNotMarkedBit = 1, // Incremental barrier target.
- kNewBit = 2, // Generational barrier target.
- kOldBit = 3, // Incremental barrier source.
- kOldAndNotRememberedBit = 4, // Generational barrier source.
- kCanonicalBit = 5,
+ kCanonicalBit = 1,
+ kOldAndNotMarkedBit = 2, // Incremental barrier target.
+ kNewBit = 3, // Generational barrier target.
+ kOldBit = 4, // Incremental barrier source.
+ kOldAndNotRememberedBit = 5, // Generational barrier source.
kReservedTagPos = 6,
kReservedTagSize = 2,
diff --git a/runtime/vm/simulator_riscv.cc b/runtime/vm/simulator_riscv.cc
index 3b2034f..937c314 100644
--- a/runtime/vm/simulator_riscv.cc
+++ b/runtime/vm/simulator_riscv.cc
@@ -463,7 +463,9 @@
pp -= kHeapObjectTag; // In the PP register, the pool pointer is untagged.
set_xreg(CODE_REG, code);
set_xreg(PP, pp);
- set_xreg(WRITE_BARRIER_MASK, thread->write_barrier_mask());
+ set_xreg(WRITE_BARRIER_STATE,
+ thread->write_barrier_mask() ^
+ ((UntaggedObject::kGenerationalBarrierMask << 1) - 1));
set_xreg(NULL_REG, static_cast<uintx_t>(Object::null()));
if (FLAG_precompiled_mode) {
set_xreg(DISPATCH_TABLE_REG,