[vm] Weaken CAS to RMW when accessing the remembered and mark bits.

Tighten some instruction sequences in the write barrier (update store
buffer) stubs.

Change-Id: Ib3657b9b582082137d17e86135200444172f428a
Reviewed-on: https://dart-review.googlesource.com/60820
Commit-Queue: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Siva Annamalai <asiva@google.com>
diff --git a/runtime/platform/atomic.h b/runtime/platform/atomic.h
index 4a468e5..37836e0 100644
--- a/runtime/platform/atomic.h
+++ b/runtime/platform/atomic.h
@@ -30,6 +30,10 @@
// Atomically decrement the value at p by 'value'.
static void DecrementBy(intptr_t* p, intptr_t value);
+ // Atomically perform { tmp = *ptr; *ptr = (tmp OP value); return tmp; }.
+ static uint32_t FetchOrRelaxedUint32(uint32_t* ptr, uint32_t value);
+ static uint32_t FetchAndRelaxedUint32(uint32_t* ptr, uint32_t value);
+
// Atomically compare *ptr to old_value, and if equal, store new_value.
// Returns the original value at ptr.
static uword CompareAndSwapWord(uword* ptr, uword old_value, uword new_value);
diff --git a/runtime/platform/atomic_android.h b/runtime/platform/atomic_android.h
index 42dda56..f95ba02 100644
--- a/runtime/platform/atomic_android.h
+++ b/runtime/platform/atomic_android.h
@@ -46,6 +46,16 @@
__sync_fetch_and_sub(p, value);
}
+inline uint32_t AtomicOperations::FetchOrRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return __atomic_fetch_or(ptr, value, __ATOMIC_RELAXED);
+}
+
+inline uint32_t AtomicOperations::FetchAndRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return __atomic_fetch_and(ptr, value, __ATOMIC_RELAXED);
+}
+
inline uword AtomicOperations::CompareAndSwapWord(uword* ptr,
uword old_value,
uword new_value) {
diff --git a/runtime/platform/atomic_fuchsia.h b/runtime/platform/atomic_fuchsia.h
index 5434fb7..2883e1e 100644
--- a/runtime/platform/atomic_fuchsia.h
+++ b/runtime/platform/atomic_fuchsia.h
@@ -43,6 +43,16 @@
__sync_fetch_and_sub(p, value);
}
+inline uint32_t AtomicOperations::FetchOrRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return __atomic_fetch_or(ptr, value, __ATOMIC_RELAXED);
+}
+
+inline uint32_t AtomicOperations::FetchAndRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return __atomic_fetch_and(ptr, value, __ATOMIC_RELAXED);
+}
+
inline uword AtomicOperations::CompareAndSwapWord(uword* ptr,
uword old_value,
uword new_value) {
diff --git a/runtime/platform/atomic_linux.h b/runtime/platform/atomic_linux.h
index 3db8d73..fd1773f 100644
--- a/runtime/platform/atomic_linux.h
+++ b/runtime/platform/atomic_linux.h
@@ -46,6 +46,16 @@
__sync_fetch_and_sub(p, value);
}
+inline uint32_t AtomicOperations::FetchOrRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return __atomic_fetch_or(ptr, value, __ATOMIC_RELAXED);
+}
+
+inline uint32_t AtomicOperations::FetchAndRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return __atomic_fetch_and(ptr, value, __ATOMIC_RELAXED);
+}
+
inline uword AtomicOperations::CompareAndSwapWord(uword* ptr,
uword old_value,
uword new_value) {
diff --git a/runtime/platform/atomic_macos.h b/runtime/platform/atomic_macos.h
index b08ba4f..b0bd31f 100644
--- a/runtime/platform/atomic_macos.h
+++ b/runtime/platform/atomic_macos.h
@@ -46,6 +46,16 @@
__sync_fetch_and_sub(p, value);
}
+inline uint32_t AtomicOperations::FetchOrRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return __atomic_fetch_or(ptr, value, __ATOMIC_RELAXED);
+}
+
+inline uint32_t AtomicOperations::FetchAndRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return __atomic_fetch_and(ptr, value, __ATOMIC_RELAXED);
+}
+
inline uword AtomicOperations::CompareAndSwapWord(uword* ptr,
uword old_value,
uword new_value) {
diff --git a/runtime/platform/atomic_win.h b/runtime/platform/atomic_win.h
index f7fc322..5e6db1b 100644
--- a/runtime/platform/atomic_win.h
+++ b/runtime/platform/atomic_win.h
@@ -102,6 +102,18 @@
#endif
}
+inline uint32_t AtomicOperations::FetchOrRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return static_cast<uint32_t>(InterlockedOrNoFence(
+ reinterpret_cast<LONG*>(ptr), static_cast<LONG>(value)));
+}
+
+inline uint32_t AtomicOperations::FetchAndRelaxedUint32(uint32_t* ptr,
+ uint32_t value) {
+ return static_cast<uint32_t>(InterlockedAndNoFence(
+ reinterpret_cast<LONG*>(ptr), static_cast<LONG>(value)));
+}
+
inline uword AtomicOperations::CompareAndSwapWord(uword* ptr,
uword old_value,
uword new_value) {
diff --git a/runtime/vm/atomic_test.cc b/runtime/vm/atomic_test.cc
index c9afc97..5f1d565 100644
--- a/runtime/vm/atomic_test.cc
+++ b/runtime/vm/atomic_test.cc
@@ -50,6 +50,20 @@
EXPECT_EQ(static_cast<intptr_t>(1), v);
}
+VM_UNIT_TEST_CASE(FetchOrRelaxed) {
+ uint32_t v = 42;
+ uint32_t previous = AtomicOperations::FetchOrRelaxedUint32(&v, 3);
+ EXPECT_EQ(static_cast<uint32_t>(42), previous);
+ EXPECT_EQ(static_cast<uint32_t>(43), v);
+}
+
+VM_UNIT_TEST_CASE(FetchAndRelaxed) {
+ uint32_t v = 42;
+ uint32_t previous = AtomicOperations::FetchAndRelaxedUint32(&v, 3);
+ EXPECT_EQ(static_cast<uint32_t>(42), previous);
+ EXPECT_EQ(static_cast<uint32_t>(2), v);
+}
+
VM_UNIT_TEST_CASE(LoadRelaxed) {
uword v = 42;
EXPECT_EQ(static_cast<uword>(42), AtomicOperations::LoadRelaxed(&v));
diff --git a/runtime/vm/raw_object.h b/runtime/vm/raw_object.h
index d8b7048..bf7d68f 100644
--- a/runtime/vm/raw_object.h
+++ b/runtime/vm/raw_object.h
@@ -623,28 +623,20 @@
template <class TagBitField>
void UpdateTagBit(bool value) {
- uint32_t tags = ptr()->tags_;
- uint32_t old_tags;
- do {
- old_tags = tags;
- uint32_t new_tags = TagBitField::update(value, old_tags);
- tags = AtomicOperations::CompareAndSwapUint32(&ptr()->tags_, old_tags,
- new_tags);
- } while (tags != old_tags);
+ if (value) {
+ AtomicOperations::FetchOrRelaxedUint32(&ptr()->tags_,
+ TagBitField::encode(true));
+ } else {
+ AtomicOperations::FetchAndRelaxedUint32(&ptr()->tags_,
+ ~TagBitField::encode(true));
+ }
}
template <class TagBitField>
bool TryAcquireTagBit() {
- uint32_t tags = ptr()->tags_;
- uint32_t old_tags;
- do {
- old_tags = tags;
- if (TagBitField::decode(tags)) return false;
- uint32_t new_tags = TagBitField::update(true, old_tags);
- tags = AtomicOperations::CompareAndSwapUint32(&ptr()->tags_, old_tags,
- new_tags);
- } while (tags != old_tags);
- return true;
+ uint32_t old_tags = AtomicOperations::FetchOrRelaxedUint32(
+ &ptr()->tags_, TagBitField::encode(true));
+ return !TagBitField::decode(old_tags);
}
// All writes to heap objects should ultimately pass through one of the
diff --git a/runtime/vm/stub_code_arm.cc b/runtime/vm/stub_code_arm.cc
index 7b43afa..493329f 100644
--- a/runtime/vm/stub_code_arm.cc
+++ b/runtime/vm/stub_code_arm.cc
@@ -1030,21 +1030,21 @@
// Input parameters:
// R0: address (i.e. object) being stored into.
void StubCode::GenerateUpdateStoreBufferStub(Assembler* assembler) {
- // Save values being destroyed.
- __ PushList((1 << R1) | (1 << R2) | (1 << R3));
-
Label add_to_buffer;
// Check whether this object has already been remembered. Skip adding to the
// store buffer if the object is in the store buffer already.
- // Spilled: R1, R2, R3
+ // TMP: Scratch for the tags word (nothing is spilled on this fast path).
// R0: Address being stored
- __ ldr(R2, FieldAddress(R0, Object::tags_offset()));
- __ tst(R2, Operand(1 << RawObject::kRememberedBit));
+ __ ldr(TMP, FieldAddress(R0, Object::tags_offset()));
+ __ tst(TMP, Operand(1 << RawObject::kRememberedBit));
__ b(&add_to_buffer, EQ);
- __ PopList((1 << R1) | (1 << R2) | (1 << R3));
__ Ret();
__ Bind(&add_to_buffer);
+
+ // Save values being destroyed.
+ __ PushList((1 << R1) | (1 << R2) | (1 << R3));
+
- // R2: Header word.
+ // NOTE(review): header word was loaded into TMP, not R2 - check uses below.
if (TargetCPUFeatures::arm_version() == ARMv5TE) {
// TODO(21263): Implement 'swp' and use it below.
@@ -1077,17 +1077,17 @@
// Increment top_ and check for overflow.
// R2: top_.
// R1: StoreBufferBlock.
- Label L;
+ Label overflow;
__ add(R2, R2, Operand(1));
__ str(R2, Address(R1, StoreBufferBlock::top_offset()));
__ CompareImmediate(R2, StoreBufferBlock::kSize);
// Restore values.
__ PopList((1 << R1) | (1 << R2) | (1 << R3));
- __ b(&L, EQ);
+ __ b(&overflow, EQ);
__ Ret();
// Handle overflow: Call the runtime leaf function.
- __ Bind(&L);
+ __ Bind(&overflow);
// Setup frame, push callee-saved registers.
__ Push(CODE_REG);
diff --git a/runtime/vm/stub_code_arm64.cc b/runtime/vm/stub_code_arm64.cc
index 97a107b..c38de9a 100644
--- a/runtime/vm/stub_code_arm64.cc
+++ b/runtime/vm/stub_code_arm64.cc
@@ -1211,8 +1211,7 @@
// Check whether this object has already been remembered. Skip adding to the
// store buffer if the object is in the store buffer already.
__ LoadFieldFromOffset(TMP, R0, Object::tags_offset(), kWord);
- __ tsti(TMP, Immediate(1 << RawObject::kRememberedBit));
- __ b(&add_to_buffer, EQ);
+ __ tbz(&add_to_buffer, TMP, RawObject::kRememberedBit);
__ ret();
__ Bind(&add_to_buffer);
@@ -1232,8 +1231,7 @@
__ ldxr(R2, R3, kWord);
__ orri(R2, R2, Immediate(1 << RawObject::kRememberedBit));
__ stxr(R1, R2, R3, kWord);
- __ cmp(R1, Operand(1));
- __ b(&retry, EQ);
+ __ cbnz(&retry, R1);
// Load the StoreBuffer block out of the thread. Then load top_ out of the
// StoreBufferBlock and add the address to the pointers_.
@@ -1245,7 +1243,7 @@
// Increment top_ and check for overflow.
// R2: top_.
// R1: StoreBufferBlock.
- Label L;
+ Label overflow;
__ add(R2, R2, Operand(1));
__ StoreToOffset(R2, R1, StoreBufferBlock::top_offset(), kUnsignedWord);
__ CompareImmediate(R2, StoreBufferBlock::kSize);
@@ -1253,11 +1251,11 @@
__ Pop(R3);
__ Pop(R2);
__ Pop(R1);
- __ b(&L, EQ);
+ __ b(&overflow, EQ);
__ ret();
// Handle overflow: Call the runtime leaf function.
- __ Bind(&L);
+ __ Bind(&overflow);
// Setup frame, push callee-saved registers.
__ Push(CODE_REG);
diff --git a/runtime/vm/stub_code_ia32.cc b/runtime/vm/stub_code_ia32.cc
index bdc494b..60a107b 100644
--- a/runtime/vm/stub_code_ia32.cc
+++ b/runtime/vm/stub_code_ia32.cc
@@ -961,8 +961,6 @@
// store buffer if the object is in the store buffer already.
// Spilled: EAX, ECX
// EDX: Address being stored
- Label reload;
- __ Bind(&reload);
__ movl(EAX, FieldAddress(EDX, Object::tags_offset()));
__ testl(EAX, Immediate(1 << RawObject::kRememberedBit));
__ j(EQUAL, &add_to_buffer, Assembler::kNearJump);
@@ -974,11 +972,10 @@
// EDX: Address being stored
// EAX: Current tag value
__ Bind(&add_to_buffer);
- __ movl(ECX, EAX);
- __ orl(ECX, Immediate(1 << RawObject::kRememberedBit));
- // Compare the tag word with EAX, update to ECX if unchanged.
- __ LockCmpxchgl(FieldAddress(EDX, Object::tags_offset()), ECX);
- __ j(NOT_EQUAL, &reload);
+ // lock+orl is an atomic read-modify-write.
+ __ lock();
+ __ orl(FieldAddress(EDX, Object::tags_offset()),
+ Immediate(1 << RawObject::kRememberedBit));
// Load the StoreBuffer block out of the thread. Then load top_ out of the
// StoreBufferBlock and add the address to the pointers_.
@@ -992,7 +989,7 @@
// Spilled: EAX, ECX
// ECX: top_
// EAX: StoreBufferBlock
- Label L;
+ Label overflow;
__ incl(ECX);
__ movl(Address(EAX, StoreBufferBlock::top_offset()), ECX);
__ cmpl(ECX, Immediate(StoreBufferBlock::kSize));
@@ -1000,11 +997,11 @@
// Spilled: EAX, ECX
__ popl(ECX);
__ popl(EAX);
- __ j(EQUAL, &L, Assembler::kNearJump);
+ __ j(EQUAL, &overflow, Assembler::kNearJump);
__ ret();
// Handle overflow: Call the runtime leaf function.
- __ Bind(&L);
+ __ Bind(&overflow);
// Setup frame, push callee-saved registers.
__ EnterCallRuntimeFrame(1 * kWordSize);
diff --git a/runtime/vm/stub_code_x64.cc b/runtime/vm/stub_code_x64.cc
index 6b0b539..22da3fe 100644
--- a/runtime/vm/stub_code_x64.cc
+++ b/runtime/vm/stub_code_x64.cc
@@ -1194,22 +1194,13 @@
// Input parameters:
// RDX: Address being stored
void StubCode::GenerateUpdateStoreBufferStub(Assembler* assembler) {
- // Save registers being destroyed.
- __ pushq(RAX);
- __ pushq(RCX);
-
Label add_to_buffer;
// Check whether this object has already been remembered. Skip adding to the
// store buffer if the object is in the store buffer already.
- // Spilled: RAX, RCX
// RDX: Address being stored
- Label reload;
- __ Bind(&reload);
- __ movl(RAX, FieldAddress(RDX, Object::tags_offset()));
- __ testl(RAX, Immediate(1 << RawObject::kRememberedBit));
+ __ movl(TMP, FieldAddress(RDX, Object::tags_offset()));
+ __ testl(TMP, Immediate(1 << RawObject::kRememberedBit));
__ j(EQUAL, &add_to_buffer, Assembler::kNearJump);
- __ popq(RCX);
- __ popq(RAX);
__ ret();
// Update the tags that this object has been remembered.
@@ -1218,11 +1209,14 @@
// RDX: Address being stored
- // RAX: Current tag value
+ // TMP: Tag value loaded above (before the remembered bit is set)
__ Bind(&add_to_buffer);
- __ movl(RCX, RAX);
- __ orl(RCX, Immediate(1 << RawObject::kRememberedBit));
- // Compare the tag word with RAX, update to RCX if unchanged.
- __ LockCmpxchgl(FieldAddress(RDX, Object::tags_offset()), RCX);
- __ j(NOT_EQUAL, &reload);
+ // lock+orl is an atomic read-modify-write.
+ __ lock();
+ __ orl(FieldAddress(RDX, Object::tags_offset()),
+ Immediate(1 << RawObject::kRememberedBit));
+
+ // Save registers being destroyed.
+ __ pushq(RAX);
+ __ pushq(RCX);
// Load the StoreBuffer block out of the thread. Then load top_ out of the
// StoreBufferBlock and add the address to the pointers_.
@@ -1234,18 +1228,18 @@
// Increment top_ and check for overflow.
// RCX: top_
// RAX: StoreBufferBlock
- Label L;
+ Label overflow;
__ incq(RCX);
__ movl(Address(RAX, StoreBufferBlock::top_offset()), RCX);
__ cmpl(RCX, Immediate(StoreBufferBlock::kSize));
// Restore values.
__ popq(RCX);
__ popq(RAX);
- __ j(EQUAL, &L, Assembler::kNearJump);
+ __ j(EQUAL, &overflow, Assembler::kNearJump);
__ ret();
// Handle overflow: Call the runtime leaf function.
- __ Bind(&L);
+ __ Bind(&overflow);
// Setup frame, push callee-saved registers.
__ pushq(CODE_REG);
__ movq(CODE_REG, Address(THR, Thread::update_store_buffer_code_offset()));