[VM] Bare instructions - Part 6: Get rid of CODE_REG indirection in CallThroughCode/CallThroughFunction stubs
If the --use-bare-instructions flag is enabled we will:
* Not load CODE_REG inside CallThroughCode (polymorphic case)
* Not load CODE_REG inside CallThroughFunction (megamorphic case)
* Store the EntryPoint of the function in the MegamorphicCache tables
(instead of a Function pointer)
Issue https://github.com/dart-lang/sdk/issues/33274
Change-Id: I8aca689dc4e92ece897972da4ae202338527ac62
Reviewed-on: https://dart-review.googlesource.com/c/86928
Commit-Queue: Martin Kustermann <kustermann@google.com>
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/clustered_snapshot.cc b/runtime/vm/clustered_snapshot.cc
index 6b68265..27536c5 100644
--- a/runtime/vm/clustered_snapshot.cc
+++ b/runtime/vm/clustered_snapshot.cc
@@ -2340,6 +2340,32 @@
cache->ptr()->filled_entry_count_ = d->Read<int32_t>();
}
}
+
+ void PostLoad(const Array& refs, Snapshot::Kind kind, Zone* zone) {
+#if defined(DART_PRECOMPILED_RUNTIME)
+ if (FLAG_use_bare_instructions) {
+ // By default, every megamorphic call site will load the target
+ // [Function] from the hash table and call indirectly via loading the
+ // entrypoint from the function.
+ //
+ // In --use-bare-instructions mode we avoid the extra indirection via the
+ // [Function] object by storing the entry point itself in the hash table.
+ //
+ // Currently our AOT compiler will emit megamorphic calls in certain
+ // situations (namely in slow-path code of CheckedSmi* instructions).
+ //
+ // TODO(compiler-team): Change the CheckedSmi* slow path code to use
+ // normal switchable calls instead of megamorphic calls. (This is also a
+ // memory trade-off because [MegamorphicCache]s are per-selector while
+ // [ICData]s are per-callsite.)
+ auto& cache = MegamorphicCache::Handle(zone);
+ for (intptr_t i = start_index_; i < stop_index_; ++i) {
+ cache ^= refs.At(i);
+ cache.SwitchToBareInstructions();
+ }
+ }
+#endif // defined(DART_PRECOMPILED_RUNTIME)
+ }
};
#if !defined(DART_PRECOMPILED_RUNTIME)
diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
index e2f3264..e08de19 100644
--- a/runtime/vm/object.cc
+++ b/runtime/vm/object.cc
@@ -15259,8 +15259,8 @@
const Array& new_buckets =
Array::Handle(Array::New(kEntryLength * new_capacity));
- Function& target = Function::Handle(
- MegamorphicCacheTable::miss_handler(Isolate::Current()));
+ auto& target =
+ Object::Handle(MegamorphicCacheTable::miss_handler(Isolate::Current()));
for (intptr_t i = 0; i < new_capacity; ++i) {
SetEntry(new_buckets, i, smi_illegal_cid(), target);
}
@@ -15273,15 +15273,14 @@
for (intptr_t i = 0; i < old_capacity; ++i) {
class_id ^= GetClassId(old_buckets, i);
if (class_id.Value() != kIllegalCid) {
- target ^= GetTargetFunction(old_buckets, i);
+ target = GetTargetFunction(old_buckets, i);
Insert(class_id, target);
}
}
}
}
-void MegamorphicCache::Insert(const Smi& class_id,
- const Function& target) const {
+void MegamorphicCache::Insert(const Smi& class_id, const Object& target) const {
ASSERT(static_cast<double>(filled_entry_count() + 1) <=
(kLoadFactor * static_cast<double>(mask() + 1)));
const Array& backing_array = Array::Handle(buckets());
@@ -15305,6 +15304,23 @@
name.ToCString());
}
+void MegamorphicCache::SwitchToBareInstructions() {
+ NoSafepointScope no_safepoint_scope;
+
+ intptr_t capacity = mask() + 1;
+ for (intptr_t i = 0; i < capacity; ++i) {
+ const intptr_t target_index = i * kEntryLength + kTargetFunctionIndex;
+ RawObject** slot = &Array::DataOf(buckets())[target_index];
+ const intptr_t cid = (*slot)->GetClassIdMayBeSmi();
+ if (cid == kFunctionCid) {
+ RawCode* code = Function::CurrentCodeOf(Function::RawCast(*slot));
+ *slot = Smi::FromAlignedAddress(Code::EntryPoint(code));
+ } else {
+ ASSERT(cid == kSmiCid);
+ }
+ }
+}
+
RawSubtypeTestCache* SubtypeTestCache::New() {
ASSERT(Object::subtypetestcache_class() != Class::null());
SubtypeTestCache& result = SubtypeTestCache::Handle();
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index 5cda55f..ee91913 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -2273,7 +2273,11 @@
// Return the most recently compiled and installed code for this function.
// It is not the only Code object that points to this function.
- RawCode* CurrentCode() const { return raw_ptr()->code_; }
+ RawCode* CurrentCode() const { return CurrentCodeOf(raw()); }
+
+ static RawCode* CurrentCodeOf(const RawFunction* function) {
+ return function->ptr()->code_;
+ }
RawCode* unoptimized_code() const {
#if defined(DART_PRECOMPILED_RUNTIME)
@@ -5541,7 +5545,9 @@
void EnsureCapacity() const;
- void Insert(const Smi& class_id, const Function& target) const;
+ void Insert(const Smi& class_id, const Object& target) const;
+
+ void SwitchToBareInstructions();
static intptr_t InstanceSize() {
return RoundedAllocationSize(sizeof(RawMegamorphicCache));
@@ -5566,7 +5572,7 @@
static inline void SetEntry(const Array& array,
intptr_t index,
const Smi& class_id,
- const Function& target);
+ const Object& target);
static inline RawObject* GetClassId(const Array& array, intptr_t index);
static inline RawObject* GetTargetFunction(const Array& array,
@@ -9257,8 +9263,20 @@
void MegamorphicCache::SetEntry(const Array& array,
intptr_t index,
const Smi& class_id,
- const Function& target) {
+ const Object& target) {
+ ASSERT(target.IsFunction() || target.IsSmi());
array.SetAt((index * kEntryLength) + kClassIdIndex, class_id);
+#if defined(DART_PRECOMPILED_RUNTIME)
+ if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+ if (target.IsFunction()) {
+ const auto& function = Function::Cast(target);
+ const auto& entry_point = Smi::Handle(
+ Smi::FromAlignedAddress(Code::EntryPoint(function.CurrentCode())));
+ array.SetAt((index * kEntryLength) + kTargetFunctionIndex, entry_point);
+ return;
+ }
+ }
+#endif // defined(DART_PRECOMPILED_RUNTIME)
array.SetAt((index * kEntryLength) + kTargetFunctionIndex, target);
}
diff --git a/runtime/vm/stub_code_arm.cc b/runtime/vm/stub_code_arm.cc
index 7c66ac2..71a8559 100644
--- a/runtime/vm/stub_code_arm.cc
+++ b/runtime/vm/stub_code_arm.cc
@@ -2680,10 +2680,18 @@
// proper target for the given name and arguments descriptor. If the
// illegal class id was found, the target is a cache miss handler that can
// be invoked as a normal Dart function.
- __ ldr(R0, FieldAddress(IP, base + kWordSize));
- __ ldr(R4, FieldAddress(R9, MegamorphicCache::arguments_descriptor_offset()));
- __ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
- __ Branch(FieldAddress(R0, Function::entry_point_offset()));
+ const auto target_address = FieldAddress(IP, base + kWordSize);
+ if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+ __ ldr(ARGS_DESC_REG,
+ FieldAddress(R9, MegamorphicCache::arguments_descriptor_offset()));
+ __ Branch(target_address);
+ } else {
+ __ ldr(R0, target_address);
+ __ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
+ __ ldr(ARGS_DESC_REG,
+ FieldAddress(R9, MegamorphicCache::arguments_descriptor_offset()));
+ __ Branch(FieldAddress(R0, Function::entry_point_offset()));
+ }
// Probe failed, check if it is a miss.
__ Bind(&probe_failed);
@@ -2704,7 +2712,8 @@
// R4: arguments descriptor
void StubCode::GenerateICCallThroughFunctionStub(Assembler* assembler) {
Label loop, found, miss;
- __ ldr(R4, FieldAddress(R9, ICData::arguments_descriptor_offset()));
+ __ ldr(ARGS_DESC_REG,
+ FieldAddress(R9, ICData::arguments_descriptor_offset()));
__ ldr(R8, FieldAddress(R9, ICData::ic_data_offset()));
__ AddImmediate(R8, Array::data_offset() - kHeapObjectTag);
// R8: first IC entry
@@ -2737,8 +2746,8 @@
void StubCode::GenerateICCallThroughCodeStub(Assembler* assembler) {
Label loop, found, miss;
- __ ldr(R4, FieldAddress(R9, ICData::arguments_descriptor_offset()));
__ ldr(R8, FieldAddress(R9, ICData::ic_data_offset()));
+ __ ldr(R4, FieldAddress(R9, ICData::arguments_descriptor_offset()));
__ AddImmediate(R8, Array::data_offset() - kHeapObjectTag);
// R8: first IC entry
__ LoadTaggedClassIdMayBeSmi(R1, R0);
@@ -2759,7 +2768,9 @@
__ Bind(&found);
const intptr_t code_offset = ICData::CodeIndexFor(1) * kWordSize;
const intptr_t entry_offset = ICData::EntryPointIndexFor(1) * kWordSize;
- __ ldr(CODE_REG, Address(R8, code_offset));
+ if (!(FLAG_precompiled_mode && FLAG_use_bare_instructions)) {
+ __ ldr(CODE_REG, Address(R8, code_offset));
+ }
__ Branch(Address(R8, entry_offset));
__ Bind(&miss);
diff --git a/runtime/vm/stub_code_arm64.cc b/runtime/vm/stub_code_arm64.cc
index 7a2b85a..91996d9 100644
--- a/runtime/vm/stub_code_arm64.cc
+++ b/runtime/vm/stub_code_arm64.cc
@@ -2945,10 +2945,18 @@
// proper target for the given name and arguments descriptor. If the
// illegal class id was found, the target is a cache miss handler that can
// be invoked as a normal Dart function.
- __ ldr(R0, FieldAddress(TMP, base + kWordSize));
- __ ldr(R4, FieldAddress(R5, MegamorphicCache::arguments_descriptor_offset()));
- __ ldr(R1, FieldAddress(R0, Function::entry_point_offset()));
- __ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
+ const auto target_address = FieldAddress(TMP, base + kWordSize);
+ if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+ __ ldr(R1, target_address);
+ __ ldr(ARGS_DESC_REG,
+ FieldAddress(R5, MegamorphicCache::arguments_descriptor_offset()));
+ } else {
+ __ ldr(R0, target_address);
+ __ ldr(R1, FieldAddress(R0, Function::entry_point_offset()));
+ __ ldr(ARGS_DESC_REG,
+ FieldAddress(R5, MegamorphicCache::arguments_descriptor_offset()));
+ __ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
+ }
__ br(R1);
// Probe failed, check if it is a miss.
@@ -2975,7 +2983,8 @@
// R4: arguments descriptor
void StubCode::GenerateICCallThroughFunctionStub(Assembler* assembler) {
Label loop, found, miss;
- __ ldr(R4, FieldAddress(R5, ICData::arguments_descriptor_offset()));
+ __ ldr(ARGS_DESC_REG,
+ FieldAddress(R5, ICData::arguments_descriptor_offset()));
__ ldr(R8, FieldAddress(R5, ICData::ic_data_offset()));
__ AddImmediate(R8, Array::data_offset() - kHeapObjectTag);
// R8: first IC entry
@@ -3010,8 +3019,8 @@
void StubCode::GenerateICCallThroughCodeStub(Assembler* assembler) {
Label loop, found, miss;
- __ ldr(R4, FieldAddress(R5, ICData::arguments_descriptor_offset()));
__ ldr(R8, FieldAddress(R5, ICData::ic_data_offset()));
+ __ ldr(R4, FieldAddress(R5, ICData::arguments_descriptor_offset()));
__ AddImmediate(R8, Array::data_offset() - kHeapObjectTag);
// R8: first IC entry
__ LoadTaggedClassIdMayBeSmi(R1, R0);
@@ -3033,7 +3042,9 @@
const intptr_t code_offset = ICData::CodeIndexFor(1) * kWordSize;
const intptr_t entry_offset = ICData::EntryPointIndexFor(1) * kWordSize;
__ ldr(R1, Address(R8, entry_offset));
- __ ldr(CODE_REG, Address(R8, code_offset));
+ if (!(FLAG_precompiled_mode && FLAG_use_bare_instructions)) {
+ __ ldr(CODE_REG, Address(R8, code_offset));
+ }
__ br(R1);
__ Bind(&miss);
diff --git a/runtime/vm/stub_code_x64.cc b/runtime/vm/stub_code_x64.cc
index 42b6b21..7184e25 100644
--- a/runtime/vm/stub_code_x64.cc
+++ b/runtime/vm/stub_code_x64.cc
@@ -2952,12 +2952,19 @@
// proper target for the given name and arguments descriptor. If the
// illegal class id was found, the target is a cache miss handler that can
// be invoked as a normal Dart function.
- __ movq(RAX, FieldAddress(RDI, RCX, TIMES_8, base + kWordSize));
- __ movq(R10,
- FieldAddress(RBX, MegamorphicCache::arguments_descriptor_offset()));
- __ movq(RCX, FieldAddress(RAX, Function::entry_point_offset()));
- __ movq(CODE_REG, FieldAddress(RAX, Function::code_offset()));
- __ jmp(RCX);
+ const auto target_address = FieldAddress(RDI, RCX, TIMES_8, base + kWordSize);
+ if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+ __ movq(R10,
+ FieldAddress(RBX, MegamorphicCache::arguments_descriptor_offset()));
+ __ jmp(target_address);
+ } else {
+ __ movq(RAX, target_address);
+ __ movq(R10,
+ FieldAddress(RBX, MegamorphicCache::arguments_descriptor_offset()));
+ __ movq(RCX, FieldAddress(RAX, Function::entry_point_offset()));
+ __ movq(CODE_REG, FieldAddress(RAX, Function::code_offset()));
+ __ jmp(RCX);
+ }
// Probe failed, check if it is a miss.
__ Bind(&probe_failed);
@@ -3044,9 +3051,10 @@
__ Bind(&found);
const intptr_t code_offset = ICData::CodeIndexFor(1) * kWordSize;
const intptr_t entry_offset = ICData::EntryPointIndexFor(1) * kWordSize;
- __ movq(RCX, Address(R13, entry_offset));
- __ movq(CODE_REG, Address(R13, code_offset));
- __ jmp(RCX);
+ if (!(FLAG_precompiled_mode && FLAG_use_bare_instructions)) {
+ __ movq(CODE_REG, Address(R13, code_offset));
+ }
+ __ jmp(Address(R13, entry_offset));
__ Bind(&miss);
__ LoadIsolate(RAX);