[vm/inliner] Inline typed data on 32-bit archs.
Rationale:
Brings all the recent typed-data inlining improvements to 32-bit architectures as well.
Performance:
Many large improvements on micro benchmarks.
The Meteor benchmark regresses, as expected (known trade-off).
https://github.com/dart-lang/sdk/issues/33205
Change-Id: Ie9ebcfdfe9c5e265595c95d5e943ae35c5700a97
Reviewed-on: https://dart-review.googlesource.com/63685
Commit-Queue: Aart Bik <ajcbik@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc
index 70bee17..e427841 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm.cc
@@ -2112,8 +2112,7 @@
return kUnsignedWord;
case kTypedDataInt64ArrayCid:
case kTypedDataUint64ArrayCid:
- UNREACHABLE();
- return kByte;
+ return kDWord;
case kTypedDataFloat32ArrayCid:
return kSWord;
case kTypedDataFloat64ArrayCid:
diff --git a/runtime/vm/compiler/assembler/assembler_ia32.cc b/runtime/vm/compiler/assembler/assembler_ia32.cc
index 692d9c8..b566572 100644
--- a/runtime/vm/compiler/assembler/assembler_ia32.cc
+++ b/runtime/vm/compiler/assembler/assembler_ia32.cc
@@ -2512,12 +2512,13 @@
intptr_t cid,
intptr_t index_scale,
Register array,
- intptr_t index) {
+ intptr_t index,
+ intptr_t extra_disp) {
if (is_external) {
- return Address(array, index * index_scale);
+ return Address(array, index * index_scale + extra_disp);
} else {
const int64_t disp = static_cast<int64_t>(index) * index_scale +
- Instance::DataOffsetFor(cid);
+ Instance::DataOffsetFor(cid) + extra_disp;
ASSERT(Utils::IsInt(32, disp));
return FieldAddress(array, static_cast<int32_t>(disp));
}
@@ -2549,12 +2550,13 @@
intptr_t cid,
intptr_t index_scale,
Register array,
- Register index) {
+ Register index,
+ intptr_t extra_disp) {
if (is_external) {
- return Address(array, index, ToScaleFactor(index_scale), 0);
+ return Address(array, index, ToScaleFactor(index_scale), extra_disp);
} else {
return FieldAddress(array, index, ToScaleFactor(index_scale),
- Instance::DataOffsetFor(cid));
+ Instance::DataOffsetFor(cid) + extra_disp);
}
}
diff --git a/runtime/vm/compiler/assembler/assembler_ia32.h b/runtime/vm/compiler/assembler/assembler_ia32.h
index 6694a85..19a00b4 100644
--- a/runtime/vm/compiler/assembler/assembler_ia32.h
+++ b/runtime/vm/compiler/assembler/assembler_ia32.h
@@ -672,13 +672,15 @@
intptr_t cid,
intptr_t index_scale,
Register array,
- intptr_t index);
+ intptr_t index,
+ intptr_t extra_disp = 0);
static Address ElementAddressForRegIndex(bool is_external,
intptr_t cid,
intptr_t index_scale,
Register array,
- Register index);
+ Register index,
+ intptr_t extra_disp = 0);
static Address VMTagAddress() {
return Address(THR, Thread::vm_tag_offset());
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index 694f8f2..36a33a2 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -1131,6 +1131,8 @@
case kTypedDataInt32ArrayCid:
case kTypedDataUint32ArrayCid:
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid:
return CompileType::Int();
default:
@@ -1159,6 +1161,9 @@
return kUnboxedInt32;
case kTypedDataUint32ArrayCid:
return kUnboxedUint32;
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid:
+ return kUnboxedInt64;
case kTypedDataFloat32ArrayCid:
case kTypedDataFloat64ArrayCid:
return kUnboxedDouble;
@@ -1213,9 +1218,12 @@
LocationSummary* LoadIndexedInstr::MakeLocationSummary(Zone* zone,
bool opt) const {
+ const bool directly_addressable =
+ aligned() && representation() != kUnboxedInt64;
const intptr_t kNumInputs = 2;
intptr_t kNumTemps = 0;
- if (!aligned()) {
+
+ if (!directly_addressable) {
kNumTemps += 1;
if (representation() == kUnboxedDouble) {
kNumTemps += 1;
@@ -1251,11 +1259,16 @@
} else if (representation() == kUnboxedInt32) {
ASSERT(class_id() == kTypedDataInt32ArrayCid);
locs->set_out(0, Location::RequiresRegister());
+ } else if (representation() == kUnboxedInt64) {
+ ASSERT(class_id() == kTypedDataInt64ArrayCid ||
+ class_id() == kTypedDataUint64ArrayCid);
+ locs->set_out(0, Location::Pair(Location::RequiresRegister(),
+ Location::RequiresRegister()));
} else {
ASSERT(representation() == kTagged);
locs->set_out(0, Location::RequiresRegister());
}
- if (!aligned()) {
+ if (!directly_addressable) {
locs->set_temp(0, Location::RequiresRegister());
if (representation() == kUnboxedDouble) {
locs->set_temp(1, Location::RequiresRegister());
@@ -1265,13 +1278,16 @@
}
void LoadIndexedInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+ const bool directly_addressable =
+ aligned() && representation() != kUnboxedInt64;
// The array register points to the backing store for external arrays.
const Register array = locs()->in(0).reg();
const Location index = locs()->in(1);
- const Register address = aligned() ? kNoRegister : locs()->temp(0).reg();
+ const Register address =
+ directly_addressable ? kNoRegister : locs()->temp(0).reg();
Address element_address(kNoRegister);
- if (aligned()) {
+ if (directly_addressable) {
element_address = index.IsRegister()
? __ ElementAddressForRegIndex(
true, // Load.
@@ -1368,6 +1384,25 @@
return;
}
+ if (representation() == kUnboxedInt64) {
+ ASSERT(!directly_addressable); // need to add to register
+ ASSERT(class_id() == kTypedDataInt64ArrayCid ||
+ class_id() == kTypedDataUint64ArrayCid);
+ ASSERT(locs()->out(0).IsPairLocation());
+ PairLocation* result_pair = locs()->out(0).AsPairLocation();
+ Register result_lo = result_pair->At(0).reg();
+ Register result_hi = result_pair->At(1).reg();
+ if (aligned()) {
+ __ ldr(result_lo, Address(address));
+ __ ldr(result_hi, Address(address, kWordSize));
+ } else {
+ __ LoadWordUnaligned(result_lo, address, TMP);
+ __ AddImmediate(address, address, kWordSize);
+ __ LoadWordUnaligned(result_hi, address, TMP);
+ }
+ return;
+ }
+
ASSERT(representation() == kTagged);
const Register result = locs()->out(0).reg();
@@ -1435,6 +1470,9 @@
return kUnboxedInt32;
case kTypedDataUint32ArrayCid:
return kUnboxedUint32;
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid:
+ return kUnboxedInt64;
case kTypedDataFloat32ArrayCid:
case kTypedDataFloat64ArrayCid:
return kUnboxedDouble;
@@ -1452,6 +1490,9 @@
LocationSummary* StoreIndexedInstr::MakeLocationSummary(Zone* zone,
bool opt) const {
+ const bool directly_addressable = aligned() &&
+ class_id() != kTypedDataInt64ArrayCid &&
+ class_id() != kTypedDataUint64ArrayCid;
const intptr_t kNumInputs = 3;
LocationSummary* locs;
@@ -1460,7 +1501,7 @@
if (CanBeImmediateIndex(index(), class_id(), IsExternal(),
false, // Store.
&needs_base)) {
- if (!aligned()) {
+ if (!directly_addressable) {
kNumTemps += 2;
} else if (needs_base) {
kNumTemps += 1;
@@ -1472,7 +1513,7 @@
// CanBeImmediateIndex must return false for unsafe smis.
locs->set_in(1, Location::Constant(index()->definition()->AsConstant()));
} else {
- if (!aligned()) {
+ if (!directly_addressable) {
kNumTemps += 2;
}
@@ -1504,6 +1545,11 @@
case kTypedDataUint32ArrayCid:
locs->set_in(2, Location::RequiresRegister());
break;
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid:
+ locs->set_in(2, Location::Pair(Location::RequiresRegister(),
+ Location::RequiresRegister()));
+ break;
case kTypedDataFloat32ArrayCid:
// Need low register (<= Q7).
locs->set_in(2, Location::FpuRegisterLocation(Q7));
@@ -1522,6 +1568,9 @@
}
void StoreIndexedInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+ const bool directly_addressable = aligned() &&
+ class_id() != kTypedDataInt64ArrayCid &&
+ class_id() != kTypedDataUint64ArrayCid;
// The array register points to the backing store for external arrays.
const Register array = locs()->in(0).reg();
const Location index = locs()->in(1);
@@ -1531,7 +1580,7 @@
(locs()->temp_count() > 1) ? locs()->temp(1).reg() : kNoRegister;
Address element_address(kNoRegister);
- if (aligned()) {
+ if (directly_addressable) {
element_address = index.IsRegister()
? __ ElementAddressForRegIndex(
false, // Store.
@@ -1630,6 +1679,23 @@
}
break;
}
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid: {
+ ASSERT(!directly_addressable); // need to add to register
+ ASSERT(locs()->in(2).IsPairLocation());
+ PairLocation* value_pair = locs()->in(2).AsPairLocation();
+ Register value_lo = value_pair->At(0).reg();
+ Register value_hi = value_pair->At(1).reg();
+ if (aligned()) {
+ __ str(value_lo, Address(temp));
+ __ str(value_hi, Address(temp, kWordSize));
+ } else {
+ __ StoreWordUnaligned(value_lo, temp, temp2);
+ __ AddImmediate(temp, temp, kWordSize);
+ __ StoreWordUnaligned(value_hi, temp, temp2);
+ }
+ break;
+ }
case kTypedDataFloat32ArrayCid: {
const SRegister value_reg =
EvenSRegisterOf(EvenDRegisterOf(locs()->in(2).fpu_reg()));
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index 57f3b5d..41891ab 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -1011,6 +1011,8 @@
case kTypedDataInt32ArrayCid:
case kTypedDataUint32ArrayCid:
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid:
return CompileType::Int();
default:
@@ -1039,6 +1041,9 @@
return kUnboxedInt32;
case kTypedDataUint32ArrayCid:
return kUnboxedUint32;
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid:
+ return kUnboxedInt64;
case kTypedDataFloat32ArrayCid:
case kTypedDataFloat64ArrayCid:
return kUnboxedDouble;
@@ -1081,6 +1086,11 @@
} else if (representation() == kUnboxedInt32) {
ASSERT(class_id() == kTypedDataInt32ArrayCid);
locs->set_out(0, Location::RequiresRegister());
+ } else if (representation() == kUnboxedInt64) {
+ ASSERT(class_id() == kTypedDataInt64ArrayCid ||
+ class_id() == kTypedDataUint64ArrayCid);
+ locs->set_out(0, Location::Pair(Location::RequiresRegister(),
+ Location::RequiresRegister()));
} else {
ASSERT(representation() == kTagged);
locs->set_out(0, Location::RequiresRegister());
@@ -1148,6 +1158,28 @@
return;
}
+ if (representation() == kUnboxedInt64) {
+ ASSERT(locs()->out(0).IsPairLocation());
+ PairLocation* result_pair = locs()->out(0).AsPairLocation();
+ Register result_lo = result_pair->At(0).reg();
+ Register result_hi = result_pair->At(1).reg();
+ if ((index_scale() == 1) && index.IsRegister()) {
+ __ SmiUntag(index.reg());
+ }
+ ASSERT(class_id() == kTypedDataInt64ArrayCid ||
+ class_id() == kTypedDataUint64ArrayCid);
+ __ movl(result_lo, element_address);
+ element_address = index.IsRegister()
+ ? Assembler::ElementAddressForRegIndex(
+ IsExternal(), class_id(), index_scale(), array,
+ index.reg(), kWordSize)
+ : Assembler::ElementAddressForIntIndex(
+ IsExternal(), class_id(), index_scale(), array,
+ Smi::Cast(index.constant()).Value(), kWordSize);
+ __ movl(result_hi, element_address);
+ return;
+ }
+
ASSERT(representation() == kTagged);
Register result = locs()->out(0).reg();
@@ -1208,6 +1240,9 @@
return kUnboxedInt32;
case kTypedDataUint32ArrayCid:
return kUnboxedUint32;
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid:
+ return kUnboxedInt64;
case kTypedDataFloat32ArrayCid:
case kTypedDataFloat64ArrayCid:
return kUnboxedDouble;
@@ -1264,6 +1299,11 @@
case kTypedDataUint32ArrayCid:
locs->set_in(2, Location::RequiresRegister());
break;
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid:
+ locs->set_in(2, Location::Pair(Location::RequiresRegister(),
+ Location::RequiresRegister()));
+ break;
case kTypedDataFloat32ArrayCid:
case kTypedDataFloat64ArrayCid:
// TODO(srdjan): Support Float64 constants.
@@ -1364,6 +1404,24 @@
case kTypedDataUint32ArrayCid:
__ movl(element_address, locs()->in(2).reg());
break;
+ case kTypedDataInt64ArrayCid:
+ case kTypedDataUint64ArrayCid: {
+ ASSERT(locs()->in(2).IsPairLocation());
+ PairLocation* value_pair = locs()->in(2).AsPairLocation();
+ Register value_lo = value_pair->At(0).reg();
+ Register value_hi = value_pair->At(1).reg();
+ __ movl(element_address, value_lo);
+ element_address =
+ index.IsRegister()
+ ? Assembler::ElementAddressForRegIndex(IsExternal(), class_id(),
+ index_scale(), array,
+ index.reg(), kWordSize)
+ : Assembler::ElementAddressForIntIndex(
+ IsExternal(), class_id(), index_scale(), array,
+ Smi::Cast(index.constant()).Value(), kWordSize);
+ __ movl(element_address, value_hi);
+ break;
+ }
case kTypedDataFloat32ArrayCid:
__ movss(element_address, locs()->in(2).fpu_reg());
break;
diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc
index 14c293c..f041b41 100644
--- a/runtime/vm/compiler/backend/inliner.cc
+++ b/runtime/vm/compiler/backend/inliner.cc
@@ -2258,8 +2258,7 @@
}
static bool ShouldInlineInt64ArrayOps() {
- // TODO(ajcbik): look into doing this even for 32-bit targets.
- return (kBitsPerWord == 64) && FlowGraphCompiler::SupportsUnboxedInt64();
+ return FlowGraphCompiler::SupportsUnboxedInt64();
}
static bool CanUnboxInt32() {
diff --git a/runtime/vm/compiler/call_specializer.cc b/runtime/vm/compiler/call_specializer.cc
index 4faa42a..6b3556b 100644
--- a/runtime/vm/compiler/call_specializer.cc
+++ b/runtime/vm/compiler/call_specializer.cc
@@ -212,12 +212,12 @@
}
const Token::Kind op_kind = call->token_kind();
- if (FLAG_precompiled_mode && FLAG_strong && kBitsPerWord == 64) {
- // Avoid speculation for AOT Dart2 64-bit targets.
+ if (FLAG_precompiled_mode && FLAG_strong) {
+ // Avoid speculation for AOT Dart2 targets.
//
// TODO(ajcbik): expand this to more and more targets as we
// investigate the performance impact of moving smi decision
- // into a later phase.
+ // into a later phase, and recover from Meteor loss.
//
} else if (FLAG_guess_icdata_cid) {
if (FLAG_precompiled_mode) {