[vm/inliner] Inline int64 typed data ops on 32-bit archs.

Rationale:
Brings the recent int64 typed-data inlining improvements, so far
limited to 64-bit targets, to 32-bit architectures (ARM and IA32)
by representing unboxed int64 values as register pairs.

Performance:
Many large improvements on microbenchmarks. The Meteor benchmark
regresses as expected (see the TODO in call_specializer.cc on
recovering this loss).
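
The mechanism, common to both backends below: an unboxed int64 element
is modeled as a pair of 32-bit register locations. A minimal sketch of
the pattern, lifted from the il_arm.cc/il_ia32.cc hunks (surrounding
declarations elided):

    // In MakeLocationSummary: a 64-bit result occupies two word registers.
    locs->set_out(0, Location::Pair(Location::RequiresRegister(),
                                    Location::RequiresRegister()));

    // In EmitNativeCode: unpack the pair and move each half separately.
    PairLocation* result_pair = locs()->out(0).AsPairLocation();
    Register result_lo = result_pair->At(0).reg();  // low word
    Register result_hi = result_pair->At(1).reg();  // high word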


https://github.com/dart-lang/sdk/issues/33205

Change-Id: Ie9ebcfdfe9c5e265595c95d5e943ae35c5700a97
Reviewed-on: https://dart-review.googlesource.com/63685
Commit-Queue: Aart Bik <ajcbik@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc
index 70bee17..e427841 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm.cc
@@ -2112,8 +2112,7 @@
       return kUnsignedWord;
     case kTypedDataInt64ArrayCid:
     case kTypedDataUint64ArrayCid:
-      UNREACHABLE();
-      return kByte;
+      return kDWord;
     case kTypedDataFloat32ArrayCid:
       return kSWord;
     case kTypedDataFloat64ArrayCid:
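
For context, the Int64/Uint64 typed-data cids used to hit the
UNREACHABLE removed above; they now report a double-word operand size.
A hedged sketch of the effect (assuming OperandSizeFor is the Address
helper this hunk belongs to, as the surrounding cases suggest):

    // kDWord marks the element as a 64-bit (two-word) access, so
    // offset-validity checks size it correctly.
    ASSERT(Address::OperandSizeFor(kTypedDataInt64ArrayCid) == kDWord);
    ASSERT(Address::OperandSizeFor(kTypedDataUint64ArrayCid) == kDWord);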
diff --git a/runtime/vm/compiler/assembler/assembler_ia32.cc b/runtime/vm/compiler/assembler/assembler_ia32.cc
index 692d9c8..b566572 100644
--- a/runtime/vm/compiler/assembler/assembler_ia32.cc
+++ b/runtime/vm/compiler/assembler/assembler_ia32.cc
@@ -2512,12 +2512,13 @@
                                              intptr_t cid,
                                              intptr_t index_scale,
                                              Register array,
-                                             intptr_t index) {
+                                             intptr_t index,
+                                             intptr_t extra_disp) {
   if (is_external) {
-    return Address(array, index * index_scale);
+    return Address(array, index * index_scale + extra_disp);
   } else {
     const int64_t disp = static_cast<int64_t>(index) * index_scale +
-                         Instance::DataOffsetFor(cid);
+                         Instance::DataOffsetFor(cid) + extra_disp;
     ASSERT(Utils::IsInt(32, disp));
     return FieldAddress(array, static_cast<int32_t>(disp));
   }
@@ -2549,12 +2550,13 @@
                                              intptr_t cid,
                                              intptr_t index_scale,
                                              Register array,
-                                             Register index) {
+                                             Register index,
+                                             intptr_t extra_disp) {
   if (is_external) {
-    return Address(array, index, ToScaleFactor(index_scale), 0);
+    return Address(array, index, ToScaleFactor(index_scale), extra_disp);
   } else {
     return FieldAddress(array, index, ToScaleFactor(index_scale),
-                        Instance::DataOffsetFor(cid));
+                        Instance::DataOffsetFor(cid) + extra_disp);
   }
 }
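
The new extra_disp parameter lets one helper address both halves of a
64-bit element. A sketch of the intended use, mirroring the il_ia32.cc
hunks further down (is_external, cid, and the other arguments stand for
the caller's values):

    // Low word at displacement 0, high word one machine word further on.
    Address lo = Assembler::ElementAddressForRegIndex(
        is_external, cid, index_scale, array, index, /*extra_disp=*/0);
    Address hi = Assembler::ElementAddressForRegIndex(
        is_external, cid, index_scale, array, index, /*extra_disp=*/kWordSize);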
 
diff --git a/runtime/vm/compiler/assembler/assembler_ia32.h b/runtime/vm/compiler/assembler/assembler_ia32.h
index 6694a85..19a00b4 100644
--- a/runtime/vm/compiler/assembler/assembler_ia32.h
+++ b/runtime/vm/compiler/assembler/assembler_ia32.h
@@ -672,13 +672,15 @@
                                            intptr_t cid,
                                            intptr_t index_scale,
                                            Register array,
-                                           intptr_t index);
+                                           intptr_t index,
+                                           intptr_t extra_disp = 0);
 
   static Address ElementAddressForRegIndex(bool is_external,
                                            intptr_t cid,
                                            intptr_t index_scale,
                                            Register array,
-                                           Register index);
+                                           Register index,
+                                           intptr_t extra_disp = 0);
 
   static Address VMTagAddress() {
     return Address(THR, Thread::vm_tag_offset());
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index 694f8f2..36a33a2 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -1131,6 +1131,8 @@
 
     case kTypedDataInt32ArrayCid:
     case kTypedDataUint32ArrayCid:
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid:
       return CompileType::Int();
 
     default:
@@ -1159,6 +1161,9 @@
       return kUnboxedInt32;
     case kTypedDataUint32ArrayCid:
       return kUnboxedUint32;
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid:
+      return kUnboxedInt64;
     case kTypedDataFloat32ArrayCid:
     case kTypedDataFloat64ArrayCid:
       return kUnboxedDouble;
@@ -1213,9 +1218,12 @@
 
 LocationSummary* LoadIndexedInstr::MakeLocationSummary(Zone* zone,
                                                        bool opt) const {
+  const bool directly_addressable =
+      aligned() && representation() != kUnboxedInt64;
   const intptr_t kNumInputs = 2;
   intptr_t kNumTemps = 0;
-  if (!aligned()) {
+
+  if (!directly_addressable) {
     kNumTemps += 1;
     if (representation() == kUnboxedDouble) {
       kNumTemps += 1;
@@ -1251,11 +1259,16 @@
   } else if (representation() == kUnboxedInt32) {
     ASSERT(class_id() == kTypedDataInt32ArrayCid);
     locs->set_out(0, Location::RequiresRegister());
+  } else if (representation() == kUnboxedInt64) {
+    ASSERT(class_id() == kTypedDataInt64ArrayCid ||
+           class_id() == kTypedDataUint64ArrayCid);
+    locs->set_out(0, Location::Pair(Location::RequiresRegister(),
+                                    Location::RequiresRegister()));
   } else {
     ASSERT(representation() == kTagged);
     locs->set_out(0, Location::RequiresRegister());
   }
-  if (!aligned()) {
+  if (!directly_addressable) {
     locs->set_temp(0, Location::RequiresRegister());
     if (representation() == kUnboxedDouble) {
       locs->set_temp(1, Location::RequiresRegister());
@@ -1265,13 +1278,16 @@
 }
 
 void LoadIndexedInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  const bool directly_addressable =
+      aligned() && representation() != kUnboxedInt64;
   // The array register points to the backing store for external arrays.
   const Register array = locs()->in(0).reg();
   const Location index = locs()->in(1);
-  const Register address = aligned() ? kNoRegister : locs()->temp(0).reg();
+  const Register address =
+      directly_addressable ? kNoRegister : locs()->temp(0).reg();
 
   Address element_address(kNoRegister);
-  if (aligned()) {
+  if (directly_addressable) {
     element_address = index.IsRegister()
                           ? __ ElementAddressForRegIndex(
                                 true,  // Load.
@@ -1368,6 +1384,25 @@
     return;
   }
 
+  if (representation() == kUnboxedInt64) {
+    ASSERT(!directly_addressable);  // address must be formed in a register
+    ASSERT(class_id() == kTypedDataInt64ArrayCid ||
+           class_id() == kTypedDataUint64ArrayCid);
+    ASSERT(locs()->out(0).IsPairLocation());
+    PairLocation* result_pair = locs()->out(0).AsPairLocation();
+    Register result_lo = result_pair->At(0).reg();
+    Register result_hi = result_pair->At(1).reg();
+    if (aligned()) {
+      __ ldr(result_lo, Address(address));
+      __ ldr(result_hi, Address(address, kWordSize));
+    } else {
+      __ LoadWordUnaligned(result_lo, address, TMP);
+      __ AddImmediate(address, address, kWordSize);
+      __ LoadWordUnaligned(result_hi, address, TMP);
+    }
+    return;
+  }
+
   ASSERT(representation() == kTagged);
 
   const Register result = locs()->out(0).reg();
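
Note that the int64 load above never takes the directly-addressable
path: as the assert's comment says, the address must first be formed in
a register, after which the high word sits at a fixed +kWordSize. The
semantics, restated as host-side C++ (a sketch, little-endian assumed):

    #include <cstdint>
    #include <cstring>

    // What the two ldr's compute: a 64-bit element read as two adjacent
    // 32-bit words, low word first.
    void LoadInt64AsPair(const uint8_t* element, uint32_t* lo, uint32_t* hi) {
      std::memcpy(lo, element, sizeof(uint32_t));
      std::memcpy(hi, element + sizeof(uint32_t), sizeof(uint32_t));
    }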
@@ -1435,6 +1470,9 @@
       return kUnboxedInt32;
     case kTypedDataUint32ArrayCid:
       return kUnboxedUint32;
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid:
+      return kUnboxedInt64;
     case kTypedDataFloat32ArrayCid:
     case kTypedDataFloat64ArrayCid:
       return kUnboxedDouble;
@@ -1452,6 +1490,9 @@
 
 LocationSummary* StoreIndexedInstr::MakeLocationSummary(Zone* zone,
                                                         bool opt) const {
+  const bool directly_addressable = aligned() &&
+                                    class_id() != kTypedDataInt64ArrayCid &&
+                                    class_id() != kTypedDataUint64ArrayCid;
   const intptr_t kNumInputs = 3;
   LocationSummary* locs;
 
@@ -1460,7 +1501,7 @@
   if (CanBeImmediateIndex(index(), class_id(), IsExternal(),
                           false,  // Store.
                           &needs_base)) {
-    if (!aligned()) {
+    if (!directly_addressable) {
       kNumTemps += 2;
     } else if (needs_base) {
       kNumTemps += 1;
@@ -1472,7 +1513,7 @@
     // CanBeImmediateIndex must return false for unsafe smis.
     locs->set_in(1, Location::Constant(index()->definition()->AsConstant()));
   } else {
-    if (!aligned()) {
+    if (!directly_addressable) {
       kNumTemps += 2;
     }
 
@@ -1504,6 +1545,11 @@
     case kTypedDataUint32ArrayCid:
       locs->set_in(2, Location::RequiresRegister());
       break;
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid:
+      locs->set_in(2, Location::Pair(Location::RequiresRegister(),
+                                     Location::RequiresRegister()));
+      break;
     case kTypedDataFloat32ArrayCid:
       // Need low register (<= Q7).
       locs->set_in(2, Location::FpuRegisterLocation(Q7));
@@ -1522,6 +1568,9 @@
 }
 
 void StoreIndexedInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  const bool directly_addressable = aligned() &&
+                                    class_id() != kTypedDataInt64ArrayCid &&
+                                    class_id() != kTypedDataUint64ArrayCid;
   // The array register points to the backing store for external arrays.
   const Register array = locs()->in(0).reg();
   const Location index = locs()->in(1);
@@ -1531,7 +1580,7 @@
       (locs()->temp_count() > 1) ? locs()->temp(1).reg() : kNoRegister;
 
   Address element_address(kNoRegister);
-  if (aligned()) {
+  if (directly_addressable) {
     element_address = index.IsRegister()
                           ? __ ElementAddressForRegIndex(
                                 false,  // Store.
@@ -1630,6 +1679,23 @@
       }
       break;
     }
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid: {
+      ASSERT(!directly_addressable);  // address must be formed in a register
+      ASSERT(locs()->in(2).IsPairLocation());
+      PairLocation* value_pair = locs()->in(2).AsPairLocation();
+      Register value_lo = value_pair->At(0).reg();
+      Register value_hi = value_pair->At(1).reg();
+      if (aligned()) {
+        __ str(value_lo, Address(temp));
+        __ str(value_hi, Address(temp, kWordSize));
+      } else {
+        __ StoreWordUnaligned(value_lo, temp, temp2);
+        __ AddImmediate(temp, temp, kWordSize);
+        __ StoreWordUnaligned(value_hi, temp, temp2);
+      }
+      break;
+    }
     case kTypedDataFloat32ArrayCid: {
       const SRegister value_reg =
           EvenSRegisterOf(EvenDRegisterOf(locs()->in(2).fpu_reg()));
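
One subtlety in the unaligned store path above: the address register is
advanced in place between the two halves, which is why the location
summary reserves two temps (address plus scratch) whenever the access is
not directly addressable. Restated with explicit comments (a sketch;
names as in the hunk):

    __ StoreWordUnaligned(value_lo, temp, temp2);  // low word, temp2 is scratch
    __ AddImmediate(temp, temp, kWordSize);        // advance to the high word
    __ StoreWordUnaligned(value_hi, temp, temp2);  // high word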
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index 57f3b5d..41891ab 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -1011,6 +1011,8 @@
 
     case kTypedDataInt32ArrayCid:
     case kTypedDataUint32ArrayCid:
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid:
       return CompileType::Int();
 
     default:
@@ -1039,6 +1041,9 @@
       return kUnboxedInt32;
     case kTypedDataUint32ArrayCid:
       return kUnboxedUint32;
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid:
+      return kUnboxedInt64;
     case kTypedDataFloat32ArrayCid:
     case kTypedDataFloat64ArrayCid:
       return kUnboxedDouble;
@@ -1081,6 +1086,11 @@
   } else if (representation() == kUnboxedInt32) {
     ASSERT(class_id() == kTypedDataInt32ArrayCid);
     locs->set_out(0, Location::RequiresRegister());
+  } else if (representation() == kUnboxedInt64) {
+    ASSERT(class_id() == kTypedDataInt64ArrayCid ||
+           class_id() == kTypedDataUint64ArrayCid);
+    locs->set_out(0, Location::Pair(Location::RequiresRegister(),
+                                    Location::RequiresRegister()));
   } else {
     ASSERT(representation() == kTagged);
     locs->set_out(0, Location::RequiresRegister());
@@ -1148,6 +1158,28 @@
     return;
   }
 
+  if (representation() == kUnboxedInt64) {
+    ASSERT(locs()->out(0).IsPairLocation());
+    PairLocation* result_pair = locs()->out(0).AsPairLocation();
+    Register result_lo = result_pair->At(0).reg();
+    Register result_hi = result_pair->At(1).reg();
+    if ((index_scale() == 1) && index.IsRegister()) {
+      __ SmiUntag(index.reg());
+    }
+    ASSERT(class_id() == kTypedDataInt64ArrayCid ||
+           class_id() == kTypedDataUint64ArrayCid);
+    __ movl(result_lo, element_address);
+    element_address = index.IsRegister()
+                          ? Assembler::ElementAddressForRegIndex(
+                                IsExternal(), class_id(), index_scale(), array,
+                                index.reg(), kWordSize)
+                          : Assembler::ElementAddressForIntIndex(
+                                IsExternal(), class_id(), index_scale(), array,
+                                Smi::Cast(index.constant()).Value(), kWordSize);
+    __ movl(result_hi, element_address);
+    return;
+  }
+
   ASSERT(representation() == kTagged);
 
   Register result = locs()->out(0).reg();
@@ -1208,6 +1240,9 @@
       return kUnboxedInt32;
     case kTypedDataUint32ArrayCid:
       return kUnboxedUint32;
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid:
+      return kUnboxedInt64;
     case kTypedDataFloat32ArrayCid:
     case kTypedDataFloat64ArrayCid:
       return kUnboxedDouble;
@@ -1264,6 +1299,11 @@
     case kTypedDataUint32ArrayCid:
       locs->set_in(2, Location::RequiresRegister());
       break;
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid:
+      locs->set_in(2, Location::Pair(Location::RequiresRegister(),
+                                     Location::RequiresRegister()));
+      break;
     case kTypedDataFloat32ArrayCid:
     case kTypedDataFloat64ArrayCid:
       // TODO(srdjan): Support Float64 constants.
@@ -1364,6 +1404,24 @@
     case kTypedDataUint32ArrayCid:
       __ movl(element_address, locs()->in(2).reg());
       break;
+    case kTypedDataInt64ArrayCid:
+    case kTypedDataUint64ArrayCid: {
+      ASSERT(locs()->in(2).IsPairLocation());
+      PairLocation* value_pair = locs()->in(2).AsPairLocation();
+      Register value_lo = value_pair->At(0).reg();
+      Register value_hi = value_pair->At(1).reg();
+      __ movl(element_address, value_lo);
+      element_address =
+          index.IsRegister()
+              ? Assembler::ElementAddressForRegIndex(IsExternal(), class_id(),
+                                                     index_scale(), array,
+                                                     index.reg(), kWordSize)
+              : Assembler::ElementAddressForIntIndex(
+                    IsExternal(), class_id(), index_scale(), array,
+                    Smi::Cast(index.constant()).Value(), kWordSize);
+      __ movl(element_address, value_hi);
+      break;
+    }
     case kTypedDataFloat32ArrayCid:
       __ movss(element_address, locs()->in(2).fpu_reg());
       break;
diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc
index 14c293c..f041b41 100644
--- a/runtime/vm/compiler/backend/inliner.cc
+++ b/runtime/vm/compiler/backend/inliner.cc
@@ -2258,8 +2258,7 @@
 }
 
 static bool ShouldInlineInt64ArrayOps() {
-  // TODO(ajcbik): look into doing this even for 32-bit targets.
-  return (kBitsPerWord == 64) && FlowGraphCompiler::SupportsUnboxedInt64();
+  return FlowGraphCompiler::SupportsUnboxedInt64();
 }
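
With the word-size guard gone, the gate reduces to the capability check
alone, so Int64List/Uint64List operations become inlining candidates on
32-bit targets too. An illustrative (hypothetical) call site:

    // The inliner consults this predicate before recognizing
    // Int64List/Uint64List loads and stores.
    if (ShouldInlineInt64ArrayOps()) {
      // ... build inlined LoadIndexed/StoreIndexed with kUnboxedInt64 ...
    }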
 
 static bool CanUnboxInt32() {
diff --git a/runtime/vm/compiler/call_specializer.cc b/runtime/vm/compiler/call_specializer.cc
index 4faa42a..6b3556b 100644
--- a/runtime/vm/compiler/call_specializer.cc
+++ b/runtime/vm/compiler/call_specializer.cc
@@ -212,12 +212,12 @@
   }
 
   const Token::Kind op_kind = call->token_kind();
-  if (FLAG_precompiled_mode && FLAG_strong && kBitsPerWord == 64) {
-    // Avoid speculation for AOT Dart2 64-bit targets.
+  if (FLAG_precompiled_mode && FLAG_strong) {
+    // Avoid speculation for AOT Dart2 targets.
     //
     // TODO(ajcbik): expand this to more and more targets as we
     // investigate the performance impact of moving smi decision
-    // into a later phase.
+    // into a later phase, and how to recover the Meteor regression.
     //
   } else if (FLAG_guess_icdata_cid) {
     if (FLAG_precompiled_mode) {