[vm] Support FFI on 32-bit Intel.

Change-Id: I08acf6ef00e899f95dbaf2de36f1aea5e6e7ea3d
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/97108
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Aart Bik <ajcbik@google.com>
diff --git a/runtime/bin/ffi_test_functions.cc b/runtime/bin/ffi_test_functions.cc
index 6dfa08d..c22939d 100644
--- a/runtime/bin/ffi_test_functions.cc
+++ b/runtime/bin/ffi_test_functions.cc
@@ -462,13 +462,11 @@
   return 0x80000000 * -1.0;
 }
 
-DART_EXPORT void* SmallPointer() {
-  intptr_t value = 0x80000000;
-  return reinterpret_cast<void*>(-value);
-}
-
+// Requires boxing on 32-bit and 64-bit systems, even if the top 32-bits are
+// truncated.
 DART_EXPORT void* LargePointer() {
-  return reinterpret_cast<void*>(-0x8000000000000000L);
+  const uint64_t origin = 0x8100000082000000;
+  return reinterpret_cast<void*>(static_cast<uintptr_t>(origin));
 }
 
 #if !defined(_WIN32)
diff --git a/runtime/lib/ffi.cc b/runtime/lib/ffi.cc
index 6e1903b..d717fc9 100644
--- a/runtime/lib/ffi.cc
+++ b/runtime/lib/ffi.cc
@@ -559,18 +559,11 @@
   // https://github.com/dart-lang/sdk/issues/35773 DBC
   UNREACHABLE();
 #else
-  extern void GenerateFfiInverseTrampoline(
-      Assembler * assembler, const Function& signature, void* dart_entry_point);
-  ObjectPoolBuilder object_pool_builder;
-  Assembler assembler(&object_pool_builder);
-  GenerateFfiInverseTrampoline(&assembler, signature, dart_entry_point);
-  const Code& code = Code::Handle(
-      Code::FinalizeCodeAndNotify("inverse trampoline", nullptr, &assembler,
-                                  Code::PoolAttachment::kAttachPool, false));
 
-  uword entryPoint = code.EntryPoint();
-
-  return reinterpret_cast<void*>(entryPoint);
+  // TODO(dacoharkes): Implement this.
+  // https://github.com/dart-lang/sdk/issues/35761
+  // Look at StubCode::GenerateInvokeDartCodeStub.
+  UNREACHABLE();
 #endif
 }
 
diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc
index 3d2a546..5f1a3d6 100644
--- a/runtime/vm/compiler/backend/il.cc
+++ b/runtime/vm/compiler/backend/il.cc
@@ -3525,6 +3525,7 @@
 bool UnboxInstr::CanConvertSmi() const {
   switch (representation()) {
     case kUnboxedDouble:
+    case kUnboxedFloat:
     case kUnboxedInt64:
       return true;
 
@@ -5192,7 +5193,7 @@
   set_native_c_function(native_function);
 }
 
-#if defined(TARGET_ARCH_X64)
+#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_IA32)
 
 #define Z zone_
 
@@ -5211,27 +5212,38 @@
   ASSERT(((1 << CallingConventions::kFirstCalleeSavedCpuReg) &
           CallingConventions::kArgumentRegisters) == 0);
 
-  LocationSummary* summary =
-      new (zone) LocationSummary(zone, /*num_inputs=*/InputCount(),
-                                 /*num_temps=*/1, LocationSummary::kCall);
+#if defined(TARGET_ARCH_IA32)
+  constexpr intptr_t kNumTemps = 2;
+#else
+  constexpr intptr_t kNumTemps = 1;
+#endif
+
+  LocationSummary* summary = new (zone)
+      LocationSummary(zone, /*num_inputs=*/InputCount(),
+                      /*num_temps=*/kNumTemps, LocationSummary::kCall);
 
   summary->set_in(TargetAddressIndex(),
                   Location::RegisterLocation(
                       CallingConventions::kFirstNonArgumentRegister));
   summary->set_temp(0, Location::RegisterLocation(
                            CallingConventions::kSecondNonArgumentRegister));
+#if defined(TARGET_ARCH_IA32)
+  summary->set_temp(1, Location::RegisterLocation(
+                           CallingConventions::kFirstCalleeSavedCpuReg));
+#endif
   summary->set_out(0, compiler::ffi::ResultLocation(
                           compiler::ffi::ResultRepresentation(signature_)));
 
   for (intptr_t i = 0, n = NativeArgCount(); i < n; ++i) {
-    Location target = arg_locations_[i];
-    if (target.IsMachineRegister()) {
-      summary->set_in(i, target);
-    } else {
-      // Since we have to push this input on the stack, there's no point in
-      // pinning it to any specific register.
-      summary->set_in(i, Location::Any());
-    }
+    // Floating point values are never split: they are either in a single "FPU"
+    // register or a contiguous 64-bit slot on the stack. Unboxed 64-bit integer
+    // values, in contrast, can be split between any two registers on a 32-bit
+    // system.
+    const bool is_atomic = arg_representations_[i] == kUnboxedFloat ||
+                           arg_representations_[i] == kUnboxedDouble;
+    // Since we have to move this input down to the stack, there's no point in
+    // pinning it to any specific register.
+    summary->set_in(i, UnallocateStackSlots(arg_locations_[i], is_atomic));
   }
 
   return summary;
@@ -5241,6 +5253,22 @@
   return compiler::ffi::ResultRepresentation(signature_);
 }
 
+Location FfiCallInstr::UnallocateStackSlots(Location in, bool is_atomic) {
+  if (in.IsPairLocation()) {
+    ASSERT(!is_atomic);
+    return Location::Pair(UnallocateStackSlots(in.AsPairLocation()->At(0)),
+                          UnallocateStackSlots(in.AsPairLocation()->At(1)));
+  } else if (in.IsMachineRegister()) {
+    return in;
+  } else if (in.IsDoubleStackSlot()) {
+    return is_atomic ? Location::Any()
+                     : Location::Pair(Location::Any(), Location::Any());
+  } else {
+    ASSERT(in.IsStackSlot());
+    return Location::Any();
+  }
+}
+
 #undef Z
 
 #else
diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h
index d7ae5c0..20d2bd3 100644
--- a/runtime/vm/compiler/backend/il.h
+++ b/runtime/vm/compiler/backend/il.h
@@ -4137,6 +4137,10 @@
  private:
   virtual void RawSetInputAt(intptr_t i, Value* value) { inputs_[i] = value; }
 
+  // Mark stack slots in 'loc' as unallocated. Split a double-word stack slot
+  // into a pair location if 'is_atomic' is false.
+  static Location UnallocateStackSlots(Location loc, bool is_atomic = false);
+
   Zone* const zone_;
   const Function& signature_;
 
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index a935736..6aee4ca 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -846,7 +846,119 @@
 }
 
 void FfiCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
-  UNREACHABLE();
+  Register saved_fp = locs()->temp(0).reg();
+  Register branch = locs()->in(TargetAddressIndex()).reg();
+  Register tmp = locs()->temp(1).reg();
+
+  // Save frame pointer because we're going to update it when we enter the exit
+  // frame.
+  __ movl(saved_fp, FPREG);
+
+  // Make a space to put the return address.
+  __ pushl(Immediate(0));
+
+  // We need to create a dummy "exit frame". It will have a null code object.
+  __ LoadObject(CODE_REG, Object::null_object());
+  __ EnterDartFrame(compiler::ffi::NumStackSlots(arg_locations_) * kWordSize);
+
+  // Save exit frame information to enable stack walking as we are about
+  // to transition to Dart VM C++ code.
+  __ movl(Address(THR, Thread::top_exit_frame_info_offset()), FPREG);
+
+  // Align frame before entering C++ world.
+  if (OS::ActivationFrameAlignment() > 1) {
+    __ andl(SPREG, Immediate(~(OS::ActivationFrameAlignment() - 1)));
+  }
+
+  // Load a 32-bit argument, or a 32-bit component of a 64-bit argument.
+  auto load_single_slot = [&](Location from, Location to) {
+    ASSERT(to.IsStackSlot());
+    if (from.IsRegister()) {
+      __ movl(to.ToStackSlotAddress(), from.reg());
+    } else if (from.IsFpuRegister()) {
+      __ movss(to.ToStackSlotAddress(), from.fpu_reg());
+    } else if (from.IsStackSlot() || from.IsDoubleStackSlot()) {
+      ASSERT(from.base_reg() == FPREG);
+      __ movl(tmp, Address(saved_fp, from.ToStackSlotOffset()));
+      __ movl(to.ToStackSlotAddress(), tmp);
+    } else {
+      UNREACHABLE();
+    }
+  };
+
+  for (intptr_t i = 0, n = NativeArgCount(); i < n; ++i) {
+    Location origin = locs()->in(i);
+    Location target = arg_locations_[i];
+
+    if (target.IsStackSlot()) {
+      load_single_slot(origin, target);
+    } else if (target.IsDoubleStackSlot()) {
+      if (origin.IsFpuRegister()) {
+        __ movsd(target.ToStackSlotAddress(), origin.fpu_reg());
+      } else {
+        ASSERT(origin.IsDoubleStackSlot() && origin.base_reg() == FPREG);
+        __ movl(tmp, Address(saved_fp, origin.ToStackSlotOffset()));
+        __ movl(target.ToStackSlotAddress(), tmp);
+        __ movl(tmp, Address(saved_fp, origin.ToStackSlotOffset() + 4));
+        __ movl(Address(SPREG, target.ToStackSlotOffset() + 4), tmp);
+      }
+    } else if (target.IsPairLocation()) {
+      ASSERT(origin.IsPairLocation());
+      load_single_slot(origin.AsPairLocation()->At(0),
+                       target.AsPairLocation()->At(0));
+      load_single_slot(origin.AsPairLocation()->At(1),
+                       target.AsPairLocation()->At(1));
+    }
+  }
+
+  // Mark that the thread is executing VM code.
+  __ movl(Assembler::VMTagAddress(), branch);
+
+  // We need to copy the return address up into the dummy stack frame so the
+  // stack walker will know which safepoint to use. Unlike X64, there's no
+  // PC-relative 'leaq' available, so we have to do a trick with 'call'.
+  constexpr intptr_t kCallSequenceLength = 6;
+
+  Label get_pc;
+  __ call(&get_pc);
+  __ Bind(&get_pc);
+
+  const intptr_t call_sequence_start = __ CodeSize();
+
+  __ popl(tmp);
+  __ movl(Address(FPREG, kSavedCallerPcSlotFromFp * kWordSize), tmp);
+  __ call(branch);
+
+  ASSERT(__ CodeSize() - call_sequence_start == kCallSequenceLength);
+
+  compiler->EmitCallsiteMetadata(TokenPosition::kNoSource, DeoptId::kNone,
+                                 RawPcDescriptors::Kind::kOther, locs());
+
+  // The x86 calling convention requires floating point values to be returned on
+  // the "floating-point stack" (aka. register ST0). We don't use the
+  // floating-point stack in Dart, so we need to move the return value back into
+  // an XMM register.
+  if (representation() == kUnboxedDouble) {
+    __ subl(SPREG, Immediate(8));
+    __ fstpl(Address(SPREG, 0));
+    __ movsd(XMM0, Address(SPREG, 0));
+  } else if (representation() == kUnboxedFloat) {
+    __ subl(SPREG, Immediate(4));
+    __ fstps(Address(SPREG, 0));
+    __ movss(XMM0, Address(SPREG, 0));
+  }
+
+  // Mark that the thread is executing Dart code.
+  __ movl(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
+
+  // Reset exit frame information in Thread structure.
+  __ movl(Address(THR, Thread::top_exit_frame_info_offset()), Immediate(0));
+
+  // Leave dummy exit frame.
+  __ LeaveFrame();
+
+  // Instead of returning to the "fake" return address, we just pop it.
+  __ popl(tmp);
 }
 
 static bool CanBeImmediateIndex(Value* value, intptr_t cid) {
diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc
index 89a8616..2852a51 100644
--- a/runtime/vm/compiler/backend/il_x64.cc
+++ b/runtime/vm/compiler/backend/il_x64.cc
@@ -894,8 +894,8 @@
   // but have a null code object.
   __ LoadObject(CODE_REG, Object::null_object());
   __ set_constant_pool_allowed(false);
-  __ EnterDartFrame(
-      compiler::ffi::NumStackArguments(arg_locations_) * kWordSize, PP);
+  __ EnterDartFrame(compiler::ffi::NumStackSlots(arg_locations_) * kWordSize,
+                    PP);
 
   // Save exit frame information to enable stack walking as we are about to
   // transition to Dart VM C++ code.
diff --git a/runtime/vm/compiler/backend/linearscan.cc b/runtime/vm/compiler/backend/linearscan.cc
index 23b5189..d786d83 100644
--- a/runtime/vm/compiler/backend/linearscan.cc
+++ b/runtime/vm/compiler/backend/linearscan.cc
@@ -1465,9 +1465,9 @@
       if (locs->in(j).IsPairLocation()) {
         PairLocation* pair = locs->in_slot(j)->AsPairLocation();
         ASSERT(!pair->At(0).IsUnallocated() ||
-               locs->in(j).policy() == Location::kAny);
+               pair->At(0).policy() == Location::kAny);
         ASSERT(!pair->At(1).IsUnallocated() ||
-               locs->in(j).policy() == Location::kAny);
+               pair->At(1).policy() == Location::kAny);
       } else {
         ASSERT(!locs->in(j).IsUnallocated() ||
                locs->in(j).policy() == Location::kAny);
diff --git a/runtime/vm/compiler/backend/locations.h b/runtime/vm/compiler/backend/locations.h
index d1e6f00..a256dc9 100644
--- a/runtime/vm/compiler/backend/locations.h
+++ b/runtime/vm/compiler/backend/locations.h
@@ -34,8 +34,12 @@
   kNumRepresentations
 };
 
+// 'UnboxedIntPtr' should be able to hold a pointer of the target word-size. On
+// a 32-bit platform, it's an unsigned 32-bit int because it should be
+// zero-extended to 64-bits, not sign-extended (pointers are inherently
+// unsigned).
 static constexpr Representation kUnboxedIntPtr =
-    compiler::target::kWordSize == 4 ? kUnboxedInt32 : kUnboxedInt64;
+    compiler::target::kWordSize == 4 ? kUnboxedUint32 : kUnboxedInt64;
 
 // Location objects are used to connect register allocator and code generator.
 // Instruction templates used by code generator have a corresponding
@@ -342,8 +346,8 @@
 
   bool IsStackSlot() const { return kind() == kStackSlot; }
 
-  static Location DoubleStackSlot(intptr_t stack_index) {
-    uword payload = StackSlotBaseField::encode(FPREG) |
+  static Location DoubleStackSlot(intptr_t stack_index, Register base = FPREG) {
+    uword payload = StackSlotBaseField::encode(base) |
                     StackIndexField::encode(EncodeStackIndex(stack_index));
     Location loc(kDoubleStackSlot, payload);
     // Ensure that sign is preserved.
@@ -642,8 +646,16 @@
     ASSERT(index < num_inputs_);
     // See FlowGraphAllocator::ProcessOneInstruction for explanation of this
     // restriction.
-    ASSERT(!always_calls() || loc.IsMachineRegister() ||
-           (loc.IsUnallocated() && loc.policy() == Location::kAny));
+    if (always_calls()) {
+      if (loc.IsUnallocated()) {
+        ASSERT(loc.policy() == Location::kAny);
+      } else if (loc.IsPairLocation()) {
+        ASSERT(!loc.AsPairLocation()->At(0).IsUnallocated() ||
+               loc.AsPairLocation()->At(0).policy() == Location::kAny);
+        ASSERT(!loc.AsPairLocation()->At(1).IsUnallocated() ||
+               loc.AsPairLocation()->At(1).policy() == Location::kAny);
+      }
+    }
     input_locations_[index] = loc;
   }
 
diff --git a/runtime/vm/compiler/ffi.cc b/runtime/vm/compiler/ffi.cc
index 375dc5d..a92d033 100644
--- a/runtime/vm/compiler/ffi.cc
+++ b/runtime/vm/compiler/ffi.cc
@@ -3,6 +3,7 @@
 // BSD-style license that can be found in the LICENSE file.
 
 #include "vm/compiler/ffi.h"
+#include "vm/compiler/runtime_api.h"
 
 namespace dart {
 
@@ -10,7 +11,7 @@
 
 namespace ffi {
 
-#if defined(TARGET_ARCH_X64)
+#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_IA32)
 
 static const size_t kSizeUnknown = 0;
 
@@ -44,6 +45,8 @@
   return element_size_table[index];
 }
 
+#if !defined(DART_PRECOMPILED_RUNTIME)
+
 Representation TypeRepresentation(const AbstractType& result_type) {
   switch (result_type.type_class_id()) {
     case kFfiFloatCid:
@@ -107,54 +110,81 @@
   return result;
 }
 
+// Represents the state of a stack frame going into a call, between allocations
+// of argument locations. Acts like a register allocator but for arguments in
+// the native ABI.
+class ArgumentFrameState : public ValueObject {
+ public:
+  Location AllocateArgument(Representation rep) {
+    switch (rep) {
+      case kUnboxedInt64:
+      case kUnboxedUint32:
+      case kUnboxedInt32:
+        if (rep == kUnboxedInt64) {
+          ASSERT(compiler::target::kWordSize == 8);
+        }
+        if (cpu_regs_used < CallingConventions::kNumArgRegs) {
+          Location result = Location::RegisterLocation(
+              CallingConventions::ArgumentRegisters[cpu_regs_used]);
+          cpu_regs_used++;
+          if (CallingConventions::kArgumentIntRegXorXmmReg) {
+            fpu_regs_used++;
+          }
+          return result;
+        }
+        break;
+      case kUnboxedFloat:
+      case kUnboxedDouble:
+        if (fpu_regs_used < CallingConventions::kNumXmmArgRegs) {
+          Location result = Location::FpuRegisterLocation(
+              CallingConventions::XmmArgumentRegisters[fpu_regs_used]);
+          fpu_regs_used++;
+          if (CallingConventions::kArgumentIntRegXorXmmReg) {
+            cpu_regs_used++;
+          }
+          return result;
+        }
+        break;
+      default:
+        UNREACHABLE();
+    }
+
+    // Argument must be spilled.
+    const intptr_t stack_slots_needed =
+        rep == kUnboxedDouble || rep == kUnboxedInt64
+            ? 8 / compiler::target::kWordSize
+            : 1;
+    Location result =
+        stack_slots_needed == 1
+            ? Location::StackSlot(stack_height_in_slots, SPREG)
+            : Location::DoubleStackSlot(stack_height_in_slots, SPREG);
+    stack_height_in_slots += stack_slots_needed;
+    return result;
+  }
+
+  intptr_t cpu_regs_used = 0;
+  intptr_t fpu_regs_used = 0;
+  intptr_t stack_height_in_slots = 0;
+};
+
 // Takes a list of argument representations, and converts it to a list of
 // argument locations based on calling convention.
 ZoneGrowableArray<Location>* ArgumentLocations(
     const ZoneGrowableArray<Representation>& arg_reps) {
   intptr_t num_arguments = arg_reps.length();
   auto result = new ZoneGrowableArray<Location>(num_arguments);
-  result->FillWith(Location(), 0, num_arguments);
-  Location* data = result->data();
 
   // Loop through all arguments and assign a register or a stack location.
-  intptr_t regs_used = 0;
-  intptr_t xmm_regs_used = 0;
-  intptr_t nth_stack_argument = 0;
-  bool on_stack;
+  ArgumentFrameState frame_state;
   for (intptr_t i = 0; i < num_arguments; i++) {
-    on_stack = true;
-    switch (arg_reps.At(i)) {
-      case kUnboxedInt32:
-      case kUnboxedUint32:
-      case kUnboxedInt64:
-        if (regs_used < CallingConventions::kNumArgRegs) {
-          data[i] = Location::RegisterLocation(
-              CallingConventions::ArgumentRegisters[regs_used]);
-          regs_used++;
-          if (CallingConventions::kArgumentIntRegXorXmmReg) {
-            xmm_regs_used++;
-          }
-          on_stack = false;
-        }
-        break;
-      case kUnboxedFloat:
-      case kUnboxedDouble:
-        if (xmm_regs_used < CallingConventions::kNumXmmArgRegs) {
-          data[i] = Location::FpuRegisterLocation(
-              CallingConventions::XmmArgumentRegisters[xmm_regs_used]);
-          xmm_regs_used++;
-          if (CallingConventions::kArgumentIntRegXorXmmReg) {
-            regs_used++;
-          }
-          on_stack = false;
-        }
-        break;
-      default:
-        UNREACHABLE();
-    }
-    if (on_stack) {
-      data[i] = Location::StackSlot(nth_stack_argument, RSP);
-      nth_stack_argument++;
+    Representation rep = arg_reps[i];
+    if (rep == kUnboxedInt64 && compiler::target::kWordSize < 8) {
+      Location low_bits_loc = frame_state.AllocateArgument(kUnboxedInt32);
+      Location high_bits_loc = frame_state.AllocateArgument(kUnboxedInt32);
+      ASSERT(low_bits_loc.IsStackSlot() == high_bits_loc.IsStackSlot());
+      result->Add(Location::Pair(low_bits_loc, high_bits_loc));
+    } else {
+      result->Add(frame_state.AllocateArgument(rep));
     }
   }
   return result;
@@ -169,34 +199,56 @@
   switch (result_rep) {
     case kUnboxedInt32:
     case kUnboxedUint32:
-    case kUnboxedInt64:
       return Location::RegisterLocation(CallingConventions::kReturnReg);
+    case kUnboxedInt64:
+      if (compiler::target::kWordSize == 4) {
+        return Location::Pair(
+            Location::RegisterLocation(CallingConventions::kReturnReg),
+            Location::RegisterLocation(CallingConventions::kSecondReturnReg));
+      } else {
+        return Location::RegisterLocation(CallingConventions::kReturnReg);
+      }
     case kUnboxedFloat:
     case kUnboxedDouble:
+#if defined(TARGET_ARCH_IA32)
+      // The result is returned in ST0, but we don't allocate ST registers, so
+      // the FFI trampoline will move it to XMM0.
+      return Location::FpuRegisterLocation(XMM0);
+#else
       return Location::FpuRegisterLocation(CallingConventions::kReturnFpuReg);
+#endif
     default:
       UNREACHABLE();
   }
 }
 
-intptr_t NumStackArguments(const ZoneGrowableArray<Location>& locations) {
+intptr_t NumStackSlots(const ZoneGrowableArray<Location>& locations) {
   intptr_t num_arguments = locations.length();
-  intptr_t num_stack_arguments = 0;
+  intptr_t num_stack_slots = 0;
   for (intptr_t i = 0; i < num_arguments; i++) {
     if (locations.At(i).IsStackSlot()) {
-      num_stack_arguments++;
+      num_stack_slots++;
+    } else if (locations.At(i).IsDoubleStackSlot()) {
+      num_stack_slots += 8 / compiler::target::kWordSize;
+    } else if (locations.At(i).IsPairLocation()) {
+      num_stack_slots +=
+          locations.At(i).AsPairLocation()->At(0).IsStackSlot() ? 1 : 0;
+      num_stack_slots +=
+          locations.At(i).AsPairLocation()->At(1).IsStackSlot() ? 1 : 0;
     }
   }
-  return num_stack_arguments;
+  return num_stack_slots;
 }
 
+#endif  // !defined(DART_PRECOMPILED_RUNTIME)
+
 #else
 
 size_t ElementSizeInBytes(intptr_t class_id) {
   UNREACHABLE();
 }
 
-#endif
+#endif  // defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_IA32)
 
 }  // namespace ffi
 
diff --git a/runtime/vm/compiler/ffi.h b/runtime/vm/compiler/ffi.h
index 2cfaf78..efed0cc 100644
--- a/runtime/vm/compiler/ffi.h
+++ b/runtime/vm/compiler/ffi.h
@@ -49,7 +49,7 @@
     const ZoneGrowableArray<Representation>& arg_reps);
 
 // Number of stack slots used in 'locations'.
-intptr_t NumStackArguments(const ZoneGrowableArray<Location>& locations);
+intptr_t NumStackSlots(const ZoneGrowableArray<Location>& locations);
 
 }  // namespace ffi
 
diff --git a/runtime/vm/compiler/frontend/kernel_to_il.cc b/runtime/vm/compiler/frontend/kernel_to_il.cc
index 931dc0f..169355a 100644
--- a/runtime/vm/compiler/frontend/kernel_to_il.cc
+++ b/runtime/vm/compiler/frontend/kernel_to_il.cc
@@ -2485,7 +2485,7 @@
 
 FlowGraph* FlowGraphBuilder::BuildGraphOfFfiTrampoline(
     const Function& function) {
-#if !defined(TARGET_ARCH_X64)
+#if !defined(TARGET_ARCH_X64) && !defined(TARGET_ARCH_IA32)
   UNREACHABLE();
 #else
   graph_entry_ =
diff --git a/runtime/vm/constants_ia32.cc b/runtime/vm/constants_ia32.cc
new file mode 100644
index 0000000..16ae341
--- /dev/null
+++ b/runtime/vm/constants_ia32.cc
@@ -0,0 +1,21 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+#if defined(TARGET_ARCH_IA32)
+
+#include "vm/constants_ia32.h"
+
+namespace dart {
+
+// Although 'kArgumentRegisters' and 'kXmmArgumentRegisters' are both 0, we have
+// to give these arrays at least one element to appease MSVC.
+
+const Register CallingConventions::ArgumentRegisters[] = {
+    static_cast<Register>(0)};
+const XmmRegister CallingConventions::XmmArgumentRegisters[] = {
+    static_cast<XmmRegister>(0)};
+
+}  // namespace dart
+
+#endif
diff --git a/runtime/vm/constants_ia32.h b/runtime/vm/constants_ia32.h
index 4e56d03..43d3064 100644
--- a/runtime/vm/constants_ia32.h
+++ b/runtime/vm/constants_ia32.h
@@ -119,6 +119,29 @@
 // becomes important to us.
 const int MAX_NOP_SIZE = 8;
 
+class CallingConventions {
+ public:
+  static const Register ArgumentRegisters[];
+  static const intptr_t kArgumentRegisters = 0;
+  static const intptr_t kNumArgRegs = 0;
+
+  static const XmmRegister XmmArgumentRegisters[];
+  static const intptr_t kXmmArgumentRegisters = 0;
+  static const intptr_t kNumXmmArgRegs = 0;
+
+  static const bool kArgumentIntRegXorXmmReg = false;
+
+  static constexpr Register kReturnReg = EAX;
+  static constexpr Register kSecondReturnReg = EDX;
+
+  // Floating point values are returned on the "FPU stack" (in "ST" registers).
+  static constexpr XmmRegister kReturnXmmReg = kNoXmmRegister;
+
+  static constexpr Register kFirstCalleeSavedCpuReg = EBX;
+  static constexpr Register kFirstNonArgumentRegister = EAX;
+  static constexpr Register kSecondNonArgumentRegister = ECX;
+};
+
 }  // namespace dart
 
 #endif  // RUNTIME_VM_CONSTANTS_IA32_H_
diff --git a/runtime/vm/constants_x64.cc b/runtime/vm/constants_x64.cc
index df6fb75..397977e 100644
--- a/runtime/vm/constants_x64.cc
+++ b/runtime/vm/constants_x64.cc
@@ -2,6 +2,8 @@
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
 
+#if defined(TARGET_ARCH_X64)
+
 #include "vm/constants_x64.h"
 
 namespace dart {
@@ -25,3 +27,5 @@
 #endif
 
 }  // namespace dart
+
+#endif
diff --git a/runtime/vm/constants_x64.h b/runtime/vm/constants_x64.h
index 00affe4..cbaedaa 100644
--- a/runtime/vm/constants_x64.h
+++ b/runtime/vm/constants_x64.h
@@ -188,6 +188,7 @@
   static const size_t kRegisterTransferLimit = 16;
 
   static constexpr Register kReturnReg = RAX;
+  static constexpr Register kSecondReturnReg = kNoRegister;
   static constexpr FpuRegister kReturnFpuReg = XMM0;
 #else
   static const Register kArg1Reg = RDI;
@@ -231,6 +232,7 @@
   static const XmmRegister xmmFirstNonParameterReg = XMM8;
 
   static constexpr Register kReturnReg = RAX;
+  static constexpr Register kSecondReturnReg = kNoRegister;
   static constexpr FpuRegister kReturnFpuReg = XMM0;
 #endif
 
diff --git a/runtime/vm/dart_api_impl.h b/runtime/vm/dart_api_impl.h
index 41f7383..0fa7622 100644
--- a/runtime/vm/dart_api_impl.h
+++ b/runtime/vm/dart_api_impl.h
@@ -297,8 +297,9 @@
 
   static bool IsFfiEnabled() {
     // dart:ffi is not implemented for the following configurations
-#if !defined(TARGET_ARCH_X64)
-    // https://github.com/dart-lang/sdk/issues/35774 IA32
+#if !defined(TARGET_ARCH_X64) && !defined(TARGET_ARCH_IA32)
+    // https://github.com/dart-lang/sdk/issues/35760 Arm32 && Android
+    // https://github.com/dart-lang/sdk/issues/35772 Arm64
     return false;
 #elif !defined(TARGET_OS_LINUX) && !defined(TARGET_OS_MACOS) &&                \
     !defined(TARGET_OS_WINDOWS)
diff --git a/runtime/vm/ffi_trampoline_stubs_x64.cc b/runtime/vm/ffi_trampoline_stubs_x64.cc
deleted file mode 100644
index a98236d..0000000
--- a/runtime/vm/ffi_trampoline_stubs_x64.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-
-// TODO(dacoharkes): Move this into compiler namespace.
-
-#include "vm/class_id.h"
-#include "vm/globals.h"
-
-#include "vm/stub_code.h"
-
-#if defined(TARGET_ARCH_X64) && !defined(DART_PRECOMPILED_RUNTIME)
-
-#include "vm/compiler/assembler/assembler.h"
-#include "vm/compiler/assembler/disassembler.h"
-#include "vm/compiler/backend/flow_graph_compiler.h"
-#include "vm/compiler/ffi.h"
-#include "vm/compiler/jit/compiler.h"
-#include "vm/constants_x64.h"
-#include "vm/dart_entry.h"
-#include "vm/heap/heap.h"
-#include "vm/heap/scavenger.h"
-#include "vm/instructions.h"
-#include "vm/object_store.h"
-#include "vm/resolver.h"
-#include "vm/stack_frame.h"
-#include "vm/tags.h"
-#include "vm/type_testing_stubs.h"
-
-#define __ assembler->
-
-namespace dart {
-
-void GenerateFfiInverseTrampoline(Assembler* assembler,
-                                  const Function& signature,
-                                  void* dart_entry_point) {
-  ZoneGrowableArray<Representation>* arg_representations =
-      compiler::ffi::ArgumentRepresentations(signature);
-  ZoneGrowableArray<Location>* arg_locations =
-      compiler::ffi::ArgumentLocations(*arg_representations);
-
-  intptr_t num_dart_arguments = signature.num_fixed_parameters();
-  intptr_t num_arguments = num_dart_arguments - 1;  // Ignore closure.
-
-  // TODO(dacoharkes): Implement this.
-  // https://github.com/dart-lang/sdk/issues/35761
-  // Look at StubCode::GenerateInvokeDartCodeStub.
-
-  __ int3();
-
-  for (intptr_t i = 0; i < num_arguments; i++) {
-    Register reg = arg_locations->At(i).reg();
-    __ SmiTag(reg);
-  }
-
-  __ movq(RBX, Immediate(reinterpret_cast<intptr_t>(dart_entry_point)));
-
-  __ int3();
-
-  __ call(RBX);
-
-  __ int3();
-}
-
-}  // namespace dart
-
-#endif  // defined(TARGET_ARCH_X64) && !defined(DART_PRECOMPILED_RUNTIME)
diff --git a/runtime/vm/vm_sources.gni b/runtime/vm/vm_sources.gni
index 4081d35..7fb6f1f 100644
--- a/runtime/vm/vm_sources.gni
+++ b/runtime/vm/vm_sources.gni
@@ -46,6 +46,7 @@
   "constants_arm64.h",
   "constants_dbc.h",
   "constants_ia32.h",
+  "constants_ia32.cc",
   "constants_kbc.h",
   "constants_x64.cc",
   "constants_x64.h",
@@ -92,7 +93,6 @@
   "dwarf.h",
   "exceptions.cc",
   "exceptions.h",
-  "ffi_trampoline_stubs_x64.cc",
   "finalizable_data.h",
   "fixed_cache.h",
   "flag_list.h",
diff --git a/tests/standalone_2/ffi/data_test.dart b/tests/standalone_2/ffi/data_test.dart
index 27a3790..4cdc79b 100644
--- a/tests/standalone_2/ffi/data_test.dart
+++ b/tests/standalone_2/ffi/data_test.dart
@@ -461,7 +461,7 @@
   }
 
   int size = generic<ffi.Pointer<ffi.Int64>>();
-  Expect.equals(8, size);
+  Expect.isTrue(size == 8 || size == 4);
 }
 
 void testSizeOfVoid() {
diff --git a/tests/standalone_2/ffi/function_callbacks_test.dart b/tests/standalone_2/ffi/function_callbacks_test.dart
index 9817182..e9a0a7e 100644
--- a/tests/standalone_2/ffi/function_callbacks_test.dart
+++ b/tests/standalone_2/ffi/function_callbacks_test.dart
@@ -22,7 +22,6 @@
 void main() {
   testFunctionWithFunctionPointer();
   testNativeFunctionWithFunctionPointer();
-  testFromFunction();
 }
 
 ffi.DynamicLibrary ffiTestFunctions =
@@ -75,18 +74,3 @@
 
 typedef ApplyTo42And74Type = int Function(
     ffi.Pointer<ffi.NativeFunction<NativeIntptrBinOp>>);
-
-void testFromFunction() {
-  ffi.Pointer<ffi.NativeFunction<NativeIntptrBinOp>> pointer =
-      ffi.fromFunction(myPlus);
-  Expect.isNotNull(pointer);
-
-  ffi.Pointer<ffi.NativeFunction<NativeApplyTo42And74Type>> p17 =
-      ffiTestFunctions.lookup("ApplyTo42And74");
-  ApplyTo42And74Type applyTo42And74 = p17.asFunction();
-
-  // TODO(dacoharkes): implement this
-
-  // int result = applyTo42And74(pointer);
-  // print(result);
-}
diff --git a/tests/standalone_2/ffi/function_stress_test.dart b/tests/standalone_2/ffi/function_stress_test.dart
index fd3f0ec..2b0099e 100644
--- a/tests/standalone_2/ffi/function_stress_test.dart
+++ b/tests/standalone_2/ffi/function_stress_test.dart
@@ -41,8 +41,7 @@
     // Smi.
     await test(watcher, testBoxInt32, mustTriggerGC: false);
     await test(watcher, testBoxDouble);
-    await test(watcher, testBoxSmallPointer);
-    await test(watcher, testBoxLargePointer);
+    await test(watcher, testBoxPointer);
   } finally {
     watcher.dispose();
   }
@@ -85,19 +84,17 @@
   Expect.equals(0x80000000 * -1.0, smallDouble());
 }
 
-final smallPointer = ffiTestFunctions
-    .lookupFunction<NativeNullaryOpPtr, NullaryOpPtr>("SmallPointer");
-
-// Forces boxing into ffi.Pointer. On 32-bit platforms, also forces boxing into
-// Mint inside of ffi.Pointer.
-void testBoxSmallPointer() {
-  Expect.equals(-0x80000000, smallPointer().address);
-}
-
 final largePointer = ffiTestFunctions
     .lookupFunction<NativeNullaryOpPtr, NullaryOpPtr>("LargePointer");
 
-// Forces boxing into ffi.Pointer and ffi.Mint on all platforms.
-void testBoxLargePointer() {
-  Expect.equals(-0x8000000000000000, largePointer().address);
+// Forces boxing into ffi.Pointer and ffi.Mint.
+void testBoxPointer() {
+  ffi.Pointer pointer = largePointer();
+  if (pointer != null) {
+    if (ffi.sizeOf<ffi.Pointer>() == 4) {
+      Expect.equals(0x82000000, pointer.address);
+    } else {
+      Expect.equals(0x8100000082000000, pointer.address);
+    }
+  }
 }
diff --git a/tests/standalone_2/standalone_2_kernel.status b/tests/standalone_2/standalone_2_kernel.status
index 2e1dc6a..75ee38a 100644
--- a/tests/standalone_2/standalone_2_kernel.status
+++ b/tests/standalone_2/standalone_2_kernel.status
@@ -7,6 +7,9 @@
 fragmentation_test: Pass, Slow # GC heavy
 io/process_sync_test: Pass, Slow # Spawns synchronously subprocesses in sequence.
 
+[ $arch == ia32 ]
+ffi/function_structs_test: Skip # Struct alignment rules are broken on 32-bit. # Issue 35768
+
 [ $builder_tag == asan ]
 ffi/data_not_asan_test: Skip # this test tries to allocate too much memory on purpose
 io/file_test: Fail # Issue 34724
@@ -241,12 +244,12 @@
 io/web_socket_compression_test: Skip # Timeout
 io/web_socket_test: Skip # Timeout
 
-[ $arch != x64 || $compiler != dartk || $mode == product || $system != linux && $system != macos && $system != windows ]
+[ $compiler != dartk || $mode == product || $arch != ia32 && $arch != x64 || $system != linux && $system != macos && $system != windows ]
 ffi/function_stress_test: SkipByDesign # FFI must be supported. Also requires --verbose-gc, which isn't included in product.
 ffi/subtype_test: SkipByDesign # FFI must be supported. Also requires --verbose-gc, which isn't included in product.
 
-[ $arch != x64 || $system != linux && $system != macos && $system != windows ]
-ffi: Skip # ffi not yet supported on other systems than linux/macos/windows x64
+[ $arch != ia32 && $arch != x64 || $system != linux && $system != macos && $system != windows ]
+ffi: Skip # ffi not yet supported on other systems than linux/macos/windows x64/ia32
 
 [ $compiler != dartk && $compiler != dartkb && $compiler != dartkp || $compiler == dartkp && $system == windows ]
 entrypoints_verification_test: SkipByDesign # Requires VM to run. Cannot run in precompiled Windows because the DLL is linked against dart.exe instead of dart_precompiled_runtime.exe.