[vm] Build FFI trampolines with IL.

Bugs fixed: dartbug.com/36033, dartbug.com/36034, dartbug.com/36155.

Change-Id: Ic463dd2d299018e03f840ecedc8b7dfa350c6b95
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94860
Commit-Queue: Samir Jindel <sjindel@google.com>
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Aart Bik <ajcbik@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/lib/ffi.cc b/runtime/lib/ffi.cc
index d084216..6e1903b 100644
--- a/runtime/lib/ffi.cc
+++ b/runtime/lib/ffi.cc
@@ -6,7 +6,6 @@
 #include "include/dart_api.h"
 #include "vm/bootstrap_natives.h"
 #include "vm/class_finalizer.h"
-#include "vm/class_id.h"
 #include "vm/compiler/assembler/assembler.h"
 #include "vm/exceptions.h"
 #include "vm/log.h"
@@ -227,10 +226,10 @@
   GET_NON_NULL_NATIVE_ARGUMENT(Integer, argCount, arguments->NativeArgAt(0));
   int64_t count = argCount.AsInt64Value();
   classid_t type_cid = type_arg.type_class_id();
-  int64_t max_count = INTPTR_MAX / ffi::ElementSizeInBytes(type_cid);
+  int64_t max_count = INTPTR_MAX / compiler::ffi::ElementSizeInBytes(type_cid);
   CheckRange(argCount, 1, max_count, "count");
 
-  size_t size = ffi::ElementSizeInBytes(type_cid) * count;
+  size_t size = compiler::ffi::ElementSizeInBytes(type_cid) * count;
   intptr_t memory = reinterpret_cast<intptr_t>(malloc(size));
   if (memory == 0) {
     const String& error = String::Handle(String::NewFormatted(
@@ -268,9 +267,9 @@
 
   classid_t class_id = pointer_type_arg.type_class_id();
   Integer& address = Integer::Handle(zone, pointer.GetCMemoryAddress());
-  address =
-      Integer::New(address.AsInt64Value() +
-                   index.AsInt64Value() * ffi::ElementSizeInBytes(class_id));
+  address = Integer::New(address.AsInt64Value() +
+                         index.AsInt64Value() *
+                             compiler::ffi::ElementSizeInBytes(class_id));
   RawPointer* result = Pointer::New(pointer_type_arg, address);
   return result;
 }
@@ -470,54 +469,7 @@
   CheckSized(type_arg);
 
   classid_t type_cid = type_arg.type_class_id();
-  return Smi::New(ffi::ElementSizeInBytes(type_cid));
-}
-
-// Generates assembly to trampoline from Dart into C++.
-//
-// Attaches assembly code to the function with the folling features:
-// - unboxes arguments
-// - puts the arguments on the c stack
-// - invokes the c function
-// - reads the the result
-// - boxes the result and returns it.
-//
-// It inspects the signature to know what to box/unbox
-// Parameter `function` has the Dart types in its signature
-// Parameter `c_signature` has the C++ types in its signature
-static RawCode* TrampolineCode(const Function& function,
-                               const Function& c_signature) {
-#if defined(DART_PRECOMPILED_RUNTIME) || defined(DART_PRECOMPILER)
-  // Currently we generate the trampoline when calling asFunction(), this means
-  // the ffi cannot be used in AOT.
-  // In order make it work in AOT we need to:
-  // - collect all asFunction signatures ahead of time
-  // - generate trampolines for those
-  // - store these in the object store
-  // - and read these from the object store when calling asFunction()
-  // https://github.com/dart-lang/sdk/issues/35765
-  UNREACHABLE();
-#elif !defined(TARGET_ARCH_X64)
-  // https://github.com/dart-lang/sdk/issues/35774
-  UNREACHABLE();
-#elif !defined(TARGET_OS_LINUX) && !defined(TARGET_OS_MACOS) &&                \
-    !defined(TARGET_OS_WINDOWS)
-  // https://github.com/dart-lang/sdk/issues/35760 Arm32 && Android
-  // https://github.com/dart-lang/sdk/issues/35772 Arm64
-  // https://github.com/dart-lang/sdk/issues/35773 DBC
-  UNREACHABLE();
-#else
-  extern void GenerateFfiTrampoline(Assembler * assembler,
-                                    const Function& signature);
-  ObjectPoolBuilder object_pool_builder;
-  Assembler assembler(&object_pool_builder);
-  GenerateFfiTrampoline(&assembler, c_signature);
-  const Code& code = Code::Handle(Code::FinalizeCodeAndNotify(
-      function, nullptr, &assembler, Code::PoolAttachment::kAttachPool));
-  code.set_exception_handlers(
-      ExceptionHandlers::Handle(ExceptionHandlers::New(0)));
-  return code.raw();
-#endif
+  return Smi::New(compiler::ffi::ElementSizeInBytes(type_cid));
 }
 
 // TODO(dacoharkes): Cache the trampolines.
@@ -526,25 +478,37 @@
                                        const Function& c_signature) {
   Thread* thread = Thread::Current();
   Zone* zone = thread->zone();
-  const String& name =
+  String& name =
       String::ZoneHandle(Symbols::New(Thread::Current(), "FfiTrampoline"));
   const Library& lib = Library::Handle(Library::FfiLibrary());
   const Class& owner_class = Class::Handle(lib.toplevel_class());
-  Function& function = Function::ZoneHandle(
-      zone, Function::New(name, RawFunction::kFfiTrampoline,
-                          true,   // is_static
-                          false,  // is_const
-                          false,  // is_abstract
-                          false,  // is_external
-                          true,   // is_native
-                          owner_class, TokenPosition::kMinSource));
-
+  Function& function =
+      Function::Handle(zone, Function::New(name, RawFunction::kFfiTrampoline,
+                                           /*is_static=*/true,
+                                           /*is_const=*/false,
+                                           /*is_abstract=*/false,
+                                           /*is_external=*/false,
+                                           /*is_native=*/false, owner_class,
+                                           TokenPosition::kMinSource));
+  function.set_is_debuggable(false);
   function.set_num_fixed_parameters(dart_signature.num_fixed_parameters());
   function.set_result_type(AbstractType::Handle(dart_signature.result_type()));
   function.set_parameter_types(Array::Handle(dart_signature.parameter_types()));
 
-  const Code& code = Code::Handle(TrampolineCode(function, c_signature));
-  function.AttachCode(code);
+  // The signature function won't have any names for the parameters. We need to
+  // assign unique names for scope building and error messages.
+  const intptr_t num_params = dart_signature.num_fixed_parameters();
+  const Array& parameter_names = Array::Handle(Array::New(num_params));
+  for (intptr_t i = 0; i < num_params; ++i) {
+    if (i == 0) {
+      name = Symbols::ClosureParameter().raw();
+    } else {
+      name = Symbols::NewFormatted(thread, ":ffiParam%" Pd, i);
+    }
+    parameter_names.SetAt(i, name);
+  }
+  function.set_parameter_names(parameter_names);
+  function.SetFfiCSignature(c_signature);
 
   return function.raw();
 }
diff --git a/runtime/vm/compiler/assembler/assembler_x64.h b/runtime/vm/compiler/assembler/assembler_x64.h
index db70602..8f2d56b 100644
--- a/runtime/vm/compiler/assembler/assembler_x64.h
+++ b/runtime/vm/compiler/assembler/assembler_x64.h
@@ -836,7 +836,7 @@
   //   ....
   //   locals space  <=== RSP
   //   saved PP
-  //   pc (used to derive the RawInstruction Object of the dart code)
+  //   code object (used to derive the RawInstruction Object of the dart code)
   //   saved RBP     <=== RBP
   //   ret PC
   //   .....
diff --git a/runtime/vm/compiler/backend/constant_propagator.cc b/runtime/vm/compiler/backend/constant_propagator.cc
index 12b10eb..7eab5b9 100644
--- a/runtime/vm/compiler/backend/constant_propagator.cc
+++ b/runtime/vm/compiler/backend/constant_propagator.cc
@@ -669,6 +669,10 @@
   SetValue(instr, non_constant_);
 }
 
+void ConstantPropagator::VisitFfiCall(FfiCallInstr* instr) {
+  SetValue(instr, non_constant_);
+}
+
 void ConstantPropagator::VisitDebugStepCheck(DebugStepCheckInstr* instr) {
   // Nothing to do.
 }
diff --git a/runtime/vm/compiler/backend/flow_graph.cc b/runtime/vm/compiler/backend/flow_graph.cc
index e2bbc15..41adc25 100644
--- a/runtime/vm/compiler/backend/flow_graph.cc
+++ b/runtime/vm/compiler/backend/flow_graph.cc
@@ -123,7 +123,8 @@
 
 bool FlowGraph::ShouldReorderBlocks(const Function& function,
                                     bool is_optimized) {
-  return is_optimized && FLAG_reorder_basic_blocks && !function.is_intrinsic();
+  return is_optimized && FLAG_reorder_basic_blocks &&
+         !function.is_intrinsic() && !function.IsFfiTrampoline();
 }
 
 GrowableArray<BlockEntryInstr*>* FlowGraph::CodegenBlockOrder(
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.cc b/runtime/vm/compiler/backend/flow_graph_compiler.cc
index 4d70754..f36f34f 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler.cc
@@ -1821,6 +1821,7 @@
 
 const Class& FlowGraphCompiler::BoxClassFor(Representation rep) {
   switch (rep) {
+    case kUnboxedFloat:
     case kUnboxedDouble:
       return double_class();
     case kUnboxedFloat32x4:
diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc
index 9dfb20d..0ffd5c1 100644
--- a/runtime/vm/compiler/backend/il.cc
+++ b/runtime/vm/compiler/backend/il.cc
@@ -15,6 +15,7 @@
 #include "vm/compiler/backend/locations.h"
 #include "vm/compiler/backend/loops.h"
 #include "vm/compiler/backend/range_analysis.h"
+#include "vm/compiler/ffi.h"
 #include "vm/compiler/frontend/flow_graph_builder.h"
 #include "vm/compiler/jit/compiler.h"
 #include "vm/compiler/method_recognizer.h"
@@ -3473,6 +3474,7 @@
       return new BoxInt64Instr(value);
 
     case kUnboxedDouble:
+    case kUnboxedFloat:
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4:
@@ -3490,8 +3492,12 @@
                                SpeculativeMode speculative_mode) {
   switch (to) {
     case kUnboxedInt32:
-      return new UnboxInt32Instr(UnboxInt32Instr::kNoTruncation, value,
-                                 deopt_id, speculative_mode);
+      // We must truncate if we can't deoptimize.
+      return new UnboxInt32Instr(
+          speculative_mode == SpeculativeMode::kNotSpeculative
+              ? UnboxInt32Instr::kTruncate
+              : UnboxInt32Instr::kNoTruncation,
+          value, deopt_id, speculative_mode);
 
     case kUnboxedUint32:
       return new UnboxUint32Instr(value, deopt_id, speculative_mode);
@@ -3500,6 +3506,7 @@
       return new UnboxInt64Instr(value, deopt_id, speculative_mode);
 
     case kUnboxedDouble:
+    case kUnboxedFloat:
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4:
@@ -3728,10 +3735,10 @@
 #endif
   __ Bind(compiler->GetJumpLabel(this));
 
-  // In the AOT compiler we want to reduce code size, so generate no
-  // fall-through code in [FlowGraphCompiler::CompileGraph()].
-  // (As opposed to here where we don't check for the return value of
-  // [Intrinsify]).
+// In the AOT compiler we want to reduce code size, so generate no
+// fall-through code in [FlowGraphCompiler::CompileGraph()].
+// (As opposed to here where we don't check for the return value of
+// [Intrinsify]).
 #if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM)
   if (FLAG_precompiled_mode) {
     const Function& function = compiler->parsed_function().function();
@@ -4632,6 +4639,7 @@
   if (speculative_mode() == kNotSpeculative) {
     switch (representation()) {
       case kUnboxedDouble:
+      case kUnboxedFloat:
         EmitLoadFromBox(compiler);
         break;
 
@@ -5181,6 +5189,74 @@
   set_native_c_function(native_function);
 }
 
+#if defined(TARGET_ARCH_X64)
+
+#define Z zone_
+
+Representation FfiCallInstr::RequiredInputRepresentation(intptr_t idx) const {
+  if (idx == TargetAddressIndex()) {
+    return kUnboxedIntPtr;
+  } else {
+    return arg_representations_[idx];
+  }
+}
+
+LocationSummary* FfiCallInstr::MakeLocationSummary(Zone* zone,
+                                                   bool is_optimizing) const {
+  // The temporary register needs to be callee-saved and not an argument
+  // register.
+  ASSERT(((1 << CallingConventions::kFirstCalleeSavedCpuReg) &
+          CallingConventions::kArgumentRegisters) == 0);
+
+  LocationSummary* summary =
+      new (zone) LocationSummary(zone, /*num_inputs=*/InputCount(),
+                                 /*num_temps=*/1, LocationSummary::kCall);
+
+  summary->set_in(TargetAddressIndex(),
+                  Location::RegisterLocation(
+                      CallingConventions::kFirstNonArgumentRegister));
+  summary->set_temp(0, Location::RegisterLocation(
+                           CallingConventions::kSecondNonArgumentRegister));
+  summary->set_out(0, compiler::ffi::ResultLocation(
+                          compiler::ffi::ResultRepresentation(signature_)));
+
+  for (intptr_t i = 0, n = NativeArgCount(); i < n; ++i) {
+    Location target = arg_locations_[i];
+    if (target.IsMachineRegister()) {
+      summary->set_in(i, target);
+    } else {
+      // Since we have to push this input on the stack, there's no point in
+      // pinning it to any specific register.
+      summary->set_in(i, Location::Any());
+    }
+  }
+
+  return summary;
+}
+
+Representation FfiCallInstr::representation() const {
+  return compiler::ffi::ResultRepresentation(signature_);
+}
+
+#undef Z
+
+#else
+
+Representation FfiCallInstr::RequiredInputRepresentation(intptr_t idx) const {
+  UNREACHABLE();
+}
+
+LocationSummary* FfiCallInstr::MakeLocationSummary(Zone* zone,
+                                                   bool is_optimizing) const {
+  UNREACHABLE();
+}
+
+Representation FfiCallInstr::representation() const {
+  UNREACHABLE();
+}
+
+#endif
+
 // SIMD
 
 SimdOpInstr* SimdOpInstr::CreateFromCall(Zone* zone,
diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h
index f9217d6..a16b035 100644
--- a/runtime/vm/compiler/backend/il.h
+++ b/runtime/vm/compiler/backend/il.h
@@ -11,6 +11,7 @@
 #include "vm/compiler/backend/locations.h"
 #include "vm/compiler/backend/slot.h"
 #include "vm/compiler/compiler_state.h"
+#include "vm/compiler/ffi.h"
 #include "vm/compiler/method_recognizer.h"
 #include "vm/flags.h"
 #include "vm/growable_array.h"
@@ -355,6 +356,7 @@
   M(AssertBoolean, _)                                                          \
   M(SpecialParameter, kNoGC)                                                   \
   M(ClosureCall, _)                                                            \
+  M(FfiCall, _)                                                                \
   M(InstanceCall, _)                                                           \
   M(PolymorphicInstanceCall, _)                                                \
   M(StaticCall, _)                                                             \
@@ -4087,6 +4089,63 @@
   DISALLOW_COPY_AND_ASSIGN(NativeCallInstr);
 };
 
+// Performs a call to native C code. In contrast to NativeCall, the arguments
+// are unboxed and passed through the native calling convention. However, not
+// all dart objects can be passed as arguments. Please see the FFI documentation
+// for more details.
+// TODO(35775): Add link to the documentation when it's written.
+class FfiCallInstr : public Definition {
+ public:
+  FfiCallInstr(Zone* zone,
+               intptr_t deopt_id,
+               const Function& signature,
+               const ZoneGrowableArray<Representation>& arg_reps,
+               const ZoneGrowableArray<Location>& arg_locs)
+      : Definition(deopt_id),
+        zone_(zone),
+        signature_(signature),
+        inputs_(arg_reps.length() + 1),
+        arg_representations_(arg_reps),
+        arg_locations_(arg_locs) {
+    inputs_.FillWith(nullptr, 0, arg_reps.length() + 1);
+    ASSERT(signature.IsZoneHandle());
+  }
+
+  DECLARE_INSTRUCTION(FfiCall)
+
+  // Number of arguments to the native function.
+  intptr_t NativeArgCount() const { return InputCount() - 1; }
+
+  // Input index of the function pointer to invoke.
+  intptr_t TargetAddressIndex() const { return NativeArgCount(); }
+
+  virtual intptr_t InputCount() const { return inputs_.length(); }
+  virtual Value* InputAt(intptr_t i) const { return inputs_[i]; }
+  virtual bool MayThrow() const { return false; }
+
+  // FfiCallInstr calls C code, which can call back into Dart.
+  virtual bool ComputeCanDeoptimize() const { return true; }
+
+  virtual bool HasUnknownSideEffects() const { return true; }
+
+  virtual Representation RequiredInputRepresentation(intptr_t idx) const;
+  virtual Representation representation() const;
+
+  PRINT_OPERANDS_TO_SUPPORT
+
+ private:
+  virtual void RawSetInputAt(intptr_t i, Value* value) { inputs_[i] = value; }
+
+  Zone* const zone_;
+  const Function& signature_;
+
+  GrowableArray<Value*> inputs_;
+  const ZoneGrowableArray<Representation>& arg_representations_;
+  const ZoneGrowableArray<Location>& arg_locations_;
+
+  DISALLOW_COPY_AND_ASSIGN(FfiCallInstr);
+};
+
 class DebugStepCheckInstr : public TemplateInstruction<0, NoThrow> {
  public:
   DebugStepCheckInstr(TokenPosition token_pos,
@@ -5362,6 +5421,7 @@
 
   static intptr_t ValueOffset(Representation rep) {
     switch (rep) {
+      case kUnboxedFloat:
       case kUnboxedDouble:
         return Double::value_offset();
 
@@ -5388,6 +5448,7 @@
       case kUnboxedInt64:
         return kMintCid;
       case kUnboxedDouble:
+      case kUnboxedFloat:
         return kDoubleCid;
       case kUnboxedFloat32x4:
         return kFloat32x4Cid;
@@ -6926,6 +6987,7 @@
   DISALLOW_COPY_AND_ASSIGN(FloatToDoubleInstr);
 };
 
+// TODO(sjindel): Replace with FfiCallInstr.
 class InvokeMathCFunctionInstr : public PureDefinition {
  public:
   InvokeMathCFunctionInstr(ZoneGrowableArray<Value*>* inputs,
diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc
index 033b200..bcf71bb 100644
--- a/runtime/vm/compiler/backend/il_arm.cc
+++ b/runtime/vm/compiler/backend/il_arm.cc
@@ -982,6 +982,10 @@
   __ Drop(ArgumentCount());  // Drop the arguments.
 }
 
+void FfiCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNREACHABLE();
+}
+
 LocationSummary* OneByteStringFromCharCodeInstr::MakeLocationSummary(
     Zone* zone,
     bool opt) const {
@@ -4048,6 +4052,11 @@
     case kUnboxedDouble:
       __ StoreDToOffset(value, out_reg, ValueOffset() - kHeapObjectTag);
       break;
+    case kUnboxedFloat:
+      __ vcvtds(DTMP, EvenSRegisterOf(value));
+      __ StoreDToOffset(EvenDRegisterOf(FpuTMP), out_reg,
+                        ValueOffset() - kHeapObjectTag);
+      break;
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4:
@@ -4098,6 +4107,13 @@
       break;
     }
 
+    case kUnboxedFloat: {
+      const DRegister result = EvenDRegisterOf(locs()->out(0).fpu_reg());
+      __ LoadDFromOffset(result, box, ValueOffset() - kHeapObjectTag);
+      __ vcvtsd(EvenSRegisterOf(result), result);
+      break;
+    }
+
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4: {
diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc
index b820504..7e1de69 100644
--- a/runtime/vm/compiler/backend/il_arm64.cc
+++ b/runtime/vm/compiler/backend/il_arm64.cc
@@ -872,6 +872,10 @@
   __ Drop(ArgumentCount());  // Drop the arguments.
 }
 
+void FfiCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNREACHABLE();
+}
+
 LocationSummary* OneByteStringFromCharCodeInstr::MakeLocationSummary(
     Zone* zone,
     bool opt) const {
@@ -3505,6 +3509,10 @@
     case kUnboxedDouble:
       __ StoreDFieldToOffset(value, out_reg, ValueOffset());
       break;
+    case kUnboxedFloat:
+      __ fcvtds(FpuTMP, value);
+      __ StoreDFieldToOffset(FpuTMP, out_reg, ValueOffset());
+      break;
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4:
@@ -3544,6 +3552,13 @@
       break;
     }
 
+    case kUnboxedFloat: {
+      const VRegister result = locs()->out(0).fpu_reg();
+      __ LoadDFieldFromOffset(result, box, ValueOffset());
+      __ fcvtsd(result, result);
+      break;
+    }
+
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4: {
diff --git a/runtime/vm/compiler/backend/il_dbc.cc b/runtime/vm/compiler/backend/il_dbc.cc
index 70f8e2b..0ffd8d9 100644
--- a/runtime/vm/compiler/backend/il_dbc.cc
+++ b/runtime/vm/compiler/backend/il_dbc.cc
@@ -958,6 +958,10 @@
   }
 }
 
+void FfiCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNREACHABLE();
+}
+
 EMIT_NATIVE_CODE(NativeCall,
                  0,
                  Location::NoLocation(),
diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc
index 3f6ee68..1d0b013 100644
--- a/runtime/vm/compiler/backend/il_ia32.cc
+++ b/runtime/vm/compiler/backend/il_ia32.cc
@@ -12,6 +12,7 @@
 #include "vm/compiler/backend/locations.h"
 #include "vm/compiler/backend/locations_helpers.h"
 #include "vm/compiler/backend/range_analysis.h"
+#include "vm/compiler/ffi.h"
 #include "vm/compiler/frontend/flow_graph_builder.h"
 #include "vm/compiler/jit/compiler.h"
 #include "vm/dart_entry.h"
@@ -844,6 +845,10 @@
   __ Drop(ArgumentCount());  // Drop the arguments.
 }
 
+void FfiCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNREACHABLE();
+}
+
 static bool CanBeImmediateIndex(Value* value, intptr_t cid) {
   ConstantInstr* constant = value->definition()->AsConstant();
   if ((constant == NULL) || !Assembler::IsSafeSmi(constant->value())) {
@@ -3358,6 +3363,10 @@
     case kUnboxedDouble:
       __ movsd(FieldAddress(out_reg, ValueOffset()), value);
       break;
+    case kUnboxedFloat:
+      __ cvtss2sd(FpuTMP, value);
+      __ movsd(FieldAddress(out_reg, ValueOffset()), FpuTMP);
+      break;
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4:
@@ -3410,6 +3419,13 @@
       break;
     }
 
+    case kUnboxedFloat: {
+      const FpuRegister result = locs()->out(0).fpu_reg();
+      __ movsd(result, FieldAddress(box, ValueOffset()));
+      __ cvtsd2ss(result, result);
+      break;
+    }
+
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4: {
diff --git a/runtime/vm/compiler/backend/il_printer.cc b/runtime/vm/compiler/backend/il_printer.cc
index a2a26ea..c6ab009 100644
--- a/runtime/vm/compiler/backend/il_printer.cc
+++ b/runtime/vm/compiler/backend/il_printer.cc
@@ -511,6 +511,18 @@
   }
 }
 
+void FfiCallInstr::PrintOperandsTo(BufferFormatter* f) const {
+  f->Print(" pointer=");
+  InputAt(TargetAddressIndex())->PrintTo(f);
+  f->Print(" signature=%s",
+           Type::Handle(signature_.SignatureType()).ToCString());
+  for (intptr_t i = 0, n = InputCount(); i < n - 1; ++i) {
+    f->Print(", ");
+    InputAt(i)->PrintTo(f);
+    f->Print(" (at %s) ", arg_locations_[i].ToCString());
+  }
+}
+
 void InstanceCallInstr::PrintOperandsTo(BufferFormatter* f) const {
   f->Print(" %s<%" Pd ">", function_name().ToCString(), type_args_len());
   for (intptr_t i = 0; i < ArgumentCount(); ++i) {
@@ -910,6 +922,8 @@
       return "untagged";
     case kUnboxedDouble:
       return "double";
+    case kUnboxedFloat:
+      return "float";
     case kUnboxedInt32:
       return "int32";
     case kUnboxedUint32:
diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc
index 2c632c8..53c0b02 100644
--- a/runtime/vm/compiler/backend/il_x64.cc
+++ b/runtime/vm/compiler/backend/il_x64.cc
@@ -7,11 +7,13 @@
 
 #include "vm/compiler/backend/il.h"
 
+#include "vm/compiler/assembler/assembler.h"
 #include "vm/compiler/backend/flow_graph.h"
 #include "vm/compiler/backend/flow_graph_compiler.h"
 #include "vm/compiler/backend/locations.h"
 #include "vm/compiler/backend/locations_helpers.h"
 #include "vm/compiler/backend/range_analysis.h"
+#include "vm/compiler/ffi.h"
 #include "vm/compiler/jit/compiler.h"
 #include "vm/dart_entry.h"
 #include "vm/instructions.h"
@@ -877,6 +879,99 @@
   __ Drop(ArgumentCount());  // Drop the arguments.
 }
 
+void FfiCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  Register saved_fp = locs()->temp(0).reg();
+  Register target_address = locs()->in(TargetAddressIndex()).reg();
+
+  // Save frame pointer because we're going to update it when we enter the exit
+  // frame.
+  __ movq(saved_fp, FPREG);
+
+  // Make a space to put the return address.
+  __ pushq(Immediate(0));
+
+  // We need to create a dummy "exit frame". It will share the same pool pointer
+  // but have a null code object.
+  __ LoadObject(CODE_REG, Object::null_object());
+  __ set_constant_pool_allowed(false);
+  __ EnterDartFrame(
+      compiler::ffi::NumStackArguments(arg_locations_) * kWordSize, PP);
+
+  // Save exit frame information to enable stack walking as we are about to
+  // transition to Dart VM C++ code.
+  __ movq(Address(THR, Thread::top_exit_frame_info_offset()), FPREG);
+
+  // Align frame before entering C++ world.
+  if (OS::ActivationFrameAlignment() > 1) {
+    __ andq(SPREG, Immediate(~(OS::ActivationFrameAlignment() - 1)));
+  }
+
+  for (intptr_t i = 0, n = NativeArgCount(); i < n; ++i) {
+    Location origin = locs()->in(i);
+    Location target = arg_locations_[i];
+
+    if (target.IsStackSlot()) {
+      if (origin.IsRegister()) {
+        __ movq(target.ToStackSlotAddress(), origin.reg());
+      } else if (origin.IsFpuRegister()) {
+        __ movq(TMP, origin.fpu_reg());
+        __ movq(target.ToStackSlotAddress(), TMP);
+      } else if (origin.IsStackSlot() || origin.IsDoubleStackSlot()) {
+        // The base register cannot be SPREG because we've moved it.
+        ASSERT(origin.base_reg() == FPREG);
+        __ movq(TMP, Address(saved_fp, origin.ToStackSlotOffset()));
+        __ movq(target.ToStackSlotAddress(), TMP);
+      }
+    } else {
+      ASSERT(origin.Equals(target));
+    }
+  }
+
+  // Mark that the thread is executing VM code.
+  __ movq(Assembler::VMTagAddress(), target_address);
+
+// We need to copy the return address up into the dummy stack frame so the
+// stack walker will know which safepoint to use.
+#if defined(TARGET_OS_WINDOWS)
+  constexpr intptr_t kCallSequenceLength = 10;
+#else
+  constexpr intptr_t kCallSequenceLength = 6;
+#endif
+
+  // RIP points to the *next* instruction, so 'AddressRIPRelative' loads the
+  // address of the following 'movq'.
+  __ leaq(TMP, Address::AddressRIPRelative(kCallSequenceLength));
+
+  const intptr_t call_sequence_start = __ CodeSize();
+  __ movq(Address(FPREG, kSavedCallerPcSlotFromFp * kWordSize), TMP);
+  __ CallCFunction(target_address);
+
+  ASSERT(__ CodeSize() - call_sequence_start == kCallSequenceLength);
+
+  compiler->EmitCallsiteMetadata(TokenPosition::kNoSource, DeoptId::kNone,
+                                 RawPcDescriptors::Kind::kOther, locs());
+
+  // Mark that the thread is executing Dart code.
+  __ movq(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
+
+  // Reset exit frame information in Isolate structure.
+  __ movq(Address(THR, Thread::top_exit_frame_info_offset()), Immediate(0));
+
+  // Although PP is a callee-saved register, it may have been moved by the GC.
+  __ LeaveDartFrame(compiler::kRestoreCallerPP);
+
+  // Restore the global object pool after returning from runtime (old space is
+  // moving, so the GOP could have been relocated).
+  if (FLAG_precompiled_mode && FLAG_use_bare_instructions) {
+    __ movq(PP, Address(THR, Thread::global_object_pool_offset()));
+  }
+
+  __ set_constant_pool_allowed(true);
+
+  // Instead of returning to the "fake" return address, we just pop it.
+  __ popq(TMP);
+}
+
 static bool CanBeImmediateIndex(Value* index, intptr_t cid) {
   if (!index->definition()->IsConstant()) return false;
   const Object& constant = index->definition()->AsConstant()->value();
@@ -3698,6 +3793,11 @@
     case kUnboxedDouble:
       __ movsd(FieldAddress(out_reg, ValueOffset()), value);
       break;
+    case kUnboxedFloat: {
+      __ cvtss2sd(FpuTMP, value);
+      __ movsd(FieldAddress(out_reg, ValueOffset()), FpuTMP);
+      break;
+    }
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4:
@@ -3743,6 +3843,13 @@
       break;
     }
 
+    case kUnboxedFloat: {
+      const FpuRegister result = locs()->out(0).fpu_reg();
+      __ movsd(result, FieldAddress(box, ValueOffset()));
+      __ cvtsd2ss(result, result);
+      break;
+    }
+
     case kUnboxedFloat32x4:
     case kUnboxedFloat64x2:
     case kUnboxedInt32x4: {
diff --git a/runtime/vm/compiler/backend/linearscan.cc b/runtime/vm/compiler/backend/linearscan.cc
index b09079d..23b5189 100644
--- a/runtime/vm/compiler/backend/linearscan.cc
+++ b/runtime/vm/compiler/backend/linearscan.cc
@@ -824,8 +824,9 @@
 static Location::Kind RegisterKindForResult(Instruction* instr) {
   const Representation rep = instr->representation();
 #if !defined(TARGET_ARCH_DBC)
-  if ((rep == kUnboxedDouble) || (rep == kUnboxedFloat32x4) ||
-      (rep == kUnboxedInt32x4) || (rep == kUnboxedFloat64x2)) {
+  if ((rep == kUnboxedFloat) || (rep == kUnboxedDouble) ||
+      (rep == kUnboxedFloat32x4) || (rep == kUnboxedInt32x4) ||
+      (rep == kUnboxedFloat64x2)) {
     return Location::kFpuRegister;
   } else {
     return Location::kRegister;
@@ -1454,7 +1455,7 @@
 #if defined(DEBUG)
     // Verify that temps, inputs and output were specified as fixed
     // locations.  Every register is blocked now so attempt to
-    // allocate will not succeed.
+    // allocate will fall back to the stack.
     for (intptr_t j = 0; j < locs->temp_count(); j++) {
       ASSERT(!locs->temp(j).IsPairLocation());
       ASSERT(!locs->temp(j).IsUnallocated());
@@ -1463,10 +1464,13 @@
     for (intptr_t j = 0; j < locs->input_count(); j++) {
       if (locs->in(j).IsPairLocation()) {
         PairLocation* pair = locs->in_slot(j)->AsPairLocation();
-        ASSERT(!pair->At(0).IsUnallocated());
-        ASSERT(!pair->At(1).IsUnallocated());
+        ASSERT(!pair->At(0).IsUnallocated() ||
+               locs->in(j).policy() == Location::kAny);
+        ASSERT(!pair->At(1).IsUnallocated() ||
+               locs->in(j).policy() == Location::kAny);
       } else {
-        ASSERT(!locs->in(j).IsUnallocated());
+        ASSERT(!locs->in(j).IsUnallocated() ||
+               locs->in(j).policy() == Location::kAny);
       }
     }
 
@@ -2059,7 +2063,8 @@
       ASSERT(need_quad);
       location = Location::QuadStackSlot(slot_idx);
     } else {
-      ASSERT((range->representation() == kUnboxedDouble));
+      ASSERT(range->representation() == kUnboxedFloat ||
+             range->representation() == kUnboxedDouble);
       location = Location::DoubleStackSlot(slot_idx);
     }
     range->set_spill_slot(location);
diff --git a/runtime/vm/compiler/backend/locations.h b/runtime/vm/compiler/backend/locations.h
index b88b9d5..d1e6f00 100644
--- a/runtime/vm/compiler/backend/locations.h
+++ b/runtime/vm/compiler/backend/locations.h
@@ -23,6 +23,7 @@
   kTagged,
   kUntagged,
   kUnboxedDouble,
+  kUnboxedFloat,
   kUnboxedInt32,
   kUnboxedUint32,
   kUnboxedInt64,
@@ -639,7 +640,10 @@
   void set_in(intptr_t index, Location loc) {
     ASSERT(index >= 0);
     ASSERT(index < num_inputs_);
-    ASSERT(!always_calls() || loc.IsMachineRegister());
+    // See FlowGraphAllocator::ProcessOneInstruction for explanation of this
+    // restriction.
+    ASSERT(!always_calls() || loc.IsMachineRegister() ||
+           (loc.IsUnallocated() && loc.policy() == Location::kAny));
     input_locations_[index] = loc;
   }
 
diff --git a/runtime/vm/compiler/backend/type_propagator.cc b/runtime/vm/compiler/backend/type_propagator.cc
index f0f7046..e3f4f7d 100644
--- a/runtime/vm/compiler/backend/type_propagator.cc
+++ b/runtime/vm/compiler/backend/type_propagator.cc
@@ -1506,6 +1506,7 @@
 
 CompileType UnboxInstr::ComputeType() const {
   switch (representation()) {
+    case kUnboxedFloat:
     case kUnboxedDouble:
       return CompileType::FromCid(kDoubleCid);
 
@@ -1529,6 +1530,7 @@
 
 CompileType BoxInstr::ComputeType() const {
   switch (from_representation()) {
+    case kUnboxedFloat:
     case kUnboxedDouble:
       return CompileType::FromCid(kDoubleCid);
 
diff --git a/runtime/vm/compiler/ffi.cc b/runtime/vm/compiler/ffi.cc
index e343fc7..375dc5d 100644
--- a/runtime/vm/compiler/ffi.cc
+++ b/runtime/vm/compiler/ffi.cc
@@ -6,6 +6,8 @@
 
 namespace dart {
 
+namespace compiler {
+
 namespace ffi {
 
 #if defined(TARGET_ARCH_X64)
@@ -15,26 +17,22 @@
 static const intptr_t kNumElementSizes = kFfiVoidCid - kFfiPointerCid + 1;
 
 static const size_t element_size_table[kNumElementSizes] = {
-    sizeof(intptr_t),  // kFfiPointerCid
-    kSizeUnknown,      // kFfiNativeFunctionCid
-    1,                 // kFfiInt8Cid
-    2,                 // kFfiInt16Cid
-    4,                 // kFfiInt32Cid
-    8,                 // kFfiInt64Cid
-    1,                 // kFfiUint8Cid
-    2,                 // kFfiUint16Cid
-    4,                 // kFfiUint32Cid
-    8,                 // kFfiUint64Cid
-    sizeof(intptr_t),  // kFfiIntPtrCid
-    4,                 // kFfiFloatCid
-    8,                 // kFfiDoubleCid
-    kSizeUnknown,      // kFfiVoidCid
+    target::kWordSize,  // kFfiPointerCid
+    kSizeUnknown,       // kFfiNativeFunctionCid
+    1,                  // kFfiInt8Cid
+    2,                  // kFfiInt16Cid
+    4,                  // kFfiInt32Cid
+    8,                  // kFfiInt64Cid
+    1,                  // kFfiUint8Cid
+    2,                  // kFfiUint16Cid
+    4,                  // kFfiUint32Cid
+    8,                  // kFfiUint64Cid
+    target::kWordSize,  // kFfiIntPtrCid
+    4,                  // kFfiFloatCid
+    8,                  // kFfiDoubleCid
+    kSizeUnknown,       // kFfiVoidCid
 };
 
-Representation WordRep() {
-  return compiler::target::kWordSize > 4 ? kUnboxedInt64 : kUnboxedInt32;
-}
-
 size_t ElementSizeInBytes(intptr_t class_id) {
   ASSERT(class_id != kFfiNativeFunctionCid);
   ASSERT(class_id != kFfiVoidCid);
@@ -46,45 +44,52 @@
   return element_size_table[index];
 }
 
-bool ElementIsSigned(intptr_t class_id) {
-  switch (class_id) {
-    case kFfiFloatCid:
-    case kFfiDoubleCid:
-    case kFfiInt8Cid:
-    case kFfiInt16Cid:
-    case kFfiInt32Cid:
-    case kFfiInt64Cid:
-    case kFfiIntPtrCid:
-      return true;
-    case kFfiUint8Cid:
-    case kFfiUint16Cid:
-    case kFfiUint32Cid:
-    case kFfiUint64Cid:
-    case kFfiPointerCid:
-    default:  // Subtypes of Pointer.
-      return false;
-  }
-}
-
 Representation TypeRepresentation(const AbstractType& result_type) {
   switch (result_type.type_class_id()) {
     case kFfiFloatCid:
+      return kUnboxedFloat;
     case kFfiDoubleCid:
       return kUnboxedDouble;
     case kFfiInt8Cid:
     case kFfiInt16Cid:
     case kFfiInt32Cid:
+      return kUnboxedInt32;
     case kFfiUint8Cid:
     case kFfiUint16Cid:
     case kFfiUint32Cid:
-      return kUnboxedInt32;
+      return kUnboxedUint32;
     case kFfiInt64Cid:
     case kFfiUint64Cid:
       return kUnboxedInt64;
     case kFfiIntPtrCid:
     case kFfiPointerCid:
     default:  // Subtypes of Pointer.
-      return WordRep();
+      return kUnboxedIntPtr;
+  }
+}
+
+bool NativeTypeIsVoid(const AbstractType& result_type) {
+  return result_type.type_class_id() == kFfiVoidCid;
+}
+
+bool NativeTypeIsPointer(const AbstractType& result_type) {
+  switch (result_type.type_class_id()) {
+    case kFfiVoidCid:
+    case kFfiFloatCid:
+    case kFfiDoubleCid:
+    case kFfiInt8Cid:
+    case kFfiInt16Cid:
+    case kFfiInt32Cid:
+    case kFfiUint8Cid:
+    case kFfiUint16Cid:
+    case kFfiUint32Cid:
+    case kFfiInt64Cid:
+    case kFfiUint64Cid:
+    case kFfiIntPtrCid:
+      return false;
+    case kFfiPointerCid:
+    default:
+      return true;
   }
 }
 
@@ -120,6 +125,7 @@
     on_stack = true;
     switch (arg_reps.At(i)) {
       case kUnboxedInt32:
+      case kUnboxedUint32:
       case kUnboxedInt64:
         if (regs_used < CallingConventions::kNumArgRegs) {
           data[i] = Location::RegisterLocation(
@@ -131,6 +137,7 @@
           on_stack = false;
         }
         break;
+      case kUnboxedFloat:
       case kUnboxedDouble:
         if (xmm_regs_used < CallingConventions::kNumXmmArgRegs) {
           data[i] = Location::FpuRegisterLocation(
@@ -146,7 +153,6 @@
         UNREACHABLE();
     }
     if (on_stack) {
-      // SAMIR_TODO: Is this correct?
       data[i] = Location::StackSlot(nth_stack_argument, RSP);
       nth_stack_argument++;
     }
@@ -154,6 +160,36 @@
   return result;
 }
 
+Representation ResultRepresentation(const Function& signature) {
+  AbstractType& arg_type = AbstractType::Handle(signature.result_type());
+  return TypeRepresentation(arg_type);
+}
+
+Location ResultLocation(Representation result_rep) {
+  switch (result_rep) {
+    case kUnboxedInt32:
+    case kUnboxedUint32:
+    case kUnboxedInt64:
+      return Location::RegisterLocation(CallingConventions::kReturnReg);
+    case kUnboxedFloat:
+    case kUnboxedDouble:
+      return Location::FpuRegisterLocation(CallingConventions::kReturnFpuReg);
+    default:
+      UNREACHABLE();
+  }
+}
+
+intptr_t NumStackArguments(const ZoneGrowableArray<Location>& locations) {
+  intptr_t num_arguments = locations.length();
+  intptr_t num_stack_arguments = 0;
+  for (intptr_t i = 0; i < num_arguments; i++) {
+    if (locations.At(i).IsStackSlot()) {
+      num_stack_arguments++;
+    }
+  }
+  return num_stack_arguments;
+}
+
 #else
 
 size_t ElementSizeInBytes(intptr_t class_id) {
@@ -164,4 +200,6 @@
 
 }  // namespace ffi
 
+}  // namespace compiler
+
 }  // namespace dart
diff --git a/runtime/vm/compiler/ffi.h b/runtime/vm/compiler/ffi.h
index b66818b..e3eca6f 100644
--- a/runtime/vm/compiler/ffi.h
+++ b/runtime/vm/compiler/ffi.h
@@ -14,26 +14,43 @@
 
 namespace dart {
 
+namespace compiler {
+
 namespace ffi {
 
-// Native data types sizes in bytes
-
+// Storage size for an FFI type (extends 'ffi.NativeType').
 size_t ElementSizeInBytes(intptr_t class_id);
 
-bool ElementIsSigned(intptr_t class_id);
-
+// Unboxed representation of an FFI type (extends 'ffi.NativeType').
 Representation TypeRepresentation(const AbstractType& result_type);
 
-Representation WordRep();
+// Whether a type which extends 'ffi.NativeType' also extends 'ffi.Pointer'.
+bool NativeTypeIsPointer(const AbstractType& result_type);
 
+// Whether a type is 'ffi.Void'.
+bool NativeTypeIsVoid(const AbstractType& result_type);
+
+// Unboxed representation of the result of a C signature function.
+Representation ResultRepresentation(const Function& signature);
+
+// Location for the result of a C signature function.
+Location ResultLocation(Representation result_rep);
+
+// Unboxed representations of the arguments to a C signature function.
 ZoneGrowableArray<Representation>* ArgumentRepresentations(
     const Function& signature);
 
+// Location for the arguments of a C signature function.
 ZoneGrowableArray<Location>* ArgumentLocations(
     const ZoneGrowableArray<Representation>& arg_reps);
 
+// Number of stack slots used in 'locations'.
+intptr_t NumStackArguments(const ZoneGrowableArray<Location>& locations);
+
 }  // namespace ffi
 
+}  // namespace compiler
+
 }  // namespace dart
 
 #endif  // RUNTIME_VM_COMPILER_FFI_H_
diff --git a/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc b/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc
index 2a0f1732..35477d9 100644
--- a/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc
+++ b/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc
@@ -1020,9 +1020,10 @@
       SetupDefaultParameterValues();
       ReadDefaultFunctionTypeArguments(function);
       return flow_graph_builder_->BuildGraphOfImplicitClosureFunction(function);
+    case RawFunction::kFfiTrampoline:
+      return flow_graph_builder_->BuildGraphOfFfiTrampoline(function);
     case RawFunction::kSignatureFunction:
     case RawFunction::kIrregexpFunction:
-    case RawFunction::kFfiTrampoline:
       break;
   }
   UNREACHABLE();
diff --git a/runtime/vm/compiler/frontend/kernel_to_il.cc b/runtime/vm/compiler/frontend/kernel_to_il.cc
index 3b42750..1c9371b 100644
--- a/runtime/vm/compiler/frontend/kernel_to_il.cc
+++ b/runtime/vm/compiler/frontend/kernel_to_il.cc
@@ -2,11 +2,13 @@
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
 
-#include "vm/compiler/aot/precompiler.h"
 #include "vm/compiler/frontend/kernel_to_il.h"
+#include "vm/compiler/aot/precompiler.h"
+#include "vm/compiler/backend/locations.h"
 
 #include "vm/compiler/backend/il.h"
 #include "vm/compiler/backend/il_printer.h"
+#include "vm/compiler/ffi.h"
 #include "vm/compiler/frontend/kernel_binary_flowgraph.h"
 #include "vm/compiler/frontend/kernel_translation_helper.h"
 #include "vm/compiler/frontend/prologue_builder.h"
@@ -393,6 +395,25 @@
   return Fragment(call);
 }
 
+Fragment FlowGraphBuilder::FfiCall(
+    const Function& signature,
+    const ZoneGrowableArray<Representation>& arg_reps,
+    const ZoneGrowableArray<Location>& arg_locs) {
+  Fragment body;
+
+  FfiCallInstr* call =
+      new (Z) FfiCallInstr(Z, GetNextDeoptId(), signature, arg_reps, arg_locs);
+
+  for (intptr_t i = call->InputCount() - 1; i >= 0; --i) {
+    call->SetInputAt(i, Pop());
+  }
+
+  Push(call);
+  body <<= call;
+
+  return body;
+}
+
 Fragment FlowGraphBuilder::RethrowException(TokenPosition position,
                                             int catch_try_index) {
   Fragment instructions;
@@ -915,9 +936,7 @@
   return body + Return(TokenPosition::kNoSource, omit_result_type_check);
 }
 
-static const LocalScope* MakeImplicitClosureScope(Zone* Z,
-                                                  const Function& function) {
-  Class& klass = Class::Handle(Z, function.Owner());
+static const LocalScope* MakeImplicitClosureScope(Zone* Z, const Class& klass) {
   ASSERT(!klass.IsNull());
   // Note that if klass is _Closure, DeclarationType will be _Closure,
   // and not the signature type.
@@ -959,7 +978,7 @@
   // Allocate a context that closes over `this`.
   // Note: this must be kept in sync with ScopeBuilder::BuildScopes.
   const LocalScope* implicit_closure_scope =
-      MakeImplicitClosureScope(Z, target);
+      MakeImplicitClosureScope(Z, Class::Handle(Z, target.Owner()));
   fragment += AllocateContext(implicit_closure_scope->context_variables());
   LocalVariable* context = MakeTemporary();
 
@@ -2254,6 +2273,180 @@
                            prologue_info);
 }
 
+Fragment FlowGraphBuilder::UnboxTruncate(Representation to) {
+  auto* unbox = UnboxInstr::Create(to, Pop(), DeoptId::kNone,
+                                   Instruction::kNotSpeculative);
+  Push(unbox);
+  return Fragment(unbox);
+}
+
+Fragment FlowGraphBuilder::LoadAddressFromFfiPointer() {
+  Fragment test;
+  TargetEntryInstr* null_entry;
+  TargetEntryInstr* not_null_entry;
+  JoinEntryInstr* join = BuildJoinEntry();
+
+  LocalVariable* result = parsed_function_->expression_temp_var();
+
+  LocalVariable* pointer = MakeTemporary();
+  test += LoadLocal(pointer);
+  test += BranchIfNull(&null_entry, &not_null_entry);
+
+  Fragment load_0(null_entry);
+  load_0 += IntConstant(0);
+  load_0 += StoreLocal(TokenPosition::kNoSource, result);
+  load_0 += Drop();
+  load_0 += Goto(join);
+
+  Fragment unbox(not_null_entry);
+  unbox += LoadLocal(pointer);
+  unbox += LoadNativeField(Slot::Pointer_c_memory_address());
+  unbox += StoreLocal(TokenPosition::kNoSource, result);
+  unbox += Drop();
+  unbox += Goto(join);
+
+  Fragment done{test.entry, join};
+  done += Drop();
+  done += LoadLocal(result);
+
+  return done;
+}
+
+Fragment FlowGraphBuilder::Box(Representation from) {
+  BoxInstr* box = BoxInstr::Create(from, Pop());
+  Push(box);
+  return Fragment(box);
+}
+
+Fragment FlowGraphBuilder::FfiPointerFromAddress(const Type& result_type) {
+  Fragment test;
+  TargetEntryInstr* null_entry;
+  TargetEntryInstr* not_null_entry;
+  JoinEntryInstr* join = BuildJoinEntry();
+
+  LocalVariable* address = MakeTemporary();
+  LocalVariable* result = parsed_function_->expression_temp_var();
+
+  test += LoadLocal(address);
+  test += IntConstant(0);
+  test += BranchIfEqual(&null_entry, &not_null_entry);
+
+  // If the result is 0, we return null because "0 means null".
+  Fragment load_null(null_entry);
+  {
+    load_null += NullConstant();
+    load_null += StoreLocal(TokenPosition::kNoSource, result);
+    load_null += Drop();
+    load_null += Goto(join);
+  }
+
+  Fragment box(not_null_entry);
+  {
+    Class& result_class = Class::ZoneHandle(Z, result_type.type_class());
+    TypeArguments& args = TypeArguments::ZoneHandle(Z, result_type.arguments());
+
+    // A kernel transform for FFI in the front-end ensures that type parameters
+    // do not appear in the type arguments to any Pointer classes in an FFI
+    // signature.
+    ASSERT(args.IsNull() || args.IsInstantiated());
+
+    box += Constant(args);
+    box += PushArgument();
+    box += AllocateObject(TokenPosition::kNoSource, result_class, 1);
+    LocalVariable* pointer = MakeTemporary();
+    box += LoadLocal(pointer);
+    box += LoadLocal(address);
+    box += StoreInstanceField(TokenPosition::kNoSource,
+                              Slot::Pointer_c_memory_address());
+    box += StoreLocal(TokenPosition::kNoSource, result);
+    box += Drop();
+    box += Goto(join);
+  }
+
+  Fragment rest(test.entry, join);
+  rest += Drop();
+  rest += LoadLocal(result);
+
+  return rest;
+}
+
+FlowGraph* FlowGraphBuilder::BuildGraphOfFfiTrampoline(
+    const Function& function) {
+#if !defined(TARGET_ARCH_X64)
+  UNREACHABLE();
+#else
+  graph_entry_ =
+      new (Z) GraphEntryInstr(*parsed_function_, Compiler::kNoOSRDeoptId);
+
+  auto normal_entry = BuildFunctionEntry(graph_entry_);
+  graph_entry_->set_normal_entry(normal_entry);
+
+  PrologueInfo prologue_info(-1, -1);
+
+  BlockEntryInstr* instruction_cursor =
+      BuildPrologue(normal_entry, &prologue_info);
+
+  Fragment body(instruction_cursor);
+  body += CheckStackOverflowInPrologue(function.token_pos());
+
+  const Function& signature = Function::ZoneHandle(Z, function.FfiCSignature());
+  const auto& arg_reps = *compiler::ffi::ArgumentRepresentations(signature);
+  const auto& arg_locs = *compiler::ffi::ArgumentLocations(arg_reps);
+
+  BuildArgumentTypeChecks(TypeChecksToBuild::kCheckAllTypeParameterBounds,
+                          &body, &body, &body);
+
+  // Unbox and push the arguments.
+  AbstractType& ffi_type = AbstractType::Handle(Z);
+  for (intptr_t pos = 1; pos < function.num_fixed_parameters(); pos++) {
+    body += LoadLocal(parsed_function_->ParameterVariable(pos));
+    ffi_type = signature.ParameterTypeAt(pos);
+
+    // Check for 'null'. Only ffi.Pointers are allowed to be null.
+    if (!compiler::ffi::NativeTypeIsPointer(ffi_type)) {
+      body += LoadLocal(parsed_function_->ParameterVariable(pos));
+      body <<=
+          new (Z) CheckNullInstr(Pop(), String::ZoneHandle(Z, function.name()),
+                                 GetNextDeoptId(), TokenPosition::kNoSource);
+    }
+
+    if (compiler::ffi::NativeTypeIsPointer(ffi_type)) {
+      body += LoadAddressFromFfiPointer();
+      body += UnboxTruncate(kUnboxedIntPtr);
+    } else {
+      body += UnboxTruncate(arg_reps[pos - 1]);
+    }
+  }
+
+  // Push the function pointer, which is stored (boxed) in the first slot of the
+  // context.
+  body += LoadLocal(parsed_function_->ParameterVariable(0));
+  body += LoadNativeField(Slot::Closure_context());
+  body += LoadNativeField(Slot::GetContextVariableSlotFor(
+      thread_, *MakeImplicitClosureScope(
+                    Z, Class::Handle(I->object_store()->ffi_pointer_class()))
+                    ->context_variables()[0]));
+  body += UnboxTruncate(kUnboxedIntPtr);
+  body += FfiCall(signature, arg_reps, arg_locs);
+
+  ffi_type = signature.result_type();
+  if (compiler::ffi::NativeTypeIsPointer(ffi_type)) {
+    body += Box(kUnboxedIntPtr);
+    body += FfiPointerFromAddress(Type::Cast(ffi_type));
+  } else if (compiler::ffi::NativeTypeIsVoid(ffi_type)) {
+    body += Drop();
+    body += NullConstant();
+  } else {
+    body += Box(compiler::ffi::ResultRepresentation(signature));
+  }
+
+  body += Return(TokenPosition::kNoSource);
+
+  return new (Z) FlowGraph(*parsed_function_, graph_entry_, last_used_block_id_,
+                           prologue_info);
+#endif
+}
+
 void FlowGraphBuilder::SetCurrentTryCatchBlock(TryCatchBlock* try_catch_block) {
   try_catch_block_ = try_catch_block;
   SetCurrentTryIndex(try_catch_block == nullptr ? kInvalidTryIndex
diff --git a/runtime/vm/compiler/frontend/kernel_to_il.h b/runtime/vm/compiler/frontend/kernel_to_il.h
index 7442db1..6212ac2 100644
--- a/runtime/vm/compiler/frontend/kernel_to_il.h
+++ b/runtime/vm/compiler/frontend/kernel_to_il.h
@@ -100,6 +100,7 @@
   FlowGraph* BuildGraphOfMethodExtractor(const Function& method);
   FlowGraph* BuildGraphOfNoSuchMethodDispatcher(const Function& function);
   FlowGraph* BuildGraphOfInvokeFieldDispatcher(const Function& function);
+  FlowGraph* BuildGraphOfFfiTrampoline(const Function& function);
 
   Fragment NativeFunctionBody(const Function& function,
                               LocalVariable* first_parameter);
@@ -148,6 +149,9 @@
                        intptr_t argument_count,
                        const Array& argument_names,
                        bool use_unchecked_entry = false);
+  Fragment FfiCall(const Function& signature,
+                   const ZoneGrowableArray<Representation>& arg_reps,
+                   const ZoneGrowableArray<Location>& arg_locs);
 
   Fragment RethrowException(TokenPosition position, int catch_try_index);
   Fragment LoadClassId();
@@ -204,6 +208,25 @@
   bool NeedsDebugStepCheck(Value* value, TokenPosition position);
   Fragment DebugStepCheck(TokenPosition position);
 
+  // Truncates (instead of deoptimizing) if the origin does not fit into the
+  // target representation.
+  Fragment UnboxTruncate(Representation to);
+
+  // Sign-extends kUnboxedInt32 and zero-extends kUnboxedUint32.
+  Fragment Box(Representation from);
+
+  // Pops an 'ffi.Pointer' off the stack.
+  // If it's null, pushes 0.
+  // Otherwise pushes the address (in boxed representation).
+  Fragment LoadAddressFromFfiPointer();
+
+  // Reverse of 'LoadAddressFromFfiPointer':
+  // Pops an integer off the stack.
+  // If it's zero, pushes null.
+  // If it's nonzero, creates an 'ffi.Pointer' holding the address and pushes
+  // the pointer.
+  Fragment FfiPointerFromAddress(const Type& result_type);
+
   LocalVariable* LookupVariable(intptr_t kernel_offset);
 
   // Build argument type checks for the current function.
diff --git a/runtime/vm/compiler/frontend/prologue_builder.cc b/runtime/vm/compiler/frontend/prologue_builder.cc
index c7c6afe..3c5103c 100644
--- a/runtime/vm/compiler/frontend/prologue_builder.cc
+++ b/runtime/vm/compiler/frontend/prologue_builder.cc
@@ -29,7 +29,7 @@
 
 bool PrologueBuilder::HasEmptyPrologue(const Function& function) {
   return !function.HasOptionalParameters() && !function.IsGeneric() &&
-         !function.IsClosureFunction();
+         !function.CanReceiveDynamicInvocation();
 }
 
 BlockEntryInstr* PrologueBuilder::BuildPrologue(BlockEntryInstr* entry,
@@ -41,7 +41,7 @@
 
   const bool load_optional_arguments = function_.HasOptionalParameters();
   const bool expect_type_args = function_.IsGeneric();
-  const bool check_arguments = function_.IsClosureFunction();
+  const bool check_arguments = function_.CanReceiveDynamicInvocation();
 
   Fragment prologue = Fragment(entry);
   JoinEntryInstr* nsm = NULL;
diff --git a/runtime/vm/compiler/frontend/scope_builder.cc b/runtime/vm/compiler/frontend/scope_builder.cc
index aea56ce..e23bf10 100644
--- a/runtime/vm/compiler/frontend/scope_builder.cc
+++ b/runtime/vm/compiler/frontend/scope_builder.cc
@@ -127,6 +127,10 @@
     scope_->AddVariable(parsed_function_->arg_desc_var());
   }
 
+  if (parsed_function_->function().IsFfiTrampoline()) {
+    needs_expr_temp_ = true;
+  }
+
   LocalVariable* context_var = parsed_function_->current_context_var();
   context_var->set_is_forced_stack();
   scope_->AddVariable(context_var);
@@ -372,17 +376,19 @@
     }
     case RawFunction::kNoSuchMethodDispatcher:
     case RawFunction::kInvokeFieldDispatcher:
+    case RawFunction::kFfiTrampoline:
       for (intptr_t i = 0; i < function.NumParameters(); ++i) {
-        LocalVariable* variable =
-            MakeVariable(TokenPosition::kNoSource, TokenPosition::kNoSource,
-                         String::ZoneHandle(Z, function.ParameterNameAt(i)),
-                         AbstractType::dynamic_type());
+        LocalVariable* variable = MakeVariable(
+            TokenPosition::kNoSource, TokenPosition::kNoSource,
+            String::ZoneHandle(Z, function.ParameterNameAt(i)),
+            AbstractType::ZoneHandle(Z, function.IsFfiTrampoline()
+                                            ? function.ParameterTypeAt(i)
+                                            : Object::dynamic_type().raw()));
         scope_->InsertParameterAt(i, variable);
       }
       break;
     case RawFunction::kSignatureFunction:
     case RawFunction::kIrregexpFunction:
-    case RawFunction::kFfiTrampoline:
       UNREACHABLE();
   }
   if (needs_expr_temp_) {
diff --git a/runtime/vm/compiler/jit/compiler.cc b/runtime/vm/compiler/jit/compiler.cc
index ba9c4f4..db4a507 100644
--- a/runtime/vm/compiler/jit/compiler.cc
+++ b/runtime/vm/compiler/jit/compiler.cc
@@ -381,8 +381,11 @@
 
   // CreateDeoptInfo uses the object pool and needs to be done before
   // FinalizeCode.
-  const Array& deopt_info_array =
-      Array::Handle(zone, graph_compiler->CreateDeoptInfo(assembler));
+  Array& deopt_info_array = Array::Handle(zone, Object::empty_array().raw());
+  if (!function.ForceOptimize()) {
+    deopt_info_array = graph_compiler->CreateDeoptInfo(assembler);
+  }
+
   // Allocates instruction object. Since this occurs only at safepoint,
   // there can be no concurrent access to the instruction page.
   Code& code = Code::Handle(Code::FinalizeCode(
@@ -431,7 +434,13 @@
   graph_compiler->FinalizeStaticCallTargetsTable(code);
   graph_compiler->FinalizeCodeSourceMap(code);
 
-  if (optimized()) {
+  if (function.ForceOptimize()) {
+    ASSERT(optimized() && thread()->IsMutatorThread());
+    code.set_is_optimized(false);
+    function.AttachCode(code);
+    function.set_unoptimized_code(code);
+    function.SetWasCompiled(true);
+  } else if (optimized()) {
     // Installs code while at safepoint.
     if (thread()->IsMutatorThread()) {
       const bool is_osr = osr_id() != Compiler::kNoOSRDeoptId;
@@ -764,6 +773,7 @@
                                         intptr_t osr_id) {
   ASSERT(!FLAG_precompiled_mode);
   ASSERT(!optimized || function.WasCompiled());
+  if (function.ForceOptimize()) optimized = true;
   LongJumpScope jump;
   if (setjmp(*jump.Set()) == 0) {
     Thread* const thread = Thread::Current();
diff --git a/runtime/vm/constants_x64.h b/runtime/vm/constants_x64.h
index 5de7ddc..00affe4 100644
--- a/runtime/vm/constants_x64.h
+++ b/runtime/vm/constants_x64.h
@@ -145,9 +145,9 @@
 
 #define R(reg) (1 << (reg))
 
-#if defined(_WIN64)
 class CallingConventions {
  public:
+#if defined(_WIN64)
   static const Register kArg1Reg = RCX;
   static const Register kArg2Reg = RDX;
   static const Register kArg3Reg = R8;
@@ -186,10 +186,10 @@
   // Windows x64 ABI specifies that small objects are passed in registers.
   // Otherwise they are passed by reference.
   static const size_t kRegisterTransferLimit = 16;
-};
+
+  static constexpr Register kReturnReg = RAX;
+  static constexpr FpuRegister kReturnFpuReg = XMM0;
 #else
-class CallingConventions {
- public:
   static const Register kArg1Reg = RDI;
   static const Register kArg2Reg = RSI;
   static const Register kArg3Reg = RDX;
@@ -229,9 +229,24 @@
   static const intptr_t kCalleeSaveXmmRegisters = 0;
 
   static const XmmRegister xmmFirstNonParameterReg = XMM8;
-};
+
+  static constexpr Register kReturnReg = RAX;
+  static constexpr FpuRegister kReturnFpuReg = XMM0;
 #endif
 
+  COMPILE_ASSERT((kArgumentRegisters & kReservedCpuRegisters) == 0);
+
+  static constexpr Register kFirstCalleeSavedCpuReg = RBX;
+  static constexpr Register kFirstNonArgumentRegister = RAX;
+  static constexpr Register kSecondNonArgumentRegister = RBX;
+
+  COMPILE_ASSERT(((R(kFirstCalleeSavedCpuReg)) & kCalleeSaveCpuRegisters) != 0);
+
+  COMPILE_ASSERT(((R(kFirstNonArgumentRegister) |
+                   R(kSecondNonArgumentRegister)) &
+                  kArgumentRegisters) == 0);
+};
+
 #undef R
 
 class Instr {
diff --git a/runtime/vm/dart_api_impl.h b/runtime/vm/dart_api_impl.h
index 0bdeec5..41f7383 100644
--- a/runtime/vm/dart_api_impl.h
+++ b/runtime/vm/dart_api_impl.h
@@ -298,7 +298,7 @@
   static bool IsFfiEnabled() {
     // dart:ffi is not implemented for the following configurations
 #if !defined(TARGET_ARCH_X64)
-    // https://github.com/dart-lang/sdk/issues/35774
+    // https://github.com/dart-lang/sdk/issues/35774 IA32
     return false;
 #elif !defined(TARGET_OS_LINUX) && !defined(TARGET_OS_MACOS) &&                \
     !defined(TARGET_OS_WINDOWS)
diff --git a/runtime/vm/ffi_trampoline_stubs_x64.cc b/runtime/vm/ffi_trampoline_stubs_x64.cc
index 1afc9b5..a98236d 100644
--- a/runtime/vm/ffi_trampoline_stubs_x64.cc
+++ b/runtime/vm/ffi_trampoline_stubs_x64.cc
@@ -31,432 +31,13 @@
 
 namespace dart {
 
-static intptr_t NumStackArguments(
-    const ZoneGrowableArray<Location>& locations) {
-  intptr_t num_arguments = locations.length();
-  intptr_t num_stack_arguments = 0;
-  for (intptr_t i = 0; i < num_arguments; i++) {
-    if (locations.At(i).IsStackSlot()) {
-      num_stack_arguments++;
-    }
-  }
-  return num_stack_arguments;
-}
-
-// Input parameters:
-//   Register reg : a Null, or something else
-static void GenerateNotNullCheck(Assembler* assembler, Register reg) {
-  Label not_null;
-  Address throw_null_pointer_address =
-      Address(THR, Thread::OffsetFromThread(&kArgumentNullErrorRuntimeEntry));
-
-  __ CompareObject(reg, Object::null_object());
-  __ j(NOT_EQUAL, &not_null, Assembler::kNearJump);
-
-  // TODO(dacoharkes): Create the message here and use
-  // kArgumentErrorRuntimeEntry to report which argument was null.
-  __ movq(CODE_REG, Address(THR, Thread::call_to_runtime_stub_offset()));
-  __ movq(RBX, throw_null_pointer_address);
-  __ movq(R10, Immediate(0));
-  __ call(Address(THR, Thread::call_to_runtime_entry_point_offset()));
-
-  __ Bind(&not_null);
-}
-
-// Saves an int64 in the thread so GC does not trip.
-//
-// Input parameters:
-//   Register src : a C int64
-static void GenerateSaveInt64GCSafe(Assembler* assembler, Register src) {
-  __ movq(Address(THR, Thread::unboxed_int64_runtime_arg_offset()), src);
-}
-
-// Loads an int64 from the thread.
-static void GenerateLoadInt64GCSafe(Assembler* assembler, Register dst) {
-  __ movq(dst, Address(THR, Thread::unboxed_int64_runtime_arg_offset()));
-}
-
-// Takes a Dart int and converts it to a C int64.
-//
-// Input parameters:
-//   Register reg : a Dart Null, Smi, or Mint
-// Output parameters:
-//   Register reg : a C int64
-// Invariant: keeps ArgumentRegisters and XmmArgumentRegisters intact
-void GenerateMarshalInt64(Assembler* assembler, Register reg) {
-  ASSERT(reg != TMP);
-  ASSERT((1 << TMP & CallingConventions::kArgumentRegisters) == 0);
-  Label done, not_smi;
-
-  // Exception on Null
-  GenerateNotNullCheck(assembler, reg);
-
-  // Smi or Mint?
-  __ movq(TMP, reg);
-  __ testq(TMP, Immediate(kSmiTagMask));
-  __ j(NOT_ZERO, &not_smi, Assembler::kNearJump);
-
-  // Smi
-  __ SmiUntag(reg);
-  __ jmp(&done, Assembler::kNearJump);
-
-  // Mint
-  __ Bind(&not_smi);
-  __ movq(reg, FieldAddress(reg, Mint::value_offset()));
-  __ Bind(&done);
-}
-
-// Takes a C int64 and converts it to a Dart int.
-//
-// Input parameters:
-//   RAX : a C int64
-// Output paramaters:
-//   RAX : a Dart Smi or Mint
-static void GenerateUnmarshalInt64(Assembler* assembler) {
-  const Class& mint_class =
-      Class::ZoneHandle(Isolate::Current()->object_store()->mint_class());
-  ASSERT(!mint_class.IsNull());
-  const auto& mint_allocation_stub =
-      Code::ZoneHandle(StubCode::GetAllocationStubForClass(mint_class));
-  ASSERT(!mint_allocation_stub.IsNull());
-  Label done;
-
-  // Try whether it fits in a Smi.
-  __ movq(TMP, RAX);
-  __ SmiTag(RAX);
-  __ j(NO_OVERFLOW, &done, Assembler::kNearJump);
-
-  // Mint
-  // Backup result value (to avoid GC).
-  GenerateSaveInt64GCSafe(assembler, TMP);
-
-  // Allocate object (can call into runtime).
-  __ Call(mint_allocation_stub);
-
-  // Store result value.
-  GenerateLoadInt64GCSafe(assembler, TMP);
-  __ movq(FieldAddress(RAX, Mint::value_offset()), TMP);
-
-  __ Bind(&done);
-}
-
-// Takes a Dart double and converts it into a C double.
-//
-// Input parameters:
-//   Register reg : a Dart Null or Double
-// Output parameters:
-//   XmmRegister xmm_reg : a C double
-// Invariant: keeps ArgumentRegisters and other XmmArgumentRegisters intact
-static void GenerateMarshalDouble(Assembler* assembler,
-                                  Register reg,
-                                  XmmRegister xmm_reg) {
-  ASSERT((1 << reg & CallingConventions::kArgumentRegisters) == 0);
-
-  // Throw a Dart Exception on Null.
-  GenerateNotNullCheck(assembler, reg);
-
-  __ movq(reg, FieldAddress(reg, Double::value_offset()));
-  __ movq(xmm_reg, reg);
-}
-
-// Takes a C double and converts it into a Dart double.
-//
-// Input parameters:
-//   XMM0 : a C double
-// Output parameters:
-//   RAX : a Dart Double
-static void GenerateUnmarshalDouble(Assembler* assembler) {
-  const auto& double_class =
-      Class::ZoneHandle(Isolate::Current()->object_store()->double_class());
-  ASSERT(!double_class.IsNull());
-  const auto& double_allocation_stub =
-      Code::ZoneHandle(StubCode::GetAllocationStubForClass(double_class));
-  ASSERT(!double_allocation_stub.IsNull());
-
-  // Backup result value (to avoid GC).
-  __ movq(RAX, XMM0);
-  GenerateSaveInt64GCSafe(assembler, RAX);
-
-  // Allocate object (can call into runtime).
-  __ Call(double_allocation_stub);
-
-  // Store the result value.
-  GenerateLoadInt64GCSafe(assembler, TMP);
-  __ movq(FieldAddress(RAX, Double::value_offset()), TMP);
-}
-
-// Takes a Dart double and converts into a C float.
-//
-// Input parameters:
-//   Register reg : a Dart double
-// Output parameters:
-//   XmmRegister xxmReg : a C float
-// Invariant: keeps ArgumentRegisters and other XmmArgumentRegisters intact
-static void GenerateMarshalFloat(Assembler* assembler,
-                                 Register reg,
-                                 XmmRegister xmm_reg) {
-  ASSERT((1 << reg & CallingConventions::kArgumentRegisters) == 0);
-
-  GenerateMarshalDouble(assembler, reg, xmm_reg);
-
-  __ cvtsd2ss(xmm_reg, xmm_reg);
-}
-
-// Takes a C float and converts it into a Dart double.
-//
-// Input parameters:
-//   XMM0 : a C float
-// Output paramaters:
-//   RAX : a Dart Double
-static void GenerateUnmarshalFloat(Assembler* assembler) {
-  __ cvtss2sd(XMM0, XMM0);
-  GenerateUnmarshalDouble(assembler);
-}
-
-// Takes a Dart ffi.Pointer and converts it into a C pointer.
-//
-// Input parameters:
-//   Register reg : a Dart ffi.Pointer or Null
-// Output parameters:
-//   Register reg : a C pointer
-static void GenerateMarshalPointer(Assembler* assembler, Register reg) {
-  Label done, not_null;
-
-  __ CompareObject(reg, Object::null_object());
-  __ j(NOT_EQUAL, &not_null, Assembler::kNearJump);
-
-  // If null, the address is 0.
-  __ movq(reg, Immediate(0));
-  __ jmp(&done);
-
-  // If not null but a Pointer, load the address.
-  __ Bind(&not_null);
-  __ movq(reg, FieldAddress(reg, Pointer::c_memory_address_offset()));
-  GenerateMarshalInt64(assembler, reg);
-  __ Bind(&done);
-}
-
-// Takes a C pointer and converts it into a Dart ffi.Pointer or Null.
-//
-// Input parameters:
-//   RAX : a C pointer
-// Outpot paramaters:
-//   RAX : a Dart ffi.Pointer or Null
-static void GenerateUnmarshalPointer(Assembler* assembler,
-                                     Address closure_dart,
-                                     const Class& pointer_class) {
-  Label done, not_null;
-  ASSERT(!pointer_class.IsNull());
-  const auto& pointer_allocation_stub =
-      Code::ZoneHandle(StubCode::GetAllocationStubForClass(pointer_class));
-  ASSERT(!pointer_allocation_stub.IsNull());
-
-  // If the address is 0, return a Dart Null.
-  __ cmpq(RAX, Immediate(0));
-  __ j(NOT_EQUAL, &not_null, Assembler::kNearJump);
-  __ LoadObject(RAX, Object::null_object());
-  __ jmp(&done);
-
-  __ Bind(&not_null);
-  GenerateUnmarshalInt64(assembler);
-  __ pushq(RAX);
-
-  // Allocate object (can call into runtime).
-  __ movq(TMP, closure_dart);
-  __ movq(TMP, FieldAddress(TMP, Closure::function_offset()));
-  __ movq(TMP, FieldAddress(TMP, Function::result_type_offset()));
-  __ pushq(FieldAddress(TMP, Type::arguments_offset()));
-  __ Call(pointer_allocation_stub);
-  __ popq(TMP);  // Pop type arguments.
-
-  // Store the result value.
-  __ popq(RDX);
-  __ movq(FieldAddress(RAX, Pointer::c_memory_address_offset()), RDX);
-  __ Bind(&done);
-}
-
-static void GenerateMarshalArgument(Assembler* assembler,
-                                    const AbstractType& arg_type,
-                                    Register reg,
-                                    XmmRegister xmm_reg) {
-  switch (arg_type.type_class_id()) {
-    case kFfiInt8Cid:
-    case kFfiInt16Cid:
-    case kFfiInt32Cid:
-    case kFfiInt64Cid:
-    case kFfiUint8Cid:
-    case kFfiUint16Cid:
-    case kFfiUint32Cid:
-    case kFfiUint64Cid:
-    case kFfiIntPtrCid:
-      // TODO(dacoharkes): Truncate and sign extend 8 bit and 16 bit, and write
-      // tests. https://github.com/dart-lang/sdk/issues/35787
-      GenerateMarshalInt64(assembler, reg);
-      return;
-    case kFfiFloatCid:
-      GenerateMarshalFloat(assembler, reg, xmm_reg);
-      return;
-    case kFfiDoubleCid:
-      GenerateMarshalDouble(assembler, reg, xmm_reg);
-      return;
-    case kFfiPointerCid:
-    default:  // Subtypes of Pointer.
-      GenerateMarshalPointer(assembler, reg);
-      return;
-  }
-}
-
-static void GenerateUnmarshalResult(Assembler* assembler,
-                                    const AbstractType& result_type,
-                                    Address closure_dart) {
-  switch (result_type.type_class_id()) {
-    case kFfiVoidCid:
-      __ LoadObject(RAX, Object::null_object());
-      return;
-    case kFfiInt8Cid:
-    case kFfiInt16Cid:
-    case kFfiInt32Cid:
-    case kFfiInt64Cid:
-    case kFfiUint8Cid:
-    case kFfiUint16Cid:
-    case kFfiUint32Cid:
-    case kFfiUint64Cid:
-    case kFfiIntPtrCid:
-      GenerateUnmarshalInt64(assembler);
-      return;
-    case kFfiFloatCid:
-      GenerateUnmarshalFloat(assembler);
-      return;
-    case kFfiDoubleCid:
-      GenerateUnmarshalDouble(assembler);
-      return;
-    case kFfiPointerCid:
-    default:  // subtypes of Pointer
-      break;
-  }
-  Class& cls = Class::ZoneHandle(Thread::Current()->zone(),
-                                 Type::Cast(result_type).type_class());
-
-  GenerateUnmarshalPointer(assembler, closure_dart, cls);
-}
-
-// Generates a assembly for dart:ffi trampolines:
-// - marshal arguments
-// - put the arguments in registers and on the c stack
-// - invoke the c function
-// - (c result register is the same as dart, so keep in place)
-// - unmarshal c result
-// - return
-//
-// Input parameters:
-//   RSP + kWordSize *  num_arguments      : closure.
-//   RSP + kWordSize * (num_arguments - 1) : arg 1.
-//   RSP + kWordSize * (num_arguments - 2) : arg 2.
-//   RSP + kWordSize                       : arg n.
-// After entering stub:
-//   RBP = RSP (before stub) - kWordSize
-//   RBP + kWordSize * (num_arguments + 1) : closure.
-//   RBP + kWordSize *  num_arguments      : arg 1.
-//   RBP + kWordSize * (num_arguments - 1) : arg 2.
-//   RBP + kWordSize *  2                  : arg n.
-//
-// TODO(dacoharkes): Test truncation on non 64 bits ints and floats.
-void GenerateFfiTrampoline(Assembler* assembler, const Function& signature) {
-  ZoneGrowableArray<Representation>* arg_representations =
-      ffi::ArgumentRepresentations(signature);
-  ZoneGrowableArray<Location>* arg_locations =
-      ffi::ArgumentLocations(*arg_representations);
-
-  intptr_t num_dart_arguments = signature.num_fixed_parameters();
-  intptr_t num_arguments = num_dart_arguments - 1;  // ignore closure
-
-  __ EnterStubFrame();
-
-  // Save exit frame information to enable stack walking as we are about
-  // to transition to Dart VM C++ code.
-  __ movq(Address(THR, Thread::top_exit_frame_info_offset()), RBP);
-
-#if defined(DEBUG)
-  {
-    Label ok;
-    // Check that we are always entering from Dart code.
-    __ movq(TMP, Immediate(VMTag::kDartCompiledTagId));
-    __ cmpq(TMP, Assembler::VMTagAddress());
-    __ j(EQUAL, &ok, Assembler::kNearJump);
-    __ Stop("Not coming from Dart code.");
-    __ Bind(&ok);
-  }
-#endif
-
-  // Reserve space for arguments and align frame before entering C++ world.
-  __ subq(RSP, Immediate(NumStackArguments(*arg_locations) * kWordSize));
-  if (OS::ActivationFrameAlignment() > 1) {
-    __ andq(RSP, Immediate(~(OS::ActivationFrameAlignment() - 1)));
-  }
-
-  // Prepare address for calling the C function.
-  Address closure_dart = Address(RBP, (num_dart_arguments + 1) * kWordSize);
-  __ movq(RBX, closure_dart);
-  __ movq(RBX, FieldAddress(RBX, Closure::context_offset()));
-  __ movq(RBX, FieldAddress(RBX, Context::variable_offset(0)));
-  GenerateMarshalInt64(assembler, RBX);  // Address is a Smi or Mint.
-
-  // Marshal arguments and store in the right register.
-  for (intptr_t i = 0; i < num_arguments; i++) {
-    Representation rep = arg_representations->At(i);
-    Location loc = arg_locations->At(i);
-
-    // We do marshalling in the the target register or in RAX.
-    Register reg = loc.IsRegister() ? loc.reg() : RAX;
-    // For doubles and floats we use target xmm register or first non param reg.
-    FpuRegister xmm_reg = loc.IsFpuRegister()
-                              ? loc.fpu_reg()
-                              : CallingConventions::xmmFirstNonParameterReg;
-
-    // Load parameter from Dart stack.
-    __ movq(reg, Address(RBP, (num_arguments + 1 - i) * kWordSize));
-
-    // Marshal argument.
-    AbstractType& arg_type =
-        AbstractType::Handle(signature.ParameterTypeAt(i + 1));
-    GenerateMarshalArgument(assembler, arg_type, reg, xmm_reg);
-
-    // Store marshalled argument where c expects value.
-    if (loc.IsStackSlot()) {
-      if (rep == kUnboxedDouble) {
-        __ movq(reg, xmm_reg);
-      }
-      __ movq(loc.ToStackSlotAddress(), reg);
-    }
-  }
-
-  // Mark that the thread is executing VM code.
-  __ movq(Assembler::VMTagAddress(), RBX);
-
-  __ CallCFunction(RBX);
-
-  // Mark that the thread is executing Dart code.
-  __ movq(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
-
-  // Unmarshal result.
-  AbstractType& return_type = AbstractType::Handle(signature.result_type());
-  GenerateUnmarshalResult(assembler, return_type, closure_dart);
-
-  // Reset exit frame information in Isolate structure.
-  __ movq(Address(THR, Thread::top_exit_frame_info_offset()), Immediate(0));
-
-  __ LeaveStubFrame();
-
-  __ ret();
-}
-
 void GenerateFfiInverseTrampoline(Assembler* assembler,
                                   const Function& signature,
                                   void* dart_entry_point) {
   ZoneGrowableArray<Representation>* arg_representations =
-      ffi::ArgumentRepresentations(signature);
+      compiler::ffi::ArgumentRepresentations(signature);
   ZoneGrowableArray<Location>* arg_locations =
-      ffi::ArgumentLocations(*arg_representations);
+      compiler::ffi::ArgumentLocations(*arg_representations);
 
   intptr_t num_dart_arguments = signature.num_fixed_parameters();
   intptr_t num_arguments = num_dart_arguments - 1;  // Ignore closure.
diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
index 4393b19..cf4d4a1 100644
--- a/runtime/vm/object.cc
+++ b/runtime/vm/object.cc
@@ -5985,6 +5985,20 @@
   }
 }
 
+void Function::SetFfiCSignature(const Function& sig) const {
+  ASSERT(IsFfiTrampoline());
+  const Object& obj = Object::Handle(raw_ptr()->data_);
+  ASSERT(!obj.IsNull());
+  FfiTrampolineData::Cast(obj).set_c_signature(sig);
+}
+
+RawFunction* Function::FfiCSignature() const {
+  ASSERT(IsFfiTrampoline());
+  const Object& obj = Object::Handle(raw_ptr()->data_);
+  ASSERT(!obj.IsNull());
+  return FfiTrampolineData::Cast(obj).c_signature();
+}
+
 RawType* Function::SignatureType() const {
   Type& type = Type::Handle(ExistingSignatureType());
   if (type.IsNull()) {
@@ -7975,10 +7989,10 @@
       kind_str = " dynamic-invocation-forwarder";
       break;
     case RawFunction::kInvokeFieldDispatcher:
-      kind_str = "invoke-field-dispatcher";
+      kind_str = " invoke-field-dispatcher";
       break;
     case RawFunction::kIrregexpFunction:
-      kind_str = "irregexp-function";
+      kind_str = " irregexp-function";
       break;
     case RawFunction::kFfiTrampoline:
       kind_str = " ffi-trampoline-function";
@@ -8098,6 +8112,10 @@
   StorePointer(&raw_ptr()->signature_type_, value.raw());
 }
 
+void FfiTrampolineData::set_c_signature(const Function& value) const {
+  StorePointer(&raw_ptr()->c_signature_, value.raw());
+}
+
 RawFfiTrampolineData* FfiTrampolineData::New() {
   ASSERT(Object::ffi_trampoline_data_class() != Class::null());
   RawObject* raw =
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index aa1147a..0b999b4 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -1978,6 +1978,14 @@
   // Update the signature type (with a canonical version).
   void SetSignatureType(const Type& value) const;
 
+  // Set the "C signature" function for an FFI trampoline.
+  // Can only be used on FFI trampolines.
+  void SetFfiCSignature(const Function& sig) const;
+
+  // Retrieves the "C signature" function for an FFI trampoline.
+  // Can only be used on FFI trampolines.
+  RawFunction* FfiCSignature() const;
+
   // Return a new function with instantiated result and parameter types.
   RawFunction* InstantiateSignatureFrom(
       const TypeArguments& instantiator_type_arguments,
@@ -2244,6 +2252,13 @@
   bool IsFactory() const {
     return (kind() == RawFunction::kConstructor) && is_static();
   }
+
+  // Whether this function can receive an invocation where the number and names
+  // of arguments have not been checked.
+  bool CanReceiveDynamicInvocation() const {
+    return IsClosureFunction() || IsFfiTrampoline();
+  }
+
   bool IsDynamicFunction(bool allow_abstract = false) const {
     if (is_static() || (!allow_abstract && is_abstract())) {
       return false;
@@ -2420,6 +2435,13 @@
   bool IsOptimizable() const;
   void SetIsOptimizable(bool value) const;
 
+  // Whether this function must be optimized immediately and cannot be compiled
+  // with the unoptimizing compiler. Such a function must be sure to not
+  // deoptimize, since we won't generate deoptimization info or register
+  // dependencies. It will be compiled into optimized code immediately when it's
+  // run.
+  bool ForceOptimize() const { return IsFfiTrampoline(); }
+
   bool CanBeInlined() const;
 
   MethodRecognizer::Kind recognized_kind() const {
@@ -2989,6 +3011,9 @@
   RawType* signature_type() const { return raw_ptr()->signature_type_; }
   void set_signature_type(const Type& value) const;
 
+  RawFunction* c_signature() const { return raw_ptr()->c_signature_; }
+  void set_c_signature(const Function& value) const;
+
   static RawFfiTrampolineData* New();
 
   FINAL_HEAP_OBJECT_IMPLEMENTATION(FfiTrampolineData, Object);
diff --git a/runtime/vm/parser.cc b/runtime/vm/parser.cc
index d2a2b2e..17a4a79 100644
--- a/runtime/vm/parser.cc
+++ b/runtime/vm/parser.cc
@@ -78,7 +78,8 @@
 
   const bool load_optional_arguments = function.HasOptionalParameters();
 
-  const bool check_arguments = function_.IsClosureFunction();
+  const bool check_arguments =
+      function_.IsClosureFunction() || function.IsFfiTrampoline();
 
   const bool need_argument_descriptor =
       load_optional_arguments || check_arguments || reify_generic_argument;
diff --git a/runtime/vm/raw_object.h b/runtime/vm/raw_object.h
index 5cd9448..94d3351 100644
--- a/runtime/vm/raw_object.h
+++ b/runtime/vm/raw_object.h
@@ -1036,7 +1036,8 @@
 
   VISIT_FROM(RawObject*, signature_type_);
   RawType* signature_type_;
-  VISIT_TO(RawObject*, signature_type_);
+  RawFunction* c_signature_;
+  VISIT_TO(RawObject*, c_signature_);
 };
 
 class RawField : public RawObject {
diff --git a/tests/standalone_2/ffi/function_test.dart b/tests/standalone_2/ffi/function_test.dart
index b7e8129..d22f771 100644
--- a/tests/standalone_2/ffi/function_test.dart
+++ b/tests/standalone_2/ffi/function_test.dart
@@ -20,7 +20,7 @@
     testNativeFunctionFromCast();
     testNativeFunctionFromLookup();
     test64bitInterpretations();
-    //  TODO(36122): testExtension();
+    testExtension();
     testTruncation();
     testNativeFunctionDoubles();
     testNativeFunctionFloats();
diff --git a/tests/standalone_2/standalone_2_kernel.status b/tests/standalone_2/standalone_2_kernel.status
index 31ebd3d..9038446 100644
--- a/tests/standalone_2/standalone_2_kernel.status
+++ b/tests/standalone_2/standalone_2_kernel.status
@@ -230,11 +230,8 @@
 io/web_socket_test: Skip # Timeout
 
 [ $arch != x64 || $compiler != dartk || $mode == product || $system != linux && $system != macos ]
-ffi/function_stress_test: SkipByDesign # FFI must be supported. Also requires --verbose-gc, which isn't included in product. Windows issues will be fixed in IL CL: https://github.com/dart-lang/sdk/issues/36138
-ffi/subtype_test: SkipByDesign # FFI must be supported. Also requires --verbose-gc, which isn't included in product.
-
-[ $mode == product || $mode != product ]
-ffi/negative_function_test: Skip # Issues 36033, 36034
+ffi/function_stress_test: SkipByDesign # FFI must be supported. Also requires --verbose-gc, which isn't included in product.
+ffi/subtype_test: SkipByDesign # FFI must be supported. Also requires --verbose-gc, which isn't included in product. See #36138 for Windows.
 
 [ $compiler != dartk && $compiler != dartkb && $compiler != dartkp || $compiler == dartkp && $system == windows ]
 entrypoints_verification_test: SkipByDesign # Requires VM to run. Cannot run in precompiled Windows because the DLL is linked against dart.exe instead of dart_precompiled_runtime.exe.