[vm, simarm64] Handle FFI callouts.

TEST=ci
Bug: https://github.com/dart-lang/sdk/issues/60204
Change-Id: I2a7546cea0886b05873dbc59f5c9f049aad963c7
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/411960
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>
diff --git a/runtime/BUILD.gn b/runtime/BUILD.gn
index 6d91281..450411e 100644
--- a/runtime/BUILD.gn
+++ b/runtime/BUILD.gn
@@ -149,6 +149,9 @@
   }
   if (dart_force_simulator) {
     defines += [ "USING_SIMULATOR" ]
+    if (dart_target_arch == "arm64") {
+      defines += [ "SIMULATOR_FFI" ]
+    }
   }
 }
 
diff --git a/runtime/lib/ffi_dynamic_library.cc b/runtime/lib/ffi_dynamic_library.cc
index ec42580..5fb31a3 100644
--- a/runtime/lib/ffi_dynamic_library.cc
+++ b/runtime/lib/ffi_dynamic_library.cc
@@ -29,12 +29,12 @@
 
 namespace dart {
 
-#if defined(USING_SIMULATOR) || (defined(DART_PRECOMPILER) && !defined(TESTING))
+#if (defined(USING_SIMULATOR) && !defined(SIMULATOR_FFI)) ||                   \
+    (defined(DART_PRECOMPILER) && !defined(TESTING))
 
 DART_NORETURN static void SimulatorUnsupported() {
 #if defined(USING_SIMULATOR)
-  Exceptions::ThrowUnsupportedError(
-      "Not supported on simulated architectures.");
+  Exceptions::ThrowUnsupportedError("Not supported on this simulator.");
 #else
   Exceptions::ThrowUnsupportedError("Not supported in precompiler.");
 #endif
@@ -516,7 +516,7 @@
   return Pointer::New(reinterpret_cast<intptr_t>(FfiResolve));
 }
 
-#endif  // defined(USING_SIMULATOR) ||                                         \
+#endif  // (defined(USING_SIMULATOR) && !defined(SIMULATOR_FFI)) ||            \
         // (defined(DART_PRECOMPILER) && !defined(TESTING))
 
 }  // namespace dart
diff --git a/runtime/tests/vm/vm.status b/runtime/tests/vm/vm.status
index 019c9d3..06d0c55 100644
--- a/runtime/tests/vm/vm.status
+++ b/runtime/tests/vm/vm.status
@@ -196,6 +196,7 @@
 dart/ffi_structs_optimizations_il_test: SkipByDesign # https://dartbug.com/37299 Test uses dart:ffi which is not supported on simulators.
 dart/gc/splay_c_finalizer_test: SkipByDesign # No FFI on simulators
 dart/isolates/dart_api_create_lightweight_isolate_test: SkipByDesign # https://dartbug.com/37299 Test uses dart:ffi which is not supported on simulators.
+dart/isolates/many_isolates_blocked_at_monitor_test: SkipByDesign # https://dartbug.com/37299 FFI not supported on simulator
 dart/isolates/regress_54528_test: SkipByDesign # Invokes gen_kernel/gen_snapshot
 dart/isolates/shared_test: SkipByDesign # https://dartbug.com/37299 Test uses dart:ffi which is not supported on simulators.
 dart/isolates/thread_pool_test: SkipByDesign # https://dartbug.com/37299 Test uses dart:ffi which is not supported on simulators.
diff --git a/runtime/vm/BUILD.gn b/runtime/vm/BUILD.gn
index 6afe2bb..f4be2d9 100644
--- a/runtime/vm/BUILD.gn
+++ b/runtime/vm/BUILD.gn
@@ -202,6 +202,9 @@
     # uses different assembler syntax.
     sources += [ "thread_interrupter_android_arm.S" ]
   }
+  if (!is_win) {
+    sources += [ "simulator_arm64_trampolines.S" ]
+  }
   include_dirs = [ ".." ]
 }
 
diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc
index bcec263..7849e4d 100644
--- a/runtime/vm/compiler/backend/il_arm64.cc
+++ b/runtime/vm/compiler/backend/il_arm64.cc
@@ -1428,6 +1428,10 @@
     __ mov(temp_csp, CSP);
     __ mov(CSP, SP);
 
+#if defined(SIMULATOR_FFI)
+    __ Emit(Instr::kSimulatorFfiRedirectInstruction);
+    ASSERT(branch == R9);
+#endif
     __ blr(branch);
 
     // Restore the Dart stack pointer.
diff --git a/runtime/vm/compiler/backend/locations.h b/runtime/vm/compiler/backend/locations.h
index 3a6e46f..1d38e88 100644
--- a/runtime/vm/compiler/backend/locations.h
+++ b/runtime/vm/compiler/backend/locations.h
@@ -712,6 +712,9 @@
       if (reg == PC) continue;
 #elif defined(TARGET_ARCH_ARM64)
       if (reg == R31) continue;
+#if defined(DART_TARGET_OS_MACOS) || defined(DART_TARGET_OS_WINDOWS)
+      if (reg == R18) continue;
+#endif
 #elif defined(TARGET_ARCH_RISCV32) || defined(TARGET_ARCH_RISCV64)
       if (reg == ZR || reg == TP || reg == GP) continue;
 #endif
diff --git a/runtime/vm/compiler/stub_code_compiler_arm64.cc b/runtime/vm/compiler/stub_code_compiler_arm64.cc
index 713b49b..daffde0 100644
--- a/runtime/vm/compiler/stub_code_compiler_arm64.cc
+++ b/runtime/vm/compiler/stub_code_compiler_arm64.cc
@@ -433,6 +433,9 @@
   __ Bind(&done);
 #endif
 
+#if defined(SIMULATOR_FFI)
+  __ Emit(Instr::kSimulatorFfiRedirectInstruction);
+#endif
   __ blr(R9);
 
   __ mov(SP, CSP);
diff --git a/runtime/vm/constants_arm64.h b/runtime/vm/constants_arm64.h
index ab62555..5788a23 100644
--- a/runtime/vm/constants_arm64.h
+++ b/runtime/vm/constants_arm64.h
@@ -1336,6 +1336,8 @@
   static constexpr int32_t kSimulatorBreakCode =
       0xdeb2;  // For breakpoint in sim.
   static constexpr int32_t kSimulatorRedirectCode = 0xca11;  // For redirection.
+  static constexpr int32_t kSimulatorFfiRedirectCode =
+      0xca12;  // For FFI callout redirection.
 
   // Breakpoint instruction filling assembler code buffers in debug mode.
   static constexpr int32_t kBreakPointInstruction =  // brk(0xdeb0).
@@ -1350,6 +1352,8 @@
   // Runtime call redirection instruction used by the simulator.
   static constexpr int32_t kSimulatorRedirectInstruction =
       HLT | (kSimulatorRedirectCode << kImm16Shift);
+  static constexpr int32_t kSimulatorFfiRedirectInstruction =
+      HLT | (kSimulatorFfiRedirectCode << kImm16Shift);  // hlt #0xca12
 
   // Read one particular bit out of the instruction bits.
   inline int Bit(int nr) const { return (InstructionBits() >> nr) & 1; }
diff --git a/runtime/vm/simulator_arm.cc b/runtime/vm/simulator_arm.cc
index 9d7be40..f34890d 100644
--- a/runtime/vm/simulator_arm.cc
+++ b/runtime/vm/simulator_arm.cc
@@ -1421,7 +1421,7 @@
         Redirection* redirection = Redirection::FromSvcInstruction(instr);
         uword external = redirection->external_function();
         if (IsTracingExecution()) {
-          THR_Print("Call to host function at 0x%" Pd "\n", external);
+          THR_Print("Call to host function at 0x%" Px "\n", external);
         }
         if (redirection->call_kind() == kRuntimeCall) {
           NativeArguments arguments;
diff --git a/runtime/vm/simulator_arm64.cc b/runtime/vm/simulator_arm64.cc
index 2ddc632..29b089653 100644
--- a/runtime/vm/simulator_arm64.cc
+++ b/runtime/vm/simulator_arm64.cc
@@ -950,8 +950,9 @@
                              R31Type r31t) {
   // Register is in range.
   ASSERT((reg >= 0) && (reg < kNumberOfCpuRegisters));
-#if !defined(DART_TARGET_OS_FUCHSIA)
-  ASSERT(instr == nullptr || reg != R18);  // R18 is globally reserved on iOS.
+#if defined(DART_TARGET_OS_MACOS) || defined(DART_TARGET_OS_WINDOWS)
+  // R18 is globally reserved on macOS/iOS, the TEB pointer on Windows.
+  ASSERT(instr == nullptr || reg != R18);
 #endif
 
   if ((reg != R31) || (r31t != R31IsZR)) {
@@ -1682,7 +1683,7 @@
     Redirection* redirection = Redirection::FromHltInstruction(instr);
     uword external = redirection->external_function();
     if (IsTracingExecution()) {
-      THR_Print("Call to host function at 0x%" Pd "\n", external);
+      THR_Print("Call to host function at 0x%" Px "\n", external);
     }
 
     if (redirection->call_kind() == kRuntimeCall) {
@@ -1746,6 +1747,65 @@
   }
 }
 
+struct CalloutContext {  // Layout is hard-coded in FfiCalloutTrampoline.
+  uword saved_stack_pointer;      // Not accessed by the trampoline.
+  uword saved_frame_pointer;      // Not accessed by the trampoline.
+  uword simulator_stack_pointer;  // Read at #16: low end of frame to copy.
+  uword simulator_frame_pointer;  // Read at #24: high end of frame to copy.
+  uword integer_arguments[8];     // #32: x0-x7 in; x0-x1 results out.
+  uword double_arguments[8];      // #96: d0-d7 in; d0-d1 results out.
+  uword r8;                       // #160: loaded into x8 before the call.
+  uword target;                   // #168: address the trampoline calls.
+};
+
+extern "C" void FfiCalloutTrampoline(CalloutContext*);
+
+void Simulator::DoRedirectedFfiCall(Instr* instr) {
+#if !defined(HOST_ARCH_ARM64)
+  FATAL("Unsupported FFI call");
+#else  // Host is ARM64: run the callee natively via FfiCalloutTrampoline.
+  // We can't instrument the runtime.
+  memory_.FlushAll();
+
+  SimulatorSetjmpBuffer buffer(this);
+  if (!setjmp(buffer.buffer_)) {
+    int64_t saved_pc = get_pc();
+    uword external = get_register(R9);  // Call sites emit blr R9.
+    if (IsTracingExecution()) {
+      THR_Print("Call to FFI function at 0x%" Px "\n", external);
+    }
+
+    CalloutContext ctxt;  // The saved_* fields are left unset here.
+    ctxt.simulator_stack_pointer = get_register(R31);
+    ctxt.simulator_frame_pointer = get_register(FP);
+    for (intptr_t i = 0; i < 8; i++) {
+      ctxt.integer_arguments[i] = get_register(static_cast<Register>(R0 + i));
+      ctxt.double_arguments[i] =
+          get_vregisterd(static_cast<VRegister>(V0 + i), 0);
+    }
+    ctxt.r8 = get_register(R8);  // Forwarded to host x8 by the trampoline.
+    ctxt.target = external;
+
+    FfiCalloutTrampoline(&ctxt);
+
+    if (IsTracingExecution()) {
+      THR_Print("Return from FFI function at 0x%" Px "\n", external);
+    }
+
+    ClobberVolatileRegisters();  // Then install results from the arg slots.
+    set_register(instr, R0, ctxt.integer_arguments[0]);
+    set_register(instr, R1, ctxt.integer_arguments[1]);
+    set_vregisterd(V0, 0, ctxt.double_arguments[0]);
+    set_vregisterd(V1, 0, ctxt.double_arguments[1]);
+
+    // Skip over the hlt and the blr that follows it.
+    set_pc(saved_pc + 2 * Instr::kInstrSize);
+  } else {
+    // Coming via long jump from a throw. Continue to exception handler.
+  }
+#endif
+}
+
 void Simulator::ClobberVolatileRegisters() {
   // Clear atomic reservation.
   exclusive_access_addr_ = exclusive_access_value_ = 0;
@@ -1787,6 +1847,8 @@
       dbg.Stop(instr, "breakpoint");
     } else if (imm == Instr::kSimulatorRedirectCode) {
       DoRedirectedCall(instr);
+    } else if (imm == Instr::kSimulatorFfiRedirectCode) {
+      DoRedirectedFfiCall(instr);
     } else {
       UnimplementedInstruction(instr);
     }
diff --git a/runtime/vm/simulator_arm64.h b/runtime/vm/simulator_arm64.h
index e550e80..bc812cf 100644
--- a/runtime/vm/simulator_arm64.h
+++ b/runtime/vm/simulator_arm64.h
@@ -234,6 +234,7 @@
   bool ConditionallyExecute(Instr* instr);
 
   void DoRedirectedCall(Instr* instr);
+  void DoRedirectedFfiCall(Instr* instr);
 
   // Decode instructions.
   void InstructionDecode(Instr* instr);
diff --git a/runtime/vm/simulator_arm64_trampolines.S b/runtime/vm/simulator_arm64_trampolines.S
new file mode 100644
index 0000000..ccd19c7
--- /dev/null
+++ b/runtime/vm/simulator_arm64_trampolines.S
@@ -0,0 +1,78 @@
+// Copyright (c) 2025, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+// Callout trampoline from the simulator. This is not written as a VM stub
+// because we need it to be executable in contexts where we cannot JIT.
+// (Alternatively, we could start requiring the VM snapshot in every mode.)
+
+#if defined(__aarch64__)
+
+.text
+
+#if defined(__APPLE__)
+.globl _FfiCalloutTrampoline
+_FfiCalloutTrampoline:
+#else
+.globl FfiCalloutTrampoline
+.type FfiCalloutTrampoline, %function
+FfiCalloutTrampoline:
+#endif
+  stp fp, lr, [sp, #-16]!  // Push a frame record; x0 = CalloutContext*.
+  mov fp, sp
+
+  // Spill a preserved register to save the context pointer.
+  stp x19, x20, [sp, #-16]!
+  mov x19, x0
+
+  // Copy the top frame from the Dart stack to the C stack, 16 bytes at a time.
+  ldr x0, [x19, #16]  // CalloutContext.simulator_stack_pointer (CSP)
+  ldr x1, [x19, #24]  // CalloutContext.simulator_frame_pointer
+  add x1, x1, 15   // Round up the frame pointer, since the Dart frame pointer
+  and x1, x1, ~15  // is not double-word aligned.
+.Lcopy:
+  ldp x2, x3, [x1, #-16]!  // From Dart FP
+  stp x2, x3, [sp, #-16]!  // To C SP
+  cmp x1, x0  // Stop once the cursor reaches the simulator SP.
+  b.gt .Lcopy  // NOTE(review): signed compare; b.hi is the unsigned form.
+
+  // Load the ABI argument registers. Note that Dart FFI does not support
+  // full 128-bit SIMD arguments, so we don't need to set the full V
+  // registers.
+  ldr x0, [x19, #32]  // CalloutContext.integer_arguments[0]
+  ldr x1, [x19, #40]
+  ldr x2, [x19, #48]
+  ldr x3, [x19, #56]
+  ldr x4, [x19, #64]
+  ldr x5, [x19, #72]
+  ldr x6, [x19, #80]
+  ldr x7, [x19, #88]
+  ldr d0, [x19, #96]  // CalloutContext.double_arguments[0]
+  ldr d1, [x19, #104]
+  ldr d2, [x19, #112]
+  ldr d3, [x19, #120]
+  ldr d4, [x19, #128]
+  ldr d5, [x19, #136]
+  ldr d6, [x19, #144]
+  ldr d7, [x19, #152]
+  ldr x8, [x19, #160]  // CalloutContext.r8
+
+  // Call target.
+  ldr lr, [x19, #168]  // CalloutContext.target
+  blr lr
+
+  // Save the ABI result registers.
+  str x0, [x19, #32]  // CalloutContext.integer_arguments[0]
+  str x1, [x19, #40]
+  str d0, [x19, #96]  // CalloutContext.double_arguments[0]
+  str d1, [x19, #104]
+
+  add sp, fp, -16  // Discard the copied frame; sp -> saved x19/x20.
+  ldp x19, x20, [sp], #16
+  ldp fp, lr, [sp], #16
+  ret
+#if !defined(__APPLE__)
+.size FfiCalloutTrampoline,.-FfiCalloutTrampoline
+#endif
+
+#endif  // defined(__aarch64__)
diff --git a/runtime/vm/simulator_riscv.cc b/runtime/vm/simulator_riscv.cc
index 9d3c204..5526c20 100644
--- a/runtime/vm/simulator_riscv.cc
+++ b/runtime/vm/simulator_riscv.cc
@@ -2114,7 +2114,7 @@
     Redirection* redirection = Redirection::FromECallInstruction(pc_);
     uword external = redirection->external_function();
     if (IsTracingExecution()) {
-      THR_Print("Call to host function at 0x%" Pd "\n", external);
+      THR_Print("Call to host function at 0x%" Px "\n", external);
     }
 
     if (redirection->call_kind() == kRuntimeCall) {