[vm/ffi] SimDBC on Arm64 Android

Bug: https://github.com/dart-lang/sdk/issues/35773

Change-Id: I6f1f85239b0ffe5c310b9aeea4a4edcd97362bca
Cq-Include-Trybots: luci.dart.try:vm-ffi-android-debug-arm-try, app-kernel-linux-debug-x64-try, vm-kernel-linux-debug-simdbc64-try,vm-kernel-mac-debug-simdbc64-try,vm-kernel-reload-mac-debug-simdbc64-try,vm-kernel-linux-debug-ia32-try,vm-dartkb-linux-debug-simarm64-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/104565
Reviewed-by: Samir Jindel <sjindel@google.com>
Commit-Queue: Samir Jindel <sjindel@google.com>
Commit-Queue: Daco Harkes <dacoharkes@google.com>
diff --git a/runtime/vm/compiler/compiler_sources.gni b/runtime/vm/compiler/compiler_sources.gni
index bac526c..2d8c1f2 100644
--- a/runtime/vm/compiler/compiler_sources.gni
+++ b/runtime/vm/compiler/compiler_sources.gni
@@ -152,9 +152,12 @@
 #
 # Not that this diverges from our convention to build every file on every OS
 # but have ifdef guards which make the files empty on some configurations.
-if (is_linux || is_mac) {
+if (!is_win) {
   # MASM on Windows does not support c preproccesor style flags.
-  compiler_sources += [ "ffi_dbc_trampoline_x64_linux_mac.S" ]
+  compiler_sources += [
+    "ffi_dbc_trampoline_arm64.S",
+    "ffi_dbc_trampoline_x64_linux_mac.S",
+  ]
 }
 
 compiler_sources_tests = [
diff --git a/runtime/vm/compiler/ffi_dbc_trampoline.h b/runtime/vm/compiler/ffi_dbc_trampoline.h
index 656d1e1..578fc7f 100644
--- a/runtime/vm/compiler/ffi_dbc_trampoline.h
+++ b/runtime/vm/compiler/ffi_dbc_trampoline.h
@@ -9,7 +9,8 @@
 
 namespace dart {
 
-#if defined(HOST_ARCH_X64) && !defined(HOST_OS_WINDOWS)
+#if !defined(HOST_OS_WINDOWS) &&                                               \
+    (defined(HOST_ARCH_X64) || defined(HOST_ARCH_ARM64))
 
 // Generic Trampoline for DBC dart:ffi calls. Argument needs to be layed out as
 // a FfiMarshalledArguments.
diff --git a/runtime/vm/compiler/ffi_dbc_trampoline_arm64.S b/runtime/vm/compiler/ffi_dbc_trampoline_arm64.S
new file mode 100644
index 0000000..47aba97
--- /dev/null
+++ b/runtime/vm/compiler/ffi_dbc_trampoline_arm64.S
@@ -0,0 +1,66 @@
+#if defined(__aarch64__) /* HOST_ARCH_ARM64 */
+/* Trampoline for DBC dart:ffi calls; x0 points to the marshalled arguments. */
+.text
+.global FfiTrampolineCall
+.type FfiTrampolineCall, %function
+FfiTrampolineCall:
+
+/* Save callee-saved x19/x20; x19 then holds the argument across the call. */
+stp   x19,  x20, [sp, #-16]! /* Push x19 and x20 as one 16-byte pair. */
+mov   x19,  x0              /* x19 = pointer to the marshalled arguments. */
+
+/* Enter frame. */
+stp   fp,   lr, [sp, #-16]!
+mov   fp,   sp
+
+/* Reserve framespace for arguments. */
+ldr   x9,   [x19, #(8*18)]  /* Load number of stack arguments (slot 18). */
+lsl   x9,   x9,  #3         /* x9 = size in bytes (8 bytes per argument). */
+sub   sp,   sp,  x9         /* Reserve num_stack_args stack slots. */
+
+/* Stack alignment. */
+ldr   x10,  [x19, #(8*17)]  /* Load stack alignment mask (slot 17). */
+mov   x11,  sp
+and   x11,  x11,  x10       /* Apply the mask to the stack pointer value. */
+mov   sp,   x11
+
+/* Copy stack arguments (x9 = byte offset, counts down from the total size). */
+cmp   x9,   #0              /* Check if number of stack arguments is 0. */
+beq   .done                 /* Skip loop if no stack arguments. */
+add   x19,  x19, #(8*19)    /* Offset x19 to point to stack arguments. */
+.loop:                      /* Copy stack arguments loop, last slot first. */
+sub   x9,   x9,   #8        /* Step the byte offset down by one slot. */
+ldr   x10,  [x19, x9]       /* Load value from ffi_marshalled_args. */
+str   x10,  [sp,  x9]       /* Store value at the same offset from sp. */
+cmp   x9,   #0              /* Compare offset with 0. */
+bne   .loop                 /* Loop while offset is not 0. */
+sub   x19,  x19, #(8*19)    /* Restore x19 to original value. */
+.done:                      /* End stack arguments loop. */
+
+/* Copy registers and fpu registers. */
+ldp   x0, x1, [x19, #(8*1)] /* Integer argument registers: slots 1-8. */
+ldp   x2, x3, [x19, #(8*3)]
+ldp   x4, x5, [x19, #(8*5)]
+ldp   x6, x7, [x19, #(8*7)]
+ldp   d0, d1, [x19, #(8*9)] /* Fpu argument registers: slots 9-16. */
+ldp   d2, d3, [x19, #(8*11)]
+ldp   d4, d5, [x19, #(8*13)]
+ldp   d6, d7, [x19, #(8*15)]
+
+/* Do call. */
+ldr   x9,   [x19]           /* Load function address (slot 0). */
+blr   x9                    /* Call the function. */
+
+/* Copy results back; they overwrite slots 0 and 1 of the arguments. */
+str   x0,   [x19, #(8*0)]   /* Integer result in kOffsetIntResult (was function address). */
+str   d0,   [x19, #(8*1)]   /* Double result in kOffsetDoubleResult (was x0 argument). */
+
+/* Leave frame. */
+mov   sp,   fp              /* Drop the stack arguments and alignment padding. */
+ldp   fp,   lr,  [sp], #16
+
+/* Restore callee-saved registers. */
+ldp   x19,  x20, [sp], #16   /* Pop x19 and x20. */
+ret
+
+#endif /* HOST_ARCH_ARM64 */
diff --git a/runtime/vm/dart_api_impl.h b/runtime/vm/dart_api_impl.h
index ca74fa7..2d8ad35 100644
--- a/runtime/vm/dart_api_impl.h
+++ b/runtime/vm/dart_api_impl.h
@@ -300,8 +300,9 @@
 #if defined(TARGET_ARCH_DBC) && !defined(ARCH_IS_64_BIT)
     // TODO(36809): Support SimDBC32.
     return false;
-#elif defined(TARGET_ARCH_DBC) && !defined(HOST_ARCH_X64)
-    // TODO(35773): Support ia32, arm64, and arm.
+#elif defined(TARGET_ARCH_DBC) &&                                              \
+    !(defined(HOST_ARCH_X64) || defined(HOST_ARCH_ARM64))
+    // TODO(36809): Support ia32 and arm.
     return false;
 #elif defined(TARGET_ARCH_DBC) && defined(HOST_ARCH_X64) &&                    \
     defined(HOST_OS_WINDOWS)
diff --git a/tests/ffi/ffi.status b/tests/ffi/ffi.status
index 118f77e..15f739f 100644
--- a/tests/ffi/ffi.status
+++ b/tests/ffi/ffi.status
@@ -26,7 +26,7 @@
 [ $arch == arm && $system != android ]
 *: Skip # "hardfp" calling convention is not yet supported (iOS is also supported but not tested): dartbug.com/36309
 
-[ $arch == simdbc64 && $system != linux && $system != macos ]
+[ $arch == simdbc64 && $system != android && $system != linux && $system != macos ]
 *: Skip # FFI not yet supported outside x64 Linux: dartbug.com/36809
 
 [ $runtime != dart_precompiled && $runtime != vm ]