[vm/ffi] Regression test for dartbug.com/36993

Follow up of: https://dart-review.googlesource.com/c/sdk/+/103136
Bug: https://github.com/dart-lang/sdk/issues/36993
Change-Id: Ib5e8308b91b9b5ce5b95362dd79d12e79499a75c
Cq-Include-Trybots: luci.dart.try:vm-ffi-android-debug-arm-try, app-kernel-linux-debug-x64-try, vm-kernel-linux-debug-simdbc64-try,vm-kernel-mac-debug-simdbc64-try,vm-kernel-reload-mac-debug-simdbc64-try,vm-kernel-linux-debug-ia32-try,vm-dartkb-linux-debug-simarm64-try,vm-kernel-win-debug-x64-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/104820
Commit-Queue: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Samir Jindel <sjindel@google.com>
Auto-Submit: Daco Harkes <dacoharkes@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc
index dbfc95e..34ccfa1 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm.cc
@@ -24,6 +24,7 @@
 DECLARE_FLAG(bool, check_code_pointer);
 DECLARE_FLAG(bool, inline_alloc);
 DECLARE_FLAG(bool, precompiled_mode);
+DECLARE_FLAG(bool, use_slow_path);
 
 namespace compiler {
 
@@ -560,7 +561,7 @@
   LoadImmediate(state, compiler::target::Thread::native_execution_state());
   StoreToOffset(kWord, state, THR, Thread::execution_state_offset());
 
-  if (TargetCPUFeatures::arm_version() == ARMv5TE) {
+  if (FLAG_use_slow_path || TargetCPUFeatures::arm_version() == ARMv5TE) {
     EnterSafepointSlowly();
   } else {
     Label slow_path, done, retry;
@@ -592,7 +593,7 @@
 }
 
 void Assembler::TransitionNativeToGenerated(Register addr, Register state) {
-  if (TargetCPUFeatures::arm_version() == ARMv5TE) {
+  if (FLAG_use_slow_path || TargetCPUFeatures::arm_version() == ARMv5TE) {
     ExitSafepointSlowly();
   } else {
     Label slow_path, done, retry;
@@ -1389,7 +1390,9 @@
       code = 4 | (idx << 3);
       break;
     }
-    default: { break; }
+    default: {
+      break;
+    }
   }
 
   EmitSIMDddd(B24 | B23 | B11 | B10 | B6, kWordPair,
diff --git a/runtime/vm/compiler/assembler/assembler_arm64.cc b/runtime/vm/compiler/assembler/assembler_arm64.cc
index d56909a..ecdede6 100644
--- a/runtime/vm/compiler/assembler/assembler_arm64.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm64.cc
@@ -18,6 +18,7 @@
 DECLARE_FLAG(bool, check_code_pointer);
 DECLARE_FLAG(bool, inline_alloc);
 DECLARE_FLAG(bool, precompiled_mode);
+DECLARE_FLAG(bool, use_slow_path);
 
 DEFINE_FLAG(bool, use_far_branches, false, "Always use far branches");
 
@@ -1326,17 +1327,20 @@
   StoreToOffset(state, THR, compiler::target::Thread::execution_state_offset());
 
   Label slow_path, done, retry;
-  movz(addr, Immediate(compiler::target::Thread::safepoint_state_offset()), 0);
-  add(addr, THR, Operand(addr));
-  Bind(&retry);
-  ldxr(state, addr);
-  cmp(state, Operand(Thread::safepoint_state_unacquired()));
-  b(&slow_path, NE);
+  if (!FLAG_use_slow_path) {
+    movz(addr, Immediate(compiler::target::Thread::safepoint_state_offset()),
+         0);
+    add(addr, THR, Operand(addr));
+    Bind(&retry);
+    ldxr(state, addr);
+    cmp(state, Operand(Thread::safepoint_state_unacquired()));
+    b(&slow_path, NE);
 
-  movz(state, Immediate(Thread::safepoint_state_acquired()), 0);
-  stxr(TMP, state, addr);
-  cbz(&done, TMP);  // 0 means stxr was successful.
-  b(&retry);
+    movz(state, Immediate(Thread::safepoint_state_acquired()), 0);
+    stxr(TMP, state, addr);
+    cbz(&done, TMP);  // 0 means stxr was successful.
+    b(&retry);
+  }
 
   Bind(&slow_path);
   ldr(addr,
@@ -1351,17 +1355,20 @@
   Register addr = TMP2;
 
   Label slow_path, done, retry;
-  movz(addr, Immediate(compiler::target::Thread::safepoint_state_offset()), 0);
-  add(addr, THR, Operand(addr));
-  Bind(&retry);
-  ldxr(state, addr);
-  cmp(state, Operand(Thread::safepoint_state_acquired()));
-  b(&slow_path, NE);
+  if (!FLAG_use_slow_path) {
+    movz(addr, Immediate(compiler::target::Thread::safepoint_state_offset()),
+         0);
+    add(addr, THR, Operand(addr));
+    Bind(&retry);
+    ldxr(state, addr);
+    cmp(state, Operand(Thread::safepoint_state_acquired()));
+    b(&slow_path, NE);
 
-  movz(state, Immediate(Thread::safepoint_state_unacquired()), 0);
-  stxr(TMP, state, addr);
-  cbz(&done, TMP);  // 0 means stxr was successful.
-  b(&retry);
+    movz(state, Immediate(Thread::safepoint_state_unacquired()), 0);
+    stxr(TMP, state, addr);
+    cbz(&done, TMP);  // 0 means stxr was successful.
+    b(&retry);
+  }
 
   Bind(&slow_path);
   ldr(addr,
diff --git a/runtime/vm/compiler/assembler/assembler_ia32.cc b/runtime/vm/compiler/assembler/assembler_ia32.cc
index a451572..aca37d1 100644
--- a/runtime/vm/compiler/assembler/assembler_ia32.cc
+++ b/runtime/vm/compiler/assembler/assembler_ia32.cc
@@ -16,6 +16,7 @@
 
 #if !defined(DART_PRECOMPILED_RUNTIME)
 DECLARE_FLAG(bool, inline_alloc);
+DECLARE_FLAG(bool, use_slow_path);
 #endif
 
 namespace compiler {
@@ -2109,16 +2110,17 @@
 
   // Compare and swap the value at Thread::safepoint_state from unacquired to
   // acquired. On success, jump to 'success'; otherwise, fallthrough.
-  pushl(EAX);
-  movl(EAX, Immediate(Thread::safepoint_state_unacquired()));
-  movl(scratch, Immediate(Thread::safepoint_state_acquired()));
-  LockCmpxchgl(Address(THR, Thread::safepoint_state_offset()), scratch);
-  movl(scratch, EAX);
-  popl(EAX);
-  cmpl(scratch, Immediate(Thread::safepoint_state_unacquired()));
-
   Label done;
-  j(EQUAL, &done);
+  if (!FLAG_use_slow_path) {
+    pushl(EAX);
+    movl(EAX, Immediate(Thread::safepoint_state_unacquired()));
+    movl(scratch, Immediate(Thread::safepoint_state_acquired()));
+    LockCmpxchgl(Address(THR, Thread::safepoint_state_offset()), scratch);
+    movl(scratch, EAX);
+    popl(EAX);
+    cmpl(scratch, Immediate(Thread::safepoint_state_unacquired()));
+    j(EQUAL, &done);
+  }
 
   movl(scratch,
        Address(THR, compiler::target::Thread::enter_safepoint_stub_offset()));
@@ -2132,18 +2134,20 @@
 void Assembler::TransitionNativeToGenerated(Register scratch) {
   // Compare and swap the value at Thread::safepoint_state from acquired to
   // unacquired. On success, jump to 'success'; otherwise, fallthrough.
-  pushl(EAX);
-  movl(EAX, Immediate(compiler::target::Thread::safepoint_state_acquired()));
-  movl(scratch,
-       Immediate(compiler::target::Thread::safepoint_state_unacquired()));
-  LockCmpxchgl(Address(THR, compiler::target::Thread::safepoint_state_offset()),
-               scratch);
-  movl(scratch, EAX);
-  popl(EAX);
-  cmpl(scratch, Immediate(Thread::safepoint_state_acquired()));
-
   Label done;
-  j(EQUAL, &done);
+  if (!FLAG_use_slow_path) {
+    pushl(EAX);
+    movl(EAX, Immediate(compiler::target::Thread::safepoint_state_acquired()));
+    movl(scratch,
+         Immediate(compiler::target::Thread::safepoint_state_unacquired()));
+    LockCmpxchgl(
+        Address(THR, compiler::target::Thread::safepoint_state_offset()),
+        scratch);
+    movl(scratch, EAX);
+    popl(EAX);
+    cmpl(scratch, Immediate(Thread::safepoint_state_acquired()));
+    j(EQUAL, &done);
+  }
 
   movl(scratch,
        Address(THR, compiler::target::Thread::exit_safepoint_stub_offset()));
diff --git a/runtime/vm/compiler/assembler/assembler_x64.cc b/runtime/vm/compiler/assembler/assembler_x64.cc
index f9e725b..1e161a9 100644
--- a/runtime/vm/compiler/assembler/assembler_x64.cc
+++ b/runtime/vm/compiler/assembler/assembler_x64.cc
@@ -18,6 +18,7 @@
 DECLARE_FLAG(bool, check_code_pointer);
 DECLARE_FLAG(bool, inline_alloc);
 DECLARE_FLAG(bool, precompiled_mode);
+DECLARE_FLAG(bool, use_slow_path);
 #endif
 
 namespace compiler {
@@ -176,14 +177,16 @@
   // Compare and swap the value at Thread::safepoint_state from unacquired to
   // acquired. If the CAS fails, go to a slow-path stub.
   Label done;
-  pushq(RAX);
-  movq(RAX, Immediate(Thread::safepoint_state_unacquired()));
-  movq(TMP, Immediate(Thread::safepoint_state_acquired()));
-  LockCmpxchgq(Address(THR, Thread::safepoint_state_offset()), TMP);
-  movq(TMP, RAX);
-  popq(RAX);
-  cmpq(TMP, Immediate(Thread::safepoint_state_unacquired()));
-  j(EQUAL, &done);
+  if (!FLAG_use_slow_path) {
+    pushq(RAX);
+    movq(RAX, Immediate(Thread::safepoint_state_unacquired()));
+    movq(TMP, Immediate(Thread::safepoint_state_acquired()));
+    LockCmpxchgq(Address(THR, Thread::safepoint_state_offset()), TMP);
+    movq(TMP, RAX);
+    popq(RAX);
+    cmpq(TMP, Immediate(Thread::safepoint_state_unacquired()));
+    j(EQUAL, &done);
+  }
 
   movq(TMP,
        Address(THR, compiler::target::Thread::enter_safepoint_stub_offset()));
@@ -197,14 +200,16 @@
   // Compare and swap the value at Thread::safepoint_state from acquired to
   // unacquired. On success, jump to 'success'; otherwise, fallthrough.
   Label done;
-  pushq(RAX);
-  movq(RAX, Immediate(Thread::safepoint_state_acquired()));
-  movq(TMP, Immediate(Thread::safepoint_state_unacquired()));
-  LockCmpxchgq(Address(THR, Thread::safepoint_state_offset()), TMP);
-  movq(TMP, RAX);
-  popq(RAX);
-  cmpq(TMP, Immediate(Thread::safepoint_state_acquired()));
-  j(EQUAL, &done);
+  if (!FLAG_use_slow_path) {
+    pushq(RAX);
+    movq(RAX, Immediate(Thread::safepoint_state_acquired()));
+    movq(TMP, Immediate(Thread::safepoint_state_unacquired()));
+    LockCmpxchgq(Address(THR, Thread::safepoint_state_offset()), TMP);
+    movq(TMP, RAX);
+    popq(RAX);
+    cmpq(TMP, Immediate(Thread::safepoint_state_acquired()));
+    j(EQUAL, &done);
+  }
 
   movq(TMP,
        Address(THR, compiler::target::Thread::exit_safepoint_stub_offset()));
diff --git a/runtime/vm/runtime_entry.cc b/runtime/vm/runtime_entry.cc
index 7eebe0f..674623b 100644
--- a/runtime/vm/runtime_entry.cc
+++ b/runtime/vm/runtime_entry.cc
@@ -2759,6 +2759,7 @@
 }
 
 extern "C" void DFLRT_EnterSafepoint(NativeArguments __unusable_) {
+  CHECK_STACK_ALIGNMENT;
   Thread* thread = Thread::Current();
   ASSERT(thread->top_exit_frame_info() != 0);
   ASSERT(thread->execution_state() == Thread::kThreadInNative);
@@ -2767,6 +2768,7 @@
 DEFINE_RAW_LEAF_RUNTIME_ENTRY(EnterSafepoint, 0, false, &DFLRT_EnterSafepoint);
 
 extern "C" void DFLRT_ExitSafepoint(NativeArguments __unusable_) {
+  CHECK_STACK_ALIGNMENT;
   Thread* thread = Thread::Current();
   ASSERT(thread->top_exit_frame_info() != 0);
   ASSERT(thread->execution_state() == Thread::kThreadInNative);
diff --git a/tests/ffi/function_test.dart b/tests/ffi/function_test.dart
index 07f8fc2..014c66e 100644
--- a/tests/ffi/function_test.dart
+++ b/tests/ffi/function_test.dart
@@ -6,6 +6,7 @@
 //
 // VMOptions=
 // VMOptions=--deterministic --optimization-counter-threshold=10
+// VMOptions=--use-slow-path
 // SharedObjects=ffi_test_functions
 
 library FfiTest;