[vm/ffi] SimDBC64 align stack frame
And regression test for dartbug.com/37069
Fixes: https://github.com/dart-lang/sdk/issues/37176
Follow up of: https://dart-review.googlesource.com/c/sdk/+/103814
Bug: https://github.com/dart-lang/sdk/issues/37069
Change-Id: I066561b487c691952496e2ca6878ac82cee0e02e
Cq-Include-Trybots: luci.dart.try:vm-ffi-android-debug-arm-try, app-kernel-linux-debug-x64-try, vm-kernel-linux-debug-simdbc64-try,vm-kernel-mac-debug-simdbc64-try,vm-kernel-reload-mac-debug-simdbc64-try,vm-kernel-linux-debug-ia32-try,vm-dartkb-linux-debug-simarm64-try,vm-kernel-win-debug-x64-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/105941
Commit-Queue: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Samir Jindel <sjindel@google.com>
diff --git a/runtime/bin/ffi_test_functions.cc b/runtime/bin/ffi_test_functions.cc
index 52b4512..48757cf 100644
--- a/runtime/bin/ffi_test_functions.cc
+++ b/runtime/bin/ffi_test_functions.cc
@@ -149,7 +149,7 @@
}
// Sums many ints.
-// Used for testing calling conventions. With so many doubles we are using all
+// Used for testing calling conventions. With so many integers we are using all
// normal parameter registers and some stack slots.
DART_EXPORT intptr_t SumManyInts(intptr_t a,
intptr_t b,
@@ -169,6 +169,28 @@
return retval;
}
+// Sums an odd number of ints.
+// Used for testing calling conventions. With so many arguments, and an odd
+// number of arguments, we are testing stack alignment on various architectures.
+DART_EXPORT intptr_t SumManyIntsOdd(intptr_t a,
+ intptr_t b,
+ intptr_t c,
+ intptr_t d,
+ intptr_t e,
+ intptr_t f,
+ intptr_t g,
+ intptr_t h,
+ intptr_t i,
+ intptr_t j,
+ intptr_t k) {
+ std::cout << "SumManyInts(" << a << ", " << b << ", " << c << ", " << d
+ << ", " << e << ", " << f << ", " << g << ", " << h << ", " << i
+ << ", " << j << ", " << k << ")\n";
+ intptr_t retval = a + b + c + d + e + f + g + h + i + j + k;
+ std::cout << "returning " << retval << "\n";
+ return retval;
+}
+
// Sums many doubles.
// Used for testing calling conventions. With so many doubles we are using all
// xmm parameter registers and some stack slots.
@@ -485,6 +507,22 @@
Dart_ExecuteInternalCommand("gc-now");
}
+// Triggers GC. Has 11 dummy arguments as unboxed odd integers which should be
+// ignored by GC.
+DART_EXPORT void Regress37069(uint64_t a,
+ uint64_t b,
+ uint64_t c,
+ uint64_t d,
+ uint64_t e,
+ uint64_t f,
+ uint64_t g,
+ uint64_t h,
+ uint64_t i,
+ uint64_t j,
+ uint64_t k) {
+ Dart_ExecuteInternalCommand("gc-now");
+}
+
// Calls a Dart function to allocate 'count' objects.
// Used for stress-testing GC when re-entering the API.
DART_EXPORT void AllocateThroughDart() {
diff --git a/runtime/lib/ffi.cc b/runtime/lib/ffi.cc
index 5dd4367..f2ac373 100644
--- a/runtime/lib/ffi.cc
+++ b/runtime/lib/ffi.cc
@@ -5,6 +5,7 @@
#include "lib/ffi.h"
#include "include/dart_api.h"
+#include "platform/globals.h"
#include "vm/bootstrap_natives.h"
#include "vm/class_finalizer.h"
#include "vm/compiler/assembler/assembler.h"
@@ -677,6 +678,10 @@
data_[kOffsetNumStackSlots] = num_args;
}
+void FfiMarshalledArguments::SetAlignmentMask(uint64_t alignment_mask) const {
+ data_[kOffsetAlignmentMask] = alignment_mask;
+}
+
intptr_t FfiMarshalledArguments::GetNumStackSlots() const {
return data_[kOffsetNumStackSlots];
}
@@ -691,6 +696,7 @@
const compiler::ffi::FfiSignatureDescriptor& signature,
const uint64_t* arg_values) {
const intptr_t num_stack_slots = signature.num_stack_slots();
+ const uint64_t alignment_mask = ~(OS::ActivationFrameAlignment() - 1);
const intptr_t size =
FfiMarshalledArguments::kOffsetStackSlotValues + num_stack_slots;
uint64_t* data = Thread::Current()->GetFfiMarshalledArguments(size);
@@ -699,6 +705,7 @@
descr.SetFunctionAddress(arg_values[compiler::ffi::kFunctionAddressRegister]);
const intptr_t num_args = signature.length();
descr.SetNumStackSlots(num_stack_slots);
+ descr.SetAlignmentMask(alignment_mask);
for (int i = 0; i < num_args; i++) {
uint64_t arg_value = arg_values[compiler::ffi::kFirstArgumentRegister + i];
HostLocation loc = signature.LocationAt(i);
@@ -737,10 +744,12 @@
RegisterNames::FpuRegisterName(
host::CallingConventions::FpuArgumentRegisters[i]));
}
- const intptr_t index = kOffsetNumStackSlots;
- const intptr_t num_stack_slots = data_[index];
- OS::PrintErr(" %02" Pd " 0x%" Pp " (number of stack slots)\n", index,
- num_stack_slots);
+ const intptr_t alignment_mask = data_[kOffsetAlignmentMask];
+ OS::PrintErr(" %02" Pd " 0x%" Pp " (stack alignment mask)\n",
+ kOffsetAlignmentMask, alignment_mask);
+ const intptr_t num_stack_slots = data_[kOffsetNumStackSlots];
+ OS::PrintErr(" %02" Pd " 0x%" Pp " (number of stack slots)\n",
+ kOffsetNumStackSlots, num_stack_slots);
for (intptr_t i = 0; i < num_stack_slots; i++) {
const intptr_t index = kOffsetStackSlotValues + i;
OS::PrintErr(" %02" Pd " 0x%016" Px64 " (stack slot %" Pd ")\n", index,
diff --git a/runtime/lib/ffi.h b/runtime/lib/ffi.h
index 00c527f..4cfa855 100644
--- a/runtime/lib/ffi.h
+++ b/runtime/lib/ffi.h
@@ -47,6 +47,7 @@
void SetFunctionAddress(uint64_t value) const;
void SetRegister(::dart::host::Register reg, uint64_t value) const;
void SetFpuRegister(::dart::host::FpuRegister reg, uint64_t value) const;
+ void SetAlignmentMask(uint64_t kOffsetAlignmentMask) const;
void SetNumStackSlots(intptr_t num_args) const;
intptr_t GetNumStackSlots() const;
void SetStackSlotValue(intptr_t index, uint64_t value) const;
@@ -59,8 +60,9 @@
static const intptr_t kOffsetRegisters = 1;
static const intptr_t kOffsetFpuRegisters =
kOffsetRegisters + ::dart::host::CallingConventions::kNumArgRegs;
- static const intptr_t kOffsetNumStackSlots =
+ static const intptr_t kOffsetAlignmentMask =
kOffsetFpuRegisters + ::dart::host::CallingConventions::kNumFpuArgRegs;
+ static const intptr_t kOffsetNumStackSlots = kOffsetAlignmentMask + 1;
static const intptr_t kOffsetStackSlotValues = kOffsetNumStackSlots + 1;
static const intptr_t kOffsetIntResult = 0;
diff --git a/runtime/vm/compiler/ffi_dbc_trampoline_x64_linux_mac.S b/runtime/vm/compiler/ffi_dbc_trampoline_x64_linux_mac.S
index e8d1918..4f42279 100644
--- a/runtime/vm/compiler/ffi_dbc_trampoline_x64_linux_mac.S
+++ b/runtime/vm/compiler/ffi_dbc_trampoline_x64_linux_mac.S
@@ -12,20 +12,33 @@
_FfiTrampolineCall:
#endif
-push rbx
+/* Save argument in scratch register. */
+push rbx /* Backup caller saved register. */
mov rbx, rdi /* Save argument in scratch register. */
+/* Enter frame. */
+push rbp
+mov rbp, rsp
+
+/* Reserve framespace for arguments. */
+mov rax, [rbx+0x80] /* Load number of stack arguments. */
+shl rax, 3 /* Multiply by size (8 bytes). */
+sub rsp, rax /* Reserve num_stack_args stack slots. */
+
+/* Stack alignment. */
+and rsp, [rbx+0x78] /* Align stack with stack alignment mask. */
+
/* Copy stack arguments. */
-mov rax, [rbx+0x78] /* Load number of stack arguments. */
cmp rax, 0x0 /* Check if number of stack arguments is 0. */
jz .done /* Skip loop if no stack arguments. */
-add rbx, 0x78 /* Offset RBX to point to stack arguments */
+add rsp, rax /* Unreserve stack slots so we can push arguments. */
+add rbx, 0x80 /* Offset RBX to point to stack arguments */
.loop: /* Copy stack arguments loop. */
-push [rbx+0x8*rax] /* Push stack argument. */
-sub rax, 0x1 /* Decrement stack argument iterator. */
+push [rbx+rax] /* Push stack argument. */
+sub rax, 0x8 /* Decrement stack argument iterator. */
cmp rax, 0x0 /* Compare iterator with 0 */
jnz .loop /* Loop while iterator is not 0 */
-sub rbx, 0x78 /* Restore RBX to original value. */
+sub rbx, 0x80 /* Restore RBX to original value. */
.done: /* End stack arguments loop. */
/* Copy registers and fpu registers. */
@@ -52,12 +65,13 @@
mov [rbx], rax /* Move integer result in kOffsetIntResult */
movsd [rbx+8], xmm0 /* Move double result in kOffsetDoubleResult */
-/* Clean up stack arguments. */
-mov rax, [rbx+0x78] /* Load number of stack arguments. */
-imul rax, 0x8 /* Multiply by stack argument size. */
-add rsp, rax /* Clean up the stack. */
+/* leave frame */
+mov rsp, rbp
+pop rbp
+/* Restore caller saved register. */
pop rbx
+
ret
#endif /* HOST_ARCH_X64 */
diff --git a/runtime/vm/os_android.cc b/runtime/vm/os_android.cc
index 196620f..807b7c9 100644
--- a/runtime/vm/os_android.cc
+++ b/runtime/vm/os_android.cc
@@ -175,9 +175,13 @@
// into a architecture specific file e.g: os_ia32_linux.cc
intptr_t OS::ActivationFrameAlignment() {
#if defined(TARGET_ARCH_IA32) || defined(TARGET_ARCH_X64) || \
- defined(TARGET_ARCH_ARM64)
+ defined(TARGET_ARCH_ARM64) || \
+ defined(TARGET_ARCH_DBC) && \
+ (defined(HOST_ARCH_IA32) || defined(HOST_ARCH_X64) || \
+ defined(HOST_ARCH_ARM64))
const int kMinimumAlignment = 16;
-#elif defined(TARGET_ARCH_ARM) || defined(TARGET_ARCH_DBC)
+#elif defined(TARGET_ARCH_ARM) || \
+ defined(TARGET_ARCH_DBC) && defined(HOST_ARCH_ARM)
const int kMinimumAlignment = 8;
#else
#error Unsupported architecture.
diff --git a/runtime/vm/os_fuchsia.cc b/runtime/vm/os_fuchsia.cc
index 869c5b2..517f3c4 100644
--- a/runtime/vm/os_fuchsia.cc
+++ b/runtime/vm/os_fuchsia.cc
@@ -130,9 +130,13 @@
// into a architecture specific file e.g: os_ia32_fuchsia.cc
intptr_t OS::ActivationFrameAlignment() {
#if defined(TARGET_ARCH_IA32) || defined(TARGET_ARCH_X64) || \
- defined(TARGET_ARCH_ARM64)
+ defined(TARGET_ARCH_ARM64) || \
+ defined(TARGET_ARCH_DBC) && \
+ (defined(HOST_ARCH_IA32) || defined(HOST_ARCH_X64) || \
+ defined(HOST_ARCH_ARM64))
const int kMinimumAlignment = 16;
-#elif defined(TARGET_ARCH_ARM) || defined(TARGET_ARCH_DBC)
+#elif defined(TARGET_ARCH_ARM) || \
+ defined(TARGET_ARCH_DBC) && defined(HOST_ARCH_ARM)
const int kMinimumAlignment = 8;
#else
#error Unsupported architecture.
diff --git a/runtime/vm/os_linux.cc b/runtime/vm/os_linux.cc
index 2ca3bb6..fb65cd1 100644
--- a/runtime/vm/os_linux.cc
+++ b/runtime/vm/os_linux.cc
@@ -497,6 +497,7 @@
intptr_t OS::ActivationFrameAlignment() {
#if defined(TARGET_ARCH_IA32) || defined(TARGET_ARCH_X64) || \
defined(TARGET_ARCH_ARM64) || defined(TARGET_ARCH_DBC)
+ // DBC alignment should be at least as much as host architecture.
const int kMinimumAlignment = 16;
#elif defined(TARGET_ARCH_ARM)
const int kMinimumAlignment = 8;
diff --git a/runtime/vm/os_macos.cc b/runtime/vm/os_macos.cc
index be6455d..ddbd18f 100644
--- a/runtime/vm/os_macos.cc
+++ b/runtime/vm/os_macos.cc
@@ -16,7 +16,7 @@
#include <sys/time.h> // NOLINT
#include <unistd.h> // NOLINT
#if HOST_OS_IOS
-#include <syslog.h> // NOLINT
+#include <syslog.h> // NOLINT
#endif
#include "platform/utils.h"
@@ -140,7 +140,7 @@
#elif TARGET_ARCH_X64
return 16; // iOS simulator
#elif TARGET_ARCH_DBC
- return 16;
+ return 16; // Should be at least as much as any host architecture.
#else
#error Unimplemented
#endif
@@ -212,7 +212,6 @@
__builtin_extract_return_addr(__builtin_return_address(0)));
}
-
void OS::Print(const char* format, ...) {
#if HOST_OS_IOS
va_list args;
diff --git a/runtime/vm/os_win.cc b/runtime/vm/os_win.cc
index 1ff4eb9..c0f8e39 100644
--- a/runtime/vm/os_win.cc
+++ b/runtime/vm/os_win.cc
@@ -175,9 +175,11 @@
}
intptr_t OS::ActivationFrameAlignment() {
-#if defined(TARGET_ARCH_ARM64)
+#if defined(TARGET_ARCH_ARM64) || \
+ defined(TARGET_ARCH_DBC) && defined(HOST_ARCH_ARM64)
return 16;
-#elif defined(TARGET_ARCH_ARM)
+#elif defined(TARGET_ARCH_ARM) || \
+ defined(TARGET_ARCH_DBC) && defined(HOST_ARCH_ARM)
return 8;
#elif defined(_WIN64)
// Windows 64-bit ABI requires the stack to be 16-byte aligned.
diff --git a/samples/ffi/sample_ffi_functions.dart b/samples/ffi/sample_ffi_functions.dart
index 4a3cf98..9d326e9 100644
--- a/samples/ffi/sample_ffi_functions.dart
+++ b/samples/ffi/sample_ffi_functions.dart
@@ -18,7 +18,7 @@
typedef NativeFunc4 = ffi.IntPtr Function(ffi.IntPtr);
typedef NativeDoubleUnaryOp = ffi.Double Function(ffi.Double);
typedef NativeFloatUnaryOp = ffi.Float Function(ffi.Float);
-typedef NativeOctenaryOp = ffi.IntPtr Function(
+typedef NativeDecenaryOp = ffi.IntPtr Function(
ffi.IntPtr,
ffi.IntPtr,
ffi.IntPtr,
@@ -29,7 +29,7 @@
ffi.IntPtr,
ffi.IntPtr,
ffi.IntPtr);
-typedef NativeDoubleOctenaryOp = ffi.Double Function(
+typedef NativeDoubleDecenaryOp = ffi.Double Function(
ffi.Double,
ffi.Double,
ffi.Double,
@@ -65,9 +65,9 @@
ffi.Pointer<ffi.Int64>);
typedef QuadOp = int Function(int, int, int, int);
typedef DoubleUnaryOp = double Function(double);
-typedef OctenaryOp = int Function(
+typedef DecenaryOp = int Function(
int, int, int, int, int, int, int, int, int, int);
-typedef DoubleOctenaryOp = double Function(double, double, double, double,
+typedef DoubleDecenaryOp = double Function(double, double, double, double,
double, double, double, double, double, double);
typedef VigesimalOp = double Function(
int,
@@ -162,8 +162,8 @@
{
// function with many arguments: arguments get passed in registers and stack
- OctenaryOp sumManyInts = ffiTestFunctions
- .lookupFunction<NativeOctenaryOp, OctenaryOp>("SumManyInts");
+ DecenaryOp sumManyInts = ffiTestFunctions
+ .lookupFunction<NativeDecenaryOp, DecenaryOp>("SumManyInts");
var result = sumManyInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
print(result);
print(result.runtimeType);
@@ -171,8 +171,8 @@
{
// function with many double arguments
- DoubleOctenaryOp sumManyDoubles = ffiTestFunctions.lookupFunction<
- NativeDoubleOctenaryOp, DoubleOctenaryOp>("SumManyDoubles");
+ DoubleDecenaryOp sumManyDoubles = ffiTestFunctions.lookupFunction<
+ NativeDoubleDecenaryOp, DoubleDecenaryOp>("SumManyDoubles");
var result =
sumManyDoubles(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0);
print(result);
diff --git a/tests/ffi/function_gc_test.dart b/tests/ffi/function_gc_test.dart
index 1799ce1..db3e30e 100644
--- a/tests/ffi/function_gc_test.dart
+++ b/tests/ffi/function_gc_test.dart
@@ -24,6 +24,7 @@
testBoxPointer();
testAllocateInNative();
testAllocateInDart();
+ testRegress37069(); //# regress: ok
}
ffi.DynamicLibrary ffiTestFunctions =
@@ -35,11 +36,25 @@
typedef NativeNullaryOpPtr = ffi.Pointer<ffi.Void> Function();
typedef NativeNullaryOp = ffi.Void Function();
typedef NativeUnaryOp = ffi.Void Function(ffi.Uint64);
+typedef NativeUndenaryOp = ffi.Uint64 Function(
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64,
+ ffi.Uint64);
typedef NullaryOp = int Function();
typedef NullaryOpDbl = double Function();
typedef NullaryOpPtr = ffi.Pointer<ffi.Void> Function();
typedef UnaryOp = void Function(int);
typedef NullaryOpVoid = void Function();
+typedef UndenaryOp = int Function(
+ int, int, int, int, int, int, int, int, int, int, int);
//// These functions return values that require boxing into different types.
@@ -88,6 +103,21 @@
// Test GC in the FFI call path by calling a C function which triggers GC
// directly.
void testAllocateInNative() => triggerGc();
+// This also works as a regression test for 37176.
+
+final regress37069 = ffiTestFunctions
+ .lookupFunction<NativeUndenaryOp, UndenaryOp>("Regress37069");
+
+// Test GC in the FFI call path by calling a C function which triggers GC
+// directly.
+void testRegress37069() {
+ regress37069(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+}
+
+class C {
+ final int i;
+ C(this.i);
+}
@pragma("vm:entry-point", "call")
void testAllocationsInDartHelper() => triggerGc();
diff --git a/tests/ffi/function_test.dart b/tests/ffi/function_test.dart
index 014c66e..2865d30 100644
--- a/tests/ffi/function_test.dart
+++ b/tests/ffi/function_test.dart
@@ -29,6 +29,7 @@
testNativeFunctionManyArguments1();
testNativeFunctionManyArguments2();
testNativeFunctionManyArguments3();
+ testNativeFunctionManyArguments4();
testNativeFunctionPointer();
testNullInt();
testNullDouble();
@@ -225,7 +226,7 @@
Expect.approxEquals(1337.0, times1_337Float(1000.0));
}
-typedef NativeOctenaryOp = ffi.IntPtr Function(
+typedef NativeDecenaryOp = ffi.IntPtr Function(
ffi.IntPtr,
ffi.IntPtr,
ffi.IntPtr,
@@ -236,17 +237,39 @@
ffi.IntPtr,
ffi.IntPtr,
ffi.IntPtr);
-typedef OctenaryOp = int Function(
+typedef DecenaryOp = int Function(
int, int, int, int, int, int, int, int, int, int);
-OctenaryOp sumManyInts = ffiTestFunctions
- .lookupFunction<NativeOctenaryOp, OctenaryOp>("SumManyInts");
+DecenaryOp sumManyInts = ffiTestFunctions
+ .lookupFunction<NativeDecenaryOp, DecenaryOp>("SumManyInts");
void testNativeFunctionManyArguments1() {
Expect.equals(55, sumManyInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
}
-typedef NativeDoubleOctenaryOp = ffi.Double Function(
+typedef NativeUndenaryOp = ffi.IntPtr Function(
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr,
+ ffi.IntPtr);
+typedef UndenaryOp = int Function(
+ int, int, int, int, int, int, int, int, int, int, int);
+
+UndenaryOp sumManyIntsOdd = ffiTestFunctions
+ .lookupFunction<NativeUndenaryOp, UndenaryOp>("SumManyIntsOdd");
+
+void testNativeFunctionManyArguments4() {
+ Expect.equals(66, sumManyIntsOdd(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
+}
+
+typedef NativeDoubleDecenaryOp = ffi.Double Function(
ffi.Double,
ffi.Double,
ffi.Double,
@@ -257,11 +280,11 @@
ffi.Double,
ffi.Double,
ffi.Double);
-typedef DoubleOctenaryOp = double Function(double, double, double, double,
+typedef DoubleDecenaryOp = double Function(double, double, double, double,
double, double, double, double, double, double);
-DoubleOctenaryOp sumManyDoubles = ffiTestFunctions
- .lookupFunction<NativeDoubleOctenaryOp, DoubleOctenaryOp>("SumManyDoubles");
+DoubleDecenaryOp sumManyDoubles = ffiTestFunctions
+ .lookupFunction<NativeDoubleDecenaryOp, DoubleDecenaryOp>("SumManyDoubles");
void testNativeFunctionManyArguments2() {
Expect.approxEquals(