; blob: 04d5e3915afb406c19996105d6e315417f4f2ca5 [file] [log] [blame]
; NASM syntax, x86-64. The exported routines below use Win64 prologues
; (see the ";WIN64 prologue" markers) and image-relative unwind tables.
; Memory operands default to RIP-relative addressing.
default rel
; The size keywords below are defined as empty macros, so operands such
; as "XMMWORD[rsp]" assemble as plain "[rsp]".
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64
; NOTE(review): OPENSSL_ia32cap_P is declared here but is not referenced
; anywhere in the visible code.
EXTERN OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; rsaz_512_sqr -- repeated 512-bit modular squaring.
; ABI:   Win64 on entry; the prologue moves the arguments into the
;        registers used by the body:
;          rcx      -> rdi : output, 8 qword limbs
;          rdx      -> rsi : input, 8 qword limbs
;          r8       -> rdx : pointer later kept in rbp and read as an
;                            8-limb modulus by the reduce/subtract helpers
;          r9       -> rcx : 64-bit constant consumed by __rsaz_512_reduce
;                            (presumably the Montgomery n0 -- confirm
;                            against the caller)
;          [40+rsp] -> r8  : number of squarings (low 32 bits used)
; Frame: 6 callee-saved pushes + 128+24 bytes.
;          [rsp .. rsp+127] 16-limb square
;          [128+rsp]        reduce constant
;          [136+rsp]        remaining-iterations counter (dword)
; The loop body runs before the counter is tested, so at least one
; squaring is always performed.
; se_handler depends on this frame layout and on the $L$sqr_body and
; $L$sqr_epilogue labels.
;-----------------------------------------------------------------------
global rsaz_512_sqr
ALIGN 32
rsaz_512_sqr:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_sqr:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,128+24
$L$sqr_body:
; rbp = modulus pointer; rdx = a[0], rax = a[1] for the first pass
mov rbp,rdx
mov rdx,QWORD[rsi]
mov rax,QWORD[8+rsi]
mov QWORD[128+rsp],rcx
jmp NEAR $L$oop_sqr
ALIGN 32
$L$oop_sqr:
; save the remaining-iterations counter
mov DWORD[((128+8))+rsp],r8d
; ---- row 0: a[0]*a[1..7] -> r8..r15 ----
mov rbx,rdx
mul rdx
mov r8,rax
mov rax,QWORD[16+rsi]
mov r9,rdx
mul rbx
add r9,rax
mov rax,QWORD[24+rsi]
mov r10,rdx
adc r10,0
mul rbx
add r10,rax
mov rax,QWORD[32+rsi]
mov r11,rdx
adc r11,0
mul rbx
add r11,rax
mov rax,QWORD[40+rsi]
mov r12,rdx
adc r12,0
mul rbx
add r12,rax
mov rax,QWORD[48+rsi]
mov r13,rdx
adc r13,0
mul rbx
add r13,rax
mov rax,QWORD[56+rsi]
mov r14,rdx
adc r14,0
mul rbx
add r14,rax
mov rax,rbx
mov r15,rdx
adc r15,0
; double r8:r9 (cross terms count twice); rcx keeps the pre-doubling r9
; so its top bit can be carried into the next limb
add r8,r8
mov rcx,r9
adc r9,r9
; add the diagonal term a[0]^2; result limbs 0 and 1 are final
mul rax
mov QWORD[rsp],rax
add r8,rdx
adc r9,0
; NOTE(review): the carry out of the adc above is never consumed (the
; shr below overwrites CF). The same pattern follows every diagonal term
; in this routine. Verify against upstream OpenSSL's rsaz_512_sqr
; overflow fix (CVE-2019-1551).
mov QWORD[8+rsp],r8
shr rcx,63
; ---- row 1: a[1]*a[2..7] accumulated into r10..r15, top limb in r8 ----
mov r8,QWORD[8+rsi]
mov rax,QWORD[16+rsi]
mul r8
add r10,rax
mov rax,QWORD[24+rsi]
mov rbx,rdx
adc rbx,0
mul r8
add r11,rax
mov rax,QWORD[32+rsi]
adc rdx,0
add r11,rbx
mov rbx,rdx
adc rbx,0
mul r8
add r12,rax
mov rax,QWORD[40+rsi]
adc rdx,0
add r12,rbx
mov rbx,rdx
adc rbx,0
mul r8
add r13,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r13,rbx
mov rbx,rdx
adc rbx,0
mul r8
add r14,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r14,rbx
mov rbx,rdx
adc rbx,0
mul r8
add r15,rax
mov rax,r8
adc rdx,0
add r15,rbx
mov r8,rdx
mov rdx,r10
adc r8,0
; double r10:r11 (rcx = bit shifted out of r9); "add rdx,rdx" only
; produces the carry from r10's top bit, lea does the actual doubling
add rdx,rdx
lea r10,[r10*2+rcx]
mov rbx,r11
adc r11,r11
; add a[1]^2; result limbs 2 and 3 are final
mul rax
add r9,rax
adc r10,rdx
adc r11,0
mov QWORD[16+rsp],r9
mov QWORD[24+rsp],r10
shr rbx,63
; ---- row 2: a[2]*a[3..7] accumulated into r12..r15,r8, top limb in r9 ----
mov r9,QWORD[16+rsi]
mov rax,QWORD[24+rsi]
mul r9
add r12,rax
mov rax,QWORD[32+rsi]
mov rcx,rdx
adc rcx,0
mul r9
add r13,rax
mov rax,QWORD[40+rsi]
adc rdx,0
add r13,rcx
mov rcx,rdx
adc rcx,0
mul r9
add r14,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r14,rcx
mov rcx,rdx
adc rcx,0
mul r9
; r12 is complete: double it now (lea leaves the flags alone)
mov r10,r12
lea r12,[r12*2+rbx]
add r15,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r15,rcx
mov rcx,rdx
adc rcx,0
mul r9
shr r10,63
add r8,rax
mov rax,r9
adc rdx,0
add r8,rcx
mov r9,rdx
adc r9,0
mov rcx,r13
lea r13,[r13*2+r10]
; add a[2]^2; result limbs 4 and 5 are final
mul rax
add r11,rax
adc r12,rdx
adc r13,0
mov QWORD[32+rsp],r11
mov QWORD[40+rsp],r12
shr rcx,63
; ---- row 3: a[3]*a[4..7] accumulated into r14,r15,r8,r9, top limb in r10 ----
mov r10,QWORD[24+rsi]
mov rax,QWORD[32+rsi]
mul r10
add r14,rax
mov rax,QWORD[40+rsi]
mov rbx,rdx
adc rbx,0
mul r10
add r15,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r15,rbx
mov rbx,rdx
adc rbx,0
mul r10
mov r12,r14
lea r14,[r14*2+rcx]
add r8,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r8,rbx
mov rbx,rdx
adc rbx,0
mul r10
shr r12,63
add r9,rax
mov rax,r10
adc rdx,0
add r9,rbx
mov r10,rdx
adc r10,0
mov rbx,r15
lea r15,[r15*2+r12]
; add a[3]^2; result limbs 6 and 7 are final
mul rax
add r13,rax
adc r14,rdx
adc r15,0
mov QWORD[48+rsp],r13
mov QWORD[56+rsp],r14
shr rbx,63
; ---- row 4: a[4]*a[5..7] accumulated into r8,r9,r10, top limb in r11 ----
mov r11,QWORD[32+rsi]
mov rax,QWORD[40+rsi]
mul r11
add r8,rax
mov rax,QWORD[48+rsi]
mov rcx,rdx
adc rcx,0
mul r11
add r9,rax
mov rax,QWORD[56+rsi]
adc rdx,0
mov r12,r8
lea r8,[r8*2+rbx]
add r9,rcx
mov rcx,rdx
adc rcx,0
mul r11
shr r12,63
add r10,rax
mov rax,r11
adc rdx,0
add r10,rcx
mov r11,rdx
adc r11,0
mov rcx,r9
lea r9,[r9*2+r12]
; add a[4]^2; result limbs 8 and 9 are final
mul rax
add r15,rax
adc r8,rdx
adc r9,0
mov QWORD[64+rsp],r15
mov QWORD[72+rsp],r8
shr rcx,63
; ---- row 5: a[5]*a[6..7] accumulated into r10,r11, top limb in r12 ----
mov r12,QWORD[40+rsi]
mov rax,QWORD[48+rsi]
mul r12
add r10,rax
mov rax,QWORD[56+rsi]
mov rbx,rdx
adc rbx,0
mul r12
add r11,rax
mov rax,r12
mov r15,r10
lea r10,[r10*2+rcx]
adc rdx,0
shr r15,63
add r11,rbx
mov r12,rdx
adc r12,0
mov rbx,r11
lea r11,[r11*2+r15]
; add a[5]^2; result limbs 10 and 11 are final
mul rax
add r9,rax
adc r10,rdx
adc r11,0
mov QWORD[80+rsp],r9
mov QWORD[88+rsp],r10
; ---- row 6: a[6]*a[7] accumulated into r12, top limb in r13 ----
mov r13,QWORD[48+rsi]
mov rax,QWORD[56+rsi]
mul r13
add r12,rax
mov rax,r13
mov r13,rdx
adc r13,0
; double r12:r13 into r12:r13:r14; "shl rbx,1" moves the top bit of the
; pre-doubling r11 (saved in rbx) into CF as carry-in
xor r14,r14
shl rbx,1
adc r12,r12
adc r13,r13
adc r14,r14
; add a[6]^2; result limbs 12 and 13 are final
mul rax
add r11,rax
adc r12,rdx
adc r13,0
mov QWORD[96+rsp],r11
mov QWORD[104+rsp],r12
; ---- diagonal a[7]^2; result limbs 14 and 15 ----
mov rax,QWORD[56+rsi]
mul rax
add r13,rax
adc rdx,0
add r14,rdx
mov QWORD[112+rsp],r13
mov QWORD[120+rsp],r14
; reduce the low 8 limbs, then add the high 8 limbs of the square
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reduce
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
; rcx = all-ones if the addition carried out, else 0; it is the mask
; that selects the modulus subtraction in __rsaz_512_subtract
sbb rcx,rcx
call __rsaz_512_subtract
; next pass squares the value just written to [rdi]; limbs 0 and 1 are
; still live in r8 and r9
mov rdx,r8
mov rax,r9
mov r8d,DWORD[((128+8))+rsp]
mov rsi,rdi
dec r8d
jnz NEAR $L$oop_sqr
; rax = value of rsp at function entry; restore callee-saved registers
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$sqr_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_sqr:
;-----------------------------------------------------------------------
; rsaz_512_mul -- 512-bit modular multiplication.
; ABI:   Win64 on entry; the prologue moves the arguments:
;          rcx      -> rdi : output, 8 qword limbs
;          rdx      -> rsi : first operand, 8 limbs
;          r8       -> rdx : second operand, 8 limbs
;          r9       -> rcx : modulus, 8 limbs
;          [40+rsp] -> r8  : 64-bit constant consumed by __rsaz_512_reduce
;                            (presumably the Montgomery n0 -- confirm
;                            against the caller)
; Frame: 6 callee-saved pushes + 128+24 bytes; [rsp..rsp+127] receives
;        the 16-limb product, [128+rsp] holds the reduce constant.
; Uses xmm0 and xmm1 as scratch to carry rdi and the modulus pointer
; across __rsaz_512_mul.
; se_handler depends on this frame layout and on the $L$mul_body and
; $L$mul_epilogue labels.
;-----------------------------------------------------------------------
global rsaz_512_mul
ALIGN 32
rsaz_512_mul:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_mul:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,128+24
$L$mul_body:
DB 102,72,15,110,199 ;movq xmm0,rdi
DB 102,72,15,110,201 ;movq xmm1,rcx
mov QWORD[128+rsp],r8
; __rsaz_512_mul expects rbx = first limb of the second operand and
; rbp = pointer to the second operand
mov rbx,QWORD[rdx]
mov rbp,rdx
call __rsaz_512_mul
DB 102,72,15,126,199 ;movq rdi,xmm0
DB 102,72,15,126,205 ;movq rbp,xmm1
; reduce the low 8 limbs, then add the high 8 limbs of the product
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reduce
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
; rcx = all-ones if the addition carried out, else 0
sbb rcx,rcx
call __rsaz_512_subtract
; rax = value of rsp at function entry; restore callee-saved registers
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$mul_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_mul:
;-----------------------------------------------------------------------
; rsaz_512_mul_gather4 -- 512-bit modular multiplication where the second
; operand is gathered limb by limb from an interleaved table.
; ABI:   Win64 on entry; the prologue moves the arguments:
;          rcx      -> rdi : output, 8 qword limbs
;          rdx      -> rsi : first operand, 8 limbs
;          r8       -> rdx : table base
;          r9       -> rcx : modulus, 8 limbs
;          [40+rsp] -> r8  : 64-bit constant consumed by __rsaz_512_reduce
;          [48+rsp] -> r9  : table index (zero-extended from 32 bits)
; Table layout as read here: limb i of entry "index" has its low dword at
;   table + index*4 + 128*i and its high dword 64 bytes after that.
; The address of each load depends on the index value.
; Frame: 6 callee-saved pushes + 128+24 bytes; [rsp..rsp+127] receives
;        the 16-limb product, [128+rsp] holds the reduce constant.
; Uses xmm0, xmm1, xmm4 and xmm5 as scratch.
; se_handler depends on this frame layout and on the $L$mul_gather4_body
; and $L$mul_gather4_epilogue labels.
;-----------------------------------------------------------------------
global rsaz_512_mul_gather4
ALIGN 32
rsaz_512_mul_gather4:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_mul_gather4:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
; zero-extend the 32-bit table index
mov r9d,r9d
sub rsp,128+24
$L$mul_gather4_body:
; rbx = limb 0 of the gathered operand (high dword in eax, low in ebx)
mov eax,DWORD[64+r9*4+rdx]
DB 102,72,15,110,199 ;movq xmm0,rdi
mov ebx,DWORD[r9*4+rdx]
DB 102,72,15,110,201 ;movq xmm1,rcx
mov QWORD[128+rsp],r8
shl rax,32
or rbx,rax
mov rax,QWORD[rsi]
mov rcx,QWORD[8+rsi]
; rbp -> table slot holding limb 1 of the gathered operand
lea rbp,[128+r9*4+rdx]
; ---- first row: a[0..7] * limb 0; the next limb is assembled in xmm4
; in between the multiplications ----
mul rbx
mov QWORD[rsp],rax
mov rax,rcx
mov r8,rdx
mul rbx
movd xmm4,DWORD[rbp]
add r8,rax
mov rax,QWORD[16+rsi]
mov r9,rdx
adc r9,0
mul rbx
movd xmm5,DWORD[64+rbp]
add r9,rax
mov rax,QWORD[24+rsi]
mov r10,rdx
adc r10,0
mul rbx
pslldq xmm5,4
add r10,rax
mov rax,QWORD[32+rsi]
mov r11,rdx
adc r11,0
mul rbx
por xmm4,xmm5
add r11,rax
mov rax,QWORD[40+rsi]
mov r12,rdx
adc r12,0
mul rbx
add r12,rax
mov rax,QWORD[48+rsi]
mov r13,rdx
adc r13,0
mul rbx
lea rbp,[128+rbp]
add r13,rax
mov rax,QWORD[56+rsi]
mov r14,rdx
adc r14,0
mul rbx
DB 102,72,15,126,227 ;movq rbx,xmm4 (next limb)
add r14,rax
mov rax,QWORD[rsi]
mov r15,rdx
adc r15,0
; rdi = running store pointer into the product; 7 rows remain
lea rdi,[8+rsp]
mov ecx,7
jmp NEAR $L$oop_mul_gather
ALIGN 32
; Each iteration multiplies a[0..7] by the current limb in rbx, adds the
; row to r8..r15, stores the finished low limb and shifts the
; accumulator down by one limb.
; NOTE(review): every iteration also loads the following limb from the
; table, so the 7th iteration reads the slot 128*8 bytes past limb 0
; (one slot beyond limb 7). The value lands in rbx and is not used
; afterwards; confirm the table buffer is large enough for that read.
$L$oop_mul_gather:
mul rbx
add r8,rax
mov rax,QWORD[8+rsi]
mov QWORD[rdi],r8
mov r8,rdx
adc r8,0
mul rbx
movd xmm4,DWORD[rbp]
add r9,rax
mov rax,QWORD[16+rsi]
adc rdx,0
add r8,r9
mov r9,rdx
adc r9,0
mul rbx
movd xmm5,DWORD[64+rbp]
add r10,rax
mov rax,QWORD[24+rsi]
adc rdx,0
add r9,r10
mov r10,rdx
adc r10,0
mul rbx
pslldq xmm5,4
add r11,rax
mov rax,QWORD[32+rsi]
adc rdx,0
add r10,r11
mov r11,rdx
adc r11,0
mul rbx
por xmm4,xmm5
add r12,rax
mov rax,QWORD[40+rsi]
adc rdx,0
add r11,r12
mov r12,rdx
adc r12,0
mul rbx
add r13,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r12,r13
mov r13,rdx
adc r13,0
mul rbx
add r14,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r13,r14
mov r14,rdx
adc r14,0
mul rbx
DB 102,72,15,126,227 ;movq rbx,xmm4 (next limb)
add r15,rax
mov rax,QWORD[rsi]
adc rdx,0
add r14,r15
mov r15,rdx
adc r15,0
lea rbp,[128+rbp]
lea rdi,[8+rdi]
dec ecx
jnz NEAR $L$oop_mul_gather
; store the upper 8 limbs of the product
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
DB 102,72,15,126,199 ;movq rdi,xmm0
DB 102,72,15,126,205 ;movq rbp,xmm1
; reduce the low 8 limbs, then add the high 8 limbs of the product
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reduce
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
; rcx = all-ones if the addition carried out, else 0
sbb rcx,rcx
call __rsaz_512_subtract
; rax = value of rsp at function entry; restore callee-saved registers
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$mul_gather4_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_mul_gather4:
;-----------------------------------------------------------------------
; rsaz_512_mul_scatter4 -- multiply the value at the output pointer by
; the input operand (mod the modulus), write the result back to the
; output buffer and also scatter it into an interleaved table.
; ABI:   Win64 on entry; the prologue moves the arguments:
;          rcx      -> rdi : in/out buffer, 8 qword limbs (also used as
;                            the second multiplicand)
;          rdx      -> rsi : first multiplicand, 8 limbs
;          r8       -> rdx : modulus, 8 limbs
;          r9       -> rcx : 64-bit constant consumed by __rsaz_512_reduce
;          [40+rsp] -> r8  : table base
;          [48+rsp] -> r9  : table index (zero-extended from 32 bits)
; Table layout as written here: limb i has its low dword at
;   table + index*4 + 128*i and its high dword 64 bytes after that.
; Frame: 6 callee-saved pushes + 128+24 bytes; [rsp..rsp+127] receives
;        the 16-limb product, [128+rsp] holds the reduce constant.
; Uses xmm0, xmm1 and xmm2 as scratch.
; se_handler depends on this frame layout and on the $L$mul_scatter4_body
; and $L$mul_scatter4_epilogue labels.
;-----------------------------------------------------------------------
global rsaz_512_mul_scatter4
ALIGN 32
rsaz_512_mul_scatter4:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_mul_scatter4:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
; zero-extend the 32-bit table index
mov r9d,r9d
sub rsp,128+24
$L$mul_scatter4_body:
; r8 = address of the selected table column
lea r8,[r9*4+r8]
DB 102,72,15,110,199 ;movq xmm0,rdi
DB 102,72,15,110,202 ;movq xmm1,rdx
DB 102,73,15,110,208 ;movq xmm2,r8
mov QWORD[128+rsp],rcx
; second multiplicand is the current content of the output buffer
mov rbp,rdi
mov rbx,QWORD[rdi]
call __rsaz_512_mul
DB 102,72,15,126,199 ;movq rdi,xmm0
DB 102,72,15,126,205 ;movq rbp,xmm1
; reduce the low 8 limbs, then add the high 8 limbs of the product
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reduce
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
; movq leaves the flags alone, so CF from the adc chain is still live
; for the sbb below
DB 102,72,15,126,214 ;movq rsi,xmm2
; rcx = all-ones if the addition carried out, else 0
sbb rcx,rcx
call __rsaz_512_subtract
; scatter the result (still in r8..r15): low dwords first, then the
; high dwords 64 bytes further on
mov DWORD[rsi],r8d
shr r8,32
mov DWORD[128+rsi],r9d
shr r9,32
mov DWORD[256+rsi],r10d
shr r10,32
mov DWORD[384+rsi],r11d
shr r11,32
mov DWORD[512+rsi],r12d
shr r12,32
mov DWORD[640+rsi],r13d
shr r13,32
mov DWORD[768+rsi],r14d
shr r14,32
mov DWORD[896+rsi],r15d
shr r15,32
mov DWORD[64+rsi],r8d
mov DWORD[192+rsi],r9d
mov DWORD[320+rsi],r10d
mov DWORD[448+rsi],r11d
mov DWORD[576+rsi],r12d
mov DWORD[704+rsi],r13d
mov DWORD[832+rsi],r14d
mov DWORD[960+rsi],r15d
; rax = value of rsp at function entry; restore callee-saved registers
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$mul_scatter4_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_mul_scatter4:
;-----------------------------------------------------------------------
; rsaz_512_mul_by_one -- run the 8-limb input through __rsaz_512_reduce
; and store the result (no multiplication and no final subtraction).
; Presumably this converts a value out of Montgomery form -- confirm
; against the caller.
; ABI:   Win64 on entry; the prologue moves the arguments:
;          rcx -> rdi : output, 8 qword limbs
;          rdx -> rsi : input, 8 limbs
;          r8  -> rdx : modulus, 8 limbs
;          r9  -> rcx : 64-bit constant consumed by __rsaz_512_reduce
; Frame: 6 callee-saved pushes + 128+24 bytes; [128+rsp] holds the
;        reduce constant. xmm0 is used as scratch.
; se_handler depends on this frame layout and on the $L$mul_by_one_body
; and $L$mul_by_one_epilogue labels.
;-----------------------------------------------------------------------
global rsaz_512_mul_by_one
ALIGN 32
rsaz_512_mul_by_one:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_mul_by_one:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,128+24
$L$mul_by_one_body:
mov rbp,rdx
mov QWORD[128+rsp],rcx
; load the input limbs into r8..r15 for __rsaz_512_reduce
mov r8,QWORD[rsi]
pxor xmm0,xmm0
mov r9,QWORD[8+rsi]
mov r10,QWORD[16+rsi]
mov r11,QWORD[24+rsi]
mov r12,QWORD[32+rsi]
mov r13,QWORD[40+rsi]
mov r14,QWORD[48+rsi]
mov r15,QWORD[56+rsi]
; zero the first 112 bytes of the scratch area (movdqa needs rsp to be
; 16-byte aligned here); nothing in this routine reads them back
movdqa XMMWORD[rsp],xmm0
movdqa XMMWORD[16+rsp],xmm0
movdqa XMMWORD[32+rsp],xmm0
movdqa XMMWORD[48+rsp],xmm0
movdqa XMMWORD[64+rsp],xmm0
movdqa XMMWORD[80+rsp],xmm0
movdqa XMMWORD[96+rsp],xmm0
call __rsaz_512_reduce
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
; rax = value of rsp at function entry; restore callee-saved registers
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$mul_by_one_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_mul_by_one:
;-----------------------------------------------------------------------
; __rsaz_512_reduce -- internal helper, custom register convention.
; Eight rounds of word-by-word reduction; the structure matches
; Montgomery reduction, assuming the constant is -1/m mod 2^64 (confirm
; against the caller).
; In:    r8..r15           = 8-limb value (r8 is the least significant)
;        rbp               = pointer to the 8-limb modulus m
;        [128+8+rsp]       = 64-bit constant; this is the caller's
;                            [128+rsp] seen past the return address
; Out:   r8..r15           = value after 8 rounds
; Clobb: rax, rbx, rcx, rdx, rsi, flags
; Each round computes q = r8 * constant, adds q*m to the accumulator and
; shifts the accumulator down by one limb.
;-----------------------------------------------------------------------
ALIGN 32
__rsaz_512_reduce:
mov rbx,r8
imul rbx,QWORD[((128+8))+rsp]
mov rax,QWORD[rbp]
mov ecx,8
jmp NEAR $L$reduction_loop
ALIGN 32
$L$reduction_loop:
mul rbx
mov rax,QWORD[8+rbp]
; CF = (r8 != 0). This stands in for the carry of r8 + low(q*m[0]),
; which is presumed to sum to zero mod 2^64 by the choice of q.
neg r8
mov r8,rdx
adc r8,0
mul rbx
add r9,rax
mov rax,QWORD[16+rbp]
adc rdx,0
add r8,r9
mov r9,rdx
adc r9,0
mul rbx
add r10,rax
mov rax,QWORD[24+rbp]
adc rdx,0
add r9,r10
mov r10,rdx
adc r10,0
mul rbx
add r11,rax
mov rax,QWORD[32+rbp]
adc rdx,0
add r10,r11
; r8 for the next round is final by now; start computing the next q in rsi
mov rsi,QWORD[((128+8))+rsp]
adc rdx,0
mov r11,rdx
mul rbx
add r12,rax
mov rax,QWORD[40+rbp]
adc rdx,0
imul rsi,r8
add r11,r12
mov r12,rdx
adc r12,0
mul rbx
add r13,rax
mov rax,QWORD[48+rbp]
adc rdx,0
add r12,r13
mov r13,rdx
adc r13,0
mul rbx
add r14,rax
mov rax,QWORD[56+rbp]
adc rdx,0
add r13,r14
mov r14,rdx
adc r14,0
mul rbx
; switch to the next round's q once the last product is taken
mov rbx,rsi
add r15,rax
mov rax,QWORD[rbp]
adc rdx,0
add r14,r15
mov r15,rdx
adc r15,0
dec ecx
jne NEAR $L$reduction_loop
DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __rsaz_512_subtract -- internal helper, custom register convention.
; Stores the 8-limb value to [rdi], then adds (mask AND -m) to it and
; stores the sum again: the modulus is subtracted when the mask is
; all-ones, and the value is left unchanged when the mask is zero. No
; branch is taken on the mask.
; In:    r8..r15 = 8-limb value
;        rdi     = output pointer (8 limbs)
;        rbp     = pointer to the 8-limb modulus m
;        rcx     = mask, 0 or all-ones (callers produce it with sbb rcx,rcx)
; Out:   [rdi] and r8..r15 = result
; Clobb: r8..r15, flags
; -m is formed as "neg" of limb 0 and "not" of limbs 1..7, which is the
; exact two's complement only when limb 0 is nonzero (true for an odd
; modulus).
;-----------------------------------------------------------------------
ALIGN 32
__rsaz_512_subtract:
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
; r8..r15 = (-m) AND mask
mov r8,QWORD[rbp]
mov r9,QWORD[8+rbp]
neg r8
not r9
and r8,rcx
mov r10,QWORD[16+rbp]
and r9,rcx
not r10
mov r11,QWORD[24+rbp]
and r10,rcx
not r11
mov r12,QWORD[32+rbp]
and r11,rcx
not r12
mov r13,QWORD[40+rbp]
and r12,rcx
not r13
mov r14,QWORD[48+rbp]
and r13,rcx
not r14
mov r15,QWORD[56+rbp]
and r14,rcx
not r15
and r15,rcx
; add the stored value back in and write the final result
add r8,QWORD[rdi]
adc r9,QWORD[8+rdi]
adc r10,QWORD[16+rdi]
adc r11,QWORD[24+rdi]
adc r12,QWORD[32+rdi]
adc r13,QWORD[40+rdi]
adc r14,QWORD[48+rdi]
adc r15,QWORD[56+rdi]
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __rsaz_512_mul -- internal helper, custom register convention.
; Schoolbook 8x8-limb multiplication producing a 16-limb product.
; In:    rsi = pointer to operand a (8 limbs)
;        rbp = pointer to operand b (8 limbs)
;        rbx = b[0]
; Out:   16-limb product stored from [8+rsp] upwards, i.e. at the
;        caller's [rsp .. rsp+127] (the 8 skips the return address);
;        r8..r15 also hold the upper 8 limbs
; Clobb: rax, rbx, rcx, rdx, rdi, rbp, r8..r15, flags
;-----------------------------------------------------------------------
ALIGN 32
__rsaz_512_mul:
lea rdi,[8+rsp]
; ---- first row: a[0..7] * b[0]; low limb stored, rest in r8..r15 ----
mov rax,QWORD[rsi]
mul rbx
mov QWORD[rdi],rax
mov rax,QWORD[8+rsi]
mov r8,rdx
mul rbx
add r8,rax
mov rax,QWORD[16+rsi]
mov r9,rdx
adc r9,0
mul rbx
add r9,rax
mov rax,QWORD[24+rsi]
mov r10,rdx
adc r10,0
mul rbx
add r10,rax
mov rax,QWORD[32+rsi]
mov r11,rdx
adc r11,0
mul rbx
add r11,rax
mov rax,QWORD[40+rsi]
mov r12,rdx
adc r12,0
mul rbx
add r12,rax
mov rax,QWORD[48+rsi]
mov r13,rdx
adc r13,0
mul rbx
add r13,rax
mov rax,QWORD[56+rsi]
mov r14,rdx
adc r14,0
mul rbx
add r14,rax
mov rax,QWORD[rsi]
mov r15,rdx
adc r15,0
; advance to b[1] and to the next product limb; 7 rows remain
lea rbp,[8+rbp]
lea rdi,[8+rdi]
mov ecx,7
jmp NEAR $L$oop_mul
ALIGN 32
; Each iteration multiplies a[0..7] by b[i], adds the row to r8..r15,
; stores the finished low limb and shifts the accumulator down by one
; limb.
$L$oop_mul:
mov rbx,QWORD[rbp]
mul rbx
add r8,rax
mov rax,QWORD[8+rsi]
mov QWORD[rdi],r8
mov r8,rdx
adc r8,0
mul rbx
add r9,rax
mov rax,QWORD[16+rsi]
adc rdx,0
add r8,r9
mov r9,rdx
adc r9,0
mul rbx
add r10,rax
mov rax,QWORD[24+rsi]
adc rdx,0
add r9,r10
mov r10,rdx
adc r10,0
mul rbx
add r11,rax
mov rax,QWORD[32+rsi]
adc rdx,0
add r10,r11
mov r11,rdx
adc r11,0
mul rbx
add r12,rax
mov rax,QWORD[40+rsi]
adc rdx,0
add r11,r12
mov r12,rdx
adc r12,0
mul rbx
add r13,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r12,r13
mov r13,rdx
adc r13,0
mul rbx
add r14,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r13,r14
mov r14,rdx
; lea leaves the flags alone, so the carry for the adc below survives
lea rbp,[8+rbp]
adc r14,0
mul rbx
add r15,rax
mov rax,QWORD[rsi]
adc rdx,0
add r14,r15
mov r15,rdx
adc r15,0
lea rdi,[8+rdi]
dec ecx
jnz NEAR $L$oop_mul
; store the upper 8 limbs of the product
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; rsaz_512_scatter4 -- store an 8-limb value into one column of an
; interleaved table.
; ABI:   Win64, leaf routine (no stack use, no unwind data).
; In:    rcx = table base
;        rdx = pointer to the 8 qword limbs to store
;        r8  = column index, taken as a 32-bit value (the 64-byte gap
;              between the two halves of a limb leaves room for 0..15)
; Layout written: limb i has its low dword at table + index*4 + 128*i
;                 and its high dword 64 bytes after that.
; Clobb: rax, rcx, rdx, r8, r9, flags
;-----------------------------------------------------------------------
global rsaz_512_scatter4
ALIGN 16
rsaz_512_scatter4:
; Zero-extend the index before it is used in 64-bit address arithmetic.
; Win64 does not guarantee the upper half of a register that carries a
; 32-bit argument; rsaz_512_mul_gather4 and rsaz_512_mul_scatter4
; normalise their index the same way.
mov r8d,r8d
lea rcx,[r8*4+rcx]
mov r9d,8
jmp NEAR $L$oop_scatter
ALIGN 16
$L$oop_scatter:
mov rax,QWORD[rdx]
lea rdx,[8+rdx]
; low half, then high half 64 bytes further on
mov DWORD[rcx],eax
shr rax,32
mov DWORD[64+rcx],eax
lea rcx,[128+rcx]
dec r9d
jnz NEAR $L$oop_scatter
DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; rsaz_512_gather4 -- load an 8-limb value from one column of an
; interleaved table.
; ABI:   Win64, leaf routine (no stack use, no unwind data).
; In:    rcx = output pointer (8 qword limbs)
;        rdx = table base
;        r8  = column index, taken as a 32-bit value (the 64-byte gap
;              between the two halves of a limb leaves room for 0..15)
; Layout read: limb i has its low dword at table + index*4 + 128*i and
;              its high dword 64 bytes after that.
; The load addresses depend on the index value.
; Clobb: rax, rcx, rdx, r8, r9, flags
;-----------------------------------------------------------------------
global rsaz_512_gather4
ALIGN 16
rsaz_512_gather4:
; Zero-extend the index before it is used in 64-bit address arithmetic.
; Win64 does not guarantee the upper half of a register that carries a
; 32-bit argument; rsaz_512_mul_gather4 and rsaz_512_mul_scatter4
; normalise their index the same way.
mov r8d,r8d
lea rdx,[r8*4+rdx]
mov r9d,8
jmp NEAR $L$oop_gather
ALIGN 16
$L$oop_gather:
; eax = low dword (upper half of rax cleared), r8 = high dword
mov eax,DWORD[rdx]
mov r8d,DWORD[64+rdx]
lea rdx,[128+rdx]
shl r8,32
or rax,r8
mov QWORD[rcx],rax
lea rcx,[8+rcx]
dec r9d
jnz NEAR $L$oop_gather
DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by the
; five routines listed in .pdata/.xdata.
; In:    rcx = EXCEPTION_RECORD *   (not used)
;        rdx = establisher frame    (not used)
;        r8  = CONTEXT *            context at the time of the fault
;        r9  = DISPATCHER_CONTEXT *
; Out:   eax = 1 (ExceptionContinueSearch)
; The handler data in .xdata holds two image-relative addresses: the end
; of the prologue ("body" label) and the start of the epilogue.
;   Rip <  body     : the frame is not fully set up and rax still holds
;                     the entry rsp, so context->Rax is used as the frame
;   Rip >= epilogue : rsp is already restored, so context->Rsp is used
;   otherwise       : entry rsp = context->Rsp + 128+24+48 and the six
;                     pushed callee-saved registers are read back from
;                     below it
; In all cases rdi/rsi are recovered from the home slots at entry rsp + 8
; and + 16. The context is then copied to disp->ContextRecord and
; RtlVirtualUnwind is called to continue unwinding in the caller.
; The numeric offsets below are CONTEXT and DISPATCHER_CONTEXT field
; offsets.
;-----------------------------------------------------------------------
EXTERN __imp_RtlVirtualUnwind
ALIGN 16
se_handler:
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64
; rax = context->Rax, rbx = context->Rip
mov rax,QWORD[120+r8]
mov rbx,QWORD[248+r8]
; rsi = disp->ImageBase, r11 = disp->HandlerData
mov rsi,QWORD[8+r9]
mov r11,QWORD[56+r9]
; r10 = absolute address of the body label (HandlerData[0])
mov r10d,DWORD[r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jb NEAR $L$common_seh_tail
; rax = context->Rsp; r10 = absolute address of the epilogue label
mov rax,QWORD[152+r8]
mov r10d,DWORD[4+r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jae NEAR $L$common_seh_tail
; fault inside the body: rebuild the entry rsp and reload the pushed
; registers
lea rax,[((128+24+48))+rax]
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
mov r12,QWORD[((-24))+rax]
mov r13,QWORD[((-32))+rax]
mov r14,QWORD[((-40))+rax]
mov r15,QWORD[((-48))+rax]
; write them to context->Rbx, Rbp, R12..R15
mov QWORD[144+r8],rbx
mov QWORD[160+r8],rbp
mov QWORD[216+r8],r12
mov QWORD[224+r8],r13
mov QWORD[232+r8],r14
mov QWORD[240+r8],r15
$L$common_seh_tail:
; rdi/rsi were saved in the home slots by the WIN64 prologue
mov rdi,QWORD[8+rax]
mov rsi,QWORD[16+rax]
; context->Rsp, context->Rsi, context->Rdi
mov QWORD[152+r8],rax
mov QWORD[168+r8],rsi
mov QWORD[176+r8],rdi
; copy the whole CONTEXT (154 qwords) to disp->ContextRecord
mov rdi,QWORD[40+r9]
mov rsi,r8
mov ecx,154
DD 0xa548f3fc ;cld; rep movsq
; RtlVirtualUnwind(0 (UNW_FLAG_NHANDLER), disp->ImageBase,
;   disp->ControlPc, disp->FunctionEntry, disp->ContextRecord,
;   &disp->HandlerData, &disp->EstablisherFrame, NULL)
mov rsi,r9
xor rcx,rcx
mov rdx,QWORD[8+rsi]
mov r8,QWORD[rsi]
mov r9,QWORD[16+rsi]
mov r10,QWORD[40+rsi]
lea r11,[56+rsi]
lea r12,[24+rsi]
mov QWORD[32+rsp],r10
mov QWORD[40+rsp],r11
mov QWORD[48+rsp],r12
mov QWORD[56+rsp],rcx
call QWORD[__imp_RtlVirtualUnwind]
; ExceptionContinueSearch
mov eax,1
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
; Function table: one entry per routine that sets up a stack frame, each
; holding three image-relative addresses (begin, end, unwind info).
; The two leaf routines rsaz_512_scatter4 and rsaz_512_gather4 and the
; internal __rsaz_512_* helpers have no entry.
section .pdata rdata align=4
ALIGN 4
DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase
DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase
DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase
DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase
DD $L$SEH_end_rsaz_512_mul wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul wrt ..imagebase
DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase
DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase
DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase
DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase
DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase
DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase
; Unwind info records. Each record is:
;   DB 9,0,0,0 : version 1 with the exception-handler flag set, zero
;                prologue size, zero unwind codes, no frame register
;   DD handler : image-relative address of se_handler
;   DD body, epilogue : handler data read by se_handler to decide how
;                much of the frame exists at the faulting address
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_rsaz_512_sqr:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_gather4:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_scatter4:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_by_one:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase