.data
; Operands used by the Test2 timing loop.
; Layout matters: Test2 loads rsi with the address of Var1 and then reads
; Var2 as dword ptr [rsi+4], and its `add rbx, [rsi]` is a qword load that
; spans Var1 and Var2. Keep Var2 immediately after Var1.
align 16                        ; start Var1 on a 16-byte boundary
Var1 dd 0                       ; 32-bit integer addend (zero) for the integer adds
Var2 real4 0.0                  ; single-precision addend (zero) for the vaddss loads
Var3 real8 262144.0             ; divisor for the result: 2^18, the loop iteration count
.code
;-----------------------------------------------------------------------
; Test2 - throughput micro-benchmark for a fixed 22-instruction loop body.
;
; Executes the loop 2^18 (262144) times between two RDTSC reads and
; returns the average number of TSC ticks per loop iteration.
;
; ABI:   Microsoft x64 (assumed from the MASM syntax and the saved rsi;
;        confirm if this is assembled for another platform)
; In:    nothing
; Out:   xmm0 = (TSC after - TSC before) / Var3, as a real8
; Clobb: rax, rcx, rdx, xmm0-xmm5, flags
; Saved: rbp, rsi, rbx, xmm6, xmm7 (callee-saved registers the body writes)
; Stack: three pushes plus 32 bytes of spill space; no calls are made.
;        NOTE(review): no unwind information (PROC FRAME) is declared.
;
; Register roles inside the timed region:
;   rbp = 64-bit TSC value at the start (mov rbp, rbp leaves it intact)
;   ecx = remaining iterations
;   rsi = address of Var1 (Var2 is read at [rsi+4])
;
; The three `jz @end` branches test rsi, which holds the address of Var1
; and is therefore never zero, so they are never taken. @end is placed in
; front of the epilogue so that even that path would restore the saved
; registers and leave the stack balanced; xmm0 would then hold a loop
; accumulator rather than a timing result.
;-----------------------------------------------------------------------
Test2 proc
        push    rbp
        push    rsi
        push    rbx
        sub     rsp, 32                         ; spill space for xmm6/xmm7
        vmovups xmmword ptr [rsp], xmm6         ; xmm6/xmm7 are callee-saved
        vmovups xmmword ptr [rsp+16], xmm7      ; and are zeroed in the loop

        ; Start the FP accumulators from a known value so the timed loop
        ; does not operate on whatever the caller left in xmm0-xmm3.
        vxorps  xmm0, xmm0, xmm0
        vxorps  xmm1, xmm1, xmm1
        vxorps  xmm2, xmm2, xmm2
        vxorps  xmm3, xmm3, xmm3

        lfence                                  ; order rdtsc after earlier instructions
        rdtsc                                   ; edx:eax = time-stamp counter
        shl     rdx, 32
        or      rax, rdx                        ; rax = full 64-bit TSC
        mov     rbp, rax                        ; rbp = start time
        mov     ecx, 1
        shl     ecx, 18                         ; ecx = 2^18 iterations (matches Var3)
        lea     rsi, Var1
        align 16
@@:
        add     eax, [rsi] ; 2
        vxorps  xmm5, xmm5, xmm5 ; 1
        add     edx, [rsi] ; 2
        vxorps  xmm6, xmm6, xmm6 ; 1
        add     rbx, [rsi] ; 2
        vxorps  xmm7, xmm7, xmm7 ; 1
        xor     eax, eax ; 1
        cmp     rsi, 0 ; 1
        jz      @end ; 1
        vaddss  xmm0, xmm0, dword ptr[rsi+4] ; 2
        mov     rdi, rdi ; 1
        vaddss  xmm1, xmm1, dword ptr[rsi+4] ; 2
        cmp     rsi, 0 ; 1
        jz      @end ; 1
        mov     rbp, rbp ; 1
        vaddss  xmm2, xmm2, dword ptr[rsi+4] ; 2
        xor     edx, edx ; 1
        cmp     rsi, 0 ; 1
        jz      @end ; 1
        vaddss  xmm3, xmm3, xmm3 ; 1
        dec     ecx ; 1
        jnz     @b ; 1 ; 22 instructions, 28 uops

        lfence                                  ; let the loop finish before reading the TSC
        rdtsc
        shl     rdx, 32
        or      rax, rdx                        ; rax = full 64-bit TSC at the end
        sub     rax, rbp                        ; rax = elapsed ticks
        vxorps  xmm4, xmm4, xmm4                ; avoid merging into stale xmm4 contents
        vcvtsi2sd xmm4, xmm4, rax
        ; Figures recorded by the original author for this loop:
        ; ~ 2.83 Cycles, 9.98 uops/cycle, 7.7 IPC
        vdivsd  xmm0, xmm4, Var3                ; xmm0 = ticks per iteration
@end:
        vmovups xmm6, xmmword ptr [rsp]
        vmovups xmm7, xmmword ptr [rsp+16]
        add     rsp, 32
        pop     rbx
        pop     rsi
        pop     rbp
        ret
Test2 endp
END