dotproduct_amd64.s 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. //+build !noasm !appengine
  2. // func Sum(x []uint64) uint64
  3. TEXT ·Sum(SB),4, $32-32
  4. MOVQ x_ptr+0(FP), DI
  5. MOVQ x_len+8(FP), AX
  6. XORQ R8, R8
  7. CMPQ AX, $0
  8. JE done
  9. loop:
  10. MOVQ (DI), R9
  11. ADDQ $8, DI
  12. ADDQ R9, R8
  13. DECQ AX
  14. JNZ loop
  15. done:
  16. MOVQ R8, ret+24(FP)
  17. RET
  18. //func _dp_int32(a *int32, b *int32, gN *int32, res *int32)
  19. TEXT ·_dp_int32(SB),4, $0-32
  20. MOVQ a+0(FP), DI
  21. MOVQ b+8(FP), SI
  22. MOVQ gN+16(FP), DX
  23. MOVQ res+24(FP), CX
  24. MOVQ (DX),R8 // mov r8d, dword [rdx]
  25. CMPQ R8,$0 // test r8d, r8d
  26. JLE LBB0_1
  27. XORQ AX, AX // xor eax, eax
  28. XORQ R9, R9 // xor r9d, r9d
  29. LBB0_4:
  30. MOVQ (SI)(AX*4),DX // mov edx, dword [rsi + 4*rax]
  31. MOVQ (DI)(AX*4),BX // imul edx, dword [rdi + 4*rax]
  32. IMULL BX,DX
  33. INCL AX // inc rax
  34. ADDL DX,R9 // add r9d, edx
  35. CMPL AX, R8 // cmp r8, rax
  36. JNE LBB0_4
  37. JMP LBB0_2
  38. LBB0_1:
  39. XORQ R9, R9 // xor r9d, r9d
  40. LBB0_2:
  41. MOVQ R9,(CX) // mov dword [rcx], r9d
  42. RET