@@ -0,0 +1,267 @@
+// dotproduct_test.go
+package dotproduct
+
+import (
+	"runtime"
+	"sync/atomic"
+	"testing"
+	"unsafe"
+
+	"golang.org/x/sys/cpu"
+)
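+
+// The tests below exercise assembly implementations that are assumed to be
+// declared elsewhere in this package (a sketch; the actual file and stub
+// names may differ), e.g. in dotproduct_amd64.go:
+//
+//	//go:noescape
+//	func Sum(a []uint64) uint64
+//
+//	//go:noescape
+//	func DotProductAsm(a, b []int32, n int32) int32
+//
+//	//go:noescape
+//	func DotProductAsmAvx(a, b []int32, n int32) int32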
+
+// TestAVX2hasAVX checks the CPU-feature implication AVX2 => AVX on amd64.
+func TestAVX2hasAVX(t *testing.T) {
+	if runtime.GOARCH == "amd64" {
+		if cpu.X86.HasAVX2 && !cpu.X86.HasAVX {
+			t.Fatal("HasAVX expected true, got false")
+		} else if !cpu.X86.HasAVX2 {
+			t.Log("HasAVX2=false")
+		}
+	}
+}
+
+// TestAVX512HasAVX2AndAVX checks the implication AVX512 => AVX2 => AVX on amd64.
+func TestAVX512HasAVX2AndAVX(t *testing.T) {
+	if runtime.GOARCH == "amd64" {
+		if cpu.X86.HasAVX512 && !cpu.X86.HasAVX {
+			t.Fatal("HasAVX expected true, got false")
+		}
+		if cpu.X86.HasAVX512 && !cpu.X86.HasAVX2 {
+			t.Fatal("HasAVX2 expected true, got false")
+		}
+		if !cpu.X86.HasAVX512 {
+			t.Log("HasAVX512=false")
+		}
+	}
+}
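+
+// These feature flags are what kernel selection would key on. A minimal
+// dispatch sketch built on the same checks (a hypothetical helper, not part
+// of the original code):
+func dotProductBest(a, b []int32, n int32) int32 {
+	if runtime.GOARCH == "amd64" && cpu.X86.HasAVX2 {
+		return DotProductAsmAvx(a, b, n) // AVX2 path
+	}
+	if runtime.GOARCH == "amd64" {
+		return DotProductAsm(a, b, n) // non-AVX assembly path
+	}
+	return DotProduct(a, b, n) // portable scalar fallback
+}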
+
+// DotProduct is the pure-Go scalar reference; the assembly versions are
+// validated against it. n is the number of elements to process.
+func DotProduct(a, b []int32, n int32) (sum int32) {
+	for i := int32(0); i < n; i++ {
+		sum += a[i] * b[i]
+	}
+	return
+}
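+
+// For intuition, a pure-Go sketch of the lane-splitting strategy an AVX2
+// kernel typically uses: eight independent partial sums stand in for the
+// eight int32 lanes of a YMM register, with a scalar tail for the remainder.
+// (Illustrative only; an AVX2 kernel would typically do this with
+// VPMULLD/VPADDD and a final horizontal reduction.)
+func dotProduct8Lane(a, b []int32, n int32) int32 {
+	var acc [8]int32
+	i := int32(0)
+	for ; i+8 <= n; i += 8 { // "vector" body: one multiply-add per lane
+		for l := int32(0); l < 8; l++ {
+			acc[l] += a[i+l] * b[i+l]
+		}
+	}
+	sum := int32(0)
+	for _, s := range acc { // horizontal reduction of the partial sums
+		sum += s
+	}
+	for ; i < n; i++ { // scalar tail
+		sum += a[i] * b[i]
+	}
+	return sum
+}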
+
+func TestSumAsm(t *testing.T) {
+	if runtime.GOARCH == "amd64" {
+		d3 := make([]uint64, 24)
+		var sumGo uint64
+		for i := 0; i < 24; i++ {
+			d3[i] = uint64(i + 1)
+			sumGo += d3[i]
+		}
+		sumAsm := Sum(d3)
+		if sumGo != sumAsm {
+			t.Fatal("sumGo ", sumGo, " != sumAsm ", sumAsm)
+		}
+		t.Log("sumGo ", sumGo, ", sumAsm ", sumAsm)
+	} else {
+		t.Skip("test case *TestSumAsm* not applicable")
+	}
+}
+
+func TestDotProductAsm(t *testing.T) {
+	if runtime.GOARCH == "amd64" {
+		const len32 = 24
+		d1 := make([]int32, len32)
+		d2 := make([]int32, len32)
+		for i := 0; i < len32; i++ {
+			d1[i] = int32(i + 1)
+			d2[i] = int32(2 * i)
+		}
+
+		sumGo := DotProduct(d1, d2, len32)
+		sumAsm := DotProductAsm(d1, d2, len32)
+		if sumGo != sumAsm {
+			t.Fatal("DotProductGo ", sumGo, " != DotProductAsm ", sumAsm)
+		}
+		t.Log("DotProductGo ", sumGo, ", DotProductAsm ", sumAsm)
+	} else {
+		t.Skip("test case *TestDotProductAsm* not applicable")
+	}
+}
+
+func TestDotProductAsmAvx(t *testing.T) {
+	if runtime.GOARCH == "amd64" && cpu.X86.HasAVX2 {
+		const len32 = 32
+		d1 := make([]int32, len32)
+		d2 := make([]int32, len32)
+		for i := 0; i < len32; i++ {
+			d1[i] = int32(i + 1)
+			d2[i] = int32(2 * i)
+		}
+		sumGo := DotProduct(d1, d2, len32)
+		sumAvx := DotProductAsmAvx(d1, d2, len32)
+		if sumGo != sumAvx {
+			t.Fatal("DotProductGo ", sumGo, " != DotProductAvx ", sumAvx)
+		}
+		t.Log("DotProductGo ", sumGo, ", DotProductAvx ", sumAvx)
+	} else {
+		t.Skip("test case *TestDotProductAsmAvx* not applicable")
+	}
+}
+
+func BenchmarkSum(b *testing.B) {
+	const len32 = 4096
+	d1 := make([]uint64, len32)
+	for i := 0; i < len32; i++ {
+		d1[i] = uint64(i + 1)
+	}
+	var sum2 uint64 = 0 // sink: consumes the result of each call
+	b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0))))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		sum2 += SumGo(d1) % len32
+	}
+}
+
+func BenchmarkSumParallel(b *testing.B) {
+	const len32 = 4096
+	d1 := make([]uint64, len32)
+	for i := 0; i < len32; i++ {
+		d1[i] = uint64(i + 1)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0))))
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		var sum2 uint64 = 0 // per-goroutine sink
+		for pb.Next() {
+			sum2 += SumGo(d1) % len32
+		}
+	})
+}
+
+func BenchmarkSumAsm(b *testing.B) {
+	if runtime.GOARCH == "amd64" {
+		const len32 = 4096
+		d1 := make([]uint64, len32)
+		for i := 0; i < len32; i++ {
+			d1[i] = uint64(i + 1)
+		}
+		var sum2 uint64 = 0
+		b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0))))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			sum2 += Sum(d1) % len32
+		}
+	} else {
+		b.Skip("benchmark *BenchmarkSumAsm* not applicable")
+	}
+}
+
+func BenchmarkSumAsmParallel(b *testing.B) {
+	if runtime.GOARCH == "amd64" {
+		const len32 = 4096
+		d1 := make([]uint64, len32)
+		for i := 0; i < len32; i++ {
+			d1[i] = uint64(i + 1)
+		}
+		b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0))))
+		b.ResetTimer()
+		b.RunParallel(func(pb *testing.PB) {
+			var sum2 uint64 = 0 // per-goroutine sink
+			for pb.Next() {
+				sum2 += Sum(d1) % len32
+			}
+		})
+	} else {
+		b.Skip("benchmark *BenchmarkSumAsmParallel* not applicable")
+	}
+}
+
+// SumGo is the pure-Go scalar reference for the assembly Sum.
+func SumGo(a []uint64) (sum uint64) {
+	N := len(a)
+	for i := 0; i < N; i++ {
+		sum += a[i]
+	}
+	return
+}
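+
+// A pure-Go sketch of the accumulator-splitting idea a SIMD sum relies on:
+// four independent partial sums break the loop-carried dependency, mirroring
+// what packed adds in a vector register achieve. (Illustrative only; the
+// actual Sum kernel may be organized differently.)
+func sum4Lane(a []uint64) uint64 {
+	var s0, s1, s2, s3 uint64
+	n := len(a)
+	i := 0
+	for ; i+4 <= n; i += 4 {
+		s0 += a[i]
+		s1 += a[i+1]
+		s2 += a[i+2]
+		s3 += a[i+3]
+	}
+	sum := s0 + s1 + s2 + s3
+	for ; i < n; i++ { // scalar tail
+		sum += a[i]
+	}
+	return sum
+}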
+
+func BenchmarkDotProduct(b *testing.B) {
+	const len32 = 1024
+	d1 := make([]int32, len32)
+	d2 := make([]int32, len32)
+	for i := 0; i < len32; i++ {
+		d1[i] = int32(i + 1)
+		d2[i] = int32(2 * i)
+	}
+	var sum2 int32 = 0
+	b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0))))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		sum2 += DotProduct(d1, d2, len32) % len32
+	}
+}
+
+func BenchmarkDotProductAsm(b *testing.B) {
+	if runtime.GOARCH == "amd64" {
+		const len32 = 1024
+		d1 := make([]int32, len32)
+		d2 := make([]int32, len32)
+		for i := 0; i < len32; i++ {
+			d1[i] = int32(i + 1)
+			d2[i] = int32(2 * i)
+		}
+		var sum2 int32 = 0
+		b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0))))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			sum2 += DotProductAsm(d1, d2, len32) % len32
+		}
+	} else {
+		b.Skip("benchmark *BenchmarkDotProductAsm* not applicable")
+	}
+}
+
+func BenchmarkDotProductAsmAvx2(b *testing.B) {
+	if runtime.GOARCH == "amd64" && cpu.X86.HasAVX2 {
+		const len32 = 1024
+		d1 := make([]int32, len32)
+		d2 := make([]int32, len32)
+		for i := 0; i < len32; i++ {
+			d1[i] = int32(i + 1)
+			d2[i] = int32(2 * i)
+		}
+		var sum2 int32 = 0
+		b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0))))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			sum2 += DotProductAsmAvx(d1, d2, len32) % len32
+		}
+	} else {
+		b.Skip("benchmark *BenchmarkDotProductAsmAvx2* not applicable")
+	}
+}
+
+func BenchmarkDotProductAsmAvx2Parallel(b *testing.B) {
+	if runtime.GOARCH == "amd64" && cpu.X86.HasAVX2 {
+		const len32 = 1024
+		d1 := make([]int32, len32)
+		d2 := make([]int32, len32)
+		for i := 0; i < len32; i++ {
+			d1[i] = int32(i + 1)
+			d2[i] = int32(2 * i)
+		}
+		b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0))))
+		b.ResetTimer()
+		b.RunParallel(func(pb *testing.PB) {
+			var sum2 int32 = 0 // per-goroutine sink
+			for pb.Next() {
+				sum2 += DotProductAsmAvx(d1, d2, len32) % len32
+			}
+		})
+	} else {
+		b.Skip("benchmark *BenchmarkDotProductAsmAvx2Parallel* not applicable")
+	}
+}
+
+// BenchmarkHowItWorksParallel shows how RunParallel distributes b.N across
+// goroutines: each worker counts itself and its iterations.
+func BenchmarkHowItWorksParallel(b *testing.B) {
+	procs := uint32(0)
+	iters := uint64(0)
+	//b.SetParallelism(3) // would raise the goroutine count to 3*GOMAXPROCS
+	b.SetBytes(int64(4 * unsafe.Sizeof(uint64(0))))
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		atomic.AddUint32(&procs, 1)
+		for pb.Next() {
+			atomic.AddUint64(&iters, 1)
+		}
+	})
+	b.Log("goroutine count:", procs, ", iteration count:", iters)
+}
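+
+// Example invocation (assuming the package builds on amd64; flag values are
+// illustrative):
+//
+//	go test -bench=. -benchmem
+//
+// Adding -cpu=1,4,8 reruns the benchmarks at several GOMAXPROCS values,
+// which makes the scaling of the *Parallel variants visible.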