// dotproduct_test.go package dotproduct import ( "runtime" "sync/atomic" "testing" "unsafe" "golang.org/x/sys/cpu" ) func TestAVX2hasAVX(t *testing.T) { if runtime.GOARCH == "amd64" { if cpu.X86.HasAVX2 && !cpu.X86.HasAVX { t.Fatal("HasAVX expected true, got false") } else if !cpu.X86.HasAVX2 { t.Log("HasAVX2=False") } } } func TestAVX512HasAVX2AndAVX(t *testing.T) { if runtime.GOARCH == "amd64" { if cpu.X86.HasAVX512 && !cpu.X86.HasAVX { t.Fatal("HasAVX expected true, got false") } if cpu.X86.HasAVX512 && !cpu.X86.HasAVX2 { t.Fatal("HasAVX2 expected true, got false") } if !cpu.X86.HasAVX512 { t.Log("HasAVX512=False") } } } func DotProduct(a []int32, b []int32, N int32) (sum int32) { //N := len(a) for i := int32(0); i < N; i++ { sum += a[i] * b[i] } return } func TestSumAsm(t *testing.T) { if runtime.GOARCH == "amd64" { d3 := make([]uint64, 24) var sumGo uint64 for i := 0; i < 24; i++ { d3[i] = uint64(i + 1) sumGo = sumGo + d3[i] } sumAsm := Sum(d3) t.Log("sumGo ", sumGo, ", sumAsm ", sumAsm) } else { t.Skip("test case *TestSumAsm* not applicable") } } func TestDotProductAsm(t *testing.T) { if runtime.GOARCH == "amd64" { const len32 = 24 d1 := make([]int32, len32) d2 := make([]int32, len32) for i := 0; i < len32; i++ { d1[i] = int32(i + 1) d2[i] = int32(2 * i) } sumGo := DotProduct(d1, d2, len32) sumAsm := DotProductAsm(d1, d2, len32) t.Log("DotProductGo ", sumGo, ", DotProductAsm ", sumAsm) } else { t.Skip("test case *TestDotProductAsm* not applicable") } } func TestDotProductAsmAvx(t *testing.T) { if runtime.GOARCH == "amd64" && cpu.X86.HasAVX2 { const len32 = 32 d1 := make([]int32, len32) d2 := make([]int32, len32) for i := 0; i < len32; i++ { d1[i] = int32(i + 1) d2[i] = int32(2 * i) } sumGo := DotProduct(d1, d2, len32) sumAvx := DotProductAsmAvx(d1, d2, len32) t.Log("DotProductGo ", sumGo, ", DotProductAvx ", sumAvx) } else { t.Skip("test case *TestDotProductAsmAvx* not applicable") } } func BenchmarkSum(b *testing.B) { const len32 = 4096 d1 := make([]uint64, len32) for i := 0; i < len32; i++ { d1[i] = uint64(i + 1) } var sum2 uint64 = 0 b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0)))) b.ResetTimer() for i := 0; i < b.N; i++ { sum2 += SumGo(d1) % len32 } } func BenchmarkSumParallel(b *testing.B) { const len32 = 4096 d1 := make([]uint64, len32) for i := 0; i < len32; i++ { d1[i] = uint64(i + 1) } b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0)))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { var sum2 uint64 = 0 sum2 += SumGo(d1) % len32 } }) } func BenchmarkSumAsm(b *testing.B) { if runtime.GOARCH == "amd64" { const len32 = 4096 d1 := make([]uint64, len32) for i := 0; i < len32; i++ { d1[i] = uint64(i + 1) } var sum2 uint64 = 0 b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0)))) b.ResetTimer() for i := 0; i < b.N; i++ { sum2 += Sum(d1) % len32 } } else { b.Skip("test case *BenchmarkSumAsm* not applicable") } } func BenchmarkSumAsmParallel(b *testing.B) { if runtime.GOARCH == "amd64" { const len32 = 4096 d1 := make([]uint64, len32) for i := 0; i < len32; i++ { d1[i] = uint64(i + 1) } //glob := b.N b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0)))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { var sum2 uint64 = 0 //for i := 0; i < 24; i++ { sum2 += Sum(d1) % len32 //} } }) } else { b.Skip("test case *BenchmarkSumAsm* not applicable") } } func SumGo(a []uint64) (sum uint64) { N := len(a) for i := 0; i < N; i++ { sum += a[i] } return } func BenchmarkDotProduct(b *testing.B) { const len32 = 1024 d1 := make([]int32, len32) d2 := make([]int32, len32) for i := 0; i < len32; i++ { d1[i] = int32(i + 1) d2[i] = int32(2 * i) } var sum2 int32 = 0 b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0)))) b.ResetTimer() for i := 0; i < b.N; i++ { sum2 += DotProduct(d1, d2, len32) % len32 } } func BenchmarkDotProductAsm(b *testing.B) { if runtime.GOARCH == "amd64" { const len32 = 1024 d1 := make([]int32, len32) d2 := make([]int32, len32) for i := 0; i < len32; i++ { d1[i] = int32(i + 1) d2[i] = int32(2 * i) } var sum2 int32 = 0 b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0)))) b.ResetTimer() for i := 0; i < b.N; i++ { sum2 += DotProductAsm(d1, d2, len32) % len32 } } else { b.Skip("test case *BenchmarkDotProductAsm* not applicable") } } func BenchmarkDotProductAsmAvx2(b *testing.B) { if runtime.GOARCH == "amd64" && cpu.X86.HasAVX2 { const len32 = 1024 d1 := make([]int32, len32) d2 := make([]int32, len32) for i := 0; i < len32; i++ { d1[i] = int32(i + 1) d2[i] = int32(2 * i) } var sum2 int32 = 0 b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0)))) b.ResetTimer() for i := 0; i < b.N; i++ { sum2 += DotProductAsmAvx(d1, d2, len32) % len32 } } else { b.Skip("test case *BenchmarkDotProductAsmAvx2* not applicable") } } func BenchmarkDotProductAsmAvx2Parallel(b *testing.B) { if runtime.GOARCH == "amd64" && cpu.X86.HasAVX2 { const len32 = 1024 d1 := make([]int32, len32) d2 := make([]int32, len32) for i := 0; i < len32; i++ { d1[i] = int32(i + 1) d2[i] = int32(2 * i) } b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0)))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { var sum2 int32 = 0 //for i := 0; i < 24; i++ { sum2 += DotProductAsmAvx(d1, d2, len32) % len32 //} } }) } else { b.Skip("test case *BenchmarkDotProductAsmAvx2* not applicable") } } func BenchmarkHowItWorksParallel(b *testing.B) { procs := uint32(0) iters := uint64(0) //b.SetParallelism(3) //number of goroutine to each b.RunParallel b.SetBytes(int64(4 * unsafe.Sizeof(uint64(0)))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { atomic.AddUint32(&procs, 1) for pb.Next() { atomic.AddUint64(&iters, 1) } }) b.Log("goroutine count :", procs, ", retrial count :", iters) }