Browse Source

init

Signed-off-by: Jean-Michel Batto <jean-michel.batto@eldarsoft.com>
Jean-Michel Batto 1 month ago
parent
commit
884918dd75

+ 14 - 0
23pattern-go/23pattern.go

@@ -0,0 +1,14 @@
+// 23pattern
+package main
+
+import (
+	"fmt"
+
+	"pattern"
+)
+
+// main builds a ConcreteOperation2 and prints the text returned by its
+// PrimitiveOperation2 step after a greeting.
+func main() {
+	op := new(pattern.ConcreteOperation2)
+	step := op.PrimitiveOperation2()
+
+	fmt.Println("Hello World pour ConcretOperation2{}! ", step)
+}

+ 226 - 0
23pattern-go/pattern/behavorial.go

@@ -0,0 +1,226 @@
+// behavorial
+package pattern
+
+import (
+	"fmt"
+)
+
+// 13. Chain of Responsibility Pattern
+
+// Handler is one link of a chain: it either answers a request itself or
+// forwards it to the link registered through SetNext.
+type Handler interface {
+	SetNext(handler Handler)
+	Handle(request string) string
+}
+
+// AbstractHandler stores the successor link. Concrete handlers embed it to
+// obtain SetNext.
+type AbstractHandler struct {
+	next Handler
+}
+
+// SetNext records handler as the successor of h.
+func (h *AbstractHandler) SetNext(handler Handler) {
+	h.next = handler
+}
+
+// ConcreteHandler1 answers the request "one" and delegates everything else.
+type ConcreteHandler1 struct {
+	AbstractHandler
+}
+
+// Handle returns "Handler1: handled" for the request "one". Any other
+// request goes to the successor when one is set; without a successor the
+// result is the empty string.
+func (h *ConcreteHandler1) Handle(request string) string {
+	switch {
+	case request == "one":
+		return "Handler1: handled"
+	case h.next == nil:
+		return ""
+	default:
+		return h.next.Handle(request)
+	}
+}
+
+// 14. Command Pattern
+
+// Command is an action that can be executed and reports its outcome as text.
+type Command interface {
+	Execute() string
+}
+
+// Light is the receiver that light commands act upon.
+type Light struct{}
+
+// TurnOn returns the message for a light being switched on.
+func (l *Light) TurnOn() string {
+	return "Light is on"
+}
+
+// TurnOff returns the message for a light being switched off.
+func (l *Light) TurnOff() string {
+	return "Light is off"
+}
+
+// LightOnCommand is a Command bound to one Light.
+type LightOnCommand struct {
+	light *Light
+}
+
+// Execute switches the bound light on and returns the light's message.
+func (c *LightOnCommand) Execute() string {
+	message := c.light.TurnOn()
+	return message
+}
+
+// 15. Interpreter Pattern
+
+// Expression is a node that can be evaluated to a boolean.
+type Expression interface {
+	Interpret() bool
+}
+
+// TerminalExpression is a leaf expression wrapping a piece of text.
+type TerminalExpression struct {
+	data string
+}
+
+// Interpret reports whether the wrapped text is non-empty.
+func (t *TerminalExpression) Interpret() bool {
+	return t.data != ""
+}
+
+// 16. Iterator Pattern
+
+// Iterator walks a sequence one element at a time.
+type Iterator interface {
+	HasNext() bool
+	Next() interface{}
+}
+
+// Collection is the aggregate being iterated.
+type Collection struct {
+	items []interface{}
+}
+
+// CollectionIterator is a forward cursor over a Collection; index is the
+// position of the next element to hand out.
+type CollectionIterator struct {
+	collection *Collection
+	index      int
+}
+
+// HasNext reports whether at least one element is left.
+func (i *CollectionIterator) HasNext() bool {
+	return len(i.collection.items) > i.index
+}
+
+// Next returns the current element and advances the cursor. Once the
+// collection is exhausted it returns nil and leaves the cursor untouched.
+func (i *CollectionIterator) Next() interface{} {
+	if !i.HasNext() {
+		return nil
+	}
+	pos := i.index
+	i.index = pos + 1
+	return i.collection.items[pos]
+}
+
+// 17. Mediator Pattern
+
+// Mediator is told which sender raised which event.
+type Mediator interface {
+	Notify(sender string, event string)
+}
+
+// Component1 is a colleague holding a reference to its mediator.
+type Component1 struct {
+	mediator Mediator
+}
+
+// Component2 is a second colleague holding a reference to its mediator.
+type Component2 struct {
+	mediator Mediator
+}
+
+// ConcreteMediator holds references to both components.
+type ConcreteMediator struct {
+	component1 *Component1
+	component2 *Component2
+}
+
+// Notify prints one line naming the sender and the event it triggered.
+func (m *ConcreteMediator) Notify(sender string, event string) {
+	const layout = "Mediator reacts on %s and triggers event: %s\n"
+	fmt.Printf(layout, sender, event)
+}
+
+// 18. Memento Pattern
+
+// Memento is an opaque snapshot of an Originator's state.
+type Memento struct {
+	state string
+}
+
+// Originator owns the state that can be saved and restored.
+type Originator struct {
+	state string
+}
+
+// CreateMemento returns a new snapshot holding the current state.
+func (o *Originator) CreateMemento() *Memento {
+	return &Memento{state: o.state}
+}
+
+// RestoreMemento replaces the current state with the one stored in m.
+// A nil memento is ignored so that a missing snapshot leaves the originator
+// unchanged instead of causing a nil-pointer panic.
+func (o *Originator) RestoreMemento(m *Memento) {
+	if m == nil {
+		return
+	}
+	o.state = m.state
+}
+
+// 19. Observer Pattern
+
+// Observer is notified with the subject's state.
+type Observer interface {
+	Update(string)
+}
+
+// Subject2 keeps a list of observers and the state it broadcasts to them.
+type Subject2 struct {
+	observers []Observer
+	state     string
+}
+
+// Attach appends o to the list of observers.
+func (s *Subject2) Attach(o Observer) {
+	s.observers = append(s.observers, o)
+}
+
+// Notify calls Update with the current state on every attached observer, in
+// attachment order.
+func (s *Subject2) Notify() {
+	// Snapshot the slice header so the walk covers exactly the observers
+	// attached when Notify started.
+	subscribers := s.observers
+	for idx := 0; idx < len(subscribers); idx++ {
+		subscribers[idx].Update(s.state)
+	}
+}
+
+// 20. State Pattern
+
+// State is one behaviour a Context can currently be in.
+type State interface {
+	Handle() string
+}
+
+// Context holds the state that is currently active.
+type Context struct {
+	state State
+}
+
+// ConcreteStateA and ConcreteStateB are the two available states.
+type (
+	ConcreteStateA struct{}
+	ConcreteStateB struct{}
+)
+
+// Handle identifies state A.
+func (s *ConcreteStateA) Handle() string {
+	return "State A"
+}
+
+// Handle identifies state B.
+func (s *ConcreteStateB) Handle() string {
+	return "State B"
+}
+
+// 21. Strategy Pattern
+
+// Strategy is an interchangeable algorithm that reports its result as text.
+type Strategy interface {
+	Execute() string
+}
+
+// ConcreteStrategyA and ConcreteStrategyB are the two available strategies.
+type (
+	ConcreteStrategyA struct{}
+	ConcreteStrategyB struct{}
+)
+
+// Execute identifies strategy A.
+func (s *ConcreteStrategyA) Execute() string {
+	return "Strategy A"
+}
+
+// Execute identifies strategy B.
+func (s *ConcreteStrategyB) Execute() string {
+	return "Strategy B"
+}
+
+// 22. Template Method Pattern
+
+// AbstractClass lists the template method and the two steps it combines.
+type AbstractClass interface {
+	TemplateMethod() string
+	PrimitiveOperation1() string
+	PrimitiveOperation2() string
+}
+
+// ConcreteClass runs the template. PrimitiveOperation2 is an optional hook
+// for the second step; when it is nil, TemplateMethod uses
+// ConcreteOperation2's implementation.
+type ConcreteClass struct {
+	PrimitiveOperation2 func() string
+}
+
+// ConcreteOperation2 supplies the default second step.
+type ConcreteOperation2 struct{}
+
+// TemplateMethod returns the results of step 1 and step 2 joined by a single
+// space. A nil PrimitiveOperation2 hook is replaced by the default step, so
+// a zero-value ConcreteClass no longer panics on a nil function call.
+func (c *ConcreteClass) TemplateMethod() string {
+	step2 := c.PrimitiveOperation2
+	if step2 == nil {
+		step2 = (&ConcreteOperation2{}).PrimitiveOperation2
+	}
+	return c.PrimitiveOperation1() + " " + step2()
+}
+
+// PrimitiveOperation1 is the fixed first step of the template.
+func (c *ConcreteClass) PrimitiveOperation1() string { return "Step 1" }
+
+// PrimitiveOperation2 is the default second step of the template.
+func (c *ConcreteOperation2) PrimitiveOperation2() string { return "Step 2" }
+
+// 23. Visitor Pattern
+
+// Visitor has one entry point per concrete element type.
+type Visitor interface {
+	VisitConcreteElementA(*ConcreteElementA)
+	VisitConcreteElementB(*ConcreteElementB)
+}
+
+// Element accepts a visitor and dispatches to the matching Visit method.
+type Element interface {
+	Accept(Visitor)
+}
+
+// ConcreteElementA and ConcreteElementB are the two visitable elements.
+type (
+	ConcreteElementA struct{}
+	ConcreteElementB struct{}
+)
+
+// Accept hands e to the visitor's entry point for ConcreteElementA.
+func (e *ConcreteElementA) Accept(v Visitor) {
+	v.VisitConcreteElementA(e)
+}
+
+// Accept hands e to the visitor's entry point for ConcreteElementB.
+func (e *ConcreteElementB) Accept(v Visitor) {
+	v.VisitConcreteElementB(e)
+}
+
+// ConcreteVisitor prints a line for every element it visits.
+type ConcreteVisitor struct{}
+
+// VisitConcreteElementA prints the message for a ConcreteElementA visit.
+func (v *ConcreteVisitor) VisitConcreteElementA(e *ConcreteElementA) {
+	const message = "Visited ConcreteElementA"
+	fmt.Println(message)
+}
+
+// VisitConcreteElementB prints the message for a ConcreteElementB visit.
+func (v *ConcreteVisitor) VisitConcreteElementB(e *ConcreteElementB) {
+	const message = "Visited ConcreteElementB"
+	fmt.Println(message)
+}

+ 114 - 0
23pattern-go/pattern/creational.go

@@ -0,0 +1,114 @@
+// creational
+package pattern
+
+import (
+	"fmt"
+	"sync"
+)
+
+// 1. Singleton Pattern
+
+// singleton is the type of the single shared instance.
+type singleton struct {
+	data string
+}
+
+// instance is the shared value, set on the first GetInstance call.
+var instance *singleton
+
+// once makes sure instance is created a single time.
+var once sync.Once
+
+// GetInstance returns the shared singleton, creating it on the first call.
+// Every call returns the same pointer.
+func GetInstance() *singleton {
+	create := func() {
+		instance = &singleton{data: "I am singleton"}
+	}
+	once.Do(create)
+	return instance
+}
+
+// 2. Factory Method Pattern
+
+// Animal is anything that can speak.
+type Animal interface {
+	Speak() string
+}
+
+// Dog and Cat are the animals the factory knows how to create.
+type (
+	Dog struct{}
+	Cat struct{}
+)
+
+// Speak returns the dog's sound.
+func (d *Dog) Speak() string {
+	return "Woof!"
+}
+
+// Speak returns the cat's sound.
+func (c *Cat) Speak() string {
+	return "Meow!"
+}
+
+// CreateAnimal returns a new Dog for "dog" and a new Cat for "cat". Any
+// other animalType yields a nil Animal.
+func CreateAnimal(animalType string) Animal {
+	if animalType == "dog" {
+		return &Dog{}
+	}
+	if animalType == "cat" {
+		return &Cat{}
+	}
+	return nil
+}
+
+// 3. Abstract Factory Pattern
+
+// Button is a widget that can paint itself.
+type Button interface {
+	Paint()
+}
+
+// WinButton and MacButton are the platform-specific buttons.
+type (
+	WinButton struct{}
+	MacButton struct{}
+)
+
+// Paint prints the Windows button label.
+func (w *WinButton) Paint() {
+	fmt.Println("Windows button")
+}
+
+// Paint prints the Mac button label.
+func (m *MacButton) Paint() {
+	fmt.Println("Mac button")
+}
+
+// GUIFactory creates the widgets of one platform family.
+type GUIFactory interface {
+	CreateButton() Button
+}
+
+// WinFactory and MacFactory are the platform-specific factories.
+type (
+	WinFactory struct{}
+	MacFactory struct{}
+)
+
+// CreateButton returns a new WinButton.
+func (w *WinFactory) CreateButton() Button {
+	return new(WinButton)
+}
+
+// CreateButton returns a new MacButton.
+func (m *MacFactory) CreateButton() Button {
+	return new(MacButton)
+}
+
+// 4. Builder Pattern
+
+// House is the product assembled by HouseBuilder.
+type House struct {
+	windows int
+	doors   int
+	roof    string
+}
+
+// HouseBuilder accumulates the settings of a House through chained calls.
+type HouseBuilder struct {
+	house *House
+}
+
+// NewHouseBuilder returns a builder that starts from a zero-value House.
+func NewHouseBuilder() *HouseBuilder {
+	return &HouseBuilder{house: &House{}}
+}
+
+// SetWindows sets the number of windows and returns the builder for chaining.
+func (b *HouseBuilder) SetWindows(count int) *HouseBuilder {
+	b.house.windows = count
+	return b
+}
+
+// SetDoors sets the number of doors and returns the builder for chaining.
+func (b *HouseBuilder) SetDoors(count int) *HouseBuilder {
+	b.house.doors = count
+	return b
+}
+
+// SetRoof sets the roof style and returns the builder for chaining.
+func (b *HouseBuilder) SetRoof(style string) *HouseBuilder {
+	b.house.roof = style
+	return b
+}
+
+// Build returns a House with the settings accumulated so far.
+//
+// The result is a copy of the builder's working value, so further Set calls
+// on the builder do not alter a house that was already handed out.
+func (b *HouseBuilder) Build() *House {
+	built := *b.house
+	return &built
+}
+
+// 5. Prototype Pattern
+
+// Prototype is a value able to produce a copy of itself.
+type Prototype interface {
+	Clone() Prototype
+}
+
+// ConcretePrototype is a named, clonable value.
+type ConcretePrototype struct {
+	name string
+}
+
+// Clone returns a new ConcretePrototype carrying the same name as p.
+func (p *ConcretePrototype) Clone() Prototype {
+	duplicate := *p
+	return &duplicate
+}

+ 61 - 0
23pattern-go/pattern/pattern_test.go

@@ -0,0 +1,61 @@
+// 23pattern_test
+package pattern
+
+
+import "testing"
+
+// TestCreationalPatterns exercises the singleton, the animal factory and the
+// house builder.
+func TestCreationalPatterns(t *testing.T) {
+	// Singleton: two lookups must give the very same pointer.
+	if first, second := GetInstance(), GetInstance(); first != second {
+		t.Error("Singleton instances should be the same")
+	}
+
+	// Factory Method
+	if sound := CreateAnimal("dog").Speak(); sound != "Woof!" {
+		t.Error("Dog should say Woof!")
+	}
+
+	// Builder
+	builder := NewHouseBuilder()
+	builder.SetWindows(4)
+	builder.SetDoors(2)
+	builder.SetRoof("slate")
+	if built := builder.Build(); built.windows != 4 {
+		t.Error("House should have 4 windows")
+	}
+}
+
+// TestStructuralPatterns exercises the printer adapter and the milk
+// decorator.
+func TestStructuralPatterns(t *testing.T) {
+	// Adapter
+	adapter := &PrinterAdapter{&MyLegacyPrinter{}}
+	if printed := adapter.PrintModern("test"); printed != "Legacy: test" {
+		t.Error("Adapter pattern failed")
+	}
+
+	// Decorator
+	withMilk := &MilkDecorator{coffee: &SimpleCoffee{}}
+	if price := withMilk.Cost(); price != 7 {
+		t.Error("Coffee with milk should cost 7")
+	}
+}
+
+// TestBehavioralPatterns exercises strategy A and a context in state A.
+func TestBehavioralPatterns(t *testing.T) {
+	// Strategy
+	if out := (&ConcreteStrategyA{}).Execute(); out != "Strategy A" {
+		t.Error("Strategy A failed")
+	}
+
+	// State
+	ctx := &Context{state: &ConcreteStateA{}}
+	if out := ctx.state.Handle(); out != "State A" {
+		t.Error("State pattern failed")
+	}
+}
+

+ 163 - 0
23pattern-go/pattern/structural.go

@@ -0,0 +1,163 @@
+// structural
+package pattern
+
+import (
+	"fmt"
+)
+
+// 6. Adapter Pattern
+
+// LegacyPrinter is the old printing interface.
+type LegacyPrinter interface {
+	Print(s string) string
+}
+
+// MyLegacyPrinter is the existing implementation of LegacyPrinter.
+type MyLegacyPrinter struct{}
+
+// Print returns s prefixed with "Legacy: ".
+func (p *MyLegacyPrinter) Print(s string) string {
+	return "Legacy: " + s
+}
+
+// ModernPrinter is the interface new callers expect.
+type ModernPrinter interface {
+	PrintModern(s string) string
+}
+
+// PrinterAdapter exposes an embedded LegacyPrinter as a ModernPrinter.
+type PrinterAdapter struct {
+	LegacyPrinter
+}
+
+// PrintModern forwards s to the embedded legacy printer and returns its
+// output unchanged.
+func (p *PrinterAdapter) PrintModern(s string) string {
+	return p.LegacyPrinter.Print(s)
+}
+
+// 7. Bridge Pattern
+
+// DrawAPI is the implementation side of the bridge.
+type DrawAPI interface {
+	DrawCircle(x, y, radius int)
+}
+
+// RedCircle and GreenCircle are the two drawing back ends.
+type (
+	RedCircle   struct{}
+	GreenCircle struct{}
+)
+
+// DrawCircle prints the position of a red circle; radius is not printed.
+func (r *RedCircle) DrawCircle(x, y, radius int) {
+	const layout = "Drawing red circle at (%d,%d)\n"
+	fmt.Printf(layout, x, y)
+}
+
+// DrawCircle prints the position of a green circle; radius is not printed.
+func (g *GreenCircle) DrawCircle(x, y, radius int) {
+	const layout = "Drawing green circle at (%d,%d)\n"
+	fmt.Printf(layout, x, y)
+}
+
+// Shape is the abstraction side of the bridge; it holds the drawing back end.
+type Shape struct {
+	drawAPI DrawAPI
+}
+
+// Circle is a Shape with a centre and a radius.
+type Circle struct {
+	Shape
+	x, y, radius int
+}
+
+// 8. Composite Pattern
+
+// Component is a node of the tree, either a leaf or a branch.
+type Component interface {
+	Operation() string
+}
+
+// Leaf is a terminal node identified by its name.
+type Leaf struct {
+	name string
+}
+
+// Operation returns the leaf's name.
+func (l *Leaf) Operation() string {
+	return l.name
+}
+
+// Composite is a branch node holding child components.
+type Composite struct {
+	children []Component
+}
+
+// Operation returns "Branch(" followed by each child's Operation result and
+// one space per child, closed by ")".
+func (c *Composite) Operation() string {
+	out := []byte("Branch(")
+	for _, kid := range c.children {
+		out = append(out, kid.Operation()...)
+		out = append(out, ' ')
+	}
+	out = append(out, ')')
+	return string(out)
+}
+
+// 9. Decorator Pattern
+
+// Coffee is a drink with a price and a description.
+type Coffee interface {
+	Cost() int
+	Description() string
+}
+
+// SimpleCoffee is the undecorated base drink.
+type SimpleCoffee struct{}
+
+// Cost returns the base price, 5.
+func (s *SimpleCoffee) Cost() int {
+	return 5
+}
+
+// Description returns the base description.
+func (s *SimpleCoffee) Description() string {
+	return "Simple coffee"
+}
+
+// MilkDecorator wraps another Coffee and adds milk to it.
+type MilkDecorator struct {
+	coffee Coffee
+}
+
+// Cost returns the wrapped coffee's price plus 2 for the milk.
+func (m *MilkDecorator) Cost() int {
+	base := m.coffee.Cost()
+	return base + 2
+}
+
+// Description returns the wrapped coffee's description followed by ", milk".
+func (m *MilkDecorator) Description() string {
+	base := m.coffee.Description()
+	return base + ", milk"
+}
+
+// 10. Facade Pattern
+
+// SubsystemA, SubsystemB and SubsystemC are the parts hidden by the facade.
+type (
+	SubsystemA struct{}
+	SubsystemB struct{}
+	SubsystemC struct{}
+)
+
+// OperationA identifies subsystem A.
+func (s *SubsystemA) OperationA() string {
+	return "Subsystem A"
+}
+
+// OperationB identifies subsystem B.
+func (s *SubsystemB) OperationB() string {
+	return "Subsystem B"
+}
+
+// OperationC identifies subsystem C.
+func (s *SubsystemC) OperationC() string {
+	return "Subsystem C"
+}
+
+// Facade offers one entry point in front of the three subsystems.
+type Facade struct {
+	sysA *SubsystemA
+	sysB *SubsystemB
+	sysC *SubsystemC
+}
+
+// Operation calls the three subsystems in order A, B, C and returns their
+// results separated by single spaces.
+func (f *Facade) Operation() string {
+	partA := f.sysA.OperationA()
+	partB := f.sysB.OperationB()
+	partC := f.sysC.OperationC()
+	return partA + " " + partB + " " + partC
+}
+
+// 11. Flyweight Pattern
+
+// CharacterFlyweight is the shared object representing one rune.
+type CharacterFlyweight struct {
+	char rune
+}
+
+// CharacterFactory hands out one shared CharacterFlyweight per rune.
+type CharacterFactory struct {
+	chars map[rune]*CharacterFlyweight
+}
+
+// NewCharacterFactory returns a factory with an empty cache.
+func NewCharacterFactory() *CharacterFactory {
+	return &CharacterFactory{chars: make(map[rune]*CharacterFlyweight)}
+}
+
+// GetCharacter returns the flyweight for c, creating and caching it on first
+// use. Repeated calls with the same rune return the same pointer.
+//
+// The cache is allocated on demand, so a zero-value CharacterFactory works
+// too instead of panicking on a write to a nil map.
+func (f *CharacterFactory) GetCharacter(c rune) *CharacterFlyweight {
+	if cached := f.chars[c]; cached != nil {
+		return cached
+	}
+	if f.chars == nil {
+		f.chars = make(map[rune]*CharacterFlyweight)
+	}
+	created := &CharacterFlyweight{char: c}
+	f.chars[c] = created
+	return created
+}
+
+// 12. Proxy Pattern
+
+// Subject is the interface shared by the real object and its proxy.
+type Subject interface {
+	Request() string
+}
+
+// RealSubject does the actual work.
+type RealSubject struct{}
+
+// Request returns the real subject's answer.
+func (s *RealSubject) Request() string {
+	return "RealSubject: Handling request"
+}
+
+// Proxy stands in for a RealSubject that it creates on first use.
+type Proxy struct {
+	realSubject *RealSubject
+}
+
+// Request creates the real subject if it does not exist yet, then returns
+// its answer prefixed with "Proxy: ".
+func (p *Proxy) Request() string {
+	target := p.realSubject
+	if target == nil {
+		target = &RealSubject{}
+		p.realSubject = target
+	}
+	return "Proxy: " + target.Request()
+}

BIN
cours10-GLCS-JMB-20242025.pdf


+ 11 - 0
dotproductasm/_dotproduct.c

@@ -0,0 +1,11 @@
+#include <stdint.h>
+
+/*
+ * dp_int32 stores in *res the dot product of the first *len elements of a
+ * and b. A length of zero or less stores 0.
+ *
+ * The sum is accumulated in uint32_t so that overflow wraps around modulo
+ * 2^32 instead of being undefined behaviour as it is for signed int32_t
+ * arithmetic; the wrapped bit pattern is then converted back to int32_t.
+ */
+void dp_int32(int32_t *a, int32_t *b, int32_t *len, int32_t *res) {
+    const int32_t N = *len;
+    uint32_t acc = 0;
+    for (int32_t i = 0; i < N; i++) {
+        acc += (uint32_t)a[i] * (uint32_t)b[i];
+    }
+    *res = (int32_t)acc;
+}

+ 31 - 0
dotproductasm/dotproduct.go

@@ -0,0 +1,31 @@
+//+build !noasm
+//+build !appengine
+
+package dotproduct
+
+// _dp_int32 has no Go body; the TEXT ·_dp_int32 symbol in
+// dotproduct_amd64.s provides it. It reads the element count through gN and
+// writes the dot product of a and b through res.
+//
+//go:noescape
+func _dp_int32(a *int32, b *int32, gN *int32, res *int32)
+
+// _dpavx_int32 has no Go body; the TEXT ·_dpavx_int32 symbol in
+// dotproductavx_amd64.s provides it. It takes the same arguments as
+// _dp_int32 and consumes eight elements per loop iteration.
+//
+//go:noescape
+func _dpavx_int32(a *int32, b *int32, gN *int32, res *int32)
+
+// Sum returns the sum of the elements of x. It has no Go body; the
+// TEXT ·Sum symbol in dotproduct_amd64.s provides it.
+//
+//go:noescape
+func Sum(x []uint64) uint64
+
+// DotProductAsm returns the dot product of the first N elements of a and b,
+// computed by the assembly routine _dp_int32.
+//
+// A non-positive N yields 0 without touching the slices. If N is larger than
+// len(a) or len(b) the function panics with an index-out-of-range error
+// rather than letting the assembly read past the end of a slice.
+func DotProductAsm(a []int32, b []int32, N int32) int32 {
+	if N <= 0 {
+		return 0
+	}
+	// Bounds-check the last index once, up front: the assembly does no
+	// checking of its own.
+	_, _ = a[N-1], b[N-1]
+
+	var res int32
+	_dp_int32(&a[0], &b[0], &N, &res)
+	return res
+}
+
+// DotProductAsmAvx returns the dot product of the first N elements of a and
+// b, using the assembly routine _dpavx_int32 for the vectorisable part.
+//
+// A non-positive N yields 0 without touching the slices. If N is larger than
+// len(a) or len(b) the function panics with an index-out-of-range error
+// rather than letting the assembly read past the end of a slice.
+//
+// The assembly loop advances eight elements at a time, so it is only handed
+// the largest multiple-of-eight prefix; the remaining (at most seven)
+// elements are accumulated here in Go.
+func DotProductAsmAvx(a []int32, b []int32, N int32) int32 {
+	if N <= 0 {
+		return 0
+	}
+	// Bounds-check the last index once, up front: the assembly does no
+	// checking of its own.
+	_, _ = a[N-1], b[N-1]
+
+	var sum int32
+	vecN := N &^ 7 // N rounded down to a multiple of 8
+	if vecN > 0 {
+		_dpavx_int32(&a[0], &b[0], &vecN, &sum)
+	}
+	for i := vecN; i < N; i++ {
+		sum += a[i] * b[i]
+	}
+	return sum
+}

+ 51 - 0
dotproductasm/dotproduct_amd64.s

@@ -0,0 +1,51 @@
+//+build !noasm !appengine
+
+// func Sum(x []uint64) uint64
+//
+// Sum adds the x_len 64-bit words starting at x_ptr and stores the total in
+// the return slot; a length of zero yields 0.
+// TEXT flag 4 is NOSPLIT in textflag.h. $32-32 declares a 32-byte frame and
+// 32 bytes of arguments plus result (24-byte slice header, 8-byte result).
+TEXT ·Sum(SB),4, $32-32
+    MOVQ x_ptr+0(FP), DI // DI = address of the next element
+    MOVQ x_len+8(FP), AX // AX = elements left to add
+    XORQ R8, R8 // R8 = running total
+    CMPQ AX, $0
+    JE done // empty slice: return 0
+loop:
+    MOVQ (DI), R9 // load the current element
+    ADDQ $8, DI // step to the next 8-byte element
+    ADDQ R9, R8
+    DECQ AX
+    JNZ loop // repeat until the count reaches zero
+done:
+    MOVQ R8, ret+24(FP) // result slot follows the 24-byte slice header
+    RET
+
+// func _dp_int32(a *int32, b *int32, gN *int32, res *int32)
+//
+// _dp_int32 stores in *res the 32-bit wrap-around dot product of the first
+// *gN elements of a and b; a count of zero or less stores 0.
+//
+// Every access to int32 data uses a 32-bit move. 64-bit moves would read
+// four bytes past *gN and past the last element of each array, and would
+// write four bytes past *res.
+TEXT    ·_dp_int32(SB),4,  $0-32
+        MOVQ a+0(FP), DI
+        MOVQ b+8(FP), SI
+        MOVQ gN+16(FP), DX
+        MOVQ res+24(FP), CX
+        MOVL (DX), R8                // r8d = *gN (element count)
+        XORL R9, R9                  // r9d = running sum, also the result for N <= 0
+        CMPL R8, $0
+        JLE  LBB0_2                  // nothing to do for N <= 0
+        XORQ AX, AX                  // rax = element index
+
+LBB0_4:
+        MOVL (SI)(AX*4), DX          // edx = b[i]
+        MOVL (DI)(AX*4), BX          // ebx = a[i]
+        IMULL BX, DX                 // edx = a[i] * b[i]
+        INCQ AX                      // i++
+        ADDL DX, R9                  // sum += product
+        CMPL AX, R8                  // i == N ?
+        JNE  LBB0_4
+
+LBB0_2:
+        MOVL R9, (CX)                // *res = sum (4-byte store)
+        RET
+        
+        
+        
+        

+ 267 - 0
dotproductasm/dotproduct_test.go

@@ -0,0 +1,267 @@
+// dotproduct_test.go
+package dotproduct
+
+import (
+	"runtime"
+	"sync/atomic"
+	"testing"
+	"unsafe"
+
+	"golang.org/x/sys/cpu"
+)
+
+// TestAVX2hasAVX checks on amd64 that a CPU reporting AVX2 also reports AVX,
+// and logs when AVX2 is not reported.
+func TestAVX2hasAVX(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		return
+	}
+	switch {
+	case cpu.X86.HasAVX2 && !cpu.X86.HasAVX:
+		t.Fatal("HasAVX expected true, got false")
+	case !cpu.X86.HasAVX2:
+		t.Log("HasAVX2=False")
+	}
+}
+// TestAVX512HasAVX2AndAVX checks on amd64 that a CPU reporting AVX-512 also
+// reports AVX and AVX2, and logs when AVX-512 is not reported.
+func TestAVX512HasAVX2AndAVX(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		return
+	}
+	if !cpu.X86.HasAVX512 {
+		t.Log("HasAVX512=False")
+		return
+	}
+	if !cpu.X86.HasAVX {
+		t.Fatal("HasAVX expected true, got false")
+	}
+	if !cpu.X86.HasAVX2 {
+		t.Fatal("HasAVX2 expected true, got false")
+	}
+}
+
+// DotProduct is the pure-Go reference: it returns the int32 dot product of
+// the first N elements of a and b.
+func DotProduct(a []int32, b []int32, N int32) (sum int32) {
+	for idx := 0; idx < int(N); idx++ {
+		sum += a[idx] * b[idx]
+	}
+	return sum
+}
+// TestSumAsm compares the assembly Sum with a sum computed in Go over the
+// values 1..24. The original only logged both numbers, so it could never
+// fail; the comparison is now asserted.
+func TestSumAsm(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		t.Skip("test case *TestSumAsm* not applicable")
+	}
+	d3 := make([]uint64, 24)
+	var sumGo uint64
+	for i := range d3 {
+		d3[i] = uint64(i + 1)
+		sumGo += d3[i]
+	}
+	sumAsm := Sum(d3)
+	t.Log("sumGo ", sumGo, ", sumAsm ", sumAsm)
+	if sumAsm != sumGo {
+		t.Errorf("Sum = %d, want %d", sumAsm, sumGo)
+	}
+}
+// TestDotProductAsm compares DotProductAsm with the pure-Go DotProduct on
+// two 24-element vectors. The original only logged both numbers, so it
+// could never fail; the comparison is now asserted.
+func TestDotProductAsm(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		t.Skip("test case *TestDotProductAsm* not applicable")
+	}
+	const len32 = 24
+	d1 := make([]int32, len32)
+	d2 := make([]int32, len32)
+	for i := 0; i < len32; i++ {
+		d1[i] = int32(i + 1)
+		d2[i] = int32(2 * i)
+	}
+
+	sumGo := DotProduct(d1, d2, len32)
+	sumAsm := DotProductAsm(d1, d2, len32)
+	t.Log("DotProductGo ", sumGo, ", DotProductAsm ", sumAsm)
+	if sumAsm != sumGo {
+		t.Errorf("DotProductAsm = %d, want %d", sumAsm, sumGo)
+	}
+}
+
+// TestDotProductAsmAvx compares DotProductAsmAvx with the pure-Go DotProduct
+// on two 32-element vectors when the CPU reports AVX2. The original only
+// logged both numbers, so it could never fail; the comparison is now
+// asserted.
+func TestDotProductAsmAvx(t *testing.T) {
+	if runtime.GOARCH != "amd64" || !cpu.X86.HasAVX2 {
+		t.Skip("test case *TestDotProductAsmAvx* not applicable")
+	}
+	const len32 = 32
+	d1 := make([]int32, len32)
+	d2 := make([]int32, len32)
+	for i := 0; i < len32; i++ {
+		d1[i] = int32(i + 1)
+		d2[i] = int32(2 * i)
+	}
+	sumGo := DotProduct(d1, d2, len32)
+	sumAvx := DotProductAsmAvx(d1, d2, len32)
+	t.Log("DotProductGo ", sumGo, ", DotProductAvx ", sumAvx)
+	if sumAvx != sumGo {
+		t.Errorf("DotProductAsmAvx = %d, want %d", sumAvx, sumGo)
+	}
+}
+// BenchmarkSum times the pure-Go SumGo over a 4096-element uint64 slice.
+func BenchmarkSum(b *testing.B) {
+	const len32 = 4096
+	data := make([]uint64, len32)
+	for idx := range data {
+		data[idx] = uint64(idx + 1)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0))))
+	var acc uint64
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		acc += SumGo(data) % len32
+	}
+}
+
+// BenchmarkSumParallel times the pure-Go SumGo over a 4096-element uint64
+// slice from the goroutines started by b.RunParallel.
+func BenchmarkSumParallel(b *testing.B) {
+	const len32 = 4096
+	data := make([]uint64, len32)
+	for idx := range data {
+		data[idx] = uint64(idx + 1)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0))))
+	worker := func(pb *testing.PB) {
+		for pb.Next() {
+			var acc uint64
+			acc += SumGo(data) % len32
+		}
+	}
+	b.ResetTimer()
+	b.RunParallel(worker)
+}
+
+// BenchmarkSumAsm times the assembly Sum over a 4096-element uint64 slice;
+// it is skipped on architectures other than amd64.
+func BenchmarkSumAsm(b *testing.B) {
+	if runtime.GOARCH != "amd64" {
+		b.Skip("test case *BenchmarkSumAsm* not applicable")
+	}
+	const len32 = 4096
+	data := make([]uint64, len32)
+	for idx := range data {
+		data[idx] = uint64(idx + 1)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0))))
+	var acc uint64
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		acc += Sum(data) % len32
+	}
+}
+
+// BenchmarkSumAsmParallel times the assembly Sum over a 4096-element uint64
+// slice from the goroutines started by b.RunParallel; it is skipped on
+// architectures other than amd64.
+//
+// The skip message now names this benchmark; it previously named
+// BenchmarkSumAsm.
+func BenchmarkSumAsmParallel(b *testing.B) {
+	if runtime.GOARCH != "amd64" {
+		b.Skip("test case *BenchmarkSumAsmParallel* not applicable")
+	}
+	const len32 = 4096
+	d1 := make([]uint64, len32)
+	for i := 0; i < len32; i++ {
+		d1[i] = uint64(i + 1)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(uint64(0))))
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			var sum2 uint64
+			sum2 += Sum(d1) % len32
+		}
+	})
+}
+
+// SumGo is the pure-Go reference: it returns the sum of all elements of a.
+func SumGo(a []uint64) (sum uint64) {
+	for _, v := range a {
+		sum += v
+	}
+	return sum
+}
+
+// BenchmarkDotProduct times the pure-Go DotProduct over two 1024-element
+// int32 slices.
+func BenchmarkDotProduct(b *testing.B) {
+	const len32 = 1024
+	left := make([]int32, len32)
+	right := make([]int32, len32)
+	for idx := range left {
+		left[idx] = int32(idx + 1)
+		right[idx] = int32(2 * idx)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0))))
+	var acc int32
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		acc += DotProduct(left, right, len32) % len32
+	}
+}
+
+// BenchmarkDotProductAsm times DotProductAsm over two 1024-element int32
+// slices; it is skipped on architectures other than amd64.
+func BenchmarkDotProductAsm(b *testing.B) {
+	if runtime.GOARCH != "amd64" {
+		b.Skip("test case *BenchmarkDotProductAsm* not applicable")
+	}
+	const len32 = 1024
+	left := make([]int32, len32)
+	right := make([]int32, len32)
+	for idx := range left {
+		left[idx] = int32(idx + 1)
+		right[idx] = int32(2 * idx)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0))))
+	var acc int32
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		acc += DotProductAsm(left, right, len32) % len32
+	}
+}
+
+// BenchmarkDotProductAsmAvx2 times DotProductAsmAvx over two 1024-element
+// int32 slices; it is skipped unless running on amd64 with AVX2 reported.
+func BenchmarkDotProductAsmAvx2(b *testing.B) {
+	if runtime.GOARCH != "amd64" || !cpu.X86.HasAVX2 {
+		b.Skip("test case *BenchmarkDotProductAsmAvx2* not applicable")
+	}
+	const len32 = 1024
+	left := make([]int32, len32)
+	right := make([]int32, len32)
+	for idx := range left {
+		left[idx] = int32(idx + 1)
+		right[idx] = int32(2 * idx)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0))))
+	var acc int32
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		acc += DotProductAsmAvx(left, right, len32) % len32
+	}
+}
+
+// BenchmarkDotProductAsmAvx2Parallel times DotProductAsmAvx over two
+// 1024-element int32 slices from the goroutines started by b.RunParallel;
+// it is skipped unless running on amd64 with AVX2 reported.
+//
+// The skip message now names this benchmark; it previously named
+// BenchmarkDotProductAsmAvx2.
+func BenchmarkDotProductAsmAvx2Parallel(b *testing.B) {
+	if runtime.GOARCH != "amd64" || !cpu.X86.HasAVX2 {
+		b.Skip("test case *BenchmarkDotProductAsmAvx2Parallel* not applicable")
+	}
+	const len32 = 1024
+	d1 := make([]int32, len32)
+	d2 := make([]int32, len32)
+	for i := 0; i < len32; i++ {
+		d1[i] = int32(i + 1)
+		d2[i] = int32(2 * i)
+	}
+	b.SetBytes(int64(len32 * unsafe.Sizeof(int32(0))))
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			var sum2 int32
+			sum2 += DotProductAsmAvx(d1, d2, len32) % len32
+		}
+	})
+}
+
+func BenchmarkHowItWorksParallel(b *testing.B) {
+	procs := uint32(0)
+	iters := uint64(0)
+	//b.SetParallelism(3) //number of goroutine to each b.RunParallel
+	b.SetBytes(int64(4 * unsafe.Sizeof(uint64(0))))
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		atomic.AddUint32(&procs, 1)
+		for pb.Next() {
+			atomic.AddUint64(&iters, 1)
+		}
+	})
+	b.Log("goroutine count :", procs, ", retrial count :", iters)
+}

+ 45 - 0
dotproductasm/dotproductavx_amd64.s

@@ -0,0 +1,45 @@
+// definition of func _dpavx_int32(a *int32, b *int32, gN *int32, res *int32)
+//
+// Stores in *res the 32-bit wrap-around dot product of the first *gN
+// elements of a and b. Any count is accepted: multiples of eight elements
+// go through the AVX2 loop, the remaining elements through a scalar loop,
+// and a count of zero or less stores 0.
+//
+// In "$32-32" the first 32 is the size in bytes of the stack frame (scratch
+// space for one YMM register) and the second 32 is the size of the
+// arguments passed in by the caller. TEXT flag 4 is NOSPLIT in textflag.h.
+TEXT ·_dpavx_int32(SB),4, $32-32
+// Load the address of a into DI, the address of b into SI, the element
+// count into R8 and the result address into R9. The arrays need no special
+// alignment because every vector load uses VMOVDQU.
+        MOVQ a+0(FP), DI
+        MOVQ b+8(FP), SI
+        MOVQ gN+16(FP), CX
+        MOVQ res+24(FP), R9    // result address
+        MOVLQSX (CX), R8       // value of gN: 32-bit load, sign-extended
+        XORQ AX, AX            // AX = number of elements consumed
+        XORL DX, DX            // DX = scalar sum
+// Y4 is an accumulator that sums all vector multiplication results.
+// Compute Y3 = Y1 * Y2 and Y4 = Y4 + Y3 using the VMOVDQU, VPMULLD
+// and VPADDD instructions, eight elements per iteration, for the largest
+// multiple-of-eight prefix of the arrays (R10 elements).
+  VPXOR Y4, Y4, Y4
+  MOVQ R8, R10
+  ANDQ $-8, R10
+  CMPQ R10, $0
+  JLE  tail              // fewer than 8 elements: scalar loop only
+start:
+  VMOVDQU (SI), Y1
+  ADDQ $32, SI
+  VMOVDQU (DI), Y2
+  ADDQ $32, DI
+  VPMULLD Y1, Y2, Y3
+  VPADDD Y3, Y4, Y4
+  ADDQ $8, AX
+  CMPQ AX, R10
+  JLT  start
+// Store Y4 to the stack frame, which is 32 bytes (256 bits) long, then
+// convert the vector result to a scalar by summing its eight INT32 lanes.
+  VMOVDQU Y4, d0-32(SP) // vector result to stack
+  LEAQ d0-32(SP), BX
+  MOVQ $8, CX          // number of lanes
+redux:  // 8 x int32 => int32 reduction
+  ADDL (BX), DX
+  ADDQ $4,  BX
+  DECQ CX
+  JNZ  redux
+// Scalar loop over the elements the vector loop did not cover. SI and DI
+// already point just past the vectorised prefix.
+tail:
+  CMPQ AX, R8
+  JGE  done
+  MOVL (SI), R11
+  IMULL (DI), R11
+  ADDL R11, DX
+  ADDQ $4, SI
+  ADDQ $4, DI
+  INCQ AX
+  JMP  tail
+done:
+  MOVL DX,(R9)
+  VZEROUPPER           // clear the upper YMM halves before returning
+  RET
+
+

+ 74 - 0
dotproductasm/dotproductclang_amd64.s

@@ -0,0 +1,74 @@
+// _dpavx_int32clang takes (a *int32, b *int32, gN *int32, res *int32) and
+// stores in *res the 32-bit wrap-around dot product of the first *gN
+// elements of a and b. Sixteen elements per iteration go through four XMM
+// accumulators; leftover elements, and inputs of 15 elements or fewer, go
+// through the scalar loop at LBB0_7. A count of zero or less stores 0.
+// NOTE(review): no Go declaration for this symbol is visible here; one is
+// needed before it can be called from Go.
+TEXT ·_dpavx_int32clang(SB),4, $0-32
+
+        MOVQ a+0(FP), DI
+        MOVQ b+8(FP), SI
+        MOVQ gN+16(FP), DX
+        MOVQ res+24(FP), CX
+
+        MOVL (DX),R8         // mov    r8d, dword [rdx]
+        XORL AX, AX          // xor    eax, eax   (sum, and the result for N <= 0)
+        CMPL R8,$0           // test    r8d, r8d
+        JLE  LBB0_8
+        CMPL R8,$15          // cmp    r8d, 15
+        JA   LBB0_4
+        XORQ R9, R9          // xor    r9d, r9d   (scalar loop starts at index 0)
+        JMP  LBB0_7
+
+LBB0_4:
+        MOVQ R8,R9                      // mov    r9d, r8d
+        ANDQ $-16,R9                    // and    r9d, -16
+        VPXOR X0,X0,X0                  // vpxor    xmm0, xmm0, xmm0
+        XORQ AX, AX                     // xor    eax, eax
+        VPXOR X1, X1, X1                // vpxor    xmm1, xmm1, xmm1
+        VPXOR X2, X2, X2                // vpxor    xmm2, xmm2, xmm2
+        VPXOR X3, X3, X3                // vpxor    xmm3, xmm3, xmm3
+
+LBB0_5:
+        VMOVDQU (SI)(AX*4), X4          // vmovdqu    xmm4, oword [rsi + 4*rax]
+        VMOVDQU 16(SI)(AX*4), X5        // vmovdqu    xmm5, oword [rsi + 4*rax + 16]
+        VMOVDQU 32(SI)(AX*4), X6        // vmovdqu    xmm6, oword [rsi + 4*rax + 32]
+        VMOVDQU 48(SI)(AX*4), X7        // vmovdqu    xmm7, oword [rsi + 4*rax + 48]
+
+        VPMULLD (DI)(AX*4), X4, X4      // vpmulld    xmm4, xmm4, oword [rdi + 4*rax]
+        VPADDD    X0, X4, X0            // vpaddd    xmm0, xmm4, xmm0
+
+        VPMULLD 16(DI)(AX*4), X5, X4    // vpmulld    xmm4, xmm5, oword [rdi + 4*rax + 16]
+        VPADDD    X1, X4, X1            // vpaddd    xmm1, xmm4, xmm1
+        VPMULLD 32(DI)(AX*4), X6, X4    // vpmulld    xmm4, xmm6, oword [rdi + 4*rax + 32]
+        VPMULLD 48(DI)(AX*4), X7, X5    // vpmulld    xmm5, xmm7, oword [rdi + 4*rax + 48]
+        VPADDD    X2, X4, X2            // vpaddd    xmm2, xmm4, xmm2
+        VPADDD    X3, X5, X3            // vpaddd    xmm3, xmm5, xmm3
+        ADDQ    $16,AX                  // add    rax, 16
+        CMPQ AX,R9                      // cmp    r9, rax
+        JNE  LBB0_5
+        VPADDD    X0, X1, X0            // vpaddd    xmm0, xmm1, xmm0
+        VPADDD    X0, X2, X0            // vpaddd    xmm0, xmm2, xmm0
+        VPADDD    X0, X3, X0            // vpaddd    xmm0, xmm3, xmm0
+
+        VPSHUFD    $78 , X0,X1          // vpshufd    xmm1, xmm0, 78
+        VPADDD    X1, X0, X0            // vpaddd    xmm0, xmm0, xmm1
+        VPSHUFD    $229, X0, X1         // vpshufd    xmm1, xmm0, 229
+        VPADDD    X1, X0, X0            // vpaddd    xmm0, xmm0, xmm1
+
+        VMOVD X0,AX                     // vmovd    eax, xmm0
+        CMPQ R8,R9                      // cmp    r9, r8
+        JE   LBB0_8
+
+LBB0_7:
+        MOVL (SI)(R9*4),DX   // mov    edx, dword [rsi + 4*r9]
+        IMULL (DI)(R9*4),DX  // imul    edx, dword [rdi + 4*r9]
+        ADDL DX,AX           // add    eax, edx
+        ADDQ $1,R9           // add    r9, 1
+        CMPQ R9,R8           // cmp    r8, r9
+        JNE  LBB0_7
+
+LBB0_8:
+        MOVL AX,(CX)         // mov    dword [rcx], eax   (store the result)
+        VZEROUPPER // JMB
+        RET
+        
+        

+ 2 - 0
dotproductasm/go.sum

@@ -0,0 +1,2 @@
+golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM=
+golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=