diff --git a/.vscode/launch.json b/.vscode/launch.json index 5fcfad64..f1bba173 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -159,8 +159,10 @@ "program": "${workspaceFolder}/samples/fir", "args": [ "-timing", - "-length=64", + "-length=8192", "-report-all", + "-gpus=1,2", + "-use-unified-memory=false", ], }, { diff --git a/accelsim_tracing/.gitignore b/accelsim_tracing/.gitignore new file mode 100644 index 00000000..5588025c --- /dev/null +++ b/accelsim_tracing/.gitignore @@ -0,0 +1,3 @@ +accelsim_tracing +output/ +example/ \ No newline at end of file diff --git a/accelsim_tracing/README.md b/accelsim_tracing/README.md new file mode 100644 index 00000000..92051850 Binary files /dev/null and b/accelsim_tracing/README.md differ diff --git a/accelsim_tracing/alu/alu.go b/accelsim_tracing/alu/alu.go new file mode 100644 index 00000000..71b05002 --- /dev/null +++ b/accelsim_tracing/alu/alu.go @@ -0,0 +1,17 @@ +package alu + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type ALU interface { + withParent(aluGroup *ALUGroup) ALU + Execute(inst nvidia.Instruction) +} + +func (a *ALUGroup) newALU() ALU { + switch a.meta.aluType { + case "int32": + return newInt32ALU().withParent(a) + default: + panic("Unknown ALU type") + } +} diff --git a/accelsim_tracing/alu/builder.go b/accelsim_tracing/alu/builder.go new file mode 100644 index 00000000..921f6780 --- /dev/null +++ b/accelsim_tracing/alu/builder.go @@ -0,0 +1,45 @@ +package alu + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type ALUGroup struct { + meta *aluGroupMetaData + alus []ALU +} + +type aluGroupMetaData struct { + aluType string + aluNum int32 +} + +func NewALUGroup() *ALUGroup { + return &ALUGroup{ + meta: &aluGroupMetaData{ + aluType: "undefined", + aluNum: 0, + }, + } +} + +func (a *ALUGroup) WithALUType(aluType string) *ALUGroup { + a.meta.aluType = aluType + return a +} + +func (a *ALUGroup) WithALUNum(num int32) *ALUGroup { + a.meta.aluNum = num + 
return a +} + +func (a *ALUGroup) Build() { + a.alus = make([]ALU, a.meta.aluNum) + for i := range a.alus { + a.alus[i] = a.newALU() + } +} + +func (a *ALUGroup) Execute(inst nvidia.Instruction) { + for _, alu := range a.alus { + alu.Execute(inst) + } +} diff --git a/accelsim_tracing/alu/doc.go b/accelsim_tracing/alu/doc.go new file mode 100644 index 00000000..ff02a5be --- /dev/null +++ b/accelsim_tracing/alu/doc.go @@ -0,0 +1,2 @@ +// Package alu contains the ALU implementation +package alu diff --git a/accelsim_tracing/alu/int32.go b/accelsim_tracing/alu/int32.go new file mode 100644 index 00000000..caa45c68 --- /dev/null +++ b/accelsim_tracing/alu/int32.go @@ -0,0 +1,19 @@ +package alu + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type int32ALU struct { + parent *ALUGroup +} + +func newInt32ALU() *int32ALU { + return &int32ALU{} +} + +func (a *int32ALU) withParent(aluGroup *ALUGroup) ALU { + a.parent = aluGroup + return a +} + +func (a *int32ALU) Execute(inst nvidia.Instruction) { +} diff --git a/accelsim_tracing/benchmark/build.go b/accelsim_tracing/benchmark/build.go new file mode 100644 index 00000000..7de4dbb9 --- /dev/null +++ b/accelsim_tracing/benchmark/build.go @@ -0,0 +1,45 @@ +package benchmark + +import ( + "errors" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/gpu" + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/trace" +) + +type BenchMark struct { + fromTrace bool + traceDirPath string + trace *trace.Trace +} + +func NewBenchMark() *BenchMark { + return &BenchMark{ + fromTrace: false, + traceDirPath: "", + trace: nil, + } +} + +func (bm *BenchMark) WithTraceDirPath(path string) *BenchMark { + bm.traceDirPath = path + bm.fromTrace = true + return bm +} + +func (bm *BenchMark) Build() error { + if bm.fromTrace == false { + return errors.New("no trace dir path specified") + } + bm.trace = trace.NewTrace().WithTraceDirPath(bm.traceDirPath) + bm.trace.Build() + return nil +} + +func (bm *BenchMark) Exec(gpu *gpu.GPU) 
error { + if bm.fromTrace == false { + panic("No trace dir path specified") + } + err := bm.trace.Exec(gpu) + return err +} diff --git a/accelsim_tracing/benchmark/doc.go b/accelsim_tracing/benchmark/doc.go new file mode 100644 index 00000000..bc000d26 --- /dev/null +++ b/accelsim_tracing/benchmark/doc.go @@ -0,0 +1,2 @@ +// Package benchmark contains the driver which links traces and the simulator +package benchmark diff --git a/accelsim_tracing/gpc/builder.go b/accelsim_tracing/gpc/builder.go new file mode 100644 index 00000000..8b78cba3 --- /dev/null +++ b/accelsim_tracing/gpc/builder.go @@ -0,0 +1,142 @@ +package gpc + +import ( + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/sm" +) + +type GPC struct { + meta *gpcMetaData + dispatcher gpcDispatcher + sms []*sm.SM +} + +type gpcMetaData struct { + smNum int32 + smUnitNum int32 + + gpcStrategy string + smStrategy string + smUnitStrategy string + + l2CacheSize int32 + l1CacheSize int32 + l0CacheSize int32 + + registerFileSize int32 + laneSize int32 + + alus []struct { + aluType string + aluNum int32 + } +} + +func NewGPC() *GPC { + return &GPC{ + meta: &gpcMetaData{ + smNum: 0, + smUnitNum: 0, + + gpcStrategy: "default", + smStrategy: "default", + smUnitStrategy: "default", + + l2CacheSize: 0, + l1CacheSize: 0, + l0CacheSize: 0, + + registerFileSize: 0, + laneSize: 0, + + alus: nil, + }, + dispatcher: nil, + sms: nil, + } +} + +func (g *GPC) WithSMNum(num int32) *GPC { + g.meta.smNum = num + return g +} + +func (g *GPC) WithGPCStrategy(strategy string) *GPC { + g.meta.gpcStrategy = strategy + return g +} + +func (g *GPC) WithSMUnitNum(num int32) *GPC { + g.meta.smUnitNum = num + return g +} + +func (g *GPC) WithSMStrategy(strategy string) *GPC { + g.meta.smStrategy = strategy + return g +} + +func (g *GPC) WithSMUnitStrategy(strategy string) *GPC { + g.meta.smUnitStrategy = strategy + return g +} + +func (g *GPC) WithL2CacheSize(size int32) *GPC { + 
g.meta.l2CacheSize = size + return g +} + +func (g *GPC) WithL1CacheSize(size int32) *GPC { + g.meta.l1CacheSize = size + return g +} + +func (g *GPC) WithL0CacheSize(size int32) *GPC { + g.meta.l0CacheSize = size + return g +} + +func (g *GPC) WithRegisterFileSize(size int32) *GPC { + g.meta.registerFileSize = size + return g +} + +func (g *GPC) WithLaneSize(size int32) *GPC { + g.meta.laneSize = size + return g +} + +func (g *GPC) WithALU(aluType string, num int32) *GPC { + g.meta.alus = append(g.meta.alus, struct { + aluType string + aluNum int32 + }{aluType: aluType, aluNum: num}) + return g +} + +func (g *GPC) Build() { + g.buildDispatcher() + g.sms = make([]*sm.SM, g.meta.smNum) + for i := 0; i < int(g.meta.smNum); i++ { + g.sms[i] = sm.NewSM(). + WithSMStrategy(g.meta.smStrategy). + WithSMUnitNum(g.meta.smUnitNum). + WithSMUnitStrategy(g.meta.smUnitStrategy). + WithL1CacheSize(g.meta.l1CacheSize). + WithL0CacheSize(g.meta.l0CacheSize). + WithRegisterFileSize(g.meta.registerFileSize). 
+ WithLaneSize(g.meta.laneSize) + for _, alu := range g.meta.alus { + g.sms[i].WithALU(alu.aluType, alu.aluNum) + } + g.sms[i].Build() + } +} + +func (g *GPC) IsFree() bool { + return true +} + +func (g *GPC) Execute(tb *nvidia.ThreadBlock) { + g.dispatcher.dispatch(tb) +} diff --git a/accelsim_tracing/gpc/default.go b/accelsim_tracing/gpc/default.go new file mode 100644 index 00000000..40a8b5ab --- /dev/null +++ b/accelsim_tracing/gpc/default.go @@ -0,0 +1,32 @@ +package gpc + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type defaultDispatcher struct { + parent *GPC +} + +func newDefaultDispatcher() *defaultDispatcher { + return &defaultDispatcher{} +} + +func (d *defaultDispatcher) withParent(gpc *GPC) gpcDispatcher { + d.parent = gpc + return d +} + +func (d *defaultDispatcher) dispatch(tb *nvidia.ThreadBlock) { + for { + flag := false + for _, sm := range d.parent.sms { + if sm.IsFree() { + sm.Execute(tb) + flag = true + break + } + } + if flag { + break + } + } +} diff --git a/accelsim_tracing/gpc/dispatcher.go b/accelsim_tracing/gpc/dispatcher.go new file mode 100644 index 00000000..a9eed59d --- /dev/null +++ b/accelsim_tracing/gpc/dispatcher.go @@ -0,0 +1,17 @@ +package gpc + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type gpcDispatcher interface { + withParent(gpc *GPC) gpcDispatcher + dispatch(tb *nvidia.ThreadBlock) +} + +func (g *GPC) buildDispatcher() { + switch g.meta.gpcStrategy { + case "default": + g.dispatcher = newDefaultDispatcher().withParent(g) + default: + panic("Unknown dispatcher strategy") + } +} diff --git a/accelsim_tracing/gpc/doc.go b/accelsim_tracing/gpc/doc.go new file mode 100644 index 00000000..26c9aba3 --- /dev/null +++ b/accelsim_tracing/gpc/doc.go @@ -0,0 +1,2 @@ +// Package gpc implements the simulation components for the GPC Level. 
+package gpc diff --git a/accelsim_tracing/gpu/builder.go b/accelsim_tracing/gpu/builder.go new file mode 100644 index 00000000..1667cfc7 --- /dev/null +++ b/accelsim_tracing/gpu/builder.go @@ -0,0 +1,157 @@ +package gpu + +import ( + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/gpc" + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" +) + +type GPU struct { + meta *gpuMetaData + dispatcher gpuDispatcher + gpcs []*gpc.GPC +} + +type gpuMetaData struct { + gpcNum int32 + smNum int32 + smUnitNum int32 + + gpuStrategy string + gpcStrategy string + smStrategy string + smUnitStrategy string + + l2CacheSize int32 + l1CacheSize int32 + l0CacheSize int32 + + registerFileSize int32 + laneSize int32 + + alus []struct { + aluType string + aluNum int32 + } +} + +func NewGPU() *GPU { + return &GPU{ + meta: &gpuMetaData{ + gpcNum: 0, + smNum: 0, + smUnitNum: 0, + + gpuStrategy: "default", + gpcStrategy: "default", + smStrategy: "default", + smUnitStrategy: "default", + + l2CacheSize: 0, + l1CacheSize: 0, + l0CacheSize: 0, + + registerFileSize: 0, + laneSize: 0, + + alus: nil, + }, + dispatcher: nil, + gpcs: nil, + } +} + +func (g *GPU) WithGPCNum(num int32) *GPU { + g.meta.gpcNum = num + return g +} + +func (g *GPU) WithSMNum(num int32) *GPU { + g.meta.smNum = num + return g +} + +func (g *GPU) WithSMUnitNum(num int32) *GPU { + g.meta.smUnitNum = num + return g +} + +func (g *GPU) WithGPUStrategy(strategy string) *GPU { + g.meta.gpuStrategy = strategy + return g +} + +func (g *GPU) WithGPCStrategy(strategy string) *GPU { + g.meta.gpcStrategy = strategy + return g +} + +func (g *GPU) WithSMStrategy(strategy string) *GPU { + g.meta.smStrategy = strategy + return g +} + +func (g *GPU) WithSMUnitStrategy(strategy string) *GPU { + g.meta.smUnitStrategy = strategy + return g +} + +func (g *GPU) WithL2CacheSize(size int32) *GPU { + g.meta.l2CacheSize = size + return g +} + +func (g *GPU) WithL1CacheSize(size int32) *GPU { + g.meta.l1CacheSize = size + return g +} + +func (g 
*GPU) WithL0CacheSize(size int32) *GPU { + g.meta.l0CacheSize = size + return g +} + +func (g *GPU) WithRegisterFileSize(size int32) *GPU { + g.meta.registerFileSize = size + return g +} + +func (g *GPU) WithLaneSize(size int32) *GPU { + g.meta.laneSize = size + return g +} + +func (g *GPU) WithALU(aluType string, num int32) *GPU { + g.meta.alus = append(g.meta.alus, struct { + aluType string + aluNum int32 + }{aluType: aluType, aluNum: num}) + return g +} + +func (g *GPU) Build() { + g.buildDispatcher() + g.gpcs = make([]*gpc.GPC, g.meta.gpcNum) + for i := 0; i < int(g.meta.gpcNum); i++ { + g.gpcs[i] = gpc.NewGPC(). + WithSMNum(g.meta.smNum). + WithSMUnitNum(g.meta.smUnitNum). + WithGPCStrategy(g.meta.gpcStrategy). + WithSMStrategy(g.meta.smStrategy). + WithSMUnitStrategy(g.meta.smUnitStrategy). + WithL2CacheSize(g.meta.l2CacheSize). + WithL1CacheSize(g.meta.l1CacheSize). + WithL0CacheSize(g.meta.l0CacheSize). + WithRegisterFileSize(g.meta.registerFileSize). + WithLaneSize(g.meta.laneSize) + for _, alu := range g.meta.alus { + g.gpcs[i].WithALU(alu.aluType, alu.aluNum) + } + g.gpcs[i].Build() + } +} + +// RunThreadBlock runs a threadblock on the GPU +// [todo] how to handle the relationship between trace.threadblock and truethreadblock +func (g *GPU) RunThreadBlock(tb *nvidia.ThreadBlock) { + g.dispatcher.dispatch(tb) +} diff --git a/accelsim_tracing/gpu/default.go b/accelsim_tracing/gpu/default.go new file mode 100644 index 00000000..56fc9101 --- /dev/null +++ b/accelsim_tracing/gpu/default.go @@ -0,0 +1,34 @@ +package gpu + +import ( + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" +) + +type defaultDispatcher struct { + parent *GPU +} + +func newDefaultDispatcher() *defaultDispatcher { + return &defaultDispatcher{} +} + +func (d *defaultDispatcher) withParent(gpu *GPU) gpuDispatcher { + d.parent = gpu + return d +} + +func (d *defaultDispatcher) dispatch(tb *nvidia.ThreadBlock) { + for { + flag := false + for _, gpc := range d.parent.gpcs { + if 
gpc.IsFree() { + gpc.Execute(tb) + flag = true + break + } + } + if flag { + break + } + } +} diff --git a/accelsim_tracing/gpu/dispatcher.go b/accelsim_tracing/gpu/dispatcher.go new file mode 100644 index 00000000..515ab81a --- /dev/null +++ b/accelsim_tracing/gpu/dispatcher.go @@ -0,0 +1,17 @@ +package gpu + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type gpuDispatcher interface { + withParent(gpu *GPU) gpuDispatcher + dispatch(tb *nvidia.ThreadBlock) +} + +func (g *GPU) buildDispatcher() { + switch g.meta.gpuStrategy { + case "default": + g.dispatcher = newDefaultDispatcher().withParent(g) + default: + panic("Unknown dispatcher strategy") + } +} diff --git a/accelsim_tracing/gpu/doc.go b/accelsim_tracing/gpu/doc.go new file mode 100644 index 00000000..2f735511 --- /dev/null +++ b/accelsim_tracing/gpu/doc.go @@ -0,0 +1,2 @@ +// Package gpu implements the simulation components for the GPU level. +package gpu diff --git a/accelsim_tracing/nvidia/byte.go b/accelsim_tracing/nvidia/byte.go new file mode 100644 index 00000000..72d614d6 --- /dev/null +++ b/accelsim_tracing/nvidia/byte.go @@ -0,0 +1,7 @@ +package nvidia + +const ( + BYTE = 8 + WORD = 16 + DWORD = 32 +) diff --git a/accelsim_tracing/nvidia/dim3.go b/accelsim_tracing/nvidia/dim3.go new file mode 100644 index 00000000..cd231980 --- /dev/null +++ b/accelsim_tracing/nvidia/dim3.go @@ -0,0 +1,3 @@ +package nvidia + +type Dim3 = [3]int diff --git a/accelsim_tracing/nvidia/doc.go b/accelsim_tracing/nvidia/doc.go new file mode 100644 index 00000000..bd4c0d06 --- /dev/null +++ b/accelsim_tracing/nvidia/doc.go @@ -0,0 +1,2 @@ +// Package nvidia includes basic const, types and structs for nvidia tracing +package nvidia diff --git a/accelsim_tracing/nvidia/opcode.go b/accelsim_tracing/nvidia/opcode.go new file mode 100644 index 00000000..7413edca --- /dev/null +++ b/accelsim_tracing/nvidia/opcode.go @@ -0,0 +1,57 @@ +package nvidia + +import "log" + +// VariableType [todo] how to construct 
these? +type VariableType int32 + +const ( + VariableDefault VariableType = iota + VariableError + VariableINT32 + VariableFP32 + VariableFP64 +) + +type OpCodeType int32 + +const ( + OpCodeDefault OpCodeType = iota + OpCodeError + IMADMOVU32 +) + +type Opcode struct { + rawText string + opType OpCodeType + varType VariableType +} + +func NewOpcode(rawText string) *Opcode { + op, ok := opcodeTable[rawText] + if !ok { + op = Opcode{rawText, OpCodeError, VariableError} + log.Panic("Unknown opcode: ", rawText) + } + return &op +} + +func (op *Opcode) String() string { + return op.rawText +} + +func (op *Opcode) OpcodeType() OpCodeType { + return op.opType +} + +func (op *Opcode) VariableType() VariableType { + return op.varType +} + +var opcodeTable map[string]Opcode + +func init() { + opcodeTable = make(map[string]Opcode) + + opcodeTable["IMAD.MOV.U32"] = Opcode{"IMAD.MOV.U32", IMADMOVU32, VariableINT32} +} diff --git a/accelsim_tracing/nvidia/register.go b/accelsim_tracing/nvidia/register.go new file mode 100644 index 00000000..c7e7f074 --- /dev/null +++ b/accelsim_tracing/nvidia/register.go @@ -0,0 +1,44 @@ +package nvidia + +import ( + "fmt" + "log" +) + +type Register struct { + rawText string + regID int32 + isZero bool +} + +func NewRegister(rawText string) *Register { + reg, ok := registerTable[rawText] + if !ok { + reg = Register{rawText, -1, false} + log.Panic("Unknown register: ", rawText) + } + return &reg +} + +func (r *Register) String() string { + return r.rawText +} + +func (r *Register) ID() int32 { + return r.regID +} + +func (r *Register) IsZeroRegister() bool { + return r.isZero +} + +var registerTable map[string]Register + +func init() { + registerTable = make(map[string]Register) + + for i := 0; i < 32; i++ { + registerTable[fmt.Sprintf("R%d", i)] = Register{fmt.Sprintf("R%d", i), int32(i), false} + } + registerTable["R255"] = Register{"R255", 255, true} +} diff --git a/accelsim_tracing/nvidia/thread_block.go 
b/accelsim_tracing/nvidia/thread_block.go new file mode 100644 index 00000000..2c4754ef --- /dev/null +++ b/accelsim_tracing/nvidia/thread_block.go @@ -0,0 +1,26 @@ +package nvidia + +type ThreadBlock struct { + WarpNum int + Warps []*Warp +} + +type Warp struct { + InstNum int + Insts []*Instruction +} + +type Instruction struct { + PC int32 + Mask int64 + DestNum int32 + DestRegs []*Register + OpCode *Opcode + SrcNum int32 + SrcRegs []*Register + MemWidth int32 + AddressCompress int32 + MemAddress int64 + MemAddressSuffix1 int32 + MemAddressSuffix2 []int32 +} diff --git a/accelsim_tracing/sm/builder.go b/accelsim_tracing/sm/builder.go new file mode 100644 index 00000000..812f1c71 --- /dev/null +++ b/accelsim_tracing/sm/builder.go @@ -0,0 +1,119 @@ +package sm + +import ( + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/smunit" +) + +type SM struct { + meta *smMetaData + dispatcher smDispatcher + smUnits []*smunit.SMUnit +} + +type smMetaData struct { + smUnitNum int32 + + smStrategy string + smUnitStrategy string + + l2CacheSize int32 + l1CacheSize int32 + l0CacheSize int32 + + registerFileSize int32 + laneSize int32 + + alus []struct { + aluType string + aluNum int32 + } +} + +func NewSM() *SM { + return &SM{ + meta: &smMetaData{ + smUnitNum: 0, + + smStrategy: "default", + smUnitStrategy: "default", + + l1CacheSize: 0, + l0CacheSize: 0, + + registerFileSize: 0, + laneSize: 0, + + alus: nil, + }, + dispatcher: nil, + smUnits: nil, + } +} + +func (s *SM) WithSMStrategy(strategy string) *SM { + s.meta.smStrategy = strategy + return s +} + +func (s *SM) WithSMUnitNum(num int32) *SM { + s.meta.smUnitNum = num + return s +} + +func (s *SM) WithSMUnitStrategy(strategy string) *SM { + s.meta.smUnitStrategy = strategy + return s +} + +func (s *SM) WithL1CacheSize(size int32) *SM { + s.meta.l1CacheSize = size + return s +} + +func (s *SM) WithL0CacheSize(size int32) *SM { + s.meta.l0CacheSize = size + return s 
+} + +func (s *SM) WithRegisterFileSize(size int32) *SM { + s.meta.registerFileSize = size + return s +} + +func (s *SM) WithLaneSize(size int32) *SM { + s.meta.laneSize = size + return s +} + +func (s *SM) WithALU(aluType string, aluNum int32) *SM { + s.meta.alus = append(s.meta.alus, struct { + aluType string + aluNum int32 + }{aluType: aluType, aluNum: aluNum}) + return s +} + +func (s *SM) Build() { + s.buildDispatcher() + s.smUnits = make([]*smunit.SMUnit, s.meta.smUnitNum) + for i := 0; i < int(s.meta.smUnitNum); i++ { + s.smUnits[i] = smunit.NewSMUnit(). + WithSMUnitStrategy(s.meta.smUnitStrategy). + WithL0CacheSize(s.meta.l0CacheSize). + WithRegisterFileSize(s.meta.registerFileSize). + WithLaneSize(s.meta.laneSize) + for _, alu := range s.meta.alus { + s.smUnits[i].WithALU(alu.aluType, alu.aluNum) + } + s.smUnits[i].Build() + } +} + +func (s *SM) IsFree() bool { + return true +} + +func (s *SM) Execute(tb *nvidia.ThreadBlock) { + s.dispatcher.dispatch(tb) +} diff --git a/accelsim_tracing/sm/default.go b/accelsim_tracing/sm/default.go new file mode 100644 index 00000000..d13f70f5 --- /dev/null +++ b/accelsim_tracing/sm/default.go @@ -0,0 +1,34 @@ +package sm + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type defaultDispatcher struct { + parent *SM +} + +func newDefaultDispatcher() *defaultDispatcher { + return &defaultDispatcher{} +} + +func (d *defaultDispatcher) withParent(sm *SM) smDispatcher { + d.parent = sm + return d +} + +func (d *defaultDispatcher) dispatch(tb *nvidia.ThreadBlock) { + for _, warp := range tb.Warps { + for { + flag := false + for _, smUnit := range d.parent.smUnits { + if smUnit.IsFree() { + smUnit.Execute(warp) + flag = true + break + } + } + if flag { + break + } + } + } +} diff --git a/accelsim_tracing/sm/dispacther.go b/accelsim_tracing/sm/dispacther.go new file mode 100644 index 00000000..6d2a2a61 --- /dev/null +++ b/accelsim_tracing/sm/dispacther.go @@ -0,0 +1,17 @@ +package sm + +import 
"github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type smDispatcher interface { + withParent(sm *SM) smDispatcher + dispatch(tb *nvidia.ThreadBlock) +} + +func (s *SM) buildDispatcher() { + switch s.meta.smStrategy { + case "default": + s.dispatcher = newDefaultDispatcher().withParent(s) + default: + panic("Unknown dispatch strategy") + } +} diff --git a/accelsim_tracing/sm/doc.go b/accelsim_tracing/sm/doc.go new file mode 100644 index 00000000..e53f0941 --- /dev/null +++ b/accelsim_tracing/sm/doc.go @@ -0,0 +1,2 @@ +// Package sm implements the simulation components for the SM level. +package sm diff --git a/accelsim_tracing/smunit/builder.go b/accelsim_tracing/smunit/builder.go new file mode 100644 index 00000000..ced59820 --- /dev/null +++ b/accelsim_tracing/smunit/builder.go @@ -0,0 +1,93 @@ +package smunit + +import ( + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/alu" + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" +) + +type SMUnit struct { + meta *smUnitMetaData + dispatcher smUnitDispatcher + registerFile *RegisterFile + aluGroup []*alu.ALUGroup +} + +type smUnitMetaData struct { + smUnitStrategy string + + l0CacheSize int32 + + registerFileSize int32 + laneSize int32 + + alus []struct { + aluType string + aluNum int32 + } +} + +func NewSMUnit() *SMUnit { + return &SMUnit{ + meta: &smUnitMetaData{ + smUnitStrategy: "default", + + l0CacheSize: 0, + + registerFileSize: 0, + laneSize: 0, + + alus: nil, + }, + dispatcher: nil, + registerFile: nil, + aluGroup: nil, + } +} + +func (s *SMUnit) WithSMUnitStrategy(strategy string) *SMUnit { + s.meta.smUnitStrategy = strategy + return s +} + +func (s *SMUnit) WithL0CacheSize(size int32) *SMUnit { + s.meta.l0CacheSize = size + return s +} + +func (s *SMUnit) WithRegisterFileSize(size int32) *SMUnit { + s.meta.registerFileSize = size + return s +} + +func (s *SMUnit) WithLaneSize(size int32) *SMUnit { + s.meta.laneSize = size + return s +} + +func (s *SMUnit) WithALU(aluType string, num int32) 
*SMUnit { + s.meta.alus = append(s.meta.alus, struct { + aluType string + aluNum int32 + }{aluType: aluType, aluNum: num}) + return s +} + +func (s *SMUnit) Build() { + s.buildDispatcher() + s.buildRegisterFile(s.meta.registerFileSize, s.meta.laneSize) + s.aluGroup = make([]*alu.ALUGroup, len(s.meta.alus)) + for i, a := range s.meta.alus { + s.aluGroup[i] = alu.NewALUGroup(). + WithALUType(a.aluType). + WithALUNum(a.aluNum) + s.aluGroup[i].Build() + } +} + +func (s *SMUnit) IsFree() bool { + return true +} + +func (s *SMUnit) Execute(warp *nvidia.Warp) { + s.dispatcher.dispatch(warp) +} diff --git a/accelsim_tracing/smunit/default.go b/accelsim_tracing/smunit/default.go new file mode 100644 index 00000000..220fd191 --- /dev/null +++ b/accelsim_tracing/smunit/default.go @@ -0,0 +1,19 @@ +package smunit + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type defaultDispatcher struct { + parent *SMUnit +} + +func newDefaultDispatcher() *defaultDispatcher { + return &defaultDispatcher{} +} + +func (d *defaultDispatcher) withParent(sm *SMUnit) smUnitDispatcher { + d.parent = sm + return d +} + +func (d *defaultDispatcher) dispatch(warp *nvidia.Warp) { +} diff --git a/accelsim_tracing/smunit/dispatcher.go b/accelsim_tracing/smunit/dispatcher.go new file mode 100644 index 00000000..125be691 --- /dev/null +++ b/accelsim_tracing/smunit/dispatcher.go @@ -0,0 +1,17 @@ +package smunit + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +type smUnitDispatcher interface { + withParent(sm *SMUnit) smUnitDispatcher + dispatch(tb *nvidia.Warp) +} + +func (s *SMUnit) buildDispatcher() { + switch s.meta.smUnitStrategy { + case "default": + s.dispatcher = newDefaultDispatcher().withParent(s) + default: + panic("Unknown dispatch strategy") + } +} diff --git a/accelsim_tracing/smunit/doc.go b/accelsim_tracing/smunit/doc.go new file mode 100644 index 00000000..18798c1f --- /dev/null +++ b/accelsim_tracing/smunit/doc.go @@ -0,0 +1,2 @@ +// Package 
smunit implements the simulation components for the SM Unit level. +package smunit \ No newline at end of file diff --git a/accelsim_tracing/smunit/registerfile.go b/accelsim_tracing/smunit/registerfile.go new file mode 100644 index 00000000..a99a97be --- /dev/null +++ b/accelsim_tracing/smunit/registerfile.go @@ -0,0 +1,21 @@ +package smunit + +type RegisterFile struct { + RfSize int32 + rfLaneSize int32 + buf []byte + byteSizePerLane int32 +} + +func (r *RegisterFile) Read(offset int32, width int32) { +} + +func (r *RegisterFile) Write(offset int32, width int32) { +} + +func (s *SMUnit) buildRegisterFile(size int32, sizePerLane int32) { + s.registerFile = &RegisterFile{ + buf: make([]byte, size), + byteSizePerLane: sizePerLane, + } +} diff --git a/accelsim_tracing/trace/convert.go b/accelsim_tracing/trace/convert.go new file mode 100644 index 00000000..aa1fdca2 --- /dev/null +++ b/accelsim_tracing/trace/convert.go @@ -0,0 +1,41 @@ +package trace + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" + +func (tb *threadBlock) generateNVThreadBlock() *nvidia.ThreadBlock { + nvtb := &nvidia.ThreadBlock{ + WarpNum: len(tb.warps), + } + for _, wp := range tb.warps { + nvtb.Warps = append(nvtb.Warps, wp.generateNVWarp()) + } + return nvtb +} + +func (wp *warp) generateNVWarp() *nvidia.Warp { + nvwp := &nvidia.Warp{ + InstNum: len(wp.instructions), + } + for _, inst := range wp.instructions { + nvwp.Insts = append(nvwp.Insts, inst.generateNVInst()) + } + return nvwp +} + +func (inst *instruction) generateNVInst() *nvidia.Instruction { + nvinst := &nvidia.Instruction{ + PC: inst.PC, + Mask: inst.Mask, + DestNum: inst.DestNum, + DestRegs: inst.DestRegs, + OpCode: inst.OpCode, + SrcNum: inst.SrcNum, + SrcRegs: inst.SrcRegs, + MemWidth: inst.MemWidth, + AddressCompress: inst.AddressCompress, + MemAddress: inst.MemAddress, + MemAddressSuffix1: inst.MemAddressSuffix1, + MemAddressSuffix2: inst.MemAddressSuffix2, + } + return nvinst +} diff --git 
a/accelsim_tracing/trace/doc.go b/accelsim_tracing/trace/doc.go new file mode 100644 index 00000000..984da148 --- /dev/null +++ b/accelsim_tracing/trace/doc.go @@ -0,0 +1,2 @@ +// Package trace deals with trace parsing +package trace diff --git a/accelsim_tracing/trace/kernel.go b/accelsim_tracing/trace/kernel.go new file mode 100644 index 00000000..86ace4ec --- /dev/null +++ b/accelsim_tracing/trace/kernel.go @@ -0,0 +1,26 @@ +package trace + +import ( + "path" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/gpu" +) + +type kernel struct { // trace execs interface + parent *Trace + + rawText string + filePath string + traceGroup *traceGroup +} + +func (te *kernel) Type() string { + return "kernel" +} + +func (te *kernel) Execute(gpu *gpu.GPU) error { + tg := NewTraceGroup().WithFilePath(path.Join(te.parent.traceDirPath, te.filePath)) + tg.Build() + err := tg.Exec(gpu) + return err +} diff --git a/accelsim_tracing/trace/memory_copy.go b/accelsim_tracing/trace/memory_copy.go new file mode 100644 index 00000000..2acf02ad --- /dev/null +++ b/accelsim_tracing/trace/memory_copy.go @@ -0,0 +1,24 @@ +package trace + +import "github.com/sarchlab/mgpusim/v3/accelsim_tracing/gpu" + +type memCopy struct { // trace execs interface + parent *Trace + + rawText string + h2d bool + startAddr uint64 + length uint64 +} + +type memCopyParent struct { + trace *Trace +} + +func (te *memCopy) Type() string { + return "memcopy" +} + +func (te *memCopy) Execute(gpu *gpu.GPU) error { + return nil +} diff --git a/accelsim_tracing/trace/thread_block.go b/accelsim_tracing/trace/thread_block.go new file mode 100644 index 00000000..9c96537c --- /dev/null +++ b/accelsim_tracing/trace/thread_block.go @@ -0,0 +1,53 @@ +package trace + +import ( + "fmt" + "log" + "strings" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" +) + +type threadBlock struct { + parent *traceGroup + rawContext struct { + blockDim string + } + + threadBlockDim nvidia.Dim3 + warps []*warp +} + +func 
parseThreadBlocks(lines []string) *threadBlock { + tb := &threadBlock{} + dim := parseThreadBlockDim(lines) + tb.threadBlockDim = *dim + for i, line := range lines { + if strings.HasPrefix(line, "warp") { + wp := parseWarp(lines[i:]) // [todo] too many copies + wp.parent = tb + tb.warps = append(tb.warps, wp) + } + } + return tb +} + +func parseThreadBlockDim(lines []string) *nvidia.Dim3 { + for _, line := range lines { + if strings.HasPrefix(line, "thread block") { + d := &nvidia.Dim3{} + elems := strings.Split(line, "=") + if len(elems) != 2 { + log.Panicf("Invalid thread block dim line: %s", line) + } + value := strings.TrimSpace(elems[1]) + _, err := fmt.Sscanf(value, "%d,%d,%d", &d[0], &d[1], &d[2]) + if err != nil { + log.Panicf("Invalid thread block dim value: %s", value) + } + return d + } + } + log.Panic("Cannot find thread block dim") + return nil +} diff --git a/accelsim_tracing/trace/trace.go b/accelsim_tracing/trace/trace.go new file mode 100644 index 00000000..da8a7516 --- /dev/null +++ b/accelsim_tracing/trace/trace.go @@ -0,0 +1,63 @@ +package trace + +import ( + "bufio" + "log" + "os" + "path" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/gpu" +) + +type Trace struct { + traceDirPath string + traceExecs []traceExecs +} + +func NewTrace() *Trace { + return &Trace{ + traceDirPath: "", + traceExecs: nil, + } +} + +func (t *Trace) WithTraceDirPath(path string) *Trace { + t.traceDirPath = path + return t +} + +func (t *Trace) Build() { + t.parseKernelsList() +} + +func (t *Trace) Exec(gpu *gpu.GPU) error { + for _, tg := range t.traceExecs { + err := tg.Execute(gpu) + if err != nil { + return err + } + } + return nil +} + +func (t *Trace) parseKernelsList() { + filePath := path.Join(t.traceDirPath, "kernelslist.g") + file, err := os.Open(filePath) + if err != nil { + log.Panic(err) + } + defer file.Close() + + var lines []string + scanner := bufio.NewScanner(file) + for scanner.Scan() { + if scanner.Text() != "" { + lines = append(lines, 
scanner.Text()) + } + } + + for _, line := range lines { + te := parseTraceExecs(line, t) + t.traceExecs = append(t.traceExecs, te) + } +} diff --git a/accelsim_tracing/trace/trace_execs.go b/accelsim_tracing/trace/trace_execs.go new file mode 100644 index 00000000..a2c81c15 --- /dev/null +++ b/accelsim_tracing/trace/trace_execs.go @@ -0,0 +1,45 @@ +package trace + +import ( + "fmt" + "log" + "strings" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/gpu" +) + +type traceExecs interface { + Type() string + Execute(gpu *gpu.GPU) error +} + +func parseTraceExecs(rawText string, trace *Trace) traceExecs { + if strings.HasPrefix(rawText, "Memcpy") { + /* + format : H2D or D2H, start, length + example : HtoD,0x7f0,0x1000 + */ + res := strings.Split(rawText, ",") + m := &memCopy{ + parent: trace, + rawText: rawText, + h2d: strings.Contains(res[0], "HtoD"), + } + fmt.Sscanf(res[1], "%v", &m.startAddr) + fmt.Sscanf(res[2], "%v", &m.length) + return m + } else if strings.HasPrefix(rawText, "kernel") { + /* + format : kernel name + example : kernel_0 + */ + k := &kernel{ + parent: trace, + rawText: rawText, + filePath: rawText, + } + return k + } + log.Panicf("Unknown trace group rawText: %s", rawText) + return nil +} diff --git a/accelsim_tracing/trace/trace_group.go b/accelsim_tracing/trace/trace_group.go new file mode 100644 index 00000000..edd930a1 --- /dev/null +++ b/accelsim_tracing/trace/trace_group.go @@ -0,0 +1,97 @@ +package trace + +import ( + "bufio" + "container/list" + "log" + "os" + "strings" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/gpu" +) + +type traceGroup struct { + filePath string + file *os.File + scanner *bufio.Scanner + hasParsedTraceHeader bool + traceHeader *traceHeader + threadBlockQueue *list.List +} + +func NewTraceGroup() *traceGroup { + return &traceGroup{ + filePath: "", + threadBlockQueue: list.New(), + } +} + +func (tg *traceGroup) WithFilePath(path string) *traceGroup { + tg.filePath = path + return tg +} + +func (tg 
*traceGroup) Build() { + tg.buildFileScanner() + tg.parseTraceHeader() +} + +func (tg *traceGroup) Exec(gpu *gpu.GPU) error { + // [todo] threadblocks can be parallelized to save memory + tg.parseThreadBlocks() + + for it := tg.threadBlockQueue.Front(); it != nil; it = it.Next() { + gpu.RunThreadBlock(it.Value.(*threadBlock).generateNVThreadBlock()) + } + + tg.file.Close() + return nil +} + +func (tg *traceGroup) buildFileScanner() { + file, err := os.Open(tg.filePath) + if err != nil { + log.Panic(err) + } + tg.file = file // [note] close after exec + tg.scanner = bufio.NewScanner(file) +} + +func (tg *traceGroup) parseTraceHeader() { + if tg.hasParsedTraceHeader { + return + } + + headerLines := make([]string, 0) + for tg.scanner.Scan() { // [note] get prefix lines that start with "-" + if strings.HasPrefix(tg.scanner.Text(), "-") { + headerLines = append(headerLines, tg.scanner.Text()) + } else if tg.scanner.Text() != "" { + break + } + } + + tg.traceHeader = parseHeaderParam(headerLines) + tg.hasParsedTraceHeader = true + tg.traceHeader.parent = tg +} + +func (tg *traceGroup) parseThreadBlocks() { + if !tg.hasParsedTraceHeader { + log.Panic("Trace header has not been parsed") + } + for tg.scanner.Scan() { + if strings.TrimSpace(tg.scanner.Text()) == "#BEGIN_TB" { + threadBlocklines := make([]string, 0) // [note] store whole lines of a thread block + for tg.scanner.Scan() { + if strings.TrimSpace(tg.scanner.Text()) == "#END_TB" { + tb := parseThreadBlocks(threadBlocklines) + tb.parent = tg + tg.threadBlockQueue.PushBack(tb) + break + } + threadBlocklines = append(threadBlocklines, tg.scanner.Text()) + } + } + } +} diff --git a/accelsim_tracing/trace/trace_header.go b/accelsim_tracing/trace/trace_header.go new file mode 100644 index 00000000..d43b7462 --- /dev/null +++ b/accelsim_tracing/trace/trace_header.go @@ -0,0 +1,106 @@ +package trace + +import ( + "fmt" + "log" + "strings" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" +) + +type traceHeader 
struct { + parent *traceGroup + rawContext struct { + kernelName string + kernelID string + gridDim string + blockDim string + shmem string + nregs string + binaryVersion string + cudaStreamID string + shmemBaseAddr string + localMemBaseAddr string + nvbitVersion string + accelsimTracerVersion string + } + + kernelName string + kernelID int32 + gridDim nvidia.Dim3 + blockDim nvidia.Dim3 + shmem int32 + nregs int32 + binaryVersion int32 + cudaStreamID int32 + shmemBaseAddr int64 + localMemBaseAddr int64 + nvbitVersion string + accelsimTracerVersion string +} + +func parseHeaderParam(lines []string) *traceHeader { + th := &traceHeader{} + + for _, line := range lines { + elems := strings.Split(line, "=") + if len(elems) != 2 { + log.Panicf("Invalid trace header line: %s", line) + } + key := strings.TrimSpace(elems[0]) + value := strings.TrimSpace(elems[1]) + + th.updateParam(key[1:], value, line) + } + return th +} + +// Shaoyu: Maybe we can parse the attrs in order and avoid using swicth-case here +// +//nolint:funlen,gocyclo +func (th *traceHeader) updateParam(key string, value string, rawText string) { + err := error(nil) + switch key { + case "kernel name": + th.rawContext.kernelName = rawText + th.kernelName = value + case "kernel id": + th.rawContext.kernelID = rawText + _, err = fmt.Sscanf(value, "%d", &th.kernelID) + case "grid dim": + th.rawContext.gridDim = rawText + _, err = fmt.Sscanf(value, "(%d,%d,%d)", &th.gridDim[0], &th.gridDim[1], &th.gridDim[2]) + case "block dim": + th.rawContext.blockDim = rawText + _, err = fmt.Sscanf(value, "(%d,%d,%d)", &th.blockDim[0], &th.blockDim[1], &th.blockDim[2]) + case "shmem": + th.rawContext.shmem = rawText + _, err = fmt.Sscanf(value, "%d", &th.shmem) + case "nregs": + th.rawContext.nregs = rawText + _, err = fmt.Sscanf(value, "%d", &th.nregs) + case "binary version": + th.rawContext.binaryVersion = rawText + _, err = fmt.Sscanf(value, "%d", &th.binaryVersion) + case "cuda stream id": + th.rawContext.cudaStreamID = 
rawText + _, err = fmt.Sscanf(value, "%d", &th.cudaStreamID) + case "shmem base_addr": + th.rawContext.shmemBaseAddr = rawText + _, err = fmt.Sscanf(value, "%v", &th.shmemBaseAddr) + case "local mem base_addr": + th.rawContext.localMemBaseAddr = rawText + _, err = fmt.Sscanf(value, "%v", &th.localMemBaseAddr) + case "nvbit version": + th.rawContext.nvbitVersion = rawText + th.nvbitVersion = value + case "accelsim tracer version": + th.rawContext.accelsimTracerVersion = rawText + th.accelsimTracerVersion = value + default: + log.Printf("Unknown trace header key: %s", key) + } + if err != nil { + log.Panicf("Invalid trace header value for [%s]: %s", key, value) + } +} diff --git a/accelsim_tracing/trace/warp.go b/accelsim_tracing/trace/warp.go new file mode 100644 index 00000000..a95515dc --- /dev/null +++ b/accelsim_tracing/trace/warp.go @@ -0,0 +1,95 @@ +package trace + +import ( + "fmt" + "log" + "strconv" + "strings" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" +) + +type warp struct { + parent *threadBlock + rawContext struct { + warpID string + instsCount string + } + + warpID int32 + instsCount int32 + instructions []instruction +} + +type instruction struct { + parent *warp + rawText string + + PC int32 + Mask int64 + DestNum int32 + DestRegs []*nvidia.Register + OpCode *nvidia.Opcode + SrcNum int32 + SrcRegs []*nvidia.Register + MemWidth int32 + AddressCompress int32 + MemAddress int64 + MemAddressSuffix1 int32 + MemAddressSuffix2 []int32 +} + +func parseWarp(lines []string) *warp { + wp := &warp{} + elems0 := strings.Split(lines[0], "=") + elems1 := strings.Split(lines[1], "=") + if len(elems0) != 2 || len(elems1) != 2 { + log.Panicf("Invalid warp header: %s, %s", lines[0], lines[1]) + } + wp.rawContext.warpID = lines[0] + wp.rawContext.instsCount = lines[1] + _, err0 := fmt.Sscanf(strings.TrimSpace(elems0[1]), "%d", &wp.warpID) + _, err1 := fmt.Sscanf(strings.TrimSpace(elems1[1]), "%d", &wp.instsCount) + if err0 != nil || err1 != nil { + 
log.Panicf("Invalid warp header: %s, %s", lines[0], lines[1]) + } + for i := 2; i < 2+int(wp.instsCount); i++ { + inst := parseInst(lines[i]) + inst.parent = wp + wp.instructions = append(wp.instructions, inst) + } + return wp +} + +func parseInst(line string) instruction { + inst := &instruction{} + elems := strings.Fields(line) + fmt.Sscanf(elems[0]+elems[1]+elems[2], "%x%x%d", &inst.PC, &inst.Mask, &inst.SrcNum) + for i := 0; i < int(inst.SrcNum); i++ { + inst.SrcRegs = append(inst.SrcRegs, nvidia.NewRegister(elems[3+i])) + } + fmt.Sscanf(elems[3+int(inst.SrcNum)], "%d", &inst.DestNum) + for i := 0; i < int(inst.DestNum); i++ { + inst.DestRegs = append(inst.DestRegs, nvidia.NewRegister(elems[4+int(inst.SrcNum)+i])) + } + inst.parseMemory(elems[4+int(inst.SrcNum)+int(inst.DestNum):]) + return *inst +} + +// [todo]: understand memory format +func (inst *instruction) parseMemory(elems []string) { + fmt.Sscanf(elems[0], "%d", &inst.MemWidth) + if inst.MemWidth == 0 { + return + } + fmt.Sscanf(elems[1]+elems[2], "%d0x%x", &inst.AddressCompress, &inst.MemAddress) + switch inst.AddressCompress { + case 1: + fmt.Sscanf(elems[2], "%d", &inst.MemAddressSuffix1) + case 2: + for _, s := range elems[2:] { + s32, _ := strconv.Atoi(s) + inst.MemAddressSuffix2 = append(inst.MemAddressSuffix2, int32(s32)) + } + } +} diff --git a/accelsim_tracing/tracer.go b/accelsim_tracing/tracer.go new file mode 100644 index 00000000..cb0df615 --- /dev/null +++ b/accelsim_tracing/tracer.go @@ -0,0 +1,62 @@ +package main + +import ( + "flag" + "fmt" + "log" + + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/benchmark" + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/gpu" + "github.com/sarchlab/mgpusim/v3/accelsim_tracing/nvidia" +) + +type inputArguments struct { + inputTraceDir string + // deparse bool + // outputTraceDir string +} + +func getInputArguments() *inputArguments { + i := &inputArguments{} + + flag.Usage = func() { + fmt.Println("Usage: ./as_trace_parser [options] trace") + 
flag.PrintDefaults() + } + + flag.Parse() + if len(flag.Args()) < 1 { + flag.Usage() + log.Panic("Error: should specify an input trace") + } + + i.inputTraceDir = flag.Arg(0) + return i +} + +func buildAmpereGPU() *gpu.GPU { + gpu := gpu.NewGPU(). + WithGPUStrategy("default"). + WithGPCNum(8). + WithSMNum(16). + WithSMUnitNum(4). + WithGPCStrategy("default"). + WithSMStrategy("default"). + WithSMUnitStrategy("default"). + WithL2CacheSize(4*1024*1024*nvidia.BYTE). + WithL1CacheSize(192*1024*nvidia.BYTE). + WithL0CacheSize(16*1024*nvidia.BYTE). + WithRegisterFileSize(256*1024*nvidia.BYTE). + WithLaneSize(4*nvidia.BYTE). + WithALU("int32", 16) + gpu.Build() + return gpu +} + +func main() { + args := getInputArguments() + gpu := buildAmpereGPU() + benchmark := benchmark.NewBenchMark().WithTraceDirPath(args.inputTraceDir) + benchmark.Build() + benchmark.Exec(gpu) +} diff --git a/samples/runner/r9nanobuilder.go b/samples/runner/r9nanobuilder.go index f76b015a..a0d3a23d 100644 --- a/samples/runner/r9nanobuilder.go +++ b/samples/runner/r9nanobuilder.go @@ -745,12 +745,6 @@ func (b *R9NanoGPUBuilder) populateInstMemoryHierarchy(sa *shaderArray) { } func (b *R9NanoGPUBuilder) buildRDMAEngine() { - // b.rdmaEngine = rdma.NewEngine( - // fmt.Sprintf("%s.RDMA", b.gpuName), - // b.engine, - // b.lowModuleFinderForL1, - // nil, - // ) name := fmt.Sprintf("%s.RDMA", b.gpuName) b.rdmaEngine = rdma.MakeBuilder(). WithEngine(b.engine). 
@@ -762,6 +756,10 @@ func (b *R9NanoGPUBuilder) buildRDMAEngine() { if b.monitor != nil { b.monitor.RegisterComponent(b.rdmaEngine) } + + if b.enableVisTracing { + tracing.CollectTrace(b.rdmaEngine, b.visTracer) + } } func (b *R9NanoGPUBuilder) buildPageMigrationController() { diff --git a/timing/rdma/comp.go b/timing/rdma/comp.go index ed95fa6b..0b8683dd 100644 --- a/timing/rdma/comp.go +++ b/timing/rdma/comp.go @@ -214,8 +214,7 @@ func (c *Comp) processReqFromL1( if err == nil { c.ToL1.Retrieve(now) - tracing.TraceReqReceive(req, c) - tracing.TraceReqInitiate(cloned, c, tracing.MsgIDAtReceiver(req, c)) + c.traceInsideOutStart(req, cloned) //fmt.Printf("%s req inside %s -> outside %s\n", //e.Name(), req.GetID(), cloned.GetID()) @@ -247,8 +246,7 @@ func (c *Comp) processReqFromOutside( if err == nil { c.ToOutside.Retrieve(now) - tracing.TraceReqReceive(req, c) - tracing.TraceReqInitiate(cloned, c, tracing.MsgIDAtReceiver(req, c)) + c.traceOutsideInStart(req, cloned) //fmt.Printf("%s req outside %s -> inside %s\n", //e.Name(), req.GetID(), cloned.GetID()) @@ -284,8 +282,7 @@ func (c *Comp) processRspFromL2( //fmt.Printf("%s rsp inside %s -> outside %s\n", //e.Name(), rsp.GetID(), rspToOutside.GetID()) - tracing.TraceReqFinalize(trans.toInside, c) - tracing.TraceReqComplete(trans.fromOutside, c) + c.traceOutsideInEnd(trans) c.transactionsFromOutside = append(c.transactionsFromOutside[:transactionIndex], @@ -312,8 +309,7 @@ func (c *Comp) processRspFromOutside( if err == nil { c.ToOutside.Retrieve(now) - tracing.TraceReqFinalize(trans.toOutside, c) - tracing.TraceReqComplete(trans.fromInside, c) + c.traceInsideOutEnd(trans) //fmt.Printf("%s rsp outside %s -> inside %s\n", //e.Name(), rsp.GetID(), rspToInside.GetID()) @@ -404,3 +400,69 @@ func (c *Comp) cloneRsp(origin mem.AccessRsp, rspTo string) mem.AccessRsp { func (c *Comp) SetFreq(freq sim.Freq) { c.TickingComponent.Freq = freq } + +func (c *Comp) traceInsideOutStart(req mem.AccessReq, cloned mem.AccessReq) { + if 
len(c.Hooks()) == 0 { + return + } + + tracing.StartTaskWithSpecificLocation( + tracing.MsgIDAtReceiver(req, c), + req.Meta().ID+"_req_out", + c, + "req_in", + reflect.TypeOf(req).String(), + c.Name()+".InsideOut", + req, + ) + + tracing.StartTaskWithSpecificLocation( + cloned.Meta().ID+"_req_out", + tracing.MsgIDAtReceiver(req, c), + c, + "req_out", + reflect.TypeOf(req).String(), + c.Name()+".InsideOut", + cloned, + ) +} + +func (c *Comp) traceOutsideInStart(req mem.AccessReq, cloned mem.AccessReq) { + if len(c.Hooks()) == 0 { + return + } + + tracing.StartTaskWithSpecificLocation( + tracing.MsgIDAtReceiver(req, c), + req.Meta().ID+"_req_out", + c, + "req_in", + reflect.TypeOf(req).String(), + c.Name()+".OutsideIn", + req, + ) + + tracing.StartTaskWithSpecificLocation( + cloned.Meta().ID+"_req_out", + tracing.MsgIDAtReceiver(req, c), + c, + "req_out", + reflect.TypeOf(req).String(), + c.Name()+".OutsideIn", + cloned, + ) +} + +func (c *Comp) traceInsideOutEnd(trans transaction) { + if len(c.Hooks()) == 0 { + return + } + + tracing.TraceReqFinalize(trans.toOutside, c) + tracing.TraceReqComplete(trans.fromInside, c) +} + +func (c *Comp) traceOutsideInEnd(trans transaction) { + tracing.TraceReqFinalize(trans.toInside, c) + tracing.TraceReqComplete(trans.fromOutside, c) +}