diff --git a/.vscode/launch.json b/.vscode/launch.json
index 83822569..d6f08f5e 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -166,7 +166,7 @@
             "program": "${workspaceFolder}/samples/fir",
             "args": [
                 "-timing",
-                "-length=8192",
+                "-length=81920",
                 "-report-all",
             ],
         },
diff --git a/driver/builder.go b/driver/builder.go
index 3cf47ed8..caa066fe 100644
--- a/driver/builder.go
+++ b/driver/builder.go
@@ -17,6 +17,7 @@ type Builder struct {
 	useMagicMemoryCopy  bool
 	middlewareD2HCycles int
 	middlewareH2DCycles int
+	cpuMemorySize       uint64
 }
 
 // MakeBuilder creates a driver builder with some default configuration
@@ -74,6 +75,12 @@ func (b Builder) WithH2DCycles(h2dCycles int) Builder {
 	return b
 }
 
+// WithCPUMemorySize sets the memory size of the CPU.
+func (b Builder) WithCPUMemorySize(memorySize uint64) Builder {
+	b.cpuMemorySize = memorySize
+	return b
+}
+
 // Build creates a driver.
 func (b Builder) Build(name string) *Driver {
 	driver := new(Driver)
@@ -125,7 +132,11 @@ func (b *Builder) createCPU(d *Driver) {
 		Type:     internal.DeviceTypeCPU,
 		MemState: internal.NewDeviceMemoryState(d.Log2PageSize),
 	}
-	cpu.SetTotalMemSize(4 * mem.GB)
+	// Fall back to the previous hard-coded 4 GB when no size is configured.
+	if b.cpuMemorySize == 0 {
+		b.cpuMemorySize = 4 * mem.GB
+	}
+	cpu.SetTotalMemSize(b.cpuMemorySize)
 
 	d.memAllocator.RegisterDevice(cpu)
 	d.devices = append(d.devices, cpu)
diff --git a/driver/driver.go b/driver/driver.go
index 28d17459..9131f317 100644
--- a/driver/driver.go
+++ b/driver/driver.go
@@ -142,6 +142,7 @@ type DeviceProperties struct {
 func (d *Driver) RegisterGPU(
 	commandProcessorPort sim.Port,
 	properties DeviceProperties,
+	gmmuPageTable vm.PageTable,
 ) {
 	d.GPUs = append(d.GPUs, commandProcessorPort)
 
@@ -156,8 +157,13 @@ func (d *Driver) RegisterGPU(
 	}
 	gpuDevice.SetTotalMemSize(properties.DRAMSize)
 	d.memAllocator.RegisterDevice(gpuDevice)
-	d.devices = append(d.devices, gpuDevice)
+
+	for _, page := range d.memAllocator.GetVAddrToPageMapping() {
+		if page.DeviceID == uint64(gpuDevice.ID) {
+			gmmuPageTable.Insert(page)
+		}
+	}
 }
 
 // Tick ticks
diff --git a/driver/internal/memoryallocator.go b/driver/internal/memoryallocator.go
index 1a887ea5..ef947077 100644
--- a/driver/internal/memoryallocator.go
+++ b/driver/internal/memoryallocator.go
@@ -22,6 +22,8 @@ type MemoryAllocator interface {
 		vAddr uint64,
 		unified bool,
 	) vm.Page
+
+	GetVAddrToPageMapping() map[uint64]vm.Page
 }
 
 // NewMemoryAllocator creates a new memory allocator.
@@ -63,9 +65,7 @@ func (a *memoryAllocatorImpl) RegisterDevice(device *Device) {
 	state := device.MemState
 	state.setInitialAddress(a.totalStorageByteSize)
-	a.totalStorageByteSize += state.getStorageSize()
-
 	a.devices[device.ID] = device
 }
 
@@ -285,3 +285,13 @@ func (a *memoryAllocatorImpl) Free(ptr uint64) {
 
 	a.removePage(ptr)
 }
+
+func (a *memoryAllocatorImpl) GetVAddrToPageMapping() map[uint64]vm.Page {
+	a.Lock()
+	defer a.Unlock()
+	pageMap := make(map[uint64]vm.Page, len(a.vAddrToPageMapping))
+	for vAddr, page := range a.vAddrToPageMapping {
+		pageMap[vAddr] = page
+	}
+	return pageMap
+}
diff --git a/samples/runner/emuplatform.go b/samples/runner/emuplatform.go
index be08f308..c1b9c32e 100644
--- a/samples/runner/emuplatform.go
+++ b/samples/runner/emuplatform.go
@@ -98,7 +98,8 @@ func (b EmuBuilder) Build() *Platform {
 		gpuDriver.RegisterGPU(cpPort, driver.DeviceProperties{
 			DRAMSize: 4 * mem.GB,
 			CUCount:  64,
-		})
+		},
+			gpuBuilder.pageTable)
 		connection.PlugIn(cpPort, 64)
 
 		b.gpus = append(b.gpus, gpu)
diff --git a/samples/runner/flag.go b/samples/runner/flag.go
index 53763080..d0d772ee 100644
--- a/samples/runner/flag.go
+++ b/samples/runner/flag.go
@@ -131,6 +131,12 @@ func (r *Runner) ParseFlag() *Runner {
 		r.ReportSIMDBusyTime = true
 		r.ReportDRAMTransactionCount = true
 		r.ReportRDMATransactionCount = true
+		r.ReportGMMULatency = true
+		r.ReportMMULatency = true
+		r.ReportGMMUTransactionCount = true
+		r.ReportMMUTransactionCount = true
+		r.ReportGMMUCacheHitRate = true
+		r.ReportGMMUCacheLatency = true
 		r.ReportCPIStack = true
 	}
 
diff --git a/samples/runner/platform.go b/samples/runner/platform.go
index 3a846589..5562c08e 100644
--- a/samples/runner/platform.go
+++ b/samples/runner/platform.go
@@ -1,6 +1,8 @@
 package runner
 
 import (
+	"github.com/sarchlab/akita/v3/mem/vm/gmmu"
+	"github.com/sarchlab/akita/v3/mem/vm/mmu"
 	"github.com/sarchlab/akita/v3/sim"
 	"github.com/sarchlab/akita/v3/tracing"
 	"github.com/sarchlab/mgpusim/v3/driver"
@@ -27,16 +29,20 @@ type GPU struct {
 	Domain           *sim.Domain
 	CommandProcessor *cp.CommandProcessor
 	RDMAEngine       *rdma.Comp
-	PMC              *pagemigrationcontroller.PageMigrationController
-	CUs              []TraceableComponent
-	SIMDs            []TraceableComponent
-	L1VCaches        []TraceableComponent
-	L1SCaches        []TraceableComponent
-	L1ICaches        []TraceableComponent
-	L2Caches         []TraceableComponent
-	L1VTLBs          []TraceableComponent
-	L1STLBs          []TraceableComponent
-	L1ITLBs          []TraceableComponent
-	L2TLBs           []TraceableComponent
-	MemControllers   []TraceableComponent
+	MMUEngine        *mmu.MMU
+	GMMUEngine       *gmmu.GMMU
+
+	PMC            *pagemigrationcontroller.PageMigrationController
+	CUs            []TraceableComponent
+	SIMDs          []TraceableComponent
+	L1VCaches      []TraceableComponent
+	L1SCaches      []TraceableComponent
+	L1ICaches      []TraceableComponent
+	L2Caches       []TraceableComponent
+	L1VTLBs        []TraceableComponent
+	L1STLBs        []TraceableComponent
+	L1ITLBs        []TraceableComponent
+	L2TLBs         []TraceableComponent
+	GMMUCache      []TraceableComponent
+	MemControllers []TraceableComponent
 }
diff --git a/samples/runner/r9nanobuilder.go b/samples/runner/r9nanobuilder.go
index a0d3a23d..823bb254 100644
--- a/samples/runner/r9nanobuilder.go
+++ b/samples/runner/r9nanobuilder.go
@@ -11,7 +11,9 @@ import (
 	"github.com/sarchlab/akita/v3/mem/cache/writethrough"
 	"github.com/sarchlab/akita/v3/mem/dram"
 	"github.com/sarchlab/akita/v3/mem/mem"
+	"github.com/sarchlab/akita/v3/mem/vm"
 	"github.com/sarchlab/akita/v3/mem/vm/addresstranslator"
+	"github.com/sarchlab/akita/v3/mem/vm/gmmu"
 	"github.com/sarchlab/akita/v3/mem/vm/mmu"
"github.com/sarchlab/akita/v3/mem/vm/tlb" "github.com/sarchlab/akita/v3/monitoring" @@ -65,6 +67,8 @@ type R9NanoGPUBuilder struct { l1sTLBs []*tlb.TLB l1iTLBs []*tlb.TLB l2TLBs []*tlb.TLB + gmmuCache *tlb.TLB + gmmu *gmmu.GMMU drams []*dram.MemController lowModuleFinderForL1 *mem.InterleavedLowModuleFinder lowModuleFinderForL2 *mem.InterleavedLowModuleFinder @@ -73,6 +77,7 @@ type R9NanoGPUBuilder struct { rdmaEngine *rdma.Comp pageMigrationController *pagemigrationcontroller.PageMigrationController globalStorage *mem.Storage + pageTable vm.PageTable internalConn *sim.DirectConnection l1TLBToL2TLBConnection *sim.DirectConnection @@ -225,6 +230,14 @@ func (b R9NanoGPUBuilder) WithGlobalStorage( return b } +// WithGMMUPageTable lets GPU to initialize it's page table. +func (b R9NanoGPUBuilder) WithGMMUPageTable( + pageTable vm.PageTable, +) R9NanoGPUBuilder { + b.pageTable = pageTable + return b +} + // Build creates a pre-configure GPU similar to the AMD R9 Nano GPU. func (b R9NanoGPUBuilder) Build(name string, id uint64) *GPU { b.createGPU(name, id) @@ -232,12 +245,16 @@ func (b R9NanoGPUBuilder) Build(name string, id uint64) *GPU { b.buildL2Caches() b.buildDRAMControllers() b.buildCP() + b.buildGMMU() + b.buildGMMUCache() b.buildL2TLB() b.connectCP() b.connectL2AndDRAM() b.connectL1ToL2() b.connectL1TLBToL2TLB() + b.connectL2TLBToGMMUCache() + b.connectGMMUCachetoGMMU() b.populateExternalPorts() @@ -254,6 +271,7 @@ func (b *R9NanoGPUBuilder) populateExternalPorts() { name := fmt.Sprintf("Translation_%02d", i) b.gpu.Domain.AddPort(name, l2TLB.GetPortByName("Bottom")) } + b.gpu.Domain.AddPort("GMMU", b.gmmu.GetPortByName("Bottom")) } func (b *R9NanoGPUBuilder) createGPU(name string, id uint64) { @@ -540,6 +558,67 @@ func (b *R9NanoGPUBuilder) buildL2Caches() { if b.monitor != nil { b.monitor.RegisterComponent(l2) } + if b.perfAnalyzer != nil { + b.perfAnalyzer.RegisterComponent(l2) + } + } +} + +func (b *R9NanoGPUBuilder) buildGMMUCache() { + // numWays := 128 + // test:= int(b.dramSize / (1 << b.log2PageSize) / uint64(numWays)) + builder := tlb.MakeBuilder(). + WithEngine(b.engine). + WithFreq(b.freq). + WithNumWays(8). + WithNumSets(16). + WithNumMSHREntry(32). + WithNumReqPerCycle(32). + WithPageSize(1 << b.log2PageSize). + WithLowModule(b.gmmu.GetPortByName("Top")) + + gmmuCache := builder.Build(fmt.Sprintf("%s.GMMUCache", b.gpuName)) + b.gmmuCache = gmmuCache + b.gpu.GMMUCache = append(b.gpu.GMMUCache, gmmuCache) + // b.gpu.L2TLBs = append(b.gpu.L2TLBs, l2TLB) + + if b.enableVisTracing { + tracing.CollectTrace(b.gmmuCache, b.visTracer) + } + + if b.monitor != nil { + b.monitor.RegisterComponent(b.gmmuCache) + } + + if b.perfAnalyzer != nil { + b.perfAnalyzer.RegisterComponent(b.gmmuCache) + } +} + +func (b *R9NanoGPUBuilder) buildGMMU() { + gmmu := gmmu.MakeBuilder(). + WithEngine(b.engine). + WithFreq(b.freq). + WithDeviceID(b.gpuID). + WithLog2PageSize(b.log2PageSize). + WithMaxNumReqInFlight(8). + WithPageTable(b.pageTable).WithPageWalkingLatency(100). + WithLowModule(b.mmu.GetPortByName("Top")). + Build(fmt.Sprintf("%s.GMMU", b.gpuName)) + + b.gmmu = gmmu + b.gpu.GMMUEngine = b.gmmu + + if b.enableVisTracing { + tracing.CollectTrace(b.gmmu, b.visTracer) + } + + if b.monitor != nil { + b.monitor.RegisterComponent(b.gmmu) + } + + if b.perfAnalyzer != nil { + b.perfAnalyzer.RegisterComponent(b.gmmu) } } @@ -824,7 +903,7 @@ func (b *R9NanoGPUBuilder) buildL2TLB() { WithNumMSHREntry(64). WithNumReqPerCycle(1024). WithPageSize(1 << b.log2PageSize). 
- WithLowModule(b.mmu.GetPortByName("Top")) + WithLowModule(b.gmmuCache.GetPortByName("Top")) l2TLB := builder.Build(fmt.Sprintf("%s.L2TLB", b.gpuName)) b.l2TLBs = append(b.l2TLBs, l2TLB) @@ -854,3 +933,24 @@ func (b *R9NanoGPUBuilder) connectWithDirectConnection( conn.PlugIn(port1, bufferSize) conn.PlugIn(port2, bufferSize) } + +func (b *R9NanoGPUBuilder) connectL2TLBToGMMUCache() { + conn := sim.NewDirectConnection( + b.gpuName+".L2TLBtoGMMUCache", + b.engine, b.freq, + ) + conn.PlugIn(b.gmmuCache.GetPortByName("Top"), 64) + + for _, l2TLB := range b.l2TLBs { + conn.PlugIn(l2TLB.GetPortByName("Bottom"), 64) + } +} + +func (b *R9NanoGPUBuilder) connectGMMUCachetoGMMU() { + conn := sim.NewDirectConnection( + b.gpuName+".GMMUCacheToGMMU", + b.engine, b.freq, + ) + conn.PlugIn(b.gmmu.GetPortByName("Top"), 64) + conn.PlugIn(b.gmmuCache.GetPortByName("Bottom"), 64) +} diff --git a/samples/runner/report.go b/samples/runner/report.go index 07c50c78..7e2ee2bc 100644 --- a/samples/runner/report.go +++ b/samples/runner/report.go @@ -366,7 +366,13 @@ func (r *Runner) reportStats() { r.reportCacheHitRate() r.reportTLBHitRate() r.reportRDMATransactionCount() + r.reportGMMUTransactionCount() + r.reportMMUTransactionCount() r.reportDRAMTransactionCount() + r.reportGMMUCacheHitRate() + r.reportGMMUCacheLatency() + r.reportMMULatency() + r.reportGMMULatency() r.dumpMetrics() } @@ -570,3 +576,95 @@ func (r *Runner) reportDRAMTransactionCount() { func (r *Runner) dumpMetrics() { r.metricsCollector.Dump(*filenameFlag) } + +func (r *Runner) reportGMMUTransactionCount() { + for _, t := range r.gmmuTransactionCounters { + r.metricsCollector.Collect( + t.gmmuEngine.Name(), + "outgoing_trans_count", + float64(t.outgoingTracer.TotalCount()), + ) + r.metricsCollector.Collect( + t.gmmuEngine.Name(), + "incoming_trans_count", + float64(t.incomingTracer.TotalCount()), + ) + } +} + +func (r *Runner) reportMMUTransactionCount() { + for _, t := range r.mmuTransactionCounters { + r.metricsCollector.Collect( + t.mmuEngine.Name(), + "outgoing_trans_count", + float64(t.outgoingTracer.TotalCount()), + ) + r.metricsCollector.Collect( + t.mmuEngine.Name(), + "incoming_trans_count", + float64(t.incomingTracer.TotalCount()), + ) + } +} + +func (r *Runner) reportGMMUCacheHitRate() { + for _, tracer := range r.gmmuCacheHitRateTracers { + hit := tracer.tracer.GetStepCount("hit") + miss := tracer.tracer.GetStepCount("miss") + mshrHit := tracer.tracer.GetStepCount("mshr-hit") + + totalTransaction := hit + miss + mshrHit + + if totalTransaction == 0 { + continue + } + + r.metricsCollector.Collect( + tracer.gmmuCache.Name(), "hit", float64(hit)) + r.metricsCollector.Collect( + tracer.gmmuCache.Name(), "miss", float64(miss)) + r.metricsCollector.Collect( + tracer.gmmuCache.Name(), "mshr-hit", float64(mshrHit)) + } +} + +func (r *Runner) reportGMMUCacheLatency() { + for _, tracer := range r.gmmuCacheLatencyTracers { + if tracer.tracer.AverageTime() == 0 { + continue + } + + r.metricsCollector.Collect( + tracer.gmmuCache.Name(), + "req_average_latency", + float64(tracer.tracer.AverageTime()), + ) + } +} + +func (r *Runner) reportGMMULatency() { + for _, tracer := range r.gmmuLatencyTracers { + if tracer.tracer.AverageTime() == 0 { + continue + } + + r.metricsCollector.Collect( + tracer.gmmu.Name(), + "req_average_latency", + float64(tracer.tracer.AverageTime()), + ) + } +} +func (r *Runner) reportMMULatency() { + for _, tracer := range r.mmuLatencyTracers { + if tracer.tracer.AverageTime() == 0 { + continue + } + + 
r.metricsCollector.Collect( + "MMU", + "req_average_latency", + float64(tracer.tracer.AverageTime()), + ) + } +} diff --git a/samples/runner/runner.go b/samples/runner/runner.go index 77839b01..2d68ad1e 100644 --- a/samples/runner/runner.go +++ b/samples/runner/runner.go @@ -5,6 +5,7 @@ import ( "log" // Enable profiling + _ "net/http/pprof" "strconv" "strings" @@ -35,12 +36,19 @@ type Runner struct { cacheHitRateTracers []cacheHitRateTracer tlbHitRateTracers []tlbHitRateTracer rdmaTransactionCounters []rdmaTransactionCountTracer - dramTracers []dramTransactionCountTracer - benchmarks []benchmarks.Benchmark - monitor *monitoring.Monitor - metricsCollector *collector - simdBusyTimeTracers []simdBusyTimeTracer - cuCPITraces []cuCPIStackTracer + gmmuCacheLatencyTracers []gmmuCacheLatencyTracer + gmmuCacheHitRateTracers []gmmuCacheHitRateTracer + mmuTransactionCounters []mmuTransactionCountTracer + mmuLatencyTracers []mmuLatencyTracer + gmmuTransactionCounters []gmmuTransactionCountTracer + gmmuLatencyTracers []gmmuLatencyTracer + + dramTracers []dramTransactionCountTracer + benchmarks []benchmarks.Benchmark + monitor *monitoring.Monitor + metricsCollector *collector + simdBusyTimeTracers []simdBusyTimeTracer + cuCPITraces []cuCPIStackTracer Timing bool Verify bool @@ -50,10 +58,17 @@ type Runner struct { ReportCacheHitRate bool ReportTLBHitRate bool ReportRDMATransactionCount bool + ReportGMMULatency bool + ReportMMULatency bool + ReportGMMUTransactionCount bool + ReportMMUTransactionCount bool + ReportDRAMTransactionCount bool UseUnifiedMemory bool ReportSIMDBusyTime bool ReportCPIStack bool + ReportGMMUCacheLatency bool + ReportGMMUCacheHitRate bool GPUIDs []int } diff --git a/samples/runner/timingplatform.go b/samples/runner/timingplatform.go index b2ea6726..2ee3e832 100644 --- a/samples/runner/timingplatform.go +++ b/samples/runner/timingplatform.go @@ -164,6 +164,10 @@ func (b R9NanoPlatformBuilder) Build() *Platform { pcieConnector.EstablishRoute() + for _, gpu := range b.gpus { + gpu.MMUEngine = mmuComponent + } + return &Platform{ Engine: b.engine, Driver: gpuDriver, @@ -342,6 +346,14 @@ func (b R9NanoPlatformBuilder) createMMU( b.monitor.RegisterComponent(mmuComponent) } + if b.perfAnalyzer != nil { + b.perfAnalyzer.RegisterComponent(mmuComponent) + } + + if b.visTracer != nil { + tracing.CollectTrace(mmuComponent, b.visTracer) + } + return mmuComponent, pageTable } @@ -426,7 +438,7 @@ func (b *R9NanoPlatformBuilder) createGPU( CUCount: b.numCUPerSA * b.numSAPerGPU, DRAMSize: 4 * mem.GB, }, - ) + gpuBuilder.pageTable) gpu.CommandProcessor.Driver = gpuDriver.GetPortByName("GPU") b.configRDMAEngine(gpu, rdmaAddressTable) diff --git a/samples/runner/tracers.go b/samples/runner/tracers.go new file mode 100644 index 00000000..e051437b --- /dev/null +++ b/samples/runner/tracers.go @@ -0,0 +1,245 @@ +package runner + +import ( + "strings" + + "github.com/sarchlab/akita/v3/mem/vm/gmmu" + "github.com/sarchlab/akita/v3/mem/vm/mmu" + "github.com/sarchlab/akita/v3/sim" + "github.com/sarchlab/akita/v3/tracing" +) + +type rdmaLatencyTracer struct { + tracer *tracing.AverageTimeTracer + rdma TraceableComponent +} + +type mmuLatencyTracer struct { + tracer *tracing.AverageTimeTracer + mmu TraceableComponent +} + +type gmmuLatencyTracer struct { + tracer *tracing.AverageTimeTracer + gmmu TraceableComponent +} + +type tlbLatencyTracer struct { + tracer *tracing.AverageTimeTracer + tlb TraceableComponent +} + +type gmmuTransactionCountTracer struct { + outgoingTracer *tracing.AverageTimeTracer + 
incomingTracer *tracing.AverageTimeTracer
+	gmmuEngine     *gmmu.GMMU
+}
+
+// type gmmuTransactionCountTracer struct {
+// 	tracer *mmuTracer
+// 	gmmu   *gmmu.GMMU
+// }
+type mmuTransactionCountTracer struct {
+	outgoingTracer *tracing.AverageTimeTracer
+	incomingTracer *tracing.AverageTimeTracer
+	mmuEngine      *mmu.MMU
+}
+
+type gmmuCacheHitRateTracer struct {
+	tracer    *tracing.StepCountTracer
+	gmmuCache TraceableComponent
+}
+
+type gmmuCacheLatencyTracer struct {
+	tracer    *tracing.AverageTimeTracer
+	gmmuCache TraceableComponent
+}
+
+// func (r *Runner) addSIMDBusyTimeTracer() {
+// 	if !r.ReportSIMDBusyTime {
+// 		return
+// 	}
+
+// 	for _, gpu := range r.platform.GPUs {
+// 		for _, simd := range gpu.SIMDs {
+// 			perSIMDBusyTimeTracer := tracing.NewBusyTimeTracer(
+// 				r.platform.Engine,
+// 				func(task tracing.Task) bool {
+// 					return task.Kind == "pipeline"
+// 				})
+// 			r.simdBusyTimeTracers = append(r.simdBusyTimeTracers,
+// 				simdBusyTimeTracer{
+// 					tracer: perSIMDBusyTimeTracer,
+// 					simd:   simd,
+// 				})
+// 			tracing.CollectTrace(simd, perSIMDBusyTimeTracer)
+// 		}
+// 	}
+// }
+
+func (r *Runner) addMMUEngineTracer() {
+	if !r.ReportMMUTransactionCount {
+		return
+	}
+
+	for _, gpu := range r.platform.GPUs {
+		t := mmuTransactionCountTracer{}
+		// All GPUs share the single IOMMU, so this traces the shared MMU.
+		t.mmuEngine = gpu.MMUEngine
+		t.incomingTracer = tracing.NewAverageTimeTracer(
+			r.platform.Engine,
+			func(task tracing.Task) bool {
+				if task.Kind != "req_in" {
+					return false
+				}
+
+				isFromOutside := strings.Contains(
+					task.Detail.(sim.Msg).Meta().Dst.Name(), "MMU")
+				if !isFromOutside {
+					return false
+				}
+
+				return true
+			})
+		t.outgoingTracer = tracing.NewAverageTimeTracer(
+			r.platform.Engine,
+			func(task tracing.Task) bool {
+				if task.Kind != "req_in" {
+					return false
+				}
+
+				isFromOutside := strings.Contains(
+					task.Detail.(sim.Msg).Meta().Src.Name(), "MMU")
+				if isFromOutside {
+					return false
+				}
+
+				return true
+			})
+
+		tracing.CollectTrace(t.mmuEngine, t.incomingTracer)
+		tracing.CollectTrace(t.mmuEngine, t.outgoingTracer)
+
+		r.mmuTransactionCounters = append(r.mmuTransactionCounters, t)
+	}
+}
+
+func (r *Runner) addGMMUEngineTracer() {
+	if !r.ReportGMMUTransactionCount {
+		return
+	}
+
+	for _, gpu := range r.platform.GPUs {
+		t := gmmuTransactionCountTracer{}
+		// Each GPU has its own GMMU engine.
+		t.gmmuEngine = gpu.GMMUEngine
+		t.incomingTracer = tracing.NewAverageTimeTracer(
+			r.platform.Engine,
+			func(task tracing.Task) bool {
+				if task.Kind != "req_in" {
+					return false
+				}
+
+				isFromOutside := strings.Contains(
+					task.Detail.(sim.Msg).Meta().Dst.Name(), "GMMU")
+				if !isFromOutside {
+					return false
+				}
+
+				return true
+			})
+		t.outgoingTracer = tracing.NewAverageTimeTracer(
+			r.platform.Engine,
+			func(task tracing.Task) bool {
+				if task.Kind != "req_in" {
+					return false
+				}
+
+				isFromOutside := strings.Contains(
+					task.Detail.(sim.Msg).Meta().Src.Name(), "GMMU")
+				if isFromOutside {
+					return false
+				}
+
+				return true
+			})
+
+		tracing.CollectTrace(t.gmmuEngine, t.incomingTracer)
+		tracing.CollectTrace(t.gmmuEngine, t.outgoingTracer)
+
+		r.gmmuTransactionCounters = append(r.gmmuTransactionCounters, t)
+	}
+}
+
+func (r *Runner) addMMULatencyTracer() {
+	if !r.ReportMMULatency {
+		return
+	}
+
+	for _, gpu := range r.platform.GPUs {
+		mmu := gpu.MMUEngine
+		tracer := tracing.NewAverageTimeTracer(
+			r.platform.Engine,
+			func(task tracing.Task) bool {
+				return task.Kind == "req_in"
+			})
+		r.mmuLatencyTracers = append(r.mmuLatencyTracers,
+			mmuLatencyTracer{tracer: tracer, mmu: mmu})
+		tracing.CollectTrace(mmu,
tracer) + + } +} + +func (r *Runner) addGMMULatencyTracer() { + if !r.ReportGMMULatency { + return + } + + for _, gpu := range r.platform.GPUs { + gmmu := gpu.GMMUEngine + tracer := tracing.NewAverageTimeTracer( + r.platform.Engine, + func(task tracing.Task) bool { + return task.Kind == "req_in" + }) + r.gmmuLatencyTracers = append(r.gmmuLatencyTracers, + gmmuLatencyTracer{tracer: tracer, gmmu: gmmu}) + tracing.CollectTrace(gmmu, tracer) + + } +} + +func (r *Runner) addGMMUCacheLatencyTracer() { + if !r.ReportGMMUCacheLatency { + return + } + + for _, gpu := range r.platform.GPUs { + for _, gmmuCache := range gpu.GMMUCache { + tracer := tracing.NewAverageTimeTracer( + r.platform.Engine, + func(task tracing.Task) bool { + return task.Kind == "req_in" + }) + r.gmmuCacheLatencyTracers = append(r.gmmuCacheLatencyTracers, + gmmuCacheLatencyTracer{tracer: tracer, gmmuCache: gmmuCache}) + tracing.CollectTrace(gmmuCache, tracer) + } + } +} + +func (r *Runner) addGMMUCacheHitRateTracer() { + if !r.ReportGMMUCacheHitRate { + return + } + + for _, gpu := range r.platform.GPUs { + for _, gmmuCache := range gpu.GMMUCache { + tracer := tracing.NewStepCountTracer( + func(task tracing.Task) bool { return true }) + r.gmmuCacheHitRateTracers = append(r.gmmuCacheHitRateTracers, + gmmuCacheHitRateTracer{tracer: tracer, gmmuCache: gmmuCache}) + tracing.CollectTrace(gmmuCache, tracer) + } + } +}