diff --git a/driver/builder.go b/driver/builder.go index 3cf47ed8..b93d0cd7 100644 --- a/driver/builder.go +++ b/driver/builder.go @@ -17,6 +17,7 @@ type Builder struct { useMagicMemoryCopy bool middlewareD2HCycles int middlewareH2DCycles int + cpuMemorySize uint64 } // MakeBuilder creates a driver builder with some default configuration @@ -74,6 +75,13 @@ func (b Builder) WithH2DCycles(h2dCycles int) Builder { return b } +// WithCPUMemorySize sets the total memory size of the CPU device that the +// driver registers. A size of 0 selects the 4 GB default. +func (b Builder) WithCPUMemorySize(memorySize uint64) Builder { + b.cpuMemorySize = memorySize + return b +} + // Build creates a driver. func (b Builder) Build(name string) *Driver { driver := new(Driver) @@ -125,7 +133,13 @@ func (b *Builder) createCPU(d *Driver) { Type: internal.DeviceTypeCPU, MemState: internal.NewDeviceMemoryState(d.Log2PageSize), } - cpu.SetTotalMemSize(4 * mem.GB) + // A zero size means WithCPUMemorySize was never called. Fall back to the + // previous 4 GB default rather than registering a CPU without memory. + cpuMemorySize := b.cpuMemorySize + if cpuMemorySize == 0 { + cpuMemorySize = 4 * mem.GB + } + cpu.SetTotalMemSize(cpuMemorySize) d.memAllocator.RegisterDevice(cpu) d.devices = append(d.devices, cpu) diff --git a/driver/driver.go b/driver/driver.go index 28d17459..2cdb3118 100644 --- a/driver/driver.go +++ b/driver/driver.go @@ -153,10 +153,10 @@ func (d *Driver) RegisterGPU( CUCount: properties.CUCount, DRAMSize: properties.DRAMSize, }, + PageTable: vm.NewPageTable(d.Log2PageSize), } gpuDevice.SetTotalMemSize(properties.DRAMSize) d.memAllocator.RegisterDevice(gpuDevice) - d.devices = append(d.devices, gpuDevice) } diff --git a/driver/internal/device.go b/driver/internal/device.go index eaf8b1cc..4033d058 100644 --- a/driver/internal/device.go +++ b/driver/internal/device.go @@ -1,5 +1,7 @@ package internal +import "github.com/sarchlab/akita/v3/mem/vm" + // DeviceType marks the type of a device. 
type DeviceType int @@ -38,6 +40,7 @@ type Device struct { nextActualGPUIndex int MemState DeviceMemoryState Properties DeviceProperties + PageTable vm.PageTable } // SetTotalMemSize sets total memory size diff --git a/driver/internal/memoryallocator.go b/driver/internal/memoryallocator.go index 1a887ea5..7aaea995 100644 --- a/driver/internal/memoryallocator.go +++ b/driver/internal/memoryallocator.go @@ -63,9 +63,8 @@ func (a *memoryAllocatorImpl) RegisterDevice(device *Device) { state := device.MemState state.setInitialAddress(a.totalStorageByteSize) - a.totalStorageByteSize += state.getStorageSize() - + device.PageTable = vm.NewPageTable(a.log2PageSize) a.devices[device.ID] = device } @@ -163,7 +162,7 @@ func (a *memoryAllocatorImpl) allocatePages( // fmt.Printf("page.addr is %x piage Device ID is %d \n", page.PAddr, page.DeviceID) // debug.PrintStack() - a.pageTable.Insert(page) + device.PageTable.Insert(page) a.vAddrToPageMapping[page.VAddr] = page } @@ -187,8 +186,11 @@ func (a *memoryAllocatorImpl) Remap( vAddrs = append(vAddrs, addr) addr += pageSize } - - a.allocateMultiplePagesWithGivenVAddrs(pid, deviceID, vAddrs, false) + device := a.devices[deviceID] + pages := a.allocateMultiplePagesWithGivenVAddrs(pid, deviceID, vAddrs, false) + for _, page := range pages { + device.PageTable.Insert(page) + } } func (a *memoryAllocatorImpl) RemovePage(vAddr uint64) { @@ -244,8 +246,8 @@ func (a *memoryAllocatorImpl) allocatePageWithGivenVAddr( DeviceID: uint64(deviceID), Unified: isUnified, } + device.PageTable.Insert(page) a.vAddrToPageMapping[page.VAddr] = page - a.pageTable.Update(page) return page } diff --git a/go.mod b/go.mod index f2b366cc..7ac31345 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/onsi/ginkgo/v2 v2.16.0 github.com/onsi/gomega v1.31.1 github.com/rs/xid v1.5.0 - github.com/sarchlab/akita/v3 v3.0.0 + github.com/sarchlab/akita/v3 v3.1.0 github.com/tebeka/atexit v0.3.0 gonum.org/v1/gonum v0.14.0 ) diff --git a/go.sum b/go.sum 
index d28c9fe3..fbc168a5 100644 --- a/go.sum +++ b/go.sum @@ -48,8 +48,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= -github.com/sarchlab/akita/v3 v3.0.0 h1:kLRhaBpDA6yXYn5suiIFGnWJ0xr/eqGDjMTMGSwaXUM= -github.com/sarchlab/akita/v3 v3.0.0/go.mod h1:p74MfNeElTYFy9H/gwsPzZQXPfw/e87/6HiIYCZexWc= +github.com/sarchlab/akita/v3 v3.1.0 h1:pt17MC5A7NqfZKFRuE9CSQh1L50YJE+/zh2x3DBt5Ow= +github.com/sarchlab/akita/v3 v3.1.0/go.mod h1:63FwQtSD9gCrOF5XGIq4Z6md3QqBgZ5yRDI5K2nGwfA= github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= diff --git a/samples/runner/platform.go b/samples/runner/platform.go index 3a846589..6d72bcd8 100644 --- a/samples/runner/platform.go +++ b/samples/runner/platform.go @@ -1,6 +1,8 @@ package runner import ( + "github.com/sarchlab/akita/v3/mem/vm/gmmu" + "github.com/sarchlab/akita/v3/mem/vm/mmu" "github.com/sarchlab/akita/v3/sim" "github.com/sarchlab/akita/v3/tracing" "github.com/sarchlab/mgpusim/v3/driver" @@ -39,4 +41,7 @@ type GPU struct { L1ITLBs []TraceableComponent L2TLBs []TraceableComponent MemControllers []TraceableComponent + MMUEngine *mmu.MMU + GMMUEngine *gmmu.Comp + GMMUCache []TraceableComponent } diff --git a/samples/runner/r9nanobuilder.go b/samples/runner/r9nanobuilder.go index a0d3a23d..a508ac56 100644 --- a/samples/runner/r9nanobuilder.go +++ b/samples/runner/r9nanobuilder.go @@ -11,7 +11,9 @@ import ( "github.com/sarchlab/akita/v3/mem/cache/writethrough" "github.com/sarchlab/akita/v3/mem/dram" 
"github.com/sarchlab/akita/v3/mem/mem" + "github.com/sarchlab/akita/v3/mem/vm" "github.com/sarchlab/akita/v3/mem/vm/addresstranslator" + "github.com/sarchlab/akita/v3/mem/vm/gmmu" "github.com/sarchlab/akita/v3/mem/vm/mmu" "github.com/sarchlab/akita/v3/mem/vm/tlb" "github.com/sarchlab/akita/v3/monitoring" @@ -78,6 +80,10 @@ type R9NanoGPUBuilder struct { l1TLBToL2TLBConnection *sim.DirectConnection l1ToL2Connection *sim.DirectConnection l2ToDramConnection *sim.DirectConnection + + gmmuCache *tlb.TLB + gmmu *gmmu.Comp + pageTable vm.PageTable } // MakeR9NanoGPUBuilder provides a GPU builder that can builds the R9Nano GPU. @@ -233,11 +239,15 @@ func (b R9NanoGPUBuilder) Build(name string, id uint64) *GPU { b.buildDRAMControllers() b.buildCP() + b.buildGMMU() // GMMU and its cache first: buildL2TLB reads b.gmmuCache + b.buildGMMUCache() b.buildL2TLB() b.connectCP() b.connectL2AndDRAM() b.connectL1ToL2() b.connectL1TLBToL2TLB() + b.connectL2TLBToGMMUCache() + b.connectGMMUCachetoGMMU() b.populateExternalPorts() @@ -254,6 +264,7 @@ func (b *R9NanoGPUBuilder) populateExternalPorts() { name := fmt.Sprintf("Translation_%02d", i) b.gpu.Domain.AddPort(name, l2TLB.GetPortByName("Bottom")) } + b.gpu.Domain.AddPort("GMMU", b.gmmu.GetPortByName("Bottom")) } func (b *R9NanoGPUBuilder) createGPU(name string, id uint64) { @@ -824,7 +835,7 @@ func (b *R9NanoGPUBuilder) buildL2TLB() { WithNumMSHREntry(64). WithNumReqPerCycle(1024). WithPageSize(1 << b.log2PageSize). 
- WithLowModule(b.mmu.GetPortByName("Top")) + WithLowModule(b.gmmuCache.GetPortByName("Top")) l2TLB := builder.Build(fmt.Sprintf("%s.L2TLB", b.gpuName)) b.l2TLBs = append(b.l2TLBs, l2TLB) @@ -854,3 +865,92 @@ func (b *R9NanoGPUBuilder) connectWithDirectConnection( conn.PlugIn(port1, bufferSize) conn.PlugIn(port2, bufferSize) } + +// WithGMMUPageTable sets the page table that buildGMMU passes to the GPU's +// GMMU. Set it before calling Build, because Build runs buildGMMU. +func (b R9NanoGPUBuilder) WithGMMUPageTable( + pageTable vm.PageTable, +) R9NanoGPUBuilder { + b.pageTable = pageTable + return b +} + +// buildGMMUCache creates the TLB that sits between the L2 TLBs and the GMMU +// and hooks it into the tracer, monitor, and performance analyzer if set. +func (b *R9NanoGPUBuilder) buildGMMUCache() { + builder := tlb.MakeBuilder(). + WithEngine(b.engine). + WithFreq(b.freq). + WithNumWays(8). + WithNumSets(16). + WithNumMSHREntry(32). + WithNumReqPerCycle(32). + WithPageSize(1 << b.log2PageSize). + WithLowModule(b.gmmu.GetPortByName("Top")) + + gmmuCache := builder.Build(fmt.Sprintf("%s.GMMUCache", b.gpuName)) + b.gmmuCache = gmmuCache + b.gpu.GMMUCache = append(b.gpu.GMMUCache, gmmuCache) + + if b.enableVisTracing { + tracing.CollectTrace(b.gmmuCache, b.visTracer) + } + + if b.monitor != nil { + b.monitor.RegisterComponent(b.gmmuCache) + } + + if b.perfAnalyzer != nil { + b.perfAnalyzer.RegisterComponent(b.gmmuCache) + } +} 
+ +// buildGMMU creates the GMMU from b.pageTable, with the MMU's "Top" port as +// its low module, and hooks it into the tracer and monitor if set. +func (b *R9NanoGPUBuilder) buildGMMU() { + b.gmmu = gmmu.MakeBuilder(). + WithEngine(b.engine). + WithFreq(b.freq). + WithDeviceID(b.gpuID). + WithLog2PageSize(b.log2PageSize). + WithMaxNumReqInFlight(8). + WithPageTable(b.pageTable). + WithPageWalkingLatency(100). + WithLowModule(b.mmu.GetPortByName("Top")). + Build(fmt.Sprintf("%s.GMMU", b.gpuName)) + + b.gpu.GMMUEngine = b.gmmu + + if b.enableVisTracing { + tracing.CollectTrace(b.gmmu, b.visTracer) + } + + if b.monitor != nil { + b.monitor.RegisterComponent(b.gmmu) + } +} + +// connectL2TLBToGMMUCache plugs the "Bottom" port of every L2 TLB and the +// "Top" port of the GMMU cache into one direct connection. +func (b *R9NanoGPUBuilder) connectL2TLBToGMMUCache() { + conn := sim.NewDirectConnection( + b.gpuName+".L2TLBtoGMMUCache", + b.engine, b.freq, + ) + conn.PlugIn(b.gmmuCache.GetPortByName("Top"), 64) + + for _, l2TLB := range b.l2TLBs { + conn.PlugIn(l2TLB.GetPortByName("Bottom"), 64) + } +} + +// connectGMMUCachetoGMMU links the GMMU cache's "Bottom" port to the GMMU's +// "Top" port with a direct connection. +func (b *R9NanoGPUBuilder) connectGMMUCachetoGMMU() { + conn := sim.NewDirectConnection( + b.gpuName+".GMMUCacheToGMMU", + b.engine, b.freq, + ) + conn.PlugIn(b.gmmu.GetPortByName("Top"), 64) + conn.PlugIn(b.gmmuCache.GetPortByName("Bottom"), 64) +} diff --git a/samples/runner/timingplatform.go b/samples/runner/timingplatform.go index b2ea6726..68142178 100644 --- a/samples/runner/timingplatform.go +++ b/samples/runner/timingplatform.go @@ -164,6 +164,10 @@ func (b R9NanoPlatformBuilder) Build() *Platform { pcieConnector.EstablishRoute() + for _, gpu := range b.gpus { + gpu.MMUEngine = mmuComponent + } + return &Platform{ Engine: b.engine, Driver: gpuDriver, @@ -425,8 +429,8 @@ func (b *R9NanoPlatformBuilder) createGPU( driver.DeviceProperties{ CUCount: b.numCUPerSA * b.numSAPerGPU, DRAMSize: 4 * mem.GB, - }, - ) + }) + gpu.CommandProcessor.Driver = gpuDriver.GetPortByName("GPU") b.configRDMAEngine(gpu, rdmaAddressTable)