Skip to content

Commit

Permalink
Merge pull request #9 from sarchlab/8-rdma-itself-should-not-be-a-per…
Browse files Browse the repository at this point in the history
…formace-bottleneck

RDMA itself should not be a performance bottleneck
  • Loading branch information
syifan authored Sep 21, 2023
2 parents a4c4159 + 2ad880b commit 5199aab
Show file tree
Hide file tree
Showing 15 changed files with 563 additions and 465 deletions.
15 changes: 5 additions & 10 deletions .github/workflows/mgpusim_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ on: push
jobs:
compile:
name: Compile
runs-on:
group: Marin
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
Expand All @@ -21,8 +20,7 @@ jobs:

lint:
name: Lint
runs-on:
group: Marin
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
Expand All @@ -40,8 +38,7 @@ jobs:

unit_test:
name: Unit Test
runs-on:
group: Marin
runs-on: ubuntu-latest
needs: [compile, lint]
steps:
- name: Checkout
Expand All @@ -60,8 +57,7 @@ jobs:

deterministicity_test:
name: Deterministicity Test
runs-on:
group: Marin
runs-on: ubuntu-latest
needs: [unit_test]
steps:
- name: Checkout
Expand All @@ -87,8 +83,7 @@ jobs:

single_gpu_acceptance_test:
name: Single GPU Acceptance Test
runs-on:
group: Marin
runs-on: ubuntu-latest
needs: [unit_test]
steps:
- name: Checkout
Expand Down
5 changes: 5 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,12 @@
"args": [
"-timing",
"-length=64",
<<<<<<< HEAD
"-report-all",
],
=======
]
>>>>>>> a4c4159dfeef26022d3c97e9ac583fabf5c1d2bd
},
{
"name": "Conv2d",
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ require (

// replace gitlab.com/akita/dnn => ../dnn

go 1.20
go 1.20
2 changes: 1 addition & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -204,4 +204,4 @@ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
8 changes: 7 additions & 1 deletion samples/runner/flag.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ var filenameFlag = flag.String("metric-file-name", "metrics",
"Modify the name of the output csv file.")
var magicMemoryCopy = flag.Bool("magic-memory-copy", false,
"Copy data from CPU directly to global memory")
var perfAnalyzerDirFlag = flag.String("buffer-level-trace-dir", "",
var bufferLevelTraceDirFlag = flag.String("buffer-level-trace-dir", "",
"The directory to dump the buffer level traces.")
var bufferLevelTracePeriodFlag = flag.Float64("buffer-level-trace-period", 0.0,
"The period to dump the buffer level trace.")
Expand All @@ -46,6 +46,12 @@ var customPortForAkitaRTM = flag.Int("akitartm-port", 0,
this number is not given or a invalid number is given number, a random port
will be used.`)

var analyszerNameFlag = flag.String("analyzer-Name", "",
"The name of the analyzer to use.")

var analyszerPeriodFlag = flag.Float64("analyzer-period", 0.0,
"The period to dump the analyzer results.")

var visTracing = flag.Bool("trace-vis", false,
"Generate trace for visualization purposes.")
var visTracerDB = flag.String("trace-vis-db", "sqlite",
Expand Down
2 changes: 1 addition & 1 deletion samples/runner/platform.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ type Platform struct {
type GPU struct {
Domain *sim.Domain
CommandProcessor *cp.CommandProcessor
RDMAEngine *rdma.Engine
RDMAEngine *rdma.Comp
PMC *pagemigrationcontroller.PageMigrationController
CUs []TraceableComponent
SIMDs []TraceableComponent
Expand Down
25 changes: 15 additions & 10 deletions samples/runner/r9nanobuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ type R9NanoGPUBuilder struct {
lowModuleFinderForL2 *mem.InterleavedLowModuleFinder
lowModuleFinderForPMC *mem.InterleavedLowModuleFinder
dmaEngine *cp.DMAEngine
rdmaEngine *rdma.Engine
rdmaEngine *rdma.Comp
pageMigrationController *pagemigrationcontroller.PageMigrationController
globalStorage *mem.Storage

Expand Down Expand Up @@ -195,8 +195,7 @@ func (b R9NanoGPUBuilder) WithMonitor(m *monitoring.Monitor) R9NanoGPUBuilder {
return b
}

// WithPerfAnalyzer sets the performance analyzer to apply to all the internal
// components of the GPU.
// WithPerfAnalyzer sets the performance analyzer to use.
func (b R9NanoGPUBuilder) WithPerfAnalyzer(
a *analysis.PerfAnalyzer,
) R9NanoGPUBuilder {
Expand Down Expand Up @@ -746,12 +745,18 @@ func (b *R9NanoGPUBuilder) populateInstMemoryHierarchy(sa *shaderArray) {
}

func (b *R9NanoGPUBuilder) buildRDMAEngine() {
b.rdmaEngine = rdma.NewEngine(
fmt.Sprintf("%s.RDMA", b.gpuName),
b.engine,
b.lowModuleFinderForL1,
nil,
)
// b.rdmaEngine = rdma.NewEngine(
// fmt.Sprintf("%s.RDMA", b.gpuName),
// b.engine,
// b.lowModuleFinderForL1,
// nil,
// )
name := fmt.Sprintf("%s.RDMA", b.gpuName)
b.rdmaEngine = rdma.MakeBuilder().
WithEngine(b.engine).
WithFreq(1 * sim.GHz).
WithLocalModules(b.lowModuleFinderForL1).
Build(name)
b.gpu.RDMAEngine = b.rdmaEngine

if b.monitor != nil {
Expand Down Expand Up @@ -793,7 +798,7 @@ func (b *R9NanoGPUBuilder) buildCP() {
WithEngine(b.engine).
WithFreq(b.freq).
WithMonitor(b.monitor).
WithBufferAnalyzer(b.perfAnalyzer)
WithPerfAnalyzer(b.perfAnalyzer)

if b.enableVisTracing {
builder = builder.WithVisTracer(b.visTracer)
Expand Down
2 changes: 1 addition & 1 deletion samples/runner/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type dramTransactionCountTracer struct {
type rdmaTransactionCountTracer struct {
outgoingTracer *tracing.AverageTimeTracer
incomingTracer *tracing.AverageTimeTracer
rdmaEngine *rdma.Engine
rdmaEngine *rdma.Comp
}

type simdBusyTimeTracer struct {
Expand Down
14 changes: 7 additions & 7 deletions samples/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func (r *Runner) buildTimingPlatform() {

b = b.WithMonitor(r.monitor)

b = r.setPerfAnalyzer(b)
b = r.setAnalyszer(b)

if *magicMemoryCopy {
b = b.WithMagicMemoryCopy()
Expand All @@ -146,17 +146,17 @@ func (r *Runner) buildTimingPlatform() {
r.monitor.StartServer()
}

func (*Runner) setPerfAnalyzer(
func (*Runner) setAnalyszer(
b R9NanoPlatformBuilder,
) R9NanoPlatformBuilder {
if *bufferLevelTracePeriodFlag != 0 && *perfAnalyzerDirFlag == "" {
panic("Buffer level trace directory is not specified")
if *analyszerPeriodFlag != 0 && *analyszerNameFlag == "" {
panic("must specify -analyszer-name when using -analyszer-period")
}

if *perfAnalyzerDirFlag != "" {
if *analyszerNameFlag != "" {
b = b.WithPerfAnalyzer(
*perfAnalyzerDirFlag,
*bufferLevelTracePeriodFlag,
*analyszerNameFlag,
*analyszerPeriodFlag,
)
}
return b
Expand Down
44 changes: 21 additions & 23 deletions samples/runner/timingplatform.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
"log"
"os"

"github.com/sarchlab/akita/v3/analysis"
memtraces "github.com/sarchlab/akita/v3/mem/trace"

"github.com/sarchlab/akita/v3/analysis"
"github.com/sarchlab/akita/v3/mem/mem"
"github.com/sarchlab/akita/v3/mem/vm"
"github.com/sarchlab/akita/v3/mem/vm/mmu"
Expand All @@ -31,12 +31,12 @@ type R9NanoPlatformBuilder struct {
useMagicMemoryCopy bool
log2PageSize uint64

engine sim.Engine
monitor *monitoring.Monitor
perfDBFilename string
perfAnalyzingPeriod float64
perfAnalyzer *analysis.PerfAnalyzer
visTracer tracing.Tracer
engine sim.Engine
monitor *monitoring.Monitor
perfAnalysisFileName string
perfAnalyzingPeriod float64
perfAnalyzer *analysis.PerfAnalyzer
visTracer tracing.Tracer

globalStorage *mem.Storage

Expand All @@ -52,7 +52,6 @@ func MakeR9NanoBuilder() R9NanoPlatformBuilder {
log2PageSize: 12,
traceVisStartTime: -1,
traceVisEndTime: -1,
perfDBFilename: "perf",
}
return b
}
Expand Down Expand Up @@ -117,13 +116,13 @@ func (b R9NanoPlatformBuilder) WithMonitor(
return b
}

// WithPerfAnalyzer sets the performance analyzer that is used to analyze the
// WithPerfAnalyzer sets the database file name and the sampling period used by the performance analyzer.
func (b R9NanoPlatformBuilder) WithPerfAnalyzer(
dbFileName string,
recordPeriod float64,
traceDirName string,
tracePeriod float64,
) R9NanoPlatformBuilder {
b.perfDBFilename = dbFileName
b.perfAnalyzingPeriod = recordPeriod
b.perfAnalysisFileName = traceDirName
b.perfAnalyzingPeriod = tracePeriod
return b
}

Expand All @@ -140,7 +139,7 @@ func (b R9NanoPlatformBuilder) Build() *Platform {
b.monitor.RegisterEngine(b.engine)
}

b.setupPerfAnalyzer()
b.setupPerformanceAnalyzer()
b.setupVisTracing()

b.globalStorage = mem.NewStorage(uint64(1+b.numGPU) * 4 * mem.GB)
Expand Down Expand Up @@ -234,19 +233,18 @@ func (b *R9NanoPlatformBuilder) setupVisTracing() {
}

visTracer := tracing.NewDBTracer(b.engine, backend)
visTracer.SetTimeRange(b.traceVisStartTime, b.traceVisEndTime)

b.visTracer = visTracer
}

func (b *R9NanoPlatformBuilder) setupPerfAnalyzer() {
period := sim.VTimeInSec(1e-4)
b.perfAnalyzer = analysis.MakePerfAnalyzerBuilder().
WithPeriod(period).
WithSQLiteBackend().
WithDBFilename(b.perfDBFilename).
Build()

b.perfAnalyzer.RegisterEngine(b.engine)
func (b *R9NanoPlatformBuilder) setupPerformanceAnalyzer() {
if b.perfAnalysisFileName != "" {
b.perfAnalyzer = analysis.MakePerfAnalyzerBuilder().
WithPeriod(sim.VTimeInSec(b.perfAnalyzingPeriod)).
WithDBFilename(b.perfAnalysisFileName).
Build()
}
}

func (b *R9NanoPlatformBuilder) createGPUs(
Expand Down
12 changes: 6 additions & 6 deletions timing/cp/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ type Builder struct {
engine sim.Engine
visTracer tracing.Tracer
monitor *monitoring.Monitor
bufferAnalyzer *analysis.PerfAnalyzer
perfAnalyzer *analysis.PerfAnalyzer
numDispatchers int
}

Expand Down Expand Up @@ -57,12 +57,12 @@ func (b Builder) WithMonitor(monitor *monitoring.Monitor) Builder {
return b
}

// WithBufferAnalyzer sets the buffer analyzer used to analyze the
// WithPerfAnalyzer sets the performance analyzer used to analyze the
// command processor's buffers.
func (b Builder) WithBufferAnalyzer(
func (b Builder) WithPerfAnalyzer(
analyzer *analysis.PerfAnalyzer,
) Builder {
b.bufferAnalyzer = analyzer
b.perfAnalyzer = analyzer
return b
}

Expand All @@ -86,8 +86,8 @@ func (b Builder) Build(name string) *CommandProcessor {
tracing.CollectTrace(cp, b.visTracer)
}

if b.bufferAnalyzer != nil {
b.bufferAnalyzer.RegisterComponent(cp)
if b.perfAnalyzer != nil {
b.perfAnalyzer.RegisterComponent(cp)
}

return cp
Expand Down
Loading

0 comments on commit 5199aab

Please sign in to comment.