Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-13672 control: Calculate engine memory reservation on nr targets #12472

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/control/cmd/daos_server/auto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func TestDaosServer_Auto_confGen(t *testing.T) {
controlMetadata := storage.ControlMetadata{
Path: metadataMountPath,
}
// SCM tmpfs 5GiB size calculated after subtracting reservations from MemTotalKiB.
// SCM tmpfs 4GiB size calculated after subtracting reservations from MemTotalKiB.
tmpfsEngineCfgs := []*engine.Config{
control.MockEngineCfgTmpfs(0, 4,
control.MockBdevTierWithRole(0, storage.BdevRoleWAL, 2),
Expand Down Expand Up @@ -428,9 +428,9 @@ func TestDaosServer_Auto_confGen(t *testing.T) {
},
MemInfo: &common.MemInfo{
HugepageSizeKiB: 2048,
// Total mem to meet requirements 39GiB hugeMem, 1GiB per
// engine rsvd, 6GiB sys rsvd, 5GiB per engine for tmpfs.
MemTotalKiB: (humanize.GiByte * (39 + 2 + 6 + 10)) / humanize.KiByte,
// Total mem to meet requirements 39GiB hugeMem, 2GiB per
// engine rsvd, 8GiB sys rsvd, 4GiB per engine for tmpfs.
MemTotalKiB: (humanize.GiByte * (39 + 4 + 8 + 8)) / humanize.KiByte,
},
NvmeDevices: storage.NvmeControllers{
storage.MockNvmeController(1),
Expand Down
6 changes: 3 additions & 3 deletions src/control/cmd/dmg/auto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,10 @@ func TestAuto_confGen(t *testing.T) {
Message: control.MockServerScanResp(t, "withSpaceUsage"),
}
storRespHighMem := control.MockServerScanResp(t, "withSpaceUsage")
// Total mem to meet requirements 34GiB hugeMem, 1GiB per engine rsvd, 8GiB sys rsvd,
// Total mem to meet requirements 34GiB hugeMem, 2GiB per engine rsvd, 8GiB sys rsvd,
// 5GiB per engine for tmpfs.
mockRamdiskSize := 5
storRespHighMem.MemInfo.MemTotalKb = (humanize.GiByte * (34 + 2 + 8 + 10)) / humanize.KiByte
storRespHighMem.MemInfo.MemTotalKb = (humanize.GiByte * (34 + 4 + 8 + 10)) / humanize.KiByte
storHostRespHighMem := &control.HostResponse{
Addr: "host1",
Message: storRespHighMem,
Expand Down Expand Up @@ -259,7 +259,7 @@ func TestAuto_confGen(t *testing.T) {
},
expErr: errors.New("unrecognized net-class"),
},
"successful fetch of host storage and fabric; tmpfs scm; no control_metadata path": {
"successful fetch of host storage and fabric; tmpfs scm; no control_metadata path": {
tmpfsSCM: true,
hostResponsesSet: [][]*control.HostResponse{
{netHostResp},
Expand Down
2 changes: 1 addition & 1 deletion src/control/lib/control/auto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1573,7 +1573,7 @@ func TestControl_AutoConfig_genConfig(t *testing.T) {
MockEngineCfgTmpfs(1, 0, mockBdevTier(1, 3), mockBdevTier(1, 4, 5)),
},
hpSize: defHpSizeKb,
memTotal: (52 * humanize.GiByte) / humanize.KiByte,
memTotal: (54 * humanize.GiByte) / humanize.KiByte,
expCfg: MockServerCfg(exmplEngineCfg0.Fabric.Provider,
[]*engine.Config{
MockEngineCfgTmpfs(0, 4, /* tmpfs size in gib */
Expand Down
15 changes: 12 additions & 3 deletions src/control/server/config/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,12 @@ func (cfg *Server) CalcRamdiskSize(log logging.Logger, hpSizeKiB, memKiB int) (u
// Calculate reserved system memory in bytes.
memSys := uint64(cfg.SystemRamReserved * humanize.GiByte)

return storage.CalcRamdiskSize(log, memTotal, memHuge, memSys,
storage.DefaultEngineMemRsvd, len(cfg.Engines))
if len(cfg.Engines) == 0 {
return 0, errors.New("no engines in config")
}

return storage.CalcRamdiskSize(log, memTotal, memHuge, memSys, cfg.Engines[0].TargetCount,
len(cfg.Engines))
}

// CalcMemForRamdiskSize calculates minimum memory needed for a given RAM-disk size.
Expand All @@ -522,8 +526,13 @@ func (cfg *Server) CalcMemForRamdiskSize(log logging.Logger, hpSizeKiB int, ramd
// Calculate reserved system memory in bytes.
memSys := uint64(cfg.SystemRamReserved * humanize.GiByte)

if len(cfg.Engines) == 0 {
return 0, errors.New("no engines in config")
}

//len(cfg.Engines))
return storage.CalcMemForRamdiskSize(log, ramdiskSize, memHuge, memSys,
storage.DefaultEngineMemRsvd, len(cfg.Engines))
cfg.Engines[0].TargetCount, len(cfg.Engines))
}

// SetRamdiskSize calculates maximum RAM-disk size using total memory as reported by /proc/meminfo.
Expand Down
8 changes: 4 additions & 4 deletions src/control/server/config/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ func TestServerConfig_SetRamdiskSize(t *testing.T) {
c.Engines[0].Storage.Tiers.ScmConfigs()[0].Scm.RamdiskSize = 11
return c.WithNrHugepages(16896)
},
expErr: FaultConfigRamdiskOverMaxMem(humanize.GiByte*11, humanize.GiByte*10, 0),
expErr: FaultConfigRamdiskOverMaxMem(humanize.GiByte*11, humanize.GiByte*9, 0),
},
"low mem": {
// 46 total - 40 reserved = 6 for tmpfs (3 gib per engine - too low)
Expand All @@ -1019,7 +1019,7 @@ func TestServerConfig_SetRamdiskSize(t *testing.T) {
},
// error indicates min RAM needed = 50 gib (reserved mem + 4 gib min ram-disk per engine)
expErr: storage.FaultRamdiskLowMem("Total", storage.MinRamdiskMem,
humanize.GiByte*48, humanize.GiByte*46),
humanize.GiByte*50, humanize.GiByte*46),
},
"custom value set": {
memTotBytes: humanize.GiByte * 60,
Expand All @@ -1036,7 +1036,7 @@ func TestServerConfig_SetRamdiskSize(t *testing.T) {
extraConfig: func(c *Server) *Server {
return c.WithNrHugepages(16896)
},
expRamdiskSize: 10,
expRamdiskSize: 9,
},
"custom system_ram_reserved value set": {
// 33 huge mem + 2 sys rsv + 2 engine rsv = 37 gib reserved mem
Expand All @@ -1046,7 +1046,7 @@ func TestServerConfig_SetRamdiskSize(t *testing.T) {
c.SystemRamReserved = 2
return c.WithNrHugepages(16896)
},
expRamdiskSize: 11,
expRamdiskSize: 10,
},
"no scm configured on second engine": {
memTotBytes: humanize.GiByte * 80,
Expand Down
51 changes: 34 additions & 17 deletions src/control/server/storage/scm.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,17 @@ const (
// ScmUnknownMode indicates a pMem AppDirect region is in an unsupported memory mode.
ScmUnknownMode

// DefaultSysMemRsvd is the default amount of memory reserved for system when calculating
// RAM-disk size for DAOS I/O engine.
DefaultSysMemRsvd = humanize.GiByte * 8
// DefaultEngineMemRsvd is the default amount of memory reserved per-engine when
// calculating RAM-disk size for DAOS I/O engine.
DefaultEngineMemRsvd = humanize.GiByte * 1
// MinRamdiskMem is the minimum amount of memory needed for each engine's tmpfs RAM-disk.
MinRamdiskMem = humanize.GiByte * 4
)

// Default memory reservations used when calculating the tmpfs RAM-disk size for DAOS I/O engines.
const (
// DefaultSysMemRsvd is the default amount of memory reserved for the system (8 GiB).
DefaultSysMemRsvd = humanize.GiByte * 8 // per-system
// DefaultTgtMemRsvd is the amount of memory reserved for each engine target (128 MiB). It is
// multiplied by the engine's target count to derive the per-engine reservation.
DefaultTgtMemRsvd = humanize.MiByte * 128 // per-engine-target
// DefaultEngineMemRsvd is the minimum per-engine reservation (1 GiB), used whenever the
// target-derived reservation would be smaller.
DefaultEngineMemRsvd = humanize.GiByte * 1 // per-engine minimum
)

func (ss ScmState) String() string {
if val, exists := map[ScmState]string{
ScmStateUnknown: "Unknown",
Expand Down Expand Up @@ -561,20 +562,28 @@ func (f *ScmFwForwarder) UpdateFirmware(req ScmFirmwareUpdateRequest) (*ScmFirmw
}

// CalcRamdiskSize returns recommended tmpfs RAM-disk size calculated as
// (total mem - hugepage mem - sys rsvd mem - (engine rsvd mem * nr engines)) / nr engines.
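// (total mem - hugepage mem - sys rsvd mem - (engine rsvd mem * nr engines)) / nr engines,
// where engine rsvd mem is the greater of (nr targets * DefaultTgtMemRsvd) and
// DefaultEngineMemRsvd.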
// All values in units of bytes and return value is for a single RAM-disk/engine.
func CalcRamdiskSize(log logging.Logger, memTotal, memHuge, memSys, memEng uint64, engCount int) (uint64, error) {
func CalcRamdiskSize(log logging.Logger, memTotal, memHuge, memSys uint64, tgtCount, engCount int) (uint64, error) {
if memTotal == 0 {
return 0, errors.New("requires nonzero total mem")
}
if engCount == 0 {
return 0, errors.New("requires nonzero nr engines")
if tgtCount <= 0 {
return 0, errors.New("requires positive nonzero nr engine targets")
}
if engCount <= 0 {
return 0, errors.New("requires positive nonzero nr engines")
}

memEng := uint64(tgtCount) * DefaultTgtMemRsvd
if memEng < DefaultEngineMemRsvd {
memEng = DefaultEngineMemRsvd
}

msgStats := fmt.Sprintf("mem stats: total %s (%d) - (hugepages %s + sys rsvd %s + "+
"(engine rsvd %s * nr engines %d))", humanize.IBytes(memTotal), memTotal,
humanize.IBytes(memHuge), humanize.IBytes(memSys), humanize.IBytes(memEng),
engCount)
"(engine rsvd %s * nr engines %d). %d tgts-per-engine)", humanize.IBytes(memTotal),
memTotal, humanize.IBytes(memHuge), humanize.IBytes(memSys),
humanize.IBytes(memEng), engCount, tgtCount)

memRsvd := memHuge + memSys + (memEng * uint64(engCount))
if memTotal < memRsvd {
Expand All @@ -590,18 +599,26 @@ func CalcRamdiskSize(log logging.Logger, memTotal, memHuge, memSys, memEng uint6
}

// CalcMemForRamdiskSize returns the minimum RAM required for the input requested RAM-disk size.
func CalcMemForRamdiskSize(log logging.Logger, ramdiskSize, memHuge, memSys, memEng uint64, engCount int) (uint64, error) {
func CalcMemForRamdiskSize(log logging.Logger, ramdiskSize, memHuge, memSys uint64, tgtCount, engCount int) (uint64, error) {
if ramdiskSize == 0 {
return 0, errors.New("requires nonzero ram-disk size")
}
if tgtCount <= 0 {
return 0, errors.New("requires positive nonzero nr engine targets")
}
if engCount == 0 {
return 0, errors.New("requires nonzero nr engines")
}

memEng := uint64(tgtCount) * DefaultTgtMemRsvd
if memEng < DefaultEngineMemRsvd {
memEng = DefaultEngineMemRsvd
}

msgStats := fmt.Sprintf("required ram-disk size %s (%d). mem hugepage: %s, nr engines: %d, "+
"sys mem rsvd: %s, engine mem rsvd: %s", humanize.IBytes(ramdiskSize), ramdiskSize,
humanize.IBytes(memHuge), engCount, humanize.IBytes(memSys),
humanize.IBytes(memEng))
"sys mem rsvd: %s, engine mem rsvd: %s, %d tgts-per-engine",
humanize.IBytes(ramdiskSize), ramdiskSize, humanize.IBytes(memHuge), engCount,
humanize.IBytes(memSys), humanize.IBytes(memEng), tgtCount)

memRsvd := memHuge + memSys + (memEng * uint64(engCount))
memReqd := memRsvd + (ramdiskSize * uint64(engCount))
Expand Down
57 changes: 36 additions & 21 deletions src/control/server/storage/scm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,63 +21,78 @@ func Test_CalcRamdiskSize(t *testing.T) {
memTotal uint64
memHuge uint64
memSys uint64
memEng uint64
tgtCount int
engCount int
expSize uint64
expErr error
}{
"no mem": {
expErr: errors.New("requires nonzero total mem"),
},
"no targets": {
memTotal: humanize.GiByte,
expErr: errors.New("requires positive nonzero nr engine targets"),
},
"no engines": {
memTotal: humanize.GiByte,
expErr: errors.New("requires nonzero nr engines"),
tgtCount: 8,
expErr: errors.New("requires positive nonzero nr engines"),
},
"default values; low mem": {
memTotal: humanize.GiByte * 18,
memTotal: humanize.GiByte * 20,
memHuge: humanize.GiByte * 12,
memSys: DefaultSysMemRsvd,
memEng: DefaultEngineMemRsvd,
tgtCount: 8,
engCount: 1,
expErr: errors.New("insufficient ram"),
expErr: errors.New("insufficient ram"), // 20 - (12+8+1) = -1
},
"default values; high mem": {
memTotal: humanize.GiByte * 23,
memHuge: humanize.GiByte * 12,
memTotal: humanize.GiByte * 60,
memHuge: humanize.GiByte * 30,
memSys: DefaultSysMemRsvd,
memEng: DefaultEngineMemRsvd,
engCount: 1,
expSize: humanize.GiByte * 2,
tgtCount: 16,
engCount: 2,
expSize: humanize.GiByte * 9, // (60 - (30+8+4)) / 2
},
"default values; low nr targets": {
memTotal: humanize.GiByte * 60,
memHuge: humanize.GiByte * 30,
memSys: DefaultSysMemRsvd,
tgtCount: 1,
engCount: 2,
expSize: humanize.GiByte * 10, // (60 - (30+8+2)) / 2
},
"custom values; low sys reservation": {
memTotal: humanize.GiByte * 60,
memHuge: humanize.GiByte * 30,
memSys: humanize.GiByte * 4,
memEng: DefaultEngineMemRsvd,
memTotal: humanize.GiByte * 18,
memHuge: humanize.GiByte * 12,
tgtCount: 16,
engCount: 2,
expSize: humanize.GiByte * 11, // (60 - (30+4+4)) / 2
},
"custom values; high eng reservation": {
memSys: DefaultSysMemRsvd,
memEng: humanize.GiByte * 3,
memTotal: humanize.GiByte * 23,
memHuge: humanize.GiByte * 12,
"custom values; high sys reservation": {
memTotal: humanize.GiByte * 60,
memHuge: humanize.GiByte * 30,
memSys: humanize.GiByte * 27,
tgtCount: 16,
engCount: 2,
expErr: errors.New("insufficient ram"),
expErr: errors.New("insufficient ram"), // 60 - (30+27+4) = -1
},
} {
t.Run(name, func(t *testing.T) {
log, buf := logging.NewTestLogger(name)
defer test.ShowBufferOnFailure(t, buf)

gotSize, gotErr := CalcRamdiskSize(log, tc.memTotal, tc.memHuge, tc.memSys,
tc.memEng, tc.engCount)
tc.tgtCount, tc.engCount)
test.CmpErr(t, tc.expErr, gotErr)
if tc.expErr != nil {
return
}

if gotSize != tc.expSize {
t.Fatalf("expected %d, got %d", tc.expSize, gotSize)
t.Fatalf("expected %s, got %s",
humanize.IBytes(tc.expSize), humanize.IBytes(gotSize))
}
})
}
Expand Down