diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index 013187f8f8b..d3406f4d61e 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -141,9 +141,10 @@ func TestAuto_confGen(t *testing.T) { Message: control.MockServerScanResp(t, "withSpaceUsage"), } storRespHighMem := control.MockServerScanResp(t, "withSpaceUsage") - // Total mem to meet requirements 34GiB hugeMem, 2GiB per engine rsvd, 6GiB sys rsvd, + // Total mem to meet requirements 34GiB hugeMem, 2GiB per engine rsvd, 16GiB sys rsvd, // 5GiB per engine for tmpfs. - storRespHighMem.MemInfo.MemTotalKb = (humanize.GiByte * (34 + 4 + 6 + 10)) / humanize.KiByte + storRespHighMem.MemInfo.MemTotalKb = (humanize.GiByte * (34 + 4 + 16 + 10)) / humanize.KiByte + mockRamdiskSize := 5 storHostRespHighMem := &control.HostResponse{ Addr: "host1", Message: storRespHighMem, @@ -151,7 +152,6 @@ func TestAuto_confGen(t *testing.T) { e0 := control.MockEngineCfg(0, 2, 4, 6, 8).WithHelperStreamCount(4) e1 := control.MockEngineCfg(1, 1, 3, 5, 7).WithHelperStreamCount(4) exmplEngineCfgs := []*engine.Config{e0, e1} - mockRamdiskSize := 5 // RoundDownGiB(16*0.75/2) metadataMountPath := "/mnt/daos_md" controlMetadata := storage.ControlMetadata{ Path: metadataMountPath, @@ -406,7 +406,7 @@ disable_vfio: false disable_vmd: false enable_hotplug: false nr_hugepages: 6144 -system_ram_reserved: 6 +system_ram_reserved: 16 disable_hugepages: false control_log_mask: INFO control_log_file: /tmp/daos_server.log diff --git a/src/control/lib/control/auto_test.go b/src/control/lib/control/auto_test.go index 8e282d73f89..93a1d8ccd2a 100644 --- a/src/control/lib/control/auto_test.go +++ b/src/control/lib/control/auto_test.go @@ -1573,7 +1573,7 @@ func TestControl_AutoConfig_genConfig(t *testing.T) { MockEngineCfgTmpfs(1, 0, mockBdevTier(1, 3), mockBdevTier(1, 4, 5)), }, hpSize: defHpSizeKb, - memTotal: (54 * humanize.GiByte) / humanize.KiByte, + memTotal: (64 * humanize.GiByte) / 
humanize.KiByte, expCfg: MockServerCfg(exmplEngineCfg0.Fabric.Provider, []*engine.Config{ MockEngineCfgTmpfs(0, 5, /* tmpfs size in gib */ diff --git a/src/control/server/storage/scm.go b/src/control/server/storage/scm.go index b8a440fdedb..3296575d54b 100644 --- a/src/control/server/storage/scm.go +++ b/src/control/server/storage/scm.go @@ -51,7 +51,7 @@ const ( // Memory reservation constant defaults to be used when calculating RAM-disk size for DAOS I/O engine. const ( - DefaultSysMemRsvd = humanize.GiByte * 6 // per-system + DefaultSysMemRsvd = humanize.GiByte * 16 // per-system DefaultTgtMemRsvd = humanize.MiByte * 128 // per-engine-target DefaultEngineMemRsvd = humanize.GiByte * 1 // per-engine ) diff --git a/src/control/server/storage/scm_test.go b/src/control/server/storage/scm_test.go index b3f6d4327c6..08e8638f8f4 100644 --- a/src/control/server/storage/scm_test.go +++ b/src/control/server/storage/scm_test.go @@ -39,12 +39,12 @@ func Test_CalcRamdiskSize(t *testing.T) { expErr: errors.New("requires positive nonzero nr engines"), }, "default values; low mem": { - memTotal: humanize.GiByte * 20, + memTotal: humanize.GiByte * 30, memHuge: humanize.GiByte * 14, memSys: DefaultSysMemRsvd, tgtCount: 8, engCount: 1, - expErr: errors.New("insufficient ram"), // 20 - (14+6+1) = -1 + expErr: errors.New("insufficient ram"), // 30 - (14+16+1) = -1 }, "default values; high mem": { memTotal: humanize.GiByte * 60, @@ -52,7 +52,7 @@ func Test_CalcRamdiskSize(t *testing.T) { memSys: DefaultSysMemRsvd, tgtCount: 16, engCount: 2, - expSize: humanize.GiByte * 10, // (60 - (30+6+4)) / 2 + expSize: humanize.GiByte * 5, // (60 - (30+16+4)) / 2 }, "default values; low nr targets": { memTotal: humanize.GiByte * 60, @@ -60,7 +60,7 @@ func Test_CalcRamdiskSize(t *testing.T) { memSys: DefaultSysMemRsvd, tgtCount: 1, engCount: 2, - expSize: humanize.GiByte * 11, // (60 - (30+6+2)) / 2 + expSize: humanize.GiByte * 6, // (60 - (30+16+2)) / 2 }, "custom values; low sys reservation": { 
memTotal: humanize.GiByte * 60, diff --git a/src/tests/ftest/container/snapshot_aggregation.yaml b/src/tests/ftest/container/snapshot_aggregation.yaml index 5b63868d6dc..8c4fa97c7fc 100644 --- a/src/tests/ftest/container/snapshot_aggregation.yaml +++ b/src/tests/ftest/container/snapshot_aggregation.yaml @@ -21,6 +21,7 @@ server_config: fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + system_ram_reserved: 8 pool: control_method: dmg scm_size: 80G diff --git a/src/tests/ftest/control/dmg_server_set_logmasks.yaml b/src/tests/ftest/control/dmg_server_set_logmasks.yaml index 70d4d93995e..2edfba61ec6 100644 --- a/src/tests/ftest/control/dmg_server_set_logmasks.yaml +++ b/src/tests/ftest/control/dmg_server_set_logmasks.yaml @@ -4,6 +4,7 @@ timeout: 120 server_config: name: daos_server engines_per_host: 1 + system_ram_reserved: 6 engines: 0: targets: 4 diff --git a/src/tests/ftest/pool/create_all_vm.yaml b/src/tests/ftest/pool/create_all_vm.yaml index 2c053e5b038..3188671eade 100644 --- a/src/tests/ftest/pool/create_all_vm.yaml +++ b/src/tests/ftest/pool/create_all_vm.yaml @@ -30,6 +30,7 @@ test_two_pools: server_config: name: daos_server engines_per_host: 1 + system_ram_reserved: 6 engines: 0: targets: 4 diff --git a/src/tests/ftest/security/cont_overwrite_acl.yaml b/src/tests/ftest/security/cont_overwrite_acl.yaml index dd5f4cabd79..83974c98fd6 100644 --- a/src/tests/ftest/security/cont_overwrite_acl.yaml +++ b/src/tests/ftest/security/cont_overwrite_acl.yaml @@ -7,6 +7,7 @@ timeout: 120 server_config: name: daos_server engines_per_host: 1 + system_ram_reserved: 6 engines: 0: targets: 4 diff --git a/src/tests/ftest/security/cont_update_acl.yaml b/src/tests/ftest/security/cont_update_acl.yaml index 65091e2b06a..78c114e7e27 100644 --- a/src/tests/ftest/security/cont_update_acl.yaml +++ b/src/tests/ftest/security/cont_update_acl.yaml @@ -7,6 +7,7 @@ timeout: 120 server_config: name: daos_server engines_per_host: 1 + system_ram_reserved: 6 engines: 0: 
targets: 4 diff --git a/src/tests/ftest/telemetry/pool_space_metrics.yaml b/src/tests/ftest/telemetry/pool_space_metrics.yaml index d041e936c54..459e2a1954d 100644 --- a/src/tests/ftest/telemetry/pool_space_metrics.yaml +++ b/src/tests/ftest/telemetry/pool_space_metrics.yaml @@ -1,7 +1,7 @@ hosts: test_servers: 2 test_clients: 1 -timeout: 120 +timeout: 180 server_config: name: daos_server engines_per_host: 2 diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 3fba33dda67..73772a44ed4 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -235,9 +235,10 @@ ## of RAM resulting in MemAvailable value being too low to support the calculated RAM-disk size ## increasing the value will reduce the calculate size. Alternatively in situations where total ## RAM is low, reducing the value may prevent problems where RAM-disk size calculated is below the -## minimum of 4gib. +## minimum of 4gib. Increasing the value may help prevent the OOM killer from terminating engine +## processes, but it could also prevent DAOS from using memory that would otherwise be available. # -## default: 6 +## default: 16 #system_ram_reserved: 5 # #