Skip to content

Commit

Permalink
DAOS-14408 common: ensure NDCTL not used for storage class ram (#15203
Browse files Browse the repository at this point in the history
)

* DAOS-14408 common: enable NDCTL for DCPM

This PR prepares DAOS to be used with NDCTL enabled in PMDK, which means:
- NDCTL must not be used when a non-DCPM (simulated PMem) storage class - `storage class: "ram"` - is used:
`PMEMOBJ_CONF=sds.at_create=0` env variable disables NDCTL features in the PMDK
This change affects all tests run on simulated PMem (e.g. inside VMs).
Some DAOS utility applications may also require `PMEMOBJ_CONF=sds.at_create=0` to be set.

- The default ULT stack size must be at least 20KiB to avoid stack overuse by PMDK with NDCTL enabled and be aligned with Linux page size.
`ABT_THREAD_STACKSIZE=20480` env variable is used to increase the default ULT stack size.
This env variable is set by control/server module just before engine is started.
A much bigger stack is used for pmempool open/create-related tasks, e.g. `tgt_vos_create_one`, to avoid stack overuse.

This modification shall not affect md-on-ssd mode as long as `storage class: "ram"` is used for the first tier in the `storage` configuration.
This change does not require any configuration changes to existing systems.

The new PMDK package with NDCTL enabled (daos-stack/pmdk#38) will land as soon as this PR is merged.

Change-Id: If4c3f7d88a97e4e4f5526da71f4b374a2844057b
Signed-off-by: Jan Michalski <jan.michalski@intel.com>
  • Loading branch information
grom72 authored and jolivier23 committed Nov 1, 2024
1 parent 8f896d0 commit b7ea05b
Show file tree
Hide file tree
Showing 16 changed files with 361 additions and 14 deletions.
1 change: 1 addition & 0 deletions .github/workflows/landing-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ on:
- ci/**
- requirements-build.txt
- requirements-utest.txt
- utils/build.config

permissions: {}

Expand Down
16 changes: 16 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
daos (2.6.1-4) unstable; urgency=medium
[ Tomasz Gromadzki ]
* Add support of the PMDK package 2.1.0 with NDCTL enabled.
* Increase the default ULT stack size to 20KiB if the engine uses
the DCPM storage class.
* Prevent using the RAM storage class (simulated PMem) when
the shutdown state (SDS) is active.
* Automatically disable SDS for the RAM storage class on engine startup.
* Force explicitly setting the PMEMOBJ_CONF='sds.at_create=0'
environment variable to deactivate SDS for the DAOS tools
(ddb, daos_perf, vos_perf, etc.) when used WITHOUT DCPM.
Otherwise, a user is supposed to be stopped by an error
like: "Unsafe shutdown count is not supported for this source".

-- Tomasz Gromadzki <tomasz.gromadzki@intel.com> Wed, 02 Oct 2024 12:00:00 +0200

daos (2.6.1-3) unstable; urgency=medium
[ Phillip Henderson ]
* Third release candidate for 2.6.1
Expand Down
6 changes: 4 additions & 2 deletions debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Build-Depends: debhelper (>= 10),
python3-distro,
libabt-dev,
libucx-dev,
libpmemobj-dev (>= 2.0.0),
libpmemobj-dev (>= 2.1.0),
libfuse3-dev,
libprotobuf-c-dev,
libjson-c-dev,
Expand Down Expand Up @@ -118,7 +118,9 @@ Depends: python (>=3.8), python3, python-yaml, python3-yaml,
daos-client (= ${binary:Version}),
daos-admin (= ${binary:Version}),
golang-go (>= 2:1.21),
libcapstone-dev
libcapstone-dev,
libndctl-dev,
libdaxctl-dev
Description: The Distributed Asynchronous Object Storage (DAOS) is an open-source
software-defined object store designed from the ground up for
massively distributed Non Volatile Memory (NVM). DAOS takes advantage
Expand Down
1 change: 0 additions & 1 deletion site_scons/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ def define_components(reqs):
retriever=GitRepoRetriever(),
commands=[['make',
'all',
'NDCTL_ENABLE=n',
'BUILD_EXAMPLES=n',
'BUILD_BENCHMARKS=n',
'DOC=n',
Expand Down
86 changes: 86 additions & 0 deletions src/control/server/engine/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package engine
import (
"fmt"
"os"
"strconv"
"strings"

"github.com/pkg/errors"
Expand All @@ -28,6 +29,8 @@ const (
envLogMasks = "D_LOG_MASK"
envLogDbgStreams = "DD_MASK"
envLogSubsystems = "DD_SUBSYS"

minABTThreadStackSizeDCPM = 20480
)

// FabricConfig encapsulates networking fabric configuration.
Expand Down Expand Up @@ -342,7 +345,80 @@ func (c *Config) Validate() error {
if err := ValidateLogSubsystems(subsystems); err != nil {
return errors.Wrap(err, "validate engine log subsystems")
}
return nil
}

// UpdatePMDKEnvarsStackSizeDCPM makes sure the engine environment carries an
// ABT_THREAD_STACKSIZE of at least minABTThreadStackSizeDCPM (20KiB), as
// required for an engine with the DCPM storage class.
//
// If looking up ABT_THREAD_STACKSIZE fails (presumably because it is not set),
// the variable is appended to c.EnvVars with the minimum value. If it is
// present, it is only validated: a non-integer value or a value below the
// minimum yields an error and the config is left untouched.
func (c *Config) UpdatePMDKEnvarsStackSizeDCPM() error {
	raw, lookupErr := c.GetEnvVar("ABT_THREAD_STACKSIZE")
	if lookupErr != nil {
		defaultSetting := fmt.Sprintf("ABT_THREAD_STACKSIZE=%d",
			minABTThreadStackSizeDCPM)
		c.EnvVars = append(c.EnvVars, defaultSetting)
		return nil
	}

	size, convErr := strconv.Atoi(raw)
	switch {
	case convErr != nil:
		return errors.Errorf("env_var ABT_THREAD_STACKSIZE has invalid value: %s",
			raw)
	case size < minABTThreadStackSizeDCPM:
		return errors.Errorf("env_var ABT_THREAD_STACKSIZE should be >= %d "+
			"for DCPM storage class, found %d", minABTThreadStackSizeDCPM,
			size)
	default:
		return nil
	}
}

// UpdatePMDKEnvarsPMemobjConf validates and, where needed, amends the
// PMEMOBJ_CONF environment variable so that PMDK shutdown state (SDS)
// handling matches the storage class of the engine.
//
// With isDCPM set, the PMDK default handling of SDS must stay in effect: any
// "sds.at_create" setting found in PMEMOBJ_CONF is rejected with an error and
// the config is never modified.
//
// Otherwise (e.g. RAM-based simulated SCM) SDS has to be disabled:
//   - PMEMOBJ_CONF cannot be looked up: "PMEMOBJ_CONF=sds.at_create=0" is
//     appended to c.EnvVars.
//   - PMEMOBJ_CONF is set without "sds.at_create": the existing entry is
//     replaced by one with "sds.at_create=0" added to the previous value.
//   - PMEMOBJ_CONF already mentions "sds.at_create": it is accepted only if it
//     contains the explicit "sds.at_create=0" form and no "sds.at_create=1";
//     anything else is an error.
func (c *Config) UpdatePMDKEnvarsPMemobjConf(isDCPM bool) error {
	const (
		sdsKey = "sds.at_create"
		sdsOff = sdsKey + "=0"
		sdsOn  = sdsKey + "=1"
	)

	conf, lookupErr := c.GetEnvVar("PMEMOBJ_CONF")
	// The substring check is also evaluated when the lookup failed; it is
	// expected to be false then (assumes GetEnvVar yields "" on error).
	hasSds := strings.Contains(conf, sdsKey)

	if isDCPM {
		// Confirm default handling of shutdown state (SDS) for DCPM storage class.
		if hasSds {
			return errors.Errorf("env_var PMEMOBJ_CONF should NOT contain 'sds.at_create=?' "+
				"for DCPM storage class, found '%s'", conf)
		}
		return nil
	}

	// Disable shutdown state (SDS) (part of RAS) for RAM-based simulated SCM.
	if lookupErr != nil {
		c.EnvVars = append(c.EnvVars, "PMEMOBJ_CONF="+sdsOff)
		return nil
	}

	if !hasSds {
		// The variable was just found, so removal is expected to succeed;
		// still surface a failure rather than continuing with a bad slice.
		envVars, err := common.DeleteKeyValue(c.EnvVars, "PMEMOBJ_CONF")
		if err != nil {
			return errors.Wrap(err, "replace env_var PMEMOBJ_CONF")
		}
		newConf := sdsOff
		if conf != "" {
			// Avoid a leading ';' when the existing value is empty.
			newConf = conf + ";" + sdsOff
		}
		c.EnvVars = append(envVars, "PMEMOBJ_CONF="+newConf)
		return nil
	}

	// Fail closed: only the explicit "=0" form is known to disable SDS, so
	// any other value (not just "=1") is refused, matching the message below.
	if strings.Contains(conf, sdsOn) || !strings.Contains(conf, sdsOff) {
		return errors.Errorf("env_var PMEMOBJ_CONF should contain 'sds.at_create=0' "+
			"for non-DCPM storage class, found '%s'", conf)
	}
	return nil
}

// UpdatePMDKEnvars prepares the engine environment variables for PMDK with
// NDCTL enabled, driven by the storage class of the first storage tier.
//
// PMEMOBJ_CONF is checked/updated for every storage class; the
// ABT_THREAD_STACKSIZE handling is applied only when tier 0 is DCPM. An error
// is returned when no tier is configured or when either step rejects the
// current environment.
func (c *Config) UpdatePMDKEnvars() error {
	tiers := c.Storage.Tiers
	if len(tiers) < 1 {
		return errors.New("Invalid config - no tier 0 defined")
	}

	usesDCPM := tiers[0].Class == storage.ClassDcpm

	if err := c.UpdatePMDKEnvarsPMemobjConf(usesDCPM); err != nil {
		return err
	}

	// The larger ULT stack is only enforced for DCPM.
	if !usesDCPM {
		return nil
	}
	return c.UpdatePMDKEnvarsStackSizeDCPM()
}

Expand Down Expand Up @@ -690,3 +766,13 @@ func (c *Config) WithStorageIndex(i uint32) *Config {
c.Storage.EngineIdx = uint(i)
return c
}

// WithEnvVarAbtThreadStackSize adds the ABT_THREAD_STACKSIZE environment
// variable with the supplied value via WithEnvVars and returns its result.
//
// The uint16 parameter type limits the value to at most 65535.
func (c *Config) WithEnvVarAbtThreadStackSize(stackSize uint16) *Config {
	return c.WithEnvVars(fmt.Sprintf("ABT_THREAD_STACKSIZE=%d", stackSize))
}

// WithEnvVarPMemObjSdsAtCreate adds a PMEMOBJ_CONF environment variable of the
// form "sds.at_create=<value>" via WithEnvVars and returns its result. The
// value is formatted as given; callers are expected to pass 0 or 1.
func (c *Config) WithEnvVarPMemObjSdsAtCreate(value uint8) *Config {
	sdsSetting := fmt.Sprintf("PMEMOBJ_CONF=sds.at_create=%d", value)
	return c.WithEnvVars(sdsSetting)
}
207 changes: 207 additions & 0 deletions src/control/server/engine/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1104,3 +1104,210 @@ func TestFabricConfig_Update(t *testing.T) {
})
}
}

// TestConfig_UpdatePMDKEnvarsStackSizeDCPM exercises
// Config.UpdatePMDKEnvarsStackSizeDCPM: a missing ABT_THREAD_STACKSIZE must be
// added with the DCPM minimum, a sufficient value must be kept as is, and a
// too-small or non-numeric value must produce an error.
func TestConfig_UpdatePMDKEnvarsStackSizeDCPM(t *testing.T) {
	validConfig := func() *Config {
		return MockConfig().WithStorage(
			storage.NewTierConfig().
				WithStorageClass("dcpm"))
	}

	for name, tc := range map[string]struct {
		cfg                   *Config
		expErr                error
		expABTthreadStackSize int
	}{
		"empty config should not fail": {
			cfg:                   MockConfig(),
			expABTthreadStackSize: minABTThreadStackSizeDCPM,
		},
		"valid config for DCPM should not fail": {
			cfg:                   validConfig().WithEnvVarAbtThreadStackSize(minABTThreadStackSizeDCPM),
			expABTthreadStackSize: minABTThreadStackSizeDCPM,
		},
		"config for DCPM without thread size should not fail": {
			cfg:                   validConfig(),
			expABTthreadStackSize: minABTThreadStackSizeDCPM,
		},
		"config for DCPM with stack size big enough should not fail": {
			cfg: validConfig().
				WithEnvVarAbtThreadStackSize(minABTThreadStackSizeDCPM + 1),
			expABTthreadStackSize: minABTThreadStackSizeDCPM + 1,
		},
		"config for DCPM with stack size too small should fail": {
			cfg: validConfig().
				WithEnvVarAbtThreadStackSize(minABTThreadStackSizeDCPM - 1),
			// fmt.Errorf instead of errors.New(fmt.Sprintf(...)).
			expErr: fmt.Errorf("env_var ABT_THREAD_STACKSIZE "+
				"should be >= %d for DCPM storage class, found %d",
				minABTThreadStackSizeDCPM, minABTThreadStackSizeDCPM-1),
		},
		"config for DCPM with invalid ABT_THREAD_STACKSIZE value should fail": {
			cfg:    validConfig().WithEnvVars("ABT_THREAD_STACKSIZE=foo_bar"),
			expErr: errors.New("env_var ABT_THREAD_STACKSIZE has invalid value: foo_bar"),
		},
	} {
		t.Run(name, func(t *testing.T) {
			err := tc.cfg.UpdatePMDKEnvarsStackSizeDCPM()
			test.CmpErr(t, tc.expErr, err)
			if err != nil {
				// The resulting environment is only inspected on success.
				return
			}

			stackSizeStr, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE")
			test.AssertTrue(t, err == nil, "Missing env var ABT_THREAD_STACKSIZE")
			stackSizeVal, err := strconv.Atoi(stackSizeStr)
			test.AssertTrue(t, err == nil, "Invalid env var ABT_THREAD_STACKSIZE")
			test.AssertEqual(t, tc.expABTthreadStackSize, stackSizeVal,
				"Invalid ABT_THREAD_STACKSIZE value")
		})
	}
}

// TestConfig_UpdatePMDKEnvarsPMemobjConfDCPM checks
// Config.UpdatePMDKEnvarsPMemobjConf with isDCPM set to true: a config without
// an sds.at_create setting passes, while any forced sds.at_create value
// (0 or 1) is rejected.
func TestConfig_UpdatePMDKEnvarsPMemobjConfDCPM(t *testing.T) {
	newDCPMConfig := func() *Config {
		tier := storage.NewTierConfig().WithStorageClass("dcpm")
		return MockConfig().WithStorage(tier)
	}

	type testCase struct {
		cfg    *Config
		expErr error
	}

	testCases := map[string]testCase{
		"empty config should not fail": {
			cfg: MockConfig(),
		},
		"valid config for DCPM should not fail": {
			cfg: newDCPMConfig(),
		},
		"config for DCPM with forced sds.at_create (1) should fail": {
			cfg: newDCPMConfig().WithEnvVarPMemObjSdsAtCreate(1),
			expErr: errors.New("env_var PMEMOBJ_CONF should NOT contain " +
				"'sds.at_create=?' for DCPM storage class, found 'sds.at_create=1'"),
		},
		"config for DCPM with forced sds.at_create (0) should fail": {
			cfg: newDCPMConfig().WithEnvVarPMemObjSdsAtCreate(0),
			expErr: errors.New("env_var PMEMOBJ_CONF should NOT contain " +
				"'sds.at_create=?' for DCPM storage class, found 'sds.at_create=0'"),
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			gotErr := tc.cfg.UpdatePMDKEnvarsPMemobjConf(true)
			test.CmpErr(t, tc.expErr, gotErr)
		})
	}
}

// TestConfig_UpdatePMDKEnvarsPMemobjConfNRam checks
// Config.UpdatePMDKEnvarsPMemobjConf with isDCPM set to false (RAM storage
// class): sds.at_create=0 must be added when missing, kept when already
// present, and an explicit sds.at_create=1 must be rejected.
func TestConfig_UpdatePMDKEnvarsPMemobjConfNRam(t *testing.T) {
	// The fixture uses the "ram" storage class to match what is being tested.
	validConfig := func() *Config {
		return MockConfig().WithStorage(
			storage.NewTierConfig().
				WithStorageClass("ram"))
	}

	for name, tc := range map[string]struct {
		cfg            *Config
		expErr         error
		expPMemObjConf string
	}{
		"empty config should not fail": {
			cfg:            MockConfig(),
			expPMemObjConf: "sds.at_create=0",
		},
		"config for ram without PMEMOBJ_CONF should not fail": {
			cfg:            validConfig(),
			expPMemObjConf: "sds.at_create=0",
		},
		"valid config for ram should not fail": {
			cfg:            validConfig().WithEnvVarPMemObjSdsAtCreate(0),
			expPMemObjConf: "sds.at_create=0",
		},
		"config for ram w/ PMEMOBJ_CONF w/o sds.at_create should be updated": {
			cfg:            validConfig().WithEnvVars("PMEMOBJ_CONF=foo_bar"),
			expPMemObjConf: "foo_bar;sds.at_create=0",
		},
		"config for ram with sds.at_create set to 1 should fail": {
			cfg: validConfig().WithEnvVarPMemObjSdsAtCreate(1),
			expErr: errors.New("env_var PMEMOBJ_CONF should contain " +
				"'sds.at_create=0' for non-DCPM storage class" +
				", found 'sds.at_create=1'"),
		},
		"config for ram w/ PMEMOBJ_CONF w/ sds.at_create=1 should fail": {
			cfg: validConfig().
				WithEnvVars("PMEMOBJ_CONF=sds.at_create=1;foo-bar"),
			expErr: errors.New("env_var PMEMOBJ_CONF should contain " +
				"'sds.at_create=0' for non-DCPM storage class" +
				", found 'sds.at_create=1;foo-bar'"),
		},
	} {
		t.Run(name, func(t *testing.T) {
			test.CmpErr(t, tc.expErr, tc.cfg.UpdatePMDKEnvarsPMemobjConf(false))
			if tc.expPMemObjConf == "" {
				// Error cases carry no expected value to verify.
				return
			}

			pmemobjConf, err := tc.cfg.GetEnvVar("PMEMOBJ_CONF")
			test.AssertTrue(t, err == nil, "Missing env var PMEMOBJ_CONF")
			test.AssertEqual(t, tc.expPMemObjConf, pmemobjConf,
				"Invalid PMEMOBJ_CONF")
		})
	}
}

// TestConfig_UpdatePMDKEnvars exercises Config.UpdatePMDKEnvars end to end for
// configs without tiers, with a RAM first tier and with a DCPM first tier,
// checking the returned error and, where an expectation is given, the
// resulting PMEMOBJ_CONF and ABT_THREAD_STACKSIZE values.
func TestConfig_UpdatePMDKEnvars(t *testing.T) {
	validConfig := func(storageClass string) *Config {
		return MockConfig().WithStorage(
			storage.NewTierConfig().
				WithStorageClass(storageClass))
	}

	for name, tc := range map[string]struct {
		cfg    *Config
		expErr error
		// Empty means PMEMOBJ_CONF is not checked.
		expPMemObjConf string
		// Negative means ABT_THREAD_STACKSIZE is not checked.
		expABTthreadStackSize int
	}{
		"empty config should fail": {
			cfg:                   MockConfig(),
			expErr:                errors.New("Invalid config - no tier 0 defined"),
			expABTthreadStackSize: -1,
		},
		"valid config for RAM should not fail": {
			cfg: validConfig("ram").
				WithEnvVarAbtThreadStackSize(minABTThreadStackSizeDCPM - 1),
			expPMemObjConf:        "sds.at_create=0",
			expABTthreadStackSize: minABTThreadStackSizeDCPM - 1,
		},
		"invalid config for RAM should fail": {
			cfg: validConfig("ram").WithEnvVarPMemObjSdsAtCreate(1),
			expErr: errors.New("env_var PMEMOBJ_CONF should contain " +
				"'sds.at_create=0' for non-DCPM storage class, " +
				"found 'sds.at_create=1'"),
			expABTthreadStackSize: -1,
		},
		"valid config for DCPM should not fail": {
			cfg:                   validConfig("dcpm"),
			expABTthreadStackSize: minABTThreadStackSizeDCPM,
		},
		"invalid config for DCPM should fail": {
			cfg: validConfig("dcpm").
				WithEnvVarAbtThreadStackSize(minABTThreadStackSizeDCPM - 1),
			expErr: errors.New("env_var ABT_THREAD_STACKSIZE should be >= 20480 " +
				"for DCPM storage class, found 20479"),
			// The rejected value must be left in place untouched.
			expABTthreadStackSize: minABTThreadStackSizeDCPM - 1,
		},
	} {
		t.Run(name, func(t *testing.T) {
			gotErr := tc.cfg.UpdatePMDKEnvars()
			test.CmpErr(t, tc.expErr, gotErr)

			if tc.expPMemObjConf != "" {
				pmemobjConf, err := tc.cfg.GetEnvVar("PMEMOBJ_CONF")
				test.AssertTrue(t, err == nil, "Missing env var PMEMOBJ_CONF")
				test.AssertEqual(t, tc.expPMemObjConf, pmemobjConf,
					"Invalid PMEMOBJ_CONF")
			}

			if tc.expABTthreadStackSize >= 0 {
				stackSizeStr, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE")
				test.AssertTrue(t, err == nil, "Missing env var ABT_THREAD_STACKSIZE")
				stackSizeVal, err := strconv.Atoi(stackSizeStr)
				test.AssertTrue(t, err == nil, "Invalid env var ABT_THREAD_STACKSIZE")
				test.AssertEqual(t, tc.expABTthreadStackSize, stackSizeVal,
					"Invalid ABT_THREAD_STACKSIZE value")
			}
		})
	}
}
6 changes: 6 additions & 0 deletions src/control/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ func processConfig(log logging.Logger, cfg *config.Server, fis *hardware.FabricI
return err
}

for _, ec := range cfg.Engines {
if err := ec.UpdatePMDKEnvars(); err != nil {
return err
}
}

return nil
}

Expand Down
Loading

0 comments on commit b7ea05b

Please sign in to comment.