Skip to content

Commit

Permalink
DAOS-13777 control: Invalid storage information with MD-on-SSD (#12485)
Browse files Browse the repository at this point in the history
With storage query usage, only take into account NVMe devices which are
used to store data.

Signed-off-by: Cedric Koch-Hofer <cedric.koch-hofer@intel.com>
  • Loading branch information
knard-intel committed Jul 17, 2023
1 parent 883cbdb commit d502996
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 8 deletions.
4 changes: 4 additions & 0 deletions src/control/lib/control/mocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ type (
AvailBytes uint64 // Available raw storage
UsableBytes uint64 // Effective storage available for data
NvmeState *storage.NvmeDevState
NvmeRole *storage.BdevRoles
}

MockScmConfig struct {
Expand Down Expand Up @@ -673,6 +674,9 @@ func MockStorageScanResp(t *testing.T,
if mockNvmeConfig.NvmeState != nil {
smdDevice.NvmeState = *mockNvmeConfig.NvmeState
}
if mockNvmeConfig.NvmeRole != nil {
smdDevice.Roles = *mockNvmeConfig.NvmeRole
}
smdDevice.Rank = mockNvmeConfig.Rank
nvmeControllers = append(nvmeControllers, nvmeController)
}
Expand Down
6 changes: 6 additions & 0 deletions src/control/lib/control/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,12 @@ func processSCMSpaceStats(log logging.Logger, filterRank filterRankFn, scmNamesp
func processNVMeSpaceStats(log logging.Logger, filterRank filterRankFn, nvmeControllers storage.NvmeControllers, rankNVMeFreeSpace rankFreeSpaceMap) error {
for _, nvmeController := range nvmeControllers {
for _, smdDevice := range nvmeController.SmdDevices {
if !smdDevice.Roles.IsEmpty() && (smdDevice.Roles.OptionBits&storage.BdevRoleData) == 0 {
log.Debugf("Skipping SMD device %s (rank %d, ctrlr %s) not used for storing data",
smdDevice.UUID, smdDevice.Rank, smdDevice.TrAddr, smdDevice.Rank)
continue
}

if smdDevice.NvmeState != storage.NvmeStateNormal {
log.Noticef("SMD device %s (rank %d, ctrlr %s) not usable (device state %q)",
smdDevice.UUID, smdDevice.Rank, smdDevice.TrAddr, smdDevice.NvmeState.String())
Expand Down
77 changes: 77 additions & 0 deletions src/control/lib/control/pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,83 @@ func TestControl_GetMaxPoolSize(t *testing.T) {
NvmeBytes: uint64(1) * uint64(humanize.TByte),
},
},
"single MD-on-SSD server": {
HostsConfigArray: []MockHostStorageConfig{
{
HostName: "foo",
ScmConfig: []MockScmConfig{
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(100) * uint64(humanize.GByte),
AvailBytes: uint64(100) * uint64(humanize.GByte),
UsableBytes: uint64(100) * uint64(humanize.GByte),
},
Rank: 0,
},
},
NvmeConfig: []MockNvmeConfig{
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(1) * uint64(humanize.TByte),
AvailBytes: uint64(1) * uint64(humanize.TByte),
UsableBytes: uint64(1) * uint64(humanize.TByte),
NvmeRole: &storage.BdevRoles{
storage.OptionBits(storage.BdevRoleData),
},
},
Rank: 0,
},
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(2) * uint64(humanize.TByte),
AvailBytes: uint64(2) * uint64(humanize.TByte),
UsableBytes: uint64(2) * uint64(humanize.TByte),
NvmeRole: &storage.BdevRoles{
storage.OptionBits(storage.BdevRoleWAL | storage.BdevRoleMeta),
},
},
Rank: 0,
},
},
},
},
ExpectedOutput: ExpectedOutput{
ScmBytes: uint64(100) * uint64(humanize.GByte),
NvmeBytes: uint64(1) * uint64(humanize.TByte),
},
},
"single Ephemeral server": {
HostsConfigArray: []MockHostStorageConfig{
{
HostName: "foo",
ScmConfig: []MockScmConfig{
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(100) * uint64(humanize.GByte),
AvailBytes: uint64(100) * uint64(humanize.GByte),
UsableBytes: uint64(100) * uint64(humanize.GByte),
},
Rank: 0,
},
},
NvmeConfig: []MockNvmeConfig{
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(1) * uint64(humanize.TByte),
AvailBytes: uint64(1) * uint64(humanize.TByte),
UsableBytes: uint64(1) * uint64(humanize.TByte),
NvmeRole: &storage.BdevRoles{storage.OptionBits(0)},
},
Rank: 0,
},
},
},
},
ExpectedOutput: ExpectedOutput{
ScmBytes: uint64(100) * uint64(humanize.GByte),
NvmeBytes: uint64(1) * uint64(humanize.TByte),
},
},
"double server": {
HostsConfigArray: []MockHostStorageConfig{
{
Expand Down
17 changes: 9 additions & 8 deletions src/control/server/ctl_storage_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,15 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) {
for idx, dev := range ctlr.GetSmdDevices() {
rank := dev.GetRank()

if dev.GetRoleBits() != 0 && (dev.GetRoleBits()&storage.BdevRoleData) == 0 {
c.log.Debugf("SMD device %s (rank %d, ctlr %s) not used to store data (Role bits 0x%X)",
dev.GetUuid(), rank, ctlr.GetPciAddr(), dev.GetRoleBits())
dev.TotalBytes = 0
dev.AvailBytes = 0
dev.UsableBytes = 0
continue
}

if dev.GetDevState() != ctlpb.NvmeDevState_NORMAL {
c.log.Debugf("SMD device %s (rank %d, ctlr %s) not usable: device state %q",
dev.GetUuid(), rank, ctlr.GetPciAddr(), ctlpb.NvmeDevState_name[int32(dev.DevState)])
Expand All @@ -371,14 +380,6 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) {
continue
}

if dev.GetRoleBits() != 0 && (dev.GetRoleBits()&storage.BdevRoleData) == 0 {
c.log.Debugf("SMD device %s (rank %d, ctlr %s) not used to store data (Role bits 0x%X)",
dev.GetUuid(), rank, ctlr.GetPciAddr(), dev.GetRoleBits())
dev.AvailBytes = 0
dev.UsableBytes = 0
continue
}

c.log.Tracef("Initial available size of SMD device %s (rank %d, ctlr %s): %s (%d bytes)",
dev.GetUuid(), rank, ctlr.GetPciAddr(), humanize.Bytes(dev.GetAvailBytes()), dev.GetAvailBytes())

Expand Down
Loading

0 comments on commit d502996

Please sign in to comment.