Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-13777 control: Invalid storage information with MD-on-SSD #12485

Merged
merged 11 commits into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/control/lib/control/mocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ type (
AvailBytes uint64 // Available raw storage
UsableBytes uint64 // Effective storage available for data
NvmeState *storage.NvmeDevState
NvmeRole *storage.BdevRoles
}

MockScmConfig struct {
Expand Down Expand Up @@ -673,6 +674,9 @@ func MockStorageScanResp(t *testing.T,
if mockNvmeConfig.NvmeState != nil {
smdDevice.NvmeState = *mockNvmeConfig.NvmeState
}
if mockNvmeConfig.NvmeRole != nil {
smdDevice.Roles = *mockNvmeConfig.NvmeRole
}
smdDevice.Rank = mockNvmeConfig.Rank
nvmeControllers = append(nvmeControllers, nvmeController)
}
Expand Down
6 changes: 6 additions & 0 deletions src/control/lib/control/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,12 @@ func processSCMSpaceStats(log logging.Logger, filterRank filterRankFn, scmNamesp
func processNVMeSpaceStats(log logging.Logger, filterRank filterRankFn, nvmeControllers storage.NvmeControllers, rankNVMeFreeSpace rankFreeSpaceMap) error {
for _, nvmeController := range nvmeControllers {
for _, smdDevice := range nvmeController.SmdDevices {
if !smdDevice.Roles.IsEmpty() && (smdDevice.Roles.OptionBits&storage.BdevRoleData) == 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good change for ctl_storage_rpc_test.go, but I was hoping to see a test case for this one as well, showing that we now skip the non-data SSDs.

Copy link
Contributor Author

@knard-intel knard-intel Jun 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Add unit tests for processNVMeSpaceStats() function

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the remark.
The unit test allowed me to fix a potential issue.
Indeed, the device was not skipped: forgot the continue statement.
However, as the usable size was reset to zero in the control plane it was not detected with the functional tests.
With future update of the control plane, this last action could have not been done any more and a bug would arise.

log.Debugf("Skipping SMD device %s (rank %d, ctrlr %s) not used for storing data",
smdDevice.UUID, smdDevice.Rank, smdDevice.TrAddr, smdDevice.Rank)
continue
}

if smdDevice.NvmeState != storage.NvmeStateNormal {
log.Noticef("SMD device %s (rank %d, ctrlr %s) not usable (device state %q)",
smdDevice.UUID, smdDevice.Rank, smdDevice.TrAddr, smdDevice.NvmeState.String())
Expand Down
77 changes: 77 additions & 0 deletions src/control/lib/control/pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,83 @@ func TestControl_GetMaxPoolSize(t *testing.T) {
NvmeBytes: uint64(1) * uint64(humanize.TByte),
},
},
"single MD-on-SSD server": {
HostsConfigArray: []MockHostStorageConfig{
{
HostName: "foo",
ScmConfig: []MockScmConfig{
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(100) * uint64(humanize.GByte),
AvailBytes: uint64(100) * uint64(humanize.GByte),
UsableBytes: uint64(100) * uint64(humanize.GByte),
},
Rank: 0,
},
},
NvmeConfig: []MockNvmeConfig{
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(1) * uint64(humanize.TByte),
AvailBytes: uint64(1) * uint64(humanize.TByte),
UsableBytes: uint64(1) * uint64(humanize.TByte),
NvmeRole: &storage.BdevRoles{
storage.OptionBits(storage.BdevRoleData),
},
},
Rank: 0,
},
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(2) * uint64(humanize.TByte),
AvailBytes: uint64(2) * uint64(humanize.TByte),
UsableBytes: uint64(2) * uint64(humanize.TByte),
NvmeRole: &storage.BdevRoles{
storage.OptionBits(storage.BdevRoleWAL | storage.BdevRoleMeta),
},
},
Rank: 0,
},
},
},
},
ExpectedOutput: ExpectedOutput{
ScmBytes: uint64(100) * uint64(humanize.GByte),
NvmeBytes: uint64(1) * uint64(humanize.TByte),
},
},
"single Ephemeral server": {
HostsConfigArray: []MockHostStorageConfig{
{
HostName: "foo",
ScmConfig: []MockScmConfig{
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(100) * uint64(humanize.GByte),
AvailBytes: uint64(100) * uint64(humanize.GByte),
UsableBytes: uint64(100) * uint64(humanize.GByte),
},
Rank: 0,
},
},
NvmeConfig: []MockNvmeConfig{
{
MockStorageConfig: MockStorageConfig{
TotalBytes: uint64(1) * uint64(humanize.TByte),
AvailBytes: uint64(1) * uint64(humanize.TByte),
UsableBytes: uint64(1) * uint64(humanize.TByte),
NvmeRole: &storage.BdevRoles{storage.OptionBits(0)},
},
Rank: 0,
},
},
},
},
ExpectedOutput: ExpectedOutput{
ScmBytes: uint64(100) * uint64(humanize.GByte),
NvmeBytes: uint64(1) * uint64(humanize.TByte),
},
},
"double server": {
HostsConfigArray: []MockHostStorageConfig{
{
Expand Down
17 changes: 9 additions & 8 deletions src/control/server/ctl_storage_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,15 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) {
for idx, dev := range ctlr.GetSmdDevices() {
rank := dev.GetRank()

if dev.GetRoleBits() != 0 && (dev.GetRoleBits()&storage.BdevRoleData) == 0 {
c.log.Debugf("SMD device %s (rank %d, ctlr %s) not used to store data (Role bits 0x%X)",
dev.GetUuid(), rank, ctlr.GetPciAddr(), dev.GetRoleBits())
dev.TotalBytes = 0
dev.AvailBytes = 0
dev.UsableBytes = 0
continue
}

if dev.GetDevState() != ctlpb.NvmeDevState_NORMAL {
c.log.Debugf("SMD device %s (rank %d, ctlr %s) not usable: device state %q",
dev.GetUuid(), rank, ctlr.GetPciAddr(), ctlpb.NvmeDevState_name[int32(dev.DevState)])
Expand All @@ -371,14 +380,6 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) {
continue
}

if dev.GetRoleBits() != 0 && (dev.GetRoleBits()&storage.BdevRoleData) == 0 {
c.log.Debugf("SMD device %s (rank %d, ctlr %s) not used to store data (Role bits 0x%X)",
dev.GetUuid(), rank, ctlr.GetPciAddr(), dev.GetRoleBits())
dev.AvailBytes = 0
dev.UsableBytes = 0
continue
}

c.log.Tracef("Initial available size of SMD device %s (rank %d, ctlr %s): %s (%d bytes)",
dev.GetUuid(), rank, ctlr.GetPciAddr(), humanize.Bytes(dev.GetAvailBytes()), dev.GetAvailBytes())

Expand Down
Loading