diff --git a/src/bio/bio_device.c b/src/bio/bio_device.c index 7edaf09cbf9..82077a5ae33 100644 --- a/src/bio/bio_device.c +++ b/src/bio/bio_device.c @@ -606,11 +606,46 @@ struct led_opts { int status; }; +static Ctl__LedState +led_state_spdk2daos(enum spdk_vmd_led_state in) +{ + switch (in) { + case SPDK_VMD_LED_STATE_OFF: + return CTL__LED_STATE__OFF; + case SPDK_VMD_LED_STATE_IDENTIFY: + return CTL__LED_STATE__QUICK_BLINK; + case SPDK_VMD_LED_STATE_FAULT: + return CTL__LED_STATE__ON; + case SPDK_VMD_LED_STATE_REBUILD: + return CTL__LED_STATE__SLOW_BLINK; + default: + return CTL__LED_STATE__NA; + } +} + +static enum spdk_vmd_led_state +led_state_daos2spdk(Ctl__LedState in) +{ + switch (in) { + case CTL__LED_STATE__OFF: + return SPDK_VMD_LED_STATE_OFF; + case CTL__LED_STATE__QUICK_BLINK: + return SPDK_VMD_LED_STATE_IDENTIFY; + case CTL__LED_STATE__ON: + return SPDK_VMD_LED_STATE_FAULT; + case CTL__LED_STATE__SLOW_BLINK: + return SPDK_VMD_LED_STATE_REBUILD; + default: + return SPDK_VMD_LED_STATE_UNKNOWN; + } +} + static void led_device_action(void *ctx, struct spdk_pci_device *pci_device) { struct led_opts *opts = ctx; enum spdk_vmd_led_state cur_led_state; + Ctl__LedState d_led_state; const char *pci_dev_type = NULL; char addr_buf[ADDR_STR_MAX_LEN + 1]; int rc; @@ -656,14 +691,17 @@ led_device_action(void *ctx, struct spdk_pci_device *pci_device) return; } + /* Convert state to Ctl__LedState from SPDK led_state */ + d_led_state = led_state_spdk2daos(cur_led_state); + D_DEBUG(DB_MGMT, "led on dev %s has state: %s (action: %s, new state: %s)\n", addr_buf, - LED_STATE_NAME(cur_led_state), LED_ACTION_NAME(opts->action), + LED_STATE_NAME(d_led_state), LED_ACTION_NAME(opts->action), LED_STATE_NAME(opts->led_state)); switch (opts->action) { case CTL__LED_ACTION__GET: /* Return early with current device state set */ - opts->led_state = (Ctl__LedState)cur_led_state; + opts->led_state = d_led_state; return; case CTL__LED_ACTION__SET: break; @@ -678,14 +716,14 @@ 
led_device_action(void *ctx, struct spdk_pci_device *pci_device) return; } - if (cur_led_state == (enum spdk_vmd_led_state)opts->led_state) { + if (d_led_state == opts->led_state) { D_DEBUG(DB_MGMT, "VMD device %s LED state already in state %s\n", addr_buf, LED_STATE_NAME(opts->led_state)); return; } /* Set the LED to the new state */ - rc = spdk_vmd_set_led_state(pci_device, (enum spdk_vmd_led_state)opts->led_state); + rc = spdk_vmd_set_led_state(pci_device, led_state_daos2spdk(opts->led_state)); if (spdk_unlikely(rc != 0)) { D_ERROR("Failed to set the VMD LED state on %s (%s)\n", addr_buf, spdk_strerror(-rc)); @@ -700,11 +738,12 @@ led_device_action(void *ctx, struct spdk_pci_device *pci_device) opts->status = -DER_NOSYS; return; } + d_led_state = led_state_spdk2daos(cur_led_state); /* Verify the correct state is set */ - if (cur_led_state != (enum spdk_vmd_led_state)opts->led_state) { + if (d_led_state != opts->led_state) { D_ERROR("Unexpected LED state on %s, want %s got %s\n", addr_buf, - LED_STATE_NAME(opts->led_state), LED_STATE_NAME(cur_led_state)); + LED_STATE_NAME(opts->led_state), LED_STATE_NAME(d_led_state)); opts->status = -DER_INVAL; } } diff --git a/src/bio/smd.pb-c.c b/src/bio/smd.pb-c.c index b3ed3284385..de49e886e19 100644 --- a/src/bio/smd.pb-c.c +++ b/src/bio/smd.pb-c.c @@ -2833,19 +2833,19 @@ const ProtobufCEnumDescriptor ctl__nvme_dev_state__descriptor = }; static const ProtobufCEnumValue ctl__led_state__enum_values_by_number[5] = { - { "OFF", "CTL__LED_STATE__OFF", 0 }, + { "NA", "CTL__LED_STATE__NA", 0 }, { "QUICK_BLINK", "CTL__LED_STATE__QUICK_BLINK", 1 }, { "ON", "CTL__LED_STATE__ON", 2 }, { "SLOW_BLINK", "CTL__LED_STATE__SLOW_BLINK", 3 }, - { "NA", "CTL__LED_STATE__NA", 4 }, + { "OFF", "CTL__LED_STATE__OFF", 4 }, }; static const ProtobufCIntRange ctl__led_state__value_ranges[] = { {0, 0},{0, 5} }; static const ProtobufCEnumValueIndex ctl__led_state__enum_values_by_name[5] = { - { "NA", 4 }, - { "OFF", 0 }, + { "NA", 0 }, + { "OFF", 4 }, { 
"ON", 2 }, { "QUICK_BLINK", 1 }, { "SLOW_BLINK", 3 }, diff --git a/src/bio/smd.pb-c.h b/src/bio/smd.pb-c.h index 19ac9fc3d14..fd4ca542b60 100644 --- a/src/bio/smd.pb-c.h +++ b/src/bio/smd.pb-c.h @@ -66,9 +66,9 @@ typedef enum _Ctl__NvmeDevState { } Ctl__NvmeDevState; typedef enum _Ctl__LedState { /* - * Equivalent to SPDK_VMD_LED_STATE_OFF + * Equivalent to SPDK_VMD_LED_STATE_UNKNOWN (VMD not enabled) */ - CTL__LED_STATE__OFF = 0, + CTL__LED_STATE__NA = 0, /* * Equivalent to SPDK_VMD_LED_STATE_IDENTIFY (4Hz blink) */ @@ -82,9 +82,9 @@ typedef enum _Ctl__LedState { */ CTL__LED_STATE__SLOW_BLINK = 3, /* - * Equivalent to SPDK_VMD_LED_STATE_UNKNOWN (VMD not enabled) + * Equivalent to SPDK_VMD_LED_STATE_OFF */ - CTL__LED_STATE__NA = 4 + CTL__LED_STATE__OFF = 4 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CTL__LED_STATE) } Ctl__LedState; typedef enum _Ctl__LedAction { @@ -305,7 +305,7 @@ struct _Ctl__NvmeController }; #define CTL__NVME_CONTROLLER__INIT \ { PROTOBUF_C_MESSAGE_INIT (&ctl__nvme_controller__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, NULL, 0,NULL, 0,NULL, CTL__NVME_DEV_STATE__UNKNOWN, CTL__LED_STATE__OFF, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, NULL, 0,NULL, 0,NULL, CTL__NVME_DEV_STATE__UNKNOWN, CTL__LED_STATE__NA, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string } /* @@ -558,7 +558,7 @@ struct _Ctl__LedManageReq }; #define CTL__LED_MANAGE_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&ctl__led_manage_req__descriptor) \ - , (char *)protobuf_c_empty_string, CTL__LED_ACTION__GET, CTL__LED_STATE__OFF, 0 } + , (char *)protobuf_c_empty_string, CTL__LED_ACTION__GET, CTL__LED_STATE__NA, 0 } struct _Ctl__DevReplaceReq diff --git a/src/control/cmd/dmg/pretty/storage.go 
b/src/control/cmd/dmg/pretty/storage.go index 4f2a02fbe9d..8146d89496e 100644 --- a/src/control/cmd/dmg/pretty/storage.go +++ b/src/control/cmd/dmg/pretty/storage.go @@ -179,7 +179,8 @@ func printSmdDevice(dev *storage.SmdDevice, iw io.Writer, opts ...PrintConfigOpt fc := getPrintConfig(opts...) if fc.LEDInfoOnly { - if _, err := fmt.Fprintf(iw, "TrAddr:%s", dev.Ctrlr.PciAddr); err != nil { + if _, err := fmt.Fprintf(iw, "TrAddr:%s NSID:%d", dev.Ctrlr.PciAddr, + dev.CtrlrNamespaceID); err != nil { return err } if dev.UUID != "" { @@ -193,7 +194,8 @@ func printSmdDevice(dev *storage.SmdDevice, iw io.Writer, opts ...PrintConfigOpt return nil } - if _, err := fmt.Fprintf(iw, "UUID:%s [TrAddr:%s]\n", dev.UUID, dev.Ctrlr.PciAddr); err != nil { + if _, err := fmt.Fprintf(iw, "UUID:%s [TrAddr:%s NSID:%d]\n", dev.UUID, dev.Ctrlr.PciAddr, + dev.CtrlrNamespaceID); err != nil { return err } diff --git a/src/control/cmd/dmg/pretty/storage_nvme.go b/src/control/cmd/dmg/pretty/storage_nvme.go index fefc3eef285..ec759994ee5 100644 --- a/src/control/cmd/dmg/pretty/storage_nvme.go +++ b/src/control/cmd/dmg/pretty/storage_nvme.go @@ -7,7 +7,6 @@ package pretty import ( - "errors" "fmt" "io" "sort" @@ -18,6 +17,7 @@ import ( "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/lib/control" + "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/lib/txtfmt" "github.com/daos-stack/daos/src/control/server/storage" ) @@ -165,7 +165,6 @@ func parseNvmeFormatResults(inResults storage.NvmeControllers) storage.NvmeContr parsedResults := make(storage.NvmeControllers, 0, len(inResults)) for _, result := range inResults { if result.PciAddr != storage.NilBdevAddress { - // ignore skip results parsedResults = append(parsedResults, result) } } @@ -173,24 +172,31 @@ func parseNvmeFormatResults(inResults storage.NvmeControllers) storage.NvmeContr return parsedResults } -func printNvmeFormatResults(devices 
storage.NvmeControllers, out io.Writer, opts ...PrintConfigOption) error { - if len(devices) == 0 { +func printNvmeFormatResults(ctrlrs storage.NvmeControllers, out io.Writer, opts ...PrintConfigOption) error { + if len(ctrlrs) == 0 { fmt.Fprintln(out, "\tNo NVMe devices found") return nil } pciTitle := "NVMe PCI" resultTitle := "Format Result" + rolesTitle := "Role(s)" - formatter := txtfmt.NewTableFormatter(pciTitle, resultTitle) + formatter := txtfmt.NewTableFormatter(pciTitle, resultTitle, rolesTitle) formatter.InitWriter(out) var table []txtfmt.TableRow - sort.Slice(devices, func(i, j int) bool { return devices[i].PciAddr < devices[j].PciAddr }) + sort.Slice(ctrlrs, func(i, j int) bool { return ctrlrs[i].PciAddr < ctrlrs[j].PciAddr }) - for _, device := range parseNvmeFormatResults(devices) { - row := txtfmt.TableRow{pciTitle: device.PciAddr} - row[resultTitle] = device.Info + for _, ctrlr := range parseNvmeFormatResults(ctrlrs) { + row := txtfmt.TableRow{pciTitle: ctrlr.PciAddr} + row[resultTitle] = ctrlr.Info + roles := "NA" + // Assumes that all SMD devices on a controller have the same roles. 
+ if len(ctrlr.SmdDevices) > 0 { + roles = fmt.Sprintf("%s", ctrlr.SmdDevices[0].Roles.String()) + } + row[rolesTitle] = roles table = append(table, row) } @@ -211,11 +217,13 @@ func PrintNvmeControllers(controllers storage.NvmeControllers, out io.Writer, op pciTitle := "NVMe PCI" modelTitle := "Model" fwTitle := "FW Revision" - socketTitle := "Socket ID" + socketTitle := "Socket" capacityTitle := "Capacity" + rolesTitle := "Role(s)" + rankTitle := "Rank" formatter := txtfmt.NewTableFormatter( - pciTitle, modelTitle, fwTitle, socketTitle, capacityTitle, + pciTitle, modelTitle, fwTitle, socketTitle, capacityTitle, rolesTitle, rankTitle, ) formatter.InitWriter(out) var table []txtfmt.TableRow @@ -228,6 +236,18 @@ func PrintNvmeControllers(controllers storage.NvmeControllers, out io.Writer, op row[fwTitle] = ctrlr.FwRev row[socketTitle] = fmt.Sprint(ctrlr.SocketID) row[capacityTitle] = humanize.Bytes(ctrlr.Capacity()) + roles := "NA" + rank := "None" + // Assumes that all SMD devices on a controller have the same roles and rank. + if len(ctrlr.SmdDevices) > 0 { + sd := ctrlr.SmdDevices[0] + roles = sd.Roles.String() + if sd.Rank != ranklist.NilRank { + rank = sd.Rank.String() + } + } + row[rolesTitle] = roles + row[rankTitle] = rank table = append(table, row) } @@ -266,50 +286,3 @@ func PrintNvmeHealthMap(hsm control.HostStorageMap, out io.Writer, opts ...Print return w.Err } - -// PrintNvmeMetaMap generates a human-readable representation of the supplied -// HostStorageMap, with a focus on presenting the NVMe Device Server Meta Data. -func PrintNvmeMetaMap(hsm control.HostStorageMap, out io.Writer, opts ...PrintConfigOption) error { - w := txtfmt.NewErrWriter(out) - - for _, key := range hsm.Keys() { - hss := hsm[key] - hosts := getPrintHosts(hss.HostSet.RangedString(), opts...) 
- lineBreak := strings.Repeat("-", len(hosts)) - fmt.Fprintf(out, "%s\n%s\n%s\n", lineBreak, hosts, lineBreak) - - if len(hss.HostStorage.NvmeDevices) == 0 { - fmt.Fprintln(out, " No NVMe devices detected") - continue - } - - for _, controller := range hss.HostStorage.NvmeDevices { - if controller == nil { - return errors.New("nil controller in NvmeDevices") - } - if err := printNvmeControllerSummary(controller, out, opts...); err != nil { - return err - } - iw := txtfmt.NewIndentWriter(out) - if len(controller.SmdDevices) > 0 { - fmt.Fprintln(iw, "SMD Devices") - - for _, device := range controller.SmdDevices { - iw1 := txtfmt.NewIndentWriter(iw) - - // Attach parent controller details to SMD before printing. - device.Ctrlr = *controller - - if err := printSmdDevice(device, iw1, opts...); err != nil { - return err - } - } - } else { - fmt.Fprintln(iw, "No SMD devices found") - } - fmt.Fprintln(out) - } - } - - return w.Err -} diff --git a/src/control/cmd/dmg/pretty/storage_nvme_test.go b/src/control/cmd/dmg/pretty/storage_nvme_test.go index c75b0061e14..08dc6b8426d 100644 --- a/src/control/cmd/dmg/pretty/storage_nvme_test.go +++ b/src/control/cmd/dmg/pretty/storage_nvme_test.go @@ -17,10 +17,24 @@ import ( "github.com/google/go-cmp/cmp" "github.com/daos-stack/daos/src/control/lib/control" + "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/server/storage" ) func TestPretty_PrintNVMeController(t *testing.T) { + ctrlrWithSmd := func(idx int32, roleBits int) *storage.NvmeController { + c := storage.MockNvmeController(idx) + sd := storage.MockSmdDevice(nil, idx) + sd.Roles = storage.BdevRoles{storage.OptionBits(roleBits)} + sd.Rank = ranklist.Rank(idx) + c.SmdDevices = []*storage.SmdDevice{sd} + return c + } + ctrlrWithNilRank := func(idx int32) *storage.NvmeController { + c := ctrlrWithSmd(idx, 0) + c.SmdDevices[0].Rank = ranklist.NilRank + return c + } for name, tc := range map[string]struct { devices 
storage.NvmeControllers expPrintStr string @@ -31,10 +45,10 @@ func TestPretty_PrintNVMeController(t *testing.T) { storage.MockNvmeController(2), }, expPrintStr: ` -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB -0000:02:00.0 model-2 fwRev-2 0 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA None +0000:02:00.0 model-2 fwRev-2 0 2.0 TB NA None `, }, "vmd backing devices": { @@ -43,10 +57,46 @@ NVMe PCI Model FW Revision Socket ID Capacity &storage.NvmeController{PciAddr: "050505:03:00.0"}, }, expPrintStr: ` -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -050505:01:00.0 0 0 B -050505:03:00.0 0 0 B +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +050505:01:00.0 0 0 B NA None +050505:03:00.0 0 0 B NA None +`, + }, + "controllers with roles": { + devices: storage.NvmeControllers{ + ctrlrWithSmd(1, 1), + ctrlrWithSmd(2, 6), + }, + expPrintStr: ` +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB data 1 +0000:02:00.0 model-2 fwRev-2 0 2.0 TB meta,wal 2 +`, + }, + "controllers with no roles": { + devices: storage.NvmeControllers{ + ctrlrWithSmd(1, 0), + ctrlrWithSmd(2, 0), + }, + expPrintStr: ` +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 1 +0000:02:00.0 model-2 fwRev-2 0 2.0 TB NA 2 +`, + }, + "controllers with no rank": { + devices: storage.NvmeControllers{ + ctrlrWithNilRank(1), + ctrlrWithNilRank(2), + }, + expPrintStr: ` +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 
model-1 fwRev-1 1 2.0 TB NA None +0000:02:00.0 model-2 fwRev-2 0 2.0 TB NA None `, }, } { @@ -327,172 +377,3 @@ PCI:%s Model:%s FW:%s Socket:%d Capacity:%s }) } } - -func TestPretty_PrintNVMetaMap(t *testing.T) { - mockNvmeController := func(idx int32) *storage.NvmeController { - c := storage.MockNvmeController(idx) - c.SmdDevices = []*storage.SmdDevice{ - storage.MockSmdDevice(nil, idx), - } - return c - } - var ( - controllerA = mockNvmeController(1) - controllerB = mockNvmeController(2) - controllerC = mockNvmeController(1) - controllerD = mockNvmeController(2) - controllerE = mockNvmeController(1) - controllerF = mockNvmeController(2) - ) - controllerA.SmdDevices = nil - controllerB.SmdDevices = nil - controllerE.SmdDevices = []*storage.SmdDevice{ - storage.MockSmdDevice(nil, 0), - storage.MockSmdDevice(nil, 1), - } - controllerF.SmdDevices = []*storage.SmdDevice{ - storage.MockSmdDevice(nil, 2), - storage.MockSmdDevice(nil, 3), - } - for name, tc := range map[string]struct { - hsm control.HostStorageMap - expPrintStr string - }{ - "no controllers": { - hsm: mockHostStorageMap(t, &mockHostStorage{"host1", &control.HostStorage{}}), - expPrintStr: ` ------ -host1 ------ - No NVMe devices detected -`, - }, - "no smd devices on controllers": { - hsm: mockHostStorageMap(t, - &mockHostStorage{ - "host1", - &control.HostStorage{ - NvmeDevices: storage.NvmeControllers{ - controllerA, controllerB, - }, - }, - }, - ), - expPrintStr: fmt.Sprintf(` ------ -host1 ------ -PCI:%s Model:%s FW:%s Socket:%d Capacity:%s - No SMD devices found - -PCI:%s Model:%s FW:%s Socket:%d Capacity:%s - No SMD devices found - -`, - controllerA.PciAddr, controllerA.Model, controllerA.FwRev, - controllerA.SocketID, humanize.Bytes(controllerA.Capacity()), - controllerB.PciAddr, controllerB.Model, controllerB.FwRev, - controllerB.SocketID, humanize.Bytes(controllerB.Capacity())), - }, - "single smd device on each controller": { - hsm: mockHostStorageMap(t, - &mockHostStorage{ - "host1", - 
&control.HostStorage{ - NvmeDevices: storage.NvmeControllers{ - controllerC, controllerD, - }, - }, - }, - ), - expPrintStr: fmt.Sprintf(` ------ -host1 ------ -PCI:%s Model:%s FW:%s Socket:%d Capacity:%s - SMD Devices - UUID:%s [TrAddr:%s] - Roles:data,meta,wal Targets:%v Rank:%d State:%s LED:%s - -PCI:%s Model:%s FW:%s Socket:%d Capacity:%s - SMD Devices - UUID:%s [TrAddr:%s] - Roles:data,meta,wal Targets:%v Rank:%d State:%s LED:%s - -`, - controllerC.PciAddr, controllerC.Model, controllerC.FwRev, - controllerC.SocketID, humanize.Bytes(controllerC.Capacity()), - controllerC.SmdDevices[0].UUID, controllerC.PciAddr, - controllerC.SmdDevices[0].TargetIDs, - controllerC.SmdDevices[0].Rank, - controllerC.NvmeState, controllerC.LedState, - - controllerD.PciAddr, controllerD.Model, controllerD.FwRev, - controllerD.SocketID, humanize.Bytes(controllerD.Capacity()), - controllerD.SmdDevices[0].UUID, controllerD.PciAddr, - controllerD.SmdDevices[0].TargetIDs, - controllerD.SmdDevices[0].Rank, - controllerD.NvmeState, controllerD.LedState), - }, - "multiple smd devices on each controller": { - hsm: mockHostStorageMap(t, - &mockHostStorage{ - "host1", - &control.HostStorage{ - NvmeDevices: storage.NvmeControllers{ - controllerE, - controllerF, - }, - }, - }, - ), - expPrintStr: fmt.Sprintf(` ------ -host1 ------ -PCI:%s Model:%s FW:%s Socket:%d Capacity:%s - SMD Devices - UUID:%s [TrAddr:%s] - Roles:data,meta,wal Targets:%v Rank:%d State:%s LED:%s - UUID:%s [TrAddr:%s] - Roles:data,meta,wal Targets:%v Rank:%d State:%s LED:%s - -PCI:%s Model:%s FW:%s Socket:%d Capacity:%s - SMD Devices - UUID:%s [TrAddr:%s] - Roles:data,meta,wal Targets:%v Rank:%d State:%s LED:%s - UUID:%s [TrAddr:%s] - Roles:data,meta,wal Targets:%v Rank:%d State:%s LED:%s - -`, - controllerE.PciAddr, controllerE.Model, controllerE.FwRev, - controllerE.SocketID, humanize.Bytes(controllerE.Capacity()), - controllerE.SmdDevices[0].UUID, controllerE.PciAddr, - controllerE.SmdDevices[0].TargetIDs, 
controllerE.SmdDevices[0].Rank, - controllerE.NvmeState, controllerE.LedState, - controllerE.SmdDevices[1].UUID, controllerE.PciAddr, - controllerE.SmdDevices[1].TargetIDs, controllerE.SmdDevices[1].Rank, - controllerE.NvmeState, controllerE.LedState, - - controllerF.PciAddr, controllerF.Model, controllerF.FwRev, - controllerF.SocketID, humanize.Bytes(controllerF.Capacity()), - controllerF.SmdDevices[0].UUID, controllerF.PciAddr, - controllerF.SmdDevices[0].TargetIDs, controllerF.SmdDevices[0].Rank, - controllerF.NvmeState, controllerF.LedState, - controllerF.SmdDevices[1].UUID, controllerF.PciAddr, - controllerF.SmdDevices[1].TargetIDs, controllerF.SmdDevices[1].Rank, - controllerF.NvmeState, controllerF.LedState), - }, - } { - t.Run(name, func(t *testing.T) { - var bld strings.Builder - if err := PrintNvmeMetaMap(tc.hsm, &bld); err != nil { - t.Fatal(err) - } - - if diff := cmp.Diff(strings.TrimLeft(tc.expPrintStr, "\n"), bld.String()); diff != "" { - t.Fatalf("unexpected print output (-want, +got):\n%s\n", diff) - } - }) - } -} diff --git a/src/control/cmd/dmg/pretty/storage_scm.go b/src/control/cmd/dmg/pretty/storage_scm.go index 62123550d9f..cffdaae5b90 100644 --- a/src/control/cmd/dmg/pretty/storage_scm.go +++ b/src/control/cmd/dmg/pretty/storage_scm.go @@ -54,10 +54,10 @@ func PrintScmModules(modules storage.ScmModules, out io.Writer, opts ...PrintCon return w.Err } - physicalIdTitle := "SCM Module ID" - socketTitle := "Socket ID" - memCtrlrTitle := "Memory Ctrlr ID" - channelTitle := "Channel ID" + physicalIdTitle := "SCM Module" + socketTitle := "Socket" + memCtrlrTitle := "Memory Ctrlr" + channelTitle := "Channel" slotTitle := "Channel Slot" capacityTitle := "Capacity" @@ -96,7 +96,7 @@ func PrintScmNamespaces(namespaces storage.ScmNamespaces, out io.Writer, opts .. 
} deviceTitle := "SCM Namespace" - socketTitle := "Socket ID" + socketTitle := "Socket" capacityTitle := "Capacity" formatter := txtfmt.NewTableFormatter(deviceTitle, socketTitle, capacityTitle) diff --git a/src/control/cmd/dmg/pretty/storage_test.go b/src/control/cmd/dmg/pretty/storage_test.go index 724031035ee..4e5279c31bf 100644 --- a/src/control/cmd/dmg/pretty/storage_test.go +++ b/src/control/cmd/dmg/pretty/storage_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -474,9 +474,9 @@ host1 HugePage Size: 2048 KB No SCM modules found -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 0 `, }, @@ -501,9 +501,9 @@ Errors: host1 ----- HugePage Size: 2048 KB -SCM Module ID Socket ID Memory Ctrlr ID Channel ID Channel Slot Capacity -------------- --------- --------------- ---------- ------------ -------- -1 1 1 1 1 954 MiB +SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity +---------- ------ ------------ ------- ------------ -------- +1 1 1 1 1 954 MiB No NVMe devices found @@ -583,13 +583,13 @@ HugePage Size: 2048 KB host1 ----- HugePage Size: 2048 KB -SCM Module ID Socket ID Memory Ctrlr ID Channel ID Channel Slot Capacity -------------- --------- --------------- ---------- ------------ -------- -1 1 1 1 1 954 MiB +SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity +---------- ------ ------------ ------- ------------ -------- +1 1 1 1 1 954 MiB -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- 
+0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 0 `, }, @@ -609,13 +609,13 @@ NVMe PCI Model FW Revision Socket ID Capacity host1 ----- HugePage Size: 2048 KB -SCM Namespace Socket ID Capacity -------------- --------- -------- -pmem0 0 1.0 TB +SCM Namespace Socket Capacity +------------- ------ -------- +pmem0 0 1.0 TB -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 0 `, }, @@ -639,13 +639,13 @@ NVMe PCI Model FW Revision Socket ID Capacity host[1-2] --------- HugePage Size: 2048 KB -SCM Module ID Socket ID Memory Ctrlr ID Channel ID Channel Slot Capacity -------------- --------- --------------- ---------- ------------ -------- -1 1 1 1 1 954 MiB +SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity +---------- ------ ------------ ------- ------------ -------- +1 1 1 1 1 954 MiB -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 0 `, }, @@ -669,9 +669,9 @@ NVMe PCI Model FW Revision Socket ID Capacity host1 ----- HugePage Size: 2048 KB -SCM Module ID Socket ID Memory Ctrlr ID Channel ID Channel Slot Capacity -------------- --------- --------------- ---------- ------------ -------- -1 1 1 1 1 954 MiB +SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity +---------- ------ ------------ ------- ------------ -------- +1 1 1 1 1 954 MiB No NVMe devices found @@ -681,9 +681,9 @@ host2 HugePage Size: 2048 KB No SCM modules found -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB +NVMe PCI Model FW Revision 
Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 0 `, }, @@ -699,13 +699,13 @@ NVMe PCI Model FW Revision Socket ID Capacity host[0-1023] ------------ HugePage Size: 2048 KB -SCM Module ID Socket ID Memory Ctrlr ID Channel ID Channel Slot Capacity -------------- --------- --------------- ---------- ------------ -------- -1 1 1 1 1 954 MiB +SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity +---------- ------ ------------ ------- ------------ -------- +1 1 1 1 1 954 MiB -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 0 `, }, @@ -739,9 +739,9 @@ host-[0001-0004] HugePage Size: 2048 KB No SCM modules found -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 0 `, }, @@ -775,9 +775,9 @@ host-j-[0001-0004] HugePage Size: 2048 KB No SCM modules found -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 model-1 fwRev-1 1 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB NA 0 `, }, @@ -809,29 +809,29 @@ NVMe PCI Model FW Revision Socket ID Capacity host[1,3] --------- HugePage Size: 2048 KB -SCM Namespace Socket ID Capacity -------------- --------- -------- -pmem0 0 1.0 TB -pmem1 1 2.0 TB +SCM Namespace Socket Capacity +------------- ------ -------- +pmem0 0 1.0 TB +pmem1 1 2.0 TB -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- 
--------- -------- -0000:01:00.0 1 2.0 TB -0000:04:00.0 0 2.0 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 1 2.0 TB data,meta,wal 0 +0000:04:00.0 0 2.0 TB data,meta,wal 0 --------- host[2,4] --------- HugePage Size: 2048 KB -SCM Namespace Socket ID Capacity -------------- --------- -------- -pmem0 0 1.0 TB -pmem1 1 2.0 TB +SCM Namespace Socket Capacity +------------- ------ -------- +pmem0 0 1.0 TB +pmem1 1 2.0 TB -NVMe PCI Model FW Revision Socket ID Capacity --------- ----- ----------- --------- -------- -0000:01:00.0 1 2.1 TB -0000:04:00.0 0 2.1 TB +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 1 2.1 TB data,meta,wal 0 +0000:04:00.0 0 2.1 TB data,meta,wal 0 `, }, @@ -1151,9 +1151,9 @@ SCM Mount Format Result --------- ------------- /mnt/2 CTL_SUCCESS -NVMe PCI Format Result --------- ------------- -2 CTL_SUCCESS +NVMe PCI Format Result Role(s) +-------- ------------- ------- +2 CTL_SUCCESS NA `, }, @@ -1178,9 +1178,9 @@ SCM Mount Format Result /mnt/1 CTL_SUCCESS /mnt/2 CTL_SUCCESS -NVMe PCI Format Result --------- ------------- -1 CTL_SUCCESS +NVMe PCI Format Result Role(s) +-------- ------------- ------- +1 CTL_SUCCESS NA `, }, @@ -1200,10 +1200,10 @@ SCM Mount Format Result /mnt/1 CTL_SUCCESS /mnt/2 CTL_SUCCESS -NVMe PCI Format Result --------- ------------- -1 CTL_SUCCESS -2 CTL_SUCCESS +NVMe PCI Format Result Role(s) +-------- ------------- ------- +1 CTL_SUCCESS NA +2 CTL_SUCCESS NA `, }, @@ -1227,9 +1227,9 @@ SCM Mount Format Result --------- ------------- /mnt/2 CTL_SUCCESS -NVMe PCI Format Result --------- ------------- -2 CTL_SUCCESS +NVMe PCI Format Result Role(s) +-------- ------------- ------- +2 CTL_SUCCESS NA `, }, @@ -1249,10 +1249,34 @@ SCM Mount Format Result /mnt/1 CTL_SUCCESS /mnt/2 CTL_SUCCESS -NVMe PCI Format Result --------- ------------- -1 CTL_SUCCESS -2 CTL_SUCCESS +NVMe 
PCI Format Result Role(s) +-------- ------------- ------- +1 CTL_SUCCESS NA +2 CTL_SUCCESS NA + +`, + }, + "2 Hosts, 2 SCM, 2 NVMe; MD-on-SSD roles": { + resp: control.MockFormatResp(t, control.MockFormatConf{ + Hosts: 2, + ScmPerHost: 2, + NvmePerHost: 2, + NvmeRoleBits: int(storage.BdevRoleAll), + }), + expPrintStr: ` + +--------- +host[1-2] +--------- +SCM Mount Format Result +--------- ------------- +/mnt/1 CTL_SUCCESS +/mnt/2 CTL_SUCCESS + +NVMe PCI Format Result Role(s) +-------- ------------- ------- +1 CTL_SUCCESS data,meta,wal +2 CTL_SUCCESS data,meta,wal `, }, @@ -1442,13 +1466,13 @@ host1 host1 ----- Devices - UUID:00000000-0000-0000-0000-000000000000 [TrAddr:0000:8a:00.0] + UUID:00000000-0000-0000-0000-000000000000 [TrAddr:0000:8a:00.0 NSID:0] Roles:wal SysXS Targets:[0 1 2] Rank:0 State:NEW LED:OFF - UUID:00000001-0001-0001-0001-000000000001 [TrAddr:0000:8b:00.0] + UUID:00000001-0001-0001-0001-000000000001 [TrAddr:0000:8b:00.0 NSID:0] Roles:data,meta Targets:[3 4 5] Rank:0 State:EVICTED LED:ON - UUID:00000002-0002-0002-0002-000000000002 [TrAddr:0000:da:00.0] + UUID:00000002-0002-0002-0002-000000000002 [TrAddr:0000:da:00.0 NSID:0] Roles:wal SysXS Targets:[0 1 2] Rank:1 State:UNKNOWN LED:NA - UUID:00000003-0003-0003-0003-000000000003 [TrAddr:0000:db:00.0] + UUID:00000003-0003-0003-0003-000000000003 [TrAddr:0000:db:00.0 NSID:0] Roles:data,meta Targets:[3 4 5] Rank:1 State:NORMAL LED:QUICK_BLINK `, }, @@ -1478,11 +1502,12 @@ host1 SmdInfo: &control.SmdInfo{ Devices: []*storage.SmdDevice{ { - UUID: test.MockUUID(0), - TargetIDs: []int32{0, 1, 2}, - Rank: 0, - Ctrlr: *mockController, - Roles: storage.BdevRoles{storage.BdevRoleAll}, + UUID: test.MockUUID(0), + TargetIDs: []int32{0, 1, 2}, + Rank: 0, + Ctrlr: *mockController, + CtrlrNamespaceID: 1, + Roles: storage.BdevRoles{storage.BdevRoleAll}, }, }, }, @@ -1494,7 +1519,7 @@ host1 host1 ----- Devices - UUID:00000000-0000-0000-0000-000000000000 [TrAddr:0000:01:00.0] + UUID:00000000-0000-0000-0000-000000000000 
[TrAddr:0000:01:00.0 NSID:1] Roles:data,meta,wal Targets:[0 1 2] Rank:0 State:NORMAL LED:OFF Health Stats: Temperature:%dK(%.02fC) @@ -1579,7 +1604,7 @@ host1 host1 ----- Devices - TrAddr:0000:db:00.0 [UUID:842c739b-86b5-462f-a7ba-b4a91b674f3d] LED:QUICK_BLINK + TrAddr:0000:db:00.0 NSID:0 [UUID:842c739b-86b5-462f-a7ba-b4a91b674f3d] LED:QUICK_BLINK `, }, "identify led; no uuid specified": { @@ -1592,7 +1617,8 @@ host1 SmdInfo: &control.SmdInfo{ Devices: []*storage.SmdDevice{ { - Ctrlr: identCtrlr, + Ctrlr: identCtrlr, + CtrlrNamespaceID: 1, }, }, }, @@ -1604,7 +1630,7 @@ host1 host1 ----- Devices - TrAddr:0000:db:00.0 LED:QUICK_BLINK + TrAddr:0000:db:00.0 NSID:1 LED:QUICK_BLINK `, }, } { diff --git a/src/control/cmd/dmg/storage.go b/src/control/cmd/dmg/storage.go index fea3160f74c..711407808f5 100644 --- a/src/control/cmd/dmg/storage.go +++ b/src/control/cmd/dmg/storage.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2022 Intel Corporation. +// (C) Copyright 2019-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -37,25 +37,20 @@ type storageScanCmd struct { cmdutil.JSONOutputCmd Verbose bool `short:"v" long:"verbose" description:"List SCM & NVMe device details"` NvmeHealth bool `short:"n" long:"nvme-health" description:"Display NVMe device health statistics"` - NvmeMeta bool `short:"m" long:"nvme-meta" description:"Display server meta data held on NVMe storage"` } // Execute is run when storageScanCmd activates. // // Runs NVMe and SCM storage scan on all connected servers. 
func (cmd *storageScanCmd) Execute(_ []string) error { - if cmd.NvmeHealth && cmd.NvmeMeta { - return errors.New("cannot use --nvme-health and --nvme-meta together") - } - if cmd.Verbose && (cmd.NvmeHealth || cmd.NvmeMeta) { - return errors.New("cannot use --verbose with --nvme-health or --nvme-meta") + if cmd.Verbose && cmd.NvmeHealth { + return errors.New("cannot use --verbose with --nvme-health") } req := &control.StorageScanReq{ NvmeHealth: cmd.NvmeHealth, - NvmeMeta: cmd.NvmeMeta, - // don't strip nvme details if verbose or health or meta set - NvmeBasic: !(cmd.Verbose || cmd.NvmeHealth || cmd.NvmeMeta), + // Strip nvme details if verbose and health flags are unset. + NvmeBasic: !(cmd.Verbose || cmd.NvmeHealth), } req.SetHostList(cmd.getHostList()) @@ -81,16 +76,11 @@ func (cmd *storageScanCmd) Execute(_ []string) error { } var out strings.Builder - switch { - case cmd.NvmeHealth: + if cmd.NvmeHealth { if err := pretty.PrintNvmeHealthMap(resp.HostStorage, &out); err != nil { return err } - case cmd.NvmeMeta: - if err := pretty.PrintNvmeMetaMap(resp.HostStorage, &out); err != nil { - return err - } - default: + } else { verbose := pretty.PrintWithVerboseOutput(cmd.Verbose) if err := pretty.PrintHostStorageMap(resp.HostStorage, &out, verbose); err != nil { return err diff --git a/src/control/cmd/dmg/storage_test.go b/src/control/cmd/dmg/storage_test.go index 300747fa285..124b46e984a 100644 --- a/src/control/cmd/dmg/storage_test.go +++ b/src/control/cmd/dmg/storage_test.go @@ -88,30 +88,6 @@ func TestStorageCommands(t *testing.T) { "", errors.New("cannot use --verbose"), }, - { - "Scan NVMe meta data short", - "storage scan -m", - printRequest(t, &control.StorageScanReq{NvmeMeta: true}), - nil, - }, - { - "Scan NVMe meta data long", - "storage scan --nvme-meta", - printRequest(t, &control.StorageScanReq{NvmeMeta: true}), - nil, - }, - { - "Scan NVMe meta with verbose", - "storage scan --nvme-meta --verbose", - "", - errors.New("cannot use --verbose"), - }, - { - 
"Scan NVMe meta and health", - "storage scan --nvme-meta --nvme-health --verbose", - "", - errors.New("cannot use --nvme-health and --nvme-meta"), - }, { "Rebind NVMe; no PCI address", "storage nvme-rebind", diff --git a/src/control/common/proto/ctl/smd.pb.go b/src/control/common/proto/ctl/smd.pb.go index 176c1294735..2fd9edf0aed 100644 --- a/src/control/common/proto/ctl/smd.pb.go +++ b/src/control/common/proto/ctl/smd.pb.go @@ -84,28 +84,28 @@ func (NvmeDevState) EnumDescriptor() ([]byte, []int) { type LedState int32 const ( - LedState_OFF LedState = 0 // Equivalent to SPDK_VMD_LED_STATE_OFF + LedState_NA LedState = 0 // Equivalent to SPDK_VMD_LED_STATE_UNKNOWN (VMD not enabled) LedState_QUICK_BLINK LedState = 1 // Equivalent to SPDK_VMD_LED_STATE_IDENTIFY (4Hz blink) LedState_ON LedState = 2 // Equivalent to SPDK_VMD_LED_STATE_FAULT (solid on) LedState_SLOW_BLINK LedState = 3 // Equivalent to SPDK_VMD_LED_STATE_REBUILD (1Hz blink) - LedState_NA LedState = 4 // Equivalent to SPDK_VMD_LED_STATE_UNKNOWN (VMD not enabled) + LedState_OFF LedState = 4 // Equivalent to SPDK_VMD_LED_STATE_OFF ) // Enum value maps for LedState. 
var ( LedState_name = map[int32]string{ - 0: "OFF", + 0: "NA", 1: "QUICK_BLINK", 2: "ON", 3: "SLOW_BLINK", - 4: "NA", + 4: "OFF", } LedState_value = map[string]int32{ - "OFF": 0, + "NA": 0, "QUICK_BLINK": 1, "ON": 2, "SLOW_BLINK": 3, - "NA": 4, + "OFF": 4, } ) @@ -773,7 +773,7 @@ func (x *NvmeController) GetLedState() LedState { if x != nil { return x.LedState } - return LedState_OFF + return LedState_NA } func (x *NvmeController) GetPciDevType() string { @@ -1324,7 +1324,7 @@ func (x *LedManageReq) GetLedState() LedState { if x != nil { return x.LedState } - return LedState_OFF + return LedState_NA } func (x *LedManageReq) GetLedDurationMins() uint32 { @@ -2296,10 +2296,10 @@ var file_ctl_smd_proto_rawDesc = []byte{ 0x4d, 0x41, 0x4c, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x02, 0x12, 0x0b, 0x0a, 0x07, 0x45, 0x56, 0x49, 0x43, 0x54, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x4e, 0x50, 0x4c, 0x55, 0x47, 0x47, 0x45, 0x44, 0x10, 0x04, 0x2a, 0x44, 0x0a, 0x08, 0x4c, 0x65, - 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x46, 0x46, 0x10, 0x00, 0x12, - 0x0f, 0x0a, 0x0b, 0x51, 0x55, 0x49, 0x43, 0x4b, 0x5f, 0x42, 0x4c, 0x49, 0x4e, 0x4b, 0x10, 0x01, - 0x12, 0x06, 0x0a, 0x02, 0x4f, 0x4e, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x4c, 0x4f, 0x57, - 0x5f, 0x42, 0x4c, 0x49, 0x4e, 0x4b, 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x4e, 0x41, 0x10, 0x04, + 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x06, 0x0a, 0x02, 0x4e, 0x41, 0x10, 0x00, 0x12, 0x0f, + 0x0a, 0x0b, 0x51, 0x55, 0x49, 0x43, 0x4b, 0x5f, 0x42, 0x4c, 0x49, 0x4e, 0x4b, 0x10, 0x01, 0x12, + 0x06, 0x0a, 0x02, 0x4f, 0x4e, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x4c, 0x4f, 0x57, 0x5f, + 0x42, 0x4c, 0x49, 0x4e, 0x4b, 0x10, 0x03, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x46, 0x46, 0x10, 0x04, 0x2a, 0x28, 0x0a, 0x09, 0x4c, 0x65, 0x64, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x07, 0x0a, 0x03, 0x47, 0x45, 0x54, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x45, 0x54, 0x10, 0x01, 0x12, 0x09, 
0x0a, 0x05, 0x52, 0x45, 0x53, 0x45, 0x54, 0x10, 0x02, 0x42, 0x39, 0x5a, 0x37, 0x67, 0x69, diff --git a/src/control/common/proto/ctl/storage_nvme.pb.go b/src/control/common/proto/ctl/storage_nvme.pb.go index 071ba10e04a..62fede43ed4 100644 --- a/src/control/common/proto/ctl/storage_nvme.pb.go +++ b/src/control/common/proto/ctl/storage_nvme.pb.go @@ -32,8 +32,9 @@ type NvmeControllerResult struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - PciAddr string `protobuf:"bytes,1,opt,name=pci_addr,json=pciAddr,proto3" json:"pci_addr,omitempty"` // PCI address of NVMe controller - State *ResponseState `protobuf:"bytes,2,opt,name=state,proto3" json:"state,omitempty"` // state of current operation + PciAddr string `protobuf:"bytes,1,opt,name=pci_addr,json=pciAddr,proto3" json:"pci_addr,omitempty"` // PCI address of NVMe controller + State *ResponseState `protobuf:"bytes,2,opt,name=state,proto3" json:"state,omitempty"` // state of current operation + RoleBits uint32 `protobuf:"varint,3,opt,name=role_bits,json=roleBits,proto3" json:"role_bits,omitempty"` // Device active roles (bitmask) } func (x *NvmeControllerResult) Reset() { @@ -82,6 +83,13 @@ func (x *NvmeControllerResult) GetState() *ResponseState { return nil } +func (x *NvmeControllerResult) GetRoleBits() uint32 { + if x != nil { + return x.RoleBits + } + return 0 +} + type ScanNvmeReq struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -260,33 +268,35 @@ var file_ctl_storage_nvme_proto_rawDesc = []byte{ 0x0a, 0x16, 0x63, 0x74, 0x6c, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x6e, 0x76, 0x6d, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x03, 0x63, 0x74, 0x6c, 0x1a, 0x10, 0x63, 0x74, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, - 0x0d, 0x63, 0x74, 0x6c, 0x2f, 0x73, 0x6d, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x5b, + 0x0d, 0x63, 0x74, 0x6c, 0x2f, 0x73, 0x6d, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 
0x78, 0x0a, 0x14, 0x4e, 0x76, 0x6d, 0x65, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x70, 0x63, 0x69, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x70, 0x63, 0x69, 0x41, 0x64, 0x64, 0x72, 0x12, 0x28, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x63, 0x74, 0x6c, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x53, - 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x22, 0x85, 0x01, 0x0a, 0x0b, - 0x53, 0x63, 0x61, 0x6e, 0x4e, 0x76, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x12, 0x16, 0x0a, 0x06, 0x48, - 0x65, 0x61, 0x6c, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x48, 0x65, 0x61, - 0x6c, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x65, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x08, 0x52, 0x04, 0x4d, 0x65, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x42, 0x61, 0x73, 0x69, 0x63, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x42, 0x61, 0x73, 0x69, 0x63, 0x12, 0x1a, 0x0a, - 0x08, 0x4d, 0x65, 0x74, 0x61, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, - 0x08, 0x4d, 0x65, 0x74, 0x61, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x52, 0x64, 0x62, - 0x53, 0x69, 0x7a, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x52, 0x64, 0x62, 0x53, - 0x69, 0x7a, 0x65, 0x22, 0x65, 0x0a, 0x0c, 0x53, 0x63, 0x61, 0x6e, 0x4e, 0x76, 0x6d, 0x65, 0x52, - 0x65, 0x73, 0x70, 0x12, 0x2b, 0x0a, 0x06, 0x63, 0x74, 0x72, 0x6c, 0x72, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x63, 0x74, 0x6c, 0x2e, 0x4e, 0x76, 0x6d, 0x65, 0x43, 0x6f, - 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x52, 0x06, 0x63, 0x74, 0x72, 0x6c, 0x72, 0x73, - 0x12, 0x28, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x12, 0x2e, 0x63, 0x74, 0x6c, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x53, 0x74, - 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 
0x61, 0x74, 0x65, 0x22, 0x0f, 0x0a, 0x0d, 0x46, 0x6f, - 0x72, 0x6d, 0x61, 0x74, 0x4e, 0x76, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x42, 0x39, 0x5a, 0x37, 0x67, - 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, - 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, - 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x2f, 0x63, 0x74, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x72, + 0x6f, 0x6c, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x08, + 0x72, 0x6f, 0x6c, 0x65, 0x42, 0x69, 0x74, 0x73, 0x22, 0x85, 0x01, 0x0a, 0x0b, 0x53, 0x63, 0x61, + 0x6e, 0x4e, 0x76, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x12, 0x16, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, + 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, + 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x65, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, + 0x4d, 0x65, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x42, 0x61, 0x73, 0x69, 0x63, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x05, 0x42, 0x61, 0x73, 0x69, 0x63, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x65, + 0x74, 0x61, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x08, 0x4d, 0x65, + 0x74, 0x61, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x52, 0x64, 0x62, 0x53, 0x69, 0x7a, + 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x52, 0x64, 0x62, 0x53, 0x69, 0x7a, 0x65, + 0x22, 0x65, 0x0a, 0x0c, 0x53, 0x63, 0x61, 0x6e, 0x4e, 0x76, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, + 0x12, 0x2b, 0x0a, 0x06, 0x63, 0x74, 0x72, 0x6c, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x13, 0x2e, 0x63, 0x74, 0x6c, 0x2e, 0x4e, 0x76, 0x6d, 0x65, 0x43, 0x6f, 0x6e, 0x74, 0x72, + 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x52, 0x06, 0x63, 0x74, 0x72, 0x6c, 0x72, 0x73, 0x12, 0x28, 
0x0a, + 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x63, + 0x74, 0x6c, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x22, 0x0f, 0x0a, 0x0d, 0x46, 0x6f, 0x72, 0x6d, 0x61, + 0x74, 0x4e, 0x76, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x42, 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, + 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, + 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, + 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, + 0x63, 0x74, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/lib/control/auto_test.go b/src/control/lib/control/auto_test.go index 749c603276c..beceb7b7b59 100644 --- a/src/control/lib/control/auto_test.go +++ b/src/control/lib/control/auto_test.go @@ -522,10 +522,14 @@ func TestControl_AutoConfig_getStorageSet(t *testing.T) { expStorageSet: &HostStorageSet{ HostSet: hostlist.MustCreateSet("host[1-2]"), HostStorage: &HostStorage{ - NvmeDevices: storage.NvmeControllers{storage.MockNvmeController()}, - ScmModules: storage.ScmModules{storage.MockScmModule()}, - ScmNamespaces: storage.ScmNamespaces{storage.MockScmNamespace(0)}, - MemInfo: MockMemInfo(), + NvmeDevices: storage.NvmeControllers{ + mockNvmeCtrlrWithSmd(storage.OptionBits(0)), + }, + ScmModules: storage.ScmModules{storage.MockScmModule()}, + ScmNamespaces: storage.ScmNamespaces{ + storage.MockScmNamespace(0), + }, + MemInfo: MockMemInfo(), }, }, }, @@ -569,7 +573,7 @@ func TestControl_AutoConfig_getStorageSet(t *testing.T) { }, defResCmpOpts()...) 
if diff := cmp.Diff(tc.expStorageSet, storageSet, cmpOpts...); diff != "" { - t.Fatalf("unexpected network set (-want, +got):\n%s\n", diff) + t.Fatalf("unexpected storage set (-want, +got):\n%s\n", diff) } }) } diff --git a/src/control/lib/control/mocks.go b/src/control/lib/control/mocks.go index c70bd89e579..bf080d8c8f6 100644 --- a/src/control/lib/control/mocks.go +++ b/src/control/lib/control/mocks.go @@ -175,8 +175,6 @@ func (mi *MockInvoker) InvokeUnaryRPCAsync(ctx context.Context, uReq UnaryReques return } } - - mi.log.Debug("sending mock response") responses <- hr } close(responses) @@ -318,21 +316,30 @@ func MockMemInfo() *common.MemInfo { } } +func mockNvmeCtrlrWithSmd(bdevRoles storage.OptionBits, varIdx ...int32) *storage.NvmeController { + idx := test.GetIndex(varIdx...) + nc := storage.MockNvmeController(idx) + sd := storage.MockSmdDevice(nil, idx) + sd.Roles = storage.BdevRoles{bdevRoles} + nc.SmdDevices = []*storage.SmdDevice{sd} + return nc +} + func standardServerScanResponse(t *testing.T) *ctlpb.StorageScanResp { pbSsr := &ctlpb.StorageScanResp{ Nvme: &ctlpb.ScanNvmeResp{}, Scm: &ctlpb.ScanScmResp{}, MemInfo: commonpb.MockPBMemInfo(), } + nvmeControllers := storage.NvmeControllers{ - storage.MockNvmeController(), - } - scmModules := storage.ScmModules{ - storage.MockScmModule(), + mockNvmeCtrlrWithSmd(storage.OptionBits(0)), } if err := convert.Types(nvmeControllers, &pbSsr.Nvme.Ctrlrs); err != nil { t.Fatal(err) } + + scmModules := storage.ScmModules{storage.MockScmModule()} if err := convert.Types(scmModules, &pbSsr.Scm.Modules); err != nil { t.Fatal(err) } @@ -355,7 +362,7 @@ func MockServerScanResp(t *testing.T, variant string) *ctlpb.StorageScanResp { ctrlrs := func(idxs ...int) storage.NvmeControllers { ncs := make(storage.NvmeControllers, 0, len(idxs)) for _, i := range idxs { - nc := storage.MockNvmeController(int32(i)) + nc := mockNvmeCtrlrWithSmd(storage.BdevRoleAll, int32(i)) ncs = append(ncs, nc) } return ncs @@ -541,6 +548,7 @@ type 
MockFormatConf struct { NvmePerHost int ScmFailures map[int]struct{} NvmeFailures map[int]struct{} + NvmeRoleBits int } // MockFormatResp returns a populated StorageFormatResp based on input config. @@ -583,6 +591,13 @@ func MockFormatResp(t *testing.T, mfc MockFormatConf) *StorageFormatResp { hs.NvmeDevices = append(hs.NvmeDevices, &storage.NvmeController{ Info: ctlpb.ResponseStatus_CTL_SUCCESS.String(), PciAddr: fmt.Sprintf("%d", j+1), + SmdDevices: []*storage.SmdDevice{ + { + Roles: storage.BdevRoles{ + storage.OptionBits(mfc.NvmeRoleBits), + }, + }, + }, }) } if err := hsm.Add(hostName, hs); err != nil { diff --git a/src/control/lib/control/storage.go b/src/control/lib/control/storage.go index 12f8389eadc..8f015970b27 100644 --- a/src/control/lib/control/storage.go +++ b/src/control/lib/control/storage.go @@ -141,7 +141,6 @@ type ( unaryRequest Usage bool NvmeHealth bool - NvmeMeta bool NvmeBasic bool } @@ -240,7 +239,7 @@ func StorageScan(ctx context.Context, rpcClient UnaryInvoker, req *StorageScanRe Basic: req.NvmeBasic, // Health and meta details required to populate usage statistics. 
Health: req.NvmeHealth || req.Usage, - Meta: req.NvmeMeta || req.Usage, + Meta: req.Usage, }, }) }) @@ -306,6 +305,13 @@ func (sfr *StorageFormatResp) addHostResponse(hr *HostResponse) (err error) { hs.NvmeDevices = append(hs.NvmeDevices, &storage.NvmeController{ Info: info, PciAddr: nr.GetPciAddr(), + SmdDevices: []*storage.SmdDevice{ + { + Roles: storage.BdevRoles{ + storage.OptionBits(nr.RoleBits), + }, + }, + }, }) default: if err := ctlStateToErr(nr.GetState()); err != nil { diff --git a/src/control/lib/spdk/nvme.go b/src/control/lib/spdk/nvme.go index b6b38edd044..b2a06c05612 100644 --- a/src/control/lib/spdk/nvme.go +++ b/src/control/lib/spdk/nvme.go @@ -76,15 +76,6 @@ func wrapCleanError(inErr error, cleanErr error) (outErr error) { return } -func ctrlrPCIAddresses(ctrlrs storage.NvmeControllers) []string { - pciAddrs := make([]string, 0, len(ctrlrs)) - for _, c := range ctrlrs { - pciAddrs = append(pciAddrs, c.PciAddr) - } - - return pciAddrs -} - func resultPCIAddresses(results []*FormatResult) []string { pciAddrs := make([]string, 0, len(results)) for _, r := range results { diff --git a/src/control/lib/spdk/nvme_default.go b/src/control/lib/spdk/nvme_default.go index d9efbc69fc9..4e1f40376a1 100644 --- a/src/control/lib/spdk/nvme_default.go +++ b/src/control/lib/spdk/nvme_default.go @@ -57,8 +57,11 @@ func (n *NvmeImpl) Discover(log logging.Logger) (storage.NvmeControllers, error) ctrlrs, err := collectCtrlrs(C.nvme_discover(), "NVMe Discover(): C.nvme_discover") - pciAddrs := ctrlrPCIAddresses(ctrlrs) - log.Debugf("discovered nvme ssds: %v", pciAddrs) + pciAddrs := make([]string, 0, len(ctrlrs)) + for _, c := range ctrlrs { + log.Debugf("nvme ssd scanned: %+v", c) + pciAddrs = append(pciAddrs, c.PciAddr) + } return ctrlrs, wrapCleanError(err, cleanLockfiles(log, realRemove, pciAddrs...)) } diff --git a/src/control/lib/spdk/src/nvme_control_common.c b/src/control/lib/spdk/src/nvme_control_common.c index 5bbecccbcb5..4d7d138fd08 100644 --- 
a/src/control/lib/spdk/src/nvme_control_common.c +++ b/src/control/lib/spdk/src/nvme_control_common.c @@ -250,25 +250,17 @@ _discover(prober probe, bool detach, health_getter get_health) } static int -str2ctrlr(char **dst, const void *src) +str2ctrlr(char **dst, const void *src, size_t size) { - int len; - assert(src != NULL); assert(dst != NULL); assert(*dst == NULL); - len = strnlen((const char *)src, NVME_DETAIL_BUFLEN); - if (len == NVME_DETAIL_BUFLEN) { - perror("src buf too big"); - return -NVMEC_ERR_CHK_SIZE; - } - - *dst = calloc(1, len + 1); + *dst = calloc(1, size + 1); if (*dst == NULL) return -ENOMEM; - if (copy_ascii(*dst, len + 1, src, len) != 0) { + if (copy_ascii(*dst, size + 1, src, size) != 0) { perror("copy_ascii"); return -NVMEC_ERR_CHK_SIZE; } @@ -281,18 +273,15 @@ copy_ctrlr_data(struct nvme_ctrlr_t *cdst, const struct spdk_nvme_ctrlr_data *cd { int rc; - rc = str2ctrlr(&cdst->model, cdata->mn); - if (rc != 0) { + rc = str2ctrlr(&cdst->model, cdata->mn, sizeof(cdata->mn)); + if (rc != 0) return rc; - } - rc = str2ctrlr(&cdst->serial, cdata->sn); - if (rc != 0) { + rc = str2ctrlr(&cdst->serial, cdata->sn, sizeof(cdata->sn)); + if (rc != 0) return rc; - } - rc = str2ctrlr(&cdst->fw_rev, cdata->fr); - if (rc != 0) { + rc = str2ctrlr(&cdst->fw_rev, cdata->fr, sizeof(cdata->fr)); + if (rc != 0) return rc; - } return 0; } diff --git a/src/control/server/ctl_storage.go b/src/control/server/ctl_storage.go index 443f2a0bc76..12ceb21a321 100644 --- a/src/control/server/ctl_storage.go +++ b/src/control/server/ctl_storage.go @@ -101,25 +101,26 @@ func (cs *ControlService) getScmUsage(ssr *storage.ScmScanResponse) (*storage.Sc instances := cs.harness.Instances() - nss := make(storage.ScmNamespaces, len(instances)) - for idx, ei := range instances { - if !ei.IsReady() { + nss := make(storage.ScmNamespaces, 0, len(instances)) + for _, engine := range instances { + if !engine.IsReady() { continue // skip if not running } - cfg, err := 
ei.GetStorage().GetScmConfig() + cfg, err := engine.GetStorage().GetScmConfig() if err != nil { return nil, err } - mount, err := ei.GetStorage().GetScmUsage() + mount, err := engine.GetStorage().GetScmUsage() if err != nil { return nil, err } + var ns *storage.ScmNamespace switch mount.Class { case storage.ClassRam: // generate fake namespace for emulated ramdisk mounts - nss[idx] = &storage.ScmNamespace{ + ns = &storage.ScmNamespace{ Mount: mount, BlockDevice: "ramdisk", Size: uint64(humanize.GiByte * cfg.Scm.RamdiskSize), @@ -127,29 +128,32 @@ func (cs *ControlService) getScmUsage(ssr *storage.ScmScanResponse) (*storage.Sc case storage.ClassDcpm: // update namespace mount info for online storage if ssr.Namespaces == nil { return nil, errors.Errorf("instance %d: input scm scan response missing namespaces", - ei.Index()) + engine.Index()) } - ns := findPMemInScan(ssr, mount.DeviceList) + ns = findPMemInScan(ssr, mount.DeviceList) if ns == nil { return nil, errors.Errorf("instance %d: no pmem namespace for mount %s", - ei.Index(), mount.Path) + engine.Index(), mount.Path) } ns.Mount = mount - nss[idx] = ns } - if nss[idx].Mount != nil { - rank, err := ei.GetRank() + if ns.Mount != nil { + rank, err := engine.GetRank() if err != nil { return nil, errors.Wrapf(err, "instance %d: no rank associated for mount %s", - ei.Index(), mount.Path) + engine.Index(), mount.Path) } - nss[idx].Mount.Rank = rank + ns.Mount.Rank = rank } - cs.log.Debugf("updated scm fs usage on device %s mounted at %s: %+v", - nss[idx].BlockDevice, mount.Path, nss[idx].Mount) + cs.log.Debugf("updated scm fs usage on device %s mounted at %s: %+v", ns.BlockDevice, + mount.Path, ns.Mount) + nss = append(nss, ns) } + if len(nss) == 0 { + return nil, errors.New("no scm details found") + } return &storage.ScmScanResponse{Namespaces: nss}, nil } diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index a7a87de805a..178304b8eb2 100644 --- 
a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2023 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -13,17 +13,16 @@ import ( "strconv" "github.com/dustin/go-humanize" - "github.com/dustin/go-humanize/english" "github.com/pkg/errors" "golang.org/x/net/context" "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/common/proto/convert" - "github.com/daos-stack/daos/src/control/common/proto/ctl" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/hardware" + "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/server/engine" "github.com/daos-stack/daos/src/control/server/storage" @@ -70,8 +69,39 @@ var ( type scanBdevsFn func(storage.BdevScanRequest) (*storage.BdevScanResponse, error) +func ctrlrToPciStr(nc *ctlpb.NvmeController) (string, error) { + pciAddr, err := hardware.NewPCIAddress(nc.GetPciAddr()) + if err != nil { + return "", errors.Wrapf(err, "Invalid PCI address") + } + if pciAddr.IsVMDBackingAddress() { + if pciAddr, err = pciAddr.BackingToVMDAddress(); err != nil { + return "", errors.Wrapf(err, "Invalid VMD address") + } + } + + return pciAddr.String(), nil +} + +func findBdevTier(pciAddr string, tcs storage.TierConfigs) *storage.TierConfig { + for _, tc := range tcs { + if !tc.IsBdev() { + continue + } + for _, name := range tc.Bdev.DeviceList.Devices() { + if pciAddr == name { + return tc + } + } + } + + return nil +} + // Convert bdev scan results to protobuf response. 
-func bdevScanToProtoResp(scan scanBdevsFn, req storage.BdevScanRequest) (*ctlpb.ScanNvmeResp, error) { +func bdevScanToProtoResp(scan scanBdevsFn, bdevCfgs storage.TierConfigs) (*ctlpb.ScanNvmeResp, error) { + req := storage.BdevScanRequest{DeviceList: bdevCfgs.Bdevs()} + resp, err := scan(req) if err != nil { return nil, err @@ -83,38 +113,57 @@ func bdevScanToProtoResp(scan scanBdevsFn, req storage.BdevScanRequest) (*ctlpb. return nil, err } + if bdevCfgs.HaveRealNVMe() { + // Update proto Ctrlrs with role info for offline display. + for _, c := range pbCtrlrs { + pciAddrStr, err := ctrlrToPciStr(c) + if err != nil { + return nil, err + } + bc := findBdevTier(pciAddrStr, bdevCfgs) + if bc == nil { + return nil, errors.Errorf("unknown PCI device, scanned ctrlr %q "+ + "not found in cfg", pciAddrStr) + } + if len(c.SmdDevices) != 0 { + return nil, errors.Errorf("scanned ctrlr %q has unexpected smd", + pciAddrStr) + } + c.SmdDevices = append(c.SmdDevices, &ctlpb.SmdDevice{ + RoleBits: uint32(bc.Bdev.DeviceRoles.OptionBits), + Rank: uint32(ranklist.NilRank), + }) + } + } + return &ctlpb.ScanNvmeResp{ State: new(ctlpb.ResponseState), Ctrlrs: pbCtrlrs, }, nil } -// Scan bdevs through harness's ControlService (not per-engine). -func bdevScanGlobal(cs *ControlService, cfgBdevs *storage.BdevDeviceList) (*ctlpb.ScanNvmeResp, error) { - req := storage.BdevScanRequest{DeviceList: cfgBdevs} - return bdevScanToProtoResp(cs.storage.ScanBdevs, req) -} - // Scan bdevs through each engine and collate response results. 
func bdevScanEngines(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, nsps []*ctlpb.ScmNamespace) (*ctlpb.ScanNvmeResp, error) { var errLast error instances := cs.harness.Instances() resp := &ctlpb.ScanNvmeResp{} - for _, ei := range instances { + for _, engine := range instances { eReq := new(ctlpb.ScanNvmeReq) *eReq = *req if req.Meta { - ms, rs, err := computeMetaRdbSz(cs, ei, nsps) + ms, rs, err := computeMetaRdbSz(cs, engine, nsps) if err != nil { return nil, errors.Wrap(err, "computing meta and rdb size") } eReq.MetaSize, eReq.RdbSize = ms, rs } - respEng, err := scanEngineBdevs(ctx, ei, eReq) + // If partial number of engines return results, indicate errors for non-ready + // engines whilst returning successful scan results. + respEng, err := scanEngineBdevs(ctx, engine, eReq) if err != nil { - err = errors.Wrapf(err, "instance %d", ei.Index()) + err = errors.Wrapf(err, "instance %d", engine.Index()) if errLast == nil && len(instances) > 1 { errLast = err // Save err to preserve partial results.
cs.log.Error(err.Error()) @@ -140,10 +189,8 @@ func bdevScanTrimResults(req *ctlpb.ScanNvmeReq, resp *ctlpb.ScanNvmeResp) *ctlp if !req.GetHealth() { pbc.HealthStats = nil } - if !req.GetMeta() { - pbc.SmdDevices = nil - } if req.GetBasic() { + pbc.SmdDevices = nil pbc.Serial = "" pbc.Model = "" pbc.FwRev = "" @@ -163,11 +210,15 @@ func engineHasStarted(instances []Engine) bool { return false } -func bdevScanAssigned(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, nsps []*ctlpb.ScmNamespace, hasStarted *bool, cfgBdevs *storage.BdevDeviceList) (*ctlpb.ScanNvmeResp, error) { +func bdevScanAssigned(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, nsps []*ctlpb.ScmNamespace, hasStarted *bool, bdevCfgs storage.TierConfigs) (*ctlpb.ScanNvmeResp, error) { *hasStarted = engineHasStarted(cs.harness.Instances()) if !*hasStarted { cs.log.Debugf("scan bdevs from control service as no engines started") - return bdevScanGlobal(cs, cfgBdevs) + if req.Meta { + return nil, errors.New("meta smd usage info unavailable as engines stopped") + } + + return bdevScanToProtoResp(cs.storage.ScanBdevs, bdevCfgs) } // Delegate scan to engine instances as soon as one engine with assigned bdevs has started. @@ -182,16 +233,24 @@ func bdevScan(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, n return nil, errors.New("nil request") } - cfgBdevs := getBdevCfgsFromSrvCfg(cs.srvCfg).Bdevs() + defer func() { + if err == nil && req.Meta { + cs.adjustNvmeSize(resp) + } + }() + + bdevCfgs := getBdevCfgsFromSrvCfg(cs.srvCfg) + nrCfgBdevs := bdevCfgs.Bdevs().Len() - if cfgBdevs.Len() == 0 { + if nrCfgBdevs == 0 { cs.log.Debugf("scan bdevs from control service as no bdevs in cfg") // No bdevs configured for engines to claim so scan through control service. 
- resp, err = bdevScanGlobal(cs, cfgBdevs) + resp, err = bdevScanToProtoResp(cs.storage.ScanBdevs, bdevCfgs) if err != nil { return nil, err } + return bdevScanTrimResults(req, resp), nil } @@ -200,28 +259,42 @@ func bdevScan(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, n // been claimed by SPDK but details are not yet available over dRPC. var hasStarted bool - resp, err = bdevScanAssigned(ctx, cs, req, nsps, &hasStarted, cfgBdevs) + resp, err = bdevScanAssigned(ctx, cs, req, nsps, &hasStarted, bdevCfgs) + if err != nil { + return nil, err + } + + nrScannedBdevs, err := getEffCtrlrCount(resp.Ctrlrs) if err != nil { return nil, err } + if nrScannedBdevs == nrCfgBdevs { + return bdevScanTrimResults(req, resp), nil + } - // Retry once if global scan returns unexpected number of controllers in case engines + // Retry once if harness scan returns unexpected number of controllers in case engines // claimed devices between when started state was checked and scan was executed. 
- if !hasStarted && len(resp.Ctrlrs) != cfgBdevs.Len() { - cs.log.Debugf("retrying bdev scan as unexpected nr returned, want %d got %d", - cfgBdevs.Len(), len(resp.Ctrlrs)) + if !hasStarted { + cs.log.Debugf("retrying harness bdev scan as unexpected nr returned, want %d got %d", + nrCfgBdevs, nrScannedBdevs) - resp, err = bdevScanAssigned(ctx, cs, req, nsps, &hasStarted, cfgBdevs) + resp, err = bdevScanAssigned(ctx, cs, req, nsps, &hasStarted, bdevCfgs) if err != nil { return nil, err } - } - if len(resp.Ctrlrs) != cfgBdevs.Len() { - cs.log.Noticef("bdev scan returned unexpected nr, want %d got %d", - cfgBdevs.Len(), len(resp.Ctrlrs)) + nrScannedBdevs, err := getEffCtrlrCount(resp.Ctrlrs) + if err != nil { + return nil, err + } + if nrScannedBdevs == nrCfgBdevs { + return bdevScanTrimResults(req, resp), nil + } } + cs.log.Noticef("harness bdev scan returned unexpected nr, want %d got %d", nrCfgBdevs, + nrScannedBdevs) + return bdevScanTrimResults(req, resp), nil } @@ -253,56 +326,55 @@ func newScanScmResp(inResp *storage.ScmScanResponse, inErr error) (*ctlpb.ScanSc } // scanScm will return mount details and usage for either emulated RAM or real PMem. 
-func (c *ControlService) scanScm(ctx context.Context, req *ctlpb.ScanScmReq) (*ctlpb.ScanScmResp, error) { +func (cs *ControlService) scanScm(ctx context.Context, req *ctlpb.ScanScmReq) (*ctlpb.ScanScmResp, error) { if req == nil { return nil, errors.New("nil scm request") } - ssr, scanErr := c.ScmScan(storage.ScmScanRequest{}) + ssr, err := cs.ScmScan(storage.ScmScanRequest{}) + if err != nil || !req.GetUsage() { + return newScanScmResp(ssr, err) + } + + ssr, err = cs.getScmUsage(ssr) + if err != nil { + return nil, err + } - if scanErr != nil || !req.GetUsage() { - return newScanScmResp(ssr, scanErr) + resp, err := newScanScmResp(ssr, nil) + if err != nil { + return nil, err } - return newScanScmResp(c.getScmUsage(ssr)) + cs.adjustScmSize(resp) + + return resp, nil } // Returns the engine configuration managing the given NVMe controller -func (c *ControlService) getEngineCfgFromNvmeCtl(nc *ctl.NvmeController) (*engine.Config, error) { - pciAddr, err := hardware.NewPCIAddress(nc.GetPciAddr()) +func (cs *ControlService) getEngineCfgFromNvmeCtl(nc *ctlpb.NvmeController) (*engine.Config, error) { + pciAddrStr, err := ctrlrToPciStr(nc) if err != nil { - return nil, errors.Errorf("Invalid PCI address: %s", err) - } - if pciAddr.IsVMDBackingAddress() { - if pciAddr, err = pciAddr.BackingToVMDAddress(); err != nil { - return nil, errors.Errorf("Invalid VMD address: %s", err) - } + return nil, err } - ctlrAddr := pciAddr.String() - for index := range c.srvCfg.Engines { - for _, tierCfg := range c.srvCfg.Engines[index].Storage.Tiers { - if !tierCfg.IsBdev() { - continue - } - for _, devName := range tierCfg.Bdev.DeviceList.Devices() { - if devName == ctlrAddr { - return c.srvCfg.Engines[index], nil - } - } + for index := range cs.srvCfg.Engines { + if findBdevTier(pciAddrStr, cs.srvCfg.Engines[index].Storage.Tiers) != nil { + return cs.srvCfg.Engines[index], nil } } - return nil, errors.Errorf("unknown PCI device %q", pciAddr) + return nil, errors.Errorf("unknown PCI 
device, scanned ctrlr %q not found in cfg", + pciAddrStr) } // Returns the engine configuration managing the given SCM name-space -func (c *ControlService) getEngineCfgFromScmNsp(nsp *ctl.ScmNamespace) (*engine.Config, error) { +func (cs *ControlService) getEngineCfgFromScmNsp(nsp *ctlpb.ScmNamespace) (*engine.Config, error) { mountPoint := nsp.GetMount().Path - for index := range c.srvCfg.Engines { - for _, tierCfg := range c.srvCfg.Engines[index].Storage.Tiers { + for index := range cs.srvCfg.Engines { + for _, tierCfg := range cs.srvCfg.Engines[index].Storage.Tiers { if tierCfg.IsSCM() && tierCfg.Scm.MountPoint == mountPoint { - return c.srvCfg.Engines[index], nil + return cs.srvCfg.Engines[index], nil } } } @@ -311,10 +383,10 @@ func (c *ControlService) getEngineCfgFromScmNsp(nsp *ctl.ScmNamespace) (*engine. } // return the size of the RDB file used for managing SCM metadata -func (c *ControlService) getRdbSize(engineCfg *engine.Config) (uint64, error) { +func (cs *ControlService) getRdbSize(engineCfg *engine.Config) (uint64, error) { mdCapStr, err := engineCfg.GetEnvVar(daos.DaosMdCapEnv) if err != nil { - c.log.Debugf("using default RDB file size with engine %d: %s (%d Bytes)", + cs.log.Debugf("using default RDB file size with engine %d: %s (%d Bytes)", engineCfg.Index, humanize.Bytes(daos.DefaultDaosMdCapSize), daos.DefaultDaosMdCapSize) return uint64(daos.DefaultDaosMdCapSize), nil @@ -326,7 +398,7 @@ func (c *ControlService) getRdbSize(engineCfg *engine.Config) (uint64, error) { mdCapStr) } rdbSize = rdbSize << 20 - c.log.Debugf("using custom RDB size with engine %d: %s (%d Bytes)", + cs.log.Debugf("using custom RDB size with engine %d: %s (%d Bytes)", engineCfg.Index, humanize.Bytes(rdbSize), rdbSize) return rdbSize, nil @@ -370,7 +442,7 @@ func metaRdbComputeSz(cs *ControlService, ei Engine, nsps []*ctlpb.ScmNamespace) } type deviceToAdjust struct { - ctlr *ctl.NvmeController + ctlr *ctlpb.NvmeController idx int rank uint32 } @@ -381,7 +453,7 @@ type 
deviceSizeStat struct { } // Add a device to the input map of device to which the usable size have to be adjusted -func (c *ControlService) addDeviceToAdjust(devsStat map[uint32]*deviceSizeStat, devToAdjust *deviceToAdjust, dataClusterCount uint64) { +func (cs *ControlService) addDeviceToAdjust(devsStat map[uint32]*deviceSizeStat, devToAdjust *deviceToAdjust, dataClusterCount uint64) { dev := devToAdjust.ctlr.GetSmdDevices()[devToAdjust.idx] if devsStat[devToAdjust.rank] == nil { devsStat[devToAdjust.rank] = &deviceSizeStat{ @@ -391,10 +463,10 @@ func (c *ControlService) addDeviceToAdjust(devsStat map[uint32]*deviceSizeStat, devsStat[devToAdjust.rank].devs = append(devsStat[devToAdjust.rank].devs, devToAdjust) targetCount := uint64(len(dev.GetTgtIds())) clusterPerTarget := dataClusterCount / targetCount - c.log.Tracef("SMD device %s (rank %d, ctlr %s) added to the list of device to adjust", + cs.log.Tracef("SMD device %s (rank %d, ctlr %s) added to the list of device to adjust", dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) if clusterPerTarget < devsStat[devToAdjust.rank].clusterPerTarget { - c.log.Tracef("Updating number of clusters per target of rank %d: old=%d new=%d", + cs.log.Tracef("Updating number of clusters per target of rank %d: old=%d new=%d", devToAdjust.rank, devsStat[devToAdjust.rank].clusterPerTarget, clusterPerTarget) devsStat[devToAdjust.rank].clusterPerTarget = clusterPerTarget } @@ -409,7 +481,7 @@ func getClusterCount(sizeBytes uint64, targetNb uint64, clusterSize uint64) uint return clusterCount * targetNb } -func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdjust deviceToAdjust) (subtrClusterCount uint64) { +func (cs *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdjust deviceToAdjust) (subtrClusterCount uint64) { dev := devToAdjust.ctlr.GetSmdDevices()[devToAdjust.idx] clusterSize := uint64(dev.GetClusterSize()) engineTargetNb := uint64(engineCfg.TargetCount) @@ -418,14 +490,14 
@@ func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdju // TODO DAOS-14223: GetMetaSize() should reflect custom values set through pool // create --meta-size option. clusterCount := getClusterCount(dev.GetMetaSize(), engineTargetNb, clusterSize) - c.log.Tracef("Removing %d Metadata clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", + cs.log.Tracef("Removing %d Metadata clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) subtrClusterCount += clusterCount } if dev.GetRoleBits()&storage.BdevRoleWAL != 0 { clusterCount := getClusterCount(dev.GetMetaWalSize(), engineTargetNb, clusterSize) - c.log.Tracef("Removing %d Metadata WAL clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", + cs.log.Tracef("Removing %d Metadata WAL clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) subtrClusterCount += clusterCount } @@ -436,14 +508,14 @@ func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdju if dev.GetRoleBits()&storage.BdevRoleMeta != 0 { clusterCount := getClusterCount(dev.GetRdbSize(), 1, clusterSize) - c.log.Tracef("Removing %d RDB clusters (cluster size: %d) the usable size of the SMD device %s (rank %d, ctlr %s)", + cs.log.Tracef("Removing %d RDB clusters (cluster size: %d) the usable size of the SMD device %s (rank %d, ctlr %s)", clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) subtrClusterCount += clusterCount } if dev.GetRoleBits()&storage.BdevRoleWAL != 0 { clusterCount := getClusterCount(dev.GetRdbWalSize(), 1, clusterSize) - c.log.Tracef("Removing %d RDB WAL clusters (cluster size: %d) from the usable size of the SMD device %s (rank 
%d, ctlr %s)", + cs.log.Tracef("Removing %d RDB WAL clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s)", clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) subtrClusterCount += clusterCount } @@ -452,12 +524,12 @@ func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdju } // Adjust the NVME available size to its real usable size. -func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) { +func (cs *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) { devsStat := make(map[uint32]*deviceSizeStat, 0) for _, ctlr := range resp.GetCtrlrs() { - engineCfg, err := c.getEngineCfgFromNvmeCtl(ctlr) + engineCfg, err := cs.getEngineCfgFromNvmeCtl(ctlr) if err != nil { - c.log.Noticef("Skipping NVME controller %s: %s", ctlr.GetPciAddr(), err.Error()) + cs.log.Noticef("Skipping NVME controller %s: %s", ctlr.GetPciAddr(), err.Error()) continue } @@ -465,7 +537,7 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) { rank := dev.GetRank() if dev.GetRoleBits() != 0 && (dev.GetRoleBits()&storage.BdevRoleData) == 0 { - c.log.Debugf("SMD device %s (rank %d, ctlr %s) not used to store data (Role bits 0x%X)", + cs.log.Debugf("SMD device %s (rank %d, ctlr %s) not used to store data (Role bits 0x%X)", dev.GetUuid(), rank, ctlr.GetPciAddr(), dev.GetRoleBits()) dev.TotalBytes = 0 dev.AvailBytes = 0 @@ -474,7 +546,7 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) { } if ctlr.GetDevState() != ctlpb.NvmeDevState_NORMAL { - c.log.Debugf("SMD device %s (rank %d, ctlr %s) not usable: device state %q", + cs.log.Debugf("SMD device %s (rank %d, ctlr %s) not usable: device state %q", dev.GetUuid(), rank, ctlr.GetPciAddr(), ctlpb.NvmeDevState_name[int32(ctlr.DevState)]) dev.AvailBytes = 0 dev.UsableBytes = 0 @@ -482,20 +554,20 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) { } if dev.GetClusterSize() == 0 || len(dev.GetTgtIds()) 
== 0 { - c.log.Noticef("SMD device %s (rank %d, ctlr %s) not usable: missing storage info", + cs.log.Noticef("SMD device %s (rank %d, ctlr %s) not usable: missing storage info", dev.GetUuid(), rank, ctlr.GetPciAddr()) dev.AvailBytes = 0 dev.UsableBytes = 0 continue } - c.log.Tracef("Initial available size of SMD device %s (rank %d, ctlr %s): %s (%d bytes)", + cs.log.Tracef("Initial available size of SMD device %s (rank %d, ctlr %s): %s (%d bytes)", dev.GetUuid(), rank, ctlr.GetPciAddr(), humanize.Bytes(dev.GetAvailBytes()), dev.GetAvailBytes()) clusterSize := uint64(dev.GetClusterSize()) availBytes := (dev.GetAvailBytes() / clusterSize) * clusterSize if dev.GetAvailBytes() != availBytes { - c.log.Tracef("Adjusting available size of SMD device %s (rank %d, ctlr %s): from %s (%d Bytes) to %s (%d bytes)", + cs.log.Tracef("Adjusting available size of SMD device %s (rank %d, ctlr %s): from %s (%d Bytes) to %s (%d bytes)", dev.GetUuid(), rank, ctlr.GetPciAddr(), humanize.Bytes(dev.GetAvailBytes()), dev.GetAvailBytes(), humanize.Bytes(availBytes), availBytes) @@ -509,21 +581,21 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) { } dataClusterCount := dev.GetAvailBytes() / clusterSize if dev.GetRoleBits() == 0 { - c.log.Tracef("No meta-data stored on SMD device %s (rank %d, ctlr %s)", + cs.log.Tracef("No meta-data stored on SMD device %s (rank %d, ctlr %s)", dev.GetUuid(), rank, ctlr.GetPciAddr()) - c.addDeviceToAdjust(devsStat, &devToAdjust, dataClusterCount) + cs.addDeviceToAdjust(devsStat, &devToAdjust, dataClusterCount) continue } - subtrClusterCount := c.getMetaClusterCount(engineCfg, devToAdjust) + subtrClusterCount := cs.getMetaClusterCount(engineCfg, devToAdjust) if subtrClusterCount >= dataClusterCount { - c.log.Debugf("No more usable space in SMD device %s (rank %d, ctlr %s)", + cs.log.Debugf("No more usable space in SMD device %s (rank %d, ctlr %s)", dev.GetUuid(), rank, ctlr.GetPciAddr()) dev.UsableBytes = 0 continue } dataClusterCount -= 
subtrClusterCount - c.addDeviceToAdjust(devsStat, &devToAdjust, dataClusterCount) + cs.addDeviceToAdjust(devsStat, &devToAdjust, dataClusterCount) } } @@ -532,7 +604,7 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) { smdDev := dev.ctlr.GetSmdDevices()[dev.idx] targetCount := uint64(len(smdDev.GetTgtIds())) smdDev.UsableBytes = targetCount * item.clusterPerTarget * smdDev.GetClusterSize() - c.log.Debugf("Defining usable size of the SMD device %s (rank %d, ctlr %s) to %s (%d bytes)", + cs.log.Debugf("Defining usable size of the SMD device %s (rank %d, ctlr %s) to %s (%d bytes)", smdDev.GetUuid(), rank, dev.ctlr.GetPciAddr(), humanize.Bytes(smdDev.GetUsableBytes()), smdDev.GetUsableBytes()) } @@ -540,45 +612,45 @@ func (c *ControlService) adjustNvmeSize(resp *ctlpb.ScanNvmeResp) { } // Adjust the SCM available size to the real usable size. -func (c *ControlService) adjustScmSize(resp *ctlpb.ScanScmResp) { +func (cs *ControlService) adjustScmSize(resp *ctlpb.ScanScmResp) { for _, scmNamespace := range resp.GetNamespaces() { mnt := scmNamespace.GetMount() mountPath := mnt.GetPath() mnt.UsableBytes = mnt.GetAvailBytes() - c.log.Debugf("Initial usable size of SCM %s: %s (%d bytes)", mountPath, + cs.log.Debugf("Initial usable size of SCM %s: %s (%d bytes)", mountPath, humanize.Bytes(mnt.GetUsableBytes()), mnt.GetUsableBytes()) - engineCfg, err := c.getEngineCfgFromScmNsp(scmNamespace) + engineCfg, err := cs.getEngineCfgFromScmNsp(scmNamespace) if err != nil { - c.log.Noticef("Adjusting usable size to 0 Bytes of SCM device %q: %s", + cs.log.Noticef("Adjusting usable size to 0 Bytes of SCM device %q: %s", mountPath, err.Error()) mnt.UsableBytes = 0 continue } - mdBytes, err := c.getRdbSize(engineCfg) + mdBytes, err := cs.getRdbSize(engineCfg) if err != nil { - c.log.Noticef("Adjusting usable size to 0 Bytes of SCM device %q: %s", + cs.log.Noticef("Adjusting usable size to 0 Bytes of SCM device %q: %s", mountPath, err.Error()) mnt.UsableBytes = 0 
continue } - c.log.Tracef("Removing RDB (%s, %d bytes) from the usable size of the SCM device %q", + cs.log.Tracef("Removing RDB (%s, %d bytes) from the usable size of the SCM device %q", humanize.Bytes(mdBytes), mdBytes, mountPath) if mdBytes >= mnt.GetUsableBytes() { - c.log.Debugf("No more usable space in SCM device %s", mountPath) + cs.log.Debugf("No more usable space in SCM device %s", mountPath) mnt.UsableBytes = 0 continue } mnt.UsableBytes -= mdBytes - removeControlPlaneMetadata := func(m *ctl.ScmNamespace_Mount) { + removeControlPlaneMetadata := func(m *ctlpb.ScmNamespace_Mount) { mountPath := m.GetPath() - c.log.Tracef("Removing control plane metadata (%s, %d bytes) from the usable size of the SCM device %q", + cs.log.Tracef("Removing control plane metadata (%s, %d bytes) from the usable size of the SCM device %q", humanize.Bytes(mdDaosScmBytes), mdDaosScmBytes, mountPath) if mdDaosScmBytes >= m.GetUsableBytes() { - c.log.Debugf("No more usable space in SCM device %s", mountPath) + cs.log.Debugf("No more usable space in SCM device %s", mountPath) m.UsableBytes = 0 return } @@ -588,7 +660,7 @@ func (c *ControlService) adjustScmSize(resp *ctlpb.ScanScmResp) { removeControlPlaneMetadata(mnt) } else { if !engineCfg.Storage.ControlMetadata.HasPath() { - c.log.Noticef("Adjusting usable size to 0 Bytes of SCM device %q: %s", + cs.log.Noticef("Adjusting usable size to 0 Bytes of SCM device %q: %s", mountPath, "MD on SSD feature enabled without path for Control Metadata") mnt.UsableBytes = 0 @@ -598,7 +670,7 @@ func (c *ControlService) adjustScmSize(resp *ctlpb.ScanScmResp) { cmdPath := engineCfg.Storage.ControlMetadata.Path if hasPrefix, err := common.HasPrefixPath(mountPath, cmdPath); hasPrefix || err != nil { if err != nil { - c.log.Noticef("Invalid SCM mount path or Control Metadata path: %q", err.Error()) + cs.log.Noticef("Invalid SCM mount path or Control Metadata path: %q", err.Error()) } if hasPrefix { removeControlPlaneMetadata(mnt) @@ -606,61 +678,36 @@ 
func (c *ControlService) adjustScmSize(resp *ctlpb.ScanScmResp) { } } - c.log.Tracef("Removing (%s, %d bytes) of usable size from the SCM device %q: space used by the file system metadata", + cs.log.Tracef("Removing (%s, %d bytes) of usable size from the SCM device %q: space used by the file system metadata", humanize.Bytes(mdFsScmBytes), mdFsScmBytes, mountPath) mnt.UsableBytes -= mdFsScmBytes usableBytes := scmNamespace.Mount.GetUsableBytes() - c.log.Debugf("Usable size of SCM device %q: %s (%d bytes)", + cs.log.Debugf("Usable size of SCM device %q: %s (%d bytes)", scmNamespace.Mount.GetPath(), humanize.Bytes(usableBytes), usableBytes) } } // StorageScan discovers non-volatile storage hardware on node. -func (c *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageScanReq) (*ctlpb.StorageScanResp, error) { +func (cs *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageScanReq) (*ctlpb.StorageScanResp, error) { if req == nil { return nil, errors.New("nil request") } resp := new(ctlpb.StorageScanResp) - // In the case that usage stats are being requested, relevant flags for both SCM and NVMe - // will be set and so fail if engines are not ready for comms. This restriction should not - // be applied if only the Meta flag is set in the NVMe component of the request to continue - // to support off-line storage scan functionality which uses cached stats (e.g. dmg storage - // scan --nvme-meta). - // - // TODO DAOS-13228: Remove --nvme-meta scan option and the below workaround. - // If usage or meta requested, fail if no engines started and skip stopped - // engines in bdev scan. Only return results for ready engines over dRPC. 
- if req.Scm.Usage && req.Nvme.Meta { - nrInstances := len(c.harness.Instances()) - readyRanks := c.harness.readyRanks() - if len(readyRanks) != nrInstances { - return nil, errors.Wrapf(errEngineNotReady, "%s, ready: %v", - english.Plural(nrInstances, "engine", "engines"), - readyRanks) - } - } - - respScm, err := c.scanScm(ctx, req.Scm) + respScm, err := cs.scanScm(ctx, req.Scm) if err != nil { return nil, err } - if req.Scm.GetUsage() { - c.adjustScmSize(respScm) - } resp.Scm = respScm - respNvme, err := scanBdevs(ctx, c, req.Nvme, respScm.Namespaces) + respNvme, err := scanBdevs(ctx, cs, req.Nvme, respScm.Namespaces) if err != nil { return nil, err } - if req.Nvme.GetMeta() { - c.adjustNvmeSize(respNvme) - } resp.Nvme = respNvme - mi, err := c.getMemInfo() + mi, err := cs.getMemInfo() if err != nil { return nil, err } @@ -671,9 +718,9 @@ func (c *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageScan return resp, nil } -func (c *ControlService) formatMetadata(instances []Engine, reformat bool) (bool, error) { +func (cs *ControlService) formatMetadata(instances []Engine, reformat bool) (bool, error) { // Format control metadata first, if needed - if needs, err := c.storage.ControlMetadataNeedsFormat(); err != nil { + if needs, err := cs.storage.ControlMetadataNeedsFormat(); err != nil { return false, errors.Wrap(err, "detecting if metadata format is needed") } else if needs || reformat { engineIdxs := make([]uint, len(instances)) @@ -681,15 +728,15 @@ func (c *ControlService) formatMetadata(instances []Engine, reformat bool) (bool engineIdxs[i] = uint(eng.Index()) } - c.log.Debug("formatting control metadata storage") - if err := c.storage.FormatControlMetadata(engineIdxs); err != nil { + cs.log.Debug("formatting control metadata storage") + if err := cs.storage.FormatControlMetadata(engineIdxs); err != nil { return false, errors.Wrap(err, "formatting control metadata storage") } return true, nil } - c.log.Debug("no control metadata format 
needed") + cs.log.Debug("no control metadata format needed") return false, nil } @@ -821,7 +868,7 @@ type formatNvmeReq struct { mdFormatted bool } -func formatNvme(ctx context.Context, req formatNvmeReq, resp *ctlpb.StorageFormatResp) { +func formatNvme(ctx context.Context, req formatNvmeReq, resp *ctlpb.StorageFormatResp) error { // Allow format to complete on one instance even if another fails for idx, engine := range req.instances { _, hasError := req.errored[idx] @@ -849,13 +896,16 @@ func formatNvme(ctx context.Context, req formatNvmeReq, resp *ctlpb.StorageForma pbCtrlrs := proto.NvmeControllers(respBdevs.Ctrlrs) ctrlrs, err := pbCtrlrs.ToNative() if err != nil { - req.errored[idx] = err.Error() - resp.Crets = append(resp.Crets, engine.newCret("", err)) - continue + return errors.Wrapf(err, "convert %T to %T", pbCtrlrs, ctrlrs) + } + + ei, ok := engine.(*EngineInstance) + if !ok { + return errors.New("Engine interface obj is not an EngineInstance") } // SCM formatted correctly on this instance, format NVMe - cResults := formatEngineBdevs(engine.(*EngineInstance), ctrlrs) + cResults := formatEngineBdevs(ei, ctrlrs) if cResults.HasErrors() { req.errored[idx] = cResults.Errors() @@ -870,6 +920,8 @@ func formatNvme(ctx context.Context, req formatNvmeReq, resp *ctlpb.StorageForma resp.Crets = append(resp.Crets, cResults...) } + + return nil } // StorageFormat delegates to Storage implementation's Format methods to prepare @@ -880,8 +932,8 @@ func formatNvme(ctx context.Context, req formatNvmeReq, resp *ctlpb.StorageForma // // Send response containing multiple results of format operations on scm mounts // and nvme controllers. 
-func (c *ControlService) StorageFormat(ctx context.Context, req *ctlpb.StorageFormatReq) (*ctlpb.StorageFormatResp, error) { - instances := c.harness.Instances() +func (cs *ControlService) StorageFormat(ctx context.Context, req *ctlpb.StorageFormatReq) (*ctlpb.StorageFormatResp, error) { + instances := cs.harness.Instances() resp := new(ctlpb.StorageFormatResp) resp.Mrets = make([]*ctlpb.ScmMountResult, 0, len(instances)) resp.Crets = make([]*ctlpb.NvmeControllerResult, 0, len(instances)) @@ -891,16 +943,16 @@ func (c *ControlService) StorageFormat(ctx context.Context, req *ctlpb.StorageFo return resp, nil } - mdFormatted, err := c.formatMetadata(instances, req.Reformat) + mdFormatted, err := cs.formatMetadata(instances, req.Reformat) if err != nil { return nil, err } fsr := formatScmReq{ - log: c.log, + log: cs.log, reformat: req.Reformat, instances: instances, - getMemInfo: c.getMemInfo, + getMemInfo: cs.getMemInfo, } instanceErrors, instanceSkips, err := formatScm(ctx, fsr, resp) if err != nil { @@ -908,7 +960,7 @@ func (c *ControlService) StorageFormat(ctx context.Context, req *ctlpb.StorageFo } fnr := formatNvmeReq{ - log: c.log, + log: cs.log, instances: instances, errored: instanceErrors, skipped: instanceSkips, @@ -922,7 +974,7 @@ func (c *ControlService) StorageFormat(ctx context.Context, req *ctlpb.StorageFo // because devices have already been claimed during format. for idx, ei := range instances { if msg, hasError := instanceErrors[idx]; hasError { - c.log.Errorf("instance %d: %s", idx, msg) + cs.log.Errorf("instance %d: %s", idx, msg) continue } ei.NotifyStorageReady() @@ -932,7 +984,7 @@ func (c *ControlService) StorageFormat(ctx context.Context, req *ctlpb.StorageFo } // StorageNvmeRebind rebinds SSD from kernel and binds to user-space to allow DAOS to use it. 
-func (c *ControlService) StorageNvmeRebind(ctx context.Context, req *ctlpb.NvmeRebindReq) (*ctlpb.NvmeRebindResp, error) { +func (cs *ControlService) StorageNvmeRebind(ctx context.Context, req *ctlpb.NvmeRebindReq) (*ctlpb.NvmeRebindResp, error) { if req == nil { return nil, errors.New("nil request") } @@ -951,9 +1003,9 @@ func (c *ControlService) StorageNvmeRebind(ctx context.Context, req *ctlpb.NvmeR } resp := new(ctlpb.NvmeRebindResp) - if _, err := c.NvmePrepare(prepReq); err != nil { + if _, err := cs.NvmePrepare(prepReq); err != nil { err = errors.Wrap(err, "nvme rebind") - c.log.Error(err.Error()) + cs.log.Error(err.Error()) resp.State = &ctlpb.ResponseState{ Error: err.Error(), @@ -969,12 +1021,12 @@ func (c *ControlService) StorageNvmeRebind(ctx context.Context, req *ctlpb.NvmeR // StorageNvmeAddDevice adds a newly added SSD to a DAOS engine's NVMe config to allow it to be used. // // If StorageTierIndex is set to -1 in request, add the device to the first configured bdev tier. 
-func (c *ControlService) StorageNvmeAddDevice(ctx context.Context, req *ctlpb.NvmeAddDeviceReq) (resp *ctlpb.NvmeAddDeviceResp, err error) { +func (cs *ControlService) StorageNvmeAddDevice(ctx context.Context, req *ctlpb.NvmeAddDeviceReq) (resp *ctlpb.NvmeAddDeviceResp, err error) { if req == nil { return nil, errors.New("nil request") } - engines := c.harness.Instances() + engines := cs.harness.Instances() engineIndex := req.GetEngineIndex() if len(engines) <= int(engineIndex) { @@ -1003,17 +1055,17 @@ func (c *ControlService) StorageNvmeAddDevice(ctx context.Context, req *ctlpb.Nv tierIndex) } - c.log.Debugf("bdev list to be updated: %+v", tierCfg.Bdev.DeviceList) + cs.log.Debugf("bdev list to be updated: %+v", tierCfg.Bdev.DeviceList) if err := tierCfg.Bdev.DeviceList.AddStrings(req.PciAddr); err != nil { return nil, errors.Errorf("updating bdev list for tier %d", tierIndex) } - c.log.Debugf("updated bdev list: %+v", tierCfg.Bdev.DeviceList) + cs.log.Debugf("updated bdev list: %+v", tierCfg.Bdev.DeviceList) // TODO: Supply scan results for VMD backing device address mapping. 
resp = new(ctlpb.NvmeAddDeviceResp) - if err := engineStorage.WriteNvmeConfig(ctx, c.log, nil); err != nil { + if err := engineStorage.WriteNvmeConfig(ctx, cs.log, nil); err != nil { err = errors.Wrapf(err, "write nvme config for engine %d", engineIndex) - c.log.Error(err.Error()) + cs.log.Error(err.Error()) // report write conf call result in response resp.State = &ctlpb.ResponseState{ diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 3fb19e80300..56142173835 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -28,6 +28,7 @@ import ( "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/events" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/provider/system" "github.com/daos-stack/daos/src/control/server/config" @@ -72,15 +73,15 @@ func TestServer_bdevScan(t *testing.T) { "nil request": { expErr: errors.New("nil request"), }, - "no bdevs in config; scan local fails": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan local; no bdevs in config; scan fails": { + req: &ctlpb.ScanNvmeReq{Health: true}, engTierCfgs: []storage.TierConfigs{{}}, provErr: errors.New("fail"), engStopped: []bool{false}, expErr: errors.New("fail"), }, - "no bdevs in config; scan local; devlist passed to backend": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan local; no bdevs in config; devlist passed to backend": { + req: &ctlpb.ScanNvmeReq{Health: true}, engTierCfgs: []storage.TierConfigs{{}}, engStopped: []bool{false}, expResp: &ctlpb.ScanNvmeResp{ @@ -93,7 +94,20 @@ func TestServer_bdevScan(t *testing.T) { {DeviceList: new(storage.BdevDeviceList)}, }, }, - "bdevs in config; engine not started; scan local; devlist passed to backend": { + // This should succeed so nil 
NVMe stats can be returned in SCM-only scenarios. + "scan local; no bdevs in config; meta requested": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{{}}, + engStopped: []bool{false}, + provRes: &storage.BdevScanResponse{}, + expResp: &ctlpb.ScanNvmeResp{ + State: new(ctlpb.ResponseState), + }, + expBackendScanCalls: []storage.BdevScanRequest{ + {DeviceList: new(storage.BdevDeviceList)}, + }, + }, + "scan local; bdevs in config; meta requested": { req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, engTierCfgs: []storage.TierConfigs{ { @@ -103,6 +117,19 @@ func TestServer_bdevScan(t *testing.T) { test.MockPCIAddr(2)), }, }, + engStopped: []bool{true}, + expErr: errors.New("info unavailable"), + }, + "scan local; bdevs in config; devlist passed to backend; no roles": { + req: &ctlpb.ScanNvmeReq{Health: true}, + engTierCfgs: []storage.TierConfigs{ + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), + }, + }, provRes: &storage.BdevScanResponse{ Controllers: storage.NvmeControllers{ storage.MockNvmeController(1), @@ -112,8 +139,26 @@ func TestServer_bdevScan(t *testing.T) { engStopped: []bool{true}, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - proto.MockNvmeController(1), - proto.MockNvmeController(2), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(1) + c.SmdDevices = []*ctlpb.SmdDevice{ + { + Rank: uint32(ranklist.NilRank), + RoleBits: 0, // No roles. + }, + } + return c + }(), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(2) + c.SmdDevices = []*ctlpb.SmdDevice{ + { + Rank: uint32(ranklist.NilRank), + RoleBits: 0, // No roles. 
+ }, + } + return c + }(), }, State: new(ctlpb.ResponseState), }, @@ -124,8 +169,61 @@ func TestServer_bdevScan(t *testing.T) { }, }, }, - "bdevs in config; engine not started; scan local; retry on empty response": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan local; bdevs in config; devlist passed to backend; roles from cfg": { + req: &ctlpb.ScanNvmeReq{Health: true}, + engTierCfgs: []storage.TierConfigs{ + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(test.MockPCIAddr(1)). + WithBdevDeviceRoles(storage.BdevRoleWAL), + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(test.MockPCIAddr(2)). + WithBdevDeviceRoles(storage.BdevRoleMeta | storage.BdevRoleData), + }, + }, + provRes: &storage.BdevScanResponse{ + Controllers: storage.NvmeControllers{ + storage.MockNvmeController(1), + storage.MockNvmeController(2), + }, + }, + engStopped: []bool{true}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(1) + c.SmdDevices = []*ctlpb.SmdDevice{ + { + Rank: uint32(ranklist.NilRank), + RoleBits: uint32(storage.BdevRoleWAL), + }, + } + return c + }(), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(2) + c.SmdDevices = []*ctlpb.SmdDevice{ + { + Rank: uint32(ranklist.NilRank), + RoleBits: uint32(storage.BdevRoleMeta | storage.BdevRoleData), + }, + } + return c + }(), + }, + State: new(ctlpb.ResponseState), + }, + expBackendScanCalls: []storage.BdevScanRequest{ + { + DeviceList: storage.MustNewBdevDeviceList( + test.MockPCIAddr(1), test.MockPCIAddr(2)), + }, + }, + }, + "scan local; bdevs in config; devlist passed to backend; retry on empty response": { + req: &ctlpb.ScanNvmeReq{Health: true}, engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). 
@@ -153,7 +251,7 @@ func TestServer_bdevScan(t *testing.T) { }, }, }, - "bdevs in config; engine started; scan remote": { + "scan remote; bdevs in config": { req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, engTierCfgs: []storage.TierConfigs{ { @@ -364,8 +462,8 @@ func TestServer_bdevScan(t *testing.T) { }, }, }, - "bdevs in config; engine not started; scan local; vmd enabled": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan local; bdevs in config; vmd enabled": { + req: &ctlpb.ScanNvmeReq{}, engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). @@ -382,18 +480,33 @@ func TestServer_bdevScan(t *testing.T) { engStopped: []bool{true}, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - &ctlpb.NvmeController{PciAddr: "050505:01:00.0"}, - &ctlpb.NvmeController{PciAddr: "050505:03:00.0"}, + func() *ctlpb.NvmeController { + nc := &ctlpb.NvmeController{ + PciAddr: "050505:01:00.0", + } + nc.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return nc + }(), + func() *ctlpb.NvmeController { + nc := &ctlpb.NvmeController{ + PciAddr: "050505:03:00.0", + } + nc.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return nc + }(), }, State: new(ctlpb.ResponseState), }, expBackendScanCalls: []storage.BdevScanRequest{ {DeviceList: storage.MustNewBdevDeviceList("0000:05:05.5")}, - {DeviceList: storage.MustNewBdevDeviceList("0000:05:05.5")}, }, }, - "bdevs in config; engine started; scan remote; vmd enabled": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan remote; bdevs in config; vmd enabled": { + req: &ctlpb.ScanNvmeReq{Meta: true}, engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). 
@@ -719,7 +832,7 @@ func TestServer_CtlSvc_StorageScan(t *testing.T) { MemInfo: proto.MockPBMemInfo(), }, }, - "scan usage": { + "scan usage; engines not ready": { req: &ctlpb.StorageScanReq{ Scm: &ctlpb.ScanScmReq{ Usage: true, @@ -729,7 +842,7 @@ func TestServer_CtlSvc_StorageScan(t *testing.T) { }, }, enginesNotReady: true, - expErr: errEngineNotReady, + expErr: errors.New("no scm details found"), }, } { t.Run(name, func(t *testing.T) { diff --git a/src/control/server/harness.go b/src/control/server/harness.go index ae90d3ed711..af8eb206ce5 100644 --- a/src/control/server/harness.go +++ b/src/control/server/harness.go @@ -62,6 +62,7 @@ type Engine interface { OnReady(...onReadyFn) GetStorage() *storage.Provider Debugf(format string, args ...interface{}) + Tracef(format string, args ...interface{}) } // EngineHarness is responsible for managing Engine instances. diff --git a/src/control/server/instance.go b/src/control/server/instance.go index b32831f03b9..c75eef5f3dd 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -366,3 +366,7 @@ func (ei *EngineInstance) callSetUp(ctx context.Context) error { func (ei *EngineInstance) Debugf(format string, args ...interface{}) { ei.log.Debugf(format, args...) } + +func (ei *EngineInstance) Tracef(format string, args ...interface{}) { + ei.log.Tracef(format, args...) +} diff --git a/src/control/server/instance_storage.go b/src/control/server/instance_storage.go index 7b2b38cc57a..8cc36304228 100644 --- a/src/control/server/instance_storage.go +++ b/src/control/server/instance_storage.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2023 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -118,7 +118,7 @@ func (ei *EngineInstance) awaitStorageReady(ctx context.Context) error { if !needsMetaFormat && !needsScmFormat { ei.log.Debugf("instance %d: no SCM format required; checking for superblock", idx) - needsSuperblock, err := ei.NeedsSuperblock() + needsSuperblock, err := ei.needsSuperblock() if err != nil { ei.log.Errorf("instance %d: failed to check instance superblock: %s", idx, err) } diff --git a/src/control/server/instance_storage_rpc.go b/src/control/server/instance_storage_rpc.go index 9ce68660f25..74926d9692a 100644 --- a/src/control/server/instance_storage_rpc.go +++ b/src/control/server/instance_storage_rpc.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2023 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -18,6 +18,7 @@ import ( "github.com/daos-stack/daos/src/control/common/proto" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" "github.com/daos-stack/daos/src/control/fault" + "github.com/daos-stack/daos/src/control/lib/hardware" "github.com/daos-stack/daos/src/control/server/storage" ) @@ -81,9 +82,9 @@ func (ei *EngineInstance) scmFormat(force bool) (*ctlpb.ScmMountResult, error) { func formatEngineBdevs(ei *EngineInstance, ctrlrs storage.NvmeControllers) (results proto.NvmeControllerResults) { // If no superblock exists, format NVMe and populate response with results. 
- needsSuperblock, err := ei.NeedsSuperblock() + needsSuperblock, err := ei.needsSuperblock() if err != nil { - ei.log.Errorf("engine storage for %s instance %d: NeedsSuperblock(): %s", + ei.log.Errorf("engine storage for %s instance %d: needsSuperblock(): %s", build.DataPlaneName, ei.Index(), err) return proto.NvmeControllerResults{ @@ -100,19 +101,22 @@ func formatEngineBdevs(ei *EngineInstance, ctrlrs storage.NvmeControllers) (resu for _, tr := range ei.storage.FormatBdevTiers(ctrlrs) { if tr.Error != nil { - results = append(results, ei.newCret(fmt.Sprintf("tier %d", tr.Tier), tr.Error)) + results = append(results, ei.newCret(fmt.Sprintf("tier %d", tr.Tier), + tr.Error)) continue } for devAddr, status := range tr.Result.DeviceResponses { - ei.log.Debugf("instance %d: tier %d: device fmt of %s, status %+v", - ei.Index(), tr.Tier, devAddr, status) + ei.log.Debugf("instance %d: tier %d: device fmt of %s, status %+v, roles %q", + ei.Index(), tr.Tier, devAddr, status, tr.DeviceRoles) // TODO DAOS-5828: passing status.Error directly triggers segfault var err error if status.Error != nil { err = status.Error } - results = append(results, ei.newCret(devAddr, err)) + res := ei.newCret(devAddr, err) + res.RoleBits = uint32(tr.DeviceRoles.OptionBits) + results = append(results, res) } } @@ -206,11 +210,22 @@ func scanEngineBdevsOverDrpc(ctx context.Context, engine Engine, pbReq *ctlpb.Sc c.SmdDevices = nil c.HealthStats = nil seenCtrlrs[addr] = c - pbResp.Ctrlrs = append(pbResp.Ctrlrs, c) } c := seenCtrlrs[addr] + // Only minimal info provided in standard scan to enable result aggregation across + // homogeneous hosts. + engineRank, err := engine.GetRank() + if err != nil { + engine.Debugf("instance %d GetRank: %s", engine.Index(), err.Error()) + } + nsd := &ctlpb.SmdDevice{ + RoleBits: sd.RoleBits, + CtrlrNamespaceId: sd.CtrlrNamespaceId, + Rank: engineRank.Uint32(), + } + // Populate health if requested. 
healthUpdated := false if pbReq.Health { @@ -226,11 +241,11 @@ func scanEngineBdevsOverDrpc(ctx context.Context, engine Engine, pbReq *ctlpb.Sc healthUpdated = upd } - // Populate SMD (meta) if requested. + // Populate usage data if requested. if pbReq.Meta { - nsd := new(ctlpb.SmdDevice) *nsd = *sd nsd.Ctrlr = nil + nsd.Rank = engineRank.Uint32() nsd.MetaSize = pbReq.MetaSize nsd.RdbSize = pbReq.RdbSize if healthUpdated { @@ -241,79 +256,112 @@ func scanEngineBdevsOverDrpc(ctx context.Context, engine Engine, pbReq *ctlpb.Sc nsd.MetaWalSize = c.HealthStats.MetaWalSize nsd.RdbWalSize = c.HealthStats.RdbWalSize } - engineRank, err := engine.GetRank() - if err != nil { - return nil, errors.Wrapf(err, "instance %d GetRank", engine.Index()) - } - nsd.Rank = engineRank.Uint32() - c.SmdDevices = append(c.SmdDevices, nsd) } + + c.SmdDevices = append(c.SmdDevices, nsd) + } + + for _, c := range seenCtrlrs { + engine.Tracef("nvme ssd scanned: %+v", c) + pbResp.Ctrlrs = append(pbResp.Ctrlrs, c) } return &pbResp, nil } -func bdevScanEngineAssigned(ctx context.Context, engine Engine, pbReq *ctlpb.ScanNvmeReq, devList *storage.BdevDeviceList, isStarted *bool) (*ctlpb.ScanNvmeResp, error) { +func bdevScanEngineAssigned(ctx context.Context, engine Engine, req *ctlpb.ScanNvmeReq, bdevCfgs storage.TierConfigs, isStarted *bool) (*ctlpb.ScanNvmeResp, error) { *isStarted = engine.IsStarted() if !*isStarted { - engine.Debugf("scanning engine-%d bdev tiers while engine is down", engine.Index()) - - // Retrieve engine cfg bdevs to restrict scan scope. 
- req := storage.BdevScanRequest{DeviceList: devList} + engine.Debugf("scanning engine-%d bdevs while engine is down", engine.Index()) + if req.Meta { + return nil, errors.New("meta smd usage info unavailable as engine stopped") + } - return bdevScanToProtoResp(engine.GetStorage().ScanBdevs, req) + return bdevScanToProtoResp(engine.GetStorage().ScanBdevs, bdevCfgs) } - engine.Debugf("scanning engine-%d bdev tiers while engine is up", engine.Index()) + engine.Debugf("scanning engine-%d bdevs while engine is up", engine.Index()) - // If engine is started but not ready, wait for ready state. If partial number of engines - // return results, indicate errors for non-ready engines whilst returning successful scan - // results. + // If engine is started but not ready, wait for ready state. pollFn := func(e Engine) bool { return e.IsReady() } if err := pollInstanceState(ctx, []Engine{engine}, pollFn); err != nil { return nil, errors.Wrapf(err, "waiting for engine %d to be ready to receive drpcs", engine.Index()) } - return scanEngineBdevsOverDrpc(ctx, engine, pbReq) + return scanEngineBdevsOverDrpc(ctx, engine, req) +} + +func getEffCtrlrCount(ctrlrs []*ctlpb.NvmeController) (int, error) { + pas := hardware.MustNewPCIAddressSet() + for _, c := range ctrlrs { + if err := pas.AddStrings(c.PciAddr); err != nil { + return 0, err + } + } + if pas.HasVMD() { + if npas, err := pas.BackingToVMDAddresses(); err != nil { + return 0, err + } else { + pas = npas + } + } + + return pas.Len(), nil } // bdevScanEngine calls either in to the private engine storage provider to scan bdevs if engine process // is not started, otherwise dRPC is used to retrieve details from the online engine. 
-func bdevScanEngine(ctx context.Context, engine Engine, req *ctlpb.ScanNvmeReq) (resp *ctlpb.ScanNvmeResp, err error) { +func bdevScanEngine(ctx context.Context, engine Engine, req *ctlpb.ScanNvmeReq) (*ctlpb.ScanNvmeResp, error) { if req == nil { return nil, errors.New("nil request") } - eCfgBdevs := storage.TierConfigs(engine.GetStorage().GetBdevConfigs()).Bdevs() - if eCfgBdevs.Len() == 0 { + bdevCfgs := storage.TierConfigs(engine.GetStorage().GetBdevConfigs()) + nrCfgBdevs := bdevCfgs.Bdevs().Len() + + if nrCfgBdevs == 0 { return nil, errEngineBdevScanEmptyDevList } var isStarted bool - resp, err = bdevScanEngineAssigned(ctx, engine, req, eCfgBdevs, &isStarted) + resp, err := bdevScanEngineAssigned(ctx, engine, req, bdevCfgs, &isStarted) + if err != nil { + return nil, err + } + + nrScannedBdevs, err := getEffCtrlrCount(resp.Ctrlrs) if err != nil { return nil, err } + if nrScannedBdevs == nrCfgBdevs { + return resp, nil + } // Retry once if engine provider scan returns unexpected number of controllers in case // engines claimed devices between when started state was checked and scan was executed. 
- if !isStarted && len(resp.Ctrlrs) != eCfgBdevs.Len() { + if !isStarted { engine.Debugf("retrying engine bdev scan as unexpected nr returned, want %d got %d", - eCfgBdevs.Len(), len(resp.Ctrlrs)) + nrCfgBdevs, nrScannedBdevs) - resp, err = bdevScanEngineAssigned(ctx, engine, req, eCfgBdevs, &isStarted) + resp, err = bdevScanEngineAssigned(ctx, engine, req, bdevCfgs, &isStarted) if err != nil { return nil, err } - } - if len(resp.Ctrlrs) != eCfgBdevs.Len() { - engine.Debugf("engine bdev scan returned unexpected nr, want %d got %d", - eCfgBdevs.Len(), len(resp.Ctrlrs)) + nrScannedBdevs, err := getEffCtrlrCount(resp.Ctrlrs) + if err != nil { + return nil, err + } + if nrScannedBdevs == nrCfgBdevs { + return resp, nil + } } - return + engine.Debugf("engine bdev scan returned unexpected nr, want %d got %d", nrCfgBdevs, + nrScannedBdevs) + + return resp, nil } func smdQueryEngine(ctx context.Context, engine Engine, pbReq *ctlpb.SmdQueryReq) (*ctlpb.SmdQueryResp_RankResp, error) { diff --git a/src/control/server/instance_storage_rpc_test.go b/src/control/server/instance_storage_rpc_test.go index dfa0a28bd3b..feb767ec915 100644 --- a/src/control/server/instance_storage_rpc_test.go +++ b/src/control/server/instance_storage_rpc_test.go @@ -16,6 +16,7 @@ import ( "github.com/daos-stack/daos/src/control/common/proto" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/server/config" "github.com/daos-stack/daos/src/control/server/engine" @@ -27,11 +28,8 @@ import ( func TestIOEngineInstance_bdevScanEngine(t *testing.T) { c := storage.MockNvmeController(2) defSmdScanRes := func() *ctlpb.SmdDevResp { - return &ctlpb.SmdDevResp{ - Devices: []*ctlpb.SmdDevice{ - proto.MockSmdDevice(c, 2), - }, - } + sd := proto.MockSmdDevice(c, 2) + return &ctlpb.SmdDevResp{Devices: 
[]*ctlpb.SmdDevice{sd}} } healthRespWithUsage := func() *ctlpb.BioHealthResp { mh := proto.MockNvmeHealth(2) @@ -43,6 +41,7 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { for name, tc := range map[string]struct { req ctlpb.ScanNvmeReq bdevAddrs []string + rank int provRes *storage.BdevScanResponse provErr error engStopped bool @@ -69,8 +68,20 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { }, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - proto.MockNvmeController(1), - proto.MockNvmeController(2), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(1) + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return c + }(), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(2) + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return c + }(), }, State: new(ctlpb.ResponseState), }, @@ -86,7 +97,13 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { engStopped: true, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - proto.MockNvmeController(1), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(1) + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return c + }(), }, State: new(ctlpb.ResponseState), }, @@ -106,27 +123,88 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { provErr: errors.New("provider scan fail"), expErr: errors.New("provider scan fail"), }, - "scan over drpc; no health or meta": { - smdRes: defSmdScanRes(), - healthRes: proto.MockNvmeHealth(2), + "engines stopped; scan over engine provider; vmd enabled": { + bdevAddrs: []string{"0000:05:05.5"}, + engStopped: true, + provRes: &storage.BdevScanResponse{ + Controllers: storage.NvmeControllers{ + &storage.NvmeController{PciAddr: "050505:01:00.0"}, + &storage.NvmeController{PciAddr: "050505:03:00.0"}, + }, + }, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ func() *ctlpb.NvmeController { - c := 
proto.MockNvmeController(2) - c.HealthStats = nil - c.SmdDevices = nil - return c + nc := &ctlpb.NvmeController{ + PciAddr: "050505:01:00.0", + } + nc.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return nc + }(), + func() *ctlpb.NvmeController { + nc := &ctlpb.NvmeController{ + PciAddr: "050505:03:00.0", + } + nc.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return nc }(), }, State: new(ctlpb.ResponseState), }, + expBackendScanCalls: []storage.BdevScanRequest{ + {DeviceList: storage.MustNewBdevDeviceList("0000:05:05.5")}, + }, }, "scan fails over drpc": { smdErr: errors.New("drpc fail"), expErr: errors.New("drpc fail"), }, + "scan over drpc; no req flags; rank and roles populated": { + req: ctlpb.ScanNvmeReq{}, + rank: 1, + smdRes: defSmdScanRes(), + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(2) + c.HealthStats = nil + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: 1, RoleBits: storage.BdevRoleAll}, + } + return c + }(), + }, + State: new(ctlpb.ResponseState), + }, + }, + "scan over drpc; no req flags; invalid rank": { + req: ctlpb.ScanNvmeReq{}, + rank: -1, + smdRes: defSmdScanRes(), + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(2) + c.HealthStats = nil + c.SmdDevices = []*ctlpb.SmdDevice{ + { + Rank: uint32(ranklist.NilRank), + RoleBits: storage.BdevRoleAll, + }, + } + return c + }(), + }, + State: new(ctlpb.ResponseState), + }, + }, "scan over drpc; with health": { req: ctlpb.ScanNvmeReq{Health: true}, + rank: 1, smdRes: defSmdScanRes(), healthRes: healthRespWithUsage(), expResp: &ctlpb.ScanNvmeResp{ @@ -134,15 +212,18 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { func() *ctlpb.NvmeController { c := proto.MockNvmeController(2) c.HealthStats = healthRespWithUsage() - c.SmdDevices = nil + c.SmdDevices = []*ctlpb.SmdDevice{ + 
{Rank: 1, RoleBits: storage.BdevRoleAll}, + } return c }(), }, State: new(ctlpb.ResponseState), }, }, - "scan over drpc; with smd": { + "scan over drpc; with meta": { req: ctlpb.ScanNvmeReq{Meta: true}, + rank: 1, smdRes: defSmdScanRes(), healthRes: healthRespWithUsage(), expResp: &ctlpb.ScanNvmeResp{ @@ -150,9 +231,9 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { func() *ctlpb.NvmeController { c := proto.MockNvmeController(2) c.HealthStats = nil - c.SmdDevices = []*ctlpb.SmdDevice{ - proto.MockSmdDevice(nil, 2), - } + sd := proto.MockSmdDevice(nil, 2) + sd.Rank = 1 + c.SmdDevices = []*ctlpb.SmdDevice{sd} return c }(), }, @@ -161,6 +242,7 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { }, "scan over drpc; with smd and health; usage and wal size reported": { req: ctlpb.ScanNvmeReq{Meta: true, Health: true}, + rank: 1, smdRes: defSmdScanRes(), healthRes: healthRespWithUsage(), expResp: &ctlpb.ScanNvmeResp{ @@ -169,6 +251,7 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { c := proto.MockNvmeController(2) c.HealthStats = healthRespWithUsage() sd := proto.MockSmdDevice(nil, 2) + sd.Rank = 1 sd.TotalBytes = c.HealthStats.TotalBytes sd.AvailBytes = c.HealthStats.AvailBytes sd.ClusterSize = c.HealthStats.ClusterSize @@ -250,6 +333,13 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { cs := newMockControlServiceFromBackends(t, log, sCfg, bmb, smb, nil, tc.engStopped) ei := cs.harness.Instances()[0].(*EngineInstance) + if tc.rank < 0 { + ei.setSuperblock(nil) + } else { + ei.setSuperblock(&Superblock{ + Rank: ranklist.NewRankPtr(uint32(tc.rank)), ValidRank: true, + }) + } resp, err := bdevScanEngine(test.Context(t), ei, &tc.req) test.CmpErr(t, tc.expErr, err) diff --git a/src/control/server/instance_superblock.go b/src/control/server/instance_superblock.go index 0d6ec613a8b..c7eff0e100f 100644 --- a/src/control/server/instance_superblock.go +++ b/src/control/server/instance_superblock.go @@ -85,11 +85,11 @@ func (ei 
*EngineInstance) hasSuperblock() bool { return ei.getSuperblock() != nil } -// NeedsSuperblock indicates whether or not the instance appears +// needsSuperblock indicates whether or not the instance appears // to need a superblock to be created in order to start. // // Should not be called if SCM format is required. -func (ei *EngineInstance) NeedsSuperblock() (bool, error) { +func (ei *EngineInstance) needsSuperblock() (bool, error) { if ei.hasSuperblock() { ei.log.Debugf("instance %d has no superblock set", ei.Index()) return false, nil @@ -116,7 +116,7 @@ func (ei *EngineInstance) createSuperblock() error { return errors.Errorf("can't create superblock: instance %d already started", ei.Index()) } - needsSuperblock, err := ei.NeedsSuperblock() // scm format completed by now + needsSuperblock, err := ei.needsSuperblock() // scm format completed by now if !needsSuperblock { return nil } diff --git a/src/control/server/instance_test.go b/src/control/server/instance_test.go index ea088285467..2792c95facc 100644 --- a/src/control/server/instance_test.go +++ b/src/control/server/instance_test.go @@ -284,3 +284,7 @@ func (mi *MockInstance) GetStorage() *storage.Provider { func (mi *MockInstance) Debugf(format string, args ...interface{}) { return } + +func (mi *MockInstance) Tracef(format string, args ...interface{}) { + return +} diff --git a/src/control/server/storage/bdev.go b/src/control/server/storage/bdev.go index d69326b4aaa..b2bc345bfac 100644 --- a/src/control/server/storage/bdev.go +++ b/src/control/server/storage/bdev.go @@ -124,13 +124,13 @@ func (nds *NvmeDevState) UnmarshalJSON(data []byte) error { // LedState represents the LED state of device. type LedState int32 -// LedState values representing the VMD LED state (see include/spdk/vmd.h). +// LedState values representing the VMD LED state (see src/proto/ctl/smd.proto). 
const ( - LedStateNormal LedState = iota + LedStateUnknown LedState = iota LedStateIdentify LedStateFaulty LedStateRebuild - LedStateUnknown + LedStateNormal ) func (vls LedState) String() string { @@ -389,7 +389,11 @@ type NvmeControllers []*NvmeController func (ncs NvmeControllers) String() string { var ss []string for _, c := range ncs { - ss = append(ss, c.PciAddr) + s := c.PciAddr + for _, sd := range c.SmdDevices { + s += fmt.Sprintf("-nsid%d-%s", sd.CtrlrNamespaceID, sd.Roles.String()) + } + ss = append(ss, s) } return strings.Join(ss, ", ") } diff --git a/src/control/server/storage/config.go b/src/control/server/storage/config.go index feab22be6cf..b79bd8e0105 100644 --- a/src/control/server/storage/config.go +++ b/src/control/server/storage/config.go @@ -43,6 +43,7 @@ const ( accelOptMoveName = "move" accelOptCRCName = "crc" + bdevRoleNoneName = "na" bdevRoleDataName = "data" bdevRoleMetaName = "meta" bdevRoleWALName = "wal" @@ -252,10 +253,18 @@ func (tcs TierConfigs) getBdevs(nvmeOnly bool) *BdevDeviceList { } func (tcs TierConfigs) Bdevs() *BdevDeviceList { + if len(tcs) == 0 { + return new(BdevDeviceList) + } + return tcs.getBdevs(false) } func (tcs TierConfigs) NVMeBdevs() *BdevDeviceList { + if len(tcs) == 0 { + return new(BdevDeviceList) + } + return tcs.getBdevs(true) } @@ -281,18 +290,34 @@ func (tcs TierConfigs) checkBdevs(nvmeOnly, emulOnly bool) bool { } func (tcs TierConfigs) HaveBdevs() bool { + if len(tcs) == 0 { + return false + } + return tcs.checkBdevs(false, false) } func (tcs TierConfigs) HaveRealNVMe() bool { + if len(tcs) == 0 { + return false + } + return tcs.checkBdevs(true, false) } func (tcs TierConfigs) HaveEmulatedNVMe() bool { + if len(tcs) == 0 { + return false + } + return tcs.checkBdevs(false, true) } func (tcs TierConfigs) HasBdevRoleMeta() bool { + if len(tcs) == 0 { + return false + } + for _, bc := range tcs.BdevConfigs() { bits := bc.Bdev.DeviceRoles.OptionBits if (bits & BdevRoleMeta) != 0 { @@ -442,6 +467,10 @@ func 
(tcs TierConfigs) validateBdevRoles() error { // - If the scm tier is of class dcpm, the first (and only) bdev tier should have the Data role. // - If emulated NVMe is present in bdev tiers, implicit role assignment is skipped. func (tcs TierConfigs) AssignBdevTierRoles(extMetadataPath string) error { + if len(tcs) == 0 { + return errors.New("no storage tiers configured") + } + if extMetadataPath == "" { return nil // MD-on-SSD not enabled. } @@ -846,6 +875,9 @@ func (obs *OptionBits) fromStrings(optStr2Flag optFlagMap, opts ...string) error if len(opt) == 0 { continue } + if strings.ToLower(opt) == bdevRoleNoneName { + break + } flag, exists := optStr2Flag[opt] if !exists { return FaultBdevConfigOptFlagUnknown(opt, optStr2Flag.keys()...) @@ -893,13 +925,19 @@ func (bdr BdevRoles) MarshalJSON() ([]byte, error) { // UnmarshalJSON decodes user readable roles string into bitmask. func (bdr *BdevRoles) UnmarshalJSON(data []byte) error { str := strings.Trim(strings.ToLower(string(data)), "\"") + if str == bdevRoleNoneName { + bdr.OptionBits = OptionBits(0) + return nil + } + return bdr.fromStrings(roleOptFlags, strings.Split(str, ",")...) 
} func (bdr *BdevRoles) String() string { - if bdr == nil { - return "none" + if bdr == nil || bdr.IsEmpty() { + return strings.ToUpper(bdevRoleNoneName) } + return bdr.toString(roleOptFlags) } diff --git a/src/control/server/storage/config_test.go b/src/control/server/storage/config_test.go index be4ddf8efc5..f595aaa4ffa 100644 --- a/src/control/server/storage/config_test.go +++ b/src/control/server/storage/config_test.go @@ -775,6 +775,29 @@ storage: } } +func TestStorage_BdevDeviceRoles_String(t *testing.T) { + for name, tc := range map[string]struct { + bits OptionBits + expOut string + }{ + "empty": { + bits: OptionBits(0), + expOut: "NA", + }, + "all": { + bits: OptionBits(BdevRoleAll), + expOut: "data,meta,wal", + }, + } { + t.Run(name, func(t *testing.T) { + bdr := BdevRoles{OptionBits: tc.bits} + if diff := cmp.Diff(bdr.String(), tc.expOut); diff != "" { + t.Fatalf("bad output (-want +got):\n%s", diff) + } + }) + } +} + func TestStorage_AccelProps_FromYAML(t *testing.T) { for name, tc := range map[string]struct { input string diff --git a/src/control/server/storage/provider.go b/src/control/server/storage/provider.go index 5d99ae38e9b..1bb4693732a 100644 --- a/src/control/server/storage/provider.go +++ b/src/control/server/storage/provider.go @@ -471,9 +471,10 @@ func BdevFormatRequestFromConfig(log logging.Logger, cfg *TierConfig) (BdevForma // BdevTierFormatResult contains details of a format operation result. 
type BdevTierFormatResult struct { - Tier int - Error error - Result *BdevFormatResponse + Tier int + DeviceRoles BdevRoles + Error error + Result *BdevFormatResponse } // FormatBdevTiers formats all the Bdev tiers in the engine storage @@ -505,6 +506,7 @@ func (p *Provider) FormatBdevTiers(ctrlrs NvmeControllers) (results []BdevTierFo p.RUnlock() results[i].Tier = cfg.Tier + results[i].DeviceRoles = cfg.Bdev.DeviceRoles if err := results[i].Error; err != nil { p.log.Errorf("Instance %d: format failed (%s)", err) continue diff --git a/src/mgmt/smd.pb-c.c b/src/mgmt/smd.pb-c.c index b3ed3284385..de49e886e19 100644 --- a/src/mgmt/smd.pb-c.c +++ b/src/mgmt/smd.pb-c.c @@ -2833,19 +2833,19 @@ const ProtobufCEnumDescriptor ctl__nvme_dev_state__descriptor = }; static const ProtobufCEnumValue ctl__led_state__enum_values_by_number[5] = { - { "OFF", "CTL__LED_STATE__OFF", 0 }, + { "NA", "CTL__LED_STATE__NA", 0 }, { "QUICK_BLINK", "CTL__LED_STATE__QUICK_BLINK", 1 }, { "ON", "CTL__LED_STATE__ON", 2 }, { "SLOW_BLINK", "CTL__LED_STATE__SLOW_BLINK", 3 }, - { "NA", "CTL__LED_STATE__NA", 4 }, + { "OFF", "CTL__LED_STATE__OFF", 4 }, }; static const ProtobufCIntRange ctl__led_state__value_ranges[] = { {0, 0},{0, 5} }; static const ProtobufCEnumValueIndex ctl__led_state__enum_values_by_name[5] = { - { "NA", 4 }, - { "OFF", 0 }, + { "NA", 0 }, + { "OFF", 4 }, { "ON", 2 }, { "QUICK_BLINK", 1 }, { "SLOW_BLINK", 3 }, diff --git a/src/mgmt/smd.pb-c.h b/src/mgmt/smd.pb-c.h index 19ac9fc3d14..fd4ca542b60 100644 --- a/src/mgmt/smd.pb-c.h +++ b/src/mgmt/smd.pb-c.h @@ -66,9 +66,9 @@ typedef enum _Ctl__NvmeDevState { } Ctl__NvmeDevState; typedef enum _Ctl__LedState { /* - * Equivalent to SPDK_VMD_LED_STATE_OFF + * Equivalent to SPDK_VMD_LED_STATE_UNKNOWN (VMD not enabled) */ - CTL__LED_STATE__OFF = 0, + CTL__LED_STATE__NA = 0, /* * Equivalent to SPDK_VMD_LED_STATE_IDENTIFY (4Hz blink) */ @@ -82,9 +82,9 @@ typedef enum _Ctl__LedState { */ CTL__LED_STATE__SLOW_BLINK = 3, /* - * Equivalent to 
SPDK_VMD_LED_STATE_UNKNOWN (VMD not enabled) + * Equivalent to SPDK_VMD_LED_STATE_OFF */ - CTL__LED_STATE__NA = 4 + CTL__LED_STATE__OFF = 4 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CTL__LED_STATE) } Ctl__LedState; typedef enum _Ctl__LedAction { @@ -305,7 +305,7 @@ struct _Ctl__NvmeController }; #define CTL__NVME_CONTROLLER__INIT \ { PROTOBUF_C_MESSAGE_INIT (&ctl__nvme_controller__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, NULL, 0,NULL, 0,NULL, CTL__NVME_DEV_STATE__UNKNOWN, CTL__LED_STATE__OFF, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, NULL, 0,NULL, 0,NULL, CTL__NVME_DEV_STATE__UNKNOWN, CTL__LED_STATE__NA, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string } /* @@ -558,7 +558,7 @@ struct _Ctl__LedManageReq }; #define CTL__LED_MANAGE_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&ctl__led_manage_req__descriptor) \ - , (char *)protobuf_c_empty_string, CTL__LED_ACTION__GET, CTL__LED_STATE__OFF, 0 } + , (char *)protobuf_c_empty_string, CTL__LED_ACTION__GET, CTL__LED_STATE__NA, 0 } struct _Ctl__DevReplaceReq diff --git a/src/mgmt/srv_query.c b/src/mgmt/srv_query.c index 7bfd31f85d8..b7e4bf8e5fb 100644 --- a/src/mgmt/srv_query.c +++ b/src/mgmt/srv_query.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ diff --git a/src/proto/ctl/smd.proto b/src/proto/ctl/smd.proto index 18b90798f41..e5494b73e4e 100644 --- a/src/proto/ctl/smd.proto +++ b/src/proto/ctl/smd.proto @@ -84,11 +84,11 @@ enum NvmeDevState { } enum LedState { - OFF = 0; // Equivalent to SPDK_VMD_LED_STATE_OFF + NA = 0; // Equivalent to SPDK_VMD_LED_STATE_UNKNOWN (VMD not enabled) QUICK_BLINK = 1; // Equivalent to SPDK_VMD_LED_STATE_IDENTIFY (4Hz blink) ON = 2; // Equivalent to SPDK_VMD_LED_STATE_FAULT (solid on) SLOW_BLINK = 3; // Equivalent to SPDK_VMD_LED_STATE_REBUILD (1Hz blink) - NA = 4; // Equivalent to SPDK_VMD_LED_STATE_UNKNOWN (VMD not enabled) + OFF = 4; // Equivalent to SPDK_VMD_LED_STATE_OFF } // NvmeController represents an NVMe Controller (SSD). diff --git a/src/proto/ctl/storage_nvme.proto b/src/proto/ctl/storage_nvme.proto index edafa4e42ba..944d8e943ba 100644 --- a/src/proto/ctl/storage_nvme.proto +++ b/src/proto/ctl/storage_nvme.proto @@ -19,6 +19,7 @@ import "ctl/smd.proto"; message NvmeControllerResult { string pci_addr = 1; // PCI address of NVMe controller ResponseState state = 2; // state of current operation + uint32 role_bits = 3; // Device active roles (bitmask) } message ScanNvmeReq { diff --git a/src/tests/ftest/control/dmg_storage_query.py b/src/tests/ftest/control/dmg_storage_query.py index a9c8a99c143..e3a2687dfd4 100644 --- a/src/tests/ftest/control/dmg_storage_query.py +++ b/src/tests/ftest/control/dmg_storage_query.py @@ -40,7 +40,7 @@ def get_bdev_info(self): for item, device in enumerate(sorted(tier.bdev_list.value)): bdev_info.append( {'bdev': device, - 'roles': ','.join(tier.bdev_roles.value or []), + 'roles': ','.join(tier.bdev_roles.value or ['NA']), 'tier': index, 'tgt_ids': list(range(item, targets, len(tier.bdev_list.value)))})