From 79cfb27b23f0809da275ae2213cc61b031c0cb81 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 25 Sep 2024 09:53:07 -0400 Subject: [PATCH 01/10] DAOS-16477 mgmt: return suspect engines for pool health query After significant failures, the system may leave behind some suspect engines that were marked as DEAD by the SWIM protocol, but were not excluded from the system to prevent data loss. An administrator can bring these ranks back online by restarting them. This PR aims to provide an administrative interface for querying suspect engines following a massive failure. These suspect engines can be retrieved using the daos/dmg --health-only command. Example output of dmg pool query --health-only: Pool 6f450a68-8c7d-4da9-8900-02691650f6a2, ntarget=8, disabled=2, leader=3, version=4, state=Degraded Pool health info: - Disabled ranks: 1 - Suspect ranks: 2 - Rebuild busy, 0 objs, 0 recs Required-githooks: true Signed-off-by: Wang Shilong --- src/control/cmd/daos/health.go | 9 + src/control/cmd/daos/pool.go | 3 + src/control/cmd/daos/pretty/health.go | 7 + src/control/cmd/daos/pretty/pool.go | 3 + src/control/cmd/daos/pretty/pool_test.go | 2 + src/control/common/proto/mgmt/pool.pb.go | 202 ++++---- src/control/lib/daos/pool.go | 6 +- src/control/lib/daos/pool_test.go | 21 +- src/include/daos_pool.h | 2 + src/include/daos_srv/pool.h | 9 +- src/mgmt/pool.pb-c.c | 590 +++++++++-------------- src/mgmt/pool.pb-c.h | 16 +- src/mgmt/srv_drpc.c | 22 +- src/mgmt/srv_internal.h | 7 +- src/mgmt/srv_pool.c | 8 +- src/mgmt/tests/mocks.c | 14 +- src/mgmt/tests/mocks.h | 1 + src/mgmt/tests/srv_drpc_tests.c | 8 +- src/pool/srv_cli.c | 90 +++- src/proto/mgmt/pool.proto | 1 + 20 files changed, 518 insertions(+), 503 deletions(-) diff --git a/src/control/cmd/daos/health.go b/src/control/cmd/daos/health.go index 70e54213084..33507e8ab7f 100644 --- a/src/control/cmd/daos/health.go +++ b/src/control/cmd/daos/health.go @@ -118,6 +118,15 @@ func (cmd *healthCheckCmd) Execute([]string) 
error { pool.DisabledRanks = tpi.DisabledRanks } + queryMask.ClearAll() + queryMask.SetOptions(daos.PoolQueryOptionSuspectEngines) + tpi, err = queryPool(poolHdl, queryMask) + if err != nil { + cmd.Errorf("failed to query pool %s: %v", pool.Label, err) + continue + } + pool.SuspectRanks = tpi.SuspectRanks + poolConts, err := listContainers(poolHdl) if err != nil { cmd.Errorf("failed to list containers on pool %s: %v", pool.Label, err) diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index 7a917b1a9e4..5dd28349479 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -329,6 +329,9 @@ func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.Poo if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { poolInfo.DisabledRanks = rs } + if queryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { + poolInfo.SuspectRanks = rs + } } return poolInfo, nil diff --git a/src/control/cmd/daos/pretty/health.go b/src/control/cmd/daos/pretty/health.go index 25c94e10f99..ee77cd72371 100644 --- a/src/control/cmd/daos/pretty/health.go +++ b/src/control/cmd/daos/pretty/health.go @@ -61,6 +61,13 @@ func printPoolHealth(out io.Writer, pi *daos.PoolInfo, verbose bool) { } var healthStrings []string + if pi.SuspectRanks != nil && pi.SuspectRanks.Count() > 0 { + degStr := "Suspect" + if verbose { + degStr += fmt.Sprintf(" %s", pi.SuspectRanks) + } + healthStrings = append(healthStrings, degStr) + } if pi.DisabledTargets > 0 { degStr := "Degraded" if verbose { diff --git a/src/control/cmd/daos/pretty/pool.go b/src/control/cmd/daos/pretty/pool.go index a9f685b536f..3ded03cba4d 100644 --- a/src/control/cmd/daos/pretty/pool.go +++ b/src/control/cmd/daos/pretty/pool.go @@ -53,6 +53,9 @@ func PrintPoolInfo(pi *daos.PoolInfo, out io.Writer) error { if pi.DisabledRanks.Count() > 0 { fmt.Fprintf(w, "- Disabled ranks: %s\n", pi.DisabledRanks) } + if pi.SuspectRanks != nil && pi.SuspectRanks.Count() > 0 { + fmt.Fprintf(w, "- 
Suspect ranks: %s\n", pi.SuspectRanks) + } if pi.Rebuild != nil { if pi.Rebuild.Status == 0 { fmt.Fprintf(w, "- Rebuild %s, %d objs, %d recs\n", diff --git a/src/control/cmd/daos/pretty/pool_test.go b/src/control/cmd/daos/pretty/pool_test.go index 3a1724e1dda..1a8dd2e6d92 100644 --- a/src/control/cmd/daos/pretty/pool_test.go +++ b/src/control/cmd/daos/pretty/pool_test.go @@ -137,6 +137,7 @@ Pool space info: PoolLayoutVer: 1, UpgradeLayoutVer: 2, DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"), + SuspectRanks: ranklist.MustCreateRankSet("[2]"), Rebuild: &daos.PoolRebuildStatus{ State: daos.PoolRebuildStateBusy, Objects: 42, @@ -158,6 +159,7 @@ Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=Degraded Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. Pool health info: - Disabled ranks: 0-1,3 +- Suspect ranks: 2 - Rebuild busy, 42 objs, 21 recs Pool space info: - Target(VOS) count:1 diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index d514bfa6c43..245d0771cad 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: mgmt/pool.proto @@ -1826,6 +1826,7 @@ type PoolQueryResp struct { SvcLdr uint32 `protobuf:"varint,18,opt,name=svc_ldr,json=svcLdr,proto3" json:"svc_ldr,omitempty"` // current raft leader (2.6+) SvcReps []uint32 `protobuf:"varint,19,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // service replica ranks QueryMask uint64 `protobuf:"varint,20,opt,name=query_mask,json=queryMask,proto3" json:"query_mask,omitempty"` // Bitmask of pool query options used + SuspectRanks string `protobuf:"bytes,21,opt,name=suspect_ranks,json=suspectRanks,proto3" json:"suspect_ranks,omitempty"` // optional set of suspect ranks } func (x *PoolQueryResp) Reset() { @@ -1993,6 +1994,13 @@ func (x *PoolQueryResp) GetQueryMask() uint64 { return 0 } +func (x *PoolQueryResp) GetSuspectRanks() string { + if x != nil { + return x.SuspectRanks + } + return "" +} + type PoolProperty struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3012,7 +3020,7 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, - 0x53, 0x59, 0x10, 0x02, 0x22, 0xc0, 0x05, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, + 0x53, 0x59, 0x10, 0x02, 0x22, 0xe5, 0x05, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, @@ -3055,103 +3063,105 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x13, 
0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x14, 0x20, 0x01, 0x28, 0x04, 0x52, 0x09, 0x71, 0x75, 0x65, 0x72, 0x79, - 0x4d, 0x61, 0x73, 0x6b, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, 0x61, - 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x50, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, - 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, - 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, - 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, 0x6d, - 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, 0x6d, - 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, 0x0a, - 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, - 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, - 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, - 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, - 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, - 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, - 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, - 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, - 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 
0x18, - 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x83, 0x01, - 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, - 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, - 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, - 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, - 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, - 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, - 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, - 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, - 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, - 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, - 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x4d, 0x61, 0x73, 0x6b, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x75, 0x73, 0x70, 0x65, 0x63, 0x74, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x75, 0x73, + 0x70, 0x65, 0x63, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, + 0x0b, 0x74, 0x6f, 0x74, 0x61, 
0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, + 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, + 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, + 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, + 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, + 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, + 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, - 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, - 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x81, - 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, + 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, + 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 
0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, + 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, + 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, - 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, - 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, - 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, - 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, - 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, - 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, - 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, 0x02, 0x0a, 0x13, 0x50, 0x6f, - 0x6f, 0x6c, 
0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, - 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, + 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, + 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, + 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, + 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, + 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, + 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 
0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, + 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, + 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, + 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, + 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, + 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 
0x6f, 0x6c, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, + 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x73, - 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, - 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, - 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, - 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, - 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, - 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, - 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, 0x06, 0x0a, - 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, - 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, - 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, - 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, - 0x06, 0x0a, 0x02, 0x55, 
0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, - 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, - 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, - 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, - 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, - 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, - 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, - 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x56, 0x0a, - 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, - 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, - 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x65, - 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, - 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, - 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, - 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, - 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, - 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x74, 
0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, + 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, + 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, + 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, + 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, + 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, + 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, + 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, + 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, + 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, + 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, + 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, + 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 
0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, + 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, + 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, + 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, + 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, + 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, + 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, + 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, + 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/lib/daos/pool.go b/src/control/lib/daos/pool.go index e47e6e2b23d..b06a44c9025 100644 --- a/src/control/lib/daos/pool.go +++ b/src/control/lib/daos/pool.go @@ -77,6 +77,7 @@ type ( TierStats []*StorageUsageStats `json:"tier_stats"` EnabledRanks *ranklist.RankSet `json:"enabled_ranks,omitempty"` DisabledRanks *ranklist.RankSet `json:"disabled_ranks,omitempty"` + SuspectRanks *ranklist.RankSet `json:"suspect_ranks,omitempty"` PoolLayoutVer uint32 `json:"pool_layout_ver"` UpgradeLayoutVer uint32 `json:"upgrade_layout_ver"` } @@ -104,7 +105,7 @@ type ( const ( // DefaultPoolQueryMask defines the default pool query mask. 
- DefaultPoolQueryMask = PoolQueryMask(^uint64(0) &^ C.DPI_ENGINES_ENABLED) + DefaultPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_ENGINES_SUSPECT)) // HealthOnlyPoolQueryMask defines the mask for health-only queries. HealthOnlyPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_SPACE)) @@ -116,6 +117,8 @@ const ( PoolQueryOptionEnabledEngines = "enabled_engines" // PoolQueryOptionDisabledEngines retrieves disabled engines as part of the pool query. PoolQueryOptionDisabledEngines = "disabled_engines" + // PoolQueryOptionSuspectEngines retrieves suspect engines as part of the pool query. + PoolQueryOptionSuspectEngines = "suspect_engines" // PoolConnectFlagReadOnly indicates that the connection is read-only. PoolConnectFlagReadOnly = C.DAOS_PC_RO @@ -130,6 +133,7 @@ var poolQueryOptMap = map[C.int]string{ C.DPI_REBUILD_STATUS: PoolQueryOptionRebuild, C.DPI_ENGINES_ENABLED: PoolQueryOptionEnabledEngines, C.DPI_ENGINES_DISABLED: PoolQueryOptionDisabledEngines, + C.DPI_ENGINES_SUSPECT: PoolQueryOptionSuspectEngines, } func resolvePoolQueryOpt(name string) (C.int, error) { diff --git a/src/control/lib/daos/pool_test.go b/src/control/lib/daos/pool_test.go index 3ff1ca098d2..1b91b9f5520 100644 --- a/src/control/lib/daos/pool_test.go +++ b/src/control/lib/daos/pool_test.go @@ -136,13 +136,14 @@ func TestDaos_PoolQueryMask(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { *pqm = HealthOnlyPoolQueryMask }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSuspectEngines), }, "set query all=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetAll() }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, + 
PoolQueryOptionRebuild, PoolQueryOptionSpace, PoolQueryOptionSuspectEngines), }, "set query all=false": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -162,7 +163,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionSpace) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, + PoolQueryOptionRebuild, PoolQueryOptionSuspectEngines), }, "set query space=false (already false)": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -181,7 +183,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionRebuild) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, "set query enabled_engines=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -194,7 +197,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionEnabledEngines) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, "set query disabled_engines=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -207,7 +211,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionDisabledEngines) }), - expString: genOptsStr(PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, } { t.Run(name, func(t *testing.T) { @@ -232,7 +237,7 @@ func 
TestDaos_PoolQueryMaskMarshalJSON(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetAll() }), - expJSON: []byte(`"disabled_engines,enabled_engines,rebuild,space"`), + expJSON: []byte(`"disabled_engines,enabled_engines,rebuild,space,suspect_engines"`), }, } { t.Run(name, func(t *testing.T) { @@ -262,7 +267,7 @@ func TestDaos_PoolQueryMaskUnmarshalJSON(t *testing.T) { }, "uint64 value": { testData: []byte("18446744073709551603"), - expString: "rebuild,space", + expString: "rebuild,space,suspect_engines", }, "string values": { testData: []byte("rebuild,disabled_engines"), diff --git a/src/include/daos_pool.h b/src/include/daos_pool.h index 73f44368913..a8ab2e6c6a2 100644 --- a/src/include/daos_pool.h +++ b/src/include/daos_pool.h @@ -162,6 +162,8 @@ enum daos_pool_info_bit { DPI_ENGINES_ENABLED = 1ULL << 2, /** true to include (in \a ranks) engines with some or all targets disabled (down). */ DPI_ENGINES_DISABLED = 1ULL << 3, + /** true to include (in \a ranks) suspect engines. 
*/ + DPI_ENGINES_SUSPECT = 1ULL << 4, /** query all above optional info */ DPI_ALL = -1, }; diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index ccaa4bf506d..a8ea070446b 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -304,10 +304,11 @@ int dsc_pool_svc_delete_acl(uuid_t pool_uuid, d_rank_list_t *ranks, uint64_t dea enum daos_acl_principal_type principal_type, const char *principal_name); -int dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, - d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ranks, - daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver); +int + dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, + d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ranks, + d_rank_list_t **suspect_ranks, daos_pool_info_t *pool_info, + uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver); int dsc_pool_svc_query_target(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, d_rank_t rank, uint32_t tgt_idx, daos_target_info_t *ti); diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index 6fed6ca6973..86cdbd6da4a 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -2110,43 +2110,33 @@ const ProtobufCMessageDescriptor mgmt__pool_drain_req__descriptor = (ProtobufCMessageInit) mgmt__pool_drain_req__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_drain_resp__field_descriptors[1] = -{ - { - "status", - 1, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_INT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolDrainResp, status), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, +static const ProtobufCFieldDescriptor mgmt__pool_drain_resp__field_descriptors[1] = { + { + "status", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolDrainResp, status), NULL, NULL, 0, /* flags 
*/ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_drain_resp__field_indices_by_name[] = { - 0, /* field[0] = status */ -}; -static const ProtobufCIntRange mgmt__pool_drain_resp__number_ranges[1 + 1] = -{ - { 1, 0 }, - { 0, 1 } + 0, /* field[0] = status */ }; -const ProtobufCMessageDescriptor mgmt__pool_drain_resp__descriptor = -{ - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "mgmt.PoolDrainResp", - "PoolDrainResp", - "Mgmt__PoolDrainResp", - "mgmt", - sizeof(Mgmt__PoolDrainResp), - 1, - mgmt__pool_drain_resp__field_descriptors, - mgmt__pool_drain_resp__field_indices_by_name, - 1, mgmt__pool_drain_resp__number_ranges, - (ProtobufCMessageInit) mgmt__pool_drain_resp__init, - NULL,NULL,NULL /* reserved[123] */ +static const ProtobufCIntRange mgmt__pool_drain_resp__number_ranges[1 + 1] = {{1, 0}, {0, 1}}; +const ProtobufCMessageDescriptor mgmt__pool_drain_resp__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.PoolDrainResp", + "PoolDrainResp", + "Mgmt__PoolDrainResp", + "mgmt", + sizeof(Mgmt__PoolDrainResp), + 1, + mgmt__pool_drain_resp__field_descriptors, + mgmt__pool_drain_resp__field_indices_by_name, + 1, + mgmt__pool_drain_resp__number_ranges, + (ProtobufCMessageInit)mgmt__pool_drain_resp__init, + NULL, + NULL, + NULL /* reserved[123] */ }; static const ProtobufCFieldDescriptor mgmt__pool_extend_req__field_descriptors[6] = { { @@ -2192,76 +2182,58 @@ static const unsigned mgmt__pool_extend_req__field_indices_by_name[] = { 0, /* field[0] = sys */ 4, /* field[4] = tier_bytes */ }; -static const ProtobufCIntRange mgmt__pool_extend_req__number_ranges[1 + 1] = -{ - { 1, 0 }, - { 0, 6 } -}; -const ProtobufCMessageDescriptor mgmt__pool_extend_req__descriptor = -{ - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "mgmt.PoolExtendReq", - "PoolExtendReq", - "Mgmt__PoolExtendReq", - "mgmt", - sizeof(Mgmt__PoolExtendReq), - 6, - mgmt__pool_extend_req__field_descriptors, - mgmt__pool_extend_req__field_indices_by_name, - 1, 
mgmt__pool_extend_req__number_ranges, - (ProtobufCMessageInit) mgmt__pool_extend_req__init, - NULL,NULL,NULL /* reserved[123] */ -}; -static const ProtobufCFieldDescriptor mgmt__pool_extend_resp__field_descriptors[2] = -{ - { - "status", +static const ProtobufCIntRange mgmt__pool_extend_req__number_ranges[1 + 1] = {{1, 0}, {0, 6}}; +const ProtobufCMessageDescriptor mgmt__pool_extend_req__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.PoolExtendReq", + "PoolExtendReq", + "Mgmt__PoolExtendReq", + "mgmt", + sizeof(Mgmt__PoolExtendReq), + 6, + mgmt__pool_extend_req__field_descriptors, + mgmt__pool_extend_req__field_indices_by_name, 1, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_INT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolExtendResp, status), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "tier_bytes", - 2, - PROTOBUF_C_LABEL_REPEATED, - PROTOBUF_C_TYPE_UINT64, - offsetof(Mgmt__PoolExtendResp, n_tier_bytes), - offsetof(Mgmt__PoolExtendResp, tier_bytes), + mgmt__pool_extend_req__number_ranges, + (ProtobufCMessageInit)mgmt__pool_extend_req__init, NULL, NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, + NULL /* reserved[123] */ }; -static const unsigned mgmt__pool_extend_resp__field_indices_by_name[] = { - 0, /* field[0] = status */ - 1, /* field[1] = tier_bytes */ +static const ProtobufCFieldDescriptor mgmt__pool_extend_resp__field_descriptors[2] = { + { + "status", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolExtendResp, status), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "tier_bytes", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_UINT64, + offsetof(Mgmt__PoolExtendResp, n_tier_bytes), offsetof(Mgmt__PoolExtendResp, tier_bytes), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, }; -static const ProtobufCIntRange mgmt__pool_extend_resp__number_ranges[1 + 1] 
= -{ - { 1, 0 }, - { 0, 2 } +static const unsigned mgmt__pool_extend_resp__field_indices_by_name[] = { + 0, /* field[0] = status */ + 1, /* field[1] = tier_bytes */ }; -const ProtobufCMessageDescriptor mgmt__pool_extend_resp__descriptor = -{ - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "mgmt.PoolExtendResp", - "PoolExtendResp", - "Mgmt__PoolExtendResp", - "mgmt", - sizeof(Mgmt__PoolExtendResp), - 2, - mgmt__pool_extend_resp__field_descriptors, - mgmt__pool_extend_resp__field_indices_by_name, - 1, mgmt__pool_extend_resp__number_ranges, - (ProtobufCMessageInit) mgmt__pool_extend_resp__init, - NULL,NULL,NULL /* reserved[123] */ +static const ProtobufCIntRange mgmt__pool_extend_resp__number_ranges[1 + 1] = {{1, 0}, {0, 2}}; +const ProtobufCMessageDescriptor mgmt__pool_extend_resp__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.PoolExtendResp", + "PoolExtendResp", + "Mgmt__PoolExtendResp", + "mgmt", + sizeof(Mgmt__PoolExtendResp), + 2, + mgmt__pool_extend_resp__field_descriptors, + mgmt__pool_extend_resp__field_indices_by_name, + 1, + mgmt__pool_extend_resp__number_ranges, + (ProtobufCMessageInit)mgmt__pool_extend_resp__init, + NULL, + NULL, + NULL /* reserved[123] */ }; static const ProtobufCFieldDescriptor mgmt__pool_reintegrate_req__field_descriptors[6] = { { @@ -2996,278 +2968,162 @@ const ProtobufCMessageDescriptor mgmt__pool_rebuild_status__descriptor = (ProtobufCMessageInit) mgmt__pool_rebuild_status__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_query_resp__field_descriptors[19] = -{ - { - "status", - 1, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_INT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, status), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "uuid", - 2, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, uuid), - NULL, - &protobuf_c_empty_string, - 0, /* flags */ - 
0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "label", - 3, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, label), - NULL, - &protobuf_c_empty_string, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "total_targets", - 4, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, total_targets), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "active_targets", - 5, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, active_targets), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "disabled_targets", - 6, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, disabled_targets), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "rebuild", - 7, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, rebuild), - &mgmt__pool_rebuild_status__descriptor, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "tier_stats", - 8, - PROTOBUF_C_LABEL_REPEATED, - PROTOBUF_C_TYPE_MESSAGE, - offsetof(Mgmt__PoolQueryResp, n_tier_stats), - offsetof(Mgmt__PoolQueryResp, tier_stats), - &mgmt__storage_usage_stats__descriptor, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "version", - 10, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, version), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "leader", - 11, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, leader), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* 
reserved1,reserved2, etc */ - }, - { - "enabled_ranks", - 12, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, enabled_ranks), - NULL, - &protobuf_c_empty_string, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "disabled_ranks", - 13, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, disabled_ranks), - NULL, - &protobuf_c_empty_string, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "total_engines", - 14, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, total_engines), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "pool_layout_ver", - 15, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, pool_layout_ver), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "upgrade_layout_ver", - 16, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, upgrade_layout_ver), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "state", - 17, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_ENUM, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, state), - &mgmt__pool_service_state__descriptor, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "svc_ldr", - 18, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_UINT32, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, svc_ldr), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, - { - "svc_reps", - 19, - PROTOBUF_C_LABEL_REPEATED, - PROTOBUF_C_TYPE_UINT32, - offsetof(Mgmt__PoolQueryResp, n_svc_reps), - offsetof(Mgmt__PoolQueryResp, svc_reps), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* 
reserved1,reserved2, etc */ - }, - { - "query_mask", +static const ProtobufCFieldDescriptor mgmt__pool_query_resp__field_descriptors[20] = { + { + "status", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, status), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "uuid", 2, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, uuid), NULL, &protobuf_c_empty_string, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "label", 3, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, label), NULL, &protobuf_c_empty_string, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "total_targets", 4, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, total_targets), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "active_targets", 5, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, active_targets), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "disabled_targets", 6, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, disabled_targets), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "rebuild", 7, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_MESSAGE, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, rebuild), &mgmt__pool_rebuild_status__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "tier_stats", 8, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__PoolQueryResp, n_tier_stats), offsetof(Mgmt__PoolQueryResp, tier_stats), + &mgmt__storage_usage_stats__descriptor, NULL, 0, /* flags */ + 0, NULL, 
NULL /* reserved1,reserved2, etc */ + }, + { + "version", 10, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, version), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "leader", 11, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, leader), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "enabled_ranks", 12, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, enabled_ranks), NULL, &protobuf_c_empty_string, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "disabled_ranks", 13, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, disabled_ranks), NULL, &protobuf_c_empty_string, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "total_engines", 14, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, total_engines), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "pool_layout_ver", 15, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, pool_layout_ver), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "upgrade_layout_ver", 16, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, upgrade_layout_ver), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "state", 17, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_ENUM, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, state), &mgmt__pool_service_state__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "svc_ldr", 18, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* 
quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, svc_ldr), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "svc_reps", 19, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_UINT32, + offsetof(Mgmt__PoolQueryResp, n_svc_reps), offsetof(Mgmt__PoolQueryResp, svc_reps), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "query_mask", 20, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT64, 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, query_mask), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "suspect_ranks", 21, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, suspect_ranks), NULL, &protobuf_c_empty_string, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__pool_query_resp__field_indices_by_name[] = { + 4, /* field[4] = active_targets */ + 11, /* field[11] = disabled_ranks */ + 5, /* field[5] = disabled_targets */ + 10, /* field[10] = enabled_ranks */ + 2, /* field[2] = label */ + 9, /* field[9] = leader */ + 13, /* field[13] = pool_layout_ver */ + 18, /* field[18] = query_mask */ + 6, /* field[6] = rebuild */ + 15, /* field[15] = state */ + 0, /* field[0] = status */ + 19, /* field[19] = suspect_ranks */ + 16, /* field[16] = svc_ldr */ + 17, /* field[17] = svc_reps */ + 7, /* field[7] = tier_stats */ + 12, /* field[12] = total_engines */ + 3, /* field[3] = total_targets */ + 14, /* field[14] = upgrade_layout_ver */ + 1, /* field[1] = uuid */ + 8, /* field[8] = version */ +}; +static const ProtobufCIntRange mgmt__pool_query_resp__number_ranges[2 + 1] = { + {1, 0}, {10, 8}, {0, 20}}; +const ProtobufCMessageDescriptor mgmt__pool_query_resp__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.PoolQueryResp", + "PoolQueryResp", + "Mgmt__PoolQueryResp", + "mgmt", + sizeof(Mgmt__PoolQueryResp), 20, - PROTOBUF_C_LABEL_NONE, - 
PROTOBUF_C_TYPE_UINT64, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryResp, query_mask), + mgmt__pool_query_resp__field_descriptors, + mgmt__pool_query_resp__field_indices_by_name, + 2, + mgmt__pool_query_resp__number_ranges, + (ProtobufCMessageInit)mgmt__pool_query_resp__init, NULL, NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned mgmt__pool_query_resp__field_indices_by_name[] = { - 4, /* field[4] = active_targets */ - 11, /* field[11] = disabled_ranks */ - 5, /* field[5] = disabled_targets */ - 10, /* field[10] = enabled_ranks */ - 2, /* field[2] = label */ - 9, /* field[9] = leader */ - 13, /* field[13] = pool_layout_ver */ - 18, /* field[18] = query_mask */ - 6, /* field[6] = rebuild */ - 15, /* field[15] = state */ - 0, /* field[0] = status */ - 16, /* field[16] = svc_ldr */ - 17, /* field[17] = svc_reps */ - 7, /* field[7] = tier_stats */ - 12, /* field[12] = total_engines */ - 3, /* field[3] = total_targets */ - 14, /* field[14] = upgrade_layout_ver */ - 1, /* field[1] = uuid */ - 8, /* field[8] = version */ -}; -static const ProtobufCIntRange mgmt__pool_query_resp__number_ranges[2 + 1] = -{ - { 1, 0 }, - { 10, 8 }, - { 0, 19 } -}; -const ProtobufCMessageDescriptor mgmt__pool_query_resp__descriptor = -{ - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "mgmt.PoolQueryResp", - "PoolQueryResp", - "Mgmt__PoolQueryResp", - "mgmt", - sizeof(Mgmt__PoolQueryResp), - 19, - mgmt__pool_query_resp__field_descriptors, - mgmt__pool_query_resp__field_indices_by_name, - 2, mgmt__pool_query_resp__number_ranges, - (ProtobufCMessageInit) mgmt__pool_query_resp__init, - NULL,NULL,NULL /* reserved[123] */ + NULL /* reserved[123] */ }; static const ProtobufCFieldDescriptor mgmt__pool_property__field_descriptors[3] = { diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index 10ea70360d9..31b755977e3 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -857,11 +857,19 @@ struct _Mgmt__PoolQueryResp * Bitmask of 
pool query options used */ uint64_t query_mask; + /* + * optional set of suspect ranks + */ + char *suspect_ranks; }; -#define MGMT__POOL_QUERY_RESP__INIT \ - { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_query_resp__descriptor) \ - , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, NULL, 0,NULL, 0, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, MGMT__POOL_SERVICE_STATE__Creating, 0, 0,NULL, 0 } - +#define MGMT__POOL_QUERY_RESP__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT(&mgmt__pool_query_resp__descriptor) \ + , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, \ + NULL, 0, NULL, 0, 0, (char *)protobuf_c_empty_string, \ + (char *)protobuf_c_empty_string, 0, 0, 0, MGMT__POOL_SERVICE_STATE__Creating, \ + 0, 0, NULL, 0, (char *)protobuf_c_empty_string \ + } typedef enum { MGMT__POOL_PROPERTY__VALUE__NOT_SET = 0, diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index be1a67a8c54..177170ff899 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -394,7 +394,8 @@ static int pool_create_fill_resp(Mgmt__PoolCreateResp *resp, uuid_t uuid, d_rank D_DEBUG(DB_MGMT, "%d service replicas\n", svc_ranks->rl_nr); - rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, NULL, &pool_info, NULL, NULL); + rc = + ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, NULL, NULL, &pool_info, NULL, NULL); if (rc != 0) { D_ERROR("Failed to query created pool: rc=%d\n", rc); D_GOTO(out, rc); @@ -1746,8 +1747,10 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) d_rank_list_t *svc_ranks = NULL; d_rank_list_t *enabled_ranks = NULL; d_rank_list_t *disabled_ranks = NULL; + d_rank_list_t *suspect_ranks = NULL; char *enabled_ranks_str = NULL; char *disabled_ranks_str = NULL; + char *suspect_ranks_str = NULL; size_t len; uint8_t *body; @@ -1771,8 +1774,8 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_GOTO(error, rc = -DER_NOMEM); pool_info.pi_bits = 
req->query_mask; - rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, &disabled_ranks, &pool_info, - &resp.pool_layout_ver, &resp.upgrade_layout_ver); + rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, &disabled_ranks, &suspect_ranks, + &pool_info, &resp.pool_layout_ver, &resp.upgrade_layout_ver); if (rc != 0) { DL_ERROR(rc, DF_UUID ": Failed to query the pool", DP_UUID(uuid)); D_GOTO(error, rc); @@ -1794,9 +1797,18 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) DP_UUID(uuid)); D_GOTO(error, rc); } + rc = d_rank_list_to_str(suspect_ranks, &suspect_ranks_str); + if (rc != 0) { + DL_ERROR(rc, DF_UUID ": Failed to serialize the list of suspect ranks", + DP_UUID(uuid)); + D_GOTO(error, rc); + } if (disabled_ranks_str != NULL) D_DEBUG(DB_MGMT, DF_UUID ": list of disabled ranks: %s\n", DP_UUID(uuid), disabled_ranks_str); + if (suspect_ranks_str != NULL) + D_DEBUG(DB_MGMT, DF_UUID ": list of suspect ranks: %s\n", DP_UUID(uuid), + suspect_ranks_str); /* Populate the response */ resp.query_mask = pool_info.pi_bits; @@ -1813,6 +1825,8 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) resp.enabled_ranks = enabled_ranks_str; if (disabled_ranks_str != NULL) resp.disabled_ranks = disabled_ranks_str; + if (suspect_ranks_str != NULL) + resp.suspect_ranks = suspect_ranks_str; D_ALLOC_ARRAY(resp.tier_stats, DAOS_MEDIA_MAX); if (resp.tier_stats == NULL) @@ -1850,6 +1864,8 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_FREE(enabled_ranks_str); d_rank_list_free(disabled_ranks); D_FREE(disabled_ranks_str); + d_rank_list_free(suspect_ranks); + D_FREE(suspect_ranks_str); d_rank_list_free(svc_ranks); pool_query_free_tier_stats(&resp); } diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index 11fe77c9b1c..a2cbaf0890e 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -114,9 +114,10 @@ int ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, struct 
daos_pool_cont_info **containers, uint64_t *ncontainers); int - ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **enabled_ranks, - d_rank_list_t **disabled_ranks, daos_pool_info_t *pool_info, - uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver); + ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **enabled_ranks, + d_rank_list_t **disabled_ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, + uint32_t *upgrade_layout_ver); int ds_mgmt_pool_query_targets(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_t rank, d_rank_list_t *tgts, daos_target_info_t **infos); diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index 0497058191c..0609d7de7d5 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -398,8 +398,9 @@ ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, */ int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **enabled_ranks, - d_rank_list_t **disabled_ranks, daos_pool_info_t *pool_info, - uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) + d_rank_list_t **disabled_ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, + uint32_t *upgrade_layout_ver) { if (pool_info == NULL) { D_ERROR("pool_info was NULL\n"); @@ -409,7 +410,8 @@ ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **e D_DEBUG(DB_MGMT, "Querying pool "DF_UUID"\n", DP_UUID(pool_uuid)); return dsc_pool_svc_query(pool_uuid, svc_ranks, mgmt_ps_call_deadline(), enabled_ranks, - disabled_ranks, pool_info, pool_layout_ver, upgrade_layout_ver); + disabled_ranks, suspect_ranks, pool_info, pool_layout_ver, + upgrade_layout_ver); } /** diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index 912a36f293a..c22df3846e0 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -281,11 +281,13 @@ daos_pool_info_t ds_mgmt_pool_query_info_in; void 
*ds_mgmt_pool_query_info_ptr; d_rank_list_t *ds_mgmt_pool_query_enabled_ranks_out; d_rank_list_t *ds_mgmt_pool_query_disabled_ranks_out; +d_rank_list_t *ds_mgmt_pool_query_suspect_ranks_out; int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **enabled_ranks, - d_rank_list_t **disabled_ranks, daos_pool_info_t *pool_info, - uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) + d_rank_list_t **disabled_ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, + uint32_t *upgrade_layout_ver) { /* If function is to return with an error, pool_info and ranks will not be filled. */ if (ds_mgmt_pool_query_return != 0) @@ -310,6 +312,13 @@ ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **e ds_mgmt_pool_query_disabled_ranks_out = *disabled_ranks; } + if ((pool_info->pi_bits & DPI_ENGINES_SUSPECT) != 0) { + D_ASSERT(suspect_ranks != NULL); + + *suspect_ranks = d_rank_list_alloc(2); /* 0-1 ; caller must free this */ + ds_mgmt_pool_query_suspect_ranks_out = *suspect_ranks; + } + ds_mgmt_pool_query_info_in = *pool_info; *pool_info = ds_mgmt_pool_query_info_out; @@ -325,6 +334,7 @@ mock_ds_mgmt_pool_query_setup(void) memset(&ds_mgmt_pool_query_info_out, 0, sizeof(daos_pool_info_t)); ds_mgmt_pool_query_enabled_ranks_out = NULL; ds_mgmt_pool_query_disabled_ranks_out = NULL; + ds_mgmt_pool_query_suspect_ranks_out = NULL; } int ds_mgmt_pool_query_targets_return; diff --git a/src/mgmt/tests/mocks.h b/src/mgmt/tests/mocks.h index 8bd0ffe998a..2ecc03d8ed9 100644 --- a/src/mgmt/tests/mocks.h +++ b/src/mgmt/tests/mocks.h @@ -110,6 +110,7 @@ extern daos_pool_info_t ds_mgmt_pool_query_info_out; extern void *ds_mgmt_pool_query_info_ptr; extern d_rank_list_t *ds_mgmt_pool_query_enabled_ranks_out; extern d_rank_list_t *ds_mgmt_pool_query_disabled_ranks_out; +extern d_rank_list_t *ds_mgmt_pool_query_suspect_ranks_out; void mock_ds_mgmt_pool_query_setup(void); diff --git 
a/src/mgmt/tests/srv_drpc_tests.c b/src/mgmt/tests/srv_drpc_tests.c index 85cd31deefe..a0682af6330 100644 --- a/src/mgmt/tests/srv_drpc_tests.c +++ b/src/mgmt/tests/srv_drpc_tests.c @@ -1408,12 +1408,13 @@ test_drpc_pool_query_success(void **state) Drpc__Response resp = DRPC__RESPONSE__INIT; uuid_t exp_uuid; daos_pool_info_t exp_info = {0}; + uint64_t flags = DPI_ENGINES_ENABLED | DPI_ENGINES_DISABLED | DPI_ENGINES_SUSPECT; init_test_pool_info(&exp_info); init_test_rebuild_status(&exp_info.pi_rebuild_st); ds_mgmt_pool_query_info_out = exp_info; - setup_pool_query_drpc_call(&call, TEST_UUID, DPI_ENGINES_ENABLED | DPI_ENGINES_DISABLED); + setup_pool_query_drpc_call(&call, TEST_UUID, flags); ds_mgmt_drpc_pool_query(&call, &resp); @@ -1424,8 +1425,9 @@ test_drpc_pool_query_success(void **state) assert_non_null(ds_mgmt_pool_query_info_ptr); assert_non_null(ds_mgmt_pool_query_enabled_ranks_out); assert_non_null(ds_mgmt_pool_query_disabled_ranks_out); - assert_int_equal(ds_mgmt_pool_query_info_in.pi_bits, - DEFAULT_QUERY_BITS | DPI_ENGINES_ENABLED | DPI_ENGINES_DISABLED); + assert_non_null(ds_mgmt_pool_query_suspect_ranks_out); + flags |= DEFAULT_QUERY_BITS; + assert_int_equal(ds_mgmt_pool_query_info_in.pi_bits, DEFAULT_QUERY_BITS | flags); expect_query_resp_with_info(&exp_info, MGMT__POOL_REBUILD_STATUS__STATE__IDLE, diff --git a/src/pool/srv_cli.c b/src/pool/srv_cli.c index 0545480fe29..5630394b154 100644 --- a/src/pool/srv_cli.c +++ b/src/pool/srv_cli.c @@ -337,6 +337,7 @@ dsc_pool_svc_call(uuid_t uuid, d_rank_list_t *ranks, struct dsc_pool_svc_call_cb struct pool_query_arg { d_rank_list_t **pqa_enabled_ranks; d_rank_list_t **pqa_disabled_ranks; + d_rank_list_t **pqa_suspect_ranks; daos_pool_info_t *pqa_info; uint32_t *pqa_layout_ver; uint32_t *pqa_upgrade_layout_ver; @@ -367,16 +368,65 @@ pool_query_init(uuid_t pool_uuid, crt_rpc_t *rpc, void *varg) return rc; } +static int +pool_map_get_suspect_ranks(struct pool_map *map, d_rank_list_t **ranks) +{ + crt_group_t 
*primary_grp; + struct pool_domain *doms; + int doms_cnt; + int i; + int rc = 0; + d_rank_list_t *rank_list = NULL; + + doms_cnt = pool_map_find_ranks(map, PO_COMP_ID_ALL, &doms); + D_ASSERT(doms_cnt >= 0); + primary_grp = crt_group_lookup(NULL); + D_ASSERT(primary_grp != NULL); + + rank_list = d_rank_list_alloc(0); + if (!rank_list) + return -DER_NOMEM; + + for (i = 0; i < doms_cnt; i++) { + struct swim_member_state state; + + if (!(doms[i].do_comp.co_status & PO_COMP_ST_UPIN)) + continue; + + rc = crt_rank_state_get(primary_grp, doms[i].do_comp.co_rank, &state); + if (rc != 0 && rc != -DER_NONEXIST) { + D_ERROR("failed to get status of rank %u: %d\n", doms[i].do_comp.co_rank, + rc); + break; + } + + D_DEBUG(DB_MD, "rank/state %d/%d\n", doms[i].do_comp.co_rank, + rc == -DER_NONEXIST ? -1 : state.sms_status); + if (rc == -DER_NONEXIST || state.sms_status == SWIM_MEMBER_DEAD) { + rc = d_rank_list_append(rank_list, doms[i].do_comp.co_rank); + if (rc) + D_GOTO(err, rc); + } + } +err: + if (rc == 0) + *ranks = rank_list; + else + d_rank_list_free(rank_list); + return rc; +} + static int process_query_result(d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ranks, - daos_pool_info_t *info, uuid_t pool_uuid, uint32_t map_version, - uint32_t leader_rank, struct daos_pool_space *ps, + d_rank_list_t **suspect_ranks, daos_pool_info_t *info, uuid_t pool_uuid, + uint32_t map_version, uint32_t leader_rank, struct daos_pool_space *ps, struct daos_rebuild_status *rs, struct pool_buf *map_buf, uint64_t pi_bits) { struct pool_map *map = NULL; unsigned int num_disabled = 0; d_rank_list_t *enabled_rank_list = NULL; d_rank_list_t *disabled_rank_list = NULL; + d_rank_list_t *suspect_rank_list = NULL; int rc; rc = pool_map_create(map_buf, map_version, &map); @@ -424,6 +474,22 @@ process_query_result(d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ran D_DEBUG(DB_MD, DF_UUID ": found %" PRIu32 " disabled ranks in pool map\n", DP_UUID(pool_uuid), disabled_rank_list->rl_nr); } 
+ if ((pi_bits & DPI_ENGINES_SUSPECT) != 0) { + if (suspect_ranks == NULL) { + DL_ERROR(-DER_INVAL, + DF_UUID ": query pool requested suspect ranks, but ptr is NULL", + DP_UUID(pool_uuid)); + D_GOTO(error, rc = -DER_INVAL); + } + + rc = pool_map_get_suspect_ranks(map, &suspect_rank_list); + if (rc != 0) { + DL_ERROR(rc, DF_UUID ": pool_map_get_ranks() failed", DP_UUID(pool_uuid)); + D_GOTO(error, rc); + } + D_DEBUG(DB_MD, DF_UUID ": found %" PRIu32 " suspect ranks in pool map\n", + DP_UUID(pool_uuid), suspect_rank_list->rl_nr); + } pool_query_reply_to_info(pool_uuid, map_buf, map_version, leader_rank, ps, rs, info); info->pi_ndisabled = num_disabled; @@ -431,11 +497,14 @@ process_query_result(d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ran *enabled_ranks = enabled_rank_list; if (disabled_rank_list != NULL) *disabled_ranks = disabled_rank_list; + if (suspect_rank_list != NULL) + *suspect_ranks = suspect_rank_list; D_GOTO(out, rc = -DER_SUCCESS); error: d_rank_list_free(disabled_rank_list); d_rank_list_free(enabled_rank_list); + d_rank_list_free(suspect_rank_list); out: if (map != NULL) pool_map_decref(map); @@ -464,10 +533,10 @@ pool_query_consume(uuid_t pool_uuid, crt_rpc_t *rpc, void *varg) D_DEBUG(DB_MGMT, DF_UUID": Successfully queried pool\n", DP_UUID(pool_uuid)); - rc = process_query_result(arg->pqa_enabled_ranks, arg->pqa_disabled_ranks, arg->pqa_info, - pool_uuid, out->pqo_op.po_map_version, - out->pqo_op.po_hint.sh_rank, &out->pqo_space, - &out->pqo_rebuild_st, arg->pqa_map_buf, arg->pqa_info->pi_bits); + rc = process_query_result( + arg->pqa_enabled_ranks, arg->pqa_disabled_ranks, arg->pqa_suspect_ranks, arg->pqa_info, + pool_uuid, out->pqo_op.po_map_version, out->pqo_op.po_hint.sh_rank, &out->pqo_space, + &out->pqo_rebuild_st, arg->pqa_map_buf, arg->pqa_info->pi_bits); if (arg->pqa_layout_ver) *arg->pqa_layout_ver = out->pqo_pool_layout_ver; if (arg->pqa_upgrade_layout_ver) @@ -502,7 +571,9 @@ static struct dsc_pool_svc_call_cbs pool_query_cbs 
= { * \param[in] ps_ranks Ranks of pool svc replicas * \param[in] deadline Unix time deadline in milliseconds * \param[out] enabled_ranks Optional, storage ranks with enabled targets. - * \param[out] disabled_ranks Optional, storage ranks with disabled targets. + * \param[out] disabled_ranks Optional, storage ranks with disabled targets. + * \param[out] suspect_ranks Optional, suspect ranks marked as DEAD by the SWIM + * protocol, but were not excluded from the system. * \param[in][out] pool_info Results of the pool query * \param[in][out] pool_layout_ver Results of the current pool global version * \param[in][out] upgrade_layout_ver Results of the target latest pool global version @@ -517,12 +588,13 @@ static struct dsc_pool_svc_call_cbs pool_query_cbs = { int dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ranks, - daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver) + d_rank_list_t **suspect_ranks, daos_pool_info_t *pool_info, + uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) { struct pool_query_arg arg = { .pqa_enabled_ranks = enabled_ranks, .pqa_disabled_ranks = disabled_ranks, + .pqa_suspect_ranks = suspect_ranks, .pqa_info = pool_info, .pqa_layout_ver = pool_layout_ver, .pqa_upgrade_layout_ver = upgrade_layout_ver, diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index e65374afaec..1ab3a0191eb 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -236,6 +236,7 @@ message PoolQueryResp { uint32 svc_ldr = 18; // current raft leader (2.6+) repeated uint32 svc_reps = 19; // service replica ranks uint64 query_mask = 20; // Bitmask of pool query options used + string suspect_ranks = 21; // optional set of suspect ranks } message PoolProperty { From acac64f083e4aaacbfaac785ad12cbfca77932db Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 9 Oct 2024 04:45:40 -0400 Subject: [PATCH 02/10] control
plane improvements Required-githooks: true Signed-off-by: Wang Shilong --- docs/admin/pool_operations.md | 22 +++++ src/control/cmd/daos/health.go | 24 ++---- src/control/cmd/daos/pool.go | 54 +++++++++++- src/control/cmd/daos/pretty/pool.go | 3 +- src/control/cmd/daos/pretty/pool_test.go | 41 ++++++++- src/control/lib/control/pool_test.go | 101 ++++++++++++++++++++++- 6 files changed, 219 insertions(+), 26 deletions(-) diff --git a/docs/admin/pool_operations.md b/docs/admin/pool_operations.md index 36907a2e31f..f82b797e8f0 100644 --- a/docs/admin/pool_operations.md +++ b/docs/admin/pool_operations.md @@ -286,6 +286,28 @@ The example below shows a rebuild in progress and NVMe space allocated. Rebuild busy, 75 objs, 9722 recs ``` +After experiencing significant failures, the system may retain some suspect +engines that have been marked as DEAD by the SWIM protocol but were not excluded +from the system to prevent data loss. An administrator can bring these engines back +online by restarting them. The example below illustrates the system’s status with +suspect and disabled engines. + +```bash +$ dmg pool query tank -t +``` + +NB: The --health-only/-t option is necessary to conduct pool health-related queries only, +which will bypass the collective call to each vos target. This is important because such +calls may hang and time out if there are suspect engines present. + +```bash +Pool 6f450a68-8c7d-4da9-8900-02691650f6a2, ntarget=8, disabled=2, leader=3, version=4, state=Degraded + Pool health info: + - Disabled ranks: 1 + - Suspect ranks: 2 + - Rebuild busy, 0 objs, 0 recs +``` + Additional status and telemetry data is planned to be exported through management tools and will be documented here once available. 
diff --git a/src/control/cmd/daos/health.go b/src/control/cmd/daos/health.go index 33507e8ab7f..dbeaacde0a0 100644 --- a/src/control/cmd/daos/health.go +++ b/src/control/cmd/daos/health.go @@ -99,32 +99,18 @@ func (cmd *healthCheckCmd) Execute([]string) error { } }() - queryMask := daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines) - tpi, err := queryPool(poolHdl, queryMask) - if err != nil { - cmd.Errorf("failed to query pool %s: %v", pool.Label, err) - continue - } - pool.EnabledRanks = tpi.EnabledRanks - + queryMask := daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines, + daos.PoolQueryOptionSuspectEngines) if pool.DisabledTargets > 0 { - queryMask.ClearAll() queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) - tpi, err = queryPool(poolHdl, queryMask) - if err != nil { - cmd.Errorf("failed to query pool %s: %v", pool.Label, err) - continue - } - pool.DisabledRanks = tpi.DisabledRanks } - - queryMask.ClearAll() - queryMask.SetOptions(daos.PoolQueryOptionSuspectEngines) - tpi, err = queryPool(poolHdl, queryMask) + tpi, err := queryPool(poolHdl, queryMask) if err != nil { cmd.Errorf("failed to query pool %s: %v", pool.Label, err) continue } + pool.EnabledRanks = tpi.EnabledRanks + pool.DisabledRanks = tpi.DisabledRanks pool.SuspectRanks = tpi.SuspectRanks poolConts, err := listContainers(poolHdl) diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index 5dd28349479..f4743db03ed 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -295,11 +295,12 @@ func convertPoolInfo(pinfo *C.daos_pool_info_t) (*daos.PoolInfo, error) { return poolInfo, nil } -func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { +func queryPoolHelper(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { var rlPtr **C.d_rank_list_t = nil var rl *C.d_rank_list_t = nil - if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) || 
queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) || queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) || + queryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { rlPtr = &rl } @@ -337,6 +338,55 @@ func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.Poo return poolInfo, nil } +func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { + origQueryMask := queryMask + queryMask.ClearOptions(daos.PoolQueryOptionEnabledEngines, + daos.PoolQueryOptionSuspectEngines, daos.PoolQueryOptionDisabledEngines) + if origQueryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { + queryMask.SetOptions(daos.PoolQueryOptionEnabledEngines) + origQueryMask.ClearOptions(daos.PoolQueryOptionEnabledEngines) + } else if origQueryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) + origQueryMask.ClearOptions(daos.PoolQueryOptionDisabledEngines) + } else if origQueryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { + queryMask.SetOptions(daos.PoolQueryOptionSuspectEngines) + origQueryMask.ClearOptions(daos.PoolQueryOptionSuspectEngines) + } + poolInfo, err := queryPoolHelper(poolHdl, queryMask) + if err != nil { + return nil, err + } + if origQueryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { + queryMask.ClearAll() + queryMask.SetOptions(daos.PoolQueryOptionEnabledEngines) + poolInfo1, err := queryPoolHelper(poolHdl, queryMask) + if err != nil { + return nil, err + } + poolInfo.EnabledRanks = poolInfo1.EnabledRanks + } + if origQueryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + queryMask.ClearAll() + queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) + poolInfo1, err := queryPoolHelper(poolHdl, queryMask) + if err != nil { + return nil, err + } + poolInfo.DisabledRanks = poolInfo1.DisabledRanks + } + if origQueryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { 
+ queryMask.ClearAll() + queryMask.SetOptions(daos.PoolQueryOptionSuspectEngines) + poolInfo1, err := queryPoolHelper(poolHdl, queryMask) + if err != nil { + return nil, err + } + poolInfo.SuspectRanks = poolInfo1.SuspectRanks + } + + return poolInfo, nil +} + func (cmd *poolQueryCmd) Execute(_ []string) error { queryMask := daos.DefaultPoolQueryMask if cmd.HealthOnly { diff --git a/src/control/cmd/daos/pretty/pool.go b/src/control/cmd/daos/pretty/pool.go index 3ded03cba4d..697009396ce 100644 --- a/src/control/cmd/daos/pretty/pool.go +++ b/src/control/cmd/daos/pretty/pool.go @@ -53,7 +53,8 @@ func PrintPoolInfo(pi *daos.PoolInfo, out io.Writer) error { if pi.DisabledRanks.Count() > 0 { fmt.Fprintf(w, "- Disabled ranks: %s\n", pi.DisabledRanks) } - if pi.SuspectRanks != nil && pi.SuspectRanks.Count() > 0 { + if pi.QueryMask.HasOption(daos.PoolQueryOptionSuspectEngines) && + pi.SuspectRanks != nil && pi.SuspectRanks.Count() > 0 { fmt.Fprintf(w, "- Suspect ranks: %s\n", pi.SuspectRanks) } if pi.Rebuild != nil { diff --git a/src/control/cmd/daos/pretty/pool_test.go b/src/control/cmd/daos/pretty/pool_test.go index 1a8dd2e6d92..ca55d83ac02 100644 --- a/src/control/cmd/daos/pretty/pool_test.go +++ b/src/control/cmd/daos/pretty/pool_test.go @@ -137,7 +137,6 @@ Pool space info: PoolLayoutVer: 1, UpgradeLayoutVer: 2, DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"), - SuspectRanks: ranklist.MustCreateRankSet("[2]"), Rebuild: &daos.PoolRebuildStatus{ State: daos.PoolRebuildStateBusy, Objects: 42, @@ -159,7 +158,6 @@ Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=Degraded Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. 
Pool health info: - Disabled ranks: 0-1,3 -- Suspect ranks: 2 - Rebuild busy, 42 objs, 21 recs Pool space info: - Target(VOS) count:1 @@ -169,6 +167,45 @@ Pool space info: - Storage tier 1 (NVMe): Total size: 2 B Free: 1 B, min:0 B, max:0 B, mean:0 B +`, poolUUID.String()), + }, + "normal response; suspect ranks": { + pi: &daos.PoolInfo{ + QueryMask: daos.HealthOnlyPoolQueryMask, + State: daos.PoolServiceStateDegraded, + UUID: poolUUID, + TotalTargets: 2, + DisabledTargets: 1, + ActiveTargets: 1, + ServiceLeader: 42, + Version: 100, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"), + SuspectRanks: ranklist.MustCreateRankSet("[2]"), + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + Objects: 42, + Records: 21, + }, + TierStats: []*daos.StorageUsageStats{ + { + Total: 2, + Free: 1, + }, + { + Total: 2, + Free: 1, + }, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=Degraded +Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. 
+Pool health info: +- Disabled ranks: 0-1,3 +- Suspect ranks: 2 +- Rebuild busy, 42 objs, 21 recs `, poolUUID.String()), }, "unknown/invalid rebuild state response": { diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index 7e342d95be8..fd42c913bf3 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -842,7 +842,7 @@ func TestControl_PoolQueryResp_MarshalJSON(t *testing.T) { }, exp: `{"query_mask":"disabled_engines,rebuild,space","state":"Ready","uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, }, - "valid rankset": { + "valid rankset default query": { pqr: &PoolQueryResp{ Status: 42, PoolInfo: daos.PoolInfo{ @@ -864,6 +864,28 @@ func TestControl_PoolQueryResp_MarshalJSON(t *testing.T) { }, exp: `{"query_mask":"disabled_engines,rebuild,space","state":"Ready","uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"enabled_ranks":[0,1,2,3,5],"disabled_ranks":[],"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, }, + "valid rankset health query": { + pqr: &PoolQueryResp{ + Status: 42, + PoolInfo: daos.PoolInfo{ + QueryMask: daos.HealthOnlyPoolQueryMask, + State: daos.PoolServiceStateReady, + UUID: poolUUID, + TotalTargets: 1, + ActiveTargets: 2, + TotalEngines: 3, + DisabledTargets: 4, + Version: 5, + ServiceLeader: 6, + ServiceReplicas: []ranklist.Rank{0, 1, 2}, + DisabledRanks: &ranklist.RankSet{}, + SuspectRanks: ranklist.MustCreateRankSet("[7,8,9]"), + PoolLayoutVer: 7, + UpgradeLayoutVer: 8, + }, + }, + exp: `{"query_mask":"disabled_engines,rebuild,suspect_engines","state":"Ready","uuid":"` + poolUUID.String() + 
`","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"disabled_ranks":[],"suspect_ranks":[7,8,9],"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, + }, } { t.Run(name, func(t *testing.T) { got, err := json.Marshal(tc.pqr) @@ -904,7 +926,7 @@ func TestControl_PoolQueryResp_UnmarshalJSON(t *testing.T) { }, }, "valid rankset": { - data: `{"enabled_ranks":"[0,1-3,5]","disabled_ranks":"[]","status":0,"uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":null,"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8}`, + data: `{"enabled_ranks":"[0,1-3,5]","disabled_ranks":"[]","suspect_ranks":"[4]","status":0,"uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":null,"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8}`, expResp: PoolQueryResp{ Status: 0, PoolInfo: daos.PoolInfo{ @@ -917,6 +939,7 @@ func TestControl_PoolQueryResp_UnmarshalJSON(t *testing.T) { ServiceLeader: 6, EnabledRanks: ranklist.MustCreateRankSet("[0-3,5]"), DisabledRanks: &ranklist.RankSet{}, + SuspectRanks: ranklist.MustCreateRankSet("[4]"), PoolLayoutVer: 7, UpgradeLayoutVer: 8, }, @@ -1187,6 +1210,80 @@ func TestControl_PoolQuery(t *testing.T) { }, }, }, + "query succeeds suspect ranks": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 16, + DisabledTargets: 17, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + State: mgmtpb.PoolServiceState_Degraded, + Rebuild: &mgmtpb.PoolRebuildStatus{ + State: mgmtpb.PoolRebuildStatus_BUSY, + Objects: 1, + Records: 2, + }, + TierStats: []*mgmtpb.StorageUsageStats{ + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + 
Mean: 3, + MediaType: mgmtpb.StorageMediaType(daos.StorageMediaTypeScm), + }, + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: mgmtpb.StorageMediaType(daos.StorageMediaTypeNvme), + }, + }, + SuspectRanks: "[1,2,3,7]", + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 16, + DisabledTargets: 17, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + State: daos.PoolServiceStateDegraded, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + Objects: 1, + Records: 2, + }, + TierStats: []*daos.StorageUsageStats{ + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: daos.StorageMediaTypeScm, + }, + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: daos.StorageMediaTypeNvme, + }, + }, + SuspectRanks: ranklist.MustCreateRankSet("[1-3,7]"), + }, + }, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) From b76d8113d98fdb7a4608aca88e96ccac7d1c10ce Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 10 Oct 2024 15:46:18 +0800 Subject: [PATCH 03/10] extend ftest to cover new interface and doc improvments Test-tag: DmgPoolQueryRanks Required-githooks: true Signed-off-by: Wang Shilong --- docs/admin/pool_operations.md | 14 +- .../ftest/control/dmg_pool_query_ranks.py | 142 +++++++++++------- .../ftest/control/dmg_pool_query_ranks.yaml | 7 +- src/tests/ftest/util/dmg_utils.py | 6 +- src/tests/ftest/util/dmg_utils_base.py | 5 +- src/tests/ftest/util/server_utils_params.py | 5 +- 6 files changed, 108 insertions(+), 71 deletions(-) diff --git a/docs/admin/pool_operations.md b/docs/admin/pool_operations.md index f82b797e8f0..efc827ae1e7 100644 --- a/docs/admin/pool_operations.md +++ b/docs/admin/pool_operations.md @@ -286,19 +286,19 @@ The example below shows a rebuild in progress and NVMe space allocated. 
Rebuild busy, 75 objs, 9722 recs ``` -After experiencing significant failures, the system may retain some suspect +After experiencing significant failures, the pool may retain some suspect engines that have been marked as DEAD by the SWIM protocol but were not excluded -from the system to prevent data loss. An administrator can bring these engines back -online by restarting them. The example below illustrates the system’s status with -suspect and disabled engines. +from the pool to prevent potential data inconsistency. An administrator can bring +these engines back online by restarting them. The example below illustrates the +system’s status with suspect and disabled engines. ```bash $ dmg pool query tank -t ``` -NB: The --health-only/-t option is necessary to conduct pool health-related queries only, -which will bypass the collective call to each vos target. This is important because such -calls may hang and time out if there are suspect engines present. +NB: The --health-only/-t option is necessary to conduct pool health-related queries only. +This is important because suspect ranks may cause commands to hang and time out, so identifying +and restarting them is a useful procedure. ```bash Pool 6f450a68-8c7d-4da9-8900-02691650f6a2, ntarget=8, disabled=2, leader=3, version=4, state=Degraded diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.py b/src/tests/ftest/control/dmg_pool_query_ranks.py index 22f45ee8d9a..ecbcefb1f12 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.py +++ b/src/tests/ftest/control/dmg_pool_query_ranks.py @@ -23,7 +23,7 @@ def setUp(self): super().setUp() # Init the pool - self.add_pool(connect=False) + self.add_pool(connect=False, properties='rd_fac:1') def test_pool_query_ranks_basic(self): """Test the state of ranks with dmg pool query. 
@@ -52,14 +52,21 @@ def test_pool_query_ranks_basic(self): self.log.debug("Checking enabled ranks state information") data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) self.assertListEqual( - data['response'].get('enabled_ranks'), [0, 1, 2], - "Invalid enabled_ranks field: want=[0, 1, 2], got={}".format( + data['response'].get('enabled_ranks'), [0, 1, 2, 3, 4], + "Invalid enabled_ranks field: want=[0, 1, 2, 3, 4], got={}".format( data['response'].get('enabled_ranks'))) self.assertListEqual( data['response'].get('disabled_ranks'), [], - "Invalid disabled_ranks field: want=[], got={}".format( + "Invalid suspect_ranks field: want=[], got={}".format( data['response'].get('disabled_ranks'))) + self.log.debug("Checking suspect ranks state information") + data = self.dmg.pool_query(self.pool.identifier, health_only=True) + self.assertListEqual( + data['response'].get('suspect_ranks'), [], + "Invalid suspect_ranks field: want=[], got={}".format( + data['response'].get('suspect_ranks'))) + def test_pool_query_ranks_mgmt(self): """Test the state of ranks after excluding and reintegrate them. 
@@ -80,58 +87,77 @@ def test_pool_query_ranks_mgmt(self): all_ranks = enabled_ranks.copy() self.random.shuffle(all_ranks) - self.log.info("Starting excluding ranks: all_ranks=%s", all_ranks) - for rank in all_ranks: - self.log.debug("Excluding rank %d", rank) - self.pool.exclude([rank]) - enabled_ranks.remove(rank) - disabled_ranks = sorted(disabled_ranks + [rank]) - - self.log.debug("Checking enabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) - self.assertListEqual( - data['response'].get('enabled_ranks'), enabled_ranks, - "Invalid enabled_ranks field: want={}, got={}".format( - enabled_ranks, data['response'].get('enabled_ranks'))) - self.assertListEqual( - data['response'].get('disabled_ranks'), disabled_ranks, - "Invalid disabled_ranks field: want={}, got={}".format( - disabled_ranks, data['response'].get('disabled_ranks'))) - - self.log.debug("Waiting for pool to be rebuild") - self.pool.wait_for_rebuild_to_start() - self.pool.wait_for_rebuild_to_end() + exclude_rank = all_ranks[0] + suspect_rank = all_ranks[1] + suspect_ranks = [suspect_rank] + self.log.info("Starting excluding rank:%d all_ranks=%s", exclude_rank, all_ranks) + self.pool.exclude([exclude_rank]) + enabled_ranks.remove(exclude_rank) + disabled_ranks = sorted(disabled_ranks + [exclude_rank]) - self.random.shuffle(all_ranks) - self.log.info("Starting reintegrating ranks: all_ranks=%s", all_ranks) - for rank in all_ranks: - self.log.debug("Reintegrating rank %d", rank) - - cmd_succeed = False - for _ in range(3): - try: - result = self.pool.reintegrate(rank) - cmd_succeed = True - break - except CommandFailure: - self.log.debug("dmg command failed retry") - time.sleep(3) - - self.assertTrue(cmd_succeed, "pool reintegrate failed: {}".format(result)) - enabled_ranks = sorted(enabled_ranks + [rank]) - disabled_ranks.remove(rank) - - self.log.debug("Checking enabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, 
show_enabled=True) - self.assertListEqual( - data['response'].get('enabled_ranks'), enabled_ranks, - "Invalid enabled_ranks field: want={}, got={}".format( - enabled_ranks, data['response'].get('enabled_ranks'))) - self.assertListEqual( - data['response'].get('disabled_ranks'), disabled_ranks, - "Invalid disabled_ranks field: want={}, got={}".format( - disabled_ranks, data['response'].get('disabled_ranks'))) - - self.log.debug("Waiting for pool to be rebuild") - self.pool.wait_for_rebuild_to_start() - self.pool.wait_for_rebuild_to_end() + self.log.debug("Checking enabled ranks state information") + data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) + self.assertListEqual( + data['response'].get('enabled_ranks'), enabled_ranks, + "Invalid enabled_ranks field: want={}, got={}".format( + enabled_ranks, data['response'].get('enabled_ranks'))) + self.assertListEqual( + data['response'].get('disabled_ranks'), disabled_ranks, + "Invalid disabled_ranks field: want={}, got={}".format( + disabled_ranks, data['response'].get('disabled_ranks'))) + + self.log.debug("Waiting for pool to be rebuild") + self.pool.wait_for_rebuild_to_start() + + # kill second rank. 
+ self.server_managers[0].stop_ranks([suspect_rank], self.d_log) + time.sleep(20) + + self.log.debug("Checking suspect ranks state information") + data = self.dmg.pool_query(self.pool.identifier, health_only=True) + self.assertListEqual( + data['response'].get('suspect_ranks'), suspect_ranks, + "Invalid enabled_ranks field: want={}, got={}".format( + suspect_ranks, data['response'].get('suspect_ranks'))) + self.assertListEqual( + data['response'].get('disabled_ranks'), disabled_ranks, + "Invalid disabled_ranks field: want={}, got={}".format( + disabled_ranks, data['response'].get('disabled_ranks'))) + + self.server_managers[0].start_ranks([suspect_rank], self.d_log) + time.sleep(10) + data = self.dmg.pool_query(self.pool.identifier, health_only=True) + self.assertListEqual( + data['response'].get('suspect_ranks'), [], + "Invalid enabled_ranks field: want={}, got={}".format( + [], data['response'].get('suspect_ranks'))) + + self.log.debug("Reintegrating rank %d", exclude_rank) + cmd_succeed = False + for _ in range(3): + try: + result = self.pool.reintegrate(exclude_rank) + cmd_succeed = True + break + except CommandFailure: + self.log.debug("dmg command failed retry") + time.sleep(3) + + self.assertTrue(cmd_succeed, "pool reintegrate failed: {}".format(result)) + self.log.debug("Waiting for pool to be rebuild") + self.pool.wait_for_rebuild_to_start() + self.pool.wait_for_rebuild_to_end() + + enabled_ranks = sorted(enabled_ranks + [exclude_rank]) + disabled_ranks.remove(exclude_rank) + + self.log.debug("Checking enabled ranks state information") + data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) + self.assertListEqual( + data['response'].get('enabled_ranks'), enabled_ranks, + "Invalid enabled_ranks field: want={}, got={}".format( + enabled_ranks, data['response'].get('enabled_ranks'))) + self.assertListEqual( + data['response'].get('disabled_ranks'), disabled_ranks, + "Invalid disabled_ranks field: want={}, got={}".format( + disabled_ranks, 
data['response'].get('disabled_ranks'))) diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.yaml b/src/tests/ftest/control/dmg_pool_query_ranks.yaml index 54fce55c6e0..320295ec5e2 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.yaml +++ b/src/tests/ftest/control/dmg_pool_query_ranks.yaml @@ -1,16 +1,20 @@ hosts: - test_servers: 3 + test_servers: 5 timeouts: test_pool_query_ranks_basic: 120 test_pool_query_ranks_error: 120 test_pool_query_ranks_mgmt: 480 server_config: name: daos_server + crt_timeout: 5 engines_per_host: 1 engines: 0: targets: 4 nr_xs_helpers: 0 + env_vars: + - SWIM_SUSPECT_TIMEOUT=10000 + - DAOS_POOL_RF=1 storage: 0: class: ram @@ -18,3 +22,4 @@ server_config: system_ram_reserved: 1 pool: size: 4GB + svcn: 5 diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index a48b45e59dd..41de535cc0d 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -625,12 +625,13 @@ def pool_create(self, scm_size, uid=None, gid=None, nvme_size=None, return data - def pool_query(self, pool, show_enabled=False): + def pool_query(self, pool, show_enabled=False, health_only=False): """Query a pool with the dmg command. Args: pool (str): Pool UUID or label to query. show_enabled (bool, optional): Display enabled ranks. + health_only (bool, optional): Only perform pool health related queries. Raises: CommandFailure: if the dmg pool query command fails. @@ -676,7 +677,8 @@ def pool_query(self, pool, show_enabled=False): # "error": null, # "status": 0 # } - return self._get_json_result(("pool", "query"), pool=pool, show_enabled=show_enabled) + return self._get_json_result(("pool", "query"), pool=pool, + show_enabled=show_enabled, health_only=health_only) def pool_query_targets(self, pool, rank=None, target_idx=None): """Call dmg pool query-targets. 
diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index 951694a2251..f7e1ee3873d 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -6,8 +6,10 @@ from socket import gethostname from ClusterShell.NodeSet import NodeSet + from command_utils import CommandWithSubCommand, YamlCommand -from command_utils_base import BasicParameter, CommandWithParameters, FormattedParameter +from command_utils_base import (BasicParameter, CommandWithParameters, + FormattedParameter) from environment_utils import TestEnvironment from general_utils import nodeset_append_suffix @@ -533,6 +535,7 @@ def __init__(self): super().__init__("/run/dmg/pool/query/*", "query") self.pool = BasicParameter(None, position=1) self.show_enabled = FormattedParameter("--show-enabled", False) + self.health_only = FormattedParameter("--health-only", False) class QueryTargetsSubCommand(CommandWithParameters): """Defines an object for the dmg pool query-targets command.""" diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 440ffe68f82..3bfd6bd9665 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -5,7 +5,8 @@ """ import os -from command_utils_base import BasicParameter, LogParameter, TransportCredentials, YamlParameters +from command_utils_base import (BasicParameter, LogParameter, + TransportCredentials, YamlParameters) MAX_STORAGE_TIERS = 5 @@ -434,7 +435,6 @@ class EngineYamlParameters(YamlParameters): REQUIRED_ENV_VARS = { "common": [ "D_LOG_FILE_APPEND_PID=1", - "DAOS_POOL_RF=4", "COVFILE=/tmp/test.cov"], "ofi+tcp": [], "ofi+tcp;ofi_rxm": [], @@ -494,6 +494,7 @@ def __init__(self, base_namespace, index, provider=None, max_storage_tiers=MAX_S "ABT_ENV_MAX_NUM_XSTREAMS=100", "ABT_MAX_NUM_XSTREAMS=100", "DAOS_MD_CAP=1024", + "DAOS_POOL_RF=4", "DAOS_SCHED_WATCHDOG_ALL=1", "DD_MASK=mgmt,io,md,epc,rebuild", 
] From 7a2b72f2d1b41e37da8957fbce3fe73c0141a159 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 11 Oct 2024 05:33:45 -0400 Subject: [PATCH 04/10] bump timeout Test-tag: DmgPoolQueryRanks Required-githooks: true Signed-off-by: Wang Shilong --- src/tests/ftest/control/dmg_pool_query_ranks.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.yaml b/src/tests/ftest/control/dmg_pool_query_ranks.yaml index 320295ec5e2..034039bd921 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.yaml +++ b/src/tests/ftest/control/dmg_pool_query_ranks.yaml @@ -3,7 +3,7 @@ hosts: timeouts: test_pool_query_ranks_basic: 120 test_pool_query_ranks_error: 120 - test_pool_query_ranks_mgmt: 480 + test_pool_query_ranks_mgmt: 780 server_config: name: daos_server crt_timeout: 5 From 2048ef4db71d7ae13087b3450d7934e8d3ac69dd Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Mon, 14 Oct 2024 02:29:10 -0400 Subject: [PATCH 05/10] collect more debug logs Test-tag: DmgPoolQueryRanks Required-githooks: true Signed-off-by: Wang Shilong --- src/tests/ftest/control/dmg_pool_query_ranks.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.yaml b/src/tests/ftest/control/dmg_pool_query_ranks.yaml index 034039bd921..e2258825ce4 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.yaml +++ b/src/tests/ftest/control/dmg_pool_query_ranks.yaml @@ -3,7 +3,7 @@ hosts: timeouts: test_pool_query_ranks_basic: 120 test_pool_query_ranks_error: 120 - test_pool_query_ranks_mgmt: 780 + test_pool_query_ranks_mgmt: 480 server_config: name: daos_server crt_timeout: 5 @@ -15,6 +15,7 @@ server_config: env_vars: - SWIM_SUSPECT_TIMEOUT=10000 - DAOS_POOL_RF=1 + - DD_MASK=io,epc,rebuild storage: 0: class: ram From 1e9f4df45611f5c27ab327ee84e166bfa231ae43 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 15 Oct 2024 04:06:04 -0400 Subject: [PATCH 06/10] improve test according to 
feedbacks. Required-githooks: true Test-tag: DmgPoolQueryRanks Signed-off-by: Wang Shilong --- .../ftest/control/dmg_pool_query_ranks.py | 26 +++++------------- .../ftest/control/dmg_pool_query_ranks.yaml | 1 + src/tests/ftest/util/dmg_utils_base.py | 4 +-- src/tests/ftest/util/server_utils_params.py | 3 +-- src/tests/ftest/util/test_utils_pool.py | 27 +++++++++++++++++++ 5 files changed, 37 insertions(+), 24 deletions(-) diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.py b/src/tests/ftest/control/dmg_pool_query_ranks.py index ecbcefb1f12..20e081c9fea 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.py +++ b/src/tests/ftest/control/dmg_pool_query_ranks.py @@ -23,7 +23,7 @@ def setUp(self): super().setUp() # Init the pool - self.add_pool(connect=False, properties='rd_fac:1') + self.add_pool(connect=False) def test_pool_query_ranks_basic(self): """Test the state of ranks with dmg pool query. @@ -89,7 +89,6 @@ def test_pool_query_ranks_mgmt(self): self.random.shuffle(all_ranks) exclude_rank = all_ranks[0] suspect_rank = all_ranks[1] - suspect_ranks = [suspect_rank] self.log.info("Starting excluding rank:%d all_ranks=%s", exclude_rank, all_ranks) self.pool.exclude([exclude_rank]) enabled_ranks.remove(exclude_rank) @@ -111,42 +110,31 @@ def test_pool_query_ranks_mgmt(self): # kill second rank. 
self.server_managers[0].stop_ranks([suspect_rank], self.d_log) - time.sleep(20) - - self.log.debug("Checking suspect ranks state information") - data = self.dmg.pool_query(self.pool.identifier, health_only=True) - self.assertListEqual( - data['response'].get('suspect_ranks'), suspect_ranks, - "Invalid enabled_ranks field: want={}, got={}".format( - suspect_ranks, data['response'].get('suspect_ranks'))) + self.pool.wait_pool_suspect_ranks([suspect_rank], timeout=30) self.assertListEqual( data['response'].get('disabled_ranks'), disabled_ranks, "Invalid disabled_ranks field: want={}, got={}".format( disabled_ranks, data['response'].get('disabled_ranks'))) self.server_managers[0].start_ranks([suspect_rank], self.d_log) - time.sleep(10) - data = self.dmg.pool_query(self.pool.identifier, health_only=True) - self.assertListEqual( - data['response'].get('suspect_ranks'), [], - "Invalid enabled_ranks field: want={}, got={}".format( - [], data['response'].get('suspect_ranks'))) + self.pool.wait_pool_suspect_ranks([], timeout=30) self.log.debug("Reintegrating rank %d", exclude_rank) cmd_succeed = False for _ in range(3): try: - result = self.pool.reintegrate(exclude_rank) + self.pool.reintegrate(exclude_rank) cmd_succeed = True break except CommandFailure: self.log.debug("dmg command failed retry") time.sleep(3) - self.assertTrue(cmd_succeed, "pool reintegrate failed: {}".format(result)) + self.assertTrue(cmd_succeed, "pool reintegrate failed") self.log.debug("Waiting for pool to be rebuild") self.pool.wait_for_rebuild_to_start() - self.pool.wait_for_rebuild_to_end() + # Fix this after DAOS-16702 + # self.pool.wait_for_rebuild_to_end enabled_ranks = sorted(enabled_ranks + [exclude_rank]) disabled_ranks.remove(exclude_rank) diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.yaml b/src/tests/ftest/control/dmg_pool_query_ranks.yaml index e2258825ce4..c3255d22632 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.yaml +++ 
b/src/tests/ftest/control/dmg_pool_query_ranks.yaml @@ -24,3 +24,4 @@ server_config: pool: size: 4GB svcn: 5 + properties: rd_fac:1 diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index f7e1ee3873d..7e5d2300a53 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -6,10 +6,8 @@ from socket import gethostname from ClusterShell.NodeSet import NodeSet - from command_utils import CommandWithSubCommand, YamlCommand -from command_utils_base import (BasicParameter, CommandWithParameters, - FormattedParameter) +from command_utils_base import BasicParameter, CommandWithParameters, FormattedParameter from environment_utils import TestEnvironment from general_utils import nodeset_append_suffix diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index c164ae8e816..de8daa1fd13 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -5,8 +5,7 @@ """ import os -from command_utils_base import (BasicParameter, LogParameter, - TransportCredentials, YamlParameters) +from command_utils_base import BasicParameter, LogParameter, TransportCredentials, YamlParameters MAX_STORAGE_TIERS = 5 diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 7c6b5758f87..c34e4f5a170 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -1448,6 +1448,33 @@ def check_pool_files(self, hosts, uuid, scm_mount): status = False return status + def wait_pool_suspect_ranks(self, expected, interval=1, timeout=30): + """Wait for the pool suspect ranks. + + Args: + expected (list): suspect ranks check to wait. + interval (int, optional): number of seconds to wait in between pool query checks + timeout(int, optional): time to fail test if it could not match + expected values. + + Raises: + DaosTestError: if waiting for timeout. 
+ + """ + self.log.info("waiting for pool ranks %s to be suspected", expected) + + start = time() + data = self.dmg.pool_query(self.identifier, health_only=True) + while data['response'].get('suspect_ranks') != expected: + self.log.info(" suspect ranks is %s ...", data['response'].get('suspect_ranks')) + if time() - start > timeout: + raise DaosTestError("TIMEOUT detected after {} seconds while for waiting for " + "ranks: %s suspect", expected) + sleep(interval) + data = self.dmg.pool_query(self.identifier, health_only=True) + + self.log.info("Wait for suspect ranks complete: suspect ranks %s", expected) + def verify_uuid_directory(self, host, scm_mount): """Check if pool folder exist on server. From 9ff31a9d6b2eb99467409ca424ff7117dce4dc3f Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 15 Oct 2024 16:33:11 +0800 Subject: [PATCH 07/10] Fix warning Test-tag: DmgPoolQueryRanks Required-githooks: true Signed-off-by: Wang Shilong --- src/tests/ftest/util/test_utils_pool.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index c34e4f5a170..1352f387944 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -1468,8 +1468,9 @@ def wait_pool_suspect_ranks(self, expected, interval=1, timeout=30): while data['response'].get('suspect_ranks') != expected: self.log.info(" suspect ranks is %s ...", data['response'].get('suspect_ranks')) if time() - start > timeout: - raise DaosTestError("TIMEOUT detected after {} seconds while for waiting for " - "ranks: %s suspect", expected) + raise DaosTestError( + "TIMEOUT detected after {} seconds while for waiting for ranks: %s suspect", + expected) sleep(interval) data = self.dmg.pool_query(self.identifier, health_only=True) From dce3a51a4581647ce8a5fc98615aafb016dabb82 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 15 Oct 2024 16:44:18 +0800 Subject: [PATCH 08/10] fix 
format Test-tag: DmgPoolQueryRanks Required-githooks: true Signed-off-by: Wang Shilong --- src/tests/ftest/util/test_utils_pool.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 1352f387944..f5e88d2c26c 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -1468,9 +1468,8 @@ def wait_pool_suspect_ranks(self, expected, interval=1, timeout=30): while data['response'].get('suspect_ranks') != expected: self.log.info(" suspect ranks is %s ...", data['response'].get('suspect_ranks')) if time() - start > timeout: - raise DaosTestError( - "TIMEOUT detected after {} seconds while for waiting for ranks: %s suspect", - expected) + raise DaosTestError("TIMEOUT detected after {} seconds while for waiting " + "for ranks {} suspect".format(timeout, expected)) sleep(interval) data = self.dmg.pool_query(self.identifier, health_only=True) From c9446037093cf19c7f71c8131013da5aaf6e97b0 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sun, 20 Oct 2024 21:06:13 -0400 Subject: [PATCH 09/10] test Features: DmgPoolQueryRanks Skip-nlt: true Required-githooks: true Signed-off-by: Wang Shilong From 3166a87aa7610c1197a253fbd5a64939686be79e Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 24 Oct 2024 04:14:10 -0400 Subject: [PATCH 10/10] codes improvements Features: DmgPoolQueryRanks Skip-nlt: true Signed-off-by: Wang Shilong Required-githooks: true --- src/control/cmd/daos/pool.go | 88 ++++++++++++++++------------ src/tests/ftest/daos_test/suite.yaml | 2 + 2 files changed, 51 insertions(+), 39 deletions(-) diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index f4743db03ed..085a78b3b73 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -295,7 +295,7 @@ func convertPoolInfo(pinfo *C.daos_pool_info_t) (*daos.PoolInfo, error) { return poolInfo, nil } -func queryPoolHelper(poolHdl 
C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { +func queryPoolRankLists(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { var rlPtr **C.d_rank_list_t = nil var rl *C.d_rank_list_t = nil @@ -337,51 +337,61 @@ func queryPoolHelper(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*da return poolInfo, nil } - func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { - origQueryMask := queryMask - queryMask.ClearOptions(daos.PoolQueryOptionEnabledEngines, - daos.PoolQueryOptionSuspectEngines, daos.PoolQueryOptionDisabledEngines) - if origQueryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { - queryMask.SetOptions(daos.PoolQueryOptionEnabledEngines) - origQueryMask.ClearOptions(daos.PoolQueryOptionEnabledEngines) - } else if origQueryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { - queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) - origQueryMask.ClearOptions(daos.PoolQueryOptionDisabledEngines) - } else if origQueryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { - queryMask.SetOptions(daos.PoolQueryOptionSuspectEngines) - origQueryMask.ClearOptions(daos.PoolQueryOptionSuspectEngines) - } - poolInfo, err := queryPoolHelper(poolHdl, queryMask) - if err != nil { - return nil, err - } - if origQueryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { + poolInfo := &daos.PoolInfo{} + originalMask := queryMask // Save the original queryMask + + // Function to handle the query and return a single RankList + queryAndUpdate := func(option string) error { + // Clear previous options and set new option queryMask.ClearAll() - queryMask.SetOptions(daos.PoolQueryOptionEnabledEngines) - poolInfo1, err := queryPoolHelper(poolHdl, queryMask) + queryMask.SetOptions(option) + + poolInfo1, err := queryPoolRankLists(poolHdl, queryMask) if err != nil { - return nil, err + return err } - poolInfo.EnabledRanks = poolInfo1.EnabledRanks - } - if 
origQueryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { - queryMask.ClearAll() - queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) - poolInfo1, err := queryPoolHelper(poolHdl, queryMask) - if err != nil { - return nil, err + + switch option { + case daos.PoolQueryOptionEnabledEngines: + poolInfo.EnabledRanks = poolInfo1.EnabledRanks + case daos.PoolQueryOptionDisabledEngines: + poolInfo.DisabledRanks = poolInfo1.DisabledRanks + case daos.PoolQueryOptionSuspectEngines: + poolInfo.SuspectRanks = poolInfo1.SuspectRanks } - poolInfo.DisabledRanks = poolInfo1.DisabledRanks + return nil } - if origQueryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { - queryMask.ClearAll() - queryMask.SetOptions(daos.PoolQueryOptionSuspectEngines) - poolInfo1, err := queryPoolHelper(poolHdl, queryMask) - if err != nil { - return nil, err + + // Preprocess queryMask, select one option for the first query + var firstOption string + if originalMask.HasOption(daos.PoolQueryOptionEnabledEngines) { + firstOption = daos.PoolQueryOptionEnabledEngines + } else if originalMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + firstOption = daos.PoolQueryOptionDisabledEngines + } else if originalMask.HasOption(daos.PoolQueryOptionSuspectEngines) { + firstOption = daos.PoolQueryOptionSuspectEngines + } + + // Perform the first query to get basic information + if err := queryAndUpdate(firstOption); err != nil { + return nil, err + } + + // Check the original query mask and update fields as needed + queryOptions := []string{ + daos.PoolQueryOptionEnabledEngines, + daos.PoolQueryOptionDisabledEngines, + daos.PoolQueryOptionSuspectEngines, + } + + // Process each option sequentially + for _, opt := range queryOptions { + if originalMask.HasOption(opt) && opt != firstOption { + if err := queryAndUpdate(opt); err != nil { + return nil, err + } } - poolInfo.SuspectRanks = poolInfo1.SuspectRanks } return poolInfo, nil diff --git a/src/tests/ftest/daos_test/suite.yaml 
b/src/tests/ftest/daos_test/suite.yaml index 3f8572f2bcf..797d70fb450 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -55,6 +55,7 @@ server_config: - D_LOG_FLUSH=DEBUG - FI_LOG_LEVEL=warn - D_LOG_STDERR_IN_LOG=1 + - DAOS_POOL_RF=4 storage: auto 1: pinned_numa_node: 1 @@ -70,6 +71,7 @@ server_config: - D_LOG_FLUSH=DEBUG - FI_LOG_LEVEL=warn - D_LOG_STDERR_IN_LOG=1 + - DAOS_POOL_RF=4 storage: auto transport_config: allow_insecure: true