Skip to content

Commit

Permalink
DAOS-16477 control: add pool query to return hanging ranks
Browse files Browse the repository at this point in the history
Required-githooks: true
Signed-off-by: Wang Shilong <shilong.wang@intel.com>
  • Loading branch information
wangshilong committed Sep 25, 2024
1 parent a6d2773 commit 2442d06
Show file tree
Hide file tree
Showing 17 changed files with 429 additions and 403 deletions.
9 changes: 9 additions & 0 deletions src/control/cmd/daos/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,15 @@ func (cmd *healthCheckCmd) Execute([]string) error {
pool.DisabledRanks = tpi.DisabledRanks
}

queryMask.ClearAll()
queryMask.SetOptions(daos.PoolQueryOptionHangingEngines)
tpi, err = queryPool(poolHdl, queryMask)
if err != nil {
cmd.Errorf("failed to query pool %s: %v", pool.Label, err)
continue
}
pool.HangingRanks = tpi.HangingRanks

poolConts, err := listContainers(poolHdl)
if err != nil {
cmd.Errorf("failed to list containers on pool %s: %v", pool.Label, err)
Expand Down
3 changes: 3 additions & 0 deletions src/control/cmd/daos/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,9 @@ func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.Poo
if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) {
poolInfo.DisabledRanks = rs
}
if queryMask.HasOption(daos.PoolQueryOptionHangingEngines) {
poolInfo.HangingRanks = rs
}
}

return poolInfo, nil
Expand Down
3 changes: 3 additions & 0 deletions src/control/cmd/daos/pretty/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ func PrintPoolInfo(pi *daos.PoolInfo, out io.Writer) error {
if pi.DisabledRanks != nil && pi.DisabledRanks.Count() > 0 {
fmt.Fprintf(w, "- Disabled ranks: %s\n", pi.DisabledRanks)
}
if pi.HangingRanks != nil && pi.HangingRanks.Count() > 0 {
fmt.Fprintf(w, "- Hanging ranks: %s\n", pi.HangingRanks)
}
if pi.Rebuild != nil {
if pi.Rebuild.Status == 0 {
fmt.Fprintf(w, "- Rebuild %s, %d objs, %d recs\n",
Expand Down
2 changes: 2 additions & 0 deletions src/control/cmd/daos/pretty/pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ Pool space info:
PoolLayoutVer: 1,
UpgradeLayoutVer: 2,
DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"),
HangingRanks: ranklist.MustCreateRankSet("[2]"),
Rebuild: &daos.PoolRebuildStatus{
State: daos.PoolRebuildStateBusy,
Objects: 42,
Expand All @@ -158,6 +159,7 @@ Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=Degraded
Pool layout out of date (1 < 2) -- see `+backtickStr+` for details.
Pool health info:
- Disabled ranks: 0-1,3
- Hanging ranks: 2
- Rebuild busy, 42 objs, 21 recs
Pool space info:
- Target(VOS) count:1
Expand Down
203 changes: 106 additions & 97 deletions src/control/common/proto/mgmt/pool.pb.go

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion src/control/lib/daos/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ type (
TierStats []*StorageUsageStats `json:"tier_stats"`
EnabledRanks *ranklist.RankSet `json:"enabled_ranks,omitempty"`
DisabledRanks *ranklist.RankSet `json:"disabled_ranks,omitempty"`
HangingRanks *ranklist.RankSet `json:"hanging_ranks,omitempty"`
PoolLayoutVer uint32 `json:"pool_layout_ver"`
UpgradeLayoutVer uint32 `json:"upgrade_layout_ver"`
}
Expand Down Expand Up @@ -104,7 +105,7 @@ type (

const (
// DefaultPoolQueryMask defines the default pool query mask.
DefaultPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_ENGINES_DISABLED))
DefaultPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_ENGINES_DISABLED | C.DPI_ENGINES_HANGING))
// HealthOnlyPoolQueryMask defines the mask for health-only queries.
HealthOnlyPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_SPACE))

Expand All @@ -116,6 +117,8 @@ const (
PoolQueryOptionEnabledEngines = "enabled_engines"
// PoolQueryOptionDisabledEngines retrieves disabled engines as part of the pool query.
PoolQueryOptionDisabledEngines = "disabled_engines"
// PoolQueryOptionHangingEngines retrieves hanging engines as part of the pool query.
PoolQueryOptionHangingEngines = "hanging_engines"

// PoolConnectFlagReadOnly indicates that the connection is read-only.
PoolConnectFlagReadOnly = C.DAOS_PC_RO
Expand All @@ -130,6 +133,7 @@ var poolQueryOptMap = map[C.int]string{
C.DPI_REBUILD_STATUS: PoolQueryOptionRebuild,
C.DPI_ENGINES_ENABLED: PoolQueryOptionEnabledEngines,
C.DPI_ENGINES_DISABLED: PoolQueryOptionDisabledEngines,
C.DPI_ENGINES_HANGING: PoolQueryOptionHangingEngines,
}

func resolvePoolQueryOpt(name string) (C.int, error) {
Expand Down
17 changes: 9 additions & 8 deletions src/control/lib/daos/pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,14 @@ func TestDaos_PoolQueryMask(t *testing.T) {
testMask: genTestMask(func(pqm *PoolQueryMask) {
*pqm = HealthOnlyPoolQueryMask
}),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionHangingEngines, PoolQueryOptionRebuild),
},
"set query all=true": {
testMask: genTestMask(func(pqm *PoolQueryMask) {
pqm.SetAll()
}),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionHangingEngines,
PoolQueryOptionRebuild, PoolQueryOptionSpace),
},
"set query all=false": {
testMask: genTestMask(func(pqm *PoolQueryMask) {
Expand All @@ -162,7 +163,7 @@ func TestDaos_PoolQueryMask(t *testing.T) {
pqm.SetAll()
pqm.ClearOptions(PoolQueryOptionSpace)
}),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionHangingEngines, PoolQueryOptionRebuild),
},
"set query space=false (already false)": {
testMask: genTestMask(func(pqm *PoolQueryMask) {
Expand All @@ -181,7 +182,7 @@ func TestDaos_PoolQueryMask(t *testing.T) {
pqm.SetAll()
pqm.ClearOptions(PoolQueryOptionRebuild)
}),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionSpace),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionHangingEngines, PoolQueryOptionSpace),
},
"set query enabled_engines=true": {
testMask: genTestMask(func(pqm *PoolQueryMask) {
Expand All @@ -194,7 +195,7 @@ func TestDaos_PoolQueryMask(t *testing.T) {
pqm.SetAll()
pqm.ClearOptions(PoolQueryOptionEnabledEngines)
}),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace),
expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionHangingEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace),
},
"set query disabled_engines=true": {
testMask: genTestMask(func(pqm *PoolQueryMask) {
Expand All @@ -207,7 +208,7 @@ func TestDaos_PoolQueryMask(t *testing.T) {
pqm.SetAll()
pqm.ClearOptions(PoolQueryOptionDisabledEngines)
}),
expString: genOptsStr(PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace),
expString: genOptsStr(PoolQueryOptionEnabledEngines, PoolQueryOptionHangingEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace),
},
} {
t.Run(name, func(t *testing.T) {
Expand All @@ -232,7 +233,7 @@ func TestDaos_PoolQueryMaskMarshalJSON(t *testing.T) {
testMask: genTestMask(func(pqm *PoolQueryMask) {
pqm.SetAll()
}),
expJSON: []byte(`"disabled_engines,enabled_engines,rebuild,space"`),
expJSON: []byte(`"disabled_engines,enabled_engines,hanging_engines,rebuild,space"`),
},
} {
t.Run(name, func(t *testing.T) {
Expand Down Expand Up @@ -262,7 +263,7 @@ func TestDaos_PoolQueryMaskUnmarshalJSON(t *testing.T) {
},
"uint64 value": {
testData: []byte("18446744073709551603"),
expString: "rebuild,space",
expString: "hanging_engines,rebuild,space",
},
"string values": {
testData: []byte("rebuild,disabled_engines"),
Expand Down
2 changes: 2 additions & 0 deletions src/include/daos_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ enum daos_pool_info_bit {
DPI_ENGINES_ENABLED = 1ULL << 2,
/** true to include (in \a ranks) engines with some or all targets disabled (down). */
DPI_ENGINES_DISABLED = 1ULL << 3,
/** true to include (in \a ranks) hanging engines. */
DPI_ENGINES_HANGING = 1ULL << 4,
/** query all above optional info */
DPI_ALL = -1,
};
Expand Down
9 changes: 5 additions & 4 deletions src/include/daos_srv/pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,10 +303,11 @@ int dsc_pool_svc_delete_acl(uuid_t pool_uuid, d_rank_list_t *ranks, uint64_t dea
enum daos_acl_principal_type principal_type,
const char *principal_name);

int dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline,
d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ranks,
daos_pool_info_t *pool_info, uint32_t *pool_layout_ver,
uint32_t *upgrade_layout_ver);
int
dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline,
d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ranks,
d_rank_list_t **hanging_ranks, daos_pool_info_t *pool_info,
uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver);
int dsc_pool_svc_query_target(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline,
d_rank_t rank, uint32_t tgt_idx, daos_target_info_t *ti);

Expand Down
Loading

0 comments on commit 2442d06

Please sign in to comment.