diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index 42b69cab520..6c3f2b4a8a6 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -599,34 +599,48 @@ func (cmd *poolAutoTestCmd) Execute(_ []string) error { } func getPoolList(log logging.Logger, sysName string, queryEnabled bool) ([]*daos.PoolInfo, error) { - var rc C.int - bufSize := C.size_t(0) - - // First, fetch the total number of pools in the system. - // We may not have access to all of them, so this is an upper bound. - rc = C.daos_mgmt_list_pools(nil, &bufSize, nil, nil) - if err := daosError(rc); err != nil { - return nil, err - } - - if bufSize < 1 { - return nil, nil - } - var cSysName *C.char if sysName != "" { cSysName := C.CString(sysName) defer freeString(cSysName) } - // Now, we actually fetch the pools into the buffer that we've created. - cPools := make([]C.daos_mgmt_pool_info_t, bufSize) - rc = C.daos_mgmt_list_pools(nil, &bufSize, &cPools[0], nil) - if err := daosError(rc); err != nil { + var cPools []C.daos_mgmt_pool_info_t + for { + var rc C.int + var poolCount C.size_t + + // First, fetch the total number of pools in the system. + // We may not have access to all of them, so this is an upper bound. + rc = C.daos_mgmt_list_pools(cSysName, &poolCount, nil, nil) + if err := daosError(rc); err != nil { + return nil, err + } + log.Debugf("pools in system: %d", poolCount) + + if poolCount < 1 { + return nil, nil + } + + // Now, we actually fetch the pools into the buffer that we've created. + cPools = make([]C.daos_mgmt_pool_info_t, poolCount) + rc = C.daos_mgmt_list_pools(cSysName, &poolCount, &cPools[0], nil) + err := daosError(rc) + if err == nil { + cPools = cPools[:poolCount] // adjust the slice to the number of pools retrieved + log.Debugf("fetched %d pools", len(cPools)) + break + } + if err == daos.StructTooSmall { + log.Notice("server-side pool list changed; re-fetching") + continue + } + log.Errorf("failed to fetch pool list: %s", err) return nil, err } - pools := make([]*daos.PoolInfo, 0, bufSize) - for i := 0; i < int(bufSize); i++ { + + pools := make([]*daos.PoolInfo, 0, len(cPools)) + for i := 0; i < len(cPools); i++ { cPool := &cPools[i] svcRanks, err := rankSetFromC(cPool.mgpi_svc) @@ -658,15 +672,16 @@ func getPoolList(log logging.Logger, sysName string, queryEnabled bool) ([]*daos if qErr != nil { log.Errorf("failed to query pool %q: %s", poolLabel, qErr) } - pool.Label = poolLabel - pool.ServiceReplicas = svcRanks.Ranks() - if err := daosError(C.daos_pool_disconnect(poolHandle, nil)); err != nil { log.Errorf("failed to disconnect from pool %q: %s", poolLabel, err) } if qErr != nil { continue } + + // Add a few missing pieces that the query doesn't fill in. + pool.Label = poolLabel + pool.ServiceReplicas = svcRanks.Ranks() } else { // Just populate the basic info. pool = &daos.PoolInfo{ diff --git a/src/engine/drpc_client.c b/src/engine/drpc_client.c index 54db5c15e0e..ddf33bd6dfd 100644 --- a/src/engine/drpc_client.c +++ b/src/engine/drpc_client.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2021 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -459,7 +459,7 @@ ds_get_pool_list(uint64_t *npools, daos_mgmt_pool_info_t *pools) if (*npools > 0 && lp_resp->n_pools > *npools) { D_ERROR("pool list exceeds request buffer (req: %lu, actual: %lu)", *npools, lp_resp->n_pools); - D_GOTO(out_resp, rc = -DER_TRUNC); + D_GOTO(out_resp, rc = -DER_OVERFLOW); } *npools = lp_resp->n_pools; diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c index 354b0ef794b..95e76d0e4e5 100644 --- a/src/mgmt/cli_mgmt.c +++ b/src/mgmt/cli_mgmt.c @@ -1374,10 +1374,8 @@ dc_mgmt_pool_list(tse_task_t *task) } rc = out->plo_op.mo_rc; - if (rc != 0) { - DL_ERROR(rc, "failed to list pools"); + if (rc != 0) D_GOTO(out_put_req, rc); - } *args->npools = out->plo_npools; diff --git a/src/mgmt/srv.c b/src/mgmt/srv.c index cea08b359ac..db738a88ed1 100644 --- a/src/mgmt/srv.c +++ b/src/mgmt/srv.c @@ -530,6 +530,15 @@ ds_mgmt_pool_list_hdlr(crt_rpc_t *rpc) send_resp: out->plo_op.mo_rc = rc; + if (rc == 0) { + if (n_rpc > 0) + D_DEBUG(DB_MGMT, "returning %zu/%zu pools\n", n_rpc, n_mgmt); + else + D_DEBUG(DB_MGMT, "returning %zu pools\n", n_mgmt); + } else { + DL_ERROR(rc, "failed to list pools"); + } + rc = crt_reply_send(rpc); if (rc != 0) DL_ERROR(rc, "crt_reply_send() failed");