Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16170 cart: do not release completed RPC reference repeatedly - b26 #15477

Draft
wants to merge 1 commit into
base: release/2.6
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/cart/crt_rpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1537,7 +1537,7 @@ crt_req_send(crt_rpc_t *req, crt_cb_t complete_cb, void *arg)
/* failure already reported through complete cb */
if (complete_cb != NULL)
rc = 0;
} else {
} else if (!crt_rpc_completed(rpc_priv)) {
RPC_DECREF(rpc_priv);
}
}
Expand Down
12 changes: 6 additions & 6 deletions src/chk/chk_rpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ chk_query_remote(d_rank_list_t *rank_list, uint64_t gen, int pool_nr, uuid_t poo
int
chk_mark_remote(d_rank_list_t *rank_list, uint64_t gen, d_rank_t rank, uint32_t version)
{
crt_rpc_t *req;
crt_rpc_t *req = NULL;
struct chk_mark_in *cmi;
struct chk_mark_out *cmo;
int rc;
Expand Down Expand Up @@ -730,7 +730,7 @@ int
chk_act_remote(d_rank_list_t *rank_list, uint64_t gen, uint64_t seq, uint32_t cla,
uint32_t act, d_rank_t rank, bool for_all)
{
crt_rpc_t *req;
crt_rpc_t *req = NULL;
struct chk_act_in *cai;
struct chk_act_out *cao;
int rc;
Expand Down Expand Up @@ -825,7 +825,7 @@ int
chk_pool_start_remote(d_rank_list_t *rank_list, uint64_t gen, uuid_t uuid, uint32_t phase,
uint32_t flags)
{
crt_rpc_t *req;
crt_rpc_t *req = NULL;
struct chk_pool_start_in *cpsi;
struct chk_pool_start_out *cpso;
int rc;
Expand Down Expand Up @@ -863,7 +863,7 @@ chk_pool_mbs_remote(d_rank_t rank, uint32_t phase, uint64_t gen, uuid_t uuid, ch
uint64_t seq, uint32_t flags, uint32_t mbs_nr, struct chk_pool_mbs *mbs_array,
int *svc_rc, struct rsvc_hint *svc_hint)
{
crt_rpc_t *req;
crt_rpc_t *req = NULL;
struct chk_pool_mbs_in *cpmi;
struct chk_pool_mbs_out *cpmo;
int rc;
Expand Down Expand Up @@ -908,7 +908,7 @@ int chk_report_remote(d_rank_t leader, uint64_t gen, uint32_t cla, uint32_t act,
char *msg, uint32_t option_nr, uint32_t *options, uint32_t detail_nr,
d_sg_list_t *details, uint64_t seq)
{
crt_rpc_t *req;
crt_rpc_t *req = NULL;
struct chk_report_in *cri;
struct chk_report_out *cro;
int rc;
Expand Down Expand Up @@ -984,7 +984,7 @@ int
chk_rejoin_remote(d_rank_t leader, uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags,
uint32_t *pool_nr, uuid_t **pools)
{
crt_rpc_t *req;
crt_rpc_t *req = NULL;
struct chk_rejoin_in *cri;
struct chk_rejoin_out *cro;
uuid_t *tmp;
Expand Down
86 changes: 78 additions & 8 deletions src/tests/suite/daos_cr.c
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,78 @@ cr_locate_dcri(struct daos_check_info *dci, struct daos_check_report_info *base,
return dcri;
}

static int
cr_pool_fail_result(struct daos_check_info *dci, uuid_t uuid, uint32_t cla, uint32_t act, int *ret)
{
struct daos_check_pool_info *dcpi;
struct daos_check_report_info *dcri;
int i;

if (dci->dci_pool_nr != 1) {
print_message("CR pool count %d (pool " DF_UUID ") is not 1\n",
dci->dci_pool_nr, DP_UUID(uuid));
return -DER_INVAL;
}

dcpi = &dci->dci_pools[0];
D_ASSERTF(uuid_compare(dcpi->dcpi_uuid, uuid) == 0,
"Unmatched pool UUID (1): " DF_UUID " vs " DF_UUID "\n",
DP_UUID(dcpi->dcpi_uuid), DP_UUID(uuid));

if (!cr_pool_status_failed(dcpi->dcpi_status)) {
print_message("CR pool " DF_UUID " status %s is not failed\n",
DP_UUID(uuid), dcpi->dcpi_status);
return -DER_INVAL;
}

if (cr_pool_phase_is_done(dcpi->dcpi_phase)) {
print_message("CR pool " DF_UUID " phase should not be done\n",
DP_UUID(uuid));
return -DER_INVAL;
}

#ifdef CR_ACCURATE_QUERY_RESULT
if (dci->dci_report_nr != 1) {
print_message("CR (failed) pool " DF_UUID " has unexpected reports: %d\n",
DP_UUID(uuid), dci->dci_report_nr);
return -DER_INVAL;
}
#endif

for (i = 0; i < dci->dci_report_nr; i++) {
dcri = &dci->dci_reports[i];
if (uuid_compare(dcri->dcri_uuid, uuid) != 0) {
#ifdef CR_ACCURATE_QUERY_RESULT
print_message("Detect unrelated inconsistency report: "
DF_UUID " vs " DF_UUID "\n",
DP_UUID(dcpi->dcpi_uuid), DP_UUID(uuid));
return -DER_INVAL;
#else
continue;
#endif
}

if (dcri->dcri_class != cla) {
print_message("CR (failed) pool " DF_UUID " reports unexpected "
"inconsistency at %d: %u vs %u\n",
DP_UUID(uuid), i, dcri->dcri_class, cla);
return -DER_INVAL;
}

if (dcri->dcri_act != act) {
print_message("CR (failed) pool " DF_UUID " reports unexpected "
"solution at %d: %u vs %u\n",
DP_UUID(uuid), i, dcri->dcri_act, act);
return -DER_INVAL;
}

*ret = dcri->dcri_result;
return 0;
}

return -DER_INVAL;
}

static void
cr_dci_fini(struct daos_check_info *dci)
{
Expand Down Expand Up @@ -2953,7 +3025,7 @@ cr_engine_rejoin_fail(void **state)
uint32_t class = TCC_POOL_NONEXIST_ON_MS;
uint32_t action;
int rank = -1;
int result;
int result = 0;
int rc;
int i;

Expand Down Expand Up @@ -3006,15 +3078,13 @@ cr_engine_rejoin_fail(void **state)
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
assert_rc_equal(rc, 0);

/* The check on the pool will fail as -DER_HG or -DER_TIMEDOUT. */
result = -DER_HG;
rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 1, &class, &action, &result);
if (rc == -DER_INVAL) {
result = -DER_TIMEDOUT;
rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 1, &class, &action, &result);
}
rc = cr_pool_fail_result(&dci, pool.pool_uuid, class, action, &result);
assert_rc_equal(rc, 0);

/* The check on the pool will fail because of -DER_HG or -DER_TIMEDOUT or -DER_OOG. */
D_ASSERTF(result == -DER_HG || result == -DER_TIMEDOUT || result == -DER_OOG,
"Unexpected pool " DF_UUID " fail result: %d\n", DP_UUID(pool.pool_uuid), result);

/* Reint the rank, rejoin will fail but not affect the rank start. */
rc = cr_rank_reint(rank, true);
assert_rc_equal(rc, 0);
Expand Down
Loading