Skip to content

Commit

Permalink
DAOS-13896 pool: reintegration discard after rebuild (#12577)
Browse files Browse the repository at this point in the history
Pool reintegration discard should be done after rebuild
to make sure all DOWNOUT targets being discarded before
reintegration.

Required-githooks: true

Signed-off-by: Di Wang <di.wang@intel.com>
  • Loading branch information
wangdi authored Jul 17, 2023
1 parent 4ffb5ca commit 6be7d3a
Showing 1 changed file with 30 additions and 15 deletions.
45 changes: 30 additions & 15 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -5787,7 +5787,6 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc,
struct pool_map *map;
uint32_t map_version_before;
uint32_t map_version;
uint32_t rebuild_ver = 0;
struct pool_buf *map_buf = NULL;
bool updated = false;
int rc;
Expand All @@ -5796,19 +5795,6 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc,
DP_UUID(svc->ps_uuid), opc, exclude_rank, tgts->pti_number,
tgt_addrs == NULL ? 0 : tgt_addrs->pta_number);

/* Check if there are ongoing rebuild jobs for extend/reint/drain, to avoid
* updating the pool map during rebuild, which might screw the object layout.
*/
if (opc == POOL_EXTEND || opc == POOL_REINT || opc == POOL_DRAIN) {
ds_rebuild_running_query(svc->ps_uuid, &rebuild_ver, NULL, NULL);
if (rebuild_ver != 0) {
D_ERROR(DF_UUID": other rebuild job rebuild ver %u is ongoing,"
" so current opc %d can not be done: %d\n",
DP_UUID(svc->ps_uuid), rebuild_ver, opc, DER_BUSY);
D_GOTO(out, rc = -DER_BUSY);
}
}

rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx);
if (rc != 0)
goto out;
Expand Down Expand Up @@ -6146,6 +6132,7 @@ ds_pool_extend_handler(crt_rpc_t *rpc)
d_rank_list_t rank_list;
uint32_t ndomains;
uint32_t *domains;
uint32_t rebuild_ver = 0;
int rc;

D_DEBUG(DB_MD, DF_UUID": processing rpc %p\n", DP_UUID(in->pei_op.pi_uuid), rpc);
Expand All @@ -6160,12 +6147,24 @@ ds_pool_extend_handler(crt_rpc_t *rpc)
if (rc != 0)
goto out;

/**
* Check if there are ongoing rebuild jobs for extend/reint/drain, to avoid
* updating the pool map during rebuild, which might screw the object layout.
*/
ds_rebuild_running_query(svc->ps_uuid, &rebuild_ver, NULL, NULL);
if (rebuild_ver != 0) {
D_ERROR(DF_UUID": other rebuild job rebuild ver %u is ongoing,"
"extending can not be done: %d\n",
DP_UUID(svc->ps_uuid), rebuild_ver, -DER_BUSY);
D_GOTO(put, rc = -DER_BUSY);
}

rc = pool_svc_update_map(svc, opc_get(rpc->cr_opc),
false /* exclude_rank */,
&rank_list, domains, ndomains,
NULL, NULL, &out->peo_op.po_map_version,
&out->peo_op.po_hint);

put:
pool_svc_put_leader(svc);
out:
out->peo_op.po_rc = rc;
Expand Down Expand Up @@ -6239,6 +6238,7 @@ ds_pool_update_handler(crt_rpc_t *rpc)
struct pool_tgt_update_in *in = crt_req_get(rpc);
struct pool_tgt_update_out *out = crt_reply_get(rpc);
struct pool_svc *svc;
uint32_t rebuild_ver = 0;
struct pool_target_addr_list list = { 0 };
struct pool_target_addr_list inval_list_out = { 0 };
int rc;
Expand All @@ -6255,6 +6255,21 @@ ds_pool_update_handler(crt_rpc_t *rpc)
if (rc != 0)
goto out;

if (opc_get(rpc->cr_opc) == POOL_REINT || opc_get(rpc->cr_opc) == POOL_DRAIN) {
/**
* Check if there are ongoing rebuild jobs for extend/reint/drain, to avoid
* updating the pool map during rebuild, which might screw the object layout.
*/
ds_rebuild_running_query(svc->ps_uuid, &rebuild_ver, NULL, NULL);
if (rebuild_ver != 0) {
D_ERROR(DF_UUID": other rebuild job rebuild ver %u is ongoing,"
" so current opc %d can not be done: %d\n",
DP_UUID(svc->ps_uuid), rebuild_ver, opc_get(rpc->cr_opc),
-DER_BUSY);
D_GOTO(out_svc, rc = -DER_BUSY);
}
}

list.pta_number = in->pti_addr_list.ca_count;
list.pta_addrs = in->pti_addr_list.ca_arrays;

Expand Down

0 comments on commit 6be7d3a

Please sign in to comment.