From ef5e51c3c6eb71180c8dd82d05832e3cbb30e44c Mon Sep 17 00:00:00 2001
From: Li Wei
Date: Wed, 4 Oct 2023 06:40:06 +0900
Subject: [PATCH 1/2] DAOS-11955 pool: Ensure a PS is inside pool (#13046)

* DAOS-11955 pool: Ensure a PS is inside its pool

It was found that a PS leader may enter ds_pool_plan_svc_reconfs with
itself being an undesirable replica. This may lead to an assertion
failure at "move n replicas from undesired to to_remove" in
ds_pool_plan_svc_reconfs. Moreover, such a PS leader may be outside of
the pool group, making it incapable of performing many duties that
involve collective communication.

This patch therefore ensures that a PS leader will remove undesirable
PS replicas synchronously before committing a pool map modification
that introduces new undesirable PS replicas. (If we were to keep an
undesirable PS replica, it might become a PS leader.)

- Extend and clean up pool_svc_sched.

  * Allow pool_svc_reconf_ult to return an error, so that we can fail a
    pool map modification if its synchronous PS replica removal fails.

  * Allow pool_svc_reconf_ult to take an argument, so that we can tell
    pool_svc_reconf_ult whether we want a synchronous remove-only run
    or an asynchronous add-remove run.

  * Move pool_svc_sched.{psc_svc_rf,psc_force_notify} up to pool_svc.

- Prevent pool_svc_step_up_cb from canceling in-progress
  reconfigurations by comparing pool map versions for which the
  reconfigurations are scheduled.

- Rename POOL_GROUP_MAP_STATUS to POOL_GROUP_MAP_STATES so that we are
  consistent with the pool_map module.

Signed-off-by: Li Wei
Signed-off-by: Jeff Olivier
---
 src/pool/srv_internal.h |  13 +-
 src/pool/srv_pool.c     | 321 +++++++++++++++++++++++++++++++---
 src/pool/srv_target.c   |   2 +-
 src/pool/srv_util.c     | 181 ++++++++++------------
 4 files changed, 338 insertions(+), 179 deletions(-)

diff --git a/src/pool/srv_internal.h b/src/pool/srv_internal.h
index 8a0725fe7c7..0ffb55cb4c7 100644
--- a/src/pool/srv_internal.h
+++ b/src/pool/srv_internal.h
@@ -16,8 +16,17 @@
 #include 
 #include 
 
-/* Map status of ranks that make up the pool group */
-#define POOL_GROUP_MAP_STATUS (PO_COMP_ST_UP | PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN)
+/* Map states of ranks that make up the pool group */
+#define POOL_GROUP_MAP_STATES (PO_COMP_ST_UP | PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN)
+
+/* Map states of ranks that make up the pool service */
+#define POOL_SVC_MAP_STATES (PO_COMP_ST_UP | PO_COMP_ST_UPIN)
+
+/*
+ * Since we want all PS replicas to belong to the pool group,
+ * POOL_SVC_MAP_STATES must be a subset of POOL_GROUP_MAP_STATES.
+ */ +D_CASSERT((POOL_SVC_MAP_STATES & POOL_GROUP_MAP_STATES) == POOL_SVC_MAP_STATES); /** * Global pool metrics diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index d9ff14e266b..20aca49f867 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -65,12 +65,12 @@ struct pool_svc_events { /* Pool service schedule state */ struct pool_svc_sched { - int psc_svc_rf; - bool psc_force_notify; /* for pool_svc_step_up_cb */ ABT_mutex psc_mutex; /* only for psc_cv */ ABT_cond psc_cv; bool psc_in_progress; bool psc_canceled; + void *psc_arg; + int psc_rc; }; static int @@ -89,10 +89,10 @@ sched_init(struct pool_svc_sched *sched) return dss_abterr2der(rc); } - sched->psc_svc_rf = -1; - sched->psc_force_notify = false; sched->psc_in_progress = false; sched->psc_canceled = false; + sched->psc_arg = NULL; + sched->psc_rc = 0; return 0; } @@ -104,10 +104,12 @@ sched_fini(struct pool_svc_sched *sched) } static void -sched_begin(struct pool_svc_sched *sched) +sched_begin(struct pool_svc_sched *sched, void *arg) { sched->psc_in_progress = true; sched->psc_canceled = false; + sched->psc_arg = arg; + sched->psc_rc = 0; } static void @@ -118,20 +120,32 @@ sched_end(struct pool_svc_sched *sched) } static void -sched_cancel_and_wait(struct pool_svc_sched *sched) +sched_cancel(struct pool_svc_sched *sched) +{ + if (sched->psc_in_progress) + sched->psc_canceled = true; +} + +static void +sched_wait(struct pool_svc_sched *sched) { /* * The CV requires a mutex. We don't otherwise need it for ULTs within * the same xstream. */ ABT_mutex_lock(sched->psc_mutex); - if (sched->psc_in_progress) - sched->psc_canceled = true; while (sched->psc_in_progress) ABT_cond_wait(sched->psc_cv, sched->psc_mutex); ABT_mutex_unlock(sched->psc_mutex); } +static void +sched_cancel_and_wait(struct pool_svc_sched *sched) +{ + sched_cancel(sched); + sched_wait(sched); +} + /* Pool service */ struct pool_svc { struct ds_rsvc ps_rsvc; @@ -144,8 +158,9 @@ struct pool_svc { struct ds_pool *ps_pool; struct pool_svc_events ps_events; uint32_t ps_global_version; + int ps_svc_rf; + bool ps_force_notify;/* MS of PS membership */ struct pool_svc_sched ps_reconf_sched; - /* Check all containers RF for the pool */ struct pool_svc_sched ps_rfcheck_sched; /* The global pool map version on all pool targets */ @@ -1043,6 +1058,8 @@ pool_svc_alloc_cb(d_iov_t *id, struct ds_rsvc **rsvc) uuid_copy(svc->ps_uuid, id->iov_buf); D_INIT_LIST_HEAD(&svc->ps_events.pse_queue); svc->ps_events.pse_handler = ABT_THREAD_NULL; + svc->ps_svc_rf = -1; + svc->ps_force_notify = false; rc = ABT_rwlock_create(&svc->ps_lock); if (rc != ABT_SUCCESS) { @@ -1519,9 +1536,9 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, svc_rf_entry = daos_prop_entry_get(*prop, DAOS_PROP_PO_SVC_REDUN_FAC); D_ASSERT(svc_rf_entry != NULL); if (daos_prop_is_set(svc_rf_entry)) - svc->ps_reconf_sched.psc_svc_rf = svc_rf_entry->dpe_val; + svc->ps_svc_rf = svc_rf_entry->dpe_val; else - svc->ps_reconf_sched.psc_svc_rf = -1; + svc->ps_svc_rf = -1; out_lock: ABT_rwlock_unlock(svc->ps_lock); @@ -1750,9 +1767,11 @@ pool_svc_step_down_metrics(struct pool_svc *svc) d_tm_set_gauge(metrics->total_ranks, 0); } -static void pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, - void (*func)(void *)); static void pool_svc_reconf_ult(void *arg); +static int pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, + void (*func)(void *), void *arg); +static int pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, + uint32_t 
map_version_for, bool sync_remove); static void pool_svc_rfcheck_ult(void *arg); static int @@ -1766,7 +1785,6 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) daos_prop_t *prop = NULL; bool cont_svc_up = false; bool events_initialized = false; - bool svc_scheduled = false; d_rank_t rank = dss_self_rank(); int rc; @@ -1813,10 +1831,22 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) * Just in case the previous leader didn't finish the last series of * reconfigurations or the last MS notification. */ - svc->ps_reconf_sched.psc_force_notify = true; - pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult); - pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult); - svc_scheduled = true; + svc->ps_force_notify = true; + rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */); + if (rc == -DER_OP_CANCELED) { + DL_INFO(rc, DF_UUID": not scheduling pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + } else if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to schedule pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + goto out; + } + + rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, NULL /* arg */); + if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to schedule RF check", DP_UUID(svc->ps_uuid)); + goto out; + } rc = ds_pool_iv_prop_update(svc->ps_pool, prop); if (rc) { @@ -1862,11 +1892,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) if (rc != 0) { if (events_initialized) fini_events(svc); - if (svc_scheduled) { - sched_cancel_and_wait(&svc->ps_reconf_sched); - sched_cancel_and_wait(&svc->ps_rfcheck_sched); - } - + sched_cancel_and_wait(&svc->ps_rfcheck_sched); + sched_cancel_and_wait(&svc->ps_reconf_sched); if (cont_svc_up) ds_cont_svc_step_down(svc->ps_cont_svc); if (svc->ps_pool != NULL) @@ -5875,27 +5902,47 @@ ds_pool_svc_delete_acl(uuid_t pool_uuid, d_rank_list_t *ranks, return rc; } +struct pool_svc_reconf_arg { + struct pool_map *sca_map; + uint32_t sca_map_version_for; + bool sca_sync_remove; +}; + +/* Must be used with pool_svc.ps_reconf_sched (see container_of below). */ static void -pool_svc_reconf_ult(void *arg) -{ - struct pool_svc *svc = arg; - struct pool_svc_sched *reconf = &svc->ps_reconf_sched; - d_rank_list_t *current; - d_rank_list_t *to_add; - d_rank_list_t *to_remove; - d_rank_list_t *new; - int rc; +pool_svc_reconf_ult(void *varg) +{ + struct pool_svc_sched *reconf = varg; + struct pool_svc_reconf_arg *arg = reconf->psc_arg; + struct pool_svc *svc; + struct pool_map *map; + d_rank_list_t *current; + d_rank_list_t *to_add; + d_rank_list_t *to_remove; + d_rank_list_t *new; + int rc; + + svc = container_of(reconf, struct pool_svc, ps_reconf_sched); + + if (arg->sca_map == NULL) + map = svc->ps_pool->sp_map; + else + map = arg->sca_map; D_DEBUG(DB_MD, DF_UUID": begin\n", DP_UUID(svc->ps_uuid)); - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out; + } /* When there are pending events, the pool map may be unstable. 
*/ - while (events_pending(svc)) { + while (!arg->sca_sync_remove && events_pending(svc)) { dss_sleep(3000 /* ms */); - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out; + } } rc = rdb_get_ranks(svc->ps_rsvc.s_db, ¤t); @@ -5905,10 +5952,12 @@ pool_svc_reconf_ult(void *arg) goto out; } - ABT_rwlock_rdlock(svc->ps_pool->sp_lock); - rc = ds_pool_plan_svc_reconfs(reconf->psc_svc_rf, svc->ps_pool->sp_map, current, - dss_self_rank(), &to_add, &to_remove); - ABT_rwlock_unlock(svc->ps_pool->sp_lock); + if (arg->sca_map == NULL) + ABT_rwlock_rdlock(svc->ps_pool->sp_lock); + rc = ds_pool_plan_svc_reconfs(svc->ps_svc_rf, map, current, dss_self_rank(), &to_add, + &to_remove); + if (arg->sca_map == NULL) + ABT_rwlock_unlock(svc->ps_pool->sp_lock); if (rc != 0) { D_ERROR(DF_UUID": cannot plan pool service reconfigurations: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); @@ -5916,7 +5965,7 @@ pool_svc_reconf_ult(void *arg) } D_DEBUG(DB_MD, DF_UUID": svc_rf=%d current=%u to_add=%u to_remove=%u\n", - DP_UUID(svc->ps_uuid), reconf->psc_svc_rf, current->rl_nr, to_add->rl_nr, + DP_UUID(svc->ps_uuid), svc->ps_svc_rf, current->rl_nr, to_add->rl_nr, to_remove->rl_nr); /* @@ -5927,14 +5976,17 @@ pool_svc_reconf_ult(void *arg) * of the two calls returns an error, we still need to report any * membership changes to the MS. */ - if (to_add->rl_nr > 0) + if (!arg->sca_sync_remove && to_add->rl_nr > 0) { ds_rsvc_add_replicas_s(&svc->ps_rsvc, to_add, ds_rsvc_get_md_cap()); - if (reconf->psc_canceled) - goto out_to_add_remove; - if (to_add->rl_nr > to_remove->rl_nr) - to_remove->rl_nr = 0; - else - to_remove->rl_nr -= to_add->rl_nr; + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; + goto out_to_add_remove; + } + if (to_add->rl_nr > to_remove->rl_nr) + to_remove->rl_nr = 0; + else + to_remove->rl_nr -= to_add->rl_nr; + } if (to_remove->rl_nr > 0) { d_rank_list_t *tmp; @@ -5963,26 +6015,35 @@ pool_svc_reconf_ult(void *arg) d_rank_list_sort(current); d_rank_list_sort(new); - if (reconf->psc_force_notify || !d_rank_list_identical(new, current)) { + if (svc->ps_force_notify || !d_rank_list_identical(new, current)) { + int rc_tmp; + /* * Send RAS event to control-plane over dRPC to indicate * change in pool service replicas. */ - rc = ds_notify_pool_svc_update(&svc->ps_uuid, new, svc->ps_rsvc.s_term); - if (rc == 0) - reconf->psc_force_notify = false; + rc_tmp = ds_notify_pool_svc_update(&svc->ps_uuid, new, svc->ps_rsvc.s_term); + if (rc_tmp == 0) + svc->ps_force_notify = false; else - D_ERROR(DF_UUID": replica update notify failure: "DF_RC"\n", - DP_UUID(svc->ps_uuid), DP_RC(rc)); + DL_ERROR(rc_tmp, DF_UUID": replica update notify failure", + DP_UUID(svc->ps_uuid)); } d_rank_list_free(new); } - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out_to_add_remove; + } - /* Ignore the return value of this ds_rsvc_dist_stop call. */ - if (to_remove->rl_nr > 0) + /* + * Don't attempt to destroy any removed replicas in the "synchronous + * remove" mode, so that we don't delay pool_svc_update_map_internal + * for too long. Ignore the return value of this ds_rsvc_dist_stop + * call. + */ + if (!arg->sca_sync_remove && to_remove->rl_nr > 0) ds_rsvc_dist_stop(svc->ps_rsvc.s_class, &svc->ps_rsvc.s_id, to_remove, NULL /* excluded */, svc->ps_rsvc.s_term, true /* destroy */); @@ -5992,14 +6053,17 @@ pool_svc_reconf_ult(void *arg) out_cur: d_rank_list_free(current); out: + /* Do not yield between the D_FREE and the sched_end. 
*/ + D_FREE(reconf->psc_arg); + reconf->psc_rc = rc; sched_end(reconf); ABT_cond_broadcast(reconf->psc_cv); - D_DEBUG(DB_MD, DF_UUID": end\n", DP_UUID(svc->ps_uuid)); + D_DEBUG(DB_MD, DF_UUID": end: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); } -static void -pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, - void (*func)(void *)) +static int +pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, void (*func)(void *), + void *arg) { enum ds_rsvc_state state; int rc; @@ -6014,13 +6078,13 @@ pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, if (state == DS_RSVC_DRAINING) { D_DEBUG(DB_MD, DF_UUID": end: service %s\n", DP_UUID(svc->ps_uuid), ds_rsvc_state_str(state)); - return; + return -DER_OP_CANCELED; } D_ASSERT(&svc->ps_reconf_sched == sched || &svc->ps_rfcheck_sched == sched); sched_cancel_and_wait(sched); - sched_begin(sched); + sched_begin(sched, arg); /* * An extra svc leader reference is not required, because @@ -6028,14 +6092,16 @@ pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, * * ULT tracking is achieved through sched, not a ULT handle. */ - rc = dss_ult_create(func, svc, DSS_XS_SELF, 0, 0, NULL /* ult */); + rc = dss_ult_create(func, sched, DSS_XS_SELF, 0, 0, NULL /* ult */); if (rc != 0) { D_ERROR(DF_UUID": failed to create ULT: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); sched_end(sched); + return rc; } D_DEBUG(DB_MD, DF_UUID": end: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); + return 0; } static int pool_find_all_targets_by_addr(struct pool_map *map, @@ -6065,11 +6131,12 @@ cont_rf_check_cb(uuid_t pool_uuid, uuid_t cont_uuid, struct rdb_tx *tx, void *ar return 0; } +/* Must be used with pool_svc.ps_rfcheck_sched (see container_of below). */ static void pool_svc_rfcheck_ult(void *arg) { - struct pool_svc *svc = arg; - int rc; + struct pool_svc *svc = container_of(arg, struct pool_svc, ps_rfcheck_sched); + int rc; do { /* retry until some one stop the pool svc(rc == 1) or succeed */ @@ -6089,6 +6156,69 @@ pool_svc_rfcheck_ult(void *arg) ABT_cond_broadcast(svc->ps_rfcheck_sched.psc_cv); } +/* + * If map is NULL, map_version_for must be provided, and svc->ps_pool->sp_map + * will be used during reconfiguration; otherwise, map_version_for is ignored. + */ +static int +pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, uint32_t map_version_for, + bool sync_remove) +{ + struct pool_svc_reconf_arg *reconf_arg; + uint32_t v; + int rc; + + if (map == NULL) + v = map_version_for; + else + v = pool_map_get_version(map); + + if (svc->ps_reconf_sched.psc_in_progress) { + uint32_t v_in_progress; + + /* Safe to access psc_arg as long as we don't yield. */ + reconf_arg = svc->ps_reconf_sched.psc_arg; + if (reconf_arg->sca_map == NULL) + v_in_progress = reconf_arg->sca_map_version_for; + else + v_in_progress = pool_map_get_version(reconf_arg->sca_map); + if (v_in_progress >= v) { + D_DEBUG(DB_MD, DF_UUID": stale request: v_in_progress=%u v=%u\n", + DP_UUID(svc->ps_uuid), v_in_progress, v); + return -DER_OP_CANCELED; + } + } + + D_ALLOC_PTR(reconf_arg); + if (reconf_arg == NULL) + return -DER_NOMEM; + reconf_arg->sca_map = map; + reconf_arg->sca_map_version_for = v; + reconf_arg->sca_sync_remove = sync_remove; + + /* + * If successful, this call passes the ownership of reconf_arg to + * pool_svc_reconf_ult. 
+ */ + rc = pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult, reconf_arg); + if (rc != 0) { + D_FREE(reconf_arg); + return rc; + } + + if (sync_remove) { + sched_wait(&svc->ps_reconf_sched); + + rc = svc->ps_reconf_sched.psc_rc; + if (rc != 0) { + DL_ERROR(rc, DF_UUID": pool service reconfigurator", DP_UUID(svc->ps_uuid)); + return rc; + } + } + + return 0; +} + /* * Perform an update to the pool map of \a svc. * @@ -6128,6 +6258,7 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, uint32_t map_version_before; uint32_t map_version; struct pool_buf *map_buf = NULL; + struct pool_domain *node; bool updated = false; int rc; @@ -6197,13 +6328,13 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, } } } + /* * Attempt to modify the temporary pool map and save its versions * before and after. If the version hasn't changed, we are done. */ map_version_before = pool_map_get_version(map); - rc = ds_pool_map_tgts_update(map, tgts, opc, exclude_rank, tgt_map_ver, - true); + rc = ds_pool_map_tgts_update(map, tgts, opc, exclude_rank, tgt_map_ver, true); if (rc != 0) D_GOTO(out_map, rc); map_version = pool_map_get_version(map); @@ -6212,6 +6343,35 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, if (map_version == map_version_before) D_GOTO(out_map, rc = 0); + /* + * If the map modification affects myself, leave it to a new PS leader + * if there's another PS replica, or reject it. + */ + node = pool_map_find_node_by_rank(map, dss_self_rank()); + if (node == NULL || !(node->do_comp.co_status & POOL_SVC_MAP_STATES)) { + d_rank_list_t *replicas; + + rc = rdb_get_ranks(svc->ps_rsvc.s_db, &replicas); + if (replicas->rl_nr == 1) { + D_ERROR(DF_UUID": rejecting rank exclusion: self removal requested\n", + DP_UUID(svc->ps_uuid)); + rc = -DER_INVAL; + } else { + /* + * The handling is unreliable, for we may become a new + * PS leader again; a more reliable implementation + * requires the currently unavailable Raft leadership + * transfer support. + */ + D_INFO(DF_UUID": resigning PS leadership: self removal requested\n", + DP_UUID(svc->ps_uuid)); + rdb_resign(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term); + rc = -DER_NOTLEADER; + } + d_rank_list_free(replicas); + goto out_map; + } + /* Write the new pool map. */ rc = pool_buf_extract(map, &map_buf); if (rc != 0) @@ -6220,6 +6380,17 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, if (rc != 0) goto out_map_buf; + /* + * Remove all undesired PS replicas (if any) before committing map, so + * that the set of PS replicas remains a subset of the pool groups. 
+ */ + rc = pool_svc_schedule_reconf(svc, map, 0 /* map_version_for */, true /* sync_remove */); + if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to remove undesired pool service replicas", + DP_UUID(svc->ps_uuid)); + goto out_map; + } + rc = rdb_tx_commit(&tx); if (rc != 0) { D_DEBUG(DB_MD, DF_UUID": failed to commit: "DF_RC"\n", @@ -6245,9 +6416,17 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, ds_rsvc_request_map_dist(&svc->ps_rsvc); - pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult); - if (opc == MAP_EXCLUDE) - pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult); + rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */); + if (rc != 0) + DL_INFO(rc, DF_UUID": failed to schedule pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + + if (opc == POOL_EXCLUDE) { + rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, + NULL /* arg */); + if (rc != 0) + DL_INFO(rc, DF_UUID": failed to schedule RF check", DP_UUID(svc->ps_uuid)); + } rc = pool_svc_update_map_metrics(svc->ps_uuid, map, svc->ps_pool->sp_metrics[DAOS_POOL_MODULE]); @@ -7117,7 +7296,7 @@ ds_pool_ranks_get_handler(crt_rpc_t *rpc) D_GOTO(out, rc = -DER_INVAL); /* Get available ranks */ - rc = ds_pool_get_ranks(in->prgi_op.pi_uuid, POOL_GROUP_MAP_STATUS, &out_ranks); + rc = ds_pool_get_ranks(in->prgi_op.pi_uuid, POOL_GROUP_MAP_STATES, &out_ranks); if (rc != 0) { D_ERROR(DF_UUID ": get ranks failed, " DF_RC "\n", DP_UUID(in->prgi_op.pi_uuid), DP_RC(rc)); diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index 619a5ac2c30..a0b8d98acd8 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -1596,7 +1596,7 @@ update_pool_group(struct ds_pool *pool, struct pool_map *map) D_DEBUG(DB_MD, DF_UUID": %u -> %u\n", DP_UUID(pool->sp_uuid), version, pool_map_get_version(map)); - rc = map_ranks_init(map, POOL_GROUP_MAP_STATUS, &ranks); + rc = map_ranks_init(map, POOL_GROUP_MAP_STATES, &ranks); if (rc != 0) return rc; diff --git a/src/pool/srv_util.c b/src/pool/srv_util.c index b5b37709690..75beb2bc8d8 100644 --- a/src/pool/srv_util.c +++ b/src/pool/srv_util.c @@ -90,7 +90,7 @@ ds_pool_map_rank_up(struct pool_map *map, d_rank_t rank) return false; D_ASSERTF(rc == 1, "%d\n", rc); - return node->do_comp.co_status & POOL_GROUP_MAP_STATUS; + return node->do_comp.co_status & POOL_GROUP_MAP_STATES; } int @@ -332,12 +332,12 @@ compute_svc_reconf_objective(int svc_rf, d_rank_list_t *replicas) } /* - * Find n ranks with states in nodes but not in blacklist_0 or blacklist_1, and - * append them to list. Return the number of ranks appended or an error. + * Find n ranks with states in nodes but not in blacklist, and append them to + * list. Return the number of ranks appended or an error. 
*/ static int find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnodes, - d_rank_list_t *blacklist_0, d_rank_list_t *blacklist_1, d_rank_list_t *list) + d_rank_list_t *blacklist, d_rank_list_t *list) { int n_appended = 0; int i; @@ -349,9 +349,7 @@ find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnode for (i = 0; i < nnodes; i++) { if (!(nodes[i].do_comp.co_status & states)) continue; - if (d_rank_list_find(blacklist_0, nodes[i].do_comp.co_rank, NULL /* idx */)) - continue; - if (d_rank_list_find(blacklist_1, nodes[i].do_comp.co_rank, NULL /* idx */)) + if (d_rank_list_find(blacklist, nodes[i].do_comp.co_rank, NULL /* idx */)) continue; rc = d_rank_list_append(list, nodes[i].do_comp.co_rank); if (rc != 0) @@ -370,7 +368,10 @@ find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnode * caller is responsible for freeing \a to_add_out and \a to_remove_out with * d_rank_list_free. * - * We desire replicas in UP or UPIN states. + * We desire replicas in POOL_SVC_MAP_STATES. The \a self replica must be in a + * desired state in \a map, or this function will return -DER_INVAL. All + * undesired replicas, if any, will be appended to \a to_remove, so that no + * replica is outside the pool group. * * If removals are necessary, we only append desired replicas to \a * to_remove_out after all undesired replicas have already been appended to the @@ -392,12 +393,10 @@ int ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replicas, d_rank_t self, d_rank_list_t **to_add_out, d_rank_list_t **to_remove_out) { - const pool_comp_state_t desired_states = PO_COMP_ST_UPIN; struct pool_domain *nodes = NULL; int nnodes; int objective; d_rank_list_t *desired = NULL; - d_rank_list_t *undesired = NULL; d_rank_list_t *to_add = NULL; d_rank_list_t *to_remove = NULL; int i; @@ -409,93 +408,56 @@ ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replic objective = compute_svc_reconf_objective(svc_rf, replicas); desired = d_rank_list_alloc(0); - undesired = d_rank_list_alloc(0); to_add = d_rank_list_alloc(0); to_remove = d_rank_list_alloc(0); - if (desired == NULL || undesired == NULL || to_add == NULL || to_remove == NULL) { + if (desired == NULL || to_add == NULL || to_remove == NULL) { rc = -DER_NOMEM; goto out; } - /* Classify replicas into desired and undesired. */ + /* Classify replicas into desired and to_remove. */ for (i = 0; i < replicas->rl_nr; i++) { + d_rank_t rank = replicas->rl_ranks[i]; d_rank_list_t *list; int j; for (j = 0; j < nnodes; j++) - if (nodes[j].do_comp.co_rank == replicas->rl_ranks[i]) + if (nodes[j].do_comp.co_rank == rank) break; if (j == nnodes) /* not found (hypothetical) */ - list = undesired; - else if (nodes[j].do_comp.co_status & desired_states) + list = to_remove; + else if (nodes[j].do_comp.co_status & POOL_SVC_MAP_STATES) list = desired; else - list = undesired; - rc = d_rank_list_append(list, replicas->rl_ranks[i]); + list = to_remove; + if (rank == self && list == to_remove) { + D_ERROR("self undesired: state=%x\n", + j < nnodes ? nodes[j].do_comp.co_status : -1); + rc = -DER_INVAL; + goto out; + } + rc = d_rank_list_append(list, rank); if (rc != 0) goto out; } - D_DEBUG(DB_MD, "desired=%u undesired=%u objective=%d\n", desired->rl_nr, undesired->rl_nr, + D_DEBUG(DB_MD, "desired=%u undesired=%u objective=%d\n", desired->rl_nr, to_remove->rl_nr, objective); - /* - * If we have too many replicas, remove undesired ones (if any) before - * desired ones. 
- */ - while (desired->rl_nr + undesired->rl_nr > objective) { - rc = move_rank_except_for(self, undesired, to_remove); - if (rc == -DER_NONEXIST) - break; - else if (rc != 0) - goto out; - } - while (desired->rl_nr + undesired->rl_nr > objective) { - rc = move_rank_except_for(self, desired, to_remove); - D_ASSERT(rc != -DER_NONEXIST); - if (rc != 0) - goto out; - } - - /* If necessary, add more replicas towards the objective. */ - if (desired->rl_nr + undesired->rl_nr < objective) { - rc = find_ranks(objective - desired->rl_nr - undesired->rl_nr, desired_states, - nodes, nnodes, desired, undesired, to_add); - if (rc < 0) - goto out; - /* Copy the new ones to desired. */ - for (i = 0; i < to_add->rl_nr; i++) { - rc = d_rank_list_append(desired, to_add->rl_ranks[i]); + if (desired->rl_nr > objective) { + /* Too many replicas, remove one by one. */ + do { + rc = move_rank_except_for(self, desired, to_remove); + D_ASSERT(rc != -DER_NONEXIST); if (rc != 0) goto out; - } - } - - /* - * If there are undesired ones, try to replace as many of them as - * possible. - */ - if (undesired->rl_nr > 0) { - int n; - - rc = find_ranks(undesired->rl_nr, desired_states, nodes, nnodes, desired, undesired, - to_add); + } while (desired->rl_nr > objective); + } else if (desired->rl_nr < objective) { + /* Too few replicas, add some. */ + rc = find_ranks(objective - desired->rl_nr, POOL_SVC_MAP_STATES, nodes, nnodes, + desired, to_add); if (rc < 0) goto out; - n = rc; - /* Copy the n replacements to desired. */ - for (i = 0; i < n; i++) { - rc = d_rank_list_append(desired, to_add->rl_ranks[i]); - if (rc != 0) - goto out; - } - /* Move n replicas from undesired to to_remove. */ - for (i = 0; i < n; i++) { - rc = move_rank_except_for(self, undesired, to_remove); - D_ASSERT(rc != -DER_NONEXIST); - if (rc != 0) - goto out; - } } rc = 0; @@ -507,7 +469,6 @@ ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replic d_rank_list_free(to_remove); d_rank_list_free(to_add); } - d_rank_list_free(undesired); d_rank_list_free(desired); return rc; } @@ -546,10 +507,6 @@ testu_rank_sets_belong(d_rank_list_t *x, d_rank_t *y_ranks, int y_ranks_len) static struct pool_map * testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_down_ranks) { - d_rank_list_t ranks_list = { - .rl_ranks = ranks, - .rl_nr = n_ranks - }; struct pool_buf *map_buf; struct pool_map *map; uint32_t *domains; @@ -567,8 +524,7 @@ testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_ domains[3 + i] = i; rc = gen_pool_buf(NULL /* map */, &map_buf, 1 /* map_version */, n_domains, - n_ranks, n_ranks * 1 /* ntargets */, domains, &ranks_list, - 1 /* dss_tgt_nr */); + n_ranks, n_ranks * 1 /* ntargets */, domains, 1 /* dss_tgt_nr */); D_ASSERT(rc == 0); rc = pool_map_create(map_buf, 1, &map); @@ -590,7 +546,8 @@ testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_ static void testu_plan_svc_reconfs(int svc_rf, d_rank_t ranks[], int n_ranks, d_rank_t down_ranks[], int n_down_ranks, d_rank_t replicas_ranks[], int n_replicas_ranks, - d_rank_t self, d_rank_list_t **to_add, d_rank_list_t **to_remove) + d_rank_t self, int expected_rc, d_rank_list_t **to_add, + d_rank_list_t **to_remove) { struct pool_map *map; d_rank_list_t replicas_list; @@ -602,7 +559,7 @@ testu_plan_svc_reconfs(int svc_rf, d_rank_t ranks[], int n_ranks, d_rank_t down_ replicas_list.rl_nr = n_replicas_ranks; rc = ds_pool_plan_svc_reconfs(svc_rf, map, &replicas_list, self, to_add, to_remove); - 
D_ASSERT(rc == 0); + D_ASSERTF(rc == expected_rc, "rc=%d expected_rc=%d\n", rc, expected_rc); pool_map_decref(map); } @@ -614,10 +571,11 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_list_t *to_add; d_rank_list_t *to_remove; -#define call_testu_plan_svc_reconfs \ +#define call_testu_plan_svc_reconfs(expected_rc) \ testu_plan_svc_reconfs(svc_rf, ranks, ARRAY_SIZE(ranks), down_ranks, \ ARRAY_SIZE(down_ranks), replicas_ranks, \ - ARRAY_SIZE(replicas_ranks), self, &to_add, &to_remove); + ARRAY_SIZE(replicas_ranks), self, expected_rc, &to_add, \ + &to_remove); #define call_d_rank_list_free \ d_rank_list_free(to_add); \ @@ -630,7 +588,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {}; d_rank_t replicas_ranks[] = {0, 1, 2, 3, 4}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 0); @@ -638,6 +596,16 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } + /* The PS leader itself must not be undesired. */ + { + int svc_rf = 1; + d_rank_t ranks[] = {0, 1, 2}; + d_rank_t down_ranks[] = {0}; + d_rank_t replicas_ranks[] = {0, 1, 2}; + + call_testu_plan_svc_reconfs(-DER_INVAL) + } + /* One lonely replica. */ { int svc_rf = 0; @@ -645,7 +613,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {}; d_rank_t replicas_ranks[] = {0}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 0); @@ -661,7 +629,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -678,7 +646,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -695,7 +663,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1, 2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -704,17 +672,19 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } - /* A PS holds its ground when there's no replacement. */ + /* A PS removes the down rank even when there's no replacement. 
*/ { int svc_rf = 1; d_rank_t ranks[] = {0, 1, 2}; d_rank_t down_ranks[] = {2}; d_rank_t replicas_ranks[] = {0, 1, 2}; + d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); - D_ASSERT(to_remove->rl_nr == 0); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } @@ -728,7 +698,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t expected_to_add_candidates[] = {3, 4}; d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 1); D_ASSERT(testu_rank_sets_belong(to_add, expected_to_add_candidates, @@ -750,7 +720,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1, 2, 3}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -767,7 +737,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0, 1, 2}; d_rank_t expected_to_remove[] = {1, 2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, @@ -776,19 +746,21 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } - /* A PS keeps down ranks while growing. */ + /* A PS removes down ranks while growing. */ { int svc_rf = 2; - d_rank_t ranks[] = {0, 1, 2, 3, 4}; + d_rank_t ranks[] = {0, 1, 2, 3, 4, 5}; d_rank_t down_ranks[] = {2}; d_rank_t replicas_ranks[] = {0, 1, 2}; - d_rank_t expected_to_add[] = {3, 4}; + d_rank_t expected_to_add[] = {3, 4, 5}; + d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); - D_ASSERT(to_remove->rl_nr == 0); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } @@ -802,7 +774,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t expected_to_remove_candidates[] = {1, 2, 3, 4, 5, 6, 7, 8}; d_rank_list_t tmp; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 4); @@ -822,15 +794,14 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {1, 3, 5, 7}; d_rank_t replicas_ranks[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; d_rank_t expected_to_add[] = {9}; - d_rank_t expected_to_remove_candidates[] = {1, 3, 5, 7}; + d_rank_t expected_to_remove[] = {1, 3, 5, 7}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); - D_ASSERT(to_remove->rl_nr == 3); - D_ASSERT(testu_rank_sets_belong(to_remove, expected_to_remove_candidates, - ARRAY_SIZE(expected_to_remove_candidates))); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } From 9b7547e8c577878e170dca2f1d7f5db894dc4011 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Thu, 8 Feb 2024 02:57:50 +0900 Subject: [PATCH 2/2] DAOS-14730 pool: Clean up map update logging (#13709) This patch cleans up the pool map update logging on the client side and the engine side. A few notable changes: - In dc_pool_map_update, if the incoming map is of the same version as the one we already have, do not perform the update. 
Signed-off-by: Li Wei --- src/pool/cli.c | 31 ++++++++++++------------------- src/pool/srv_target.c | 41 ++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/pool/cli.c b/src/pool/cli.c index 4258ce9084e..d92e2e3caaa 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -471,32 +471,23 @@ int dc_pool_map_update(struct dc_pool *pool, struct pool_map *map, bool connect) { unsigned int map_version; + unsigned int map_version_before = 0; int rc; D_ASSERT(map != NULL); map_version = pool_map_get_version(map); - if (pool->dp_map == NULL) { - rc = pl_map_update(pool->dp_pool, map, connect, DEFAULT_PL_TYPE); - if (rc != 0) - D_GOTO(out, rc); - - D_DEBUG(DB_MD, DF_UUID": init pool map: %u\n", - DP_UUID(pool->dp_pool), pool_map_get_version(map)); - D_GOTO(out_update, rc = 0); - } + if (pool->dp_map != NULL) + map_version_before = pool_map_get_version(pool->dp_map); - if (map_version < pool_map_get_version(pool->dp_map)) { - D_DEBUG(DB_MD, DF_UUID": got older pool map: %u -> %u %p\n", - DP_UUID(pool->dp_pool), - pool_map_get_version(pool->dp_map), map_version, pool); + if (map_version <= map_version_before) { + D_DEBUG(DB_MD, DF_UUID ": ignored pool map update: version=%u->%u pool=%p\n", + DP_UUID(pool->dp_pool), map_version_before, map_version, pool); D_GOTO(out, rc = 0); } - D_DEBUG(DB_MD, DF_UUID": updating pool map: %u -> %u\n", - DP_UUID(pool->dp_pool), - pool->dp_map == NULL ? - 0 : pool_map_get_version(pool->dp_map), map_version); + D_DEBUG(DB_MD, DF_UUID ": updating pool map: version=%u->%u\n", DP_UUID(pool->dp_pool), + map_version_before, map_version); rc = pl_map_update(pool->dp_pool, map, connect, DEFAULT_PL_TYPE); if (rc != 0) { @@ -505,12 +496,14 @@ dc_pool_map_update(struct dc_pool *pool, struct pool_map *map, bool connect) D_GOTO(out, rc); } - pool_map_decref(pool->dp_map); -out_update: + if (pool->dp_map != NULL) + pool_map_decref(pool->dp_map); pool_map_addref(map); pool->dp_map = map; if (pool->dp_map_version_known < map_version) pool->dp_map_version_known = map_version; + D_INFO(DF_UUID ": updated pool map: version=%u->%u\n", DP_UUID(pool->dp_pool), + map_version_before, map_version); out: return rc; } diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index a0b8d98acd8..696f32c8b58 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -1642,7 +1642,7 @@ ds_pool_tgt_map_update(struct ds_pool *pool, struct pool_buf *buf, unsigned int map_version) { struct pool_map *map = NULL; - bool update_map = false; + bool map_updated = false; int rc = 0; if (buf != NULL) { @@ -1661,11 +1661,10 @@ ds_pool_tgt_map_update(struct ds_pool *pool, struct pool_buf *buf, pool_map_get_version(pool->sp_map) < map_version)) { struct pool_map *tmp = pool->sp_map; - D_DEBUG(DB_MD, DF_UUID - ": update pool_map version: %p/%d -> %p/%d\n", - DP_UUID(pool->sp_uuid), pool->sp_map, - pool->sp_map ? pool_map_get_version(pool->sp_map) : -1, - map, pool_map_get_version(map)); + D_DEBUG(DB_MD, DF_UUID ": updating pool map: version=%u->%u pointer=%p->%p\n", + DP_UUID(pool->sp_uuid), + pool->sp_map == NULL ? 0 : pool_map_get_version(pool->sp_map), + pool_map_get_version(map), pool->sp_map, map); rc = update_pool_group(pool, map); if (rc != 0) { @@ -1690,26 +1689,33 @@ ds_pool_tgt_map_update(struct ds_pool *pool, struct pool_buf *buf, D_GOTO(out, rc); } - update_map = true; - /* drop the stale map */ + /* Swap pool->sp_map and map. 
*/ pool->sp_map = map; map = tmp; + + map_updated = true; + D_INFO(DF_UUID ": updated pool map: version=%u->%u pointer=%p->%p\n", + DP_UUID(pool->sp_uuid), map == NULL ? 0 : pool_map_get_version(map), + pool_map_get_version(pool->sp_map), map, pool->sp_map); } /* Check if the pool map on each xstream needs to update */ if (pool->sp_map_version < map_version) { - D_DEBUG(DB_MD, DF_UUID - ": changed cached map version: %u -> %u\n", - DP_UUID(pool->sp_uuid), pool->sp_map_version, - map_version); + unsigned int map_version_before = pool->sp_map_version; + + D_DEBUG(DB_MD, DF_UUID ": updating cached pool map version: %u->%u\n", + DP_UUID(pool->sp_uuid), map_version_before, map_version); pool->sp_map_version = map_version; rc = dss_task_collective(update_child_map, pool, 0); D_ASSERT(rc == 0); - update_map = true; + + map_updated = true; + D_INFO(DF_UUID ": updated cached pool map version: %u->%u\n", + DP_UUID(pool->sp_uuid), map_version_before, map_version); } - if (update_map) { + if (map_updated) { struct dtx_scan_args *arg; int ret; @@ -1729,9 +1735,10 @@ ds_pool_tgt_map_update(struct ds_pool *pool, struct pool_buf *buf, D_FREE(arg); } } else { - D_WARN("Ignore update pool "DF_UUID" %d -> %d\n", - DP_UUID(pool->sp_uuid), pool->sp_map_version, - map_version); + /* This should be a D_DEBUG eventually. */ + D_INFO(DF_UUID ": ignored pool map update: version=%u->%u cached_version=%u\n", + DP_UUID(pool->sp_uuid), pool_map_get_version(pool->sp_map), map_version, + pool->sp_map_version); } out: ABT_rwlock_unlock(pool->sp_lock);