From 3ae5b2750809455c455afdd7b4acf02140291578 Mon Sep 17 00:00:00 2001 From: Xuezhao Liu Date: Wed, 12 Jun 2024 10:07:28 +0000 Subject: [PATCH] DAOS-16001 placement: fix cases for delay_rebuild Features: rebuild Allow-unstable-test: true Required-githooks: true Signed-off-by: Xuezhao Liu --- src/include/daos/pool_map.h | 21 +++++++++++++++++---- src/placement/pl_map.h | 3 ++- src/placement/pl_map_common.c | 20 ++++++++++++++------ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/include/daos/pool_map.h b/src/include/daos/pool_map.h index ce1d0a2b7fa..c5d571d9e63 100644 --- a/src/include/daos/pool_map.h +++ b/src/include/daos/pool_map.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -18,10 +18,10 @@ #define POOL_MAP_VER_2 (2) #define POOL_MAP_VERSION POOL_MAP_VER_2 -#define DF_TARGET "Target[%d] (rank %u idx %u status %u ver %u in/out ver %u fseq %u)" +#define DF_TARGET "Target[%d] (rank %u idx %u status %u flags %u ver %u in/out ver %u fseq %u)" #define DP_TARGET(t) t->ta_comp.co_id, t->ta_comp.co_rank, t->ta_comp.co_index,\ - t->ta_comp.co_status, t->ta_comp.co_ver, t->ta_comp.co_in_ver, \ - t->ta_comp.co_fseq + t->ta_comp.co_status, t->ta_comp.co_flags, t->ta_comp.co_ver, \ + t->ta_comp.co_in_ver, t->ta_comp.co_fseq /** * pool component types @@ -373,6 +373,19 @@ pool_target_is_up_or_drain(struct pool_target *tgt) return tgt->ta_comp.co_status & (PO_COMP_ST_UP | PO_COMP_ST_DRAIN); } +static inline bool +pool_target_is_up(struct pool_target *tgt) +{ + return (tgt->ta_comp.co_status == PO_COMP_ST_UP); +} + +static inline bool +pool_target_is_down2up(struct pool_target *tgt) +{ + return (tgt->ta_comp.co_status == PO_COMP_ST_UP) && + (tgt->ta_comp.co_flags & PO_COMPF_DOWN2UP); +} + /** Check if the target is in PO_COMP_ST_DOWN status */ static inline bool pool_target_down(struct pool_target *tgt) diff --git 
a/src/placement/pl_map.h b/src/placement/pl_map.h index 58ef26c97ee..0c648b59be1 100644 --- a/src/placement/pl_map.h +++ b/src/placement/pl_map.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -70,6 +70,7 @@ struct failed_shard { uint32_t fs_fseq; uint32_t fs_tgt_id; uint8_t fs_status; + uint32_t fs_down2up:1; }; #define DF_FAILEDSHARD "shard_idx: %d, fseq: %d, tgt_id: %d, status: %d" diff --git a/src/placement/pl_map_common.c b/src/placement/pl_map_common.c index 53039baccf7..ba5bdac315a 100644 --- a/src/placement/pl_map_common.c +++ b/src/placement/pl_map_common.c @@ -76,9 +76,11 @@ remap_alloc_one(d_list_t *remap_list, unsigned int shard_idx, f_new->fs_fseq = tgt->ta_comp.co_fseq; f_new->fs_status = tgt->ta_comp.co_status; f_new->fs_data = data; + if (pool_target_is_down2up(tgt)) + f_new->fs_down2up = 1; - D_DEBUG(DB_PL, "tgt %u status %u reint %s\n", tgt->ta_comp.co_id, - tgt->ta_comp.co_status, for_reint ? "yes" : "no"); + D_DEBUG(DB_PL, "tgt %u status %u flags %u reint %s\n", tgt->ta_comp.co_id, + tgt->ta_comp.co_status, tgt->ta_comp.co_flags, for_reint ? "yes" : "no"); if (!for_reint) { f_new->fs_tgt_id = -1; remap_add_one(remap_list, f_new); @@ -251,7 +253,13 @@ is_comp_avaible(struct pool_component *comp, uint32_t allow_version, status = PO_COMP_ST_UPIN; } else if (status == PO_COMP_ST_UP) { if (comp->co_flags & PO_COMPF_DOWN2UP) { - status = PO_COMP_ST_UPIN; + /* PO_COMP_ST_UP status with PO_COMPF_DOWN2UP flag + * is the case of delay_rebuild exclude+reint. + * Do not mark it as UPIN, so that it is not used for + * rebuild enumerate/fetch, because its data will be + * discarded during reintegration. 
+ */ + /* status = PO_COMP_ST_UPIN; */ } else { if (comp->co_fseq <= 1) status = PO_COMP_ST_NEW; @@ -387,12 +395,12 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md, l_shard->po_fseq = f_shard->fs_fseq; /* - * Mark the shard as 'rebuilding' so that read will - * skip this shard. + * Mark the shard as 'rebuilding' so that read will skip this shard. + * f_shard->fs_down2up is the case of delay_rebuild exclude+reint. */ if (f_shard->fs_status == PO_COMP_ST_DOWN || f_shard->fs_status == PO_COMP_ST_DRAIN || - pool_target_down(spare_tgt)) + f_shard->fs_down2up || pool_target_down(spare_tgt)) l_shard->po_rebuilding = 1; } else { l_shard->po_shard = -1;