From 1958cb1ead33e06046da9e9a14991da835c0c11c Mon Sep 17 00:00:00 2001 From: wangdi Date: Wed, 14 Jun 2023 09:27:25 -0700 Subject: [PATCH] DAOS-13516 object: shrink the group number of GX object (#12217) Shrink the group number of GX object to avoid putting multiple shards in the same domain during rebuild, which might break the RF setting. GX group number will be shrunk from target_num/domain_nr to target_num/domain_nr - RF. Signed-off-by: Di Wang --- src/include/daos/object.h | 5 +++-- src/object/cli_obj.c | 16 ++++++++-------- src/object/obj_class.c | 35 ++++++++++++++++++++++++++++++----- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/src/include/daos/object.h b/src/include/daos/object.h index 71cc6c5ca8c..e23196e3e70 100644 --- a/src/include/daos/object.h +++ b/src/include/daos/object.h @@ -240,8 +240,9 @@ int daos_obj_set_oid_by_class(daos_obj_id_t *oid, enum daos_otype_t type, unsigned int daos_oclass_grp_size(struct daos_oclass_attr *oc_attr); unsigned int daos_oclass_grp_nr(struct daos_oclass_attr *oc_attr, struct daos_obj_md *md); -int daos_oclass_fit_max(daos_oclass_id_t oc_id, int domain_nr, int target_nr, - enum daos_obj_redun *ord, uint32_t *nr); +int +daos_oclass_fit_max(daos_oclass_id_t oc_id, int domain_nr, int target_nr, enum daos_obj_redun *ord, + uint32_t *nr, uint32_t rf_factor); bool daos_oclass_is_valid(daos_oclass_id_t oc_id); int daos_obj_get_oclass(daos_handle_t coh, enum daos_otype_t type, daos_oclass_hints_t hints, uint32_t args, daos_oclass_id_t *cid); diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index c36306ba8c8..4e8b9634c94 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -7174,6 +7174,7 @@ daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid, uint32_t nr_grp; struct cont_props props; int rc; + uint32_t rf; struct dc_cont *dc; if (!daos_otype_t_is_valid(type)) @@ -7199,18 +7200,17 @@ daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid, rc = pl_map_query(pool->dp_pool, &attr); 
D_ASSERT(rc == 0); dc_pool_put(pool); + rf = dc->dc_props.dcp_redun_fac; - D_DEBUG(DB_TRACE, "available domain=%d, targets=%d\n", - attr.pa_domain_nr, attr.pa_target_nr); + D_DEBUG(DB_TRACE, "available domain=%d, targets=%d rf:%u\n", attr.pa_domain_nr, + attr.pa_target_nr, rf); if (cid == OC_UNKNOWN) { - uint32_t rf; - - rf = dc->dc_props.dcp_redun_fac; rc = dc_set_oclass(rf, attr.pa_domain_nr, attr.pa_target_nr, type, hints, &ord, &nr_grp); } else { - rc = daos_oclass_fit_max(cid, attr.pa_domain_nr, attr.pa_target_nr, &ord, &nr_grp); + rc = daos_oclass_fit_max(cid, attr.pa_domain_nr, attr.pa_target_nr, &ord, &nr_grp, + rf); } dc_cont_put(dc); @@ -7263,8 +7263,8 @@ daos_obj_generate_oid_by_rf(daos_handle_t poh, uint64_t rf_factor, attr.pa_target_nr, type, hints, &ord, &nr_grp); else - rc = daos_oclass_fit_max(cid, attr.pa_domain_nr, - attr.pa_target_nr, &ord, &nr_grp); + rc = daos_oclass_fit_max(cid, attr.pa_domain_nr, attr.pa_target_nr, &ord, &nr_grp, + rf_factor); if (rc) return rc; diff --git a/src/object/obj_class.c b/src/object/obj_class.c index cbb2931dfe1..f533ccd7417 100644 --- a/src/object/obj_class.c +++ b/src/object/obj_class.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -234,9 +234,24 @@ daos_oclass_grp_nr(struct daos_oclass_attr *oc_attr, struct daos_obj_md *md) return oc_attr->ca_grp_nr; } +/** + * To honor RF setting during failure cases, let's reserve RF + * groups, so if some targets fail, there will be enough replacement + * targets to rebuild, so to avoid putting multiple shards in the same + * domain, which may break the RF setting. + * + * Though let's keep reserve targets to be less than 30% of the total + * targets. 
+ */ +static uint32_t +reserve_grp_by_rf(uint32_t target_nr, uint32_t grp_size, uint32_t rf) +{ + return min(((target_nr * 3) / 10) / grp_size, rf); +} + int -daos_oclass_fit_max(daos_oclass_id_t oc_id, int domain_nr, int target_nr, - enum daos_obj_redun *ord, uint32_t *nr) +daos_oclass_fit_max(daos_oclass_id_t oc_id, int domain_nr, int target_nr, enum daos_obj_redun *ord, + uint32_t *nr, uint32_t rf_factor) { struct daos_obj_class *oc; struct daos_oclass_attr ca; @@ -270,9 +285,14 @@ daos_oclass_fit_max(daos_oclass_id_t oc_id, int domain_nr, int target_nr, } grp_size = daos_oclass_grp_size(&ca); - if (ca.ca_grp_nr == DAOS_OBJ_GRP_MAX) + if (ca.ca_grp_nr == DAOS_OBJ_GRP_MAX) { + uint32_t reserve_grp = reserve_grp_by_rf(target_nr, grp_size, rf_factor); + ca.ca_grp_nr = max(1, (target_nr / grp_size)); + if (ca.ca_grp_nr > reserve_grp) + ca.ca_grp_nr -= reserve_grp; + } if (grp_size > domain_nr) { D_ERROR("grp size (%u) (%u) is larger than domain nr (%u)\n", grp_size, DAOS_OBJ_REPL_MAX, domain_nr); @@ -823,8 +843,13 @@ dc_set_oclass(uint32_t rf, int domain_nr, int target_nr, enum daos_otype_t otype } if (grp_nr == DAOS_OBJ_GRP_MAX || grp_nr * grp_size > target_nr) { + uint32_t max_grp = target_nr / grp_size; + uint32_t reserve_grp = reserve_grp_by_rf(target_nr, grp_size, rf); + /* search for the highest scalability in the allowed range */ - *nr = max(1, (target_nr / grp_size)); + if (max_grp > reserve_grp) + max_grp = max_grp - reserve_grp; + *nr = max(1, max_grp); } else { *nr = grp_nr; }