From 906f0a44b74359f0e1370640851f53d77df77f49 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Fri, 6 Sep 2024 00:27:23 +0800 Subject: [PATCH] DAOS-16483 vos: handle empty DTX when vos_tx_end - b26 (#15055) It is possible that the DTX modified nothing when stopping the current backend transaction. In such a case, we may not generate a persistent DTX entry, so we need to bypass such a case before checking the on-disk DTX entry status. The patch also does some cleanup and removes redundant metrics for committed DTX entries. Enhance vos_dtx_deregister_record() to handle the GC case. Signed-off-by: Fan Yong --- src/dtx/dtx_common.c | 2 +- src/tests/ftest/util/telemetry_utils.py | 1 - src/vos/vos_common.c | 31 ++++----- src/vos/vos_dtx.c | 86 ++++++++++++++++++++----- src/vos/vos_tls.h | 1 - 5 files changed, 83 insertions(+), 38 deletions(-) diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index 353bd880009..ff4f2dfe4ef 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -1341,7 +1341,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul * it persistently. Otherwise, the subsequent DTX resync may not find it as * to regard it as failed transaction and abort it. */ - if (result == 0 && !dth->dth_active && !dth->dth_prepared && + if (result == 0 && !dth->dth_active && !dth->dth_prepared && !dth->dth_solo && (dth->dth_dist || dth->dth_modification_cnt > 0)) { result = vos_dtx_attach(dth, true, dth->dth_ent != NULL ? 
true : false); if (unlikely(result < 0)) { diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index aec831b3b8a..db424b6de68 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -421,7 +421,6 @@ class TelemetryUtils(): ENGINE_NVME_CRIT_WARN_METRICS +\ ENGINE_NVME_INTEL_VENDOR_METRICS ENGINE_MEM_USAGE_METRICS = [ - "engine_mem_vos_dtx_cmt_ent_48", "engine_mem_vos_vos_obj_360", "engine_mem_vos_vos_lru_size", "engine_mem_dtx_dtx_leader_handle_360"] diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index fb8461e2931..93bf1757f10 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -405,16 +405,24 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, } } else if (dae != NULL) { if (dth->dth_solo) { - if (err == 0 && cont->vc_solo_dtx_epoch < dth->dth_epoch) + if (err == 0 && dae->dae_committing && + cont->vc_solo_dtx_epoch < dth->dth_epoch) cont->vc_solo_dtx_epoch = dth->dth_epoch; vos_dtx_post_handle(cont, &dae, &dce, 1, false, err != 0); } else { D_ASSERT(dce == NULL); - if (err == 0) { - dae->dae_prepared = 1; + if (err == 0 && dth->dth_active) { + D_ASSERTF(!UMOFF_IS_NULL(dae->dae_df_off), + "Non-prepared DTX " DF_DTI "\n", + DP_DTI(&dth->dth_xid)); + dae_df = umem_off2ptr(umm, dae->dae_df_off); - D_ASSERT(!(dae_df->dae_flags & DTE_INVALID)); + D_ASSERTF(!(dae_df->dae_flags & DTE_INVALID), + "Invalid status for DTX " DF_DTI "\n", + DP_DTI(&dth->dth_xid)); + + dae->dae_prepared = 1; } } } @@ -563,13 +571,6 @@ vos_tls_init(int tags, int xs_id, int tgt_id) } } - rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, - "Number of committed entries kept around for reply" - " reconstruction", "entries", - "io/dtx/committed/tgt_%u", tgt_id); - if (rc) - D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", - DP_RC(rc)); if (tgt_id >= 0) { rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, "Number of committed entries kept around 
for reply" @@ -579,14 +580,6 @@ vos_tls_init(int tags, int xs_id, int tgt_id) D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", DP_RC(rc)); - rc = d_tm_add_metric(&tls->vtl_dtx_cmt_ent_cnt, D_TM_GAUGE, - "Number of committed entries", "entry", - "mem/vos/dtx_cmt_ent_%u/tgt_%u", - sizeof(struct vos_dtx_cmt_ent), tgt_id); - if (rc) - D_WARN("Failed to create committed cnt: "DF_RC"\n", - DP_RC(rc)); - rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE, "Number of cached vos object", "entry", "mem/vos/vos_obj_%u/tgt_%u", diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 0e70133629f..1c60f781507 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -769,7 +769,6 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t daos_epoch_t cmt_time, struct vos_dtx_cmt_ent **dce_p, struct vos_dtx_act_ent **dae_p, bool *rm_cos, bool *fatal) { - struct vos_tls *tls = vos_tls_get(false); struct vos_dtx_act_ent *dae = NULL; struct vos_dtx_cmt_ent *dce = NULL; d_iov_t kiov; @@ -834,7 +833,6 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t if (dce == NULL) D_GOTO(out, rc = -DER_NOMEM); - d_tm_inc_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); DCE_CMT_TIME(dce) = cmt_time; if (dae != NULL) { DCE_XID(dce) = DAE_XID(dae); @@ -1535,10 +1533,14 @@ int vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, uint32_t entry, daos_epoch_t epoch, umem_off_t record) { + struct dtx_handle *dth = vos_dth_get(false); struct vos_container *cont; struct vos_dtx_act_ent *dae; + struct vos_dtx_act_ent_df *dae_df; + umem_off_t *rec_df; bool found; int count; + int rc; int i; if (!vos_dtx_is_normal_entry(entry)) @@ -1567,10 +1569,54 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, * by another prepared (but non-committed) DTX, then do not allow current transaction * to modify it. 
Because if current transaction is aborted or failed for some reason, * there is no efficient way to recover such former non-committed DTX. + * + * If dth is NULL, then it is for GC. Under such case, deregister the record anyway. */ - if (dae->dae_dbd != NULL) - return dtx_inprogress(dae, vos_dth_get(cont->vc_pool->vp_sysdb), false, false, 8); + if (dae->dae_dbd != NULL) { + if (dth != NULL) + return dtx_inprogress(dae, dth, false, false, 8); + + dae_df = umem_off2ptr(umm, dae->dae_df_off); + D_ASSERT(!(dae_df->dae_flags & DTE_INVALID)); + if (dae_df->dae_rec_cnt > DTX_INLINE_REC_CNT) + count = DTX_INLINE_REC_CNT; + else + count = dae_df->dae_rec_cnt; + + rec_df = dae_df->dae_rec_inline; + for (i = 0; i < count; i++) { + if (record == umem_off2offset(rec_df[i])) { + rc = umem_tx_add_ptr(umm, &rec_df[i], sizeof(rec_df[i])); + if (rc != 0) + return rc; + + rec_df[i] = UMOFF_NULL; + goto cache; + } + } + + rec_df = umem_off2ptr(umm, dae_df->dae_rec_off); + if (rec_df == NULL) + /* If non-exist on disk, then must be non-exist in cache. */ + return 0; + + for (i = 0; i < dae_df->dae_rec_cnt - DTX_INLINE_REC_CNT; i++) { + if (record == umem_off2offset(rec_df[i])) { + rc = umem_tx_add_ptr(umm, &rec_df[i], sizeof(rec_df[i])); + if (rc != 0) + return rc; + + rec_df[i] = UMOFF_NULL; + goto cache; + } + } + + /* If non-exist on disk, then must be non-exist in cache. 
*/ + return 0; + } + +cache: if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) count = DTX_INLINE_REC_CNT; else @@ -2116,14 +2162,18 @@ vos_dtx_post_handle(struct vos_container *cont, if (!abort && dces != NULL) { struct vos_tls *tls = vos_tls_get(false); + int j = 0; D_ASSERT(cont->vc_pool->vp_sysdb == false); for (i = 0; i < count; i++) { - if (dces[i] != NULL) { - cont->vc_dtx_committed_count++; - cont->vc_pool->vp_dtx_committed_count++; - d_tm_inc_gauge(tls->vtl_committed, 1); - } + if (dces[i] != NULL) + j++; + } + + if (j > 0) { + cont->vc_dtx_committed_count += j; + cont->vc_pool->vp_dtx_committed_count += j; + d_tm_inc_gauge(tls->vtl_committed, j); } } @@ -2439,6 +2489,7 @@ vos_dtx_aggregate(daos_handle_t coh) uint64_t epoch; umem_off_t dbd_off; umem_off_t next = UMOFF_NULL; + int count = 0; int rc; int i; @@ -2481,13 +2532,10 @@ vos_dtx_aggregate(daos_handle_t coh) UMOFF_P(dbd_off), DP_RC(rc)); goto out; } - - cont->vc_dtx_committed_count--; - cont->vc_pool->vp_dtx_committed_count--; - d_tm_dec_gauge(tls->vtl_committed, 1); - d_tm_dec_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); } + count = dbd->dbd_count; + if (epoch != cont_df->cd_newest_aggregated) { rc = umem_tx_add_ptr(umm, &cont_df->cd_newest_aggregated, sizeof(cont_df->cd_newest_aggregated)); @@ -2545,8 +2593,14 @@ vos_dtx_aggregate(daos_handle_t coh) out: rc = umem_tx_end(umm, rc); - if (rc == 0 && cont->vc_cmt_dtx_reindex_pos == dbd_off) - cont->vc_cmt_dtx_reindex_pos = next; + if (rc == 0) { + if (cont->vc_cmt_dtx_reindex_pos == dbd_off) + cont->vc_cmt_dtx_reindex_pos = next; + + cont->vc_dtx_committed_count -= count; + cont->vc_pool->vp_dtx_committed_count -= count; + d_tm_dec_gauge(tls->vtl_committed, count); + } DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, "Release DTX committed blob %p (" UMOFF_PF ") for cont " DF_UUID, dbd, diff --git a/src/vos/vos_tls.h b/src/vos/vos_tls.h index 981cce10be5..2fc328457d0 100644 --- a/src/vos/vos_tls.h +++ b/src/vos/vos_tls.h @@ -64,7 +64,6 @@ struct vos_tls { }; struct 
d_tm_node_t *vtl_committed; struct d_tm_node_t *vtl_obj_cnt; - struct d_tm_node_t *vtl_dtx_cmt_ent_cnt; struct d_tm_node_t *vtl_lru_alloc_size; };