From b7ec6f79fac7ceddc7d6715d22992be9accc4a18 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Mon, 23 Sep 2024 21:57:36 +0800 Subject: [PATCH] DAOS-16562 vos: umem cache metrics (#15155) Added umem cache metrics for md-on-ssd phase2 pool. Signed-off-by: Niu Yawei --- src/common/mem.c | 31 ++++++++++----- src/include/daos/mem.h | 16 ++++++++ src/vos/vos_common.c | 6 ++- src/vos/vos_gc.c | 2 +- src/vos/vos_internal.h | 56 ++++++++++++++++++++++++++ src/vos/vos_io.c | 7 +++- src/vos/vos_obj_cache.c | 12 ++++-- src/vos/vos_pool.c | 87 +++++++++++++++++++++++++++++++++-------- 8 files changed, 181 insertions(+), 36 deletions(-) diff --git a/src/common/mem.c b/src/common/mem.c index a40b12d854a..949019bd158 100644 --- a/src/common/mem.c +++ b/src/common/mem.c @@ -3008,6 +3008,12 @@ umem_cache_checkpoint(struct umem_store *store, umem_cache_wait_cb_t wait_cb, vo return rc; } +static inline void +inc_cache_stats(struct umem_cache *cache, unsigned int op) +{ + cache->ca_cache_stats[op] += 1; +} + static int cache_load_page(struct umem_cache *cache, struct umem_page_info *pinfo) { @@ -3035,7 +3041,7 @@ cache_load_page(struct umem_cache *cache, struct umem_page_info *pinfo) VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE((char *)pinfo->pi_addr, len); pinfo->pi_io = 0; if (rc) { - DL_ERROR(rc, "Read MD blob failed.\n"); + DL_ERROR(rc, "Read MD blob failed."); page_wakeup_io(cache, pinfo); return rc; } else if (cache->ca_evtcb_fn) { @@ -3053,6 +3059,7 @@ cache_load_page(struct umem_cache *cache, struct umem_page_info *pinfo) cache_add2lru(cache, pinfo); page_wakeup_io(cache, pinfo); + inc_cache_stats(cache, UMEM_CACHE_STATS_LOAD); return rc; } @@ -3144,6 +3151,7 @@ cache_flush_page(struct umem_cache *cache, struct umem_page_info *pinfo) &chkpt_id, NULL); D_FREE(chkpt_data_all); D_ASSERT(d_list_empty(&dirty_list)); + inc_cache_stats(cache, UMEM_CACHE_STATS_FLUSH); return rc; } @@ -3177,7 +3185,7 @@ cache_evict_page(struct umem_cache *cache, bool for_sys) if (is_page_dirty(pinfo)) { 
rc = cache_flush_page(cache, pinfo); if (rc) { - DL_ERROR(rc, "Flush page failed.\n"); + DL_ERROR(rc, "Flush page failed."); return rc; } @@ -3187,13 +3195,14 @@ cache_evict_page(struct umem_cache *cache, bool for_sys) } if (cache->ca_evtcb_fn) { - rc = - cache->ca_evtcb_fn(UMEM_CACHE_EVENT_PGEVICT, cache->ca_fn_arg, pinfo->pi_pg_id); + rc = cache->ca_evtcb_fn(UMEM_CACHE_EVENT_PGEVICT, cache->ca_fn_arg, + pinfo->pi_pg_id); if (rc) DL_ERROR(rc, "Page evict callback failed."); } d_list_del_init(&pinfo->pi_lru_link); cache_unmap_page(cache, pinfo); + inc_cache_stats(cache, UMEM_CACHE_STATS_EVICT); return 0; } @@ -3237,7 +3246,7 @@ cache_get_free_page(struct umem_cache *cache, struct umem_page_info **ret_pinfo, while (need_evict(cache)) { rc = cache_evict_page(cache, for_sys); if (rc && rc != -DER_AGAIN && rc != -DER_BUSY) { - DL_ERROR(rc, "Evict page failed.\n"); + DL_ERROR(rc, "Evict page failed."); return rc; } @@ -3288,7 +3297,7 @@ cache_map_pages(struct umem_cache *cache, uint32_t *pages, int page_nr) if (is_id_evictable(cache, pg_id)) { rc = cache_get_free_page(cache, &pinfo, 0, false); if (rc) { - DL_ERROR(rc, "Failed to get free page.\n"); + DL_ERROR(rc, "Failed to get free page."); break; } } else { @@ -3324,9 +3333,11 @@ cache_pin_pages(struct umem_cache *cache, uint32_t *pages, int page_nr, bool for if (pinfo != NULL) { D_ASSERT(pinfo->pi_pg_id == pg_id); D_ASSERT(pinfo->pi_mapped == 1); + inc_cache_stats(cache, UMEM_CACHE_STATS_HIT); goto next; } + inc_cache_stats(cache, UMEM_CACHE_STATS_MISS); rc = cache_get_free_page(cache, &pinfo, pinned, for_sys); if (rc) goto error; @@ -3471,7 +3482,7 @@ umem_cache_map(struct umem_store *store, struct umem_cache_range *ranges, int ra rc = cache_map_pages(cache, out_pages, page_nr); if (rc) - DL_ERROR(rc, "Map page failed.\n"); + DL_ERROR(rc, "Map page failed."); if (out_pages != &in_pages[0]) D_FREE(out_pages); @@ -3494,7 +3505,7 @@ umem_cache_load(struct umem_store *store, struct umem_cache_range *ranges, int r rc = 
cache_pin_pages(cache, out_pages, page_nr, for_sys); if (rc) { - DL_ERROR(rc, "Load page failed.\n"); + DL_ERROR(rc, "Load page failed."); } else { for (i = 0; i < page_nr; i++) { uint32_t pg_id = out_pages[i]; @@ -3531,7 +3542,7 @@ umem_cache_pin(struct umem_store *store, struct umem_cache_range *ranges, int ra rc = cache_pin_pages(cache, out_pages, page_nr, for_sys); if (rc) { - DL_ERROR(rc, "Load page failed.\n"); + DL_ERROR(rc, "Load page failed."); goto out; } @@ -3594,7 +3605,7 @@ umem_cache_reserve(struct umem_store *store) while (need_reserve(cache, 0)) { rc = cache_evict_page(cache, false); if (rc && rc != -DER_AGAIN && rc != -DER_BUSY) { - DL_ERROR(rc, "Evict page failed.\n"); + DL_ERROR(rc, "Evict page failed."); break; } diff --git a/src/include/daos/mem.h b/src/include/daos/mem.h index 2b03ac82e02..79709b58f3c 100644 --- a/src/include/daos/mem.h +++ b/src/include/daos/mem.h @@ -208,6 +208,20 @@ enum umem_page_stats { UMEM_PG_STATS_MAX, }; +enum umem_cache_stats { + /* How many page cache hit */ + UMEM_CACHE_STATS_HIT = 0, + /* How many page cache miss */ + UMEM_CACHE_STATS_MISS, + /* How many pages are evicted */ + UMEM_CACHE_STATS_EVICT, + /* How many dirty pages are flushed on evicting */ + UMEM_CACHE_STATS_FLUSH, + /* How many pages are loaded on cache miss */ + UMEM_CACHE_STATS_LOAD, + UMEM_CACHE_STATS_MAX, +}; + /** Global cache status for each umem_store */ struct umem_cache { struct umem_store *ca_store; @@ -255,6 +269,8 @@ struct umem_cache { void *ca_fn_arg; /** Page stats */ uint32_t ca_pgs_stats[UMEM_PG_STATS_MAX]; + /** Cache stats */ + uint64_t ca_cache_stats[UMEM_CACHE_STATS_MAX]; /** How many waiters waiting on free page reserve */ uint32_t ca_reserve_waiters; /** Waitqueue for free page reserve: umem_cache_reserve() */ diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index dbb8d28fd04..87d86c945b2 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -692,8 +692,7 @@ static inline int vos_metrics_count(void) { return 
vea_metrics_count() + - (sizeof(struct vos_agg_metrics) + sizeof(struct vos_space_metrics) + - sizeof(struct vos_chkpt_metrics)) / sizeof(struct d_tm_node_t *); + sizeof(struct vos_pool_metrics) / sizeof(struct d_tm_node_t *); } static void @@ -873,6 +872,9 @@ vos_metrics_alloc(const char *path, int tgt_id) /* Initialize metrics for WAL */ vos_wal_metrics_init(&vp_metrics->vp_wal_metrics, path, tgt_id); + /* Initialize metrics for umem cache */ + vos_cache_metrics_init(&vp_metrics->vp_cache_metrics, path, tgt_id); + return vp_metrics; } diff --git a/src/vos/vos_gc.c b/src/vos/vos_gc.c index cda1c60d147..5c4967d113f 100644 --- a/src/vos/vos_gc.c +++ b/src/vos/vos_gc.c @@ -747,7 +747,7 @@ gc_reclaim_pool(struct vos_pool *pool, int *credits, bool *empty_ret) rg.cr_off = umem_get_mb_base_offset(vos_pool2umm(pool), bkt); rg.cr_size = vos_pool2store(pool)->cache->ca_page_sz; - rc = umem_cache_pin(vos_pool2store(pool), &rg, 1, false, &pin_hdl); + rc = vos_cache_pin(vos_pool2store(pool), &rg, 1, false, &pin_hdl); if (rc) { DL_ERROR(rc, "Failed to pin bucket %u.", bkt); goto tx_error; diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index dc7f4796817..8aeec3ffcdf 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -241,6 +241,21 @@ struct vos_wal_metrics { void vos_wal_metrics_init(struct vos_wal_metrics *vw_metrics, const char *path, int tgt_id); +/* VOS pool metrics for umem cache */ +struct vos_cache_metrics { + struct d_tm_node_t *vcm_pg_ne; + struct d_tm_node_t *vcm_pg_pinned; + struct d_tm_node_t *vcm_pg_free; + struct d_tm_node_t *vcm_pg_hit; + struct d_tm_node_t *vcm_pg_miss; + struct d_tm_node_t *vcm_pg_evict; + struct d_tm_node_t *vcm_pg_flush; + struct d_tm_node_t *vcm_pg_load; + struct d_tm_node_t *vcm_obj_hit; +}; + +void vos_cache_metrics_init(struct vos_cache_metrics *vc_metrics, const char *path, int tgt_id); + struct vos_pool_metrics { void *vp_vea_metrics; struct vos_agg_metrics vp_agg_metrics; @@ -248,6 +263,7 @@ struct
vos_pool_metrics { struct vos_space_metrics vp_space_metrics; struct vos_chkpt_metrics vp_chkpt_metrics; struct vos_wal_metrics vp_wal_metrics; + struct vos_cache_metrics vp_cache_metrics; /* TODO: add more metrics for VOS */ }; @@ -1902,6 +1918,46 @@ vos_obj_reserve(struct umem_instance *umm, struct vos_object *obj, } /* vos_obj_cache.c */ +static inline struct vos_cache_metrics * +store2cache_metrics(struct umem_store *store) +{ + struct vos_pool_metrics *vpm = (struct vos_pool_metrics *)store->stor_stats; + + return vpm != NULL ? &vpm->vp_cache_metrics : NULL; +} + +static inline void +update_page_stats(struct umem_store *store) +{ + struct vos_cache_metrics *vcm = store2cache_metrics(store); + struct umem_cache *cache = store->cache; + + if (vcm == NULL) + return; + + d_tm_set_counter(vcm->vcm_pg_ne, cache->ca_pgs_stats[UMEM_PG_STATS_NONEVICTABLE]); + d_tm_set_counter(vcm->vcm_pg_pinned, cache->ca_pgs_stats[UMEM_PG_STATS_PINNED]); + d_tm_set_counter(vcm->vcm_pg_free, cache->ca_pgs_stats[UMEM_PG_STATS_FREE]); + + d_tm_set_counter(vcm->vcm_pg_hit, cache->ca_cache_stats[UMEM_CACHE_STATS_HIT]); + d_tm_set_counter(vcm->vcm_pg_miss, cache->ca_cache_stats[UMEM_CACHE_STATS_MISS]); + d_tm_set_counter(vcm->vcm_pg_evict, cache->ca_cache_stats[UMEM_CACHE_STATS_EVICT]); + d_tm_set_counter(vcm->vcm_pg_flush, cache->ca_cache_stats[UMEM_CACHE_STATS_FLUSH]); + d_tm_set_counter(vcm->vcm_pg_load, cache->ca_cache_stats[UMEM_CACHE_STATS_LOAD]); +} + +static inline int +vos_cache_pin(struct umem_store *store, struct umem_cache_range *ranges, int range_nr, + bool for_sys, struct umem_pin_handle **pin_handle) +{ + int rc; + + rc = umem_cache_pin(store, ranges, range_nr, for_sys, pin_handle); + update_page_stats(store); + + return rc; +} + int vos_obj_acquire(struct vos_container *cont, daos_unit_oid_t oid, bool pin, struct vos_object **obj_p); diff --git a/src/vos/vos_io.c b/src/vos/vos_io.c index 37beb8d5be5..32218dec7bb 100644 --- a/src/vos/vos_io.c +++ b/src/vos/vos_io.c @@ -2624,8 
+2624,11 @@ vos_update_begin(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch, /* Hold the object for the evictable md-on-ssd phase2 pool */ if (vos_pool_is_evictable(vos_cont2pool(ioc->ic_cont))) { /* - * To avoid the complication of adding object ilog to ts_set, we simply pin object - * here without ts_set involved. + * FIXME: + * The same object will be referenced by vos_obj_acquire() and vos_obj_hold() + * (in vos_update_end()) twice, this is for avoiding the complication of adding + * object ilog to ts_set. We'll re-org vos_obj_hold() in the future to make the + * code look cleaner. */ rc = vos_obj_acquire(ioc->ic_cont, ioc->ic_oid, true, &ioc->ic_pinned_obj); if (rc != 0) diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c index fdb0e3d0c7a..5a57df97234 100644 --- a/src/vos/vos_obj_cache.c +++ b/src/vos/vos_obj_cache.c @@ -333,22 +333,26 @@ obj_pin_bkt(struct vos_pool *pool, struct vos_object *obj) ABT_cond_wait(obj->obj_wait_loading, obj->obj_mutex); ABT_mutex_unlock(obj->obj_mutex); - /* The loader failed on umem_cache_pin() */ + /* The loader failed on vos_cache_pin() */ if (obj->obj_pin_hdl == NULL) { D_ERROR("Object:"DF_UOID" isn't pinned.\n", DP_UOID(obj->obj_id)); return -DER_BUSY; } } - if (obj->obj_pin_hdl != NULL) + if (obj->obj_pin_hdl != NULL) { + struct vos_cache_metrics *vcm = store2cache_metrics(store); + + d_tm_inc_counter(vcm->vcm_obj_hit, 1); return 0; + } obj->obj_bkt_loading = 1; rg.cr_off = umem_get_mb_base_offset(vos_pool2umm(pool), obj->obj_bkt_ids[0]); rg.cr_size = store->cache->ca_page_sz; - rc = umem_cache_pin(store, &rg, 1, false, &obj->obj_pin_hdl); + rc = vos_cache_pin(store, &rg, 1, false, &obj->obj_pin_hdl); if (rc) DL_ERROR(rc, "Failed to pin object:"DF_UOID".", DP_UOID(obj->obj_id)); @@ -886,7 +890,7 @@ vos_bkt_array_pin(struct vos_pool *pool, struct vos_bkt_array *bkts, ranges[i].cr_size = vos_pool2store(pool)->cache->ca_page_sz; } - rc = umem_cache_pin(vos_pool2store(pool), ranges, bkts->vba_cnt, false, 
pin_hdl); + rc = vos_cache_pin(vos_pool2store(pool), ranges, bkts->vba_cnt, false, pin_hdl); if (rc) DL_ERROR(rc, "Failed to pin %u ranges.", bkts->vba_cnt); diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index e206ecbb479..ff4edbb51e8 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -421,13 +421,75 @@ vos_wal_metrics_init(struct vos_wal_metrics *vw_metrics, const char *path, int t D_WARN("Failed to create 'replay_entries' telemetry: "DF_RC"\n", DP_RC(rc)); } +#define VOS_CACHE_DIR "vos_cache" + +void +vos_cache_metrics_init(struct vos_cache_metrics *vc_metrics, const char *path, int tgt_id) +{ + int rc; + + rc = d_tm_add_metric(&vc_metrics->vcm_pg_ne, D_TM_GAUGE, "Non-evictable pages", + "pages", "%s/%s/page_ne/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create non-evictable pages telemetry."); + + rc = d_tm_add_metric(&vc_metrics->vcm_pg_pinned, D_TM_GAUGE, "Pinned pages", + "pages", "%s/%s/page_pinned/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create pinned pages telemetry."); + + rc = d_tm_add_metric(&vc_metrics->vcm_pg_free, D_TM_GAUGE, "Free pages", + "pages", "%s/%s/page_free/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create free pages telemetry."); + + rc = d_tm_add_metric(&vc_metrics->vcm_pg_hit, D_TM_COUNTER, "Page cache hit", + "hits", "%s/%s/page_hit/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create page hit telemetry."); + + rc = d_tm_add_metric(&vc_metrics->vcm_pg_miss, D_TM_COUNTER, "Page cache miss", + "misses", "%s/%s/page_miss/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create page miss telemetry."); + + rc = d_tm_add_metric(&vc_metrics->vcm_pg_evict, D_TM_COUNTER, "Page cache evict", + "pages", "%s/%s/page_evict/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create page evict telemetry."); + + rc = d_tm_add_metric(&vc_metrics->vcm_pg_flush, 
D_TM_COUNTER, "Page cache flush", + "pages", "%s/%s/page_flush/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create page flush telemetry."); + + rc = d_tm_add_metric(&vc_metrics->vcm_pg_load, D_TM_COUNTER, "Page cache load", + "pages", "%s/%s/page_load/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create page load telemetry."); + + rc = d_tm_add_metric(&vc_metrics->vcm_obj_hit, D_TM_COUNTER, "Object cache hit", + "hits", "%s/%s/obj_hit/tgt_%d", path, VOS_CACHE_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create object hit telemetry."); + +} + +static inline struct vos_wal_metrics * +store2wal_metrics(struct umem_store *store) +{ + struct vos_pool_metrics *vpm = (struct vos_pool_metrics *)store->stor_stats; + + return vpm != NULL ? &vpm->vp_wal_metrics : NULL; +} + static inline int vos_wal_reserve(struct umem_store *store, uint64_t *tx_id) { struct bio_wal_info wal_info; struct vos_pool *pool; struct bio_wal_stats ws = { 0 }; - struct vos_wal_metrics *vwm; + struct vos_wal_metrics *vwm = store2wal_metrics(store); int rc; pool = store->vos_priv; @@ -445,7 +507,6 @@ vos_wal_reserve(struct umem_store *store, uint64_t *tx_id) reserve: D_ASSERT(store && store->stor_priv != NULL); - vwm = (struct vos_wal_metrics *)store->stor_stats; rc = bio_wal_reserve(store->stor_priv, tx_id, (vwm != NULL) ? &ws : NULL); if (rc == 0 && vwm != NULL) d_tm_set_gauge(vwm->vwm_wal_waiters, ws.ws_waiters); @@ -459,11 +520,10 @@ vos_wal_commit(struct umem_store *store, struct umem_wal_tx *wal_tx, void *data_ struct bio_wal_info wal_info; struct vos_pool *pool; struct bio_wal_stats ws = {0}; - struct vos_wal_metrics *vwm; + struct vos_wal_metrics *vwm = store2wal_metrics(store); int rc; D_ASSERT(store && store->stor_priv != NULL); - vwm = (struct vos_wal_metrics *)store->stor_stats; if (vwm != NULL) d_tm_mark_duration_start(vwm->vwm_wal_dur, D_TM_CLOCK_REALTIME); rc = bio_wal_commit(store->stor_priv, wal_tx, data_iod, (vwm != NULL) ? 
&ws : NULL); @@ -515,18 +575,15 @@ vos_wal_replay(struct umem_store *store, int (*replay_cb)(uint64_t tx_id, struct umem_action *act, void *arg), void *arg) { - struct bio_wal_rp_stats wrs; - int rc; + struct bio_wal_rp_stats wrs; + struct vos_wal_metrics *vwm = store2wal_metrics(store); + int rc; D_ASSERT(store && store->stor_priv != NULL); - rc = bio_wal_replay(store->stor_priv, - (store->stor_stats != NULL) ? &wrs : NULL, - replay_cb, arg); + rc = bio_wal_replay(store->stor_priv, (vwm != NULL) ? &wrs : NULL, replay_cb, arg); /* VOS file rehydration metrics */ - if (store->stor_stats != NULL && rc >= 0) { - struct vos_wal_metrics *vwm = (struct vos_wal_metrics *)store->stor_stats; - + if (vwm != NULL && rc >= 0) { d_tm_inc_counter(vwm->vwm_replay_count, 1); d_tm_set_gauge(vwm->vwm_replay_size, wrs.wrs_sz); d_tm_set_gauge(vwm->vwm_replay_time, wrs.wrs_tm); @@ -885,11 +942,7 @@ vos_pmemobj_open(const char *path, uuid_t pool_id, const char *layout, unsigned } init_umem_store(&store, mc); - if (metrics != NULL) { - struct vos_pool_metrics *vpm = (struct vos_pool_metrics *)metrics; - - store.stor_stats = &vpm->vp_wal_metrics; - } + store.stor_stats = metrics; umem_open: pop = umempobj_open(path, layout, UMEMPOBJ_ENABLE_STATS, &store);