Skip to content

Commit

Permalink
DAOS-16591 mgmt, vos, common: Align scm/meta size (#15146)
Browse files Browse the repository at this point in the history
The scm and meta sizes for vos are now aligned up to 16M for pools
using the phase 2 allocator.

Signed-off-by: Sherin T George <sherin-t.george@hpe.com>
  • Loading branch information
sherintg committed Sep 24, 2024
1 parent b7ec6f7 commit af71f88
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 14 deletions.
6 changes: 6 additions & 0 deletions src/common/dav_v2/dav_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -470,3 +470,9 @@ dav_class_register_v2(dav_obj_t *pop, struct dav_alloc_class_desc *p)

return 0;
}

/**
 * Return the page size for dav_v2.
 *
 * The parameter list is spelled (void) so that this is a real prototype;
 * an empty list would leave the arguments unchecked in pre-C23 C.
 *
 * \return	ZONE_MAX_SIZE
 */
DAV_FUNC_EXPORT size_t
dav_obj_pgsz_v2(void)
{
	return ZONE_MAX_SIZE;
}
6 changes: 6 additions & 0 deletions src/common/dav_v2/dav_v2.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,4 +313,10 @@ dav_get_heap_mb_stats_v2(dav_obj_t *pop, uint32_t mb_id, struct dav_heap_mb_stat
uint32_t
dav_allot_mb_evictable_v2(dav_obj_t *pop, int flags);

/*
 * Return the page size for dav_v2.
 *
 * Takes no arguments; declared with (void) so that callers passing
 * arguments are diagnosed by the compiler.
 */
size_t
dav_obj_pgsz_v2(void);

#endif /* __DAOS_COMMON_DAV_V2_H */
31 changes: 28 additions & 3 deletions src/common/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ struct umem_tx_stage_item {

#ifdef DAOS_PMEM_BUILD

static int daos_md_backend = DAOS_MD_PMEM;
/* Metadata backend type; DAOS_MD_PMEM by default, assigned by umempobj_settings_init(). */
static int daos_md_backend = DAOS_MD_PMEM;
/* Set from the DAOS_MD_DISABLE_BMEM_V2 tunable; read by umempobj_allow_md_bmem_v2(). */
static bool daos_disable_bmem_v2 = false;
#define UMM_SLABS_CNT 16

/** Initializes global settings for the pmem objects.
Expand All @@ -51,6 +52,7 @@ umempobj_settings_init(bool md_on_ssd)
int rc;
enum pobj_arenas_assignment_type atype;
unsigned int md_mode = DAOS_MD_BMEM;
unsigned int md_disable_bmem_v2 = 0;

if (!md_on_ssd) {
daos_md_backend = DAOS_MD_PMEM;
Expand Down Expand Up @@ -81,16 +83,30 @@ umempobj_settings_init(bool md_on_ssd)
return -DER_INVAL;
};

d_getenv_uint("DAOS_MD_DISABLE_BMEM_V2", &md_disable_bmem_v2);
if (md_disable_bmem_v2 && (md_mode != DAOS_MD_BMEM))
D_INFO("Ignoring DAOS_MD_DISABLE_BMEM_V2 tunable");
else
daos_disable_bmem_v2 = md_disable_bmem_v2;

daos_md_backend = md_mode;
return 0;
}

int umempobj_get_backend_type(void)
/* Return the metadata backend type currently stored in daos_md_backend. */
int
umempobj_get_backend_type(void)
{
return daos_md_backend;
}

int umempobj_backend_type2class_id(int backend)
/**
 * Report whether bmem_v2 pools are allowed.
 *
 * \return	true unless the bmem_v2 disable flag (daos_disable_bmem_v2)
 *		has been set.
 */
bool
umempobj_allow_md_bmem_v2(void)
{
	return !daos_disable_bmem_v2;
}

int
umempobj_backend_type2class_id(int backend)
{
switch (backend) {
case DAOS_MD_PMEM:
Expand All @@ -108,6 +124,15 @@ int umempobj_backend_type2class_id(int backend)
}
}

/**
 * Get the page size for a umem backend.
 *
 * \param[in] backend	Backend type (DAOS_MD_*).
 *
 * \return	The dav_v2 page size for DAOS_MD_BMEM_V2, otherwise
 *		4096 (1UL << 12).
 */
size_t
umempobj_pgsz(int backend)
{
	/* Only the bmem_v2 backend asks its allocator; all others use 1UL << 12. */
	return (backend == DAOS_MD_BMEM_V2) ? dav_obj_pgsz_v2() : (1UL << 12);
}

/** Define common slabs. We can refine this for 2.4 pools, but that is for the next patch. */
static const int slab_map[] = {
0, /* 32 bytes */
Expand Down
8 changes: 8 additions & 0 deletions src/include/daos/mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ int umempobj_settings_init(bool md_on_ssd);
/* convert backend type to umem class id */
int umempobj_backend_type2class_id(int backend);

/*
 * get page size for the backend
 *
 * Returns the dav_v2 page size when backend is DAOS_MD_BMEM_V2 and
 * 4096 (1UL << 12) for every other backend value.
 */
size_t
umempobj_pgsz(int backend);

/* umem persistent object property flags */
#define UMEMPOBJ_ENABLE_STATS 0x1

Expand All @@ -46,6 +50,10 @@ enum {
/* return umem backend type */
int umempobj_get_backend_type(void);

/*
 * returns whether bmem_v2 pools are allowed
 *
 * Takes no arguments; declared with (void) so that the compiler checks
 * call sites against a real prototype.
 */
bool
umempobj_allow_md_bmem_v2(void);

#endif

struct umem_wal_tx;
Expand Down
10 changes: 10 additions & 0 deletions src/include/daos_srv/vos.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,16 @@ int
vos_aggregate(daos_handle_t coh, daos_epoch_range_t *epr,
int (*yield_func)(void *arg), void *yield_arg, uint32_t flags);

/**
 * Round up the scm and meta sizes to match the backend requirement.
 *
 * Each size is aligned up to the page size of the backend chosen for the
 * given scm/meta combination and is raised to at least 16MB (1 << 24).
 *
 * \param[in,out] scm_sz	SCM size that needs to be aligned up; must be
 *				non-zero (asserted by the implementation)
 * \param[in,out] meta_sz	META size that needs to be aligned up; a value
 *				of zero is left unchanged
 *
 * \return 0 on success, error otherwise.
 *
 * NOTE(review): the definition in vos_pool.c spells the parameters as
 * daos_size_t * rather than size_t * -- confirm the two types are identical
 * on all supported platforms.
 */
int
vos_pool_roundup_size(size_t *scm_sz, size_t *meta_sz);

/**
* Discards changes in all epochs with the epoch range \a epr
*
Expand Down
2 changes: 1 addition & 1 deletion src/mgmt/srv_drpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ ds_mgmt_drpc_pool_create(Drpc__Call *drpc_req, Drpc__Response *drpc_resp)

scm_size = req->tier_bytes[DAOS_MEDIA_SCM];
if (req->mem_ratio)
scm_size *= req->mem_ratio;
scm_size *= (double)req->mem_ratio;

rc = ds_mgmt_create_pool(pool_uuid, req->sys, "pmem", targets, scm_size,
req->tier_bytes[DAOS_MEDIA_NVME], prop, &svc, req->n_fault_domains,
Expand Down
26 changes: 19 additions & 7 deletions src/mgmt/srv_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,15 +1048,14 @@ tgt_create_preallocate(void *arg)
* 16MB minimum per pmemobj file (SCM partition)
*/
D_ASSERT(dss_tgt_nr > 0);
D_ASSERT((tca->tca_scm_size / dss_tgt_nr) >= (1 << 24));
if (!bio_nvme_configured(SMD_DEV_TYPE_META)) {
rc = tgt_vos_preallocate_sequential(tca->tca_ptrec->dptr_uuid,
max(tca->tca_scm_size / dss_tgt_nr,
1 << 24), dss_tgt_nr);
rc = tgt_vos_preallocate_sequential(
tca->tca_ptrec->dptr_uuid, tca->tca_scm_size / dss_tgt_nr, dss_tgt_nr);
} else {
rc = tgt_vos_preallocate_parallel(tca->tca_ptrec->dptr_uuid,
max(tca->tca_scm_size / dss_tgt_nr,
1 << 24), dss_tgt_nr,
&tca->tca_ptrec->cancel_create);
rc = tgt_vos_preallocate_parallel(
tca->tca_ptrec->dptr_uuid, tca->tca_scm_size / dss_tgt_nr, dss_tgt_nr,
&tca->tca_ptrec->cancel_create);
}
if (rc)
goto out;
Expand All @@ -1083,6 +1082,8 @@ ds_mgmt_hdlr_tgt_create(crt_rpc_t *tc_req)
pthread_t thread;
bool canceled_thread = false;
int rc = 0;
size_t tgt_scm_sz;
size_t tgt_meta_sz;

/** incoming request buffer */
tc_in = crt_req_get(tc_req);
Expand Down Expand Up @@ -1119,6 +1120,17 @@ ds_mgmt_hdlr_tgt_create(crt_rpc_t *tc_req)
D_DEBUG(DB_MGMT, DF_UUID": record inserted to dpt_creates_ht\n",
DP_UUID(tca.tca_ptrec->dptr_uuid));

tgt_scm_sz = tc_in->tc_scm_size / dss_tgt_nr;
tgt_meta_sz = tc_in->tc_meta_size / dss_tgt_nr;
rc = vos_pool_roundup_size(&tgt_scm_sz, &tgt_meta_sz);
if (rc) {
D_ERROR(DF_UUID": failed to roundup the vos size: "DF_RC"\n",
DP_UUID(tc_in->tc_pool_uuid), DP_RC(rc));
goto out_rec;
}
tc_in->tc_scm_size = tgt_scm_sz * dss_tgt_nr;
tc_in->tc_meta_size = tgt_meta_sz * dss_tgt_nr;

tca.tca_scm_size = tc_in->tc_scm_size;
tca.tca_nvme_size = tc_in->tc_nvme_size;
tca.tca_dx = dss_current_xstream();
Expand Down
55 changes: 52 additions & 3 deletions src/vos/vos_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,51 @@ init_umem_store(struct umem_store *store, struct bio_meta_context *mc)
store->store_type = DAOS_MD_BMEM;
}

/**
 * Determine the store (backend) type for a pool from its SCM and META sizes.
 *
 * - scm_sz >  meta_sz: rejected.
 * - scm_sz == meta_sz: the configured backend is returned unchanged.
 * - scm_sz <  meta_sz: DAOS_MD_BMEM_V2 is returned if the configured backend
 *   already is DAOS_MD_BMEM_V2, or if it is DAOS_MD_BMEM and bmem_v2 pools
 *   are allowed; any other configuration is rejected.
 *
 * \param[in] scm_sz	SCM size, must be non-zero.
 * \param[in] meta_sz	META size, must be non-zero.
 *
 * \return	Backend type (>= 0) on success, -DER_INVAL on rejection.
 */
static int
vos_pool_store_type(daos_size_t scm_sz, daos_size_t meta_sz)
{
	int store_type = umempobj_get_backend_type();

	D_ASSERT((meta_sz != 0) && (scm_sz != 0));

	if (scm_sz > meta_sz) {
		D_ERROR("memsize %lu is greater than metasize %lu", scm_sz, meta_sz);
		return -DER_INVAL;
	}

	/* Equal sizes work with whichever backend is configured. */
	if (scm_sz == meta_sz)
		return store_type;

	/* From here on scm_sz < meta_sz, which only bmem_v2 can serve. */
	if (store_type == DAOS_MD_BMEM_V2)
		return DAOS_MD_BMEM_V2;

	if ((store_type == DAOS_MD_BMEM) && umempobj_allow_md_bmem_v2())
		return DAOS_MD_BMEM_V2;

	D_ERROR("scm_sz %lu is less than meta_sz %lu", scm_sz, meta_sz);
	return -DER_INVAL;
}

/**
 * Round the SCM and META sizes up so they are compatible with the backend
 * that will be chosen for them.
 *
 * \param[in,out] scm_sz	SCM size; must be non-zero. Aligned up to the
 *				backend page size, with a floor of 1 << 24.
 * \param[in,out] meta_sz	META size; zero is left as is, any other value
 *				is aligned up the same way as scm_sz.
 *
 * \return	0 on success, negative error from vos_pool_store_type()
 *		otherwise.
 */
int
vos_pool_roundup_size(daos_size_t *scm_sz, daos_size_t *meta_sz)
{
	daos_size_t meta_eff;
	size_t      pgsz;
	int         backend;

	D_ASSERT(*scm_sz != 0);

	/* A zero META size is treated as equal to the SCM size for backend selection. */
	meta_eff = (*meta_sz != 0) ? *meta_sz : *scm_sz;

	backend = vos_pool_store_type(*scm_sz, meta_eff);
	if (backend < 0)
		return backend;

	/* Round up the size such that it is compatible with backend */
	pgsz    = umempobj_pgsz(backend);
	*scm_sz = max(D_ALIGNUP(*scm_sz, pgsz), 1 << 24);
	if (*meta_sz != 0)
		*meta_sz = max(D_ALIGNUP(*meta_sz, pgsz), 1 << 24);

	return 0;
}

static int
vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout,
size_t scm_sz, size_t nvme_sz, size_t wal_sz, size_t meta_sz,
Expand Down Expand Up @@ -851,9 +896,13 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout,
if (!meta_sz)
meta_sz = scm_sz_actual;

store.store_type = umempobj_get_backend_type();
if (store.store_type == DAOS_MD_BMEM && meta_sz > scm_sz_actual)
store.store_type = DAOS_MD_BMEM_V2;
rc = vos_pool_store_type(scm_sz_actual, meta_sz);
if (rc < 0) {
D_ERROR("Failed to determine the store type for xs:%p pool:"DF_UUID". "DF_RC,
xs_ctxt, DP_UUID(pool_id), DP_RC(rc));
return rc;
}
store.store_type = rc;

D_DEBUG(DB_MGMT, "Create BIO meta context for xs:%p pool:"DF_UUID" "
"scm_sz: %zu meta_sz: %zu, nvme_sz: %zu wal_sz:%zu backend:%d\n",
Expand Down

0 comments on commit af71f88

Please sign in to comment.