-
Notifications
You must be signed in to change notification settings - Fork 301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-16591 mgmt, vos, common: Align scm/meta size #15146
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1048,15 +1048,14 @@ tgt_create_preallocate(void *arg) | |
* 16MB minimum per pmemobj file (SCM partition) | ||
*/ | ||
D_ASSERT(dss_tgt_nr > 0); | ||
D_ASSERT((tca->tca_scm_size / dss_tgt_nr) >= (1 << 24)); | ||
if (!bio_nvme_configured(SMD_DEV_TYPE_META)) { | ||
rc = tgt_vos_preallocate_sequential(tca->tca_ptrec->dptr_uuid, | ||
max(tca->tca_scm_size / dss_tgt_nr, | ||
1 << 24), dss_tgt_nr); | ||
rc = tgt_vos_preallocate_sequential( | ||
tca->tca_ptrec->dptr_uuid, tca->tca_scm_size / dss_tgt_nr, dss_tgt_nr); | ||
} else { | ||
rc = tgt_vos_preallocate_parallel(tca->tca_ptrec->dptr_uuid, | ||
max(tca->tca_scm_size / dss_tgt_nr, | ||
1 << 24), dss_tgt_nr, | ||
&tca->tca_ptrec->cancel_create); | ||
rc = tgt_vos_preallocate_parallel( | ||
tca->tca_ptrec->dptr_uuid, tca->tca_scm_size / dss_tgt_nr, dss_tgt_nr, | ||
&tca->tca_ptrec->cancel_create); | ||
} | ||
if (rc) | ||
goto out; | ||
|
@@ -1123,7 +1122,12 @@ ds_mgmt_hdlr_tgt_create(crt_rpc_t *tc_req) | |
|
||
tgt_scm_sz = tc_in->tc_scm_size / dss_tgt_nr; | ||
tgt_meta_sz = tc_in->tc_meta_size / dss_tgt_nr; | ||
vos_pool_roundup_size(&tgt_scm_sz, &tgt_meta_sz); | ||
rc = vos_pool_roundup_size(&tgt_scm_sz, &tgt_meta_sz); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this strange formatting enforced by the clang linter? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. |
||
if (rc) { | ||
D_ERROR(DF_UUID": failed to roundup the vos size: "DF_RC"\n", | ||
DP_UUID(tc_in->tc_pool_uuid), DP_RC(rc)); | ||
goto out_rec; | ||
} | ||
tc_in->tc_scm_size = tgt_scm_sz * dss_tgt_nr; | ||
tc_in->tc_meta_size = tgt_meta_sz * dss_tgt_nr; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -753,6 +753,50 @@ init_umem_store(struct umem_store *store, struct bio_meta_context *mc) | |
store->store_type = DAOS_MD_BMEM; | ||
} | ||
|
||
static int | ||
vos_pool_store_type(daos_size_t scm_sz, daos_size_t meta_sz) | ||
{ | ||
int backend; | ||
|
||
backend = umempobj_get_backend_type(); | ||
D_ASSERT((meta_sz != 0) && (scm_sz != 0)); | ||
|
||
if (scm_sz > meta_sz) { | ||
D_ERROR("memsize %lu is greater than metasize %lu", scm_sz, meta_sz); | ||
return -DER_INVAL; | ||
} | ||
|
||
if (scm_sz < meta_sz) { | ||
if ((backend == DAOS_MD_BMEM) && umempobj_allow_md_bmem_v2()) | ||
backend = DAOS_MD_BMEM_V2; | ||
else if (backend != DAOS_MD_BMEM_V2) { | ||
D_ERROR("scm_sz %lu is less than meta_sz %lu", scm_sz, meta_sz); | ||
return -DER_INVAL; | ||
} | ||
} | ||
|
||
return backend; | ||
} | ||
|
||
int | ||
vos_pool_roundup_size(daos_size_t *scm_sz, daos_size_t *meta_sz) | ||
{ | ||
size_t alignsz; | ||
int rc; | ||
|
||
D_ASSERT((*scm_sz != 0) && (*meta_sz != 0)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This assert is triggered in CI tests; it looks like we sometimes pass a meta_size of 0 (for pmem or phase1 mode). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Addressed with the 3rd commit. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. From the failed run at https://build.hpdd.intel.com/job/daos-stack/job/daos/view/change-requests/job/PR-15146/4/artifact/Functional%20Hardware%20Medium/daos_test/rebuild.py/daos_logs.wolf-254/01-DAOS_Rebuild_0to10_daos_control.log/ we can see that the assert happens before pool create, just after engine start (before any
So whilst |
||
rc = vos_pool_store_type(*scm_sz, *meta_sz); | ||
if (rc < 0) | ||
return rc; | ||
|
||
/* Round up the size such that it is compatible with backend */ | ||
alignsz = umempobj_pgsz(rc); | ||
*scm_sz = max(D_ALIGNUP(*scm_sz, alignsz), 1 << 24); | ||
*meta_sz = max(D_ALIGNUP(*meta_sz, alignsz), 1 << 24); | ||
|
||
return 0; | ||
} | ||
|
||
static int | ||
vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, | ||
size_t scm_sz, size_t nvme_sz, size_t wal_sz, size_t meta_sz, | ||
|
@@ -794,9 +838,13 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, | |
if (!meta_sz) | ||
meta_sz = scm_sz_actual; | ||
|
||
store.store_type = umempobj_get_backend_type(); | ||
if (store.store_type == DAOS_MD_BMEM && meta_sz > scm_sz_actual) | ||
store.store_type = DAOS_MD_BMEM_V2; | ||
rc = vos_pool_store_type(scm_sz_actual, meta_sz); | ||
if (rc < 0) { | ||
D_ERROR("Failed to determine the store type for xs:%p pool:"DF_UUID". "DF_RC, | ||
xs_ctxt, DP_UUID(pool_id), DP_RC(rc)); | ||
return rc; | ||
} | ||
store.store_type = rc; | ||
|
||
D_DEBUG(DB_MGMT, "Create BIO meta context for xs:%p pool:"DF_UUID" " | ||
"scm_sz: %zu meta_sz: %zu, nvme_sz: %zu wal_sz:%zu backend:%d\n", | ||
|
@@ -1272,26 +1320,6 @@ vos_pool_create_ex(const char *path, uuid_t uuid, daos_size_t scm_sz, daos_size_ | |
return rc; | ||
} | ||
|
||
int | ||
vos_pool_roundup_size(daos_size_t *scm_sz, daos_size_t *meta_sz) | ||
{ | ||
int backend; | ||
size_t alignsz; | ||
|
||
backend = umempobj_get_backend_type(); | ||
if ((*scm_sz != *meta_sz) && (backend == DAOS_MD_BMEM)) | ||
backend = DAOS_MD_BMEM_V2; | ||
|
||
/* Round up the size such that it is compatible with backend */ | ||
alignsz = umempobj_pgsz(backend); | ||
|
||
*scm_sz = D_ALIGNUP(*scm_sz, alignsz); | ||
if (*meta_sz) | ||
*meta_sz = D_ALIGNUP(*meta_sz, alignsz); | ||
|
||
return 0; | ||
} | ||
|
||
int | ||
vos_pool_create(const char *path, uuid_t uuid, daos_size_t scm_sz, daos_size_t data_sz, | ||
daos_size_t meta_sz, unsigned int flags, uint32_t version, daos_handle_t *poh) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we have to allocate sequentially in MD mode rather than in parallel, as in PMem?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's the other way round: it's sequential in PMEM and parallel in MD. This change was made in phase 1 to overcome the overhead of fallocate on tmpfs.