-
Notifications
You must be signed in to change notification settings - Fork 297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-16591 mgmt, vos, common: Align scm/meta size #15146
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1048,15 +1048,14 @@ tgt_create_preallocate(void *arg) | |
* 16MB minimum per pmemobj file (SCM partition) | ||
*/ | ||
D_ASSERT(dss_tgt_nr > 0); | ||
D_ASSERT((tca->tca_scm_size / dss_tgt_nr) >= (1 << 24)); | ||
if (!bio_nvme_configured(SMD_DEV_TYPE_META)) { | ||
rc = tgt_vos_preallocate_sequential(tca->tca_ptrec->dptr_uuid, | ||
max(tca->tca_scm_size / dss_tgt_nr, | ||
1 << 24), dss_tgt_nr); | ||
rc = tgt_vos_preallocate_sequential( | ||
tca->tca_ptrec->dptr_uuid, tca->tca_scm_size / dss_tgt_nr, dss_tgt_nr); | ||
} else { | ||
rc = tgt_vos_preallocate_parallel(tca->tca_ptrec->dptr_uuid, | ||
max(tca->tca_scm_size / dss_tgt_nr, | ||
1 << 24), dss_tgt_nr, | ||
&tca->tca_ptrec->cancel_create); | ||
rc = tgt_vos_preallocate_parallel( | ||
tca->tca_ptrec->dptr_uuid, tca->tca_scm_size / dss_tgt_nr, dss_tgt_nr, | ||
&tca->tca_ptrec->cancel_create); | ||
} | ||
if (rc) | ||
goto out; | ||
|
@@ -1083,6 +1082,8 @@ ds_mgmt_hdlr_tgt_create(crt_rpc_t *tc_req) | |
pthread_t thread; | ||
bool canceled_thread = false; | ||
int rc = 0; | ||
size_t tgt_scm_sz; | ||
size_t tgt_meta_sz; | ||
|
||
/** incoming request buffer */ | ||
tc_in = crt_req_get(tc_req); | ||
|
@@ -1119,6 +1120,17 @@ ds_mgmt_hdlr_tgt_create(crt_rpc_t *tc_req) | |
D_DEBUG(DB_MGMT, DF_UUID": record inserted to dpt_creates_ht\n", | ||
DP_UUID(tca.tca_ptrec->dptr_uuid)); | ||
|
||
tgt_scm_sz = tc_in->tc_scm_size / dss_tgt_nr; | ||
tgt_meta_sz = tc_in->tc_meta_size / dss_tgt_nr; | ||
rc = vos_pool_roundup_size(&tgt_scm_sz, &tgt_meta_sz); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this strange formatting enforced by the clang linter? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
if (rc) { | ||
D_ERROR(DF_UUID": failed to roundup the vos size: "DF_RC"\n", | ||
DP_UUID(tc_in->tc_pool_uuid), DP_RC(rc)); | ||
goto out_rec; | ||
} | ||
tc_in->tc_scm_size = tgt_scm_sz * dss_tgt_nr; | ||
tc_in->tc_meta_size = tgt_meta_sz * dss_tgt_nr; | ||
|
||
tca.tca_scm_size = tc_in->tc_scm_size; | ||
tca.tca_nvme_size = tc_in->tc_nvme_size; | ||
tca.tca_dx = dss_current_xstream(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -753,6 +753,50 @@ init_umem_store(struct umem_store *store, struct bio_meta_context *mc) | |
store->store_type = DAOS_MD_BMEM; | ||
} | ||
|
||
static int | ||
vos_pool_store_type(daos_size_t scm_sz, daos_size_t meta_sz) | ||
{ | ||
int backend; | ||
|
||
backend = umempobj_get_backend_type(); | ||
D_ASSERT((meta_sz != 0) && (scm_sz != 0)); | ||
|
||
if (scm_sz > meta_sz) { | ||
D_ERROR("memsize %lu is greater than metasize %lu", scm_sz, meta_sz); | ||
return -DER_INVAL; | ||
} | ||
|
||
if (scm_sz < meta_sz) { | ||
if ((backend == DAOS_MD_BMEM) && umempobj_allow_md_bmem_v2()) | ||
backend = DAOS_MD_BMEM_V2; | ||
else if (backend != DAOS_MD_BMEM_V2) { | ||
D_ERROR("scm_sz %lu is less than meta_sz %lu", scm_sz, meta_sz); | ||
return -DER_INVAL; | ||
} | ||
} | ||
|
||
return backend; | ||
} | ||
|
||
int | ||
vos_pool_roundup_size(daos_size_t *scm_sz, daos_size_t *meta_sz) | ||
{ | ||
size_t alignsz; | ||
int rc; | ||
|
||
D_ASSERT((*scm_sz != 0) && (*meta_sz != 0)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This assert is triggered in CI tests, looks we pass 0 meta_size sometimes (for pmem or phase1 mode). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed with the 3rd commit. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From https://build.hpdd.intel.com/job/daos-stack/job/daos/view/change-requests/job/PR-15146/4/artifact/Functional%20Hardware%20Medium/daos_test/rebuild.py/daos_logs.wolf-254/01-DAOS_Rebuild_0to10_daos_control.log/ failed we can see that the assert happens before pool create, just after engine start (before any
So whilst |
||
rc = vos_pool_store_type(*scm_sz, *meta_sz); | ||
if (rc < 0) | ||
return rc; | ||
|
||
/* Round up the size such that it is compatible with backend */ | ||
alignsz = umempobj_pgsz(rc); | ||
*scm_sz = max(D_ALIGNUP(*scm_sz, alignsz), 1 << 24); | ||
*meta_sz = max(D_ALIGNUP(*meta_sz, alignsz), 1 << 24); | ||
|
||
return 0; | ||
} | ||
|
||
static int | ||
vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, | ||
size_t scm_sz, size_t nvme_sz, size_t wal_sz, size_t meta_sz, | ||
|
@@ -794,9 +838,13 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, | |
if (!meta_sz) | ||
meta_sz = scm_sz_actual; | ||
|
||
store.store_type = umempobj_get_backend_type(); | ||
if (store.store_type == DAOS_MD_BMEM && meta_sz > scm_sz_actual) | ||
store.store_type = DAOS_MD_BMEM_V2; | ||
rc = vos_pool_store_type(scm_sz_actual, meta_sz); | ||
if (rc < 0) { | ||
D_ERROR("Failed to determine the store type for xs:%p pool:"DF_UUID". "DF_RC, | ||
xs_ctxt, DP_UUID(pool_id), DP_RC(rc)); | ||
return rc; | ||
} | ||
store.store_type = rc; | ||
|
||
D_DEBUG(DB_MGMT, "Create BIO meta context for xs:%p pool:"DF_UUID" " | ||
"scm_sz: %zu meta_sz: %zu, nvme_sz: %zu wal_sz:%zu backend:%d\n", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we have to allocate sequentially in MD mode rather than parallel like in PMem?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's the other way round: it's sequential in PMEM and parallel in MD. This change was made in phase 1 to overcome the overhead of fallocate on tmpfs.