diff --git a/ci/docker_nlt.sh b/ci/docker_nlt.sh index 5e422435261..d7dbc933fa1 100755 --- a/ci/docker_nlt.sh +++ b/ci/docker_nlt.sh @@ -22,7 +22,7 @@ pushd "$TMP_DIR" set +e sudo --preserve-env=VIRTUAL_ENV,PATH ./node_local_test.py \ - --no-root --memcheck no --system-ram-reserved 32 --server-debug WARN "$@" + --no-root --memcheck no --system-ram-reserved 48 --server-debug WARN "$@" RC=$? set -e diff --git a/docs/admin/env_variables.md b/docs/admin/env_variables.md index 3fca2f1db1e..daac2c8e6a6 100644 --- a/docs/admin/env_variables.md +++ b/docs/admin/env_variables.md @@ -68,6 +68,7 @@ Environment variables in this section only apply to the client side. |Variable |Description| |-------------------------|-----------| |FI\_MR\_CACHE\_MAX\_COUNT|Enable MR (Memory Registration) caching in OFI layer. Recommended to be set to 0 (disable) when CRT\_DISABLE\_MEM\_PIN is NOT set to 1. INTEGER. Default to unset.| +|D\_POLL\_TIMEOUT|Polling timeout passed to network progress for synchronous operations. Default to 0 (busy polling), value in micro-seconds otherwise.| ## Debug System (Client & Server) diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 6c72fde5256..6dc8f11f7de 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -37,10 +37,14 @@ struct crt_na_dict crt_na_dict[] = { .nad_str = "ofi+gni", .nad_contig_eps = true, .nad_port_bind = false, + }, { + .nad_type = CRT_PROV_OFI_TCP, + .nad_str = "ofi+tcp", + .nad_contig_eps = true, + .nad_port_bind = true, }, { .nad_type = CRT_PROV_OFI_TCP_RXM, .nad_str = "ofi+tcp;ofi_rxm", - .nad_alt_str = "ofi+tcp", .nad_contig_eps = true, .nad_port_bind = true, }, { @@ -655,7 +659,7 @@ crt_get_opx_info_string(char *provider, char *domain, char *ip, D_GOTO(out, rc = -DER_INVAL); } - strcpy(domain_name, domain); + strncpy(domain_name, domain, sizeof(domain_name) - 1); strtok_r(domain_name, &domain[delimiter], &hfi_str); hfi = (unsigned int)strtoul(hfi_str, NULL, 10); diff --git a/src/cart/crt_hg.h b/src/cart/crt_hg.h index 395b773e4d1..7c6c49570d7 100644 --- a/src/cart/crt_hg.h +++ b/src/cart/crt_hg.h @@ -31,6 +31,35 @@ struct crt_rpc_priv; struct crt_common_hdr; struct crt_corpc_hdr; +/** + * Enumeration specifying providers supported by the library + */ +typedef enum { + CRT_PROV_SM = 0, + CRT_PROV_OFI_SOCKETS, + CRT_PROV_OFI_VERBS_RXM, + CRT_PROV_OFI_GNI, + CRT_PROV_OFI_TCP, + CRT_PROV_OFI_TCP_RXM, + CRT_PROV_OFI_CXI, + CRT_PROV_OFI_OPX, + CRT_PROV_OFI_LAST = CRT_PROV_OFI_OPX, + CRT_PROV_UCX_RC, + CRT_PROV_UCX_UD, + CRT_PROV_UCX_RC_UD, + CRT_PROV_UCX_RC_O, + CRT_PROV_UCX_UD_O, + CRT_PROV_UCX_RC_UD_O, + CRT_PROV_UCX_RC_X, + CRT_PROV_UCX_UD_X, + CRT_PROV_UCX_RC_UD_X, + CRT_PROV_UCX_DC_X, + CRT_PROV_UCX_TCP, + CRT_PROV_UCX_LAST = CRT_PROV_UCX_TCP, + /* Note: This entry should be the last valid one in enum */ + CRT_PROV_COUNT, + CRT_PROV_UNKNOWN = -1, +} crt_provider_t; crt_provider_t crt_prov_str_to_prov(const char *prov_str); diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 4d27e778bf0..0c449173e0b 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -71,7 +71,8 @@ dump_envariables(void) "CRT_CTX_SHARE_ADDR", "CRT_CTX_NUM", "D_FI_CONFIG", "FI_UNIVERSE_SIZE", "CRT_ENABLE_MEM_PIN", "FI_OFI_RXM_USE_SRX", "D_LOG_FLUSH", "CRT_MRC_ENABLE", - "CRT_SECONDARY_PROVIDER", "D_PROVIDER_AUTH_KEY", "D_PORT_AUTO_ADJUST"}; + "CRT_SECONDARY_PROVIDER", "D_PROVIDER_AUTH_KEY", "D_PORT_AUTO_ADJUST", + "D_POLL_TIMEOUT"}; D_INFO("-- ENVARS: --\n"); for (i = 0; i < ARRAY_SIZE(envars); i++) { @@ -545,10 +546,6 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) if (prov == CRT_PROV_OFI_CXI) mrc_enable = 1; - else { - /* Use tagged messages for other providers, disable multi-recv */ - apply_if_not_set("NA_OFI_UNEXPECTED_TAG_MSG", "1"); - } d_getenv_int("CRT_MRC_ENABLE", &mrc_enable); if (mrc_enable == 0) { @@ -556,6 +553,10 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) setenv("FI_MR_CACHE_MAX_COUNT", "0", 1); } + /* Use tagged messages for other providers, disable multi-recv */ + if (prov != CRT_PROV_OFI_CXI && prov != CRT_PROV_OFI_TCP) + apply_if_not_set("NA_OFI_UNEXPECTED_TAG_MSG", "1"); + g_prov_settings_applied[prov] = true; } diff --git a/src/cart/utils/crt_utils.c b/src/cart/utils/crt_utils.c index 9a249bfc9f0..c65aa284a08 100644 --- a/src/cart/utils/crt_utils.c +++ b/src/cart/utils/crt_utils.c @@ -134,18 +134,19 @@ crtu_progress_fn(void *data) while (opts.shutdown == 0) crt_progress(*p_ctx, 1000); - if (opts.is_swim_enabled && idx == 0) - crt_swim_disable_all(); + if (opts.is_server) { + if (opts.is_swim_enabled && idx == 0) + crt_swim_disable_all(); - rc = crtu_drain_queue(*p_ctx); - D_ASSERTF(rc == 0, "crtu_drain_queue() failed with rc=%d\n", rc); + rc = crtu_drain_queue(*p_ctx); + D_ASSERTF(rc == 0, "crtu_drain_queue() failed with rc=%d\n", rc); - if (opts.delay_shutdown_sec > 0) - sleep(opts.delay_shutdown_sec); + if (opts.delay_shutdown_sec > 0) + sleep(opts.delay_shutdown_sec); + } rc = crt_context_destroy(*p_ctx, 1); - D_ASSERTF(rc == 0, "Failed to destroy context %p rc=%d\n", - p_ctx, rc); + D_ASSERTF(rc == 0, "Failed to destroy context %p rc=%d\n", p_ctx, rc); pthread_exit(rc ? *p_ctx : NULL); diff --git a/src/client/api/event.c b/src/client/api/event.c index 62082374bd0..ba12a05a996 100644 --- a/src/client/api/event.c +++ b/src/client/api/event.c @@ -20,6 +20,12 @@ static __thread daos_event_t ev_thpriv; static __thread bool ev_thpriv_is_init; +/** + * Global progress timeout for synchronous operation + * busy-polling by default (0), timeout in us otherwise + */ +static uint32_t ev_prog_timeout; + #define EQ_WITH_CRT #if !defined(EQ_WITH_CRT) @@ -91,6 +97,8 @@ daos_eq_lib_init() eq_ref = 1; + d_getenv_int("D_POLL_TIMEOUT", &ev_prog_timeout); + unlock: D_MUTEX_UNLOCK(&daos_eq_lock); return rc; @@ -1261,7 +1269,7 @@ daos_event_priv_wait() /* Wait on the event to complete */ while (evx->evx_status != DAOS_EVS_READY) { - rc = crt_progress_cond(evx->evx_ctx, 0, ev_progress_cb, &epa); + rc = crt_progress_cond(evx->evx_ctx, ev_prog_timeout, ev_progress_cb, &epa); /** progress succeeded, loop can exit if event completed */ if (rc == 0) { diff --git a/src/client/array/dc_array.c b/src/client/array/dc_array.c index 42cdf32d420..da44df06060 100644 --- a/src/client/array/dc_array.c +++ b/src/client/array/dc_array.c @@ -1822,7 +1822,7 @@ struct key_query_props { char akey_val; daos_recx_t recx; daos_size_t *size; - daos_size_t max_epoch; + daos_epoch_t max_epoch; tse_task_t *ptask; }; @@ -1831,8 +1831,7 @@ free_query_cb(tse_task_t *task, void *data) { struct key_query_props *props = *((struct key_query_props **)data); - if (props->array) - array_decref(props->array); + array_decref(props->array); D_FREE(props); return 0; } @@ -1905,7 +1904,7 @@ dc_array_get_size(tse_task_t *task) query_args->dkey = &kqp->dkey; query_args->akey = &kqp->akey; query_args->recx = &kqp->recx; - query_args->max_epoch = NULL; + query_args->max_epoch = &kqp->max_epoch; rc = tse_task_register_comp_cb(task, free_query_cb, &kqp, sizeof(kqp)); if (rc != 0) @@ -2285,7 +2284,7 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l daos_key_t *dkey; struct io_params *params = NULL; tse_task_t *io_task; - bool free_iod_recxs = true; + bool free_params = true; int rc; D_ALLOC_PTR(params); @@ -2336,7 +2335,7 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l rc = tse_task_register_comp_cb(io_task, free_io_params_cb, ¶ms, sizeof(params)); if (rc) D_GOTO(err_task, rc); - free_iod_recxs = false; + free_params = false; rc = tse_task_register_deps(props->ptask, 1, &io_task); if (rc) @@ -2351,9 +2350,10 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l if (io_task) tse_task_complete(io_task, rc); err: - D_FREE(params); - if (free_iod_recxs) + if (free_params) { D_FREE(iod->iod_recxs); + D_FREE(params); + } return rc; } @@ -2478,6 +2478,7 @@ dc_array_set_size(tse_task_t *task) daos_obj_list_dkey_t *enum_args; struct set_size_props *set_size_props = NULL; tse_task_t *enum_task; + bool cleanup = true; int rc; args = daos_task_get_args(task); @@ -2537,31 +2538,34 @@ dc_array_set_size(tse_task_t *task) enum_args->sgl = &set_size_props->sgl; enum_args->dkey_anchor = &set_size_props->anchor; - rc = tse_task_register_cbs(enum_task, NULL, NULL, 0, adjust_array_size_cb, &set_size_props, - sizeof(set_size_props)); + rc = tse_task_register_comp_cb(task, free_set_size_cb, &set_size_props, + sizeof(set_size_props)); if (rc) D_GOTO(err_enum_task, rc); + cleanup = false; - rc = tse_task_register_deps(task, 1, &enum_task); + rc = tse_task_register_comp_cb(enum_task, adjust_array_size_cb, &set_size_props, + sizeof(set_size_props)); if (rc) D_GOTO(err_enum_task, rc); - rc = tse_task_register_comp_cb(task, free_set_size_cb, &set_size_props, - sizeof(set_size_props)); + rc = tse_task_register_deps(task, 1, &enum_task); if (rc) D_GOTO(err_enum_task, rc); rc = tse_task_schedule(enum_task, true); if (rc) - D_GOTO(err_enum_task, rc); + D_GOTO(err_task, rc); return 0; err_enum_task: tse_task_complete(enum_task, rc); err_task: - D_FREE(set_size_props); - if (array) - array_decref(array); tse_task_complete(task, rc); + if (cleanup) { + D_FREE(set_size_props); + if (array) + array_decref(array); + } return rc; } /* end daos_array_set_size */ diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 79e3374aa6b..280ec52318d 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -1246,8 +1246,14 @@ open_file(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, rc = insert_entry(dfs->layout_v, parent->oh, DAOS_TX_NONE, file->name, len, DAOS_COND_DKEY_INSERT, entry); if (rc == EEXIST && !oexcl) { + int rc2; + /** just try fetching entry to open the file */ - daos_array_close(file->oh, NULL); + rc2 = daos_array_close(file->oh, NULL); + if (rc2 == -DER_NOMEM) + rc2 = daos_array_close(file->oh, NULL); + if (rc2) + return daos_der2errno(rc2); } else if (rc) { int rc2; @@ -6453,6 +6459,7 @@ dfs_dir_anchor_set(dfs_obj_t *obj, const char name[], daos_anchor_t *anchor) #define DFS_ITER_NR 128 #define DFS_ITER_DKEY_BUF (DFS_ITER_NR * sizeof(uint64_t)) #define DFS_ITER_ENTRY_BUF (DFS_ITER_NR * DFS_MAX_NAME) +#define DFS_ELAPSED_TIME 30 struct dfs_oit_args { daos_handle_t oit; @@ -6460,6 +6467,9 @@ struct dfs_oit_args { uint64_t snap_epoch; uint64_t skipped; uint64_t failed; + time_t start_time; + time_t print_time; + uint64_t num_scanned; }; static int @@ -6555,8 +6565,19 @@ oit_mark_cb(dfs_t *dfs, dfs_obj_t *parent, const char name[], void *args) daos_obj_id_t oid; d_iov_t marker; bool mark_data = true; + struct timespec current_time; int rc; + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + return errno; + oit_args->num_scanned ++; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Scanned "DF_U64" files/directories (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + /** open the entry name and get the oid */ rc = dfs_lookup_rel(dfs, parent, name, O_RDONLY, &obj, NULL, NULL); if (rc) { @@ -6702,12 +6723,24 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * d_iov_t marker; bool mark_data = true; daos_epoch_range_t epr; - struct timespec now; + struct timespec now, current_time; uid_t uid = geteuid(); gid_t gid = getegid(); unsigned int co_flags = DAOS_COO_EX; + char now_name[24]; + struct tm *now_tm; + daos_size_t len; int rc, rc2; + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc) + return errno; + now_tm = localtime(&now.tv_sec); + len = strftime(now_name, sizeof(now_name), "%Y-%m-%d-%H:%M:%S", now_tm); + if (len == 0) + return EINVAL; + D_PRINT("DFS checker: Start (%s)\n", now_name); + if (flags & DFS_CHECK_RELINK && flags & DFS_CHECK_REMOVE) { D_ERROR("can't request remove and link to l+f at the same time\n"); return EINVAL; @@ -6728,6 +6761,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_cont, rc); } + D_PRINT("DFS checker: Create OIT table\n"); /** create snapshot for OIT */ rc = daos_cont_create_snap_opt(coh, &snap_epoch, NULL, DAOS_SNAP_OPT_CR | DAOS_SNAP_OPT_OIT, NULL); @@ -6741,6 +6775,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_dfs, rc = ENOMEM); oit_args->flags = flags; oit_args->snap_epoch = snap_epoch; + oit_args->start_time = now.tv_sec; + oit_args->print_time = now.tv_sec; /** Open OIT table */ rc = daos_oit_open(coh, snap_epoch, &oit_args->oit, NULL); @@ -6757,10 +6793,11 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_oit, rc = daos_der2errno(rc)); } rc = daos_oit_mark(oit_args->oit, dfs->root.oid, &marker, NULL); - if (rc) { + if (rc && rc != -DER_NONEXIST) { D_ERROR("Failed to mark ROOT OID in OIT: "DF_RC"\n", DP_RC(rc)); D_GOTO(out_oit, rc = daos_der2errno(rc)); } + rc = 0; if (flags & DFS_CHECK_VERIFY) { rc = daos_obj_verify(coh, dfs->super_oid, snap_epoch); @@ -6790,6 +6827,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } } + D_PRINT("DFS checker: Iterating namespace and marking objects\n"); + oit_args->num_scanned = 2; /** iterate through the namespace and mark OITs starting from the root object */ while (!daos_anchor_is_eof(&anchor)) { rc = dfs_iterate(dfs, &dfs->root, &anchor, &nr_entries, DFS_MAX_NAME * nr_entries, @@ -6802,14 +6841,14 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * nr_entries = DFS_ITER_NR; } - /** Create lost+found directory and properly link unmarked oids there. */ - if (flags & DFS_CHECK_RELINK) { - char now_name[24]; - - rc = clock_gettime(CLOCK_REALTIME, &now); - if (rc) - D_GOTO(out_oit, rc = errno); + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_oit, rc = errno); + D_PRINT("DFS checker: marked "DF_U64" files/directories (runtime: "DF_U64" sec))\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + /** Create lost+found directory to link unmarked oids there. */ + if (flags & DFS_CHECK_RELINK) { rc = dfs_open(dfs, NULL, "lost+found", S_IFDIR | 0755, O_CREAT | O_RDWR, 0, 0, NULL, &lf); if (rc) { @@ -6818,21 +6857,15 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } if (name == NULL) { - struct tm *now_tm; - size_t len; /* * Create a directory with current timestamp in l+f where leaked oids will * be linked in this run. */ - now_tm = localtime(&now.tv_sec); - len = strftime(now_name, sizeof(now_name), "%Y-%m-%d-%H:%M:%S", now_tm); - if (len == 0) { - D_ERROR("Invalid time format\n"); - D_GOTO(out_lf1, rc = EINVAL); - } - D_PRINT("Leaked OIDs will be inserted in /lost+found/%s\n", now_name); + D_PRINT("DFS checker: Leaked OIDs will be inserted in /lost+found/%s\n", + now_name); } else { - D_PRINT("Leaked OIDs will be inserted in /lost+found/%s\n", name); + D_PRINT("DFS checker: Leaked OIDs will be inserted in /lost+found/%s\n", + name); } rc = dfs_open(dfs, lf, name ? name : now_name, S_IFDIR | 0755, @@ -6864,6 +6897,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * * Pass 1: check directories only and descend to mark all oids in the namespace of each dir. * Pass 2: relink remaining oids in the L+F root that are unmarked still after first pass. */ + D_PRINT("DFS checker: Checking unmarked OIDs (Pass 1)\n"); + oit_args->num_scanned = 0; memset(&anchor, 0, sizeof(anchor)); /** Start Pass 1 */ while (!daos_anchor_is_eof(&anchor)) { @@ -6874,6 +6909,16 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_lf2, rc = daos_der2errno(rc)); } + clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + oit_args->num_scanned += nr_entries; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Checked "DF_U64" objects (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + for (i = 0; i < nr_entries; i++) { if (flags & DFS_CHECK_RELINK) { enum daos_otype_t otype = daos_obj_id2type(oids[i]); @@ -6934,6 +6979,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * if (!(flags & DFS_CHECK_RELINK)) goto done; + D_PRINT("DFS checker: Checking unmarked OIDs (Pass 2)\n"); + oit_args->num_scanned = 0; memset(&anchor, 0, sizeof(anchor)); while (!daos_anchor_is_eof(&anchor)) { nr_entries = DFS_ITER_NR; @@ -6943,11 +6990,20 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_lf2, rc = daos_der2errno(rc)); } + clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + oit_args->num_scanned += nr_entries; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Checked "DF_U64" objects (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + for (i = 0; i < nr_entries; i++) { struct dfs_entry entry = {0}; enum daos_otype_t otype = daos_obj_id2type(oids[i]); char oid_name[DFS_MAX_NAME + 1]; - daos_size_t len; if (flags & DFS_CHECK_PRINT) D_PRINT("oid["DF_U64"]: "DF_OID"\n", unmarked_entries, @@ -7016,8 +7072,12 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } done: - if (flags & DFS_CHECK_PRINT) - D_PRINT("Number of Leaked OIDs in Namespace = "DF_U64"\n", unmarked_entries); + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + D_PRINT("DFS checker: Done! (runtime: "DF_U64" sec)\n", + current_time.tv_sec - oit_args->start_time); + D_PRINT("DFS checker: Number of leaked OIDs in namespace = "DF_U64"\n", unmarked_entries); if (flags & DFS_CHECK_VERIFY) { if (oit_args->failed) { D_ERROR(""DF_U64" OIDs failed data consistency check!\n", oit_args->failed); @@ -7057,6 +7117,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * rc2 = daos_cont_close(coh, NULL); if (rc == 0) rc = daos_der2errno(rc2); + return rc; } diff --git a/src/client/dfs/dfs_internal.c b/src/client/dfs/dfs_internal.c index b717e683055..5d9f33ed204 100644 --- a/src/client/dfs/dfs_internal.c +++ b/src/client/dfs/dfs_internal.c @@ -88,11 +88,15 @@ static d_hash_table_ops_t hdl_hash_ops = { .hop_rec_hash = rec_hash }; -int +bool dfs_is_init() { - if (module_initialized > 0) + D_MUTEX_LOCK(&module_lock); + if (module_initialized > 0) { + D_MUTEX_UNLOCK(&module_lock); return true; + } + D_MUTEX_UNLOCK(&module_lock); return false; } diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 79d674779d2..20ce7a5b003 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -43,7 +43,7 @@ dfs_hdl_insert(const char *str, int type, const char *pool, daos_handle_t *oh, struct dfs_mnt_hdls **_hdl); int dfs_hdl_cont_destroy(const char *pool, const char *cont, bool force); -int +bool dfs_is_init(); /* diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index aaac700d414..bb14e10bbd0 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -271,17 +271,10 @@ _ph_free(struct dfuse_pool *dfp) if (daos_handle_is_valid(dfp->dfp_poh)) { rc = daos_pool_disconnect(dfp->dfp_poh, NULL); /* Hook for fault injection testing, if the disconnect fails with out of memory - * then simply try it again, only what might have happened is that the first - * call might have disconnected, but then failed to notify about the disconnect, - * in which case the subsequent call will return -DER_NO_HDL, if that's the case - * then this is expected, if odd, behavior so silence that case and just return - * success. + * then simply try it again. */ - if (rc == -DER_NOMEM) { + if (rc == -DER_NOMEM) rc = daos_pool_disconnect(dfp->dfp_poh, NULL); - if (rc == -DER_NO_HDL) - rc = -DER_SUCCESS; - } if (rc != -DER_SUCCESS) DFUSE_TRA_ERROR(dfp, "daos_pool_disconnect() failed: " DF_RC, DP_RC(rc)); } diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 75156ad9ede..36ab227ab40 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -3789,6 +3789,7 @@ chdir(const char *path) char item_name[DFS_MAX_NAME]; char *parent_dir = NULL; char *full_path = NULL; + bool is_root; if (next_chdir == NULL) { next_chdir = dlsym(RTLD_NEXT, "chdir"); @@ -3811,10 +3812,13 @@ chdir(const char *path) return rc; } - if (!parent && (strncmp(item_name, "/", 2) == 0)) + if (!parent && (strncmp(item_name, "/", 2) == 0)) { + is_root = true; rc = dfs_stat(dfs_mt->dfs, NULL, NULL, &stat_buf); - else + } else { + is_root = false; rc = dfs_stat(dfs_mt->dfs, parent, item_name, &stat_buf); + } if (rc) D_GOTO(out_err, rc); if (!S_ISDIR(stat_buf.st_mode)) { @@ -3822,7 +3826,10 @@ chdir(const char *path) strerror(ENOTDIR)); D_GOTO(out_err, rc = ENOTDIR); } - rc = dfs_access(dfs_mt->dfs, parent, item_name, X_OK); + if (is_root) + rc = dfs_access(dfs_mt->dfs, NULL, NULL, X_OK); + else + rc = dfs_access(dfs_mt->dfs, parent, item_name, X_OK); if (rc) D_GOTO(out_err, rc); len_str = snprintf(cur_dir, DFS_MAX_PATH, "%s%s", dfs_mt->fs_root, full_path); diff --git a/src/common/ad_mem.c b/src/common/ad_mem.c index ffd6750fc3c..91c6c373b57 100644 --- a/src/common/ad_mem.c +++ b/src/common/ad_mem.c @@ -2184,8 +2184,7 @@ arena_reserve_addr(struct ad_arena *arena, daos_size_t size, struct ad_reserv_ac if (rc == -DER_ENOENT || /* no arena, no group */ rc == -DER_NOSPACE) { /* no space in this arena */ grp_at = 0; - grp = NULL; - rc = 0; + grp = NULL; /* fall through */ } else if (rc != 0) { D_ERROR("Failed to find group, arena=%d, size=%d, rc=%d\n", diff --git a/src/common/dav/alloc_class.c b/src/common/dav/alloc_class.c index a0b2c0f4ae8..3dc5745db6a 100644 --- a/src/common/dav/alloc_class.c +++ b/src/common/dav/alloc_class.c @@ -212,7 +212,7 @@ alloc_class_new(int id, struct alloc_class_collection *ac, if (id < 0 && alloc_class_find_first_free_slot(ac, &slot) != 0) - goto error_class_alloc; + goto error_map_insert; id = slot; size_t map_idx = SIZE_TO_CLASS_MAP_INDEX(c->unit_size, diff --git a/src/common/dav/heap.c b/src/common/dav/heap.c index 5d11dff9f4b..4384fe40f8c 100644 --- a/src/common/dav/heap.c +++ b/src/common/dav/heap.c @@ -1371,13 +1371,16 @@ heap_get_arena_buckets(struct palloc_heap *heap, unsigned arena_id) int heap_get_arena_auto(struct palloc_heap *heap, unsigned arena_id) { + int value; + util_mutex_lock(&heap->rt->arenas.lock); struct arena *a = heap_get_arena_by_id(heap, arena_id); + value = a->automatic; util_mutex_unlock(&heap->rt->arenas.lock); - return a->automatic; + return value; } /* diff --git a/src/common/tests/acl_api_tests.c b/src/common/tests/acl_api_tests.c index 186121bfd46..79ad3b26ae1 100644 --- a/src/common/tests/acl_api_tests.c +++ b/src/common/tests/acl_api_tests.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2021 Intel Corporation. + * (C) Copyright 2019-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1035,7 +1035,8 @@ test_acl_add_ace_everyone_to_existing_list(void **state) static void expect_add_duplicate_ace_unchanged(enum daos_acl_principal_type type) { - int num_aces = NUM_DAOS_ACL_TYPES; + int num_aces = NUM_DAOS_ACL_TYPES; + ssize_t size; struct daos_ace *ace[num_aces]; struct daos_ace *new_ace; struct daos_acl *acl; @@ -1046,10 +1047,11 @@ expect_add_duplicate_ace_unchanged(enum daos_acl_principal_type type) orig_acl = daos_acl_dup(acl); /* Create an exact duplicate */ - D_ALLOC(new_ace, daos_ace_get_size(ace[type])); + size = daos_ace_get_size(ace[type]); + assert_true(size > 0); + D_ALLOC(new_ace, size); assert_non_null(new_ace); - memcpy(new_ace, ace[type], - daos_ace_get_size(ace[type])); + memcpy(new_ace, ace[type], size); assert_rc_equal(daos_acl_add_ace(&acl, new_ace), 0); diff --git a/src/control/cmd/daos_server/storage_nvme.go b/src/control/cmd/daos_server/storage_nvme.go index 494738ad24e..11b2dc1ca29 100644 --- a/src/control/cmd/daos_server/storage_nvme.go +++ b/src/control/cmd/daos_server/storage_nvme.go @@ -305,21 +305,36 @@ func resetNVMe(resetReq storage.BdevPrepareRequest, cmd *nvmeCmd, resetBackend n if err := processNVMePrepReq(cmd.Logger, cfgParam, cmd, &resetReq); err != nil { return errors.Wrap(err, "processing request parameters") } - // As reset nvme backend doesn't use NrHugepages, overwrite any set value with zero. + + // Apply request parameter field values required specifically for reset operation. resetReq.HugepageCount = 0 + resetReq.HugeNodes = "" + resetReq.CleanHugepagesOnly = false + resetReq.Reset_ = true cmd.Debugf("nvme reset request parameters: %+v", resetReq) - // TODO SPDK-2926: If VMD is enabled and PCI_ALLOWED list is set to a subset of VMD - // controllers (as specified in the server config file) then the backing - // devices of the unselected VMD controllers will be bound to no driver - // and therefore inaccessible from both OS and SPDK. Workaround is to run - // nvme scan --ignore-config to reset driver bindings. + resetResp, err := resetBackend(resetReq) + if err != nil { + return errors.Wrap(err, "nvme reset backend") + } - // Reset NVMe device access. - _, err := resetBackend(resetReq) + // SPDK-2926: If VMD has been detected, perform an extra SPDK reset (without PCI_ALLOWED) + // to reset dangling NVMe devices left unbound after the DRIVER_OVERRIDE=none + // setup call was used in nvme prepare. + if resetResp.VMDPrepared { + resetReq.PCIAllowList = "" + resetReq.PCIBlockList = "" + resetReq.EnableVMD = false // Prevents VMD endpoints being auto populated - return err + cmd.Debugf("vmd second nvme reset request parameters: %+v", resetReq) + + if _, err := resetBackend(resetReq); err != nil { + return errors.Wrap(err, "nvme reset backend") + } + } + + return nil } func (cmd *resetNVMeCmd) Execute(_ []string) error { @@ -336,7 +351,6 @@ func (cmd *resetNVMeCmd) Execute(_ []string) error { PCIAllowList: cmd.Args.PCIAllowList, PCIBlockList: cmd.PCIBlockList, DisableVFIO: cmd.DisableVFIO, - Reset_: true, } return resetNVMe(req, &cmd.nvmeCmd, scs.NvmePrepare) @@ -391,16 +405,8 @@ func (cmd *scanNVMeCmd) scanNVMe(scanBackend nvmeScanFn, prepResetBackend nvmePr cmd.Info(bld.String()) if !cmd.SkipPrep { - // TODO SPDK-2926: If VMD is enabled and PCI_ALLOWED list is set to a subset of VMD - // controllers (as specified in the server config file) then the - // backing devices of the unselected VMD controllers will be bound - // to no driver and therefore inaccessible from both OS and SPDK. - // Workaround is to run nvme scan --ignore-config to reset driver - // bindings. - req := storage.BdevPrepareRequest{ PCIAllowList: strings.Join(req.DeviceList.Devices(), storage.BdevPciAddrSep), - Reset_: true, } if err := resetNVMe(req, &cmd.nvmeCmd, prepResetBackend); err != nil { return errors.Wrap(err, diff --git a/src/control/cmd/daos_server/storage_nvme_test.go b/src/control/cmd/daos_server/storage_nvme_test.go index 2a15c5fb858..8cd0b144698 100644 --- a/src/control/cmd/daos_server/storage_nvme_test.go +++ b/src/control/cmd/daos_server/storage_nvme_test.go @@ -290,43 +290,74 @@ func TestDaosServer_resetNVMe(t *testing.T) { bmbc *bdev.MockBackendConfig iommuDisabled bool expErr error - expResetCall *storage.BdevPrepareRequest + expResetCalls []storage.BdevPrepareRequest }{ "no devices; success": { - expResetCall: &storage.BdevPrepareRequest{ - EnableVMD: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + // If empty TargetUser in cmd, expect current user in call. + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, - "succeeds; user params": { + "succeeds; user params; vmd prepared": { resetCmd: newResetCmd(), - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: defaultSingleAddrList, - PCIBlockList: spaceSepMultiAddrList, - EnableVMD: true, - Reset_: true, + bmbc: &bdev.MockBackendConfig{ + ResetRes: &storage.BdevPrepareResponse{ + // Response flag indicates VMD is active and triggers + // second reset call. + VMDPrepared: true, + }, + }, + expResetCalls: []storage.BdevPrepareRequest{ + { + PCIAllowList: defaultSingleAddrList, + PCIBlockList: spaceSepMultiAddrList, + EnableVMD: true, + TargetUser: getCurrentUsername(t), + Reset_: true, + }, + // VMD was acted on in first call so reset called a second time + // without allow list. EnableVMD is false to prevent VMD domain + // addresses being automatically added to allow list in backend. + { + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, - "succeeds; different target user; multi allow list": { - resetCmd: newResetCmd().WithTargetUser("bob").WithPCIAllowList(defaultMultiAddrList), - expResetCall: &storage.BdevPrepareRequest{ - TargetUser: "bob", - PCIAllowList: spaceSepMultiAddrList, - PCIBlockList: spaceSepMultiAddrList, - EnableVMD: true, - Reset_: true, + "succeeds; different target user; multi allow list; vmd not prepared": { + resetCmd: newResetCmd(). + WithTargetUser("bob"). + WithPCIAllowList(defaultMultiAddrList), + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + TargetUser: "bob", + PCIAllowList: spaceSepMultiAddrList, + PCIBlockList: spaceSepMultiAddrList, + Reset_: true, + }, }, }, "fails; user params": { resetCmd: newResetCmd(), bmbc: &bdev.MockBackendConfig{ + ResetRes: &storage.BdevPrepareResponse{ + VMDPrepared: true, + }, ResetErr: errors.New("backend prep reset failed"), }, - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: defaultSingleAddrList, - PCIBlockList: spaceSepMultiAddrList, - EnableVMD: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + PCIAllowList: defaultSingleAddrList, + PCIBlockList: spaceSepMultiAddrList, + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, expErr: errors.New("backend prep reset failed"), }, @@ -341,12 +372,14 @@ func TestDaosServer_resetNVMe(t *testing.T) { "root; vfio disabled; iommu not detected": { resetCmd: newResetCmd().WithTargetUser("root").WithDisableVFIO(true), iommuDisabled: true, - expResetCall: &storage.BdevPrepareRequest{ - TargetUser: "root", - PCIAllowList: defaultSingleAddrList, - PCIBlockList: spaceSepMultiAddrList, - DisableVFIO: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + TargetUser: "root", + PCIAllowList: defaultSingleAddrList, + PCIBlockList: spaceSepMultiAddrList, + DisableVFIO: true, + Reset_: true, + }, }, }, "config parameters ignored; settings exist": { @@ -358,11 +391,14 @@ func TestDaosServer_resetNVMe(t *testing.T) { WithBdevDeviceList(test.MockPCIAddr(8)))). WithBdevExclude(test.MockPCIAddr(9)). WithNrHugepages(1024), - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: defaultSingleAddrList, - PCIBlockList: spaceSepMultiAddrList, - EnableVMD: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + PCIAllowList: defaultSingleAddrList, + PCIBlockList: spaceSepMultiAddrList, + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, "config parameters ignored; set ignore-config in cmd": { @@ -380,9 +416,12 @@ func TestDaosServer_resetNVMe(t *testing.T) { WithBdevExclude(test.MockPCIAddr(9)). WithNrHugepages(1024). WithDisableVMD(true), - expResetCall: &storage.BdevPrepareRequest{ - EnableVMD: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, "config parameters applied; disable vmd": { @@ -400,11 +439,14 @@ func TestDaosServer_resetNVMe(t *testing.T) { WithBdevExclude(test.MockPCIAddr(9)). WithNrHugepages(1024). WithDisableVMD(true), - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(7), storage.BdevPciAddrSep, - test.MockPCIAddr(8)), - PCIBlockList: test.MockPCIAddr(9), - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(7), + storage.BdevPciAddrSep, test.MockPCIAddr(8)), + PCIBlockList: test.MockPCIAddr(9), + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, "config parameters applied; disable vfio": { @@ -419,25 +461,30 @@ func TestDaosServer_resetNVMe(t *testing.T) { WithLegacyStorage(engine.LegacyStorage{ BdevClass: storage.ClassNvme, BdevConfig: storage.BdevConfig{ - DeviceList: storage.MustNewBdevDeviceList(test.MockPCIAddr(7)), + DeviceList: storage.MustNewBdevDeviceList( + test.MockPCIAddr(7)), }, }), engine.NewConfig(). WithLegacyStorage(engine.LegacyStorage{ BdevClass: storage.ClassNvme, BdevConfig: storage.BdevConfig{ - DeviceList: storage.MustNewBdevDeviceList(test.MockPCIAddr(8)), + DeviceList: storage.MustNewBdevDeviceList( + test.MockPCIAddr(8)), }, }), ). WithBdevExclude(test.MockPCIAddr(9)). WithNrHugepages(1024). WithDisableVMD(true), - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(7), storage.BdevPciAddrSep, - test.MockPCIAddr(8)), - PCIBlockList: test.MockPCIAddr(9), - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(7), + storage.BdevPciAddrSep, test.MockPCIAddr(8)), + PCIBlockList: test.MockPCIAddr(9), + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, } { @@ -482,23 +529,9 @@ func TestDaosServer_resetNVMe(t *testing.T) { mbb.PrepareCalls[0]); diff != "" { t.Fatalf("unexpected clean hugepage calls (-want, +got):\n%s\n", diff) } - if tc.expResetCall == nil { - if len(mbb.ResetCalls) != 0 { - t.Fatalf("unexpected number of reset calls, want 0 got %d", - len(mbb.ResetCalls)) - } - } else { - if len(mbb.ResetCalls) != 1 { - t.Fatalf("unexpected number of reset calls, want 1 got %d", - len(mbb.ResetCalls)) - } - // If empty TargetUser in cmd, expect current user in call. - if tc.resetCmd.TargetUser == "" { - tc.expResetCall.TargetUser = getCurrentUsername(t) - } - if diff := cmp.Diff(*tc.expResetCall, mbb.ResetCalls[0]); diff != "" { - t.Fatalf("unexpected reset calls (-want, +got):\n%s\n", diff) - } + + if diff := cmp.Diff(tc.expResetCalls, mbb.ResetCalls); diff != "" { + t.Fatalf("unexpected reset calls (-want, +got):\n%s\n", diff) } mbb.RUnlock() }) diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index 6846e295dd1..0169f89ac4c 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -506,6 +506,21 @@ func (c *ControlService) adjustScmSize(resp *ctlpb.ScanScmResp) { } } +func checkEnginesReady(instances []Engine) error { + for _, inst := range instances { + if !inst.IsReady() { + var err error = FaultDataPlaneNotStarted + if inst.IsStarted() { + err = errInstanceNotReady + } + + return errors.Wrapf(err, "instance %d", inst.Index()) + } + } + + return nil +} + // StorageScan discovers non-volatile storage hardware on node. func (c *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageScanReq) (*ctlpb.StorageScanResp, error) { if req == nil { @@ -513,6 +528,17 @@ func (c *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageScan } resp := new(ctlpb.StorageScanResp) + // In the case that usage stats are being requested, relevant flags for both SCM and NVMe + // will be set and so fail if engines are not ready for comms. This restriction should not + // be applied if only the Meta flag is set in the NVMe component of the request to continue + // to support off-line storage scan functionality which uses cached stats (e.g. dmg storage + // scan --nvme-meta). + if req.Scm.Usage && req.Nvme.Meta { + if err := checkEnginesReady(c.harness.Instances()); err != nil { + return nil, err + } + } + respScm, err := c.scanScm(ctx, req.Scm) if err != nil { return nil, err diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 272d5174b24..cee5ceb30a7 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -73,6 +73,7 @@ func TestServer_CtlSvc_StorageScan_PreEngineStart(t *testing.T) { smbc *scm.MockBackendConfig tierCfgs storage.TierConfigs expResp *ctlpb.StorageScanResp + expErr error }{ "successful scan; scm namespaces": { bmbc: &bdev.MockBackendConfig{ @@ -403,6 +404,17 @@ func TestServer_CtlSvc_StorageScan_PreEngineStart(t *testing.T) { MemInfo: proto.MockPBMemInfo(), }, }, + "scan usage": { + req: &ctlpb.StorageScanReq{ + Scm: &ctlpb.ScanScmReq{ + Usage: true, + }, + Nvme: &ctlpb.ScanNvmeReq{ + Meta: true, + }, + }, + expErr: FaultDataPlaneNotStarted, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) @@ -434,8 +446,9 @@ func TestServer_CtlSvc_StorageScan_PreEngineStart(t *testing.T) { } resp, err := cs.StorageScan(test.Context(t), tc.req) + test.CmpErr(t, tc.expErr, err) if err != nil { - t.Fatal(err) + return } if tc.req.Nvme.Health || tc.req.Nvme.Meta { @@ -680,6 +693,7 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { smsc *system.MockSysConfig storageCfgs []storage.TierConfigs engineTargetCount []int + enginesNotReady bool scanTwice bool junkResp bool drpcResps map[int][]*mockDrpcResponse @@ -1389,6 +1403,45 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { MemInfo: proto.MockPBMemInfo(), }, }, + "multi-engine; multi-tier; with usage; engines not ready": { + req: &ctlpb.StorageScanReq{ + Scm: &ctlpb.ScanScmReq{Usage: true}, + Nvme: &ctlpb.ScanNvmeReq{Meta: true}, + }, + storageCfgs: []storage.TierConfigs{ + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassDcpm.String()). + WithScmMountPoint(mockPbScmMount0.Path). + WithScmDeviceList(mockPbScmNamespace0.Blockdev), + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(newCtrlr(1).PciAddr), + }, + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassDcpm.String()). + WithScmMountPoint(mockPbScmMount1.Path). + WithScmDeviceList(mockPbScmNamespace1.Blockdev), + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(newCtrlr(2).PciAddr), + }, + }, + engineTargetCount: []int{4, 4}, + enginesNotReady: true, + drpcResps: map[int][]*mockDrpcResponse{ + 0: { + {Message: newSmdDevResp(1)}, + {Message: newBioHealthResp(1)}, + }, + 1: { + {Message: newSmdDevResp(2)}, + {Message: newBioHealthResp(2)}, + }, + }, + expErr: errInstanceNotReady, + }, // Sometimes when more than a few ssds are assigned to engine without many targets, // some of the smd entries for the latter ssds are in state "NEW" rather than // "NORMAL", when in this state, health is unavailable and DER_NONEXIST is returned. @@ -1552,7 +1605,11 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { Controllers: *tc.eCtrlrs[idx], }) } - ne := newTestEngine(log, false, sp, ec) + te := newTestEngine(log, false, sp, ec) + + if tc.enginesNotReady { + te.ready.SetFalse() + } // mock drpc responses dcc := new(mockDrpcClientConfig) @@ -1566,17 +1623,17 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { } else { t.Fatal("drpc response mocks unpopulated") } - ne.setDrpcClient(newMockDrpcClient(dcc)) - ne._superblock.Rank = ranklist.NewRankPtr(uint32(idx + 1)) - ne.setTargetCount(tc.engineTargetCount[idx]) - for _, tc := range ne.storage.GetBdevConfigs() { + te.setDrpcClient(newMockDrpcClient(dcc)) + te._superblock.Rank = ranklist.NewRankPtr(uint32(idx + 1)) + te.setTargetCount(tc.engineTargetCount[idx]) + for _, tc := range te.storage.GetBdevConfigs() { tc.Bdev.DeviceRoles.OptionBits = storage.OptionBits(storage.BdevRoleAll) } - md := ne.storage.GetControlMetadata() + md := te.storage.GetControlMetadata() md.Path = "/foo" md.DevicePath = md.Path - cs.harness.instances[idx] = ne + cs.harness.instances[idx] = te } cs.harness.started.SetTrue() diff --git a/src/control/server/instance_storage.go b/src/control/server/instance_storage.go index ab684cd9ed5..7be5c570b86 100644 --- a/src/control/server/instance_storage.go +++ b/src/control/server/instance_storage.go @@ -185,12 +185,12 @@ func (ei *EngineInstance) logScmStorage() error { // ScanBdevTiers calls in to the private engine storage provider to scan bdev // tiers. Scan will avoid using any cached results if direct is set to true. func (ei *EngineInstance) ScanBdevTiers() ([]storage.BdevTierScanResult, error) { - isStarted := ei.IsStarted() + isUp := ei.IsReady() upDn := "down" - if isStarted { + if isUp { upDn = "up" } ei.log.Debugf("scanning engine-%d bdev tiers while engine is %s", ei.Index(), upDn) - return ei.storage.ScanBdevTiers(!isStarted) + return ei.storage.ScanBdevTiers(!isUp) } diff --git a/src/control/server/storage/bdev/backend.go b/src/control/server/storage/bdev/backend.go index 1d3122bc232..caf4e022aa8 100644 --- a/src/control/server/storage/bdev/backend.go +++ b/src/control/server/storage/bdev/backend.go @@ -235,13 +235,16 @@ func (sb *spdkBackend) prepare(req storage.BdevPrepareRequest, vmdDetect vmdDete } // reset receives function pointers for external interfaces. -func (sb *spdkBackend) reset(req storage.BdevPrepareRequest, vmdDetect vmdDetectFn) error { +func (sb *spdkBackend) reset(req storage.BdevPrepareRequest, vmdDetect vmdDetectFn) (*storage.BdevPrepareResponse, error) { + resp := &storage.BdevPrepareResponse{} + // Update request if VMD has been explicitly enabled and there are VMD endpoints configured. if err := updatePrepareRequest(sb.log, &req, vmdDetect); err != nil { - return errors.Wrapf(err, "update prepare request") + return resp, errors.Wrapf(err, "update prepare request") } + resp.VMDPrepared = req.EnableVMD - return errors.Wrap(sb.script.Reset(&req), "unbinding nvme devices from userspace drivers") + return resp, errors.Wrap(sb.script.Reset(&req), "unbinding nvme devices from userspace drivers") } // Reset will perform a lookup on the requested target user to validate existence @@ -251,7 +254,7 @@ func (sb *spdkBackend) reset(req storage.BdevPrepareRequest, vmdDetect vmdDetect // owned by the target user. // Backend call executes the SPDK setup.sh script to rebind PCI devices as selected by // devs specified in bdev_list and bdev_exclude provided in the server config file. -func (sb *spdkBackend) Reset(req storage.BdevPrepareRequest) error { +func (sb *spdkBackend) Reset(req storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) { sb.log.Debugf("spdk backend reset (script call): %+v", req) return sb.reset(req, DetectVMD) } diff --git a/src/control/server/storage/bdev/backend_test.go b/src/control/server/storage/bdev/backend_test.go index ec3a5b657e8..76780e6ea87 100644 --- a/src/control/server/storage/bdev/backend_test.go +++ b/src/control/server/storage/bdev/backend_test.go @@ -961,6 +961,9 @@ func TestBackend_Prepare(t *testing.T) { Args: []string{"reset"}, }, }, + expResp: &storage.BdevPrepareResponse{ + VMDPrepared: true, + }, }, "prepare setup; defaults": { req: storage.BdevPrepareRequest{ @@ -1298,14 +1301,14 @@ func TestBackend_Prepare(t *testing.T) { } var gotErr error + var gotResp *storage.BdevPrepareResponse if tc.reset { - gotErr = b.reset(tc.req, mockVmdDetect) + gotResp, gotErr = b.reset(tc.req, mockVmdDetect) } else { - var gotResp *storage.BdevPrepareResponse gotResp, gotErr = b.prepare(tc.req, mockVmdDetect, mockHpClean) - if diff := cmp.Diff(tc.expResp, gotResp); diff != "" { - t.Fatalf("\nunexpected prepare response (-want, +got):\n%s\n", diff) - } + } + if diff := cmp.Diff(tc.expResp, gotResp); diff != "" { + t.Fatalf("\nunexpected prepare response (-want, +got):\n%s\n", diff) } test.CmpErr(t, tc.expErr, gotErr) diff --git a/src/control/server/storage/bdev/mocks.go b/src/control/server/storage/bdev/mocks.go index 2e732a2a761..d6239baeca0 100644 --- a/src/control/server/storage/bdev/mocks.go +++ b/src/control/server/storage/bdev/mocks.go @@ -16,9 +16,10 @@ import ( type ( MockBackendConfig struct { VMDEnabled bool // VMD is disabled by default - ResetErr error PrepareRes *storage.BdevPrepareResponse PrepareErr error + ResetRes *storage.BdevPrepareResponse + ResetErr error ScanRes *storage.BdevScanResponse ScanErr error FormatRes *storage.BdevFormatResponse @@ -93,16 +94,19 @@ func (mb *MockBackend) Prepare(req storage.BdevPrepareRequest) (*storage.BdevPre } } -func (mb *MockBackend) Reset(req storage.BdevPrepareRequest) error { +func (mb *MockBackend) Reset(req storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) { mb.Lock() mb.ResetCalls = append(mb.ResetCalls, req) mb.Unlock() - if mb.cfg.ResetErr != nil { - return mb.cfg.ResetErr + switch { + case mb.cfg.ResetErr != nil: + return nil, mb.cfg.ResetErr + case mb.cfg.ResetRes == nil: + return &storage.BdevPrepareResponse{}, nil + default: + return mb.cfg.ResetRes, nil } - - return nil } func (mb *MockBackend) UpdateFirmware(_ string, _ string, _ int32) error { diff --git a/src/control/server/storage/bdev/provider.go b/src/control/server/storage/bdev/provider.go index 3346af77a16..fdb9cb37c08 100644 --- a/src/control/server/storage/bdev/provider.go +++ b/src/control/server/storage/bdev/provider.go @@ -19,7 +19,7 @@ type ( // Backend defines a set of methods to be implemented by a Block Device backend. Backend interface { Prepare(storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) - Reset(storage.BdevPrepareRequest) error + Reset(storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) Scan(storage.BdevScanRequest) (*storage.BdevScanResponse, error) Format(storage.BdevFormatRequest) (*storage.BdevFormatResponse, error) UpdateFirmware(pciAddr string, path string, slot int32) error @@ -62,7 +62,7 @@ func (p *Provider) Scan(req storage.BdevScanRequest) (resp *storage.BdevScanResp func (p *Provider) Prepare(req storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) { if req.Reset_ { p.log.Debug("run bdev storage provider prepare reset") - return &storage.BdevPrepareResponse{}, p.backend.Reset(req) + return p.backend.Reset(req) } p.log.Debug("run bdev storage provider prepare setup") diff --git a/src/gurt/debug.c b/src/gurt/debug.c index 960db55b001..4fb112b7dcc 100644 --- a/src/gurt/debug.c +++ b/src/gurt/debug.c @@ -560,9 +560,8 @@ d_log_init(void) void d_log_fini(void) { - D_ASSERT(d_log_refcount > 0); - D_MUTEX_LOCK(&d_log_lock); + D_ASSERT(d_log_refcount > 0); d_log_refcount--; if (d_log_refcount == 0) { cleanup_dbg_namebit(); diff --git a/src/include/cart/types.h b/src/include/cart/types.h index 5754ebbe666..a4bda75fc24 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -90,35 +90,6 @@ typedef struct crt_init_options { char *cio_auth_key; } crt_init_options_t; -/** - * Enumeration specifying providers supported by the library - */ -typedef enum { - CRT_PROV_SM = 0, - CRT_PROV_OFI_SOCKETS, - CRT_PROV_OFI_VERBS_RXM, - CRT_PROV_OFI_GNI, - CRT_PROV_OFI_TCP_RXM, - CRT_PROV_OFI_CXI, - CRT_PROV_OFI_OPX, - CRT_PROV_OFI_LAST = CRT_PROV_OFI_OPX, - CRT_PROV_UCX_RC, - CRT_PROV_UCX_UD, - CRT_PROV_UCX_RC_UD, - CRT_PROV_UCX_RC_O, - CRT_PROV_UCX_UD_O, - CRT_PROV_UCX_RC_UD_O, - CRT_PROV_UCX_RC_X, - CRT_PROV_UCX_UD_X, - CRT_PROV_UCX_RC_UD_X, - CRT_PROV_UCX_DC_X, - CRT_PROV_UCX_TCP, - CRT_PROV_UCX_LAST = CRT_PROV_UCX_TCP, - /* Note: This entry should be the last valid one in enum */ - CRT_PROV_COUNT, - CRT_PROV_UNKNOWN = -1, -} crt_provider_t; - typedef int crt_status_t; /** diff --git a/src/object/cli_mod.c b/src/object/cli_mod.c index 06257056c36..66fcc0c78e7 100644 --- a/src/object/cli_mod.c +++ b/src/object/cli_mod.c @@ -25,7 +25,7 @@ int dc_obj_proto_version; int dc_obj_init(void) { - uint32_t ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION}; + uint32_t ver_array[1] = {DAOS_OBJ_VERSION}; int rc; rc = obj_utils_init(); @@ -37,15 +37,12 @@ dc_obj_init(void) D_GOTO(out_utils, rc); dc_obj_proto_version = 0; - rc = daos_rpc_proto_query(obj_proto_fmt_0.cpf_base, ver_array, 2, &dc_obj_proto_version); + rc = daos_rpc_proto_query(obj_proto_fmt.cpf_base, ver_array, 1, &dc_obj_proto_version); if (rc) D_GOTO(out_class, rc); - if (dc_obj_proto_version == DAOS_OBJ_VERSION - 1) { - rc = daos_rpc_register(&obj_proto_fmt_0, OBJ_PROTO_CLI_COUNT, NULL, - DAOS_OBJ_MODULE); - } else if (dc_obj_proto_version == DAOS_OBJ_VERSION) { - rc = daos_rpc_register(&obj_proto_fmt_1, OBJ_PROTO_CLI_COUNT, NULL, + if (dc_obj_proto_version == DAOS_OBJ_VERSION) { + rc = daos_rpc_register(&obj_proto_fmt, OBJ_PROTO_CLI_COUNT, NULL, DAOS_OBJ_MODULE); } else { D_ERROR("%d version object RPC not supported.\n", dc_obj_proto_version); @@ -61,10 +58,7 @@ dc_obj_init(void) rc = obj_ec_codec_init(); if (rc) { D_ERROR("failed to obj_ec_codec_init: "DF_RC"\n", DP_RC(rc)); - if (dc_obj_proto_version == DAOS_OBJ_VERSION - 1) - daos_rpc_unregister(&obj_proto_fmt_0); - else - daos_rpc_unregister(&obj_proto_fmt_1); + daos_rpc_unregister(&obj_proto_fmt); D_GOTO(out_class, rc); } out_class: @@ -82,10 +76,7 @@ dc_obj_init(void) void dc_obj_fini(void) { - if (dc_obj_proto_version == DAOS_OBJ_VERSION - 1) - daos_rpc_unregister(&obj_proto_fmt_0); - else - daos_rpc_unregister(&obj_proto_fmt_1); + daos_rpc_unregister(&obj_proto_fmt); obj_ec_codec_fini(); obj_class_fini(); obj_utils_fini(); diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 3c13f7b83d4..9040559db7b 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -2097,8 +2097,8 @@ static int obj_shard_query_key_cb(tse_task_t *task, void *data) { struct obj_query_key_cb_args *cb_args; - struct obj_query_key_1_in *okqi; - struct obj_query_key_1_out *okqo; + struct obj_query_key_in *okqi; + struct obj_query_key_out *okqo; uint32_t flags; int opc; int ret = task->dt_result; @@ -2247,7 +2247,7 @@ dc_obj_shard_query_key(struct dc_obj_shard *shard, struct dtx_epoch *epoch, uint struct dtx_id *dti, uint32_t *map_ver, daos_handle_t th, tse_task_t *task) { struct dc_pool *pool = NULL; - struct obj_query_key_1_in *okqi; + struct obj_query_key_in *okqi; crt_rpc_t *req; struct obj_query_key_cb_args cb_args; daos_unit_oid_t oid; @@ -2296,6 +2296,8 @@ dc_obj_shard_query_key(struct dc_obj_shard *shard, struct dtx_epoch *epoch, uint okqi->okqi_epoch_first = epoch->oe_first; okqi->okqi_api_flags = flags; okqi->okqi_oid = oid; + d_iov_set(&okqi->okqi_dkey, NULL, 0); + d_iov_set(&okqi->okqi_akey, NULL, 0); if (dkey != NULL) okqi->okqi_dkey = *dkey; if (akey != NULL) diff --git a/src/object/obj_rpc.c b/src/object/obj_rpc.c index a8625605f8d..7328ac6d780 100644 --- a/src/object/obj_rpc.c +++ b/src/object/obj_rpc.c @@ -1078,8 +1078,7 @@ crt_proc_struct_daos_cpd_sg(crt_proc_t proc, crt_proc_op_t proc_op, CRT_RPC_DEFINE(obj_rw, DAOS_ISEQ_OBJ_RW, DAOS_OSEQ_OBJ_RW) CRT_RPC_DEFINE(obj_key_enum, DAOS_ISEQ_OBJ_KEY_ENUM, DAOS_OSEQ_OBJ_KEY_ENUM) CRT_RPC_DEFINE(obj_punch, DAOS_ISEQ_OBJ_PUNCH, DAOS_OSEQ_OBJ_PUNCH) -CRT_RPC_DEFINE(obj_query_key_0, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY_0) -CRT_RPC_DEFINE(obj_query_key_1, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY_1) +CRT_RPC_DEFINE(obj_query_key, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY) CRT_RPC_DEFINE(obj_sync, DAOS_ISEQ_OBJ_SYNC, DAOS_OSEQ_OBJ_SYNC) CRT_RPC_DEFINE(obj_migrate, DAOS_ISEQ_OBJ_MIGRATE, DAOS_OSEQ_OBJ_MIGRATE) CRT_RPC_DEFINE(obj_ec_agg, DAOS_ISEQ_OBJ_EC_AGG, DAOS_OSEQ_OBJ_EC_AGG) @@ -1098,29 +1097,17 @@ CRT_RPC_DEFINE(obj_key2anchor, DAOS_ISEQ_OBJ_KEY2ANCHOR, DAOS_OSEQ_OBJ_KEY2ANCHO .prf_co_ops = NULL, \ }, -static struct crt_proto_rpc_format obj_proto_rpc_fmt_0[] = { - OBJ_PROTO_CLI_RPC_LIST(0) -}; - -static struct crt_proto_rpc_format obj_proto_rpc_fmt_1[] = { - OBJ_PROTO_CLI_RPC_LIST(1) +static struct crt_proto_rpc_format obj_proto_rpc_fmt[] = { + OBJ_PROTO_CLI_RPC_LIST }; #undef X -struct crt_proto_format obj_proto_fmt_0 = { - .cpf_name = "daos-object", - .cpf_ver = DAOS_OBJ_VERSION - 1, - .cpf_count = ARRAY_SIZE(obj_proto_rpc_fmt_0), - .cpf_prf = obj_proto_rpc_fmt_0, - .cpf_base = DAOS_RPC_OPCODE(0, DAOS_OBJ_MODULE, 0) -}; - -struct crt_proto_format obj_proto_fmt_1 = { +struct crt_proto_format obj_proto_fmt = { .cpf_name = "daos-object", .cpf_ver = DAOS_OBJ_VERSION, - .cpf_count = ARRAY_SIZE(obj_proto_rpc_fmt_1), - .cpf_prf = obj_proto_rpc_fmt_1, + .cpf_count = ARRAY_SIZE(obj_proto_rpc_fmt), + .cpf_prf = obj_proto_rpc_fmt, .cpf_base = DAOS_RPC_OPCODE(0, DAOS_OBJ_MODULE, 0) }; @@ -1153,7 +1140,7 @@ obj_reply_set_status(crt_rpc_t *rpc, int status) ((struct obj_punch_out *)reply)->opo_ret = status; break; case DAOS_OBJ_RPC_QUERY_KEY: - ((struct obj_query_key_0_out *)reply)->okqo_ret = status; + ((struct obj_query_key_out *)reply)->okqo_ret = status; break; case DAOS_OBJ_RPC_SYNC: ((struct obj_sync_out *)reply)->oso_ret = status; @@ -1197,7 +1184,7 @@ obj_reply_get_status(crt_rpc_t *rpc) case DAOS_OBJ_RPC_TGT_PUNCH_AKEYS: return ((struct obj_punch_out *)reply)->opo_ret; case DAOS_OBJ_RPC_QUERY_KEY: - return ((struct obj_query_key_0_out *)reply)->okqo_ret; + return ((struct obj_query_key_out *)reply)->okqo_ret; case DAOS_OBJ_RPC_SYNC: return ((struct obj_sync_out *)reply)->oso_ret; case DAOS_OBJ_RPC_EC_AGGREGATE: @@ -1241,7 +1228,7 @@ obj_reply_map_version_set(crt_rpc_t *rpc, uint32_t map_version) ((struct obj_punch_out *)reply)->opo_map_version = map_version; break; case DAOS_OBJ_RPC_QUERY_KEY: - ((struct obj_query_key_0_out *)reply)->okqo_map_version = map_version; + ((struct obj_query_key_out *)reply)->okqo_map_version = map_version; break; case DAOS_OBJ_RPC_SYNC: ((struct obj_sync_out *)reply)->oso_map_version = map_version; @@ -1285,7 +1272,7 @@ obj_reply_map_version_get(crt_rpc_t *rpc) case DAOS_OBJ_RPC_TGT_PUNCH_AKEYS: return ((struct obj_punch_out *)reply)->opo_map_version; case DAOS_OBJ_RPC_QUERY_KEY: - return ((struct obj_query_key_0_out *)reply)->okqo_map_version; + return ((struct obj_query_key_out *)reply)->okqo_map_version; case DAOS_OBJ_RPC_SYNC: return ((struct obj_sync_out *)reply)->oso_map_version; case DAOS_OBJ_RPC_CPD: diff --git a/src/object/obj_rpc.h b/src/object/obj_rpc.h index 7a2e00f2315..3b65c705034 100644 --- a/src/object/obj_rpc.h +++ b/src/object/obj_rpc.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -36,16 +36,7 @@ * OPCODE, flags, FMT, handler, corpc_hdlr and name */ -#define QUERY_KEY_0 \ - X(DAOS_OBJ_RPC_QUERY_KEY, \ - 0, &CQF_obj_query_key, \ - ds_obj_query_key_handler_0, NULL, "key_query") -#define QUERY_KEY_1 \ - X(DAOS_OBJ_RPC_QUERY_KEY, \ - 0, &CQF_obj_query_key, \ - ds_obj_query_key_handler_1, NULL, "key_query") - -#define OBJ_PROTO_CLI_RPC_LIST(ver) \ +#define OBJ_PROTO_CLI_RPC_LIST \ X(DAOS_OBJ_RPC_UPDATE, \ 0, &CQF_obj_rw, \ ds_obj_rw_handler, NULL, "update") \ @@ -74,9 +65,8 @@ 0, &CQF_obj_punch, \ ds_obj_punch_handler, NULL, "akey_punch") \ X(DAOS_OBJ_RPC_QUERY_KEY, \ - 0, ver == 0 ? &CQF_obj_query_key_0 : &CQF_obj_query_key_1, \ - ver == 0 ? ds_obj_query_key_handler_0 : \ - ds_obj_query_key_handler_1, NULL, "key_query") \ + 0, &CQF_obj_query_key, \ + ds_obj_query_key_handler, NULL, "key_query") \ X(DAOS_OBJ_RPC_SYNC, \ 0, &CQF_obj_sync, \ ds_obj_sync_handler, NULL, "obj_sync") \ @@ -111,14 +101,13 @@ /* Define for RPC enum population below */ #define X(a, b, c, d, e, f) a, enum obj_rpc_opc { - OBJ_PROTO_CLI_RPC_LIST(1) + OBJ_PROTO_CLI_RPC_LIST OBJ_PROTO_CLI_COUNT, OBJ_PROTO_CLI_LAST = OBJ_PROTO_CLI_COUNT - 1, }; #undef X -extern struct crt_proto_format obj_proto_fmt_0; -extern struct crt_proto_format obj_proto_fmt_1; +extern struct crt_proto_format obj_proto_fmt; extern int dc_obj_proto_version; /* Helper function to convert opc to name */ @@ -127,7 +116,7 @@ obj_opc_to_str(crt_opcode_t opc) { switch (opc) { #define X(a, b, c, d, e, f) case a: return f; - OBJ_PROTO_CLI_RPC_LIST(1) + OBJ_PROTO_CLI_RPC_LIST #undef X } return "unknown"; @@ -306,7 +295,7 @@ CRT_RPC_DECLARE(obj_punch, DAOS_ISEQ_OBJ_PUNCH, DAOS_OSEQ_OBJ_PUNCH) ((daos_key_t) (okqi_dkey) CRT_VAR) \ ((daos_key_t) (okqi_akey) CRT_VAR) -#define DAOS_OSEQ_OBJ_QUERY_KEY_0 /* output fields */ \ +#define DAOS_OSEQ_OBJ_QUERY_KEY /* output fields */ \ ((int32_t) (okqo_ret) CRT_VAR) \ ((uint32_t) (okqo_map_version) CRT_VAR) \ ((uint64_t) (okqo_epoch) CRT_VAR) \ @@ -320,27 +309,11 @@ CRT_RPC_DECLARE(obj_punch, DAOS_ISEQ_OBJ_PUNCH, DAOS_OSEQ_OBJ_PUNCH) /* recx for EC parity space */ \ ((daos_recx_t) (okqo_recx_parity) CRT_VAR) \ /* recx for punched EC extents */ \ - ((daos_recx_t) (okqo_recx_punched) CRT_VAR) - -#define DAOS_OSEQ_OBJ_QUERY_KEY_1 /* output fields */ \ - ((int32_t) (okqo_ret) CRT_VAR) \ - ((uint32_t) (okqo_map_version) CRT_VAR) \ - ((uint64_t) (okqo_epoch) CRT_VAR) \ - ((uint32_t) (okqo_flags) CRT_VAR) \ - ((uint32_t) (okqo_pad32_1) CRT_VAR) \ - ((daos_key_t) (okqo_dkey) CRT_VAR) \ - ((daos_key_t) (okqo_akey) CRT_VAR) \ - /* recx for visible extent */ \ - ((daos_recx_t) (okqo_recx) CRT_VAR) \ - /* recx for EC parity space */ \ - ((daos_recx_t) (okqo_recx_parity) CRT_VAR) \ - /* recx for punched EC extents */ \ ((daos_recx_t) (okqo_recx_punched) CRT_VAR) \ /* epoch for max write */ \ ((uint64_t) (okqo_max_epoch) CRT_VAR) -CRT_RPC_DECLARE(obj_query_key_0, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY_0) -CRT_RPC_DECLARE(obj_query_key_1, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY_1) +CRT_RPC_DECLARE(obj_query_key, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY) #define DAOS_ISEQ_OBJ_SYNC /* input fields */ \ ((uuid_t) (osi_co_hdl) CRT_VAR) \ diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h index 0581a836172..2e417e83629 100644 --- a/src/object/srv_internal.h +++ b/src/object/srv_internal.h @@ -259,8 +259,7 @@ void ds_obj_enum_handler(crt_rpc_t *rpc); void ds_obj_key2anchor_handler(crt_rpc_t *rpc); void ds_obj_punch_handler(crt_rpc_t *rpc); void ds_obj_tgt_punch_handler(crt_rpc_t *rpc); -void ds_obj_query_key_handler_0(crt_rpc_t *rpc); -void ds_obj_query_key_handler_1(crt_rpc_t *rpc); +void ds_obj_query_key_handler(crt_rpc_t *rpc); void ds_obj_sync_handler(crt_rpc_t *rpc); void ds_obj_migrate_handler(crt_rpc_t *rpc); void ds_obj_ec_agg_handler(crt_rpc_t *rpc); diff --git a/src/object/srv_mod.c b/src/object/srv_mod.c index f0b05d25656..15d52941c64 100644 --- a/src/object/srv_mod.c +++ b/src/object/srv_mod.c @@ -67,12 +67,8 @@ obj_mod_fini(void) .dr_corpc_ops = e, \ }, -static struct daos_rpc_handler obj_handlers_0[] = { - OBJ_PROTO_CLI_RPC_LIST(0) -}; - -static struct daos_rpc_handler obj_handlers_1[] = { - OBJ_PROTO_CLI_RPC_LIST(1) +static struct daos_rpc_handler obj_handlers[] = { + OBJ_PROTO_CLI_RPC_LIST }; #undef X @@ -348,10 +344,10 @@ struct dss_module obj_module = { .sm_ver = DAOS_OBJ_VERSION, .sm_init = obj_mod_init, .sm_fini = obj_mod_fini, - .sm_proto_count = 2, - .sm_proto_fmt = {&obj_proto_fmt_0, &obj_proto_fmt_1}, - .sm_cli_count = {OBJ_PROTO_CLI_COUNT, OBJ_PROTO_CLI_COUNT}, - .sm_handlers = {obj_handlers_0, obj_handlers_1}, + .sm_proto_count = 1, + .sm_proto_fmt = {&obj_proto_fmt}, + .sm_cli_count = {OBJ_PROTO_CLI_COUNT}, + .sm_handlers = {obj_handlers}, .sm_key = &obj_module_key, .sm_mod_ops = &ds_obj_mod_ops, .sm_metrics = &obj_metrics, diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 05df46ca104..ca767f03266 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -3873,11 +3873,11 @@ ds_obj_punch_handler(crt_rpc_t *rpc) obj_ioc_end(&ioc, rc); } -static void -ds_obj_query_key_handler(crt_rpc_t *rpc, bool return_epoch) +void +ds_obj_query_key_handler(crt_rpc_t *rpc) { - struct obj_query_key_1_in *okqi; - struct obj_query_key_1_out *okqo; + struct obj_query_key_in *okqi; + struct obj_query_key_out *okqo; daos_key_t *dkey; daos_key_t *akey; struct dtx_handle *dth = NULL; @@ -3935,7 +3935,7 @@ ds_obj_query_key_handler(crt_rpc_t *rpc, bool return_epoch) re_query: rc = vos_obj_query_key(ioc.ioc_vos_coh, okqi->okqi_oid, query_flags, okqi->okqi_epoch, dkey, akey, &okqo->okqo_recx, - return_epoch ? &okqo->okqo_max_epoch : NULL, + &okqo->okqo_max_epoch, cell_size, stripe_size, dth); if (obj_dtx_need_refresh(dth, rc)) { rc = dtx_refresh(dth, ioc.ioc_coc); @@ -3956,18 +3956,6 @@ ds_obj_query_key_handler(crt_rpc_t *rpc, bool return_epoch) D_ERROR("send reply failed: "DF_RC"\n", DP_RC(rc)); } -void -ds_obj_query_key_handler_0(crt_rpc_t *rpc) -{ - ds_obj_query_key_handler(rpc, false); -} - -void -ds_obj_query_key_handler_1(crt_rpc_t *rpc) -{ - ds_obj_query_key_handler(rpc, true); -} - void ds_obj_sync_handler(crt_rpc_t *rpc) { diff --git a/src/pool/cli.c b/src/pool/cli.c index 37219efedd0..81b007c4351 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -87,10 +87,14 @@ pool_free(struct d_hlink *hlink) pool = container_of(hlink, struct dc_pool, dp_hlink); D_ASSERT(daos_hhash_link_empty(&pool->dp_hlink)); + + D_RWLOCK_RDLOCK(&pool->dp_co_list_lock); + D_ASSERT(d_list_empty(&pool->dp_co_list)); + D_RWLOCK_UNLOCK(&pool->dp_co_list_lock); + D_RWLOCK_DESTROY(&pool->dp_map_lock); D_MUTEX_DESTROY(&pool->dp_client_lock); D_RWLOCK_DESTROY(&pool->dp_co_list_lock); - D_ASSERT(d_list_empty(&pool->dp_co_list)); if (pool->dp_map != NULL) pool_map_decref(pool->dp_map); @@ -770,6 +774,7 @@ pool_disconnect_cp(tse_task_t *task, void *data) */ D_ERROR("failed to notify agent of pool disconnect: "DF_RC"\n", DP_RC(rc)); + rc = 0; } /* remove pool from hhash */ diff --git a/src/rdb/rdb_raft.c b/src/rdb/rdb_raft.c index e9515edcb45..0a9075518cc 100644 --- a/src/rdb/rdb_raft.c +++ b/src/rdb/rdb_raft.c @@ -256,7 +256,6 @@ rdb_raft_load_replicas(daos_handle_t lc, uint64_t index, d_rank_list_t **replica rc = rdb_lc_lookup(lc, index, RDB_LC_ATTRS, &rdb_lc_nreplicas, &value); if (rc == -DER_NONEXIST) { D_DEBUG(DB_MD, "no replicas in "DF_U64"\n", index); - rc = 0; nreplicas = 0; } else if (rc != 0) { return rc; diff --git a/src/utils/daos_hdlr.c b/src/utils/daos_hdlr.c index f6f87b4911b..f0cdeaf4361 100644 --- a/src/utils/daos_hdlr.c +++ b/src/utils/daos_hdlr.c @@ -1669,42 +1669,56 @@ dm_disconnect(struct cmd_args_s *ap, if (is_posix_copy) { rc = dfs_sys_umount(src_file_dfs->dfs_sys); if (rc != 0) { - rc = daos_errno2der(rc); - DH_PERROR_DER(ap, rc, "failed to unmount source"); - dfs_sys_umount(src_file_dfs->dfs_sys); + DH_PERROR_DER(ap, daos_errno2der(rc), "failed to unmount source"); + rc = dfs_sys_umount(src_file_dfs->dfs_sys); + if (rc != 0) + DH_PERROR_DER(ap, daos_errno2der(rc), + "failed to unmount source on retry"); } src_file_dfs->dfs_sys = NULL; } rc = daos_cont_close(ca->src_coh, NULL); if (rc != 0) { DH_PERROR_DER(ap, rc, "failed to close source container"); - daos_cont_close(ca->src_coh, NULL); + rc = daos_cont_close(ca->src_coh, NULL); + if (rc != 0) + DH_PERROR_DER(ap, rc, "failed to close source container on retry"); } rc = daos_pool_disconnect(ca->src_poh, NULL); if (rc != 0) { DH_PERROR_DER(ap, rc, "failed to disconnect source pool"); - daos_pool_disconnect(ca->src_poh, NULL); + rc = daos_pool_disconnect(ca->src_poh, NULL); + if (rc != 0) + DH_PERROR_DER(ap, rc, "failed to disconnect source pool on retry"); } } if (dst_file_dfs->type == DAOS) { if (is_posix_copy) { rc = dfs_sys_umount(dst_file_dfs->dfs_sys); if (rc != 0) { - rc = daos_errno2der(rc); - DH_PERROR_DER(ap, rc, "failed to unmount source"); - dfs_sys_umount(dst_file_dfs->dfs_sys); + DH_PERROR_DER(ap, daos_errno2der(rc), "failed to unmount source"); + rc = dfs_sys_umount(dst_file_dfs->dfs_sys); + if (rc != 0) + DH_PERROR_DER(ap, daos_errno2der(rc), + "failed to unmount source on retry"); } dst_file_dfs->dfs_sys = NULL; } rc = daos_cont_close(ca->dst_coh, NULL); if (rc != 0) { DH_PERROR_DER(ap, rc, "failed to close destination container"); - daos_cont_close(ca->dst_coh, NULL); + rc = daos_cont_close(ca->dst_coh, NULL); + if (rc != 0) + DH_PERROR_DER(ap, rc, + "failed to close destination container on retry"); } rc = daos_pool_disconnect(ca->dst_poh, NULL); if (rc != 0) { DH_PERROR_DER(ap, rc, "failed to disconnect destination pool"); - daos_pool_disconnect(ca->dst_poh, NULL); + rc = daos_pool_disconnect(ca->dst_poh, NULL); + if (rc != 0) + DH_PERROR_DER(ap, rc, + "failed to disconnect destination pool on retry"); } } return rc; diff --git a/utils/build.config b/utils/build.config index df8a456afd8..c81f4630ceb 100644 --- a/utils/build.config +++ b/utils/build.config @@ -14,4 +14,4 @@ UCX=v1.13.0 [patch_versions] spdk=https://github.com/spdk/spdk/commit/b0aba3fcd5aceceea530a702922153bc75664978.diff,https://github.com/spdk/spdk/commit/445a4c808badbad3942696ecf16fa60e8129a747.diff -ofi=https://github.com/ofiwg/libfabric/commit/2abe57cd893d70a52137758c2c5b3c7fbf0be2c1.diff,https://github.com/ofiwg/libfabric/commit/9057c4c95b6440774dc92eb8219d297515141137.diff +ofi=https://github.com/ofiwg/libfabric/commit/2abe57cd893d70a52137758c2c5b3c7fbf0be2c1.diff,https://github.com/ofiwg/libfabric/commit/9057c4c95b6440774dc92eb8219d297515141137.diff,https://github.com/ofiwg/libfabric/commit/91b05e9d9ee63c49eac032029dc462fa32ef632f.diff,https://raw.githubusercontent.com/daos-stack/libfabric/06c3dce3f046f1869e87e840bd6fece81882e9af/prov_opx_u32_extended.patch diff --git a/utils/node_local_test.py b/utils/node_local_test.py index db674bfe36e..28620350452 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -3548,7 +3548,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 2': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 2': raise NLTestFail('Wrong number of Leaked OIDs') # run again to check nothing is detected @@ -3558,7 +3558,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 0': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 0': raise NLTestFail('Wrong number of Leaked OIDs') # remount dfuse @@ -3613,7 +3613,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 4': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 4': raise NLTestFail('Wrong number of Leaked OIDs') # run again to check nothing is detected @@ -3623,7 +3623,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 0': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 0': raise NLTestFail('Wrong number of Leaked OIDs') # remount dfuse @@ -3851,6 +3851,10 @@ def test_pil4dfs(self): dir1 = join(path, 'dir1') self.server.run_daos_client_cmd_pil4dfs(['mkdir', dir1]) + # create multiple levels dirs + dirabcd = join(path, 'dira/dirb/dirc/dird') + self.server.run_daos_client_cmd_pil4dfs(['mkdir', '-p', dirabcd]) + # find to list all files/dirs. self.server.run_daos_client_cmd_pil4dfs(['find', path])