Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/release/2.4' into dbohning/daos-…
Browse files Browse the repository at this point in the history
…13555-2.4

Test-tag: test_always_passes,vm
Skip-unit-tests: true
Skip-fault-injection-test: true
  • Loading branch information
daltonbohning committed Jul 7, 2023
2 parents 56e6bfe + 776249a commit eb72047
Show file tree
Hide file tree
Showing 40 changed files with 524 additions and 347 deletions.
2 changes: 1 addition & 1 deletion ci/docker_nlt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pushd "$TMP_DIR"
set +e

sudo --preserve-env=VIRTUAL_ENV,PATH ./node_local_test.py \
--no-root --memcheck no --system-ram-reserved 32 --server-debug WARN "$@"
--no-root --memcheck no --system-ram-reserved 48 --server-debug WARN "$@"

RC=$?
set -e
Expand Down
1 change: 1 addition & 0 deletions docs/admin/env_variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Environment variables in this section only apply to the client side.
|Variable |Description|
|-------------------------|-----------|
|FI\_MR\_CACHE\_MAX\_COUNT|Enable MR (Memory Registration) caching in OFI layer. Recommended to be set to 0 (disable) when CRT\_DISABLE\_MEM\_PIN is NOT set to 1. INTEGER. Default to unset.|
|D\_POLL\_TIMEOUT|Polling timeout passed to network progress for synchronous operations. Default to 0 (busy polling), value in micro-seconds otherwise.|


## Debug System (Client & Server)
Expand Down
8 changes: 6 additions & 2 deletions src/cart/crt_hg.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,14 @@ struct crt_na_dict crt_na_dict[] = {
.nad_str = "ofi+gni",
.nad_contig_eps = true,
.nad_port_bind = false,
}, {
.nad_type = CRT_PROV_OFI_TCP,
.nad_str = "ofi+tcp",
.nad_contig_eps = true,
.nad_port_bind = true,
}, {
.nad_type = CRT_PROV_OFI_TCP_RXM,
.nad_str = "ofi+tcp;ofi_rxm",
.nad_alt_str = "ofi+tcp",
.nad_contig_eps = true,
.nad_port_bind = true,
}, {
Expand Down Expand Up @@ -655,7 +659,7 @@ crt_get_opx_info_string(char *provider, char *domain, char *ip,
D_GOTO(out, rc = -DER_INVAL);
}

strcpy(domain_name, domain);
strncpy(domain_name, domain, sizeof(domain_name) - 1);
strtok_r(domain_name, &domain[delimiter], &hfi_str);
hfi = (unsigned int)strtoul(hfi_str, NULL, 10);

Expand Down
29 changes: 29 additions & 0 deletions src/cart/crt_hg.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,35 @@ struct crt_rpc_priv;
struct crt_common_hdr;
struct crt_corpc_hdr;

/**
* Enumeration specifying providers supported by the library
*/
typedef enum {
CRT_PROV_SM = 0,
CRT_PROV_OFI_SOCKETS,
CRT_PROV_OFI_VERBS_RXM,
CRT_PROV_OFI_GNI,
CRT_PROV_OFI_TCP,
CRT_PROV_OFI_TCP_RXM,
CRT_PROV_OFI_CXI,
CRT_PROV_OFI_OPX,
CRT_PROV_OFI_LAST = CRT_PROV_OFI_OPX,
CRT_PROV_UCX_RC,
CRT_PROV_UCX_UD,
CRT_PROV_UCX_RC_UD,
CRT_PROV_UCX_RC_O,
CRT_PROV_UCX_UD_O,
CRT_PROV_UCX_RC_UD_O,
CRT_PROV_UCX_RC_X,
CRT_PROV_UCX_UD_X,
CRT_PROV_UCX_RC_UD_X,
CRT_PROV_UCX_DC_X,
CRT_PROV_UCX_TCP,
CRT_PROV_UCX_LAST = CRT_PROV_UCX_TCP,
/* Note: This entry should be the last valid one in enum */
CRT_PROV_COUNT,
CRT_PROV_UNKNOWN = -1,
} crt_provider_t;

crt_provider_t
crt_prov_str_to_prov(const char *prov_str);
Expand Down
11 changes: 6 additions & 5 deletions src/cart/crt_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ dump_envariables(void)
"CRT_CTX_SHARE_ADDR", "CRT_CTX_NUM", "D_FI_CONFIG",
"FI_UNIVERSE_SIZE", "CRT_ENABLE_MEM_PIN",
"FI_OFI_RXM_USE_SRX", "D_LOG_FLUSH", "CRT_MRC_ENABLE",
"CRT_SECONDARY_PROVIDER", "D_PROVIDER_AUTH_KEY", "D_PORT_AUTO_ADJUST"};
"CRT_SECONDARY_PROVIDER", "D_PROVIDER_AUTH_KEY", "D_PORT_AUTO_ADJUST",
"D_POLL_TIMEOUT"};

D_INFO("-- ENVARS: --\n");
for (i = 0; i < ARRAY_SIZE(envars); i++) {
Expand Down Expand Up @@ -545,17 +546,17 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt)

if (prov == CRT_PROV_OFI_CXI)
mrc_enable = 1;
else {
/* Use tagged messages for other providers, disable multi-recv */
apply_if_not_set("NA_OFI_UNEXPECTED_TAG_MSG", "1");
}

d_getenv_int("CRT_MRC_ENABLE", &mrc_enable);
if (mrc_enable == 0) {
D_INFO("Disabling MR CACHE (FI_MR_CACHE_MAX_COUNT=0)\n");
setenv("FI_MR_CACHE_MAX_COUNT", "0", 1);
}

/* Use tagged messages for other providers, disable multi-recv */
if (prov != CRT_PROV_OFI_CXI && prov != CRT_PROV_OFI_TCP)
apply_if_not_set("NA_OFI_UNEXPECTED_TAG_MSG", "1");

g_prov_settings_applied[prov] = true;
}

Expand Down
17 changes: 9 additions & 8 deletions src/cart/utils/crt_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,18 +134,19 @@ crtu_progress_fn(void *data)
while (opts.shutdown == 0)
crt_progress(*p_ctx, 1000);

if (opts.is_swim_enabled && idx == 0)
crt_swim_disable_all();
if (opts.is_server) {
if (opts.is_swim_enabled && idx == 0)
crt_swim_disable_all();

rc = crtu_drain_queue(*p_ctx);
D_ASSERTF(rc == 0, "crtu_drain_queue() failed with rc=%d\n", rc);
rc = crtu_drain_queue(*p_ctx);
D_ASSERTF(rc == 0, "crtu_drain_queue() failed with rc=%d\n", rc);

if (opts.delay_shutdown_sec > 0)
sleep(opts.delay_shutdown_sec);
if (opts.delay_shutdown_sec > 0)
sleep(opts.delay_shutdown_sec);
}

rc = crt_context_destroy(*p_ctx, 1);
D_ASSERTF(rc == 0, "Failed to destroy context %p rc=%d\n",
p_ctx, rc);
D_ASSERTF(rc == 0, "Failed to destroy context %p rc=%d\n", p_ctx, rc);

pthread_exit(rc ? *p_ctx : NULL);

Expand Down
10 changes: 9 additions & 1 deletion src/client/api/event.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
static __thread daos_event_t ev_thpriv;
static __thread bool ev_thpriv_is_init;

/**
* Global progress timeout for synchronous operation
* busy-polling by default (0), timeout in us otherwise
*/
static uint32_t ev_prog_timeout;

#define EQ_WITH_CRT

#if !defined(EQ_WITH_CRT)
Expand Down Expand Up @@ -91,6 +97,8 @@ daos_eq_lib_init()

eq_ref = 1;

d_getenv_int("D_POLL_TIMEOUT", &ev_prog_timeout);

unlock:
D_MUTEX_UNLOCK(&daos_eq_lock);
return rc;
Expand Down Expand Up @@ -1261,7 +1269,7 @@ daos_event_priv_wait()

/* Wait on the event to complete */
while (evx->evx_status != DAOS_EVS_READY) {
rc = crt_progress_cond(evx->evx_ctx, 0, ev_progress_cb, &epa);
rc = crt_progress_cond(evx->evx_ctx, ev_prog_timeout, ev_progress_cb, &epa);

/** progress succeeded, loop can exit if event completed */
if (rc == 0) {
Expand Down
38 changes: 21 additions & 17 deletions src/client/array/dc_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -1822,7 +1822,7 @@ struct key_query_props {
char akey_val;
daos_recx_t recx;
daos_size_t *size;
daos_size_t max_epoch;
daos_epoch_t max_epoch;
tse_task_t *ptask;
};

Expand All @@ -1831,8 +1831,7 @@ free_query_cb(tse_task_t *task, void *data)
{
struct key_query_props *props = *((struct key_query_props **)data);

if (props->array)
array_decref(props->array);
array_decref(props->array);
D_FREE(props);
return 0;
}
Expand Down Expand Up @@ -1905,7 +1904,7 @@ dc_array_get_size(tse_task_t *task)
query_args->dkey = &kqp->dkey;
query_args->akey = &kqp->akey;
query_args->recx = &kqp->recx;
query_args->max_epoch = NULL;
query_args->max_epoch = &kqp->max_epoch;

rc = tse_task_register_comp_cb(task, free_query_cb, &kqp, sizeof(kqp));
if (rc != 0)
Expand Down Expand Up @@ -2285,7 +2284,7 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l
daos_key_t *dkey;
struct io_params *params = NULL;
tse_task_t *io_task;
bool free_iod_recxs = true;
bool free_params = true;
int rc;

D_ALLOC_PTR(params);
Expand Down Expand Up @@ -2336,7 +2335,7 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l
rc = tse_task_register_comp_cb(io_task, free_io_params_cb, &params, sizeof(params));
if (rc)
D_GOTO(err_task, rc);
free_iod_recxs = false;
free_params = false;

rc = tse_task_register_deps(props->ptask, 1, &io_task);
if (rc)
Expand All @@ -2351,9 +2350,10 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l
if (io_task)
tse_task_complete(io_task, rc);
err:
D_FREE(params);
if (free_iod_recxs)
if (free_params) {
D_FREE(iod->iod_recxs);
D_FREE(params);
}
return rc;
}

Expand Down Expand Up @@ -2478,6 +2478,7 @@ dc_array_set_size(tse_task_t *task)
daos_obj_list_dkey_t *enum_args;
struct set_size_props *set_size_props = NULL;
tse_task_t *enum_task;
bool cleanup = true;
int rc;

args = daos_task_get_args(task);
Expand Down Expand Up @@ -2537,31 +2538,34 @@ dc_array_set_size(tse_task_t *task)
enum_args->sgl = &set_size_props->sgl;
enum_args->dkey_anchor = &set_size_props->anchor;

rc = tse_task_register_cbs(enum_task, NULL, NULL, 0, adjust_array_size_cb, &set_size_props,
sizeof(set_size_props));
rc = tse_task_register_comp_cb(task, free_set_size_cb, &set_size_props,
sizeof(set_size_props));
if (rc)
D_GOTO(err_enum_task, rc);
cleanup = false;

rc = tse_task_register_deps(task, 1, &enum_task);
rc = tse_task_register_comp_cb(enum_task, adjust_array_size_cb, &set_size_props,
sizeof(set_size_props));
if (rc)
D_GOTO(err_enum_task, rc);

rc = tse_task_register_comp_cb(task, free_set_size_cb, &set_size_props,
sizeof(set_size_props));
rc = tse_task_register_deps(task, 1, &enum_task);
if (rc)
D_GOTO(err_enum_task, rc);

rc = tse_task_schedule(enum_task, true);
if (rc)
D_GOTO(err_enum_task, rc);
D_GOTO(err_task, rc);

return 0;
err_enum_task:
tse_task_complete(enum_task, rc);
err_task:
D_FREE(set_size_props);
if (array)
array_decref(array);
tse_task_complete(task, rc);
if (cleanup) {
D_FREE(set_size_props);
if (array)
array_decref(array);
}
return rc;
} /* end daos_array_set_size */
Loading

0 comments on commit eb72047

Please sign in to comment.