Skip to content

Commit

Permalink
DAOS-14262 cart: add ability to select traffic class for SWIM context (
Browse files Browse the repository at this point in the history
…#14893) (#14917)

Add SWIM_TRAFFIC_CLASS env var (default is unspec)

Signed-off-by: Jerome Soumagne <jerome.soumagne@intel.com>
  • Loading branch information
soumagne authored Oct 24, 2024
1 parent ec3aa1c commit 2b5620b
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 14 deletions.
12 changes: 7 additions & 5 deletions src/cart/README.env
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
This file lists the environment variables used in CaRT.

. D_PROVIDER (Deprecated: CRT_PHY_ADDR_STR)
It determines which mercury NA plugin to be used:
It determines which mercury NA plugin and transport to be used:
- set it as "ofi+verbs;ofi_rxm" to use OFI verbs;ofi_rxm provider
- set it as "ofi+gni" to use OFI gni provider
- set it as "sm" to use SM plugin which only works within single node
- set it as "ofi+tcp;ofi_rxm" to use OFI tcp;ofi_rxm provider.
- set it as "ofi+sockets" to use OFI sockets provider
NOTE: This provider is deprecated in favor of "ofi+tcp;ofi_rxm"
- set it as "ofi+tcp" to use OFI tcp provider.
- by default (not set or set as any other value) it will use ofi tcp
provider.

Expand Down Expand Up @@ -205,3 +202,8 @@ This file lists the environment variables used in CaRT.
start copying data in an effort to release multi-recv buffers. Copy will occur when at
most D_MRECV_BUF_COPY buffers remain.

SWIM_TRAFFIC_CLASS
(server only) Select a traffic class for the SWIM protocol to use and prevent potential
traffic congestion. Available options are: "unspec" (default), "best_effort",
"low_latency", "bulk_data".

3 changes: 3 additions & 0 deletions src/cart/crt_hg.c
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,9 @@ crt_hg_class_init(crt_provider_t provider, int ctx_idx, bool primary, int iface_
init_info.request_post_incr = crt_gdata.cg_post_incr;
init_info.multi_recv_op_max = crt_gdata.cg_mrecv_buf;
init_info.multi_recv_copy_threshold = crt_gdata.cg_mrecv_buf_copy;
/* Separate SWIM traffic in an effort to prevent potential congestion. */
if (crt_is_service() && ctx_idx == crt_gdata.cg_swim_crt_idx)
init_info.traffic_class = (enum na_traffic_class)crt_gdata.cg_swim_tc;

hg_class = HG_Init_opt2(info_string, crt_is_service(), HG_VERSION(2, 4), &init_info);
if (hg_class == NULL) {
Expand Down
36 changes: 27 additions & 9 deletions src/cart/crt_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ static volatile int gdata_init_flag;
struct crt_plugin_gdata crt_plugin_gdata;
static bool g_prov_settings_applied[CRT_PROV_COUNT];

#define X(a, b) b,
static const char *const crt_tc_name[] = {CRT_TRAFFIC_CLASSES};
#undef X

static void
crt_lib_init(void) __attribute__((__constructor__));

Expand Down Expand Up @@ -237,18 +241,30 @@ crt_gdata_dump(void)
DUMP_GDATA_FIELD("%d", cg_rpc_quota);
}

static enum crt_traffic_class
crt_str_to_tc(const char *str)
{
enum crt_traffic_class i = 0;

while (str != NULL && strcmp(crt_tc_name[i], str) != 0 && i < CRT_TC_UNKNOWN)
i++;

return i == CRT_TC_UNKNOWN ? CRT_TC_UNSPEC : i;
}

/* first step init - for initializing crt_gdata */
static int data_init(int server, crt_init_options_t *opt)
{
uint32_t timeout = 0;
uint32_t credits;
uint32_t fi_univ_size = 0;
uint32_t mem_pin_enable = 0;
uint32_t is_secondary;
uint32_t post_init = CRT_HG_POST_INIT, post_incr = CRT_HG_POST_INCR;
unsigned int mrecv_buf = CRT_HG_MRECV_BUF;
unsigned int mrecv_buf_copy = 0; /* buf copy disabled by default */
int rc = 0;
uint32_t timeout = 0;
uint32_t credits;
uint32_t fi_univ_size = 0;
uint32_t mem_pin_enable = 0;
uint32_t is_secondary;
uint32_t post_init = CRT_HG_POST_INIT, post_incr = CRT_HG_POST_INCR;
unsigned int mrecv_buf = CRT_HG_MRECV_BUF;
unsigned int mrecv_buf_copy = 0; /* buf copy disabled by default */
char *swim_traffic_class = NULL;
int rc = 0;

crt_env_dump();

Expand All @@ -261,6 +277,8 @@ static int data_init(int server, crt_init_options_t *opt)
crt_gdata.cg_mrecv_buf = mrecv_buf;
crt_env_get(D_MRECV_BUF_COPY, &mrecv_buf_copy);
crt_gdata.cg_mrecv_buf_copy = mrecv_buf_copy;
crt_env_get(SWIM_TRAFFIC_CLASS, &swim_traffic_class);
crt_gdata.cg_swim_tc = crt_str_to_tc(swim_traffic_class);

is_secondary = 0;
/* Apply CART-890 workaround for server side only */
Expand Down
15 changes: 15 additions & 0 deletions src/cart/crt_internal_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,17 @@ struct crt_na_config {
char **noc_domain_str; /* Array of domains */
};

#define CRT_TRAFFIC_CLASSES \
X(CRT_TC_UNSPEC, "unspec") /* Leave it upon plugin to choose */ \
X(CRT_TC_BEST_EFFORT, "best_effort") /* Best effort */ \
X(CRT_TC_LOW_LATENCY, "low_latency") /* Low latency */ \
X(CRT_TC_BULK_DATA, "bulk_data") /* Bulk data */ \
X(CRT_TC_UNKNOWN, "unknown") /* Unknown */

#define X(a, b) a,
enum crt_traffic_class { CRT_TRAFFIC_CLASSES };
#undef X

struct crt_prov_gdata {
/** NA plugin type */
int cpg_provider;
Expand Down Expand Up @@ -105,6 +116,9 @@ struct crt_gdata {
/** global swim index for all servers */
int32_t cg_swim_crt_idx;

/** traffic class used by SWIM */
enum crt_traffic_class cg_swim_tc;

/** credits limitation for #in-flight RPCs per target EP CTX */
uint32_t cg_credit_ep_ctx;

Expand Down Expand Up @@ -220,6 +234,7 @@ struct crt_event_cb_priv {
ENV(SWIM_PING_TIMEOUT) \
ENV(SWIM_PROTOCOL_PERIOD_LEN) \
ENV(SWIM_SUSPECT_TIMEOUT) \
ENV_STR(SWIM_TRAFFIC_CLASS) \
ENV_STR(UCX_IB_FORK_INIT)

/* uint env */
Expand Down

0 comments on commit 2b5620b

Please sign in to comment.