Skip to content

Commit

Permalink
HG/NA: fix init info changes made to prevent ABI breakage
Browse files Browse the repository at this point in the history
Fix HG NA init info to v4.0 for now and duplicate tclass info
  • Loading branch information
soumagne committed Jul 1, 2024
1 parent 3525d84 commit 663457e
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 62 deletions.
22 changes: 16 additions & 6 deletions src/mercury_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1164,13 +1164,21 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version,
struct hg_core_private_class **class_p)
{
struct hg_init_info hg_init_info = HG_INIT_INFO_INITIALIZER;
struct na_init_info na_init_info = NA_INIT_INFO_INITIALIZER;
unsigned int na_version = NA_VERSION(NA_VERSION_MAJOR, NA_VERSION_MINOR);
struct na_init_info *na_init_info_p = NULL;
struct hg_core_private_class *hg_core_class = NULL;
#ifdef NA_HAS_SM
const char *na_class_name;
#endif
hg_return_t ret;
int rc;

/* Prevent newer versions */
HG_CHECK_SUBSYS_ERROR(cls,
HG_VERSION_LT(HG_VERSION(HG_VERSION_MAJOR, HG_VERSION_MINOR), version),
error, ret, HG_INVALID_ARG, "API version cannot be newer than library");

/* Create new HG class */
hg_core_class =
(struct hg_core_private_class *) calloc(1, sizeof(*hg_core_class));
Expand Down Expand Up @@ -1203,11 +1211,13 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version,
"API version cannot be 0");
HG_LOG_SUBSYS_DEBUG(cls, "Init info version used: v%d.%d",
HG_MAJOR(version), HG_MINOR(version));
na_init_info_p = &na_init_info;

/* Get init info and overwrite defaults */
if (HG_VERSION_GE(version, HG_VERSION(2, 4)))
if (HG_VERSION_GE(version, HG_VERSION(2, 4))) {
hg_init_info = *hg_init_info_p;
else if (HG_VERSION_GE(version, HG_VERSION(2, 3)))
na_init_info.traffic_class = hg_init_info.traffic_class;
} else if (HG_VERSION_GE(version, HG_VERSION(2, 3)))
hg_init_info_dup_2_3(&hg_init_info,
(const struct hg_init_info_2_3 *) hg_init_info_p);
else
Expand Down Expand Up @@ -1271,8 +1281,8 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version,
hg_core_class->init_info.na_ext_init = true;
} else {
/* Initialize NA if not provided externally */
hg_core_class->core_class.na_class = NA_Initialize_opt(na_info_string,
hg_core_class->init_info.listen, &hg_init_info.na_init_info);
hg_core_class->core_class.na_class = NA_Initialize_opt2(na_info_string,
hg_core_class->init_info.listen, na_version, na_init_info_p);
HG_CHECK_SUBSYS_ERROR(cls, hg_core_class->core_class.na_class == NULL,
error, ret, HG_NA_ERROR,
"Could not initialize NA class (info_string=%s, listen=%d)",
Expand Down Expand Up @@ -1325,8 +1335,8 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version,
info_string_p = "na+sm";

/* Initialize NA SM first so that tmp directories are created */
hg_core_class->core_class.na_sm_class = NA_Initialize_opt(info_string_p,
hg_core_class->init_info.listen, &hg_init_info.na_init_info);
hg_core_class->core_class.na_sm_class = NA_Initialize_opt2(info_string_p,
hg_core_class->init_info.listen, na_version, na_init_info_p);
HG_CHECK_SUBSYS_ERROR(cls,
hg_core_class->core_class.na_sm_class == NULL, error, ret,
HG_NA_ERROR,
Expand Down
43 changes: 23 additions & 20 deletions src/mercury_core_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ typedef enum hg_checksum_level {
*/
struct hg_init_info {
/* NA init info struct, see na_types.h for documentation */
struct na_init_info na_init_info;
struct na_init_info_4_0 na_init_info;

/* Optional NA class that can be used for initializing an HG class. Using
* that option makes the init string passed to HG_Init() ignored.
Expand All @@ -54,17 +54,10 @@ struct hg_init_info {
* Default value is: 512 */
int32_t request_post_incr;

/* Controls the number of multi-recv buffers that are posted. Incrementing
* this value may be beneficial in cases where RPC handles remain in use for
* longer periods of time and release_input_early is not set, preventing
* existing buffers from being reposted.
* Default value is: 4 */
unsigned int multi_recv_op_max;

/* Controls whether the NA shared-memory interface should be automatically
* used if/when the RPC target address shares the same node as its origin.
* Default is: false */
bool auto_sm;
uint8_t auto_sm;

/* Overrides the default info string used to initialize the NA shared-memory
* interface when auto_sm is set to true (e.g., "foo-bar" will create
Expand All @@ -80,33 +73,43 @@ struct hg_init_info {
/* Controls whether mercury should _NOT_ attempt to transfer small bulk data
* along with the RPC request.
* Default is: false */
bool no_bulk_eager;
uint8_t no_bulk_eager;

/* Disable internal loopback interface that enables forwarding of RPC
* requests to self addresses. Doing so will force traffic to be routed
* through NA. For performance reasons, users should be cautious when using
* that option.
* Default is: false */
bool no_loopback;
uint8_t no_loopback;

/* (Debug) Print stats at exit.
* Default is: false */
bool stats;
uint8_t stats;

/* Disable use of multi_recv when available and post separate buffers.
* Default is: false */
bool no_multi_recv;
uint8_t no_multi_recv;

/* Release input buffers as early as possible (usually after HG_Get_input())
* as opposed to releasing them after a call to handle destroy. This may be
* beneficial in cases where the RPC execution time is longer than usual.
* Default is: false */
bool release_input_early;
uint8_t release_input_early;

/* Preferred traffic class. Default is NA_TC_UNSPEC */
enum na_traffic_class traffic_class;

/* Disable use of overflow buffers when RPC message size is above the eager
* message size threshold.
* Default is: false */
bool no_overflow;

/* Controls the number of multi-recv buffers that are posted. Incrementing
* this value may be beneficial in cases where RPC handles remain in use for
* longer periods of time and release_input_early is not set, preventing
* existing buffers from being reposted.
* Default value is: 4 */
unsigned int multi_recv_op_max;
};

/* Error return codes:
Expand Down Expand Up @@ -196,12 +199,12 @@ typedef enum {
#define HG_INIT_INFO_INITIALIZER \
(struct hg_init_info) \
{ \
.na_init_info = NA_INIT_INFO_INITIALIZER, .na_class = NULL, \
.request_post_init = 0, .request_post_incr = 0, \
.multi_recv_op_max = 0, .auto_sm = false, .sm_info_string = NULL, \
.checksum_level = HG_CHECKSUM_NONE, .no_bulk_eager = false, \
.no_loopback = false, .stats = false, .no_multi_recv = false, \
.release_input_early = false, .no_overflow = false \
.na_init_info = NA_INIT_INFO_INITIALIZER_4_0, .na_class = NULL, \
.request_post_init = 0, .request_post_incr = 0, .auto_sm = false, \
.sm_info_string = NULL, .checksum_level = HG_CHECKSUM_NONE, \
.no_bulk_eager = false, .no_loopback = false, .stats = false, \
.no_multi_recv = false, .release_input_early = false, \
.no_overflow = false, .multi_recv_op_max = 0 \
}

#endif /* MERCURY_CORE_TYPES_H */
18 changes: 10 additions & 8 deletions src/mercury_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,10 @@ static HG_INLINE void
hg_init_info_dup_2_3(
struct hg_init_info *new_info, const struct hg_init_info_2_3 *old_info)
{
*new_info = (struct hg_init_info){.na_class = old_info->na_class,
*new_info = (struct hg_init_info){.na_init_info = old_info->na_init_info,
.na_class = old_info->na_class,
.request_post_init = old_info->request_post_init,
.request_post_incr = (int32_t) old_info->request_post_incr,
.multi_recv_op_max = 0,
.auto_sm = old_info->auto_sm,
.sm_info_string = old_info->sm_info_string,
.checksum_level = old_info->checksum_level,
Expand All @@ -169,19 +169,20 @@ hg_init_info_dup_2_3(
.stats = old_info->stats,
.no_multi_recv = old_info->no_multi_recv,
.release_input_early = old_info->release_input_early,
.no_overflow = false};
na_init_info_dup_4_0(&new_info->na_init_info, &old_info->na_init_info);
.traffic_class = NA_TC_UNSPEC,
.no_overflow = false,
.multi_recv_op_max = 0};
}

/*---------------------------------------------------------------------------*/
static HG_INLINE void
hg_init_info_dup_2_2(
struct hg_init_info *new_info, const struct hg_init_info_2_2 *old_info)
{
*new_info = (struct hg_init_info){.na_class = old_info->na_class,
*new_info = (struct hg_init_info){.na_init_info = old_info->na_init_info,
.na_class = old_info->na_class,
.request_post_init = old_info->request_post_init,
.request_post_incr = (int32_t) old_info->request_post_incr,
.multi_recv_op_max = 0,
.auto_sm = old_info->auto_sm,
.sm_info_string = old_info->sm_info_string,
.checksum_level = old_info->checksum_level,
Expand All @@ -190,8 +191,9 @@ hg_init_info_dup_2_2(
.stats = old_info->stats,
.no_multi_recv = false,
.release_input_early = false,
.no_overflow = false};
na_init_info_dup_4_0(&new_info->na_init_info, &old_info->na_init_info);
.traffic_class = NA_TC_UNSPEC,
.no_overflow = false,
.multi_recv_op_max = 0};
}

#ifdef __cplusplus
Expand Down
8 changes: 4 additions & 4 deletions src/na/na.c
Original file line number Diff line number Diff line change
Expand Up @@ -767,9 +767,9 @@ na_class_t *
NA_Initialize_opt(const char *info_string, bool listen,
const struct na_init_info *na_init_info)
{
/* Keep as latest version until info struct is modified */
return NA_Initialize_opt2(info_string, listen,
NA_VERSION(NA_VERSION_MAJOR, NA_VERSION_MINOR), na_init_info);
/* v4.0 is latest version for which init struct was not versioned */
return NA_Initialize_opt2(
info_string, listen, NA_VERSION(4, 0), na_init_info);
}

/*---------------------------------------------------------------------------*/
Expand Down Expand Up @@ -802,7 +802,7 @@ NA_Initialize_opt2(const char *info_string, bool listen, unsigned int version,
NA_MAJOR(version), NA_MINOR(version));

/* Get init info and overwrite defaults */
if (NA_VERSION_GE(version, NA_VERSION(4, 1)))
if (NA_VERSION_GE(version, NA_VERSION(5, 0)))
na_info->na_init_info = *na_init_info;
else
na_init_info_dup_4_0(&na_info->na_init_info,
Expand Down
20 changes: 3 additions & 17 deletions src/na/na.h
Original file line number Diff line number Diff line change
Expand Up @@ -984,20 +984,6 @@ NA_Error_to_string(na_return_t errnum) NA_WARN_UNUSED_RESULT;
/* Local Type and Struct Definition */
/************************************/

/* Previous versions of init info to keep compatiblity with older versions,
* see public definition in na_types.h */
struct na_init_info_4_0 {
const char *ip_subnet;
const char *auth_key;
size_t max_unexpected_size;
size_t max_expected_size;
uint8_t progress_mode;
enum na_addr_format addr_format;
uint8_t max_contexts;
uint8_t thread_mode;
bool request_mem_device;
};

/* NA info definition */
struct na_info {
char *protocol_name; /* Protocol (e.g., tcp, ib) */
Expand Down Expand Up @@ -1129,12 +1115,12 @@ na_init_info_dup_4_0(
.auth_key = old_info->auth_key,
.max_unexpected_size = old_info->max_unexpected_size,
.max_expected_size = old_info->max_expected_size,
.addr_format = old_info->addr_format,
.traffic_class = NA_TC_UNSPEC,
.progress_mode = old_info->progress_mode,
.addr_format = old_info->addr_format,
.max_contexts = old_info->max_contexts,
.thread_mode = old_info->thread_mode,
.request_mem_device = old_info->request_mem_device};
.request_mem_device = old_info->request_mem_device,
.traffic_class = NA_TC_UNSPEC};
}

/*---------------------------------------------------------------------------*/
Expand Down
39 changes: 32 additions & 7 deletions src/na/na_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,13 @@ struct na_init_info {
* used. */
size_t max_expected_size;

/* Preferred address format. Default is NA_ADDR_UNSPEC. */
enum na_addr_format addr_format;

/* Preferred traffic class. Default is NA_TC_UNSPEC */
enum na_traffic_class traffic_class;

/* Progress mode flag. Setting NA_NO_BLOCK will force busy-spin on progress
* and remove any wait/notification calls. */
uint8_t progress_mode;

/* Preferred address format. Default is NA_ADDR_UNSPEC. */
enum na_addr_format addr_format;

/* Maximum number of contexts that are expected to be created. */
uint8_t max_contexts;

Expand All @@ -97,6 +94,22 @@ struct na_init_info {
/* Request support for tranfers to/from memory devices (e.g., GPU, etc).
* Default is: false. */
bool request_mem_device;

/* Preferred traffic class. Default is NA_TC_UNSPEC */
enum na_traffic_class traffic_class;
};

/* Previous versions of init info to keep compatiblity with older versions */
struct na_init_info_4_0 {
const char *ip_subnet;
const char *auth_key;
size_t max_unexpected_size;
size_t max_expected_size;
uint8_t progress_mode;
enum na_addr_format addr_format;
uint8_t max_contexts;
uint8_t thread_mode;
bool request_mem_device;
};

/* Segment */
Expand Down Expand Up @@ -253,9 +266,21 @@ typedef void (*na_cb_t)(const struct na_cb_info *callback_info);
.auth_key = NULL, \
.max_unexpected_size = 0, \
.max_expected_size = 0, \
.progress_mode = 0, \
.addr_format = NA_ADDR_UNSPEC, \
.traffic_class = NA_TC_UNSPEC, \
.max_contexts = 1, \
.thread_mode = 0, \
.request_mem_device = false, \
.traffic_class = NA_TC_UNSPEC})

/* NA init info initializer */
#define NA_INIT_INFO_INITIALIZER_4_0 \
((struct na_init_info_4_0){.ip_subnet = NULL, \
.auth_key = NULL, \
.max_unexpected_size = 0, \
.max_expected_size = 0, \
.progress_mode = 0, \
.addr_format = NA_ADDR_UNSPEC, \
.max_contexts = 1, \
.thread_mode = 0, \
.request_mem_device = false})
Expand Down

0 comments on commit 663457e

Please sign in to comment.