Skip to content

Commit

Permalink
HG: fix NA init info not correctly set from HG core
Browse files: browse the repository at this point in the history
Add some more debug info
  • Loading branch information
soumagne committed Aug 23, 2024
1 parent 2635745 commit d0fe7d4
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 2 deletions.
20 changes: 19 additions & 1 deletion src/mercury_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1218,20 +1218,38 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version,
if (hg_init_info_p) {
HG_CHECK_SUBSYS_ERROR(cls, version == 0, error, ret, HG_INVALID_ARG,
"API version cannot be 0");
HG_LOG_SUBSYS_DEBUG(cls, "Init info version used: v%d.%d",
HG_LOG_SUBSYS_DEBUG(cls, "HG init info version used: v%d.%d",
HG_MAJOR(version), HG_MINOR(version));
na_init_info_p = &na_init_info;

/* Get init info and overwrite defaults */
if (HG_VERSION_GE(version, HG_VERSION(2, 4))) {
hg_init_info = *hg_init_info_p;
na_init_info_dup_4_0(&na_init_info, &hg_init_info.na_init_info);
na_init_info.traffic_class = hg_init_info.traffic_class;
} else if (HG_VERSION_GE(version, HG_VERSION(2, 3)))
hg_init_info_dup_2_3(&hg_init_info,
(const struct hg_init_info_2_3 *) hg_init_info_p);
else
hg_init_info_dup_2_2(&hg_init_info,
(const struct hg_init_info_2_2 *) hg_init_info_p);

HG_LOG_SUBSYS_DEBUG(cls,
"HG Init info: na_class=%p, request_post_init=%" PRIu32
", request_post_incr=%" PRId32 ", auto_sm=%" PRIu8
", sm_info_string=%s, checksum_level=%d, no_bulk_eager=%" PRIu8
", no_loopback=%" PRIu8 ", stats=%" PRIu8 ", no_multi_recv=%" PRIu8
", release_input_early=%" PRIu8
", traffic_class=%d, no_overflow=%d, multi_recv_op_max=%u, "
"multi_recv_copy_threshold=%u",
(void *) hg_init_info.na_class, hg_init_info.request_post_init,
hg_init_info.request_post_incr, hg_init_info.auto_sm,
hg_init_info.sm_info_string, hg_init_info.checksum_level,
hg_init_info.no_bulk_eager, hg_init_info.no_loopback,
hg_init_info.stats, hg_init_info.no_multi_recv,
hg_init_info.release_input_early, hg_init_info.traffic_class,
hg_init_info.no_overflow, hg_init_info.multi_recv_op_max,
hg_init_info.multi_recv_copy_threshold);
}

/* Set post init / incr / multi-recv values */
Expand Down
17 changes: 16 additions & 1 deletion src/na/na.c
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@ NA_Initialize_opt2(const char *info_string, bool listen, unsigned int version,
if (na_init_info) {
NA_CHECK_SUBSYS_ERROR(fatal, version == 0, error, ret, NA_INVALID_ARG,
"API version cannot be 0");
NA_LOG_SUBSYS_DEBUG(cls, "Init info version used: v%d.%d",
NA_LOG_SUBSYS_DEBUG(cls, "NA init info version used: v%d.%d",
NA_MAJOR(version), NA_MINOR(version));

/* Get init info and overwrite defaults */
Expand All @@ -808,6 +808,21 @@ NA_Initialize_opt2(const char *info_string, bool listen, unsigned int version,
na_init_info_dup_4_0(&na_info->na_init_info,
(const struct na_init_info_4_0 *) na_init_info);

NA_LOG_SUBSYS_DEBUG(cls,
"NA Init info: ip_subnet=%s, auth_key=%s, max_unexpected_size=%zu, "
"max_expected_size=%zu, progress_mode=%" PRIu8
", addr_format=%d, max_contexts=%" PRIu8 ", thread_mode=%" PRIu8
", request_mem_device=%u, traffic_class=%d",
na_info->na_init_info.ip_subnet, na_info->na_init_info.auth_key,
na_info->na_init_info.max_unexpected_size,
na_info->na_init_info.max_expected_size,
na_info->na_init_info.progress_mode,
na_info->na_init_info.addr_format,
na_info->na_init_info.max_contexts,
na_info->na_init_info.thread_mode,
na_info->na_init_info.request_mem_device,
na_info->na_init_info.traffic_class);

na_private_class->na_class.progress_mode = na_init_info->progress_mode;
}

Expand Down
8 changes: 8 additions & 0 deletions src/na/na_ofi.c
Original file line number Diff line number Diff line change
Expand Up @@ -8332,6 +8332,10 @@ na_ofi_msg_send_unexpected(na_class_t *na_class, na_context_t *context,
!(hg_atomic_get32(&na_ofi_op_id->status) & NA_OFI_OP_COMPLETED), error,
ret, NA_BUSY, "Attempting to use OP ID that was not completed (%s)",
na_cb_type_to_string(na_ofi_op_id->type));
NA_CHECK_SUBSYS_ERROR(msg,
buf_size > na_ofi_class->endpoint->unexpected_msg_size_max, error, ret,
NA_INVALID_ARG, "Invalid msg size (%zu > %zu)", buf_size,
na_ofi_class->endpoint->unexpected_msg_size_max);

NA_OFI_OP_RESET(na_ofi_op_id, context, FI_SEND, NA_CB_SEND_UNEXPECTED,
callback, arg, na_ofi_addr);
Expand Down Expand Up @@ -8510,6 +8514,10 @@ na_ofi_msg_send_expected(na_class_t *na_class, na_context_t *context,
!(hg_atomic_get32(&na_ofi_op_id->status) & NA_OFI_OP_COMPLETED), error,
ret, NA_BUSY, "Attempting to use OP ID that was not completed (%s)",
na_cb_type_to_string(na_ofi_op_id->type));
NA_CHECK_SUBSYS_ERROR(msg,
buf_size > na_ofi_class->endpoint->expected_msg_size_max, error, ret,
NA_INVALID_ARG, "Invalid msg size (%zu > %zu)", buf_size,
na_ofi_class->endpoint->expected_msg_size_max);

NA_OFI_OP_RESET(na_ofi_op_id, context, FI_SEND, NA_CB_SEND_EXPECTED,
callback, arg, na_ofi_addr);
Expand Down

0 comments on commit d0fe7d4

Please sign in to comment.