From d0fe7d4c8e042e44cad5790fd2c493b93d74bd80 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Fri, 23 Aug 2024 10:36:35 -0500 Subject: [PATCH] HG: fix NA init info not correctly set from HG core Add some more debug info --- src/mercury_core.c | 20 +++++++++++++++++++- src/na/na.c | 17 ++++++++++++++++- src/na/na_ofi.c | 8 ++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/mercury_core.c b/src/mercury_core.c index 7c4bf36a..a614621c 100644 --- a/src/mercury_core.c +++ b/src/mercury_core.c @@ -1218,13 +1218,14 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version, if (hg_init_info_p) { HG_CHECK_SUBSYS_ERROR(cls, version == 0, error, ret, HG_INVALID_ARG, "API version cannot be 0"); - HG_LOG_SUBSYS_DEBUG(cls, "Init info version used: v%d.%d", + HG_LOG_SUBSYS_DEBUG(cls, "HG init info version used: v%d.%d", HG_MAJOR(version), HG_MINOR(version)); na_init_info_p = &na_init_info; /* Get init info and overwrite defaults */ if (HG_VERSION_GE(version, HG_VERSION(2, 4))) { hg_init_info = *hg_init_info_p; + na_init_info_dup_4_0(&na_init_info, &hg_init_info.na_init_info); na_init_info.traffic_class = hg_init_info.traffic_class; } else if (HG_VERSION_GE(version, HG_VERSION(2, 3))) hg_init_info_dup_2_3(&hg_init_info, @@ -1232,6 +1233,23 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version, else hg_init_info_dup_2_2(&hg_init_info, (const struct hg_init_info_2_2 *) hg_init_info_p); + + HG_LOG_SUBSYS_DEBUG(cls, + "HG Init info: na_class=%p, request_post_init=%" PRIu32 + ", request_post_incr=%" PRId32 ", auto_sm=%" PRIu8 + ", sm_info_string=%s, checksum_level=%d, no_bulk_eager=%" PRIu8 + ", no_loopback=%" PRIu8 ", stats=%" PRIu8 ", no_multi_recv=%" PRIu8 + ", release_input_early=%" PRIu8 + ", traffic_class=%d, no_overflow=%d, multi_recv_op_max=%u, " + "multi_recv_copy_threshold=%u", + (void *) hg_init_info.na_class, hg_init_info.request_post_init, + hg_init_info.request_post_incr, hg_init_info.auto_sm, + hg_init_info.sm_info_string, hg_init_info.checksum_level, + hg_init_info.no_bulk_eager, hg_init_info.no_loopback, + hg_init_info.stats, hg_init_info.no_multi_recv, + hg_init_info.release_input_early, hg_init_info.traffic_class, + hg_init_info.no_overflow, hg_init_info.multi_recv_op_max, + hg_init_info.multi_recv_copy_threshold); } /* Set post init / incr / multi-recv values */ diff --git a/src/na/na.c b/src/na/na.c index 27e5a0cd..3e74f1b2 100644 --- a/src/na/na.c +++ b/src/na/na.c @@ -798,7 +798,7 @@ NA_Initialize_opt2(const char *info_string, bool listen, unsigned int version, if (na_init_info) { NA_CHECK_SUBSYS_ERROR(fatal, version == 0, error, ret, NA_INVALID_ARG, "API version cannot be 0"); - NA_LOG_SUBSYS_DEBUG(cls, "Init info version used: v%d.%d", + NA_LOG_SUBSYS_DEBUG(cls, "NA init info version used: v%d.%d", NA_MAJOR(version), NA_MINOR(version)); /* Get init info and overwrite defaults */ @@ -808,6 +808,21 @@ NA_Initialize_opt2(const char *info_string, bool listen, unsigned int version, na_init_info_dup_4_0(&na_info->na_init_info, (const struct na_init_info_4_0 *) na_init_info); + NA_LOG_SUBSYS_DEBUG(cls, + "NA Init info: ip_subnet=%s, auth_key=%s, max_unexpected_size=%zu, " + "max_expected_size=%zu, progress_mode=%" PRIu8 + ", addr_format=%d, max_contexts=%" PRIu8 ", thread_mode=%" PRIu8 + ", request_mem_device=%u, traffic_class=%d", + na_info->na_init_info.ip_subnet, na_info->na_init_info.auth_key, + na_info->na_init_info.max_unexpected_size, + na_info->na_init_info.max_expected_size, + na_info->na_init_info.progress_mode, + na_info->na_init_info.addr_format, + na_info->na_init_info.max_contexts, + na_info->na_init_info.thread_mode, + na_info->na_init_info.request_mem_device, + na_info->na_init_info.traffic_class); + na_private_class->na_class.progress_mode = na_init_info->progress_mode; } diff --git a/src/na/na_ofi.c b/src/na/na_ofi.c index cb728c62..c833dc14 100644 --- a/src/na/na_ofi.c +++ b/src/na/na_ofi.c @@ -8332,6 +8332,10 @@ na_ofi_msg_send_unexpected(na_class_t *na_class, na_context_t *context, !(hg_atomic_get32(&na_ofi_op_id->status) & NA_OFI_OP_COMPLETED), error, ret, NA_BUSY, "Attempting to use OP ID that was not completed (%s)", na_cb_type_to_string(na_ofi_op_id->type)); + NA_CHECK_SUBSYS_ERROR(msg, + buf_size > na_ofi_class->endpoint->unexpected_msg_size_max, error, ret, + NA_INVALID_ARG, "Invalid msg size (%zu > %zu)", buf_size, + na_ofi_class->endpoint->unexpected_msg_size_max); NA_OFI_OP_RESET(na_ofi_op_id, context, FI_SEND, NA_CB_SEND_UNEXPECTED, callback, arg, na_ofi_addr); @@ -8510,6 +8514,10 @@ na_ofi_msg_send_expected(na_class_t *na_class, na_context_t *context, !(hg_atomic_get32(&na_ofi_op_id->status) & NA_OFI_OP_COMPLETED), error, ret, NA_BUSY, "Attempting to use OP ID that was not completed (%s)", na_cb_type_to_string(na_ofi_op_id->type)); + NA_CHECK_SUBSYS_ERROR(msg, + buf_size > na_ofi_class->endpoint->expected_msg_size_max, error, ret, + NA_INVALID_ARG, "Invalid msg size (%zu > %zu)", buf_size, + na_ofi_class->endpoint->expected_msg_size_max); NA_OFI_OP_RESET(na_ofi_op_id, context, FI_SEND, NA_CB_SEND_EXPECTED, callback, arg, na_ofi_addr);