Skip to content

Commit

Permalink
NA OFI: fix compatibility with libfabric 2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
soumagne committed Nov 14, 2024
1 parent 27d7c20 commit 121fd85
Showing 1 changed file with 18 additions and 9 deletions.
27 changes: 18 additions & 9 deletions src/na/na_ofi.c
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,9 @@ struct na_ofi_op_queue {
struct na_ofi_eq {
struct fid_cq *fi_cq; /* CQ handle */
struct na_ofi_op_queue *retry_op_queue; /* Retry op queue */
struct fid_wait *fi_wait; /* Optional wait set handle */
#if FI_VERSION_LT(FI_COMPILE_VERSION, FI_VERSION(2, 0))
struct fid_wait *fi_wait; /* Optional wait set handle */
#endif
};

/* Context */
Expand Down Expand Up @@ -2038,8 +2040,7 @@ static const char *const na_ofi_log_subsys_g[] = {[FI_LOG_CORE] = "core",
[FI_LOG_CQ] = "cq",
[FI_LOG_EQ] = "eq",
[FI_LOG_MR] = "mr",
[FI_LOG_CNTR] = "cntr",
[FI_LOG_SUBSYS_MAX] = NULL};
[FI_LOG_CNTR] = "cntr"};

/* Log interval in ms */
static int na_ofi_log_interval_g = 2000;
Expand Down Expand Up @@ -2110,8 +2111,8 @@ na_ofi_log(const struct fi_provider *prov, enum fi_log_level level,
enum fi_log_subsys subsys, const char *func, int line, const char *msg)
{
HG_LOG_WRITE_FUNC(na_libfabric, na_ofi_log_level_to_hg(level), prov->name,
na_ofi_log_subsys_g[subsys], (unsigned int) line, func, true, "%s",
msg);
subsys > FI_LOG_CNTR ? "unknown" : na_ofi_log_subsys_g[subsys],
(unsigned int) line, func, true, "%s", msg);
}

/*---------------------------------------------------------------------------*/
Expand Down Expand Up @@ -2145,7 +2146,6 @@ na_ofi_log_level_to_hg(enum fi_log_level level)
case FI_LOG_INFO:
case FI_LOG_DEBUG:
return HG_LOG_LEVEL_DEBUG;
case FI_LOG_MAX:
default:
return HG_LOG_LEVEL_MAX;
}
Expand Down Expand Up @@ -3453,8 +3453,8 @@ na_ofi_getinfo(enum na_ofi_prov_type prov_type, const struct na_ofi_info *info,
hints->caps = FI_MSG | FI_TAGGED | FI_RMA | FI_DIRECTED_RECV;

/* msg_order, comp_order */
hints->tx_attr->msg_order = FI_ORDER_NONE;
hints->tx_attr->comp_order = FI_ORDER_NONE;
hints->tx_attr->msg_order = 0;
hints->tx_attr->comp_order = 0;

/* Generate completion event when it is safe to re-use buffer */
hints->tx_attr->op_flags = FI_INJECT_COMPLETE;
Expand Down Expand Up @@ -5372,6 +5372,7 @@ na_ofi_eq_open(const struct na_ofi_fabric *na_ofi_fabric,
if (!no_wait) {
if (na_ofi_prov_flags[na_ofi_fabric->prov_type] & NA_OFI_WAIT_FD)
cq_attr.wait_obj = FI_WAIT_FD; /* Wait on fd */
#if FI_VERSION_LT(FI_COMPILE_VERSION, FI_VERSION(2, 0))
else {
struct fi_wait_attr wait_attr = {0};

Expand All @@ -5385,6 +5386,7 @@ na_ofi_eq_open(const struct na_ofi_fabric *na_ofi_fabric,
cq_attr.wait_obj = FI_WAIT_SET; /* Wait on wait set */
cq_attr.wait_set = na_ofi_eq->fi_wait;
}
#endif
}
cq_attr.wait_cond = FI_CQ_COND_NONE;
cq_attr.format = FI_CQ_FORMAT_TAGGED;
Expand All @@ -5409,10 +5411,12 @@ na_ofi_eq_open(const struct na_ofi_fabric *na_ofi_fabric,
(void) fi_close(&na_ofi_eq->fi_cq->fid);
na_ofi_eq->fi_cq = NULL;
}
#if FI_VERSION_LT(FI_COMPILE_VERSION, FI_VERSION(2, 0))
if (na_ofi_eq->fi_wait != NULL) {
(void) fi_close(&na_ofi_eq->fi_wait->fid);
na_ofi_eq->fi_wait = NULL;
}
#endif
if (na_ofi_eq->retry_op_queue) {
hg_thread_spin_destroy(&na_ofi_eq->retry_op_queue->lock);
free(na_ofi_eq->retry_op_queue);
Expand All @@ -5439,13 +5443,15 @@ na_ofi_eq_close(struct na_ofi_eq *na_ofi_eq)
na_ofi_eq->fi_cq = NULL;
}

#if FI_VERSION_LT(FI_COMPILE_VERSION, FI_VERSION(2, 0))
/* Close wait set */
if (na_ofi_eq->fi_wait) {
rc = fi_close(&na_ofi_eq->fi_wait->fid);
NA_CHECK_SUBSYS_ERROR(ctx, rc != 0, out, ret, na_ofi_errno_to_na(-rc),
"fi_close() wait failed, rc: %d (%s)", rc, fi_strerror(-rc));
na_ofi_eq->fi_wait = NULL;
}
#endif

if (na_ofi_eq->retry_op_queue) {
hg_thread_spin_destroy(&na_ofi_eq->retry_op_queue->lock);
Expand Down Expand Up @@ -9142,9 +9148,11 @@ na_ofi_poll_wait(na_class_t *na_class, na_context_t *context,
deadline = hg_time_add(now, hg_time_from_ms(timeout_ms));

do {
struct na_ofi_context *na_ofi_context = NA_OFI_CONTEXT(context);
unsigned int count = 0;

#if FI_VERSION_LT(FI_COMPILE_VERSION, FI_VERSION(2, 0))
struct na_ofi_context *na_ofi_context = NA_OFI_CONTEXT(context);

if (timeout_ms != 0 && na_ofi_context->eq->fi_wait != NULL) {
/* Wait in wait set if provider does not support wait on FDs */
int rc = fi_wait(na_ofi_context->eq->fi_wait,
Expand All @@ -9161,6 +9169,7 @@ na_ofi_poll_wait(na_class_t *na_class, na_context_t *context,
na_ofi_errno_to_na(-rc), "fi_wait() failed, rc: %d (%s)", rc,
fi_strerror(-rc));
}
#endif

ret = na_ofi_poll(na_class, context, &count);
NA_CHECK_SUBSYS_NA_ERROR(poll, error, ret, "Could not poll");
Expand Down

0 comments on commit 121fd85

Please sign in to comment.