Skip to content

Commit

Permalink
NA OFI: set CXI domain ops w/ slingshot 2.2
Browse files Browse the repository at this point in the history
Set domain ops to prevent memory corruptions
  • Loading branch information
soumagne committed Oct 26, 2023
1 parent 8601e32 commit b44e107
Showing 1 changed file with 152 additions and 28 deletions.
180 changes: 152 additions & 28 deletions src/na/na_ofi.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@
# define FI_ADDR_CXI (FI_ADDR_OPX + 1)
# define FI_PROTO_CXI (FI_PROTO_OPX + 1)
#endif
#ifndef NA_OFI_HAS_EXT_CXI_H
# define FI_CXI_DOM_OPS_6 "dom_ops_v6"
#endif

/* Fallback for undefined XNET values */
#if FI_VERSION_LT(FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION), \
Expand Down Expand Up @@ -693,6 +696,21 @@ struct fi_gni_auth_key {
#endif

#ifndef NA_OFI_HAS_EXT_CXI_H
/* CXI domain ops (v6) */
struct fi_cxi_dom_ops {
int (*cntr_read)(struct fid *fid, unsigned int cntr, uint64_t *value,
struct timespec *ts);
int (*topology)(struct fid *fid, unsigned int *group_id,
unsigned int *switch_id, unsigned int *port_id);
int (*enable_hybrid_mr_desc)(struct fid *fid, bool enable);
size_t (*ep_get_unexp_msgs)(struct fid_ep *fid_ep,
struct fi_cq_tagged_entry *entry, size_t count, fi_addr_t *src_addr,
size_t *ux_count);
int (*get_dwq_depth)(struct fid *fid, size_t *depth);
int (*enable_mr_match_events)(struct fid *fid, bool enable);
int (*enable_optimized_mrs)(struct fid *fid, bool enable);
};

/* CXI Authorization Key */
struct cxi_auth_key {
uint32_t svc_id;
Expand Down Expand Up @@ -1079,6 +1097,13 @@ na_ofi_fabric_open(enum na_ofi_prov_type prov_type, struct fi_fabric_attr *attr,
na_return_t
na_ofi_fabric_close(struct na_ofi_fabric *na_ofi_fabric);

/**
* Set optional domain ops.
*/
static na_return_t
na_ofi_set_domain_ops(
enum na_ofi_prov_type prov_type, struct na_ofi_domain *na_ofi_domain);

#ifdef NA_OFI_HAS_EXT_GNI_H
/**
* Optional domain set op value for GNI provider.
Expand All @@ -1093,6 +1118,20 @@ na_ofi_gni_set_domain_op_value(
static na_return_t
na_ofi_gni_get_domain_op_value(
struct na_ofi_domain *na_ofi_domain, int op, void *value);

/**
* Set GNI specific domain ops.
*/
static na_return_t
na_ofi_gni_set_domain_ops(struct na_ofi_domain *na_ofi_domain);
#endif

#ifdef FI_CXI_DOM_OPS_6
/**
* Set CXI specific domain ops.
*/
static na_return_t
na_ofi_cxi_set_domain_ops(struct na_ofi_domain *na_ofi_domain);
#endif

/**
Expand Down Expand Up @@ -3910,6 +3949,40 @@ na_ofi_fabric_close(struct na_ofi_fabric *na_ofi_fabric)
return ret;
}

/*---------------------------------------------------------------------------*/
static na_return_t
na_ofi_set_domain_ops(
enum na_ofi_prov_type prov_type, struct na_ofi_domain *na_ofi_domain)
{
switch (prov_type) {
case NA_OFI_PROV_GNI:
#ifdef NA_OFI_HAS_EXT_GNI_H
return na_ofi_gni_set_domain_ops(na_ofi_domain);
#else
return NA_PROTONOSUPPORT;
#endif
case NA_OFI_PROV_CXI:
#ifdef FI_CXI_DOM_OPS_6
return na_ofi_cxi_set_domain_ops(na_ofi_domain);
#else
(void) na_ofi_domain;
#endif
case NA_OFI_PROV_SOCKETS:
case NA_OFI_PROV_TCP:
case NA_OFI_PROV_TCP_RXM:
case NA_OFI_PROV_PSM2:
case NA_OFI_PROV_OPX:
case NA_OFI_PROV_VERBS_RXM:
return NA_SUCCESS;
case NA_OFI_PROV_NULL:
default:
NA_LOG_SUBSYS_ERROR(fatal,
"auth_key not supported for this provider: %s",
na_ofi_prov_name[prov_type]);
return NA_PROTONOSUPPORT;
}
}

/*---------------------------------------------------------------------------*/
#ifdef NA_OFI_HAS_EXT_GNI_H
static na_return_t
Expand Down Expand Up @@ -3954,6 +4027,82 @@ na_ofi_gni_get_domain_op_value(
out:
return ret;
}

/*---------------------------------------------------------------------------*/
static na_return_t
na_ofi_gni_set_domain_ops(struct na_ofi_domain *na_ofi_domain)
{
int32_t enable = 1;
na_return_t ret;

# ifdef NA_OFI_GNI_HAS_UDREG
char *other_reg_type = "udreg";
int32_t udreg_limit = NA_OFI_GNI_UDREG_REG_LIMIT;

/* Enable use of udreg instead of internal MR cache */
ret = na_ofi_gni_set_domain_op_value(
na_ofi_domain, GNI_MR_CACHE, &other_reg_type);
NA_CHECK_SUBSYS_NA_ERROR(
cls, error, ret, "Could not set domain op value for GNI_MR_CACHE");

/* Experiments on Theta showed default value of 2048 too high if
* launching multiple clients on one node */
ret = na_ofi_gni_set_domain_op_value(
na_ofi_domain, GNI_MR_UDREG_REG_LIMIT, &udreg_limit);
NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret,
"Could not set domain op value for GNI_MR_UDREG_REG_LIMIT");
# endif

/* Enable lazy deregistration in MR cache */
ret = na_ofi_gni_set_domain_op_value(
na_ofi_domain, GNI_MR_CACHE_LAZY_DEREG, &enable);
NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret,
"Could not set domain op value for GNI_MR_CACHE_LAZY_DEREG");

return NA_SUCCESS;

error:
return ret;
}
#endif

/*---------------------------------------------------------------------------*/
#ifdef FI_CXI_DOM_OPS_6
static na_return_t
na_ofi_cxi_set_domain_ops(struct na_ofi_domain *na_ofi_domain)
{
struct fi_cxi_dom_ops *dom_ops;
na_return_t ret;
int rc;

rc = fi_open_ops(&na_ofi_domain->fi_domain->fid, FI_CXI_DOM_OPS_6, 0,
(void **) &dom_ops, NULL);
if (rc != 0) {
/* Silently ignore */
NA_LOG_SUBSYS_DEBUG(
cls, "fi_open_ops() failed, rc: %d (%s)", rc, fi_strerror(-rc));
return NA_SUCCESS;
}

/* Prevent potential memory corruption by ensuring that memory backing MRs
* cannot be accessed after invoking fi_close() even if that memory remains
* in the MR cache. */
rc = dom_ops->enable_mr_match_events(&na_ofi_domain->fi_domain->fid, true);
NA_CHECK_SUBSYS_ERROR(cls, rc != 0, error, ret, na_ofi_errno_to_na(-rc),
"enable_mr_match_events() failed, rc: %d (%s)", rc, fi_strerror(-rc));

/* Disable use of optimized MRs to prevent quick recycle of MR keys (when
* using FI_MR_PROV_KEY). This prevents potential memory corruptions when
* multiple regions could end up using the same key. */
rc = dom_ops->enable_optimized_mrs(&na_ofi_domain->fi_domain->fid, false);
NA_CHECK_SUBSYS_ERROR(cls, rc != 0, error, ret, na_ofi_errno_to_na(-rc),
"enable_optimized_mrs() failed, rc: %d (%s)", rc, fi_strerror(-rc));

return NA_SUCCESS;

error:
return ret;
}
#endif

/*---------------------------------------------------------------------------*/
Expand Down Expand Up @@ -4226,34 +4375,9 @@ na_ofi_domain_open(const struct na_ofi_fabric *na_ofi_fabric,
NA_LOG_SUBSYS_DEBUG_EXT(cls, "fi_domain opened", "%s",
fi_tostr(domain_attr, FI_TYPE_DOMAIN_ATTR));

#ifdef NA_OFI_HAS_EXT_GNI_H
if (na_ofi_fabric->prov_type == NA_OFI_PROV_GNI) {
int32_t enable = 1;
# ifdef NA_OFI_GNI_HAS_UDREG
char *other_reg_type = "udreg";
int32_t udreg_limit = NA_OFI_GNI_UDREG_REG_LIMIT;

/* Enable use of udreg instead of internal MR cache */
ret = na_ofi_gni_set_domain_op_value(
na_ofi_domain, GNI_MR_CACHE, &other_reg_type);
NA_CHECK_SUBSYS_NA_ERROR(
cls, error, ret, "Could not set domain op value for GNI_MR_CACHE");

/* Experiments on Theta showed default value of 2048 too high if
* launching multiple clients on one node */
ret = na_ofi_gni_set_domain_op_value(
na_ofi_domain, GNI_MR_UDREG_REG_LIMIT, &udreg_limit);
NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret,
"Could not set domain op value for GNI_MR_UDREG_REG_LIMIT");
# endif

/* Enable lazy deregistration in MR cache */
ret = na_ofi_gni_set_domain_op_value(
na_ofi_domain, GNI_MR_CACHE_LAZY_DEREG, &enable);
NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret,
"Could not set domain op value for GNI_MR_CACHE_LAZY_DEREG");
}
#endif
/* Set optional domain ops */
ret = na_ofi_set_domain_ops(na_ofi_fabric->prov_type, na_ofi_domain);
NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret, "Could not set domain ops");

/* Open fi address vector */
av_attr.type = FI_AV_MAP;
Expand Down

0 comments on commit b44e107

Please sign in to comment.