diff --git a/src/na/na_ofi.c b/src/na/na_ofi.c index 92c61bdb..2745d8ae 100644 --- a/src/na/na_ofi.c +++ b/src/na/na_ofi.c @@ -95,6 +95,9 @@ # define FI_ADDR_CXI (FI_ADDR_OPX + 1) # define FI_PROTO_CXI (FI_PROTO_OPX + 1) #endif +#ifndef NA_OFI_HAS_EXT_CXI_H +# define FI_CXI_DOM_OPS_6 "dom_ops_v6" +#endif /* Fallback for undefined XNET values */ #if FI_VERSION_LT(FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION), \ @@ -693,6 +696,21 @@ struct fi_gni_auth_key { #endif #ifndef NA_OFI_HAS_EXT_CXI_H +/* CXI domain ops (v6) */ +struct fi_cxi_dom_ops { + int (*cntr_read)(struct fid *fid, unsigned int cntr, uint64_t *value, + struct timespec *ts); + int (*topology)(struct fid *fid, unsigned int *group_id, + unsigned int *switch_id, unsigned int *port_id); + int (*enable_hybrid_mr_desc)(struct fid *fid, bool enable); + size_t (*ep_get_unexp_msgs)(struct fid_ep *fid_ep, + struct fi_cq_tagged_entry *entry, size_t count, fi_addr_t *src_addr, + size_t *ux_count); + int (*get_dwq_depth)(struct fid *fid, size_t *depth); + int (*enable_mr_match_events)(struct fid *fid, bool enable); + int (*enable_optimized_mrs)(struct fid *fid, bool enable); +}; + /* CXI Authorization Key */ struct cxi_auth_key { uint32_t svc_id; @@ -1079,6 +1097,13 @@ na_ofi_fabric_open(enum na_ofi_prov_type prov_type, struct fi_fabric_attr *attr, na_return_t na_ofi_fabric_close(struct na_ofi_fabric *na_ofi_fabric); +/** + * Set optional domain ops. + */ +static na_return_t +na_ofi_set_domain_ops( + enum na_ofi_prov_type prov_type, struct na_ofi_domain *na_ofi_domain); + #ifdef NA_OFI_HAS_EXT_GNI_H /** * Optional domain set op value for GNI provider. @@ -1093,6 +1118,20 @@ na_ofi_gni_set_domain_op_value( static na_return_t na_ofi_gni_get_domain_op_value( struct na_ofi_domain *na_ofi_domain, int op, void *value); + +/** + * Set GNI specific domain ops. + */ +static na_return_t +na_ofi_gni_set_domain_ops(struct na_ofi_domain *na_ofi_domain); +#endif + +#ifdef FI_CXI_DOM_OPS_6 +/** + * Set CXI specific domain ops. + */ +static na_return_t +na_ofi_cxi_set_domain_ops(struct na_ofi_domain *na_ofi_domain); #endif /** @@ -3910,6 +3949,40 @@ na_ofi_fabric_close(struct na_ofi_fabric *na_ofi_fabric) return ret; } +/*---------------------------------------------------------------------------*/ +static na_return_t +na_ofi_set_domain_ops( + enum na_ofi_prov_type prov_type, struct na_ofi_domain *na_ofi_domain) +{ + switch (prov_type) { + case NA_OFI_PROV_GNI: +#ifdef NA_OFI_HAS_EXT_GNI_H + return na_ofi_gni_set_domain_ops(na_ofi_domain); +#else + return NA_PROTONOSUPPORT; +#endif + case NA_OFI_PROV_CXI: +#ifdef FI_CXI_DOM_OPS_6 + return na_ofi_cxi_set_domain_ops(na_ofi_domain); +#else + (void) na_ofi_domain; +#endif + case NA_OFI_PROV_SOCKETS: + case NA_OFI_PROV_TCP: + case NA_OFI_PROV_TCP_RXM: + case NA_OFI_PROV_PSM2: + case NA_OFI_PROV_OPX: + case NA_OFI_PROV_VERBS_RXM: + return NA_SUCCESS; + case NA_OFI_PROV_NULL: + default: + NA_LOG_SUBSYS_ERROR(fatal, + "auth_key not supported for this provider: %s", + na_ofi_prov_name[prov_type]); + return NA_PROTONOSUPPORT; + } +} + /*---------------------------------------------------------------------------*/ #ifdef NA_OFI_HAS_EXT_GNI_H static na_return_t @@ -3954,6 +4027,82 @@ na_ofi_gni_get_domain_op_value( out: return ret; } + +/*---------------------------------------------------------------------------*/ +static na_return_t +na_ofi_gni_set_domain_ops(struct na_ofi_domain *na_ofi_domain) +{ + int32_t enable = 1; + na_return_t ret; + +# ifdef NA_OFI_GNI_HAS_UDREG + char *other_reg_type = "udreg"; + int32_t udreg_limit = NA_OFI_GNI_UDREG_REG_LIMIT; + + /* Enable use of udreg instead of internal MR cache */ + ret = na_ofi_gni_set_domain_op_value( + na_ofi_domain, GNI_MR_CACHE, &other_reg_type); + NA_CHECK_SUBSYS_NA_ERROR( + cls, error, ret, "Could not set domain op value for GNI_MR_CACHE"); + + /* Experiments on Theta showed default value of 2048 too high if + * launching multiple clients on one node */ + ret = na_ofi_gni_set_domain_op_value( + na_ofi_domain, GNI_MR_UDREG_REG_LIMIT, &udreg_limit); + NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret, + "Could not set domain op value for GNI_MR_UDREG_REG_LIMIT"); +# endif + + /* Enable lazy deregistration in MR cache */ + ret = na_ofi_gni_set_domain_op_value( + na_ofi_domain, GNI_MR_CACHE_LAZY_DEREG, &enable); + NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret, + "Could not set domain op value for GNI_MR_CACHE_LAZY_DEREG"); + + return NA_SUCCESS; + +error: + return ret; +} +#endif + +/*---------------------------------------------------------------------------*/ +#ifdef FI_CXI_DOM_OPS_6 +static na_return_t +na_ofi_cxi_set_domain_ops(struct na_ofi_domain *na_ofi_domain) +{ + struct fi_cxi_dom_ops *dom_ops; + na_return_t ret; + int rc; + + rc = fi_open_ops(&na_ofi_domain->fi_domain->fid, FI_CXI_DOM_OPS_6, 0, + (void **) &dom_ops, NULL); + if (rc != 0) { + /* Silently ignore */ + NA_LOG_SUBSYS_DEBUG( + cls, "fi_open_ops() failed, rc: %d (%s)", rc, fi_strerror(-rc)); + return NA_SUCCESS; + } + + /* Prevent potential memory corruption by ensuring that memory backing MRs + * cannot be accessed after invoking fi_close() even if that memory remains + * in the MR cache. */ + rc = dom_ops->enable_mr_match_events(&na_ofi_domain->fi_domain->fid, true); + NA_CHECK_SUBSYS_ERROR(cls, rc != 0, error, ret, na_ofi_errno_to_na(-rc), + "enable_mr_match_events() failed, rc: %d (%s)", rc, fi_strerror(-rc)); + + /* Disable use of optimized MRs to prevent quick recycle of MR keys (when + * using FI_MR_PROV_KEY). This prevents potential memory corruptions when + * multiple regions could end up using the same key. */ + rc = dom_ops->enable_optimized_mrs(&na_ofi_domain->fi_domain->fid, false); + NA_CHECK_SUBSYS_ERROR(cls, rc != 0, error, ret, na_ofi_errno_to_na(-rc), + "enable_optimized_mrs() failed, rc: %d (%s)", rc, fi_strerror(-rc)); + + return NA_SUCCESS; + +error: + return ret; +} #endif /*---------------------------------------------------------------------------*/ @@ -3977,9 +4126,8 @@ na_ofi_parse_auth_key(const char *str, enum na_ofi_prov_type prov_type, case NA_OFI_PROV_OPX: case NA_OFI_PROV_VERBS_RXM: default: - NA_LOG_SUBSYS_ERROR(fatal, - "auth_key not supported for this provider: %s", - na_ofi_prov_name[prov_type]); + NA_LOG_SUBSYS_ERROR( + fatal, "unsupported provider: %s", na_ofi_prov_name[prov_type]); return NA_PROTONOSUPPORT; } } @@ -4226,34 +4374,9 @@ na_ofi_domain_open(const struct na_ofi_fabric *na_ofi_fabric, NA_LOG_SUBSYS_DEBUG_EXT(cls, "fi_domain opened", "%s", fi_tostr(domain_attr, FI_TYPE_DOMAIN_ATTR)); -#ifdef NA_OFI_HAS_EXT_GNI_H - if (na_ofi_fabric->prov_type == NA_OFI_PROV_GNI) { - int32_t enable = 1; -# ifdef NA_OFI_GNI_HAS_UDREG - char *other_reg_type = "udreg"; - int32_t udreg_limit = NA_OFI_GNI_UDREG_REG_LIMIT; - - /* Enable use of udreg instead of internal MR cache */ - ret = na_ofi_gni_set_domain_op_value( - na_ofi_domain, GNI_MR_CACHE, &other_reg_type); - NA_CHECK_SUBSYS_NA_ERROR( - cls, error, ret, "Could not set domain op value for GNI_MR_CACHE"); - - /* Experiments on Theta showed default value of 2048 too high if - * launching multiple clients on one node */ - ret = na_ofi_gni_set_domain_op_value( - na_ofi_domain, GNI_MR_UDREG_REG_LIMIT, &udreg_limit); - NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret, - "Could not set domain op value for GNI_MR_UDREG_REG_LIMIT"); -# endif - - /* Enable lazy deregistration in MR cache */ - ret = na_ofi_gni_set_domain_op_value( - na_ofi_domain, GNI_MR_CACHE_LAZY_DEREG, &enable); - NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret, - "Could not set domain op value for GNI_MR_CACHE_LAZY_DEREG"); - } -#endif + /* Set optional domain ops */ + ret = na_ofi_set_domain_ops(na_ofi_fabric->prov_type, na_ofi_domain); + NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret, "Could not set domain ops"); /* Open fi address vector */ av_attr.type = FI_AV_MAP;