Skip to content

Commit

Permalink
Merge pull request #9864 from ivankochin/ucp/avoid-lane-maps-storage-…
Browse files Browse the repository at this point in the history
…inside-request

UCP/PROTOV1: Remove redundant fields from request structure
  • Loading branch information
ivankochin authored May 9, 2024
2 parents 477d72a + d6bae0a commit 4e10990
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 25 deletions.
4 changes: 0 additions & 4 deletions src/ucp/core/ucp_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,6 @@ struct ucp_request {
struct {
/* Actual lanes map */
ucp_lane_map_t lanes_map_all;

/* Actual lanes count */
uint8_t lanes_count;
} zcopy;

struct {
Expand Down Expand Up @@ -377,7 +374,6 @@ struct ucp_request {
union {
ucp_lane_index_t am_bw_index; /* AM BW lane index */
ucp_lane_index_t multi_lane_idx; /* Index of the lane with multi-send */
ucp_lane_map_t lanes_map_avail; /* Used lanes map */
};
uint8_t mem_type; /* Memory type, values are
* ucs_memory_type_t */
Expand Down
43 changes: 22 additions & 21 deletions src/ucp/rndv/rndv.c
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ static ucp_lane_index_t ucp_rndv_zcopy_get_lane(ucp_request_t *rndv_req,
return UCP_NULL_LANE;
}

lane_idx = ucs_ffs64_safe(rndv_req->send.lanes_map_avail);
lane_idx = rndv_req->send.multi_lane_idx;
ucs_assert(lane_idx < UCP_MAX_LANES);
rkey = rndv_req->send.rndv.rkey;

Expand All @@ -515,10 +515,12 @@ static ucp_lane_index_t ucp_rndv_zcopy_get_lane(ucp_request_t *rndv_req,

static void ucp_rndv_zcopy_next_lane(ucp_request_t *rndv_req)
{
rndv_req->send.lanes_map_avail &= rndv_req->send.lanes_map_avail - 1;
if (!rndv_req->send.lanes_map_avail) {
rndv_req->send.lanes_map_avail = rndv_req->send.rndv.zcopy.lanes_map_all;
}
ucp_lane_map_t lanes_map_all = rndv_req->send.rndv.zcopy.lanes_map_all;
ucp_lane_map_t lane_map;
lane_map = lanes_map_all & ~UCS_MASK(rndv_req->send.multi_lane_idx + 1);

rndv_req->send.multi_lane_idx = ucs_ffs32((lane_map > 0) ? lane_map :
lanes_map_all);
}

static ucs_status_t
Expand All @@ -529,6 +531,7 @@ ucp_rndv_progress_rma_zcopy_common(ucp_request_t *req, ucp_lane_index_t lane,
ucp_ep_h ep = req->send.ep;
uct_ep_h uct_ep = ucp_ep_get_lane(ep, lane);
ucp_ep_config_t *config = ucp_ep_config(ep);
size_t lanes_count;
uct_iov_t iov[max_iovcnt];
size_t iovcnt;
uct_iface_attr_t *attrs;
Expand All @@ -542,7 +545,7 @@ ucp_rndv_progress_rma_zcopy_common(ucp_request_t *req, ucp_lane_index_t lane,
int pending_add_res;

ucs_assert_always(req->send.lane != UCP_NULL_LANE);
ucs_assert_always(req->send.rndv.zcopy.lanes_count > 0);
ucs_assert(ucs_popcount(req->send.rndv.zcopy.lanes_map_all) > 0);

if (req->send.rndv.mdesc == NULL) {
status = ucp_request_send_reg_lane(req, lane);
Expand Down Expand Up @@ -572,9 +575,10 @@ ucp_rndv_progress_rma_zcopy_common(ucp_request_t *req, ucp_lane_index_t lane,
if ((offset == 0) && (remaining > 0) && (req->send.length > ucp_mtu)) {
length = ucp_mtu - remaining;
} else {
chunk = ucs_align_up((size_t)(req->send.length /
req->send.rndv.zcopy.lanes_count * scale),
align);
lanes_count = ucs_popcount(req->send.rndv.zcopy.lanes_map_all);
chunk = ucs_align_up((size_t)(req->send.length /
lanes_count * scale),
align);
length = ucs_min(chunk, req->send.length - offset);
}

Expand Down Expand Up @@ -687,12 +691,10 @@ UCS_PROFILE_FUNC_VOID(ucp_rndv_put_completion, (self), uct_completion_t *self)
}

static void ucp_rndv_req_init_lanes(ucp_request_t *req,
ucp_lane_map_t lanes_map,
uint8_t lanes_count)
ucp_lane_map_t lanes_map)
{
req->send.lanes_map_avail = lanes_map;
req->send.rndv.zcopy.lanes_map_all = lanes_map;
req->send.rndv.zcopy.lanes_count = lanes_count;
req->send.multi_lane_idx = ucs_ffs32(lanes_map);
}

static void ucp_rndv_req_init_zcopy_lane_map(ucp_request_t *rndv_req,
Expand Down Expand Up @@ -800,19 +802,19 @@ static void ucp_rndv_req_init_zcopy_lane_map(ucp_request_t *rndv_req,
chunk_count);

out:
ucp_rndv_req_init_lanes(rndv_req, lane_map, ucs_popcount(lane_map));
ucp_rndv_req_init_lanes(rndv_req, lane_map);
}

static void ucp_rndv_req_init(ucp_request_t *req, ucp_request_t *super_req,
ucp_lane_map_t lanes_map, uint8_t lanes_count,
ucp_lane_map_t lanes_map,
ucp_rkey_h rkey, uint64_t remote_address)
{
req->send.rndv.rkey = rkey;
req->send.rndv.remote_address = remote_address;
req->send.pending_lane = UCP_NULL_LANE;

ucp_request_set_super(req, super_req);
ucp_rndv_req_init_lanes(req, lanes_map, lanes_count);
ucp_rndv_req_init_lanes(req, lanes_map);
}

static void
Expand Down Expand Up @@ -857,7 +859,7 @@ ucp_rndv_rkey_ptr_get_mem_type(ucp_request_t *sreq, size_t length,
freq->send.ep = mem_type_ep;
freq->send.state.dt.dt.contig.memh = NULL;

ucp_rndv_req_init(freq, sreq, lanes_map, ucs_popcount(lanes_map), NULL,
ucp_rndv_req_init(freq, sreq, lanes_map, NULL,
remote_address);

UCP_WORKER_STAT_RNDV(freq->send.ep->worker, GET_ZCOPY, 1);
Expand All @@ -875,7 +877,6 @@ ucp_rndv_req_init_remote_from_super_req(ucp_request_t *req,
req->send.ep = super_req->send.ep;

ucp_rndv_req_init(req, super_req, super_req->send.rndv.zcopy.lanes_map_all,
super_req->send.rndv.zcopy.lanes_count,
super_req->send.rndv.rkey,
super_req->send.rndv.remote_address +
remote_address_offset);
Expand Down Expand Up @@ -1056,7 +1057,7 @@ ucp_rndv_recv_frag_put_mem_type(ucp_request_t *rreq, ucp_request_t *freq,
rreq->recv.dt_iter.mem_info.type, length,
ucp_rndv_progress_rma_put_zcopy);

ucp_rndv_req_init(freq, rreq, 0, 0, NULL,
ucp_rndv_req_init(freq, rreq, 0, NULL,
(uintptr_t)UCS_PTR_BYTE_OFFSET(
rreq->recv.dt_iter.type.contig.buffer, offset));

Expand Down Expand Up @@ -1192,7 +1193,7 @@ static void ucp_rndv_send_frag_get_mem_type(ucp_request_t *sreq, size_t length,
ucp_rndv_init_mem_type_frag_req(worker, freq, UCP_REQUEST_SEND_PROTO_RNDV_GET,
comp_cb, mdesc, remote_mem_type, length,
ucp_rndv_progress_rma_get_zcopy);
ucp_rndv_req_init(freq, sreq, lanes_map, ucs_popcount(lanes_map), rkey,
ucp_rndv_req_init(freq, sreq, lanes_map, rkey,
remote_address);

if (update_get_rkey) {
Expand Down Expand Up @@ -1844,7 +1845,7 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_progress_rma_put_zcopy, (self),
ucp_request_t *sreq = ucs_container_of(self, ucp_request_t, send.uct);
uct_rkey_t uct_rkey;

ucs_assert_always(sreq->send.rndv.zcopy.lanes_count > 0);
ucs_assert(ucs_popcount(sreq->send.rndv.zcopy.lanes_map_all) > 0);

/* Figure out which lane to use for put operation */
sreq->send.lane = ucp_rndv_zcopy_get_lane(sreq, &uct_rkey,
Expand Down

0 comments on commit 4e10990

Please sign in to comment.