Skip to content

Commit

Permalink
ch4/shm: add fast path for host buffer
Browse files Browse the repository at this point in the history
Check if source buffer is on host and choose typerep fast path for H2H.
  • Loading branch information
yfguo committed Aug 1, 2024
1 parent 174033a commit 2d50266
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 12 deletions.
21 changes: 13 additions & 8 deletions src/mpid/ch4/shm/posix/eager/iqueue/iqueue_send.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,11 @@ MPIDI_POSIX_eager_send(int grank, MPIDI_POSIX_am_header_t * msg_hdr, const void
cell->type = MPIDI_POSIX_EAGER_IQUEUE_CELL_TYPE_HDR;
/* send am_hdr if this is the first segment */
if (is_topo_local) {
MPIR_Typerep_copy(payload, am_hdr, am_hdr_sz, MPIR_TYPEREP_FLAG_NONE);
MPIR_Typerep_copy(payload, am_hdr, am_hdr_sz,
MPIR_TYPEREP_FLAG_H2H | MPIR_TYPEREP_FLAG_NONE);
} else {
MPIR_Typerep_copy(payload, am_hdr, am_hdr_sz, MPIR_TYPEREP_FLAG_STREAM);
MPIR_Typerep_copy(payload, am_hdr, am_hdr_sz,
MPIR_TYPEREP_FLAG_H2H | MPIR_TYPEREP_FLAG_STREAM);
}
/* make sure the data region starts at the boundary of MAX_ALIGNMENT */
payload = payload + resized_am_hdr_sz;
Expand All @@ -114,13 +116,16 @@ MPIDI_POSIX_eager_send(int grank, MPIDI_POSIX_am_header_t * msg_hdr, const void
* not reliable because the derived datatype could have zero block size which contains no
* data. */
if (bytes_sent) {
if (is_topo_local) {
MPIR_Typerep_pack(buf, count, datatype, offset, payload, available, &packed_size,
MPIR_TYPEREP_FLAG_NONE);
} else {
MPIR_Typerep_pack(buf, count, datatype, offset, payload, available, &packed_size,
MPIR_TYPEREP_FLAG_STREAM);
int typerep_flags = MPIR_TYPEREP_NONE;
if (msg_hdr == MPIDI_POSIX_AM_TYPE__SHORT_HOST
|| msg_hdr == MPIDI_POSIX_AM_TYPE__PIPELINE_HOST) {
typerep_flags |= MPIR_TYPEREP_FLAG_H2H;
}
if (!is_topo_local) {
typerep_flags |= MPIR_TYPEREP_FLAG_STREAM;
}
MPIR_Typerep_pack(buf, count, datatype, offset, payload, available, &packed_size,
typerep_flags);
cell->payload_size += packed_size;
*bytes_sent = packed_size;
}
Expand Down
16 changes: 14 additions & 2 deletions src/mpid/ch4/shm/posix/posix_am.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#include "posix_eager.h"
#include "mpidu_genq.h"

/* IS_HOST(attr): nonzero when the pointer attribute `attr` has one of the two
 * host pointer types (unregistered or registered host).
 *
 * The type is compared for equality rather than bit-masked. Masking only works
 * if the MPL_GPU_POINTER_* constants are distinct non-zero bit flags.
 * NOTE(review): in MPICH's mpl_gpu.h they appear to be sequential enumerators
 * starting at 0, in which case a mask misses UNREGISTERED_HOST and can match
 * non-host types -- confirm. Equality is correct under either encoding.
 *
 * `attr` is expanded twice; pass a side-effect-free expression.
 * The #undef drops any earlier definition of the same helper. */
#undef IS_HOST
#define IS_HOST(attr) \
    ((attr).type == MPL_GPU_POINTER_UNREGISTERED_HOST || \
     (attr).type == MPL_GPU_POINTER_REGISTERED_HOST)

MPL_STATIC_INLINE_PREFIX MPI_Aint MPIDI_POSIX_am_eager_limit(void)
{
return MPIDI_POSIX_eager_payload_limit() - MAX_ALIGNMENT;
Expand Down Expand Up @@ -284,9 +288,17 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_do_am_isend(int grank,

msg_hdr_p = msg_hdr;
if (data_sz + am_hdr_sz <= MPIDI_POSIX_am_eager_limit()) {
msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__SHORT;
if (IS_HOST(MPIDIG_REQUEST(sreq, buf_attr))) {
msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__SHORT_HOST;
} else {
msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__SHORT;
}
} else {
msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__PIPELINE;
if (IS_HOST(MPIDIG_REQUEST(sreq, buf_attr))) {
msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__PIPELINE_HOST;
} else {
msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__PIPELINE;
}
}

MPIDIG_am_send_async_init(sreq, datatype, data_sz);
Expand Down
4 changes: 3 additions & 1 deletion src/mpid/ch4/shm/posix/posix_pre.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ typedef enum {
/* Active-message types stored in the am_type field of the POSIX AM header.
 * The *_HOST variants are the SHORT/PIPELINE types chosen by the sender when
 * its source buffer is host memory; the receive path handles each *_HOST type
 * the same way as its non-HOST counterpart. */
typedef enum {
    MPIDI_POSIX_AM_TYPE__HDR,
    MPIDI_POSIX_AM_TYPE__SHORT,
    MPIDI_POSIX_AM_TYPE__SHORT_HOST,
    MPIDI_POSIX_AM_TYPE__PIPELINE,
    MPIDI_POSIX_AM_TYPE__PIPELINE_HOST
} MPIDI_POSIX_am_type_t;

struct MPIR_Request;
Expand Down
2 changes: 2 additions & 0 deletions src/mpid/ch4/shm/posix/posix_progress.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,13 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_progress_recv(int vci, int *made_progre
switch (msg_hdr->am_type) {
case MPIDI_POSIX_AM_TYPE__HDR:
case MPIDI_POSIX_AM_TYPE__SHORT:
case MPIDI_POSIX_AM_TYPE__SHORT_HOST:
MPIDIG_global.target_msg_cbs[msg_hdr->handler_id] (am_hdr, payload, payload_left,
attr, NULL);
MPIDI_POSIX_eager_recv_commit(&transaction);
goto fn_exit;
case MPIDI_POSIX_AM_TYPE__PIPELINE:
case MPIDI_POSIX_AM_TYPE__PIPELINE_HOST:
MPIDIG_global.target_msg_cbs[msg_hdr->handler_id] (am_hdr, NULL, payload_left,
attr | MPIDIG_AM_ATTR__IS_ASYNC,
&rreq);
Expand Down
12 changes: 11 additions & 1 deletion src/mpid/ch4/shm/posix/posix_send.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

#include "posix_impl.h"

/* IS_HOST(attr): nonzero when the pointer attribute `attr` has one of the two
 * host pointer types (unregistered or registered host).
 *
 * The type is compared for equality rather than bit-masked. Masking only works
 * if the MPL_GPU_POINTER_* constants are distinct non-zero bit flags.
 * NOTE(review): in MPICH's mpl_gpu.h they appear to be sequential enumerators
 * starting at 0, in which case a mask misses UNREGISTERED_HOST and can match
 * non-host types -- confirm. Equality is correct under either encoding.
 *
 * `attr` is expanded twice; pass a side-effect-free expression.
 * The #undef drops any earlier definition of the same helper. */
#undef IS_HOST
#define IS_HOST(attr) \
    ((attr).type == MPL_GPU_POINTER_UNREGISTERED_HOST || \
     (attr).type == MPL_GPU_POINTER_REGISTERED_HOST)

#define MPIDI_POSIX_SEND_VSIS(vci_src_, vci_dst_) \
do { \
MPIDI_EXPLICIT_VCIS(comm, attr, comm->rank, rank, vci_src_, vci_dst_); \
Expand Down Expand Up @@ -52,7 +56,13 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_isend(const void *buf, MPI_Aint cou
MPIDI_POSIX_am_header_t msg_hdr;
msg_hdr.handler_id = MPIDIG_SEND;
msg_hdr.am_hdr_sz = sizeof(MPIDIG_hdr_t);
msg_hdr.am_type = MPIDI_POSIX_AM_TYPE__SHORT;
/* Query where the send buffer lives and pick the matching SHORT AM type.
 * The local is named buf_attr rather than attr: MPIDI_POSIX_SEND_VSIS in this
 * file expands to code that reads an `attr` from the enclosing scope, so a
 * local with that name would collide with or shadow it.
 * NOTE(review): presumably that `attr` is this function's point-to-point
 * attribute parameter -- confirm against the full signature. */
MPL_pointer_attr_t buf_attr;
MPIR_GPU_query_pointer_attr(buf, &buf_attr);
if (IS_HOST(buf_attr)) {
    msg_hdr.am_type = MPIDI_POSIX_AM_TYPE__SHORT_HOST;
} else {
    msg_hdr.am_type = MPIDI_POSIX_AM_TYPE__SHORT;
}

MPIDIG_hdr_t am_hdr;
am_hdr.src_rank = comm->rank;
Expand Down
4 changes: 4 additions & 0 deletions src/mpid/ch4/src/mpidig_send_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#ifndef MPIDIG_SEND_UTILS_H_INCLUDED
#define MPIDIG_SEND_UTILS_H_INCLUDED

/* IS_HOST(attr): nonzero when the pointer attribute `attr` has one of the two
 * host pointer types (unregistered or registered host).
 *
 * The type is compared for equality rather than bit-masked. Masking only works
 * if the MPL_GPU_POINTER_* constants are distinct non-zero bit flags.
 * NOTE(review): in MPICH's mpl_gpu.h they appear to be sequential enumerators
 * starting at 0, in which case a mask misses UNREGISTERED_HOST and can match
 * non-host types -- confirm. Equality is correct under either encoding.
 *
 * `attr` is expanded twice; pass a side-effect-free expression.
 * #undef must name the macro it removes; it drops any earlier definition of
 * the same helper. */
#undef IS_HOST
#define IS_HOST(attr) \
    ((attr).type == MPL_GPU_POINTER_UNREGISTERED_HOST || \
     (attr).type == MPL_GPU_POINTER_REGISTERED_HOST)

/* This file contains supporting routines for pipelined data send. These routines are mainly for
 * managing the send request counters, completion counters and DT refcount */

Expand Down

0 comments on commit 2d50266

Please sign in to comment.