From c4d1e02aafeca4ace44140c70a3d541f01b0fef8 Mon Sep 17 00:00:00 2001
From: Yanfei Guo
Date: Thu, 1 Aug 2024 13:56:58 -0500
Subject: [PATCH] ch4/shm: add fast path for host buffer

Check if source buffer is on host and choose typerep fast path for H2H.

Summary of the change:

- posix_pre.h adds two active-message types, MPIDI_POSIX_AM_TYPE__SHORT_HOST
  and MPIDI_POSIX_AM_TYPE__PIPELINE_HOST.
- posix_send.h and posix_am.h select the *_HOST type when the send buffer's
  pointer attribute reports a host pointer (IS_HOST), and the plain type
  otherwise.
- posix_progress.h dispatches each *_HOST type exactly like its plain
  counterpart, so the receive path is unchanged.
- iqueue_send.h always copies the AM header with MPIR_TYPEREP_FLAG_H2H and
  packs the payload with MPIR_TYPEREP_FLAG_H2H only when the message header
  carries a *_HOST type. MPIR_TYPEREP_FLAG_STREAM is still added when the
  peer is not topology-local.

Implementation notes:

- IS_HOST compares attr.type for equality with the two host pointer kinds
  instead of masking it, so the result does not depend on the numeric values
  of the MPL_GPU_POINTER_* enumerators.
- The payload path reads msg_hdr->am_type and checks msg_hdr for NULL first;
  when no header is supplied the H2H flag is simply not set.
- The queried attribute in MPIDI_POSIX_mpi_isend is stored in buf_attr so it
  does not reuse the identifier `attr` that MPIDI_POSIX_SEND_VSIS expands.
---
 .../ch4/shm/posix/eager/iqueue/iqueue_send.h  | 21 ++++++++++++-------
 src/mpid/ch4/shm/posix/posix_am.h             | 16 ++++++++++++--
 src/mpid/ch4/shm/posix/posix_pre.h            |  4 +++-
 src/mpid/ch4/shm/posix/posix_progress.h       |  2 ++
 src/mpid/ch4/shm/posix/posix_send.h           | 12 ++++++++++-
 src/mpid/ch4/src/mpidig_send_utils.h          |  4 ++++
 6 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/src/mpid/ch4/shm/posix/eager/iqueue/iqueue_send.h b/src/mpid/ch4/shm/posix/eager/iqueue/iqueue_send.h
index 0ec3931cd93..8193a00b7a6 100644
--- a/src/mpid/ch4/shm/posix/eager/iqueue/iqueue_send.h
+++ b/src/mpid/ch4/shm/posix/eager/iqueue/iqueue_send.h
@@ -95,9 +95,11 @@ MPIDI_POSIX_eager_send(int grank, MPIDI_POSIX_am_header_t * msg_hdr, const void
         cell->type = MPIDI_POSIX_EAGER_IQUEUE_CELL_TYPE_HDR;
         /* send am_hdr if this is the first segment */
         if (is_topo_local) {
-            MPIR_Typerep_copy(payload, am_hdr, am_hdr_sz, MPIR_TYPEREP_FLAG_NONE);
+            MPIR_Typerep_copy(payload, am_hdr, am_hdr_sz,
+                              MPIR_TYPEREP_FLAG_H2H | MPIR_TYPEREP_FLAG_NONE);
         } else {
-            MPIR_Typerep_copy(payload, am_hdr, am_hdr_sz, MPIR_TYPEREP_FLAG_STREAM);
+            MPIR_Typerep_copy(payload, am_hdr, am_hdr_sz,
+                              MPIR_TYPEREP_FLAG_H2H | MPIR_TYPEREP_FLAG_STREAM);
         }
         /* make sure the data region starts at the boundary of MAX_ALIGNMENT */
         payload = payload + resized_am_hdr_sz;
@@ -114,13 +116,16 @@ MPIDI_POSIX_eager_send(int grank, MPIDI_POSIX_am_header_t * msg_hdr, const void
      * not reliable because the derived datatype could have zero block size which contains no
      * data. */
     if (bytes_sent) {
-        if (is_topo_local) {
-            MPIR_Typerep_pack(buf, count, datatype, offset, payload, available, &packed_size,
-                              MPIR_TYPEREP_FLAG_NONE);
-        } else {
-            MPIR_Typerep_pack(buf, count, datatype, offset, payload, available, &packed_size,
-                              MPIR_TYPEREP_FLAG_STREAM);
+        int typerep_flags = MPIR_TYPEREP_FLAG_NONE;
+        if (msg_hdr && (msg_hdr->am_type == MPIDI_POSIX_AM_TYPE__SHORT_HOST
+                        || msg_hdr->am_type == MPIDI_POSIX_AM_TYPE__PIPELINE_HOST)) {
+            typerep_flags |= MPIR_TYPEREP_FLAG_H2H;
+        }
+        if (!is_topo_local) {
+            typerep_flags |= MPIR_TYPEREP_FLAG_STREAM;
         }
+        MPIR_Typerep_pack(buf, count, datatype, offset, payload, available, &packed_size,
+                          typerep_flags);
         cell->payload_size += packed_size;
         *bytes_sent = packed_size;
     }
diff --git a/src/mpid/ch4/shm/posix/posix_am.h b/src/mpid/ch4/shm/posix/posix_am.h
index 5ddc2a45ec1..2b29b3801cb 100644
--- a/src/mpid/ch4/shm/posix/posix_am.h
+++ b/src/mpid/ch4/shm/posix/posix_am.h
@@ -11,6 +11,10 @@
 #include "posix_eager.h"
 #include "mpidu_genq.h"
 
+#undef IS_HOST
+#define IS_HOST(attr) ((attr).type == MPL_GPU_POINTER_UNREGISTERED_HOST || \
+                       (attr).type == MPL_GPU_POINTER_REGISTERED_HOST)
+
 MPL_STATIC_INLINE_PREFIX MPI_Aint MPIDI_POSIX_am_eager_limit(void)
 {
     return MPIDI_POSIX_eager_payload_limit() - MAX_ALIGNMENT;
@@ -284,9 +288,17 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_do_am_isend(int grank,
     msg_hdr_p = msg_hdr;
 
     if (data_sz + am_hdr_sz <= MPIDI_POSIX_am_eager_limit()) {
-        msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__SHORT;
+        if (IS_HOST(MPIDIG_REQUEST(sreq, buf_attr))) {
+            msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__SHORT_HOST;
+        } else {
+            msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__SHORT;
+        }
     } else {
-        msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__PIPELINE;
+        if (IS_HOST(MPIDIG_REQUEST(sreq, buf_attr))) {
+            msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__PIPELINE_HOST;
+        } else {
+            msg_hdr_p->am_type = MPIDI_POSIX_AM_TYPE__PIPELINE;
+        }
     }
 
     MPIDIG_am_send_async_init(sreq, datatype, data_sz);
diff --git a/src/mpid/ch4/shm/posix/posix_pre.h b/src/mpid/ch4/shm/posix/posix_pre.h
index 1357eed5895..8e25e5be9af 100644
--- a/src/mpid/ch4/shm/posix/posix_pre.h
+++ b/src/mpid/ch4/shm/posix/posix_pre.h
@@ -22,7 +22,9 @@ typedef enum {
 typedef enum {
     MPIDI_POSIX_AM_TYPE__HDR,
     MPIDI_POSIX_AM_TYPE__SHORT,
-    MPIDI_POSIX_AM_TYPE__PIPELINE
+    MPIDI_POSIX_AM_TYPE__SHORT_HOST,
+    MPIDI_POSIX_AM_TYPE__PIPELINE,
+    MPIDI_POSIX_AM_TYPE__PIPELINE_HOST
 } MPIDI_POSIX_am_type_t;
 
 struct MPIR_Request;
diff --git a/src/mpid/ch4/shm/posix/posix_progress.h b/src/mpid/ch4/shm/posix/posix_progress.h
index f0096bb6e04..e65ccc70c7a 100644
--- a/src/mpid/ch4/shm/posix/posix_progress.h
+++ b/src/mpid/ch4/shm/posix/posix_progress.h
@@ -57,11 +57,13 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_progress_recv(int vci, int *made_progre
     switch (msg_hdr->am_type) {
         case MPIDI_POSIX_AM_TYPE__HDR:
         case MPIDI_POSIX_AM_TYPE__SHORT:
+        case MPIDI_POSIX_AM_TYPE__SHORT_HOST:
             MPIDIG_global.target_msg_cbs[msg_hdr->handler_id] (am_hdr, payload, payload_left,
                                                                attr, NULL);
             MPIDI_POSIX_eager_recv_commit(&transaction);
             goto fn_exit;
         case MPIDI_POSIX_AM_TYPE__PIPELINE:
+        case MPIDI_POSIX_AM_TYPE__PIPELINE_HOST:
             MPIDIG_global.target_msg_cbs[msg_hdr->handler_id] (am_hdr, NULL, payload_left,
                                                                attr | MPIDIG_AM_ATTR__IS_ASYNC,
                                                                &rreq);
diff --git a/src/mpid/ch4/shm/posix/posix_send.h b/src/mpid/ch4/shm/posix/posix_send.h
index 0796f671568..6a7e390de01 100644
--- a/src/mpid/ch4/shm/posix/posix_send.h
+++ b/src/mpid/ch4/shm/posix/posix_send.h
@@ -16,6 +16,10 @@
 
 #include "posix_impl.h"
 
+#undef IS_HOST
+#define IS_HOST(attr) ((attr).type == MPL_GPU_POINTER_UNREGISTERED_HOST || \
+                       (attr).type == MPL_GPU_POINTER_REGISTERED_HOST)
+
 #define MPIDI_POSIX_SEND_VSIS(vci_src_, vci_dst_) \
     do { \
         MPIDI_EXPLICIT_VCIS(comm, attr, comm->rank, rank, vci_src_, vci_dst_); \
@@ -52,7 +56,13 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_isend(const void *buf, MPI_Aint cou
     MPIDI_POSIX_am_header_t msg_hdr;
     msg_hdr.handler_id = MPIDIG_SEND;
     msg_hdr.am_hdr_sz = sizeof(MPIDIG_hdr_t);
-    msg_hdr.am_type = MPIDI_POSIX_AM_TYPE__SHORT;
+    MPL_pointer_attr_t buf_attr;
+    MPIR_GPU_query_pointer_attr(buf, &buf_attr);
+    if (IS_HOST(buf_attr)) {
+        msg_hdr.am_type = MPIDI_POSIX_AM_TYPE__SHORT_HOST;
+    } else {
+        msg_hdr.am_type = MPIDI_POSIX_AM_TYPE__SHORT;
+    }
 
     MPIDIG_hdr_t am_hdr;
     am_hdr.src_rank = comm->rank;
diff --git a/src/mpid/ch4/src/mpidig_send_utils.h b/src/mpid/ch4/src/mpidig_send_utils.h
index f950aecbeef..c8018d424b3 100644
--- a/src/mpid/ch4/src/mpidig_send_utils.h
+++ b/src/mpid/ch4/src/mpidig_send_utils.h
@@ -6,6 +6,10 @@
 #ifndef MPIDIG_SEND_UTILS_H_INCLUDED
 #define MPIDIG_SEND_UTILS_H_INCLUDED
 
+#undef IS_HOST
+#define IS_HOST(attr) ((attr).type == MPL_GPU_POINTER_UNREGISTERED_HOST || \
+                       (attr).type == MPL_GPU_POINTER_REGISTERED_HOST)
+
 /* This file is for supporting routines used for pipelined data send. These routines mainly is for
  * managing the send request counters, completion counters and DT refcount */
 