Skip to content

Commit

Permalink
Merge pull request #145 from esmf-org/redist_multitile
Browse files Browse the repository at this point in the history
Multi-tile I/O: Support layouts that have something other than 1 DE per PET

Limitations:
- Requires at least as many PETs as tiles
- Doesn't work with replicated dimensions
  • Loading branch information
billsacks authored Oct 13, 2023
2 parents ea64eb3 + 3d6678e commit 260aedd
Show file tree
Hide file tree
Showing 2 changed files with 241 additions and 31 deletions.
90 changes: 59 additions & 31 deletions src/Infrastructure/IO/src/ESMCI_IO.C
Original file line number Diff line number Diff line change
Expand Up @@ -1246,63 +1246,91 @@ void IO::redist_arraycreate1de(Array *src_array_p, Array **dest_array_p, int pet

DistGrid *dg_orig = src_array_p->getDistGrid();

localrc = (dg_orig->getTileCount() == 1) ? ESMF_SUCCESS : ESMF_RC_NOT_IMPL;
if (ESMC_LogDefault.MsgFoundError(localrc, "Tile count != 1 is not supported", ESMC_CONTEXT, rc))
return;

const int *minIndexTile = dg_orig->getMinIndexPDimPTile();
const int *maxIndexTile = dg_orig->getMaxIndexPDimPTile();
const int *minIndexPDimPTile = dg_orig->getMinIndexPDimPTile();
const int *maxIndexPDimPTile = dg_orig->getMaxIndexPDimPTile();
const int *distgridToArrayMap = src_array_p->getDistGridToArrayMap();

int ndims = dg_orig->getDimCount();
int rank = src_array_p->getRank();
int tileCount = dg_orig->getTileCount();

if (tileCount > petCount) {
// If tileCount > petCount, the decomposition created by the default DistGrid sets
// deCount = tileCount, which leads to having more DEs than PETs, which is exactly
// what we're trying to avoid in this function.
ESMC_LogDefault.MsgFoundError(ESMF_RC_INTNRL_BAD,
"Multi-tile I/O requires at least as many PETs as tiles", ESMC_CONTEXT, rc);
return; // bail out
}

int replicatedDims = src_array_p->getReplicatedDimCount();

std::vector<int> minIndexTileVec;
std::vector<int> maxIndexTileVec;
std::vector<int> minIndexPDimPTileVec;
std::vector<int> maxIndexPDimPTileVec;
std::vector<int> distgridToArrayMapVec;
if (replicatedDims>0){
// TODO(wjs, 2023-05-26) Still need to fix this loop to work with the multi-tile
// case; then remove the following error check.
//
// See commit 28f951b333 (reverted in c5f596127e) for an attempt to get this working
// in the multi-tile case. However, this attempt was aborted due to finding what
// appear to be issues with I/O with Arrays with replicated dimensions even in the
// single-tile case - see https://github.com/esmf-org/esmf/issues/184. We have decided
// to leave this functionality in place in case anyone is relying on it, but we do not
// expect I/O with Arrays with replicated dimensions to work reliably or consistently.
if (tileCount > 1) {
ESMC_LogDefault.MsgFoundError(ESMF_RC_NOT_IMPL,
"Multi-tile with > 1 DE per PET and replicated dims not yet implemented", ESMC_CONTEXT, rc);
return;
}

// eliminate replicated dimensions from the destination
for (int i=0; i<ndims; i++){
if (distgridToArrayMap[i]!=0){
// not a replicated dim -> keep
minIndexTileVec.push_back(minIndexTile[i]);
maxIndexTileVec.push_back(maxIndexTile[i]);
minIndexPDimPTileVec.push_back(minIndexPDimPTile[i]);
maxIndexPDimPTileVec.push_back(maxIndexPDimPTile[i]);
distgridToArrayMapVec.push_back(distgridToArrayMap[i]);
}
}
if (minIndexTileVec.size()<1){
if (minIndexPDimPTileVec.size()<1){
ESMC_LogDefault.MsgFoundError(ESMF_RC_INTNRL_BAD,
"Not enough distributed dimensions", ESMC_CONTEXT, rc);
return; // bail out
}
// now point to the set of reduced lists
minIndexTile = &(minIndexTileVec[0]);
maxIndexTile = &(maxIndexTileVec[0]);
minIndexPDimPTile = &(minIndexPDimPTileVec[0]);
maxIndexPDimPTile = &(maxIndexPDimPTileVec[0]);
distgridToArrayMap = &(distgridToArrayMapVec[0]);
}

if ((maxIndexTile[0]-minIndexTile[0]+1)<petCount){
ESMC_LogDefault.MsgFoundError(ESMF_RC_INTNRL_BAD,
"Index space too small to be distributed across all PETs", ESMC_CONTEXT, rc);
return; // bail out
}

ESMCI::InterArray<int> minIndexInterface((int*)minIndexTile, ndims-replicatedDims);
ESMCI::InterArray<int> maxIndexInterface((int*)maxIndexTile, ndims-replicatedDims);
DistGrid *distgrid;
if (tileCount == 1) {
ESMCI::InterArray<int> minIndexInterface((int*)minIndexPDimPTile, ndims-replicatedDims);
ESMCI::InterArray<int> maxIndexInterface((int*)maxIndexPDimPTile, ndims-replicatedDims);
#if 0
std::cout << ESMC_METHOD << "[" << me << "]: setting maxindex to: (";
for (int i=0; i<ndims; i++)
std::cout << " " << maxIndexTile[i];
std::cout << " )" << std::endl;
std::cout << ESMC_METHOD << "[" << me << "]: setting maxindex to: (";
for (int i=0; i<ndims; i++)
std::cout << " " << maxIndexPDimPTile[i];
std::cout << " )" << std::endl;
#endif
// create default DistGrid, which means 1DE per PET
DistGrid *distgrid = DistGrid::create(&minIndexInterface, &maxIndexInterface, NULL,
NULL, 0, NULL, NULL, NULL, NULL, NULL, (ESMCI::DELayout*)NULL, NULL,
&localrc);
if (ESMC_LogDefault.MsgFoundError(localrc, ESMCI_ERR_PASSTHRU, ESMC_CONTEXT, rc))
return;
// create default DistGrid, which means 1DE per PET
distgrid = DistGrid::create(&minIndexInterface, &maxIndexInterface, NULL,
NULL, 0, NULL, NULL, NULL, NULL, NULL, (ESMCI::DELayout*)NULL, NULL,
&localrc);
if (ESMC_LogDefault.MsgFoundError(localrc, ESMCI_ERR_PASSTHRU, ESMC_CONTEXT, rc))
return;
} else {
int dummyLen[] = {ndims-replicatedDims, tileCount};
ESMCI::InterArray<int> minIndexInterface((int*)minIndexPDimPTile, 2, dummyLen);
ESMCI::InterArray<int> maxIndexInterface((int*)maxIndexPDimPTile, 2, dummyLen);
// create default DistGrid, which means 1DE per PET
distgrid = DistGrid::create(&minIndexInterface, &maxIndexInterface, NULL,
NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, (ESMCI::DELayout*)NULL, NULL,
&localrc);
if (ESMC_LogDefault.MsgFoundError(localrc, ESMCI_ERR_PASSTHRU, ESMC_CONTEXT, rc))
return;
}

// std::cout << ESMC_METHOD << ": creating temp Array for redistribution" << std::endl;
ESMCI::ArraySpec arrayspec;
Expand Down
182 changes: 182 additions & 0 deletions src/Infrastructure/IO/tests/ESMF_IO_MultitileUTest.F90
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ program ESMF_IO_MultitileUTest
type(ESMF_VM) :: vm
integer :: localPet
type(ESMF_Grid) :: grid6tile
type(ESMF_Grid) :: grid6tileUnevenDEs
integer :: grid6tileUnevenDEsLdeCount
integer :: lde
type(ESMF_DistGrid) :: distgrid3tile

! Fields used for writing:
Expand All @@ -58,6 +61,10 @@ program ESMF_IO_MultitileUTest
! This field is not in the field bundle:
type(ESMF_Field) :: field3
real(ESMF_KIND_R8), pointer :: field3Data(:,:)
! These fields are for tests with something other than 1 DE per PET:
type(ESMF_Field) :: field1UnevenDEs, field4dUnevenDEs
real(ESMF_KIND_R8), pointer :: field1UnevenDEsData(:,:)
real(ESMF_KIND_R8), pointer :: field4dUnevenDEsData(:,:,:,:)

! Fields used for reading:
!
Expand All @@ -68,6 +75,10 @@ program ESMF_IO_MultitileUTest
! This field is not in the field bundle:
type(ESMF_Field) :: field3Read
real(ESMF_KIND_R8), pointer :: field3ReadData(:,:)
! These fields are for tests with something other than 1 DE per PET:
type(ESMF_Field) :: field1UnevenDEsRead, field4dUnevenDEsRead
real(ESMF_KIND_R8), pointer :: field1UnevenDEsReadData(:,:)
real(ESMF_KIND_R8), pointer :: field4dUnevenDEsReadData(:,:,:,:)

! This is used for error testing:
type(ESMF_Grid) :: gridSingleTile
Expand Down Expand Up @@ -99,6 +110,7 @@ program ESMF_IO_MultitileUTest
character(len=*), parameter :: fileNameFields = "ESMF_IO_MultitileUTestFields*.nc"
character(len=*), parameter :: fileNameArrays = "ESMF_IO_MultitileUTestArrays*.nc"
character(len=*), parameter :: fileNameFail = "ESMF_IO_MultitileUTestFail*.nc"
character(len=*), parameter :: fileNameUnevenDEs = "ESMF_IO_MultitileUTestUnevenDEs*.nc"

!------------------------------------------------------------------------
call ESMF_TestStart(ESMF_SRCLINE, rc=rc) ! calls ESMF_Initialize() internally
Expand Down Expand Up @@ -340,6 +352,104 @@ program ESMF_IO_MultitileUTest
#endif
!------------------------------------------------------------------------

!------------------------------------------------------------------------
!EX_UTest_Multi_Proc_Only
write(name,*) "Write a multi-tile Field with uneven DEs per PET"
write(failMsg, *) "Did not return ESMF_SUCCESS"
call ESMF_FieldWrite(field1UnevenDEs, fileName=fileNameUnevenDEs, overwrite=.true., rc=rc)
#if (defined ESMF_PIO && (defined ESMF_NETCDF || defined ESMF_PNETCDF))
call ESMF_Test((rc == ESMF_SUCCESS), name, failMsg, result, ESMF_SRCLINE)
#else
write(failMsg, *) "Did not return ESMF_RC_LIB_NOT_PRESENT"
call ESMF_Test((rc == ESMF_RC_LIB_NOT_PRESENT), name, failMsg, result, ESMF_SRCLINE)
#endif
!------------------------------------------------------------------------

!------------------------------------------------------------------------
!EX_UTest_Multi_Proc_Only
write(name,*) "Read a multi-tile Field with uneven DEs per PET"
write(failMsg, *) "Did not return ESMF_SUCCESS"
call ESMF_FieldRead(field1UnevenDEsRead, fileName=fileNameUnevenDEs, rc=rc)
#if (defined ESMF_PIO && (defined ESMF_NETCDF || defined ESMF_PNETCDF))
call ESMF_Test((rc == ESMF_SUCCESS), name, failMsg, result, ESMF_SRCLINE)
#else
write(failMsg, *) "Did not return ESMF_RC_LIB_NOT_PRESENT"
call ESMF_Test((rc == ESMF_RC_LIB_NOT_PRESENT), name, failMsg, result, ESMF_SRCLINE)
#endif
!------------------------------------------------------------------------

!------------------------------------------------------------------------
!EX_UTest_Multi_Proc_Only
write(name,*) "Confirm that Field-read field matches original with uneven DEs per PET"
write(failMsg, *) "Read-in field differs from original"
allEqual = .true.
do lde = 0, grid6tileUnevenDEsLdeCount - 1
! For simplicity, bail out if the following FieldGets fail rather than calling them their own unit test
call ESMF_FieldGet(field1UnevenDEs, localDe=lde, farrayPtr=field1UnevenDEsData, rc=rc)
if (rc /= ESMF_SUCCESS) call ESMF_Finalize(endflag=ESMF_END_ABORT)
call ESMF_FieldGet(field1UnevenDEsRead, localDe=lde, farrayPtr=field1UnevenDEsReadData, rc=rc)
if (rc /= ESMF_SUCCESS) call ESMF_Finalize(endflag=ESMF_END_ABORT)
if (.not. all(field1UnevenDEsReadData == field1UnevenDEsData)) then
allEqual = .false.
end if
end do
#if (defined ESMF_PIO && (defined ESMF_NETCDF || defined ESMF_PNETCDF))
call ESMF_Test(allEqual, name, failMsg, result, ESMF_SRCLINE)
#else
write(failMsg, *) "Comparison did not fail as expected"
call ESMF_Test(.not. allEqual, name, failMsg, result, ESMF_SRCLINE)
#endif
!------------------------------------------------------------------------

!------------------------------------------------------------------------
!EX_UTest_Multi_Proc_Only
write(name,*) "Write a multi-tile Field with uneven DEs per PET and ungridded dims"
write(failMsg, *) "Did not return ESMF_SUCCESS"
call ESMF_FieldWrite(field4dUnevenDEs, fileName=fileNameUnevenDEs, overwrite=.true., rc=rc)
#if (defined ESMF_PIO && (defined ESMF_NETCDF || defined ESMF_PNETCDF))
call ESMF_Test((rc == ESMF_SUCCESS), name, failMsg, result, ESMF_SRCLINE)
#else
write(failMsg, *) "Did not return ESMF_RC_LIB_NOT_PRESENT"
call ESMF_Test((rc == ESMF_RC_LIB_NOT_PRESENT), name, failMsg, result, ESMF_SRCLINE)
#endif
!------------------------------------------------------------------------

!------------------------------------------------------------------------
!EX_UTest_Multi_Proc_Only
write(name,*) "Read a multi-tile Field with uneven DEs per PET and ungridded dims"
write(failMsg, *) "Did not return ESMF_SUCCESS"
call ESMF_FieldRead(field4dUnevenDEsRead, fileName=fileNameUnevenDEs, rc=rc)
#if (defined ESMF_PIO && (defined ESMF_NETCDF || defined ESMF_PNETCDF))
call ESMF_Test((rc == ESMF_SUCCESS), name, failMsg, result, ESMF_SRCLINE)
#else
write(failMsg, *) "Did not return ESMF_RC_LIB_NOT_PRESENT"
call ESMF_Test((rc == ESMF_RC_LIB_NOT_PRESENT), name, failMsg, result, ESMF_SRCLINE)
#endif
!------------------------------------------------------------------------

!------------------------------------------------------------------------
!EX_UTest_Multi_Proc_Only
write(name,*) "Confirm that Field-read field matches original with uneven DEs per PET and ungridded dims"
write(failMsg, *) "Read-in field differs from original"
allEqual = .true.
do lde = 0, grid6tileUnevenDEsLdeCount - 1
! For simplicity, bail out if the following FieldGets fail rather than calling them their own unit test
call ESMF_FieldGet(field4dUnevenDEs, localDe=lde, farrayPtr=field4dUnevenDEsData, rc=rc)
if (rc /= ESMF_SUCCESS) call ESMF_Finalize(endflag=ESMF_END_ABORT)
call ESMF_FieldGet(field4dUnevenDEsRead, localDe=lde, farrayPtr=field4dUnevenDEsReadData, rc=rc)
if (rc /= ESMF_SUCCESS) call ESMF_Finalize(endflag=ESMF_END_ABORT)
if (.not. all(field4dUnevenDEsReadData == field4dUnevenDEsData)) then
allEqual = .false.
end if
end do
#if (defined ESMF_PIO && (defined ESMF_NETCDF || defined ESMF_PNETCDF))
call ESMF_Test(allEqual, name, failMsg, result, ESMF_SRCLINE)
#else
write(failMsg, *) "Comparison did not fail as expected"
call ESMF_Test(.not. allEqual, name, failMsg, result, ESMF_SRCLINE)
#endif
!------------------------------------------------------------------------

#endif ! ESMF_TESTEXHAUSTIVE

!------------------------------------------------------------------------
Expand All @@ -353,9 +463,11 @@ subroutine createFields(rc)
integer, intent(out) :: rc

integer :: decompPTile(2,6)
integer :: decompPTileUnevenDEs(2,6)
type(ESMF_ArraySpec) :: arraySpec
type(ESMF_ArraySpec) :: arraySpec_w_ungridded
type(ESMF_Array) :: array1
type(ESMF_DELayout) :: delayout
real(ESMF_KIND_R8), pointer :: coordPtrX(:,:), coordPtrY(:,:)
integer :: u1, u2, i, j
real :: multiplier
Expand Down Expand Up @@ -475,6 +587,76 @@ subroutine createFields(rc)
call ESMF_FieldBundleAdd(fieldBundleRead, [field1Read, field2Read, field1CopyRead, field4dRead], rc=rc)
if (rc /= ESMF_SUCCESS) return

!------------------------------------------------------------------------
! Set up a 6-tile grid with an uneven distribution of DEs to PETs, and create fields
! on this grid
!------------------------------------------------------------------------

! Decomposition for 8 PEs but 16 DEs
!
! The number of DEs per tile is:
! Tile : 1 2 3 4 5 6
! # DEs: 2 1 6 1 3 3
!
! The DEs are scattered in a disorganized fashion across PETs. We have the following
! number of DEs on each PET:
! PET #: 0 1 2 3 4 5 6 7
! # DEs: 1 2 3 4 0 3 0 3
decompPTileUnevenDEs(1,:) = [2,1,3,1,1,3]
decompPTileUnevenDEs(2,:) = [1,1,2,1,3,1]
delayout = ESMF_DELayoutCreate(petMap=[3,2,5,5,1,3,2,1,7,3,0,7,2,7,3,5])
grid6tileUnevenDEs = ESMF_GridCreateCubedSphere( &
tilesize = 6, &
regDecompPTile = decompPTileUnevenDEs, &
delayout = delayout, &
staggerLocList = [ESMF_STAGGERLOC_CENTER], &
rc = rc)
if (rc /= ESMF_SUCCESS) return
call ESMF_GridGet(grid6tileUnevenDEs, localDECount=grid6tileUnevenDEsLdeCount, rc=rc)
if (rc /= ESMF_SUCCESS) return

field1UnevenDEs = ESMF_FieldCreate(grid6tileUnevenDEs, arraySpec, name="field1UnevenDEs", rc=rc)
if (rc /= ESMF_SUCCESS) return
call ESMF_FieldFill(field1UnevenDEs, dataFillScheme='sincos', member=1, rc=rc)
if (rc /= ESMF_SUCCESS) return
! Note that we can't get farrayPtr here because we'll need to do that in a loop over DEs

field1UnevenDEsRead = ESMF_FieldCreate(grid6tileUnevenDEs, arraySpec, name="field1UnevenDEs", rc=rc)
if (rc /= ESMF_SUCCESS) return
! Note that we can't get farrayPtr here because we'll need to do that in a loop over DEs

field4dUnevenDEs = ESMF_FieldCreate(grid6tileUnevenDEs, arraySpec_w_ungridded, name="field4dUnevenDEs", &
ungriddedLBound=[2,15], ungriddedUBound=[4,18], &
! 2nd and 4th dimensions are ungridded dimensions
gridToFieldMap=[1,3], &
rc=rc)
if (rc /= ESMF_SUCCESS) return
do lde = 0, grid6tileUnevenDEsLdeCount-1
call ESMF_FieldGet(field4dUnevenDEs, localDe=lde, farrayPtr=field4dUnevenDEsData, rc=rc)
if (rc /= ESMF_SUCCESS) return
call ESMF_GridGetCoord(grid6tileUnevenDEs, coordDim=1, localDe=lde, farrayPtr=coordPtrX, rc=rc)
if (rc /= ESMF_SUCCESS) return
call ESMF_GridGetCoord(grid6tileUnevenDEs, coordDim=2, localDe=lde, farrayPtr=coordPtrY, rc=rc)
if (rc /= ESMF_SUCCESS) return
do u1 = 2,4
do u2 = 15,18
do i = lbound(field4dUnevenDEsData, 1), ubound(field4dUnevenDEsData, 1)
do j = lbound(field4dUnevenDEsData, 3), ubound(field4dUnevenDEsData, 3)
multiplier = 5.**(u2-15)
field4dUnevenDEsData(i,u1,j,u2) = u1*multiplier*(coordPtrX(i,j) - coordPtrY(i,j))
end do
end do
end do
end do
end do

field4dUnevenDEsRead = ESMF_FieldCreate(grid6tileUnevenDEs, arraySpec_w_ungridded, name="field4dUnevenDEs", &
ungriddedLBound=[2,15], ungriddedUBound=[4,18], &
! 2nd and 4th dimensions are ungridded dimensions
gridToFieldMap=[1,3], &
rc=rc)
if (rc /= ESMF_SUCCESS) return

end subroutine createFields

subroutine createSingleTileField(rc)
Expand Down

0 comments on commit 260aedd

Please sign in to comment.