Commit

Use PetscCall() instead of CHKERRQ() in tinyasm (#3959)
connorjward authored Jan 9, 2025
Parent: 3fd32f3 · Commit: 39742a6
Showing 1 changed file with 79 additions and 88 deletions.
167 changes: 79 additions & 88 deletions tinyasm/tinyasm.cpp
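
For context, the change throughout this diff follows PETSc's newer error-handling idiom: rather than storing each return code in a local ierr and checking it with CHKERRQ()/CHKERRV(), every call is wrapped directly in PetscCall(), which performs the check and returns from the enclosing function on failure. Below is a minimal before/after sketch of the pattern; the helper names FillOldStyle and FillNewStyle and the use of VecSet are illustrative only, not taken from tinyasm.cpp.

#include <petscvec.h>

/* Old idiom: capture the return code and check it explicitly. */
PetscErrorCode FillOldStyle(Vec v)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = VecSet(v, 1.0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* New idiom: wrap the call in PetscCall(), which checks and propagates errors. */
PetscErrorCode FillNewStyle(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecSet(v, 1.0));
  PetscFunctionReturn(0);
}

/* Tiny driver showing the new style end to end. */
int main(int argc, char **argv)
{
  Vec v;
  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, 4, &v));
  PetscCall(FillNewStyle(v));
  PetscCall(VecDestroy(&v));
  PetscCall(PetscFinalize());
  return 0;
}

The new form simply drops the intermediate variable, which is why many of the deleted lines below are bare ierr declarations.
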
@@ -63,9 +63,8 @@ class BlockJacobi {
fwork = vector<PetscScalar>(biggestBlock, 0.);
localmats_aij = NULL;
dofis = vector<IS>(numBlocks);
auto ierr = PetscMalloc1(numBlocks, &localmats);CHKERRV(ierr);
PetscMalloc1(numBlocks, &localmats);
for(int p=0; p<numBlocks; p++) {
auto ndof = dofsPerBlock[p].size();
localmats[p] = NULL;
ISCreateGeneral(MPI_COMM_SELF, globalDofsPerBlock[p].size(), globalDofsPerBlock[p].data(), PETSC_USE_POINTER, dofis.data() + p);
}
@@ -77,47 +76,43 @@ class BlockJacobi {
ISDestroy(&dofis[p]);
}
if(localmats_aij) {
PetscErrorCode ierr;
ierr = MatDestroySubMatrices(numBlocks, &localmats_aij);CHKERRV(ierr);
MatDestroySubMatrices(numBlocks, &localmats_aij);
}
if (localmats) {
PetscErrorCode ierr;
for (int p=0; p<numBlocks; p++) {
ierr = MatDestroy(&localmats[p]);CHKERRV(ierr);
MatDestroy(&localmats[p]);
}
ierr = PetscFree(localmats);CHKERRV(ierr);
PetscFree(localmats);
}
PetscSFDestroy(&sf);
}

PetscInt updateValuesPerBlock(Mat P) {
PetscInt ierr;
PetscBLASInt dof, info;
int numBlocks = dofsPerBlock.size();
ierr = MatCreateSubMatrices(P, numBlocks, dofis.data(), dofis.data(), localmats_aij ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, &localmats_aij);CHKERRQ(ierr);
PetscCall(MatCreateSubMatrices(P, numBlocks, dofis.data(), dofis.data(), localmats_aij ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, &localmats_aij));
PetscScalar *vv;
for(int p=0; p<numBlocks; p++) {
ierr = MatConvert(localmats_aij[p], MATDENSE, localmats[p] ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,&localmats[p]);CHKERRQ(ierr);
PetscCall(MatConvert(localmats_aij[p], MATDENSE, localmats[p] ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,&localmats[p]));
PetscCall(PetscBLASIntCast(dofsPerBlock[p].size(), &dof));
ierr = MatDenseGetArrayWrite(localmats[p],&vv);CHKERRQ(ierr);
PetscCall(MatDenseGetArrayWrite(localmats[p],&vv));
if (dof) mymatinvert(&dof, vv, piv.data(), &info, fwork.data());
ierr = MatDenseRestoreArrayWrite(localmats[p],&vv);CHKERRQ(ierr);
PetscCall(MatDenseRestoreArrayWrite(localmats[p],&vv));
}
return 0;
}


PetscInt solve(const PetscScalar* __restrict b, PetscScalar* __restrict x) {
PetscInt ierr;
PetscScalar dOne = 1.0;
PetscBLASInt dof, one = 1;
PetscScalar dZero = 0.0;

const PetscScalar *matvalues;
for(int p=0; p<dofsPerBlock.size(); p++) {
for(size_t p=0; p<dofsPerBlock.size(); p++) {
dof = dofsPerBlock[p].size();
auto dofmap = dofsPerBlock[p];
ierr = MatDenseGetArrayRead(localmats[p],&matvalues);CHKERRQ(ierr);
PetscCall(MatDenseGetArrayRead(localmats[p],&matvalues));
for(int j=0; j<dof; j++)
workb[j] = b[dofmap[j]];
if(dof < 6)
@@ -129,7 +124,7 @@ class BlockJacobi {
for(int i=0; i<dof; i++)
x[dofmap[i]] += worka[i];
}
ierr = MatDenseRestoreArrayRead(localmats[p],&matvalues);CHKERRQ(ierr);
PetscCall(MatDenseRestoreArrayRead(localmats[p],&matvalues));
}
return 0;
}
@@ -138,13 +133,12 @@ class BlockJacobi {
PetscErrorCode CreateCombinedSF(PC pc, const std::vector<PetscSF>& sf, const std::vector<PetscInt>& bs, PetscSF *newsf)
{
PetscInt i;
PetscErrorCode ierr;
auto n = sf.size();

PetscFunctionBegin;
if (n == 1 && bs[0] == 1) {
*newsf= sf[0];
ierr = PetscObjectReference((PetscObject) *newsf);CHKERRQ(ierr);
PetscCall(PetscObjectReference((PetscObject) *newsf));
} else {
PetscInt allRoots = 0, allLeaves = 0;
PetscInt leafOffset = 0;
@@ -168,68 +162,68 @@ PetscErrorCode CreateCombinedSF(PC pc, const std::vector<PetscSF>& sf, const std
for (i = 0; i < n; ++i) {
PetscInt nroots, nleaves;

ierr = PetscSFGetGraph(sf[i], &nroots, &nleaves, NULL, NULL);CHKERRQ(ierr);
PetscCall(PetscSFGetGraph(sf[i], &nroots, &nleaves, NULL, NULL));
allRoots += nroots * bs[i];
allLeaves += nleaves * bs[i];
}
ierr = PetscMalloc1(allLeaves, &ilocal);CHKERRQ(ierr);
ierr = PetscMalloc1(allLeaves, &iremote);CHKERRQ(ierr);
PetscCall(PetscMalloc1(allLeaves, &ilocal));
PetscCall(PetscMalloc1(allLeaves, &iremote));
// Now build an SF that just contains process connectivity.
ierr = PetscHSetICreate(&ranksUniq);CHKERRQ(ierr);
PetscCall(PetscHSetICreate(&ranksUniq));
for (i = 0; i < n; ++i) {
const PetscMPIInt *ranks = NULL;
PetscMPIInt nranks, j;

ierr = PetscSFSetUp(sf[i]);CHKERRQ(ierr);
ierr = PetscSFGetRootRanks(sf[i], &nranks, &ranks, NULL, NULL, NULL);CHKERRQ(ierr);
PetscCall(PetscSFSetUp(sf[i]));
PetscCall(PetscSFGetRootRanks(sf[i], &nranks, &ranks, NULL, NULL, NULL));
// These are all the ranks who communicate with me.
for (j = 0; j < nranks; ++j) {
ierr = PetscHSetIAdd(ranksUniq, (PetscInt) ranks[j]);CHKERRQ(ierr);
PetscCall(PetscHSetIAdd(ranksUniq, (PetscInt) ranks[j]));
}
}
ierr = PetscHSetIGetSize(ranksUniq, &numRanks);CHKERRQ(ierr);
ierr = PetscMalloc1(numRanks, &remote);CHKERRQ(ierr);
ierr = PetscMalloc1(numRanks, &ranks);CHKERRQ(ierr);
ierr = PetscHSetIGetElems(ranksUniq, &index, ranks);CHKERRQ(ierr);
PetscCall(PetscHSetIGetSize(ranksUniq, &numRanks));
PetscCall(PetscMalloc1(numRanks, &remote));
PetscCall(PetscMalloc1(numRanks, &ranks));
PetscCall(PetscHSetIGetElems(ranksUniq, &index, ranks));

ierr = PetscHMapICreate(&rankToIndex);CHKERRQ(ierr);
PetscCall(PetscHMapICreate(&rankToIndex));
for (i = 0; i < numRanks; ++i) {
remote[i].rank = ranks[i];
remote[i].index = 0;
ierr = PetscHMapISet(rankToIndex, ranks[i], i);CHKERRQ(ierr);
PetscCall(PetscHMapISet(rankToIndex, ranks[i], i));
}
ierr = PetscFree(ranks);CHKERRQ(ierr);
ierr = PetscHSetIDestroy(&ranksUniq);CHKERRQ(ierr);
ierr = PetscSFCreate(PetscObjectComm((PetscObject) pc), &rankSF);CHKERRQ(ierr);
ierr = PetscSFSetGraph(rankSF, 1, numRanks, NULL, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER);CHKERRQ(ierr);
ierr = PetscSFSetUp(rankSF);CHKERRQ(ierr);
PetscCall(PetscFree(ranks));
PetscCall(PetscHSetIDestroy(&ranksUniq));
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) pc), &rankSF));
PetscCall(PetscSFSetGraph(rankSF, 1, numRanks, NULL, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
PetscCall(PetscSFSetUp(rankSF));
/* OK, use it to communicate the root offset on the remote
* processes for each subspace. */
ierr = PetscMalloc1(n, &offsets);CHKERRQ(ierr);
ierr = PetscMalloc1(n*numRanks, &remoteOffsets);CHKERRQ(ierr);
PetscCall(PetscMalloc1(n, &offsets));
PetscCall(PetscMalloc1(n*numRanks, &remoteOffsets));

offsets[0] = 0;
for (i = 1; i < n; ++i) {
PetscInt nroots;

ierr = PetscSFGetGraph(sf[i-1], &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
PetscCall(PetscSFGetGraph(sf[i-1], &nroots, NULL, NULL, NULL));
offsets[i] = offsets[i-1] + nroots*bs[i-1];
}
/* Offsets are the offsets on the current process of the
* global dof numbering for the subspaces. */
ierr = MPI_Type_contiguous(n, MPIU_INT, &contig);CHKERRQ(ierr);
ierr = MPI_Type_commit(&contig);CHKERRQ(ierr);
PetscCall(MPI_Type_contiguous(n, MPIU_INT, &contig));
PetscCall(MPI_Type_commit(&contig));

#if MY_PETSC_VERSION_LT(3, 14, 4)
ierr = PetscSFBcastBegin(rankSF, contig, offsets, remoteOffsets);CHKERRQ(ierr);
ierr = PetscSFBcastEnd(rankSF, contig, offsets, remoteOffsets);CHKERRQ(ierr);
PetscCall(PetscSFBcastBegin(rankSF, contig, offsets, remoteOffsets));
PetscCall(PetscSFBcastEnd(rankSF, contig, offsets, remoteOffsets));
#else
ierr = PetscSFBcastBegin(rankSF, contig, offsets, remoteOffsets, MPI_REPLACE);CHKERRQ(ierr);
ierr = PetscSFBcastEnd(rankSF, contig, offsets, remoteOffsets, MPI_REPLACE);CHKERRQ(ierr);
PetscCall(PetscSFBcastBegin(rankSF, contig, offsets, remoteOffsets, MPI_REPLACE));
PetscCall(PetscSFBcastEnd(rankSF, contig, offsets, remoteOffsets, MPI_REPLACE));
#endif
ierr = MPI_Type_free(&contig);CHKERRQ(ierr);
ierr = PetscFree(offsets);CHKERRQ(ierr);
ierr = PetscSFDestroy(&rankSF);CHKERRQ(ierr);
PetscCall(MPI_Type_free(&contig));
PetscCall(PetscFree(offsets));
PetscCall(PetscSFDestroy(&rankSF));
/* Now remoteOffsets contains the offsets on the remote
* processes who communicate with me. So now we can
* concatenate the list of SFs into a single one. */
@@ -239,12 +233,12 @@ PetscErrorCode CreateCombinedSF(PC pc, const std::vector<PetscSF>& sf, const std
const PetscInt *local = NULL;
PetscInt nroots, nleaves, j;

ierr = PetscSFGetGraph(sf[i], &nroots, &nleaves, &local, &remote);CHKERRQ(ierr);
PetscCall(PetscSFGetGraph(sf[i], &nroots, &nleaves, &local, &remote));
for (j = 0; j < nleaves; ++j) {
PetscInt rank = remote[j].rank;
PetscInt idx, rootOffset, k;

ierr = PetscHMapIGet(rankToIndex, rank, &idx);CHKERRQ(ierr);
PetscCall(PetscHMapIGet(rankToIndex, rank, &idx));
if (idx == -1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Didn't find rank, huh?");
//[> Offset on given rank for ith subspace <]
rootOffset = remoteOffsets[n*idx + i];
@@ -257,52 +251,50 @@ PetscErrorCode CreateCombinedSF(PC pc, const std::vector<PetscSF>& sf, const std
}
leafOffset += nleaves * bs[i];
}
ierr = PetscHMapIDestroy(&rankToIndex);CHKERRQ(ierr);
ierr = PetscFree(remoteOffsets);CHKERRQ(ierr);
ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc), newsf);CHKERRQ(ierr);
ierr = PetscSFSetGraph(*newsf, allRoots, allLeaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);CHKERRQ(ierr);
PetscCall(PetscHMapIDestroy(&rankToIndex));
PetscCall(PetscFree(remoteOffsets));
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), newsf));
PetscCall(PetscSFSetGraph(*newsf, allRoots, allLeaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
}
PetscFunctionReturn(0);
}


PetscErrorCode PCSetup_TinyASM(PC pc) {
PetscInt ierr;
ierr = PetscLogEventBegin(PC_tinyasm_setup, pc, 0, 0, 0);CHKERRQ(ierr);
PetscCall(PetscLogEventBegin(PC_tinyasm_setup, pc, 0, 0, 0));
auto P = pc -> pmat;
auto blockjacobi = (BlockJacobi *)pc->data;
blockjacobi -> updateValuesPerBlock(P);
ierr = PetscLogEventEnd(PC_tinyasm_setup, pc, 0, 0, 0);CHKERRQ(ierr);
PetscCall(PetscLogEventEnd(PC_tinyasm_setup, pc, 0, 0, 0));
return 0;
}

PetscErrorCode PCApply_TinyASM(PC pc, Vec b, Vec x) {
PetscInt ierr;
ierr = PetscLogEventBegin(PC_tinyasm_apply, pc, 0, 0, 0);CHKERRQ(ierr);
ierr = VecSet(x, 0.0);CHKERRQ(ierr);
PetscCall(PetscLogEventBegin(PC_tinyasm_apply, pc, 0, 0, 0));
PetscCall(VecSet(x, 0.0));
auto blockjacobi = (BlockJacobi *)pc->data;

const PetscScalar *globalb;
PetscScalar *globalx;

ierr = VecGetArrayRead(b, &globalb);CHKERRQ(ierr);
PetscCall(VecGetArrayRead(b, &globalb));
#if MY_PETSC_VERSION_LT(3, 14, 4)
ierr = PetscSFBcastBegin(blockjacobi->sf, MPIU_SCALAR, globalb, &(blockjacobi->localb[0]));CHKERRQ(ierr);
ierr = PetscSFBcastEnd(blockjacobi->sf, MPIU_SCALAR, globalb, &(blockjacobi->localb[0]));CHKERRQ(ierr);
PetscCall(PetscSFBcastBegin(blockjacobi->sf, MPIU_SCALAR, globalb, &(blockjacobi->localb[0])));
PetscCall(PetscSFBcastEnd(blockjacobi->sf, MPIU_SCALAR, globalb, &(blockjacobi->localb[0])));
#else
ierr = PetscSFBcastBegin(blockjacobi->sf, MPIU_SCALAR, globalb, &(blockjacobi->localb[0]), MPI_REPLACE);CHKERRQ(ierr);
ierr = PetscSFBcastEnd(blockjacobi->sf, MPIU_SCALAR, globalb, &(blockjacobi->localb[0]), MPI_REPLACE);CHKERRQ(ierr);
PetscCall(PetscSFBcastBegin(blockjacobi->sf, MPIU_SCALAR, globalb, &(blockjacobi->localb[0]), MPI_REPLACE));
PetscCall(PetscSFBcastEnd(blockjacobi->sf, MPIU_SCALAR, globalb, &(blockjacobi->localb[0]), MPI_REPLACE));
#endif
ierr = VecRestoreArrayRead(b, &globalb);CHKERRQ(ierr);
PetscCall(VecRestoreArrayRead(b, &globalb));

std::fill(blockjacobi->localx.begin(), blockjacobi->localx.end(), 0);

blockjacobi->solve(blockjacobi->localb.data(), blockjacobi->localx.data());
ierr = VecGetArray(x, &globalx);CHKERRQ(ierr);
ierr = PetscSFReduceBegin(blockjacobi->sf, MPIU_SCALAR, &(blockjacobi->localx[0]), globalx, MPI_SUM);CHKERRQ(ierr);
ierr = PetscSFReduceEnd(blockjacobi->sf, MPIU_SCALAR, &(blockjacobi->localx[0]), globalx, MPI_SUM);CHKERRQ(ierr);
ierr = VecRestoreArray(x, &globalx);CHKERRQ(ierr);
ierr = PetscLogEventEnd(PC_tinyasm_apply, pc, 0, 0, 0);CHKERRQ(ierr);
PetscCall(VecGetArray(x, &globalx));
PetscCall(PetscSFReduceBegin(blockjacobi->sf, MPIU_SCALAR, &(blockjacobi->localx[0]), globalx, MPI_SUM));
PetscCall(PetscSFReduceEnd(blockjacobi->sf, MPIU_SCALAR, &(blockjacobi->localx[0]), globalx, MPI_SUM));
PetscCall(VecRestoreArray(x, &globalx));
PetscCall(PetscLogEventEnd(PC_tinyasm_apply, pc, 0, 0, 0));
return 0;
}

@@ -314,20 +306,19 @@ PetscErrorCode PCDestroy_TinyASM(PC pc) {

PetscErrorCode PCView_TinyASM(PC pc, PetscViewer viewer) {
PetscBool isascii;
PetscInt ierr;
ierr = PetscObjectTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
PetscCall(PetscObjectTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &isascii));
if(pc->data) {
auto blockjacobi = (BlockJacobi *)pc->data;
PetscInt nblocks = blockjacobi->dofsPerBlock.size();
std::vector<PetscInt> blocksizes(nblocks);
std::transform(blockjacobi->dofsPerBlock.begin(), blockjacobi->dofsPerBlock.end(), blocksizes.begin(), [](std::vector<PetscInt> &v){ return v.size(); });
PetscInt biggestblock = *std::max_element(blocksizes.begin(), blocksizes.end());
PetscScalar avgblock = std::accumulate(blocksizes.begin(), blocksizes.end(), 0.)/nblocks;
ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
ierr = PetscViewerASCIIPrintf(viewer, "TinyASM (block Jacobi) preconditioner with %" PetscInt_FMT " blocks\n", nblocks);CHKERRQ(ierr);
ierr = PetscViewerASCIIPrintf(viewer, "Average block size %f \n", avgblock);CHKERRQ(ierr);
ierr = PetscViewerASCIIPrintf(viewer, "Largest block size %" PetscInt_FMT " \n", biggestblock);CHKERRQ(ierr);
ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
PetscCall(PetscViewerASCIIPushTab(viewer));
PetscCall(PetscViewerASCIIPrintf(viewer, "TinyASM (block Jacobi) preconditioner with %" PetscInt_FMT " blocks\n", nblocks));
PetscCall(PetscViewerASCIIPrintf(viewer, "Average block size %f \n", avgblock));
PetscCall(PetscViewerASCIIPrintf(viewer, "Largest block size %" PetscInt_FMT " \n", biggestblock));
PetscCall(PetscViewerASCIIPopTab(viewer));
}
return 0;
}
@@ -394,8 +385,8 @@ PYBIND11_MODULE(_tinyasm, m) {
PetscLogEventRegister("PCTinyASMApply", PC_CLASSID, &PC_tinyasm_apply);
m.def("SetASMLocalSubdomains",
[](PC pc, std::vector<IS> ises, std::vector<PetscSF> sfs, std::vector<PetscInt> blocksizes, int localsize) {
PetscInt ierr, i, p, size, numDofs, blocksize;
ierr = PetscLogEventBegin(PC_tinyasm_SetASMLocalSubdomains, pc, 0, 0, 0);CHKERRQ(ierr);
PetscInt i, p, numDofs;
PetscCall(PetscLogEventBegin(PC_tinyasm_SetASMLocalSubdomains, pc, 0, 0, 0));
auto P = pc->pmat;
ISLocalToGlobalMapping lgr;
ISLocalToGlobalMapping lgc;
@@ -407,8 +398,8 @@ PYBIND11_MODULE(_tinyasm, m) {
const PetscInt* isarray;

for (p = 0; p < numBlocks; p++) {
ierr = ISGetSize(ises[p], &numDofs);CHKERRQ(ierr);
ierr = ISGetIndices(ises[p], &isarray);CHKERRQ(ierr);
PetscCall(ISGetSize(ises[p], &numDofs));
PetscCall(ISGetIndices(ises[p], &isarray));

dofsPerBlock[p] = vector<PetscInt>();
dofsPerBlock[p].reserve(numDofs);
@@ -417,17 +408,17 @@ PYBIND11_MODULE(_tinyasm, m) {
for (i = 0; i < numDofs; i++) {
dofsPerBlock[p].push_back(isarray[i]);
}
ierr = ISRestoreIndices(ises[p], &isarray);CHKERRQ(ierr);
PetscCall(ISRestoreIndices(ises[p], &isarray));
ISLocalToGlobalMappingApply(lgr, numDofs, &dofsPerBlock[p][0], &globalDofsPerBlock[p][0]);
}
DM dm, plex;
ierr = PCGetDM(pc, &dm);CHKERRQ(ierr);
DM dm;
PetscCall(PCGetDM(pc, &dm));

PetscSF newsf;
ierr = CreateCombinedSF(pc, sfs, blocksizes, &newsf);CHKERRQ(ierr);
PetscCall(CreateCombinedSF(pc, sfs, blocksizes, &newsf));
auto blockjacobi = new BlockJacobi(dofsPerBlock, globalDofsPerBlock, localsize, newsf);
pc->data = (void*)blockjacobi;
ierr = PetscLogEventEnd(PC_tinyasm_SetASMLocalSubdomains, pc, 0, 0, 0);CHKERRQ(ierr);
return (int) ierr;
PetscCall(PetscLogEventEnd(PC_tinyasm_SetASMLocalSubdomains, pc, 0, 0, 0));
return 0;
});
}
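
As a rough mental model for readers unfamiliar with the macro, PetscCall() behaves approximately like the sketch below: it evaluates the call and, if the result is nonzero, returns the error code from the enclosing function (the real macro in petscerror.h also records file, line, and function information for PETSc's error traceback). The name MyPetscCall is a stand-in to make clear this is not the actual definition.

/* Simplified sketch of the idea behind PetscCall(); not PETSc's exact macro. */
#define MyPetscCall(call) \
  do { \
    PetscErrorCode ierr_mypetsccall = (call); \
    if (ierr_mypetsccall != 0) return ierr_mypetsccall; \
  } while (0)

This is also why functions that use PetscCall(), such as PCApply_TinyASM above, must return PetscErrorCode (or a compatible integer type) rather than void.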
