Skip to content

Commit

Permalink
pre-calculate base offsets for broadcasting not calculate for each step
Browse files Browse the repository at this point in the history
  • Loading branch information
mohawk2 committed Mar 11, 2024
1 parent 254880d commit 079134a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 23 deletions.
39 changes: 17 additions & 22 deletions Basic/Core/pdlbroadcast.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,7 @@ PDL_Indx* pdl_get_threadoffsp_int(pdl_broadcast *broadcast, int *pthr, PDL_Indx
}

void pdl_freebroadcaststruct(pdl_broadcast *broadcast) {
PDLDEBUG_f(printf("freebroadcaststruct(%p, %p %p %p %p %p %p)\n",
(void*)broadcast,
(void*)(broadcast->inds), (void*)(broadcast->dims), (void*)(broadcast->offs),
(void*)(broadcast->incs), (void*)(broadcast->flags), (void*)(broadcast->pdls)));
PDLDEBUG_f(printf("freebroadcaststruct(%p)\n", broadcast));
if (!broadcast->inds) {return;}
Safefree(broadcast->inds);
Safefree(broadcast->dims);
Expand All @@ -85,10 +82,10 @@ void pdl_freebroadcaststruct(pdl_broadcast *broadcast) {
}

void pdl_clearbroadcaststruct(pdl_broadcast *it) {
PDLDEBUG_f(printf("clearbroadcaststruct(%p)\n", (void*)it));
it->transvtable = 0;it->inds = 0;it->dims = 0;
it->ndims = it->nimpl = it->npdls = 0; it->offs = 0;
it->pdls = 0;it->incs = 0; it->realdims=0; it->flags=0;
PDLDEBUG_f(printf("clearbroadcaststruct(%p)\n", it));
it->transvtable=0; it->pdls=0; it->flags = 0;
it->ndims = it->nimpl = it->npdls = 0;
it->offs = it->incs = it->realdims = it->inds = it->dims = 0;
it->gflags=0; /* unsets PDL_BROADCAST_INITIALIZED among others */
PDL_CLRMAGIC(it);
}
Expand Down Expand Up @@ -407,7 +404,7 @@ pdl_error pdl_initbroadcaststruct(int nobl,
if (broadcast->inds == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->inds in pdlbroadcast.c");
Newxz(broadcast->dims, ndims * nthr1, PDL_Indx);
if (broadcast->dims == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->dims in pdlbroadcast.c");
Newxz(broadcast->offs, npdls * nthr1, PDL_Indx); /* Create space for pthread-specific offs */
Newxz(broadcast->offs, 2 * npdls * nthr1, PDL_Indx); /* Create space for pthread-specific offs, then CORE21 also pre-calculated offsets (hence 2x) */
if (broadcast->offs == NULL) return pdl_make_error_simple(PDL_EFATAL, "Failed to allocate memory for broadcast->offs in pdlbroadcast.c");
}
for (nth=0; nth<ndims; nth++) broadcast->dims[nth]=1; // all start size 1
Expand Down Expand Up @@ -471,7 +468,6 @@ pdl_error pdl_initbroadcaststruct(int nobl,
}

/* If threading, make the true offsets and dims.. */

broadcast->mag_skip = 0;
broadcast->mag_stride = 0;
if (nthr > 0) {
Expand Down Expand Up @@ -529,9 +525,14 @@ See the manual for why this is impossible");

int pdl_startbroadcastloop(pdl_broadcast *broadcast,pdl_error (*func)(pdl_trans *),
pdl_trans *t, pdl_error *error_ret) {
PDL_Indx j, npdls = broadcast->npdls;
PDL_Indx i, j, npdls = broadcast->npdls;
PDL_Indx *offsp; int thr;
PDL_Indx *inds, *dims;
/* Pre-calculate base offsets for all pdls and thr once */
PDL_Indx nthr1 = PDLMAX(broadcast->mag_nthr, 1);
for (j=0; j<npdls; j++)
for (i=0; i<nthr1; i++)
broadcast->offs[j + i*npdls + nthr1*npdls] = PDL_BRC_THR_OFFSET(broadcast, i, j);
if ( (broadcast->gflags & (PDL_BROADCAST_MAGICKED | PDL_BROADCAST_MAGICK_BUSY))
== PDL_BROADCAST_MAGICKED ) {
/* If no function supplied (i.e. being called from PDL::broadcast_over), don't run in parallel */
Expand Down Expand Up @@ -566,15 +567,8 @@ int pdl_startbroadcastloop(pdl_broadcast *broadcast,pdl_error (*func)(pdl_trans
if (!offsp) return -1;
for (j=0; j<broadcast->ndims; j++)
if (!dims[j]) return 1; /* do nothing if empty */
for (j=0; j<npdls; j++) {
offsp[j] = PDL_BREPROFFS(broadcast->pdls[j],broadcast->flags[j]);
}
if (thr)
for (j=0; j<npdls; j++)
offsp[j] += PDL_BISTEMP(broadcast->flags[j])
? thr * broadcast->pdls[j]->dimincs[broadcast->pdls[j]->ndims-1]
: PDL_BRC_OFFSET(thr, broadcast) *
PDL_BRC_INC(broadcast->incs, broadcast->npdls, j, broadcast->mag_nth);
for (j=0; j<npdls; j++)
offsp[j] = broadcast->offs[j + thr*npdls + nthr1*npdls];
return 0;
}

Expand All @@ -585,6 +579,7 @@ int pdl_iterbroadcastloop(pdl_broadcast *broadcast,PDL_Indx nth) {
int another_broadcastloop = 0;
PDL_Indx *offsp; int thr;
PDL_Indx *inds, *dims;
PDL_Indx nthr1 = PDLMAX(broadcast->mag_nthr, 1), npdls = broadcast->npdls; /* CORE21 have new offs_thr area instead of back end of offs */
offsp = pdl_get_threadoffsp_int(broadcast,&thr, &inds, &dims);
if (!offsp) return -1;
for (i=nth; i<broadcast->ndims; i++) {
Expand All @@ -593,9 +588,9 @@ int pdl_iterbroadcastloop(pdl_broadcast *broadcast,PDL_Indx nth) {
}
if (another_broadcastloop)
for (j=0; j<broadcast->npdls; j++) {
offsp[j] = PDL_BRC_THR_OFFSET(broadcast, thr, j);
offsp[j] = broadcast->offs[j + thr*npdls + nthr1*npdls];
for (i=nth; i<broadcast->ndims; i++)
offsp[j] += PDL_BRC_INC(broadcast->incs, broadcast->npdls, j, i) * inds[i];
offsp[j] += PDL_BRC_INC(broadcast->incs, npdls, j, i) * inds[i];
}
return another_broadcastloop;
}
2 changes: 1 addition & 1 deletion Basic/Core/pdlcore.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define __PDLCORE_H

/* version 20: memory-management changes */
/* on 21, unify pdl_broadcast per_pdl_flags, par_flags; remove threadloop #defines; change creating to char; relocate struct pdl.value appropriately, remove pdl_null, safe_indterm, initbroadcaststruct to take trans & remove in[cd]_sizes, remove broadcaststruct from Core list */
/* on 21, look for comments "CORE21", unify pdl_trans per_pdl_flags, par_flags; remove threadloop #defines; change creating to char; relocate struct pdl.value appropriately, remove pdl_null, safe_indterm, initbroadcaststruct to take trans & remove in[cd]_sizes, remove broadcaststruct from Core list, maybe make pdl_broadcast not inside struct pdl, break pdl_broadcast offs part deux (pre-calc) into separate "offs_thr" */
#define PDL_CORE_VERSION 20
#define startbroadcastloop startthreadloop
#define pdl_startbroadcastloop pdl_startthreadloop
Expand Down

0 comments on commit 079134a

Please sign in to comment.