Skip to content

Commit

Permalink
Fixes #3088: sparse sum nightly failures (#3098)
Browse files Browse the repository at this point in the history
This PR fixes #3088. In order to reproduce it I need to set the `ARKOUDA_DEVELOPER` flag to enable runtime checks, which I should've figured out sooner... Once we do this we get the following error message

```chapel
$CHPL_HOME/modules/internal/ChapelArray.chpl:1164: error: halt reached - array slice out of bounds
note: slice index was 804..815 but array bounds are 0..814
```

So I made all the loops serially and printed out the domains to find where the off by one was happening. This gives the right result with both `ARKOUDA_DEV` and `CHPL_DEV` set for the set seed and random. I tested this with various numbers of locales

Co-authored-by: Tess Hayes <stress-tess@users.noreply.github.com>
  • Loading branch information
stress-tess and stress-tess authored Apr 17, 2024
1 parent 8dc679d commit 8c1c57e
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 36 deletions.
2 changes: 1 addition & 1 deletion arkouda/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def attach_all(names: list):
return {n: attach(n) for n in names}


def sparse_sum_help(idx1, idx2, val1, val2, merge=False, percent_transfer_limit=100):
def sparse_sum_help(idx1, idx2, val1, val2, merge=True, percent_transfer_limit=100):
"""
Helper for summing two sparse matrices together
Expand Down
4 changes: 2 additions & 2 deletions src/ArraySetops.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -255,11 +255,11 @@ module ArraySetops
// this not a local slice, but it should (hopefully) take advantage of bulk transfer
if pullLocal[0] {
const pullFromASlice = (aDom.last+1)..#numPullLocal;
tmp[(aSlice.size+bSlice.size)..#(numPullLocal+1)] = [(key,val) in zip(a[pullFromASlice], aVal[pullFromASlice])] (key,val);
tmp[(aSlice.size+bSlice.size)..#(numPullLocal)] = [(key,val) in zip(a[pullFromASlice], aVal[pullFromASlice])] (key,val);
}
else if pullLocal[1] {
const pullFromBSlice = (bDom.last+1)..#numPullLocal;
tmp[(aSlice.size+bSlice.size)..#(numPullLocal+1)] = [(key,val) in zip(b[pullFromBSlice], bVal[pullFromBSlice])] (key,val);
tmp[(aSlice.size+bSlice.size)..#(numPullLocal)] = [(key,val) in zip(b[pullFromBSlice], bVal[pullFromBSlice])] (key,val);
}

// run chpl's builtin parallel radix sort for local arrays with a comparator defined in
Expand Down
63 changes: 30 additions & 33 deletions tests/util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,39 @@

class UtilTest(ArkoudaTest):
def test_sparse_sum_helper(self):
# seed and seed + 1 should produce completely different values...
# but I'm anxious about accidentally introducing dependent arrays
# that make this less likely to fail
rng = np.random.default_rng()
seed1 = rng.choice(2**63)
seed2 = rng.choice(2**63)
seed3 = rng.choice(2**63)
seed4 = rng.choice(2**63)
cfg = ak.get_config()
N = (10**3) * cfg["numLocales"]
select_from = ak.arange(N)
inds1 = select_from[ak.randint(0, 10, N, seed=seed1) % 3 == 0]
inds2 = select_from[ak.randint(0, 10, N, seed=seed2) % 3 == 0]
vals1 = ak.randint(-(2**32), 2**32, inds1.size, seed=seed3)
vals2 = ak.randint(-(2**32), 2**32, inds2.size, seed=seed4)

merge_idx, merge_vals = ak.util.sparse_sum_help(inds1, inds2, vals1, vals2, merge=True)
sort_idx, sort_vals = ak.util.sparse_sum_help(inds1, inds2, vals1, vals2, merge=False)
gb_idx, gb_vals = ak.GroupBy(ak.concatenate([inds1, inds2], ordered=False)).sum(
ak.concatenate((vals1, vals2), ordered=False)
)
seeds = [rng.choice(2**63), rng.choice(2**63), rng.choice(2**63), rng.choice(2**63)]
set_seeds = [1000509587142185552, 5931535381009490148, 5631286082363685405, 3867516237354681488]
# run twice: with random seeds and with the seeds that previously failed
for seed1, seed2, seed3, seed4 in seeds, set_seeds:
cfg = ak.get_config()
N = (10**3) * cfg["numLocales"]
select_from = ak.arange(N)
inds1 = select_from[ak.randint(0, 10, N, seed=seed1) % 3 == 0]
inds2 = select_from[ak.randint(0, 10, N, seed=seed2) % 3 == 0]
vals1 = ak.randint(-(2**32), 2**32, inds1.size, seed=seed3)
vals2 = ak.randint(-(2**32), 2**32, inds2.size, seed=seed4)

merge_idx, merge_vals = ak.util.sparse_sum_help(inds1, inds2, vals1, vals2, merge=True)
sort_idx, sort_vals = ak.util.sparse_sum_help(inds1, inds2, vals1, vals2, merge=False)
gb_idx, gb_vals = ak.GroupBy(ak.concatenate([inds1, inds2], ordered=False)).sum(
ak.concatenate((vals1, vals2), ordered=False)
)

def are_pdarrays_equal(pda1, pda2):
# we first check the sizes so that we won't hit shape mismatch
# before we can print the seed (due to short-circuiting)
return (pda1.size == pda2.size) and ((pda1 == pda2).all())
def are_pdarrays_equal(pda1, pda2):
# we first check the sizes so that we won't hit shape mismatch
# before we can print the seed (due to short-circuiting)
return (pda1.size == pda2.size) and ((pda1 == pda2).all())

cond = (
are_pdarrays_equal(merge_idx, sort_idx)
and are_pdarrays_equal(merge_idx, gb_idx)
and are_pdarrays_equal(merge_vals, sort_vals)
)

if not cond:
print(f"Failure with seeds:\n{seed1},\n{seed2},\n{seed3},\n{seed4}")
self.assertTrue(cond)
cond = (
are_pdarrays_equal(merge_idx, sort_idx)
and are_pdarrays_equal(merge_idx, gb_idx)
and are_pdarrays_equal(merge_vals, sort_vals)
)
if not cond:
print(f"\nnum locales: {cfg['numLocales']}")
print(f"Failure with seeds:\n{seed1},\n{seed2},\n{seed3},\n{seed4}")
self.assertTrue(cond)

def test_is_numeric(self):
a = ak.array(["a", "b"])
Expand Down

0 comments on commit 8c1c57e

Please sign in to comment.