Merge pull request #19 from int-brain-lab/bincount2D
Add `bincount2D` and bump version
chris-langfield authored Jun 27, 2023
2 parents 727dc23 + 4e88879 commit 177bf30
Showing 4 changed files with 194 additions and 45 deletions.
14 changes: 13 additions & 1 deletion CHANGELOG.md
@@ -1,5 +1,17 @@
# Changelog
## [Latest](https://github.com/int-brain-lab/iblutil/commits/main) [1.5.0]
## [Latest](https://github.com/int-brain-lab/iblutil/commits/main) [1.7.0]

### Added

- bincount2D moved from ibllib.processing to iblutil.numerical

## [1.6.0]

### Added

- numerical.rcoeff function that computes pairwise Pearson correlation coefficients for matrices

## [1.5.0]

### Added

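For downstream users, the move noted in the changelog amounts to an import change along these lines (a sketch only; whether ibllib.processing keeps a deprecated alias is not part of this diff):

# before this release, bincount2D lived in ibllib:
# from ibllib.processing import bincount2D
# from iblutil 1.7.0 onwards:
from iblutil.numerical import bincount2D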
2 changes: 1 addition & 1 deletion iblutil/__init__.py
@@ -1 +1 @@
__version__ = '1.6.0'
__version__ = '1.7.0'
116 changes: 95 additions & 21 deletions iblutil/numerical.py
@@ -3,7 +3,7 @@
import numpy as np
from numba import jit

D = TypeVar('D', bound=np.generic)
D = TypeVar("D", bound=np.generic)
Array = Union[np.ndarray, Sequence]


@@ -26,7 +26,7 @@ def between_sorted(sorted_v, bounds=None):
stops = stops[sbounds]
sel = sorted_v * 0
sel[np.searchsorted(sorted_v, starts)] = 1
istops = np.searchsorted(sorted_v, stops, side='right')
istops = np.searchsorted(sorted_v, stops, side="right")
sel[istops[istops < sorted_v.size]] += -1
return np.cumsum(sel).astype(bool)

@@ -93,16 +93,78 @@ def intersect2d(a0, a1, assume_unique=False):
:return: index of a0 such as intersection = a0[ia, :]
:return: index of b0 such as intersection = b0[ib, :]
"""
_, i0, i1 = np.intersect1d(a0[:, 0], a1[:, 0],
return_indices=True, assume_unique=assume_unique)
_, i0, i1 = np.intersect1d(
a0[:, 0], a1[:, 0], return_indices=True, assume_unique=assume_unique
)
for n in np.arange(1, a0.shape[1]):
_, ii0, ii1 = np.intersect1d(a0[i0, n], a1[i1, n],
return_indices=True, assume_unique=assume_unique)
_, ii0, ii1 = np.intersect1d(
a0[i0, n], a1[i1, n], return_indices=True, assume_unique=assume_unique
)
i0 = i0[ii0]
i1 = i1[ii1]
return a0[i0, :], i0, i1


def bincount2D(x, y, xbin=0, ybin=0, xlim=None, ylim=None, weights=None):
"""
Computes a 2D histogram by aggregating values in a 2D array.
:param x: values to bin along the 2nd dimension (c-contiguous)
:param y: values to bin along the 1st dimension
:param xbin:
scalar: bin size along 2nd dimension
0: aggregate according to unique values
array: aggregate according to exact values (count reduce operation)
:param ybin:
scalar: bin size along 1st dimension
0: aggregate according to unique values
array: aggregate according to exact values (count reduce operation)
:param xlim: (optional) 2 values (array or list) that restrict range along 2nd dimension
:param ylim: (optional) 2 values (array or list) that restrict range along 1st dimension
:param weights: (optional) defaults to None, weights to apply to each value for aggregation
:return: 3 numpy arrays MAP [ny,nx] image, xscale [nx], yscale [ny]
"""
# if no bounds provided, use min/max of vectors
if xlim is None:
xlim = [np.min(x), np.max(x)]
if ylim is None:
ylim = [np.min(y), np.max(y)]

def _get_scale_and_indices(v, bin, lim):
# if bin is a nonzero scalar, this is a bin size: create scale and indices
if np.isscalar(bin) and bin != 0:
scale = np.arange(lim[0], lim[1] + bin / 2, bin)
ind = (np.floor((v - lim[0]) / bin)).astype(np.int64)
# if bin == 0, aggregate over unique values
else:
scale, ind = np.unique(v, return_inverse=True)
return scale, ind

xscale, xind = _get_scale_and_indices(x, xbin, xlim)
yscale, yind = _get_scale_and_indices(y, ybin, ylim)
# aggregate by using bincount on absolute indices for a 2d array
nx, ny = [xscale.size, yscale.size]
ind2d = np.ravel_multi_index(np.c_[yind, xind].transpose(), dims=(ny, nx))
r = np.bincount(ind2d, minlength=nx * ny, weights=weights).reshape(ny, nx)

# if a set of specific values is requested output an array matching the scale dimensions
if not np.isscalar(xbin) and xbin.size > 1:
_, iout, ir = np.intersect1d(xbin, xscale, return_indices=True)
_r = r.copy()
r = np.zeros((ny, xbin.size))
r[:, iout] = _r[:, ir]
xscale = xbin

if not np.isscalar(ybin) and ybin.size > 1:
_, iout, ir = np.intersect1d(ybin, yscale, return_indices=True)
_r = r.copy()
r = np.zeros((ybin.size, r.shape[1]))
r[iout, :] = _r[ir, :]
yscale = ybin

return r, xscale, yscale


@jit(nopython=True)
def find_first_2d(mat, val):
"""
@@ -120,25 +182,35 @@ def find_first_2d(mat, val):

def rcoeff(x, y):
"""
Computes pairwise Person correlation coefficients for matrices.
Computes pairwise Pearson correlation coefficients for matrices.
That is for 2 matrices the same size, computes the row to row coefficients and outputs
a vector corresponding to the number of rows of the first matrix
If the second array is a vector then computes the correlation coefficient for all rows
a vector corresponding to the number of rows of the first matrix.
If the second array is a vector then computes the correlation coefficient for all rows.
:param x: np array [nc, ns]
:param y: np array [nc, ns] or [ns]
:return: r [nc]
"""

def normalize(z):
mean = np.mean(z, axis=-1)
return z - mean if mean.size == 1 else z - mean[:, np.newaxis]

xnorm = normalize(x)
ynorm = normalize(y)
rcor = np.sum(xnorm * ynorm, axis=-1) / np.sqrt(np.sum(np.square(xnorm), axis=-1) * np.sum(np.square(ynorm), axis=-1))
rcor = np.sum(xnorm * ynorm, axis=-1) / np.sqrt(
np.sum(np.square(xnorm), axis=-1) * np.sum(np.square(ynorm), axis=-1)
)
return rcor


def within_ranges(x: np.ndarray, ranges: Array, labels: Optional[Array] = None,
mode: str = 'vector', dtype: Type[D] = 'int8') -> np.ndarray:
def within_ranges(
x: np.ndarray,
ranges: Array,
labels: Optional[Array] = None,
mode: str = "vector",
dtype: Type[D] = "int8",
) -> np.ndarray:
"""
Detects which points of the input vector lie within one of the ranges specified in the ranges.
Returns an array the size of x with a 1 if the corresponding point is within a range.
@@ -220,32 +292,34 @@ def within_ranges(x: np.ndarray, ranges: Array, labels: Optional[Array] = None,

if labels is None:
# In 'matrix' mode default row index is 0
labels = np.zeros((n_ranges,), dtype='uint32')
if mode == 'vector': # Otherwise default numerical label is 1
labels = np.zeros((n_ranges,), dtype="uint32")
if mode == "vector": # Otherwise default numerical label is 1
labels += 1
assert len(labels) >= n_ranges, 'range labels do not match number of ranges'
assert len(labels) >= n_ranges, "range labels do not match number of ranges"
n_labels = np.unique(labels).size

# If no ranges given, short circuit function and return zeros
if n_ranges == 0:
return np.zeros_like(x, dtype=dtype)

# Check end comes after start in each case
assert np.all(np.diff(ranges, axis=1) > 0), 'ranges ends must all be greater than starts'
assert np.all(
np.diff(ranges, axis=1) > 0
), "ranges ends must all be greater than starts"

# Make array containing points, starts and finishes

# This order means it will be inclusive
to_sort = np.concatenate((ranges[:, 0], x, ranges[:, 1]))
# worst case O(n*log(n)) but will be better than this as most of the array is ordered;
# memory overhead ~n/2
idx = np.argsort(to_sort, kind='stable')
idx = np.argsort(to_sort, kind="stable")

# Make delta array containing 1 for every start and -1 for every stop
# with one row for each range label
if mode == 'matrix':
if mode == "matrix":
delta_shape = (n_labels, n_points + 2 * n_ranges)
delta = np.zeros(delta_shape, dtype='int8')
delta = np.zeros(delta_shape, dtype="int8")

delta[labels, np.arange(n_ranges)] = 1
delta[labels, n_points + n_ranges + np.arange(n_ranges)] = -1
@@ -261,9 +335,9 @@ def within_ranges(x: np.ndarray, ranges: Array, labels: Optional[Array] = None,
reordered[:, idx] = summed.reshape(delta_shape[0], -1)
return reordered[:, np.arange(n_ranges, n_points + n_ranges)]

elif mode == 'vector':
elif mode == "vector":
delta_shape = (n_points + 2 * n_ranges,)
r_delta = np.zeros(delta_shape, dtype='int32')
r_delta = np.zeros(delta_shape, dtype="int32")
r_delta[np.arange(n_ranges)] = labels
r_delta[n_points + n_ranges + np.arange(n_ranges)] = -labels

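As context for the new function above, here is a minimal usage sketch of `bincount2D` (illustrative values only; this snippet is not part of the committed code):

import numpy as np
from iblutil.numerical import bincount2D

# spike-raster style example: times along the 2nd dimension, cluster ids along the 1st
t = np.array([0.1, 0.2, 0.25, 0.9, 1.4, 1.45])  # values to bin along x
c = np.array([0, 1, 1, 0, 2, 2])                 # values to aggregate along y

# fixed-size time bins of 0.5, aggregation over unique cluster values (ybin=0)
r, tscale, cscale = bincount2D(t, c, xbin=0.5, ybin=0)
# r has shape (cscale.size, tscale.size); r[i, j] counts samples whose cluster is
# cscale[i] and whose time falls in the bin starting at tscale[j]

Passing an array for `xbin` or `ybin` instead pins the output to that exact scale, as exercised by the new `TestBincount2D` case below.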
107 changes: 85 additions & 22 deletions tests/test_numerical.py
@@ -5,7 +5,6 @@


class TestRcoeff(unittest.TestCase):

def test_rcoeff(self):
x = np.random.rand(2, 1000)
y = x[0, :]
@@ -17,13 +16,14 @@ def test_rcoeff(self):


class TestBetweeenSorted(unittest.TestCase):

def test_between_sorted_single_time(self):
# test single time, falling right on edges
t = np.arange(100)
bounds = [10, 25]
ind = num.between_sorted(t, bounds)
assert np.all(t[ind] == np.arange(int(np.ceil(bounds[0])), int(np.floor(bounds[1] + 1))))
assert np.all(
t[ind] == np.arange(int(np.ceil(bounds[0])), int(np.floor(bounds[1] + 1)))
)
# test single time in between edges
bounds = [10.4, 25.2]
ind = num.between_sorted(t, bounds)
@@ -36,23 +36,31 @@ def test_between_sorted_multiple_times(self):
t = np.arange(100)
# non overlapping ranges
bounds = np.array([[10.4, 25.2], [67.2, 86.4]])
ind_ = np.logical_or(num.between_sorted(t, bounds[0]), num.between_sorted(t, bounds[1]))
ind_ = np.logical_or(
num.between_sorted(t, bounds[0]), num.between_sorted(t, bounds[1])
)
ind = num.between_sorted(t, bounds)
assert np.all(ind == ind_)
# overlapping ranges
bounds = np.array([[10.4, 83.2], [67.2, 86.4]])
ind_ = np.logical_or(num.between_sorted(t, bounds[0]), num.between_sorted(t, bounds[1]))
ind_ = np.logical_or(
num.between_sorted(t, bounds[0]), num.between_sorted(t, bounds[1])
)
ind = num.between_sorted(t, bounds)
assert np.all(ind == ind_)
# one range contains the other
bounds = np.array([[10.4, 83.2], [34, 78]])
ind_ = np.logical_or(num.between_sorted(t, bounds[0]), num.between_sorted(t, bounds[1]))
ind_ = np.logical_or(
num.between_sorted(t, bounds[0]), num.between_sorted(t, bounds[1])
)
ind = num.between_sorted(t, bounds)
assert np.all(ind == ind_)
# case when one range starts exactly where another stops
bounds = np.array([[10.4, 83.2], [83.2, 84]])
ind = num.between_sorted(t, bounds)
ind_ = np.logical_or(num.between_sorted(t, bounds[0]), num.between_sorted(t, bounds[1]))
ind_ = np.logical_or(
num.between_sorted(t, bounds[0]), num.between_sorted(t, bounds[1])
)
assert np.all(ind == ind_)

def test_between_sorted_out_of_range(self):
@@ -64,7 +72,6 @@ def test_between_sorted_out_of_range(self):


class TestIsmember(unittest.TestCase):

def test_ismember2d(self):
b = np.reshape([0, 0, 0, 1, 1, 1], [3, 2])
locb = np.array([0, 1, 0, 2, 2, 1])
@@ -80,8 +87,12 @@ def test_ismember2d_uuids(self):
a = np.random.randint(0, nb + 3, na)
b = np.arange(nb)
lia, locb = num.ismember(a, b)
bb = np.random.randint(low=np.iinfo(np.int64).min, high=np.iinfo(np.int64).max,
size=(nb, 2), dtype=np.int64)
bb = np.random.randint(
low=np.iinfo(np.int64).min,
high=np.iinfo(np.int64).max,
size=(nb, 2),
dtype=np.int64,
)
aa = np.zeros((na, 2), dtype=np.int64)
aa[lia, :] = bb[locb, :]
lia_, locb_ = num.ismember2d(aa, bb)
@@ -127,27 +138,38 @@ def test_within_ranges(self):

# Matrix mode
ranges = np.array([[1, 2], [5, 8]])
verifiable = num.within_ranges(np.arange(10) + 1, ranges,
labels=np.array([0, 1]), mode='matrix')
expected = np.array([[1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 1, 0, 0]], dtype=int)
verifiable = num.within_ranges(
np.arange(10) + 1, ranges, labels=np.array([0, 1]), mode="matrix"
)
expected = np.array(
[[1, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1, 0, 0]], dtype=int
)
np.testing.assert_array_equal(verifiable, expected)

# Test overlap
verifiable = num.within_ranges(np.arange(11), [(1, 2), (5, 8), (4, 6)],
labels=[0, 1, 1], mode='matrix')
expected = np.array([[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 2, 2, 1, 1, 0, 0]], dtype=int)
verifiable = num.within_ranges(
np.arange(11), [(1, 2), (5, 8), (4, 6)], labels=[0, 1, 1], mode="matrix"
)
expected = np.array(
[[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 2, 2, 1, 1, 0, 0]],
dtype=int,
)
np.testing.assert_array_equal(verifiable, expected)

# Vector mode
verifiable = num.within_ranges(np.arange(10) + 1, ranges, np.array([3, 1]), mode='vector')
verifiable = num.within_ranges(
np.arange(10) + 1, ranges, np.array([3, 1]), mode="vector"
)
expected = np.array([3, 3, 0, 0, 1, 1, 1, 1, 0, 0], dtype=int)
np.testing.assert_array_equal(verifiable, expected)

# Boolean
verifiable = num.within_ranges(np.arange(11), [(1, 2), (5, 8), (4, 6)], dtype=bool)
expected = np.array([False, True, True, False, True, True, True, True, True, False, False])
verifiable = num.within_ranges(
np.arange(11), [(1, 2), (5, 8), (4, 6)], dtype=bool
)
expected = np.array(
[False, True, True, False, True, True, True, True, True, False, False]
)
np.testing.assert_array_equal(verifiable, expected)

# Edge cases
@@ -156,4 +178,45 @@
np.testing.assert_array_equal(verifiable, expected)

with self.assertRaises(ValueError):
num.within_ranges(np.arange(11), [(1, 2)], mode='array')
num.within_ranges(np.arange(11), [(1, 2)], mode="array")


class TestBincount2D(unittest.TestCase):
def test_bincount_2d(self):
# first test simple with indices
x = np.array([0, 1, 1, 2, 2, 3, 3, 3])
y = np.array([3, 2, 2, 1, 1, 0, 0, 0])
r, xscale, yscale = num.bincount2D(x, y, xbin=1, ybin=1)
r_ = np.zeros_like(r)
# sometimes life would have been simpler in c:
for ix, iy in zip(x, y):
r_[iy, ix] += 1
self.assertTrue(np.all(np.equal(r_, r)))
# test with negative values
y = np.array([3, 2, 2, 1, 1, 0, 0, 0]) - 5
r, xscale, yscale = num.bincount2D(x, y, xbin=1, ybin=1)
self.assertTrue(np.all(np.equal(r_, r)))
# test unequal bins
r, xscale, yscale = num.bincount2D(x / 2, y / 2, xbin=1, ybin=2)
r_ = np.zeros_like(r)
for ix, iy in zip(np.floor(x / 2), np.floor((y / 2 + 2.5) / 2)):
r_[int(iy), int(ix)] += 1
self.assertTrue(np.all(r_ == r))
# test with weights
w = np.ones_like(x) * 2
r, xscale, yscale = num.bincount2D(x / 2, y / 2, xbin=1, ybin=2, weights=w)
self.assertTrue(np.all(r_ * 2 == r))
# test aggregation instead of binning
x = np.array([0, 1, 1, 2, 2, 4, 4, 4])
y = np.array([4, 2, 2, 1, 1, 0, 0, 0])
r, xscale, yscale = num.bincount2D(x, y)
self.assertTrue(
np.all(xscale == yscale) and np.all(xscale == np.array([0, 1, 2, 4]))
)
# test aggregation on a fixed scale
r, xscale, yscale = num.bincount2D(
x + 10, y + 10, xbin=np.arange(5) + 10, ybin=np.arange(3) + 10
)
self.assertTrue(np.all(xscale == np.arange(5) + 10))
self.assertTrue(np.all(yscale == np.arange(3) + 10))
self.assertTrue(np.all(r.shape == (3, 5)))
