Skip to content

Commit

Permalink
Optimize and test perf on small 2D arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
iver56 committed Jan 11, 2024
1 parent 21d9b0a commit fbd78dd
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 15 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import numpy as np
# Development

* Install dev/build/test dependencies as denoted in setup.py
* `pip install -e .`
* `CC=clang pip install -e .`
* `pytest`

# Acknowledgements
Expand Down
2 changes: 1 addition & 1 deletion numpy_minmax/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def minmax(a: NDArray) -> Tuple:
len(a),
)
return np.float32(result.min_val), np.float32(result.max_val)
elif a.ndim == 2 and a.shape[1] > 16:
elif a.ndim == 2:
result = _numpy_minmax.lib.minmax_2d(
_numpy_minmax.ffi.cast("float *", a.ctypes.data),
a.shape[0],
Expand Down
45 changes: 37 additions & 8 deletions numpy_minmax/_minmax.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ MinMaxResult minmax_1d(float *a, size_t length) {
if (length >= 16) {
return minmax_avx2_1d(a, length);
} else {
// TODO: test if this is faster than the numpy equivalent
return minmax_pairwise_1d(a, length);
}
}
Expand Down Expand Up @@ -124,12 +123,42 @@ MinMaxResult minmax_avx2_2d(float *a, size_t shape_0, size_t shape_1) {
return result;
}

// Scalar (non-SIMD) min/max scan over a 2D float array stored row after row.
//
// a:       pointer to the first element; row r starts at a + r * shape_1
// shape_0: number of rows
// shape_1: number of elements per row
//
// Returns the smallest and largest value seen. If either dimension is zero,
// both fields of the result are 0.0.
MinMaxResult minmax_pairwise_2d(float *a, size_t shape_0, size_t shape_1) {
    // Nothing to scan: report zeros instead of the FLT_MAX sentinels.
    if (shape_0 == 0 || shape_1 == 0) {
        return (MinMaxResult){0.0, 0.0};
    }

    // Sentinels that any real element will replace on its first comparison.
    float lowest = FLT_MAX;
    float highest = -FLT_MAX;

    // Both values are identical for every row, so compute them once.
    const int has_unpaired_tail = (shape_1 % 2) != 0;
    const size_t paired_len = shape_1 - (shape_1 % 2);

    for (size_t r = 0; r < shape_0; ++r) {
        const float *cells = a + (r * shape_1);

        // An odd-length row leaves one element without a partner; fold it in
        // on its own before walking the pairs.
        if (has_unpaired_tail) {
            const float tail = cells[shape_1 - 1];
            if (tail < lowest) lowest = tail;
            if (tail > highest) highest = tail;
        }

        // Order each pair once, then compare the smaller element only against
        // the running minimum and the larger only against the running maximum.
        for (size_t c = 0; c < paired_len; c += 2) {
            float pair_lo;
            float pair_hi;
            if (cells[c] < cells[c + 1]) {
                pair_lo = cells[c];
                pair_hi = cells[c + 1];
            } else {
                pair_lo = cells[c + 1];
                pair_hi = cells[c];
            }

            if (pair_lo < lowest) lowest = pair_lo;
            if (pair_hi > highest) highest = pair_hi;
        }
    }

    return (MinMaxResult){ .min_val = lowest, .max_val = highest };
}

// Compute the min and max of a 2D float array with shape_0 rows of shape_1
// elements each, choosing the implementation by row width.
//
// Rows of at least 16 elements are handed to the AVX2 implementation; narrower
// rows use the scalar pairwise implementation. This is the same 16-element
// cutoff that minmax_1d applies to its length.
MinMaxResult minmax_2d(float *a, size_t shape_0, size_t shape_1) {
    if (shape_1 >= 16) {
        return minmax_avx2_2d(a, shape_0, shape_1);
    }
    return minmax_pairwise_2d(a, shape_0, shape_1);
}
2 changes: 1 addition & 1 deletion packaging.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
* Bump version in `numpy_minmax/__init__.py`
* `pip install -e . && pytest`
* `CC=clang pip install -e . && pytest`
* Update CHANGELOG.md
* Commit and push the change with a commit message like this: "Release vx.y.z" (replace x.y.z with the package version)
* Wait for the build workflow in GitHub Actions to complete
Expand Down
22 changes: 22 additions & 0 deletions scripts/perf_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,27 @@ def perf_benchmark_many_small_1d_c_contiguous():
min_val, max_val = numpy_minmax.minmax(a)


def perf_benchmark_many_small_2d_c_contiguous():
    """Time min/max extraction over 100,000 small (3, 9) float32 arrays.

    The same inputs are run through three approaches, each inside its own
    ``timer`` section: ``np.amin`` followed by ``np.amax``,
    ``dip.MaximumAndMinimum`` (labelled "diplib"), and ``numpy_minmax.minmax``.
    """
    print("===\nperf_benchmark_many_small_2d_c_contiguous:")

    # All inputs are created here, outside the timed sections below.
    samples = [
        np.random.uniform(low=-4.0, high=3.9, size=(3, 9)).astype(np.float32)
        for _ in range(100_000)
    ]

    with timer("numpy.amax and numpy.amin sequentially"):
        for sample in samples:
            lowest = np.amin(sample)
            highest = np.amax(sample)

    with timer("diplib"):
        for sample in samples:
            lowest, highest = dip.MaximumAndMinimum(sample)

    with timer("minmax"):
        for sample in samples:
            lowest, highest = numpy_minmax.minmax(sample)


def perf_benchmark_large_1d_c_contiguous():
print("===\nperf_benchmark_large_1d_c_contiguous:")
a = np.random.uniform(low=-4.0, high=3.9, size=(999_999_999,)).astype(np.float32)
Expand Down Expand Up @@ -158,6 +179,7 @@ def perf_benchmark_large_2d_not_c_contiguous():

if __name__ == "__main__":
perf_benchmark_many_small_1d_c_contiguous()
perf_benchmark_many_small_2d_c_contiguous()
perf_benchmark_large_1d_c_contiguous()
perf_benchmark_large_1d_not_c_contiguous()
perf_benchmark_large_2d_c_contiguous()
Expand Down
20 changes: 16 additions & 4 deletions tests/test_minmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,23 @@ def test_minmax_float64_numpy_fallback(self):
assert isinstance(min_val, np.float64)
assert isinstance(max_val, np.float64)

def test_minmax_2d_shape(self):
    """A (2, 8) float32 array of 0..15 yields exactly 0.0 and 15.0."""
    # arange(16) makes the expected extremes known constants.
    arr = np.arange(16, dtype=np.float32).reshape((2, 8))
    min_val, max_val = numpy_minmax.minmax(arr)
    assert min_val == 0.0
    assert max_val == 15.0
def test_minmax_2d_small1(self):
    """A random (15, 2) float32 array gives the same result as NumPy."""
    arr = np.random.uniform(low=-6.0, high=3.0, size=(15, 2)).astype(np.float32)
    min_val, max_val = numpy_minmax.minmax(arr)
    # The input is random, so the expected values must be derived from the
    # data itself rather than from fixed constants.
    assert min_val == np.amin(arr)
    assert max_val == np.amax(arr)

def test_minmax_2d_small2(self):
    """A random (2, 15) float32 array gives the same result as NumPy."""
    data = np.random.uniform(low=-6.0, high=3.0, size=(2, 15)).astype(np.float32)
    lowest, highest = numpy_minmax.minmax(data)
    # NumPy's own reductions serve as the reference values.
    expected_lowest = np.amin(data)
    expected_highest = np.amax(data)
    assert lowest == expected_lowest
    assert highest == expected_highest

def test_minmax_2d_shape_large(self):
    """A random (2, 999) float32 array gives the same result as NumPy."""
    data = np.random.uniform(low=-6.0, high=3.0, size=(2, 999)).astype(np.float32)
    lowest, highest = numpy_minmax.minmax(data)
    # NumPy's own reductions serve as the reference values.
    expected_lowest = np.amin(data)
    expected_highest = np.amax(data)
    assert lowest == expected_lowest
    assert highest == expected_highest

@pytest.mark.parametrize("shape", [(0,), (0, 0)])
def test_minmax_empty_array(self, shape):
Expand Down

0 comments on commit fbd78dd

Please sign in to comment.