diff --git a/README.md b/README.md index 087e7ad..b219597 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ import numpy as np # Development * Install dev/build/test dependencies as denoted in setup.py -* `pip install -e .` +* `CC=clang pip install -e .` * `pytest` # Acknowledgements diff --git a/numpy_minmax/__init__.py b/numpy_minmax/__init__.py index 7c398ca..a9288a5 100644 --- a/numpy_minmax/__init__.py +++ b/numpy_minmax/__init__.py @@ -17,7 +17,7 @@ def minmax(a: NDArray) -> Tuple: len(a), ) return np.float32(result.min_val), np.float32(result.max_val) - elif a.ndim == 2 and a.shape[1] > 16: + elif a.ndim == 2: result = _numpy_minmax.lib.minmax_2d( _numpy_minmax.ffi.cast("float *", a.ctypes.data), a.shape[0], diff --git a/numpy_minmax/_minmax.c b/numpy_minmax/_minmax.c index 7436fcf..7f7d82b 100644 --- a/numpy_minmax/_minmax.c +++ b/numpy_minmax/_minmax.c @@ -79,7 +79,6 @@ MinMaxResult minmax_1d(float *a, size_t length) { if (length >= 16) { return minmax_avx2_1d(a, length); } else { - // TODO: test if this is faster than the numpy equivalent return minmax_pairwise_1d(a, length); } } @@ -124,12 +123,42 @@ MinMaxResult minmax_avx2_2d(float *a, size_t shape_0, size_t shape_1) { return result; } +MinMaxResult minmax_pairwise_2d(float *a, size_t shape_0, size_t shape_1) { + MinMaxResult result = { .min_val = FLT_MAX, .max_val = -FLT_MAX }; + + // Return early for empty arrays, reporting 0.0 for both min and max instead of the FLT_MAX sentinels + if (shape_0 == 0 || shape_1 == 0) { + return (MinMaxResult){0.0, 0.0}; + } + + for (size_t row = 0; row < shape_0; ++row) { + size_t i = 0; + float* row_ptr = a + (row * shape_1); + + // Odd row length: fold the unpaired last element into the running min/max here, because the pair loop below stops before it. 
+ if (shape_1 % 2 != 0) { + float last_elem = row_ptr[shape_1 - 1]; + if (last_elem < result.min_val) result.min_val = last_elem; + if (last_elem > result.max_val) result.max_val = last_elem; + } + + // Process the row two elements at a time: order each pair, then compare only the smaller against min and only the larger against max + for (; i < shape_1 - 1; i += 2) { + float smaller = row_ptr[i] < row_ptr[i + 1] ? row_ptr[i] : row_ptr[i + 1]; + float larger = row_ptr[i] < row_ptr[i + 1] ? row_ptr[i + 1] : row_ptr[i]; + + if (smaller < result.min_val) result.min_val = smaller; + if (larger > result.max_val) result.max_val = larger; + } + } + + return result; +} + MinMaxResult minmax_2d(float *a, size_t shape_0, size_t shape_1) { - return minmax_avx2_2d(a, shape_0, shape_1); - // TODO: -// if (shape_1 >= 16) { -// return minmax_avx2_2d(a, length); -// } else { -// return minmax_pairwise_2d(a, length); -// } + if (shape_1 >= 16) { + return minmax_avx2_2d(a, shape_0, shape_1); + } else { + return minmax_pairwise_2d(a, shape_0, shape_1); + } } diff --git a/packaging.md b/packaging.md index f47b47c..7f57c3c 100644 --- a/packaging.md +++ b/packaging.md @@ -1,5 +1,5 @@ * Bump version in `numpy_minmax/__init__.py` -* `pip install -e . && pytest` +* `CC=clang pip install -e . 
&& pytest` * Update CHANGELOG.md * Commit and push the change with a commit message like this: "Release vx.y.z" (replace x.y.z with the package version) * Wait for build workflow in Github Actions to complete diff --git a/scripts/perf_benchmark.py b/scripts/perf_benchmark.py index 41a1c7c..ee45403 100644 --- a/scripts/perf_benchmark.py +++ b/scripts/perf_benchmark.py @@ -60,6 +60,27 @@ def perf_benchmark_many_small_1d_c_contiguous(): min_val, max_val = numpy_minmax.minmax(a) +def perf_benchmark_many_small_2d_c_contiguous(): + print("===\nperf_benchmark_many_small_2d_c_contiguous:") + arrays = [] + for i in range(100_000): + a = np.random.uniform(low=-4.0, high=3.9, size=(3, 9)).astype(np.float32) + arrays.append(a) + + with timer("numpy.amax and numpy.amin sequentially"): + for a in arrays: + min_val = np.amin(a) + max_val = np.amax(a) + + with timer("diplib"): + for a in arrays: + min_val, max_val = dip.MaximumAndMinimum(a) + + with timer("minmax") as t: + for a in arrays: + min_val, max_val = numpy_minmax.minmax(a) + + def perf_benchmark_large_1d_c_contiguous(): print("===\nperf_benchmark_large_1d_c_contiguous:") a = np.random.uniform(low=-4.0, high=3.9, size=(999_999_999,)).astype(np.float32) @@ -158,6 +179,7 @@ def perf_benchmark_large_2d_not_c_contiguous(): if __name__ == "__main__": perf_benchmark_many_small_1d_c_contiguous() + perf_benchmark_many_small_2d_c_contiguous() perf_benchmark_large_1d_c_contiguous() perf_benchmark_large_1d_not_c_contiguous() perf_benchmark_large_2d_c_contiguous() diff --git a/tests/test_minmax.py b/tests/test_minmax.py index f3f36be..83cb584 100644 --- a/tests/test_minmax.py +++ b/tests/test_minmax.py @@ -53,11 +53,23 @@ def test_minmax_float64_numpy_fallback(self): assert isinstance(min_val, np.float64) assert isinstance(max_val, np.float64) - def test_minmax_2d_shape(self): - arr = np.arange(16, dtype=np.float32).reshape((2, 8)) + def test_minmax_2d_small1(self): + arr = np.random.uniform(low=-6.0, high=3.0, size=(15, 
2)).astype(np.float32) min_val, max_val = numpy_minmax.minmax(arr) - assert min_val == 0.0 - assert max_val == 15.0 + assert min_val == np.amin(arr) + assert max_val == np.amax(arr) + + def test_minmax_2d_small2(self): + arr = np.random.uniform(low=-6.0, high=3.0, size=(2, 15)).astype(np.float32) + min_val, max_val = numpy_minmax.minmax(arr) + assert min_val == np.amin(arr) + assert max_val == np.amax(arr) + + def test_minmax_2d_shape_large(self): + arr = np.random.uniform(low=-6.0, high=3.0, size=(2, 999)).astype(np.float32) + min_val, max_val = numpy_minmax.minmax(arr) + assert min_val == np.amin(arr) + assert max_val == np.amax(arr) @pytest.mark.parametrize("shape", [(0,), (0, 0)]) def test_minmax_empty_array(self, shape):