closes #2927 power divergence statistic (#2932)

* closes #2927 power divergence statistic
* add scipy to requirements
* add arkouda/akstats/_stats_py.pyi
* Fix F403 and F401 error codes on flake8 arkouda from arkouda/akmath/__init__.py and arkouda/akstats/__init__.py
* un-pin scipy from specific version
* add scipy license and minor changes in response to code review
* Update tests/akmath/akmath_test.py

---------

Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
Co-authored-by: pierce <48131946+pierce314159@users.noreply.github.com>
Bears-R-Us · Feb 2, 2024 · 3613d76 · 3613d76
1 parent ab0bb3f
commit 3613d76
Show file tree

Hide file tree

Showing 17 changed files with 529 additions and 4 deletions.
diff --git a/PROTO_tests/tests/akmath/akmath_tests.py b/PROTO_tests/tests/akmath/akmath_tests.py
@@ -0,0 +1,28 @@
+import math
+
+import numpy as np
+
+import arkouda as ak
+from arkouda.akmath import xlogy
+from arkouda.pdarrayclass import pdarray
+
+
+class TestStats:
+    """Checks of ``arkouda.akmath.xlogy`` against ``scipy.special.xlogy``."""
+
+    def test_xlogy(self):
+        """Compare xlogy with scipy for scalar and pdarray multipliers."""
+        from scipy.special import xlogy as scipy_xlogy
+
+        log_args = [ak.array([1, 2, 3]), ak.array([10, 100, 100]), ak.array([-1, 0, np.nan])]
+        multipliers = [3, 5, np.float64(6), ak.array([1.0, 2.0, 4.5])]
+
+        for log_arg in log_args:
+            for multiplier in multipliers:
+                actual = xlogy(multiplier, log_arg)
+
+                # scipy works on numpy data, so pull any pdarray operands to the client.
+                np_log_arg = log_arg.to_ndarray()
+                np_multiplier = (
+                    multiplier.to_ndarray() if isinstance(multiplier, pdarray) else multiplier
+                )
+
+                expected = scipy_xlogy(np_multiplier, np_log_arg)
+
+                assert np.allclose(actual.to_ndarray(), expected, equal_nan=True)
diff --git a/PROTO_tests/tests/akstats/akstats_test.py b/PROTO_tests/tests/akstats/akstats_test.py
@@ -0,0 +1,73 @@
+import math
+
+import numpy as np
+from scipy.stats import power_divergence as scipy_power_divergence
+
+import arkouda as ak
+from arkouda.akstats import power_divergence as ak_power_divergence
+
+
+class TestStats:
+    """Checks of arkouda's power_divergence and chisquare against scipy.stats."""
+
+    @staticmethod
+    def create_stat_test_pairs():
+        """Build the (f_obs, f_exp) fixtures; f_exp is None in one of the pairs."""
+        uniform_steps = [10000000, 20000000, 30000000, 40000000, 50000000, 60000000, 70000000]
+        return [
+            (
+                ak.array(uniform_steps),
+                ak.array([10000000, 20000000, 30000000, 40000001, 50000000, 60000000, 70000000]),
+            ),
+            (ak.array(uniform_steps), None),
+            (ak.array([44, 24, 29, 3]) / 100 * 189, ak.array([43, 52, 54, 40])),
+        ]
+
+    @staticmethod
+    def _ndarray_or_none(values):
+        """Return ``values.to_ndarray()``, passing ``None`` through unchanged."""
+        return None if values is None else values.to_ndarray()
+
+    def test_power_divergence(self):
+        """Compare power_divergence with scipy for every named lambda_ and ddof 0..5."""
+        statistic_names = [
+            "pearson",
+            "log-likelihood",
+            "freeman-tukey",
+            "mod-log-likelihood",
+            "neyman",
+            "cressie-read",
+        ]
+        ddof_values = [0, 1, 2, 3, 4, 5]
+
+        for observed, expected in self.create_stat_test_pairs():
+            for statistic_name in statistic_names:
+                for ddof in ddof_values:
+                    ak_result = ak_power_divergence(
+                        observed, expected, ddof=ddof, lambda_=statistic_name
+                    )
+
+                    np_observed = observed.to_ndarray()
+                    np_expected = self._ndarray_or_none(expected)
+
+                    scipy_result = scipy_power_divergence(
+                        np_observed, np_expected, ddof=ddof, axis=0, lambda_=statistic_name
+                    )
+
+                    assert np.allclose(ak_result, scipy_result, equal_nan=True)
+
+    def test_chisquare(self):
+        """Compare chisquare with scipy.stats.chisquare for ddof 0..5."""
+        from scipy.stats import chisquare as scipy_chisquare
+
+        from arkouda.akstats import chisquare as ak_chisquare
+
+        ddof_values = [0, 1, 2, 3, 4, 5]
+
+        for observed, expected in self.create_stat_test_pairs():
+            for ddof in ddof_values:
+                ak_result = ak_chisquare(observed, expected, ddof=ddof)
+
+                np_observed = observed.to_ndarray()
+                np_expected = self._ndarray_or_none(expected)
+
+                scipy_result = scipy_chisquare(np_observed, np_expected, ddof=ddof, axis=0)
+
+                assert np.allclose(ak_result, scipy_result, equal_nan=True)
diff --git a/arkouda-env-dev.yml b/arkouda-env-dev.yml
@@ -20,6 +20,7 @@ dependencies:
  - libiconv
  - libidn2
  - jupyter
+ - scipy
 
  # Developer dependencies
  - pexpect
@@ -42,4 +43,4 @@ dependencies:
  - furo # sphinx theme
  - myst-parser
  - linkify-it-py
-
+
diff --git a/arkouda-env.yml b/arkouda-env.yml
@@ -20,6 +20,7 @@ dependencies:
  - libiconv
  - libidn2
  - jupyter
-
+ - scipy
+
  - pip:
- - typeguard==2.10.0
+ - typeguard==2.10.0
diff --git a/arkouda/__init__.py b/arkouda/__init__.py
@@ -39,3 +39,5 @@
  is_registered,
  broadcast_dims,
 )
+from arkouda.akmath import *
+from arkouda.akstats import *
diff --git a/arkouda/akmath/__init__.py b/arkouda/akmath/__init__.py
@@ -0,0 +1,5 @@
+from ._math import xlogy
+
+__all__ = [
+ "xlogy",
+]
diff --git a/arkouda/akmath/_math.py b/arkouda/akmath/_math.py
@@ -0,0 +1,52 @@
+from typing import Union
+from warnings import warn
+
+import numpy as np
+
+from arkouda.numeric import log
+from arkouda.pdarrayclass import pdarray
+
+
+def xlogy(x: Union[pdarray, np.float64], y: pdarray) -> Union[pdarray, None]:
+    """
+    Compute ``x * log(y)``.
+
+    Parameters
+    ----------
+    x : pdarray or np.float64
+        Multiplier. A zero-dimensional value that is not already an
+        ``np.float64`` is converted to ``np.float64`` when its dtype can be
+        safely cast to float64. A pdarray must have the same size as ``y``.
+    y : pdarray
+        Argument of the logarithm.
+
+    Returns
+    -------
+    arkouda.pdarrayclass.pdarray or None
+        ``x * log(y)``. ``None`` is returned, after a ``UserWarning`` is
+        emitted, when ``x`` and ``y`` are pdarrays of different sizes or when
+        the argument types are not supported.
+
+    Notes
+    -----
+    The result is computed literally as ``x * log(y)``. No special case is
+    applied for ``x == 0``, whereas ``scipy.special.xlogy`` defines the result
+    as 0 there.
+
+    Examples
+    --------
+
+    >>> import arkouda as ak
+    >>> ak.connect()
+    >>> from arkouda.akmath import xlogy
+    >>> xlogy(ak.array([1, 2, 3, 4]), ak.array([5, 6, 7, 8]))
+    array([1.6094379124341003 3.5835189384561099 5.8377304471659395 8.317766166719343])
+    >>> xlogy(5.0, ak.array([1, 2, 3, 4]))
+    array([0.00000000000000000 3.4657359027997265 5.4930614433405491 6.9314718055994531])
+
+    """
+    if not isinstance(x, (np.float64, pdarray)):
+        # np.can_cast no longer accepts Python scalars as of NumPy 2.0, so test
+        # the dtype NumPy infers for the value instead of the value itself.
+        as_array = np.asarray(x)
+        if as_array.ndim == 0 and np.can_cast(as_array.dtype, np.float64):
+            x = np.float64(x)
+
+    if not isinstance(y, pdarray) or not isinstance(x, (pdarray, np.float64)):
+        msg = "x and y must both be pdarrays or x must be castable to float64 and y must be a pdarray."
+        warn(msg, UserWarning)
+        return None
+
+    if isinstance(x, pdarray) and x.size != y.size:
+        msg = "x and y must have the same size."
+        warn(msg, UserWarning)
+        return None
+
+    return x * log(y)
diff --git a/arkouda/akstats/LICENSE.txt b/arkouda/akstats/LICENSE.txt
@@ -0,0 +1,30 @@
+Copyright (c) 2001-2002 Enthought, Inc. 2003-2024, SciPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/arkouda/akstats/__init__.py b/arkouda/akstats/__init__.py
@@ -0,0 +1,3 @@
+from ._stats_py import Power_divergenceResult, chisquare, power_divergence
+
+__all__ = ["power_divergence", "chisquare", "Power_divergenceResult"]