Skip to content

Commit

Permalink
closes #2927 power divergence statistic (#2932)
Browse files Browse the repository at this point in the history
* closes #2927 power divergence statistic

* add scipy to requirements

* add arkouda/akstats/_stats_py.pyi

* Fix F403 and F401 error codes on flake8 arkouda from arkouda/akmath/__init__.py and arkouda/akstats/__init__.py

* un-pin scipy from specific version

* add scipy license and minor changes in response to code review

* Update tests/akmath/akmath_test.py

---------

Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
Co-authored-by: pierce <48131946+pierce314159@users.noreply.github.com>
  • Loading branch information
3 people committed Feb 2, 2024
1 parent ab0bb3f commit 3613d76
Show file tree
Hide file tree
Showing 17 changed files with 529 additions and 4 deletions.
28 changes: 28 additions & 0 deletions PROTO_tests/tests/akmath/akmath_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import math

import numpy as np

import arkouda as ak
from arkouda.akmath import xlogy
from arkouda.pdarrayclass import pdarray


class TestStats:
    """Compares arkouda's ``xlogy`` with ``scipy.special.xlogy``."""

    def test_xlogy(self):
        """Check every (x, y) combination against SciPy, treating NaNs as equal."""
        from scipy.special import xlogy as scipy_xlogy

        log_args = [ak.array([1, 2, 3]), ak.array([10, 100, 100]), ak.array([-1, 0, np.nan])]
        # Multipliers cover Python ints, a NumPy float scalar and a pdarray.
        multipliers = [3, 5, np.float64(6), ak.array([1.0, 2.0, 4.5])]

        for log_arg in log_args:
            for multiplier in multipliers:
                ak_result = xlogy(multiplier, log_arg)

                np_log_arg = log_arg.to_ndarray()
                # SciPy needs NumPy inputs; scalars pass through untouched.
                np_multiplier = (
                    multiplier.to_ndarray() if isinstance(multiplier, pdarray) else multiplier
                )

                scipy_result = scipy_xlogy(np_multiplier, np_log_arg)

                assert np.allclose(ak_result.to_ndarray(), scipy_result, equal_nan=True)
73 changes: 73 additions & 0 deletions PROTO_tests/tests/akstats/akstats_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import math

import numpy as np
from scipy.stats import power_divergence as scipy_power_divergence

import arkouda as ak
from arkouda.akstats import power_divergence as ak_power_divergence


class TestStats:
    """Compares arkouda's power-divergence statistics with their SciPy counterparts."""

    @staticmethod
    def create_stat_test_pairs():
        """Return the (f_obs, f_exp) fixtures; f_exp is None in one of the pairs."""
        near_identical = (
            ak.array([10000000, 20000000, 30000000, 40000000, 50000000, 60000000, 70000000]),
            ak.array([10000000, 20000000, 30000000, 40000001, 50000000, 60000000, 70000000]),
        )
        without_expected = (
            ak.array([10000000, 20000000, 30000000, 40000000, 50000000, 60000000, 70000000]),
            None,
        )
        rescaled = (ak.array([44, 24, 29, 3]) / 100 * 189, ak.array([43, 52, 54, 40]))
        return [near_identical, without_expected, rescaled]

    def test_power_divergence(self):
        """Check power_divergence for every fixture, named lambda and ddof in 0..5."""
        fixtures = self.create_stat_test_pairs()

        lambda_names = (
            "pearson",
            "log-likelihood",
            "freeman-tukey",
            "mod-log-likelihood",
            "neyman",
            "cressie-read",
        )

        for observed, expected in fixtures:
            for lambda_name in lambda_names:
                for ddof in range(6):
                    ak_result = ak_power_divergence(
                        observed, expected, ddof=ddof, lambda_=lambda_name
                    )

                    np_observed = observed.to_ndarray()
                    # SciPy accepts None for f_exp, so only convert real arrays.
                    np_expected = None if expected is None else expected.to_ndarray()

                    scipy_result = scipy_power_divergence(
                        np_observed, np_expected, ddof=ddof, axis=0, lambda_=lambda_name
                    )

                    assert np.allclose(ak_result, scipy_result, equal_nan=True)

    def test_chisquare(self):
        """Check chisquare for every fixture and ddof in 0..5."""
        from scipy.stats import chisquare as scipy_chisquare

        from arkouda.akstats import chisquare as ak_chisquare

        fixtures = self.create_stat_test_pairs()

        for observed, expected in fixtures:
            for ddof in range(6):
                ak_result = ak_chisquare(observed, expected, ddof=ddof)

                np_observed = observed.to_ndarray()
                # SciPy accepts None for f_exp, so only convert real arrays.
                np_expected = None if expected is None else expected.to_ndarray()

                scipy_result = scipy_chisquare(np_observed, np_expected, ddof=ddof, axis=0)

                assert np.allclose(ak_result, scipy_result, equal_nan=True)
3 changes: 2 additions & 1 deletion arkouda-env-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dependencies:
- libiconv
- libidn2
- jupyter
- scipy

# Developer dependencies
- pexpect
Expand All @@ -42,4 +43,4 @@ dependencies:
- furo # sphinx theme
- myst-parser
- linkify-it-py


5 changes: 3 additions & 2 deletions arkouda-env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dependencies:
- libiconv
- libidn2
- jupyter

- scipy

- pip:
- typeguard==2.10.0
- typeguard==2.10.0
2 changes: 2 additions & 0 deletions arkouda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,5 @@
is_registered,
broadcast_dims,
)
from arkouda.akmath import *
from arkouda.akstats import *
5 changes: 5 additions & 0 deletions arkouda/akmath/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from ._math import xlogy

# Names exported by a star-import of this package.
__all__ = [
"xlogy",
]
52 changes: 52 additions & 0 deletions arkouda/akmath/_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from typing import Union
from warnings import warn

import numpy as np

from arkouda.numeric import log
from arkouda.pdarrayclass import pdarray


def xlogy(x: Union[pdarray, np.float64], y: pdarray):
    """
    Computes x * log(y).

    Parameters
    ----------
    x : pdarray or np.float64
        x must have a datatype that is castable to float64
    y : pdarray

    Returns
    -------
    arkouda.pdarrayclass.pdarray
        The product ``x * log(y)``. ``None`` is returned, after a
        ``UserWarning`` is issued, when the argument types are not
        supported or when two pdarrays differ in size.

    Notes
    -----
    The result is the plain product ``x * log(y)``; no special case is
    applied for ``x == 0`` (``scipy.special.xlogy`` returns 0 there).

    Examples
    --------
    >>> import arkouda as ak
    >>> ak.connect()
    >>> from arkouda.akmath import xlogy
    >>> xlogy( ak.array([1, 2, 3, 4]), ak.array([5,6,7,8]))
    array([1.6094379124341003 3.5835189384561099 5.8377304471659395 8.317766166719343])
    >>> xlogy( 5.0, ak.array([1, 2, 3, 4]))
    array([0.00000000000000000 3.4657359027997265 5.4930614433405491 6.9314718055994531])
    """
    if not isinstance(x, (np.float64, pdarray)):
        # NumPy 2 (NEP 50) rejects Python int/float/complex values in
        # np.can_cast, so test the value's minimal dtype rather than the
        # value itself. This works on both NumPy 1.x and 2.x.
        if np.can_cast(np.min_scalar_type(x), np.float64):
            x = np.float64(x)

    if not isinstance(y, pdarray) or not isinstance(x, (np.float64, pdarray)):
        msg = "x and y must both be pdarrays or x must be castable to float64 and y must be a pdarray."
        warn(msg, UserWarning)
        return None

    # Only the pdarray/pdarray combination needs a size check; a float64
    # scalar multiplies every element of log(y).
    if isinstance(x, pdarray) and x.size != y.size:
        msg = "x and y must have the same size."
        warn(msg, UserWarning)
        return None

    return x * log(y)
30 changes: 30 additions & 0 deletions arkouda/akstats/LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Copyright (c) 2001-2002 Enthought, Inc. 2003-2024, SciPy Developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3 changes: 3 additions & 0 deletions arkouda/akstats/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from ._stats_py import Power_divergenceResult, chisquare, power_divergence

# Names exported by a star-import of this package.
__all__ = ["power_divergence", "chisquare", "Power_divergenceResult"]
Loading

0 comments on commit 3613d76

Please sign in to comment.