Skip to content

Commit

Permalink
Closes Bears-R-Us#3278: Add ziggurat method to standard normal (Bears…
Browse files Browse the repository at this point in the history
…-R-Us#3596)

* Closes Bears-R-Us#3278: Add ziggurat method to standard normal

This PR (closes Bears-R-Us#3278) adds `ziggurat` method for calculating standard normal based on numpys implemenation.

I also realized that even though the zig method for both `standard_normal` and `standard_exponential` can't handle multi dimensional arrays yet, their other methods can. I created two versions of each proc with `where array_nd > 1` and `where array_nd == 1`, so now we can generate multi-dim exponentially and normally arrays if we use the compatible method.

I added parameterization for the methods in the tests

* double defined idx

---------

Co-authored-by: Tess Hayes <stress-tess@users.noreply.github.com>
  • Loading branch information
stress-tess and stress-tess authored Aug 1, 2024
1 parent 07fc7b5 commit ed20f95
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 57 deletions.
31 changes: 16 additions & 15 deletions PROTO_tests/tests/random_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,15 +268,16 @@ def test_choice_hypothesis_testing(self):
# if pval <= 0.05, the difference from the expected distribution is significant
assert pval > 0.05

def test_normal_hypothesis_testing(self):
@pytest.mark.parametrize("method", ["zig", "box"])
def test_normal_hypothesis_testing(self, method):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
rng = ak.random.default_rng(43)
rng = ak.random.default_rng(73)
num_samples = 10**4

mean = rng.uniform(-10, 10)
deviation = rng.uniform(0, 10)
sample = rng.normal(loc=mean, scale=deviation, size=num_samples)
sample = rng.normal(loc=mean, scale=deviation, size=num_samples, method=method)
sample_list = sample.to_list()

# first test if samples are normal at all
Expand Down Expand Up @@ -330,24 +331,24 @@ def test_poisson_hypothesis_testing(self):
_, pval = sp_stats.chisquare(f_obs=obs_counts, f_exp=exp_counts)
assert pval > 0.05

def test_exponential_hypothesis_testing(self):
@pytest.mark.parametrize("method", ["zig", "inv"])
def test_exponential_hypothesis_testing(self, method):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
rng = ak.random.default_rng(43)
num_samples = 10**4

scale = rng.uniform(0, 10)
for method in "zig", "inv":
sample = rng.exponential(scale=scale, size=num_samples, method=method)
sample_list = sample.to_list()

# do the Kolmogorov-Smirnov test for goodness of fit
ks_res = sp_stats.kstest(
rvs=sample_list,
cdf=sp_stats.expon.cdf,
args=(0, scale),
)
assert ks_res.pvalue > 0.05
sample = rng.exponential(scale=scale, size=num_samples, method=method)
sample_list = sample.to_list()

# do the Kolmogorov-Smirnov test for goodness of fit
ks_res = sp_stats.kstest(
rvs=sample_list,
cdf=sp_stats.expon.cdf,
args=(0, scale),
)
assert ks_res.pvalue > 0.05

def test_legacy_randint(self):
testArray = ak.random.randint(0, 10, 5)
Expand Down
28 changes: 22 additions & 6 deletions arkouda/random/_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,21 +196,27 @@ def standard_exponential(self, size=None, method="zig"):
pdarray
Drawn samples from the standard exponential distribution.
"""
from arkouda.util import _calc_shape

if size is None:
# delegate to numpy when return size is 1
return self._np_generator.standard_exponential(method=method)

shape, full_size, ndim = _calc_shape(size)
if full_size < 0:
raise ValueError("The size parameter must be > 0")

rep_msg = generic_msg(
cmd="standardExponential",
cmd=f"standardExponential<{ndim}>",
args={
"name": self._name_dict[akdtype("float64")],
"size": size,
"size": shape,
"method": method.upper(),
"has_seed": self._seed is not None,
"state": self._state,
},
)
self._state += size if method.upper() == "INV" else 1
self._state += full_size if method.upper() == "INV" else 1
return create_pdarray(rep_msg)

def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
Expand Down Expand Up @@ -290,7 +296,7 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
self._state += full_size
return create_pdarray(rep_msg)

def normal(self, loc=0.0, scale=1.0, size=None):
def normal(self, loc=0.0, scale=1.0, size=None, method="zig"):
r"""
Draw samples from a normal (Gaussian) distribution
Expand All @@ -305,6 +311,10 @@ def normal(self, loc=0.0, scale=1.0, size=None):
size: numeric_scalars, optional
Output shape. Default is None, in which case a single value is returned.
method : str, optional
Either 'box' or 'zig'. 'box' uses the Box–Muller transform
'zig' uses the Ziggurat method.
Notes
-----
The probability density for the Gaussian distribution is:
Expand Down Expand Up @@ -337,7 +347,7 @@ def normal(self, loc=0.0, scale=1.0, size=None):
if (scale < 0).any() if isinstance(scale, pdarray) else scale < 0:
raise TypeError("scale must be non-negative.")

return loc + scale * self.standard_normal(size=size)
return loc + scale * self.standard_normal(size=size, method=method)

def random(self, size=None):
"""
Expand Down Expand Up @@ -378,7 +388,7 @@ def random(self, size=None):
return self._np_generator.random()
return self.uniform(size=size)

def standard_normal(self, size=None):
def standard_normal(self, size=None, method="zig"):
r"""
Draw samples from a standard Normal distribution (mean=0, stdev=1).
Expand All @@ -387,6 +397,10 @@ def standard_normal(self, size=None):
size: numeric_scalars, optional
Output shape. Default is None, in which case a single value is returned.
method : str, optional
Either 'box' or 'zig'. 'box' uses the Box–Muller transform
'zig' uses the Ziggurat method.
Returns
-------
pdarray
Expand Down Expand Up @@ -426,6 +440,8 @@ def standard_normal(self, size=None):
args={
"name": self._name_dict[akdtype("float64")],
"shape": shape,
"method": method.upper(),
"has_seed": self._seed is not None,
"state": self._state,
},
)
Expand Down
Loading

0 comments on commit ed20f95

Please sign in to comment.