From 250dee2ad118934bccc124880a482e092265b388 Mon Sep 17 00:00:00 2001 From: jmkuebler <jonas.m.kuebler@gmail.com> Date: Fri, 23 Oct 2020 13:27:58 +0200 Subject: [PATCH] Updated README and docstrings. --- README.md | 22 ++++++++++++++++++++++ tests_wo_split/methods/pvalue.py | 8 ++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3a54759..6dd46c2 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,28 @@ or using the `install` target in the `Makefile` by simply running make install +## Computing p-values + +If you want to perform a two-sample test on your own samples X and Y, you can use the function `pvalue()` in +`tests_wo_split/methods/pvalue.py`. +A simple test of the validity of our method is to see whether the p-values are uniformly distributed +under the null hypothesis (samples come from the same distribution). +#### Example: uniform distribution of p-values + import matplotlib.pyplot as plt + from tests_wo_split.methods.pvalue import pvalue + import numpy as np + runs = 1000 + size = 1000 + p = [] + for i in range(runs): + x = np.random.normal(0,1, size=size) + y = np.random.normal(0,1, size=size) + p.append(pvalue(x=x, y=y)) + plt.hist(p) + plt.show() + + + ## Reproducing Figure 2 To reproduce our results of Figure 2 you can use the provided diff --git a/tests_wo_split/methods/pvalue.py b/tests_wo_split/methods/pvalue.py index 5ba8461..3a6efda 100644 --- a/tests_wo_split/methods/pvalue.py +++ b/tests_wo_split/methods/pvalue.py @@ -6,12 +6,12 @@ def pvalue(x: list, y: list, bandwidths_factors=[-2,-1,0,1,2], method='ost', constraints='Sigma', max_condition=1e-6) \ -> float: """ - Method that runs experiments. Iterate over the paramenter exp_number to consider different methods and samplesizes. - All the parameters can be controlled via the file 'config.yml'. + Compute a p-value for two samples.
:param x: Sample from P :param y: Sample from Q - :param bandwidths_factors: factors for the gaussian kernels that are considered. - :param methods: which method ('wald', 'ost', 'split0.1'...) + :param bandwidths_factors: factors for the gaussian kernels that are considered. The bandwidths are constructed as + sigma_0 * 2**factor, where sigma_0 is chosen by the median heuristic, and factors runs over the list. + :param method: which method ('wald', 'ost', 'split0.1'...) :param constraints: 'Sigma' => leads to the suggested OST. 'positive' uses the canonical constraints without remark 1 :param max_condition: just to numerically stabilize in case of almost singular covariance (see Appendix of the paper) :return: pvalue