From 250dee2ad118934bccc124880a482e092265b388 Mon Sep 17 00:00:00 2001
From: jmkuebler <jonas.m.kuebler@gmail.com>
Date: Fri, 23 Oct 2020 13:27:58 +0200
Subject: [PATCH] Updated README and docstrings.

---
 README.md                        | 22 ++++++++++++++++++++++
 tests_wo_split/methods/pvalue.py |  8 ++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 3a54759..6dd46c2 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,28 @@ or using the `install` target in the `Makefile` by simply running
     make install
 
 
+## Computing p-values
+
+If you want to perform a two-sample test on your own samples X and Y, you can use the function `pvalue()` in
+`tests_wo_split/methods/pvalue.py`.
+A simple test of the validity of our method is to see whether the p-values are uniformly distributed 
+under the null hypothesis (samples come from the same distribution).
+#### Example: uniform distribution of p-values
+    import matplotlib.pyplot as plt
+    from tests_wo_split.methods.pvalue import pvalue
+    import numpy as np
+    runs = 1000
+    size = 1000
+    p = []
+    for i in range(runs):
+        x = np.random.normal(0,1, size=size)
+        y = np.random.normal(0,1, size=size)
+        p.append(pvalue(x=x, y=y))
+    plt.hist(p)
+    plt.show()
+
+
+
 ## Reproducing Figure 2
 
 To reproduce our results of Figure 2 you can use the provided
diff --git a/tests_wo_split/methods/pvalue.py b/tests_wo_split/methods/pvalue.py
index 5ba8461..3a6efda 100644
--- a/tests_wo_split/methods/pvalue.py
+++ b/tests_wo_split/methods/pvalue.py
@@ -6,12 +6,12 @@
 def pvalue(x: list, y: list, bandwidths_factors=[-2,-1,0,1,2], method='ost', constraints='Sigma', max_condition=1e-6) \
         -> float:
     """
-    Method that runs experiments. Iterate over the paramenter exp_number to consider different methods and samplesizes.
-    All the parameters can be controlled via the file 'config.yml'.
+    Compute a p-value for two samples.
     :param x: Sample from P
     :param y: Sample from Q
-    :param bandwidths_factors: factors for the gaussian kernels that are considered.
-    :param methods: which method ('wald', 'ost', 'split0.1'...)
+    :param bandwidths_factors: factors for the Gaussian kernels that are considered. The bandwidths are constructed as
+    sigma_0 * 2**factor, where sigma_0 is chosen by the median heuristic and factor runs over the list.
+    :param method: which method ('wald', 'ost', 'split0.1'...)
     :param constraints: 'Sigma' => leads to the suggested OST. 'positive' uses the canonical constraints without remark 1
     :param max_condition: just to numerically stabilize in case of almost singular covariance (see Appendix of the paper)
     :return: pvalue