Merge pull request #14 from deepak7376/develop

Fix the Sn code and optimize it using numpy
deepak7376 · Mar 20, 2023 · 7434ad0 · 7434ad0
2 parents e3d4f48 + 9ba65ce
commit 7434ad0
Show file tree

Hide file tree

Showing 9 changed files with 116 additions and 58 deletions.
diff --git a/README.md b/README.md
@@ -49,7 +49,6 @@ x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 
 # with bias correction
 res = Sn(x)  # ans = 3.5778 
-# Note: This is not working properly as R Sn code works (Fix it)
 
 # Without bias correction
 res = Sn(x, finite_corr=False)  # ans = 3.5778

diff --git a/examples/test.py b/examples/test.py
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-certifi==2019.11.28
-docutils==0.15.2
-numpy==1.18.0
-statistics==1.0.3.5
+certifi>=2019.11.28
+docutils>=0.15.2
+numpy>=1.18.0
+statistics>=1.0.3.5
diff --git a/setup.py b/setup.py
@@ -1,12 +1,35 @@
 
 from setuptools import setup, find_packages
+import subprocess
+import os
+
+version = (
+    subprocess.run(["git", "describe", "--tags"], stdout=subprocess.PIPE)
+    .stdout.decode("utf-8")
+    .strip()
+)
+
+if "-" in version:
+    # when not on tag, git describe outputs: "1.3.3-22-gdf81228"
+    # pip has gotten strict with version numbers
+    # so change it to: "1.3.3+22.git.gdf81228"
+    # See: https://peps.python.org/pep-0440/#local-version-segments
+    v,i,s = version.split("-")
+    version = v + "+" + i + ".git." + s
+
+assert "-" not in version
+assert "." in version
+
+assert os.path.isfile("version.py")
+with open("VERSION", "w", encoding="utf-8") as fh:
+    fh.write("%s\n" % version)
+
 
 # reading long description from file 
 with open('README.md', encoding='utf-8') as f:
     long_description = f.read()
 
-
-
+
 # some more details 
 CLASSIFIERS = [ 
     'Development Status :: 3 - Alpha', 
@@ -18,7 +41,7 @@
 
 # calling the setup function  
 setup(name='robustbase', 
-      version='0.2.8', 
+      version=version, 
       description='A Python Based Library to Calculate Estimators (Sn, Qn, MAD, IQR)', 
       long_description=long_description, 
       long_description_content_type='text/markdown',

diff --git a/src/.travis.yml b/src/.travis.yml
@@ -4,4 +4,4 @@ python:
 cache: pip
 install:
   - pip install -r requirements.txt
-script: python tests/test_robust.py
+script: python tests/test_robustbase.py
diff --git a/src/robustbase.py b/src/robustbase.py
@@ -72,24 +72,41 @@ def mad(x, center = None, constant = 1.4826, na = False,
 
 
 def Sn(x, constant = 1.1926, finite_corr=True):
+
     """
     Sn scale estimator, Gaussian efficiency 58%
+
+    Parameters
+    ----------
+    x : list
+        numeric vector of observations.
+    constant : float
+        number by which the result is multiplied; the default achieves consistency for normally distributed data.
+    finite_corr : bool
+        boolean indicating whether the finite-sample bias correction factor should be applied. Defaults to True.
     """
     n = len(x)
-
     if n==0:
         raise Exception("x sholud be non-empty !!!")
     if n==1:
         return 0
-
-    med=[]
-    for i in x:
-        diff=[]
-        for j in x:
-            diff.append(abs(i-j))
-        med.append(median(diff))
-    return round(bias_corr(n) * median(med) * constant, 6) if finite_corr==True else round(median(med) * constant, 6)
 
+    y = np.array([x,]*n)
+    z = y.transpose()
+    diff = abs(y-z)
+    med = np.median(diff, axis=0)
+    r = round(median(med) * constant, 6)
+
+    if finite_corr==True :
+        if n <=9 :
+            correction = [.743, 1.851, .954, 1.351, .993, 1.198, 1.005, 1.131]
+            correction = correction[n-2]
+        elif (n % 2) == 1:
+            correction = n/(n-.9)
+        else : 
+            correction = 1
+        r= correction*r
+    return r
 
 def iqr(x):
     """
@@ -108,6 +125,15 @@ def iqr(x):
 def Qn(x, constant = 2.21914, finite_corr=True):
     """
     Qn scale estimator, Gaussian efficiency 82%
+
+    Parameters
+    ----------
+    x : list
+        numeric vector of observations.
+    constant : float
+        number by which the result is multiplied; the default achieves consistency for normally distributed data.
+    finite_corr : bool
+        boolean indicating whether the finite-sample bias correction factor should be applied. Defaults to True.
     """
     n = len(x)
 
@@ -128,16 +154,3 @@ def Qn(x, constant = 2.21914, finite_corr=True):
     k=int(h*(h-1)/2)                  
     return round(constant*diff[k-1]*bias_corr(n), 6) if finite_corr==True else round(constant*diff[k-1], 6)
 
-
-if __name__ == '__main__':
-
-    x = [i for i in range(1,11)]
-    #a = robustbase()
-    # print(median(x, low=True))
-    # print(mad(x,high=True))
-    # print(iqr([1]))
-    print(Sn(x))
-    print(Sn(x, finite_corr=False))
-
-
-
diff --git a/tests/test_robust.py b/tests/test_robust.py
diff --git a/tests/test_robustbase.py b/tests/test_robustbase.py
@@ -0,0 +1,38 @@
+from src.robustbase import Sn, Qn, iqr, mad
+
+def test_robustbase():
+	x1 = [x for x in range(1, 201)]
+	outlier = [x for x in range(501, 516)]
+	x2 = x1 + outlier
+
+	# Sn tests
+	assert Sn(x2, finite_corr=True) == 73.05440915460065
+	assert Sn(x2, finite_corr=False) == 72.7486
+	assert Sn(x2, constant=1, finite_corr=True) == 61.25642223260159
+	assert Sn(x2, constant=1, finite_corr=False) == 61.0
+
+	# Qn tests
+	assert Qn(x2, finite_corr=True) == 68.287735
+	assert Qn(x2, finite_corr=False) == 68.79334
+	assert Qn(x2, constant=1, finite_corr=True) == 30.772162
+	assert Qn(x2, constant=1, finite_corr=False) == 31
+
+	# IQR test
+	assert iqr(x2) == (161.5, 54.5)
+
+	# MAD tests
+	assert mad(x2, center = None, constant = 1.4826, na = False, low = False, high = False) == 80.0604
+	assert mad(x2, center = 1.5, constant = 1.4826, na = False, low = False, high = False) == 157.8969
+	assert mad(x2, center = None, constant = 1.4826, na = False, low = True, high = False) == 80.0604
+	assert mad(x2, center = None, constant = 1.4826, na = False, low = False, high = True) == 80.0604
+
+
+
+if __name__ == "__main__":
+	test_robustbase()
+	print("All tests passed!!!")
+
+
+
+
+
diff --git a/version.py b/version.py
@@ -0,0 +1,12 @@
+import os
+
+
+def string():
+    try:
+        with open(os.path.dirname(__file__) + "/VERSION", "r", encoding="utf-8") as fh:
+            version = fh.read().strip()
+            if version:
+                return version
+    except:
+        pass
+    return "unknown (git checkout)"