add readthedoc conf file, fix #84

cokelaer · Aug 8, 2023 · 6e4b1b5 · 6e4b1b5
1 parent bacb558
commit 6e4b1b5
Show file tree

Hide file tree

Showing 8 changed files with 48 additions and 34 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       max-parallel: 5
       matrix:
-        python: [3.7, 3.8, 3.9]
+        python: [3.8, 3.9, '3.10']
       fail-fast: false
 
     steps:
@@ -27,17 +27,13 @@ jobs:
     - name: Install the package itself
       run: |
           pip install poetry
-          poetry install
+          poetry install 
     - name: Test with pytest
       run: |
-        pip install pytest
-        pip install pytest-cov
-        pytest --cov-report term --cov=fitter 
+        poetry run pytest --cov-report term --cov=fitter 
 
-    - name: coveralls            
+    - name: coveralls
       run: |
-        pip install coverage
-        coveralls --service=github
+        poetry run coveralls --service=github
       env:
           GITHUB_TOKEN: ${{ github.token }}
-
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -0,0 +1,17 @@
+version: 2
+sphinx:
+    configuration: doc/conf.py
+
+
+formats: 
+  - htmlzip
+
+python:
+    version: 3.8
+    install:
+        - method: pip
+          path: .
+
+# if present, this section is used and python section ignored before sphinx is built
+#conda:
+#    environment: environment.yml
diff --git a/README.rst b/README.rst
@@ -26,7 +26,7 @@ Compatible with Python 3.7, and 3.8, 3.9
 What is it ?
 ################
 
-The **fitter** package is a Python library for fitting probability distributions to data. It provides a simple and intuitive interface for estimating the parameters of different types of distributions, including continuous and discrete distributions. With **fitter**, you can easily fit a variety of distributions to your data and compare the fit of different distributions to choose the best-fitting one. The package is designed to be easy to use and requires minimal setup, making it a useful tool for data scientists and statisticians working with probability distributions.
+The **fitter** package is a Python library used for fitting probability distributions to data. It provides a straightforward and intuitive interface to estimate parameters for various types of distributions, both continuous and discrete. Using **fitter**, you can easily fit a range of distributions to your data and compare their fit, aiding in the selection of the most suitable distribution. The package is designed to be user-friendly and requires minimal setup, making it a useful tool for data scientists and statisticians working with probability distributions.
 
 Installation
 ###################
@@ -99,6 +99,8 @@ Changelog
 ========= ==========================================================================
 Version   Description
 ========= ==========================================================================
+1.6.0     * for developers: uses pyproject.toml instead of setup.py
+          * Fix progress bar (see https://github.com/cokelaer/fitter/pull/74)
 1.5.2     * PR https://github.com/cokelaer/fitter/pull/74 to fix logger
 1.5.1     * fixed regression putting back joblib
 1.5.0     * removed easydev and replaced by tqdm for progress bar

diff --git a/doc/tuto.rst b/doc/tuto.rst
@@ -73,8 +73,10 @@ The histfit module provides the HistFit class to generate plots of your data
 with a fitting curve based on several attempt at fitting your X/Y data with some
 errors on the data set. For instance here below, we introduce 3% of errors and
 fit the data 20 times to see if the fit makes sense.
- 
+
 .. plot::
+    :include-source:
+    :width: 80%
 
     from fitter import HistFit
     from pylab import hist

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fitter"
-version = "1.5.2"
+version = "1.6.0"
 description = "A tool to fit data to many distributions and get the best one(s)"
 authors = ["Thomas Cokelaer <cokelaer@gmail.com>"]
 license = "GPL"

diff --git a/src/fitter/fitter.py b/src/fitter/fitter.py
@@ -40,8 +40,7 @@
 __all__ = ["get_common_distributions", "get_distributions", "Fitter"]
 
 
-
-# A solution to wrap joblib parallel call in tqdm from 
+# A solution to wrap joblib parallel call in tqdm from
 # https://stackoverflow.com/questions/24983493/tracking-progress-of-joblib-parallel-execution/58936697#58936697
 # and https://github.com/louisabraham/tqdm_joblib
 @contextlib.contextmanager
@@ -68,12 +67,6 @@ def __call__(self, *args, **kwargs):
         tqdm_object.close()
 
 
-
-
-
-
-
-
 def get_distributions():
     distributions = []
     for this in dir(scipy.stats):
@@ -237,7 +230,7 @@ def _init(self):
         self._ks_stat = {}
         self._ks_pval = {}
         self._fit_i = 0  # fit progress
-        #self.pb = None
+        # self.pb = None
 
     def _update_data_pdf(self):
         # histogram retuns X with N+1 values. So, we rearrange the X output into only N
@@ -368,9 +361,10 @@ def fit(self, progress=False, n_jobs=-1, max_workers=-1):
         warnings.filterwarnings("ignore", category=RuntimeWarning)
 
         N = len(self.distributions)
-        with tqdm_joblib(desc=f"Fitting {N} distributions", total=N) as progress_bar:
-            Parallel(n_jobs=max_workers, backend='threading')(delayed(self._fit_single_distribution)(dist) for dist in self.distributions)
-
+        with tqdm_joblib(desc=f"Fitting {N} distributions", total=N, disable=not progress) as progress_bar:
+            Parallel(n_jobs=max_workers, backend="threading")(
+                delayed(self._fit_single_distribution)(dist) for dist in self.distributions
+            )
 
         self.df_errors = pd.DataFrame(
             {

diff --git a/src/fitter/histfit.py b/src/fitter/histfit.py
@@ -7,15 +7,13 @@
 
 
 class HistFit:
-    """Plot the histogram of the data (barplot) and the fitted histogram.
+    """Plot the histogram of the data (barplot) and the fitted histogram (gaussian case only)
 
     The input data can be a series. In this case, we compute the histogram.
     Then, we fit a curve on top on the histogram that best fit the histogram.
 
-    If you already have the histogram, you can provide the arguments.
-    In this case, X should be evenly spaced
-
-
+    If you already have the histogram, you can provide the density function.
+    In such case, we assume the data to be evenly spaced from 1 to N.
 
     If you have some data, histogram is computed, then we add some noise during
     the fitting process and repeat the process Nfit=20 times. This gives us a

diff --git a/src/fitter/main.py b/src/fitter/main.py
@@ -40,19 +40,23 @@ def main():  # pragma: no cover
 
 @main.command()
 @click.argument("filename", type=click.STRING)
-@click.option("--column-number", type=click.INT, default=1)
-@click.option("--delimiter", type=click.STRING, default=",", help="look at the first column")
+@click.option("--column-number", type=click.INT, default=1, help="data column to use (first column by default)")
+@click.option("--delimiter", type=click.STRING, default=",", help="column delimiter (comma by default)")
 @click.option(
     "--distributions",
     type=click.STRING,
     default="gamma,beta",
-    help="llist of distribution",
+    help="list of distribution",
 )
 @click.option("--tag", type=click.STRING, default="fitter", help="tag to name output files")
 @click.option("--progress/--no-progress", default=True)
 @click.option("--verbose/--no-verbose", default=True)
 def fitdist(**kwargs):
-    """"""
+    """
+
+    fitter fitdist data.csv
+
+    """
     import csv
 
     col = kwargs["column_number"]
@@ -89,11 +93,12 @@ def fitdist(**kwargs):
     with open("{}.log".format(tag), "w") as fout:
         fout.write(msg)
 
+
 @main.command()
 def show_distributions(**kwargs):
     from fitter import get_distributions
-    print("\n".join(get_distributions()))
 
+    print("\n".join(get_distributions()))
 
 
 if __name__ == "__main__":  # pragma: no cover