Commit
DOC: Add info about incremental algorithms and BS (#2103) (#2112)
* DOC: Add info about incremental algorithms and BS (#2103)

- Add a 'Non-Scikit algorithms' section to the docs
- Add info about IncrementalPCA
- Add the sphinx.ext.napoleon extension to generate docs from docstrings.
- Update docstrings for non-scikit algorithms

* Remove info about 2025.1 changes from BS and IncBS docstrings

---------

Co-authored-by: Samir Nasibli <samir.nasibli@intel.com>
olegkkruglov and samir-nasibli authored Oct 22, 2024
1 parent e0535f6 commit 65afadb
Showing 8 changed files with 186 additions and 41 deletions.
3 changes: 3 additions & 0 deletions doc/sources/algorithms.rst
@@ -159,6 +159,9 @@ Dimensionality Reduction

- ``svd_solver`` not in [`'full'`, `'covariance_eigh'`]
- Sparse data is not supported
* - `IncrementalPCA`
- All parameters are supported
- Sparse data is not supported
* - `TSNE`
- All parameters are supported except:

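For context on the new IncrementalPCA row above, here is a minimal usage sketch. It assumes acceleration is enabled through sklearnex's patching mechanism (patch_sklearn) and uses dense NumPy input, per the sparse-data restriction noted in the table; the data are illustrative and not part of the commit.

    import numpy as np
    from sklearnex import patch_sklearn
    patch_sklearn()  # route supported scikit-learn estimators to the accelerated backend

    from sklearn.decomposition import IncrementalPCA

    X = np.random.rand(100, 10)              # dense data only (see table above)
    ipca = IncrementalPCA(n_components=2, batch_size=20)
    ipca.partial_fit(X[:50])                  # feed the data batch by batch
    ipca.partial_fit(X[50:])
    X_reduced = ipca.transform(X)
    print(X_reduced.shape)                    # (100, 2)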
1 change: 1 addition & 0 deletions doc/sources/conf.py
@@ -67,6 +67,7 @@
"notfound.extension",
"sphinx_design",
"sphinx_copybutton",
"sphinx.ext.napoleon",
]

# Add any paths that contain templates here, relative to this directory.
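The extension added above is all the commit needs; napoleon's defaults already parse the NumPy-style sections used in the docstrings below. If finer control were ever wanted, conf.py could additionally set napoleon options — the lines below are a hypothetical illustration, not part of this change.

    # Hypothetical napoleon settings (not in this commit); the defaults suffice
    # for the NumPy-style docstrings documented below.
    napoleon_google_docstring = False  # only NumPy-style docstrings are used
    napoleon_numpy_docstring = True
    napoleon_use_ivar = False          # render attributes as a field list, not :ivar: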
1 change: 1 addition & 0 deletions doc/sources/index.rst
@@ -105,6 +105,7 @@ Enable Intel(R) GPU optimizations
algorithms.rst
oneAPI and GPU support <oneapi-gpu.rst>
distributed-mode.rst
non-scikit-algorithms.rst
array_api.rst
verbose.rst
deprecation.rst
44 changes: 44 additions & 0 deletions doc/sources/non-scikit-algorithms.rst
@@ -0,0 +1,44 @@
.. ******************************************************************************
.. * Copyright 2024 Intel Corporation
.. *
.. * Licensed under the Apache License, Version 2.0 (the "License");
.. * you may not use this file except in compliance with the License.
.. * You may obtain a copy of the License at
.. *
.. * http://www.apache.org/licenses/LICENSE-2.0
.. *
.. * Unless required by applicable law or agreed to in writing, software
.. * distributed under the License is distributed on an "AS IS" BASIS,
.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
.. * See the License for the specific language governing permissions and
.. * limitations under the License.
.. *******************************************************************************/
Non-Scikit-Learn Algorithms
===========================
Algorithms that are not present in the original scikit-learn are described here. All of these algorithms are
available for both CPU and GPU (including distributed mode).

BasicStatistics
---------------
.. autoclass:: sklearnex.basic_statistics.BasicStatistics
.. automethod:: sklearnex.basic_statistics.BasicStatistics.fit

IncrementalBasicStatistics
--------------------------
.. autoclass:: sklearnex.basic_statistics.IncrementalBasicStatistics
.. automethod:: sklearnex.basic_statistics.IncrementalBasicStatistics.fit
.. automethod:: sklearnex.basic_statistics.IncrementalBasicStatistics.partial_fit

IncrementalEmpiricalCovariance
------------------------------
.. autoclass:: sklearnex.covariance.IncrementalEmpiricalCovariance
.. automethod:: sklearnex.covariance.IncrementalEmpiricalCovariance.fit
.. automethod:: sklearnex.covariance.IncrementalEmpiricalCovariance.partial_fit

IncrementalLinearRegression
---------------------------
.. autoclass:: sklearnex.linear_model.IncrementalLinearRegression
.. automethod:: sklearnex.linear_model.IncrementalLinearRegression.fit
.. automethod:: sklearnex.linear_model.IncrementalLinearRegression.partial_fit
.. automethod:: sklearnex.linear_model.IncrementalLinearRegression.predict
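IncrementalLinearRegression is the only estimator documented above without a doctest in this commit; the sketch below shows the fit/partial_fit/predict cycle declared by the directives. The import path follows the autoclass directive; the data are illustrative.

    import numpy as np
    from sklearnex.linear_model import IncrementalLinearRegression

    X = np.array([[1.0], [2.0], [3.0], [4.0]])
    y = np.array([2.0, 4.0, 6.0, 8.0])       # y = 2 * x

    inclr = IncrementalLinearRegression()
    inclr.partial_fit(X[:2], y[:2])           # first batch
    inclr.partial_fit(X[2:], y[2:])           # second batch
    print(inclr.predict(np.array([[5.0]])))   # approximately [10.]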
35 changes: 30 additions & 5 deletions sklearnex/basic_statistics/basic_statistics.py
@@ -32,12 +32,16 @@ class BasicStatistics(BaseEstimator):
"""
Estimator for basic statistics.
Allows one to compute basic statistics for the provided data.
Parameters
----------
result_options: string or list, default='all'
List of statistics to compute
Used to set statistics to calculate. Possible values are ``'min'``, ``'max'``, ``'sum'``, ``'mean'``, ``'variance'``,
``'variation'``, ``'sum_squares'``, ``'sum_squares_centered'``, ``'standard_deviation'``, ``'second_order_raw_moment'``
or a list containing any of these values. If set to ``'all'`` then all possible statistics will be
calculated.
Attributes (are existing only if corresponding result option exists)
Attributes
----------
min : ndarray of shape (n_features,)
Minimum of each feature over all samples.
@@ -59,6 +63,27 @@ class BasicStatistics(BaseEstimator):
Centered sum of squares for each feature over all samples.
second_order_raw_moment : ndarray of shape (n_features,)
Second order moment of each feature over all samples.
Note
----
Attribute exists only if corresponding result option has been provided.
Note
----
Some results can exhibit small variations due to
floating point error accumulation and multithreading.
Examples
--------
>>> import numpy as np
>>> from sklearnex.basic_statistics import BasicStatistics
>>> bs = BasicStatistics(result_options=['sum', 'min', 'max'])
>>> X = np.array([[1, 2], [3, 4]])
>>> bs.fit(X)
>>> bs.sum_
np.array([4., 6.])
>>> bs.min_
np.array([1., 2.])
"""

def __init__(self, result_options="all"):
@@ -113,14 +138,14 @@ def fit(self, X, y=None, *, sample_weight=None):
Parameters
----------
X : array-like of shape (n_samples, n_features)
Data for compute, where `n_samples` is the number of samples and
`n_features` is the number of features.
Data for compute, where ``n_samples`` is the number of samples and
``n_features`` is the number of features.
y : Ignored
Not used, present for API consistency by convention.
sample_weight : array-like of shape (n_samples,), default=None
Weights for compute weighted statistics, where `n_samples` is the number of samples.
Weights for compute weighted statistics, where ``n_samples`` is the number of samples.
Returns
-------
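The fit docstring above also documents sample_weight; a short sketch of weighted statistics follows. The attribute names assume the underscore pattern used in the class example, and the weights are illustrative.

    import numpy as np
    from sklearnex.basic_statistics import BasicStatistics

    X = np.array([[1.0, 2.0], [3.0, 4.0]])
    weights = np.array([1.0, 3.0])            # second sample counts three times as much

    bs = BasicStatistics(result_options=["sum", "mean"])
    bs.fit(X, sample_weight=weights)
    print(bs.sum_)   # weighted sum per feature
    print(bs.mean_)  # weighted mean per feature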
55 changes: 44 additions & 11 deletions sklearnex/basic_statistics/incremental_basic_statistics.py
@@ -37,8 +37,10 @@
@control_n_jobs(decorated_methods=["partial_fit", "_onedal_finalize_fit"])
class IncrementalBasicStatistics(BaseEstimator):
"""
Incremental estimator for basic statistics.
Allows to compute basic statistics if data are splitted into batches.
Calculates basic statistics on the given data and allows for computation when the data are split into
batches. The user can use the ``partial_fit`` method to provide a single batch of data or use the ``fit`` method to provide
the entire dataset.
Parameters
----------
result_options: string or list, default='all'
Expand All @@ -47,10 +49,9 @@ class IncrementalBasicStatistics(BaseEstimator):
batch_size : int, default=None
The number of samples to use for each batch. Only used when calling
``fit``. If ``batch_size`` is ``None``, then ``batch_size``
is inferred from the data and set to ``5 * n_features``, to provide a
balance between approximation accuracy and memory consumption.
is inferred from the data and set to ``5 * n_features``.
Attributes (are existing only if corresponding result option exists)
Attributes
----------
min : ndarray of shape (n_features,)
Minimum of each feature over all samples.
@@ -81,6 +82,38 @@ class IncrementalBasicStatistics(BaseEstimator):
second_order_raw_moment : ndarray of shape (n_features,)
Second order moment of each feature over all samples.
n_samples_seen_ : int
The number of samples processed by the estimator. Will be reset on
new calls to ``fit``, but increments across ``partial_fit`` calls.
batch_size_ : int
Inferred batch size from ``batch_size``.
n_features_in_ : int
Number of features seen during ``fit`` or ``partial_fit``.
Note
----
Attribute exists only if corresponding result option has been provided.
Examples
--------
>>> import numpy as np
>>> from sklearnex.basic_statistics import IncrementalBasicStatistics
>>> incbs = IncrementalBasicStatistics(batch_size=1)
>>> X = np.array([[1, 2], [3, 4]])
>>> incbs.partial_fit(X[:1])
>>> incbs.partial_fit(X[1:])
>>> incbs.sum_
np.array([4., 6.])
>>> incbs.min_
np.array([1., 2.])
>>> incbs.fit(X)
>>> incbs.sum_
np.array([4., 6.])
>>> incbs.max_
np.array([3., 4.])
"""

_onedal_incremental_basic_statistics = staticmethod(onedal_IncrementalBasicStatistics)
@@ -229,14 +262,14 @@ def partial_fit(self, X, sample_weight=None):
Parameters
----------
X : array-like of shape (n_samples, n_features)
Data for compute, where `n_samples` is the number of samples and
`n_features` is the number of features.
Data for compute, where ``n_samples`` is the number of samples and
``n_features`` is the number of features.
y : Ignored
Not used, present for API consistency by convention.
sample_weight : array-like of shape (n_samples,), default=None
Weights for compute weighted statistics, where `n_samples` is the number of samples.
Weights for compute weighted statistics, where ``n_samples`` is the number of samples.
Returns
-------
@@ -261,14 +294,14 @@ def fit(self, X, y=None, sample_weight=None):
Parameters
----------
X : array-like of shape (n_samples, n_features)
Data for compute, where `n_samples` is the number of samples and
`n_features` is the number of features.
Data for compute, where ``n_samples`` is the number of samples and
``n_features`` is the number of features.
y : Ignored
Not used, present for API consistency by convention.
sample_weight : array-like of shape (n_samples,), default=None
Weights for compute weighted statistics, where `n_samples` is the number of samples.
Weights for compute weighted statistics, where ``n_samples`` is the number of samples.
Returns
-------
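The partial_fit path documented above is what enables out-of-core processing; the sketch below consumes a stream of batches without ever holding the full dataset in memory. The random data and result options are illustrative, not from the commit.

    import numpy as np
    from sklearnex.basic_statistics import IncrementalBasicStatistics

    incbs = IncrementalBasicStatistics(result_options=["mean", "variance"])

    rng = np.random.default_rng(0)
    for _ in range(10):                       # ten batches arriving one at a time
        batch = rng.standard_normal((1000, 5))
        incbs.partial_fit(batch)              # only the current batch is in memory

    print(incbs.mean_)        # statistics accumulated over all 10,000 rows
    print(incbs.variance_)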
28 changes: 23 additions & 5 deletions sklearnex/covariance/incremental_covariance.py
@@ -44,9 +44,9 @@
@control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
class IncrementalEmpiricalCovariance(BaseEstimator):
"""
Incremental estimator for covariance.
Allows to compute empirical covariance estimated by maximum
likelihood method if data are splitted into batches.
Maximum likelihood covariance estimator that allows for estimation when the data are split into
batches. The user can use the ``partial_fit`` method to provide a single batch of data or use the ``fit`` method to provide
the entire dataset.
Parameters
----------
@@ -79,13 +79,31 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
n_samples_seen_ : int
The number of samples processed by the estimator. Will be reset on
new calls to fit, but increments across ``partial_fit`` calls.
new calls to ``fit``, but increments across ``partial_fit`` calls.
batch_size_ : int
Inferred batch size from ``batch_size``.
n_features_in_ : int
Number of features seen during :term:`fit` `partial_fit`.
Number of features seen during ``fit`` or ``partial_fit``.
Examples
--------
>>> import numpy as np
>>> from sklearnex.covariance import IncrementalEmpiricalCovariance
>>> inccov = IncrementalEmpiricalCovariance(batch_size=1)
>>> X = np.array([[1, 2], [3, 4]])
>>> inccov.partial_fit(X[:1])
>>> inccov.partial_fit(X[1:])
>>> inccov.covariance_
np.array([[1., 1.],[1., 1.]])
>>> inccov.location_
np.array([2., 3.])
>>> inccov.fit(X)
>>> inccov.covariance_
np.array([[1., 1.],[1., 1.]])
>>> inccov.location_
np.array([2., 3.])
"""

_onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)
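To make the two-sample doctest above concrete, the sketch below relates the estimator to the maximum-likelihood definition it documents — normalization by n_samples, which matches np.cov with bias=True. The comparison data are illustrative and not part of the commit.

    import numpy as np
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    rng = np.random.default_rng(42)
    X = rng.standard_normal((500, 3))

    inccov = IncrementalEmpiricalCovariance(batch_size=100)
    inccov.fit(X)                                        # internally processed in batches of 100 rows

    reference = np.cov(X, rowvar=False, bias=True)       # maximum-likelihood normalization (1/N)
    print(np.allclose(inccov.covariance_, reference))    # expected: True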