Merge pull request #8 from Doctorado-ML/fix_python_random_init

Fix python random init
Doctorado-ML · Apr 29, 2022 · 7300bd6
2 parents 98cadc7 + 114f53d
commit 7300bd6
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 12 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,23 +1,23 @@
 repos:
   - repo: https://github.com/ambv/black
-    rev: 20.8b1
+    rev: 22.3.0
     hooks:
       - id: black
         exclude: ".virtual_documents"
         language_version: python3.9
   - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.8.4
+    rev: 3.9.2
     hooks:
       - id: flake8
         exclude: ".virtual_documents"
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: "v0.790" # Use the sha / tag you want to point at
+    rev: "v0.942" # Use the sha / tag you want to point at
     hooks:
       - id: mypy
         #args: [--strict, --ignore-missing-imports]
         exclude: odte/tests
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.4.0
+    rev: v4.2.0
     hooks:
       - id: trailing-whitespace
       - id: check-case-conflict

diff --git a/odte/Odte.py b/odte/Odte.py
@@ -15,7 +15,7 @@
     check_classification_targets,
 )
 from sklearn.base import clone, BaseEstimator, ClassifierMixin  # type: ignore
-from sklearn.utils import check_random_state
+from sklearn.utils import check_random_state  # type: ignore
 from sklearn.ensemble import BaseEnsemble  # type: ignore
 from sklearn.utils.validation import (  # type: ignore
     check_is_fitted,
@@ -30,7 +30,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
     def __init__(
         self,
         # n_jobs = -1 to use all available cores
-        n_jobs: int = 1,
+        n_jobs: int = -1,
         base_estimator: BaseEstimator = None,
         random_state: int = 0,
         max_features: Optional[Union[str, int, float]] = None,
@@ -141,8 +141,10 @@ def _parallel_build_tree(
         hyperparams_.update(dict(random_state=random_seed))
         clf.set_params(**hyperparams_)
         n_samples = X.shape[0]
-        # bootstrap
+        # initialize random boxes
+        random.seed(random_seed)
         random_box = check_random_state(random_seed)
+        # bootstrap
         indices = random_box.randint(0, n_samples, boot_samples)
         # update weights with the chosen samples
         weights_update = np.bincount(indices, minlength=n_samples)

diff --git a/odte/_version.py b/odte/_version.py
@@ -1 +1 @@
-__version__ = "0.3.2"
+__version__ = "0.3.3"
diff --git a/odte/tests/Odte_tests.py b/odte/tests/Odte_tests.py
@@ -1,7 +1,6 @@
 # type: ignore
 import unittest
 import os
-import random
 import warnings
 import json
 from sklearn.exceptions import ConvergenceWarning, NotFittedError
@@ -46,7 +45,9 @@ def test_initialize_max_feature(self):
         )
         for max_features in [4, 0.4, 1.0, None, "auto", "sqrt", "log2"]:
             tclf = Odte(
-                random_state=self._random_state, max_features=max_features
+                random_state=self._random_state,
+                max_features=max_features,
+                n_jobs=1,
             )
             tclf.fit(X, y)
             computed = tclf._get_random_subspace(X, y, tclf.max_features_)
@@ -135,7 +136,6 @@ def test_score_splitter_max_features(self):
             0.97,  # iwss None
             0.97,  # cfs None
         ]
-        random.seed(self._random_state)
         for max_features in ["auto", None]:
             for splitter in [
                 "best",
@@ -149,6 +149,7 @@ def test_score_splitter_max_features(self):
                     base_estimator=Stree(),
                     random_state=self._random_state,
                     n_estimators=3,
+                    n_jobs=1,
                 )
                 tclf.set_params(
                     **dict(
@@ -160,7 +161,7 @@ def test_score_splitter_max_features(self):
                 expected = results.pop(0)
                 computed = tclf.fit(X, y).score(X, y)
                 # print(computed, splitter, max_features)
-                self.assertAlmostEqual(expected, computed)
+                self.assertAlmostEqual(expected, computed, msg=splitter)
 
     def test_generate_subspaces(self):
         features = 250