From 064ffa4077803ec6f4bfdc9e40dd96d737fd17a2 Mon Sep 17 00:00:00 2001
From: Gareth Jones
Date: Fri, 22 May 2020 18:42:32 +0100
Subject: [PATCH] V0.4 (#7)

* Typo
* General tidy
* Address sonar issues
* Address sonar issues
* Update requirements
* General tidy
---
 README.md | 13 +-
 incremental_trees/__init__.py | 2 +-
 incremental_trees/add_ins/__init__.py | 0
 .../add_ins/classifier_additions.py | 27 +
 .../add_ins/classifier_overloads.py | 65 ++
 incremental_trees/add_ins/forest_additions.py | 83 +++
 incremental_trees/add_ins/forest_overloads.py | 46 ++
 .../add_ins/regressor_additions.py | 9 +
 .../add_ins/regressor_overloads.py | 8 +
 .../add_ins/sklearn_overloads.py | 46 ++
 incremental_trees/models/__init__.py | 0
 .../models/classification/__init__.py | 0
 .../models/classification/streaming_extc.py | 74 +++
 .../models/classification/streaming_rfc.py | 92 +++
 .../models/regression/__init__.py | 0
 .../models/regression/streaming_extr.py | 65 ++
 .../models/regression/streaming_rfr.py | 65 ++
 incremental_trees/trees.py | 565 +-----------------
 requirements.txt | 13 +-
 setup.py | 6 +-
 tests/common/__init__.py | 0
 tests/{data.py => common/data_fixture.py} | 5 +-
 tests/{params.py => common/param_fixtures.py} | 0
 tests/integration/base.py | 3 +-
 .../incremental_trees/test_trees.py | 103 ++--
 .../test_trees_benchmarks.py | 36 +-
 .../incremental_trees/test_trees_dask.py | 37 +-
 .../incremental_trees/test_trees_grids.py | 178 +++---
 .../test_trees_inconsistent_classes.py | 189 +++---
 29 files changed, 895 insertions(+), 835 deletions(-)
 create mode 100644 incremental_trees/add_ins/__init__.py
 create mode 100644 incremental_trees/add_ins/classifier_additions.py
 create mode 100644 incremental_trees/add_ins/classifier_overloads.py
 create mode 100644 incremental_trees/add_ins/forest_additions.py
 create mode 100644 incremental_trees/add_ins/forest_overloads.py
 create mode 100644 incremental_trees/add_ins/regressor_additions.py
 create mode 100644 incremental_trees/add_ins/regressor_overloads.py
 create mode 100644 incremental_trees/add_ins/sklearn_overloads.py
 create mode 100644 incremental_trees/models/__init__.py
 create mode 100644 incremental_trees/models/classification/__init__.py
 create mode 100644 incremental_trees/models/classification/streaming_extc.py
 create mode 100644 incremental_trees/models/classification/streaming_rfc.py
 create mode 100644 incremental_trees/models/regression/__init__.py
 create mode 100644 incremental_trees/models/regression/streaming_extr.py
 create mode 100644 incremental_trees/models/regression/streaming_rfr.py
 create mode 100644 tests/common/__init__.py
 rename tests/{data.py => common/data_fixture.py} (98%)
 rename tests/{params.py => common/param_fixtures.py} (100%)

diff --git a/README.md b/README.md
index 5d5f773..7fa730a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Incremental trees v0.3.3
+# Incremental trees v0.4.0
 ![The overcomplicated tests are...](https://github.com/garethjns/IncrementalTrees/workflows/The%20overcomplicated%20tests%20are.../badge.svg)

 Adds a partial fit method to sklearn's forest estimators (currently RandomForestClassifier/Regressor and ExtraTreesClassifier/Regressor) to allow [incremental training](https://scikit-learn.org/0.15/modules/scaling_strategies.html) without being limited to a linear model. Works with or without [Dask-ml's Incremental](http://ml.dask.org/incremental.html).

 Quick start:

 1) Install from PyPI.
 ````bash
- git clone https://github.com/garethjns/IncrementalTrees.git
- python -m pip install --upgrade pip setuptools wheel
- cd IncrementalTrees
- pip install .
+ pip install incremental_trees
 ````
@@ -39,7 +36,7 @@ Feeds .partial_fit() with randomly sampled rows.
 ````python
 import numpy as np
 from sklearn.datasets import make_blobs
-from incremental_trees.trees import StreamingRFC
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC

 # Generate some data in memory
 x, y = make_blobs(n_samples=int(2e5), random_state=0, n_features=40,
@@ -65,7 +62,7 @@ import dask_ml.datasets
 from dask_ml.wrappers import Incremental
 from dask.distributed import Client, LocalCluster
 from dask import delayed
-from incremental_trees.trees import StreamingRFC
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC

 # Generate some data out-of-core
 x, y = dask_ml.datasets.make_blobs(n_samples=2e5, chunks=1e4, random_state=0,
@@ -138,6 +135,8 @@ srfc = StreamingRFC(n_estimators_per_chunk=1,
 ````

 # Version history
+## v0.4
+ - Refactor and tidy, try with new versions of Dask/sklearn
 ## v0.3.1-3
 - Update Dask versions
 ## v0.3
diff --git a/incremental_trees/__init__.py b/incremental_trees/__init__.py
index 03174f4..222c11c 100644
--- a/incremental_trees/__init__.py
+++ b/incremental_trees/__init__.py
@@ -1 +1 @@
-__version__ = '0.3.3'
\ No newline at end of file
+__version__ = '0.4.0'
\ No newline at end of file
diff --git a/incremental_trees/add_ins/__init__.py b/incremental_trees/add_ins/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/incremental_trees/add_ins/classifier_additions.py b/incremental_trees/add_ins/classifier_additions.py
new file mode 100644
index 0000000..b6f0661
--- /dev/null
+++ b/incremental_trees/add_ins/classifier_additions.py
@@ -0,0 +1,27 @@
+from typing import List
+
+import numpy as np
+
+from incremental_trees.add_ins.forest_additions import ForestAdditions
+from incremental_trees.add_ins.sklearn_overloads import _check_partial_fit_first_call
+
+
+class ClassifierAdditions(ForestAdditions):
+    """
+    Additional functions specific to classifiers.
+    """
+
+    def _check_classes(self, classes: List[int]):
+        """Set classes if they haven't been set yet, otherwise do nothing."""
+
+        # Set classes for the forest (this only needs to be done once, not for each individual tree -
+        # the trees' classes_ are set by .fit() using the classes available in each subset).
+        # Checks classes_ is set or provided; returns False if there's nothing to do.
+        classes_need_setting = _check_partial_fit_first_call(self, classes)
+
+        # If the classes aren't set yet, set them.
+        # The call above will error if they're not set and classes=None.
+        if classes_need_setting:
+            self.classes_ = np.array(classes)
+            self.n_classes_ = len(classes)
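The effect of `_check_classes` is easiest to see from the outside: the forest-level `classes_` is pinned by the first `partial_fit` call, while each underlying tree only knows the labels present in its own chunk. A minimal sketch, using the StreamingRFC this patch adds further down (illustrative values only):

````python
import numpy as np
from sklearn.datasets import make_blobs
from incremental_trees.models.classification.streaming_rfc import StreamingRFC

x, y = make_blobs(n_samples=1000, centers=3, n_features=10, random_state=0)

srfc = StreamingRFC(n_estimators_per_chunk=1, max_n_estimators=np.inf)

# The first chunk deliberately contains only classes 0 and 1; passing the
# full class list up front pins the forest's output shape to all 3 classes.
first_chunk = np.isin(y, [0, 1])
srfc.partial_fit(x[first_chunk], y[first_chunk], classes=[0, 1, 2])

print(srfc.classes_)                 # [0 1 2] - forest level, fixed once
print(srfc.estimators_[0].classes_)  # [0 1] - tree level, from its own chunk
print(srfc.predict_proba(x).shape)   # (1000, 3)
````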
diff --git a/incremental_trees/add_ins/classifier_overloads.py b/incremental_trees/add_ins/classifier_overloads.py
new file mode 100644
index 0000000..eb0920d
--- /dev/null
+++ b/incremental_trees/add_ins/classifier_overloads.py
@@ -0,0 +1,65 @@
+import warnings
+from typing import Union
+
+import numpy as np
+import pandas as pd
+
+from incremental_trees.add_ins.forest_overloads import ForestOverloads
+
+
+class ClassifierOverloads(ForestOverloads):
+    """
+    Overloaded methods specific to classifiers.
+    """
+
+    def predict_proba(self, x: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+        """
+        Call predict_proba on each tree and accumulate. This handles possibly inconsistent shapes, but isn't
+        parallel.
+
+        Cases where not all classes are present in the first or subsequent subsets need to be
+        handled. For the RandomForestClassifier, tree predictions are averaged in the
+        sklearn.ensemble.forest.accumulate_prediction function. This sums the output matrix with dimensions
+        n rows x n classes and fails if the class dimension differs.
+        The class dimension is defined at the individual estimator level during the .fit() call, which sets the
+        following attributes:
+          - self.n_outputs_ = y.shape[1], which is then used by _validate_y_class_weight(), always called in .fit(),
+            to set:
+            - self.classes_
+            - self.n_classes_
+
+        The .predict() method (sklearn.tree.tree.BaseDecisionTree.predict()) sets the output shape using:
+            # Classification
+            if is_classifier(self):
+                if self.n_outputs_ == 1:
+                    return self.classes_.take(np.argmax(proba, axis=1), axis=0)
+                else:
+                    [Not considering this yet]
+
+        :param x: Data to predict on.
+        :return: Normalised probability predictions with shape (n rows, n classes).
+        """
+        # Prepare expected output shape
+        preds = np.zeros(shape=(x.shape[0], self.n_classes_),
+                         dtype=np.float32)
+        counts = np.zeros(shape=(x.shape[0], self.n_classes_),
+                          dtype=np.int16)
+
+        for e in self.estimators_:
+            # Get the prediction from the tree
+            est_preds = e.predict_proba(x)
+            # Get the indexes of the classes present
+            present_classes = e.classes_.astype(int)
+            # Sum these into the correct array columns
+            preds[:, present_classes] += est_preds
+            counts[:, present_classes] += 1
+
+        # Normalise predictions against counts
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", RuntimeWarning)
+            norm_prob = preds / counts
+
+        # And remove nans (0/0) and infs (n/0)
+        norm_prob[np.isnan(norm_prob) | np.isinf(norm_prob)] = 0
+
+        return norm_prob
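The accumulation logic above can be demonstrated standalone with plain numpy. Here two hypothetical estimator outputs (stand-ins for the real trees) that saw different class subsets are summed into a fixed three-class output, with the never-seen class falling back to zero:

````python
import numpy as np

n_rows, n_classes = 4, 3
preds = np.zeros(shape=(n_rows, n_classes), dtype=np.float32)
counts = np.zeros(shape=(n_rows, n_classes), dtype=np.int16)

# (classes present in the estimator, its predict_proba output) pairs.
est_outputs = [(np.array([0, 1]), np.full((n_rows, 2), 0.5)),  # saw classes 0 and 1
               (np.array([1]), np.ones((n_rows, 1)))]          # only ever saw class 1

for present_classes, est_preds in est_outputs:
    preds[:, present_classes] += est_preds  # columns aligned by class label
    counts[:, present_classes] += 1

with np.errstate(divide='ignore', invalid='ignore'):
    norm_prob = preds / counts  # class 2 was never seen: 0/0 -> nan

norm_prob[np.isnan(norm_prob) | np.isinf(norm_prob)] = 0
print(norm_prob)  # every row is [0.5, 0.75, 0.0]
````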
diff --git a/incremental_trees/add_ins/forest_additions.py b/incremental_trees/add_ins/forest_additions.py
new file mode 100644
index 0000000..e0a49f4
--- /dev/null
+++ b/incremental_trees/add_ins/forest_additions.py
@@ -0,0 +1,83 @@
+import time
+from typing import Union
+
+import numpy as np
+import pandas as pd
+
+
+class ForestAdditions:
+    def partial_fit(self, X: Union[np.array, pd.DataFrame], y: Union[np.array, pd.Series],
+                    classes: Union[list, np.ndarray] = None):
+        """
+        Fit a single DTC using the given subset of x and y.
+
+        This calls .fit(), which is overloaded, but flags pf_call=True so that .fit() handles calling super().fit().
+
+        For classifiers:
+          - The first call needs to be supplied with the expected classes (similar to existing models with
+            .partial_fit()) in case not all classes are present in the first subset.
+
+        This object sets classes_ and n_classes_ depending on the supplied classes. The individual trees set theirs
+        depending on the data available in the subset. The predict_proba method is modified to standardise shape to
+        the dimensions defined in this object.
+
+        For regressors:
+          - self._check_classes is overloaded with a dummy method.
+
+        :param X: Data subset to fit on.
+        :param y: Labels for the subset.
+        :param classes: Expected classes across all data; required on the first call for classifiers.
+        :return:
+        """
+        if self.verbose > 1:
+            print(f"PF Call with set classes: "
+                  f"{getattr(self, 'classes_', '[no classes attr]')} and input classes {classes}")
+
+        self._check_classes(classes=classes)
+
+        # Fit the next estimator, if not done
+        if self._fit_estimators < self.max_n_estimators:
+            t0 = time.time()
+            self.fit(X, y,
+                     pf_call=True,
+                     classes_=getattr(self, 'classes_', None))  # Pass classes for enforcement, if classifier.
+            t1 = time.time()
+
+            if self.verbose > 1:
+                print(f"Fit estimators {self._fit_estimators} - {self._fit_estimators + self.n_estimators_per_chunk} "
+                      f"/ {self.max_n_estimators}")
+                print(f"Model reports {len(self.estimators_)}")
+                print(f"Fit time: {round(t1 - t0, 2)}")
+                print(len(self.estimators_))
+            self._fit_estimators += self.n_estimators_per_chunk
+
+            # If still not done, prep to fit next
+            if self._fit_estimators < self.max_n_estimators:
+                self.n_estimators += self.n_estimators_per_chunk
+
+        else:
+            if self.verbose > 0:
+                print('Done')
+
+        return self
+
+    def _sampled_partial_fit(self,
+                             x: Union[np.array, pd.DataFrame], y: Union[np.ndarray, pd.Series]):
+        """
+        This feeds partial_fit with random samples based on the spf_ parameters. Used by .fit() when not using dask.
+
+        :param x: Data.
+        :param y: Labels.
+        :return:
+        """
+
+        n_samples = int(self.spf_sample_prop * x.shape[0])
+
+        for _ in range(self.spf_n_fits):
+            idx = np.random.randint(0, x.shape[0], n_samples)
+
+            if self.verbose > 0:
+                print(f"_sampled_partial_fit size: {idx.shape}")
+
+            self.partial_fit(x[idx, :], y[idx],
+                             classes=np.unique(y))
+
+        return self
diff --git a/incremental_trees/add_ins/forest_overloads.py b/incremental_trees/add_ins/forest_overloads.py
new file mode 100644
index 0000000..e746fee
--- /dev/null
+++ b/incremental_trees/add_ins/forest_overloads.py
@@ -0,0 +1,46 @@
+import numpy as np
+
+
+class ForestOverloads:
+    def set_params(self,
+                   **kwargs):
+        """
+        Ensure warm_start is set to True, then set the other params as usual.
+
+        :param kwargs: Params to set.
+        """
+        # Warm start should be True to get .fit() to keep existing estimators.
+        kwargs['warm_start'] = True
+
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
+        return self
+
+    def fit(self, *args,
+            pf_call: bool = False,
+            classes_: np.ndarray = None):
+        """
+        This fit handles calling either super().fit() or _sampled_partial_fit(), depending on the caller.
+
+        :param pf_call: True if called from partial_fit, in which case super().fit() is called rather than getting
+            stuck in a recursive loop.
+        :param classes_: On partial_fit calls, classes are passed from self.classes_, which will already have been
+            set. These are re-set after the call to super()'s fit, which would otherwise change them based on the
+            observed data.
+        """
+
+        if not self.dask_feeding and not pf_call:
+            if self.verbose > 0:
+                print('Feeding with spf')
+            self._sampled_partial_fit(*args)
+
+        else:
+
+            if self.verbose > 0:
+                print('Fitting from a partial_fit call')
+            super().fit(*args)
+            if classes_ is not None:
+                self.classes_ = classes_
+                self.n_classes_ = len(classes_)
+
+        return self
diff --git a/incremental_trees/add_ins/regressor_additions.py b/incremental_trees/add_ins/regressor_additions.py
new file mode 100644
index 0000000..7018f89
--- /dev/null
+++ b/incremental_trees/add_ins/regressor_additions.py
@@ -0,0 +1,9 @@
+from incremental_trees.add_ins.forest_additions import ForestAdditions
+
+
+class RegressorAdditions(ForestAdditions):
+    def _check_classes(self, **kwargs) -> None:
+        """
+        No need to check classes with the regressors.
+        """
+        pass
diff --git a/incremental_trees/add_ins/regressor_overloads.py b/incremental_trees/add_ins/regressor_overloads.py
new file mode 100644
index 0000000..020c3cc
--- /dev/null
+++ b/incremental_trees/add_ins/regressor_overloads.py
@@ -0,0 +1,8 @@
+from incremental_trees.add_ins.forest_overloads import ForestOverloads
+
+
+class RegressorOverloads(ForestOverloads):
+    """
+    Nothing specific to overload for the regressors; predict doesn't need to deal with classes.
+    """
+    pass
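When `dask_feeding=False`, a single `.fit()` call drives the whole sampled feeding loop defined above. A sketch of that path, using the StreamingRFC added later in this patch (parameter values are illustrative only):

````python
import numpy as np
from sklearn.datasets import make_blobs
from incremental_trees.models.classification.streaming_rfc import StreamingRFC

x, y = make_blobs(n_samples=int(1e4), n_features=20, centers=2, random_state=0)

srfc = StreamingRFC(n_estimators_per_chunk=5,
                    max_n_estimators=100,
                    dask_feeding=False,   # .fit() feeds .partial_fit() itself
                    spf_n_fits=20,        # 20 partial_fit calls...
                    spf_sample_prop=0.1)  # ...each on a random 10% of the rows

srfc.fit(x, y)
print(len(srfc.estimators_), srfc.score(x, y))  # 100 estimators
````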
diff --git a/incremental_trees/add_ins/sklearn_overloads.py b/incremental_trees/add_ins/sklearn_overloads.py
new file mode 100644
index 0000000..fd2ca7c
--- /dev/null
+++ b/incremental_trees/add_ins/sklearn_overloads.py
@@ -0,0 +1,46 @@
+import warnings
+
+import numpy as np
+from sklearn.utils.multiclass import unique_labels
+
+
+def _check_partial_fit_first_call(clf,
+                                  classes=None):
+    """
+    Modified sklearn function: if classes are inconsistent on a later call, warn and reuse the previous classes,
+    on the assumption that the first call's specification was correct, rather than raising an error.
+
+    Private helper function for factorizing common classes param logic.
+
+    Estimators that implement the ``partial_fit`` API need to be provided with
+    the list of possible classes at the first call to partial_fit.
+
+    Modification:
+    Subsequent calls to partial_fit do not check that ``classes`` is still
+    consistent with a previous value of ``clf.classes_`` when provided.
+
+    This function returns True if it detects that this was the first call to
+    ``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also
+    set on ``clf``.
+    """
+
+    if getattr(clf, 'classes_', None) is None and classes is None:
+        raise ValueError("classes must be passed on the first call "
+                         "to partial_fit.")
+
+    elif classes is not None:
+        if getattr(clf, 'classes_', None) is not None:
+            if not np.array_equal(clf.classes_, unique_labels(classes)):
+                # Don't error here:
+                # Instead, use the previous classes setting, which must be correct from the first setting
+                warnings.warn("Classes differ on this call; ignoring on the assumption the first call was correct.")
+                return False
+
+        else:
+            # This is the first call to partial_fit
+            clf.classes_ = unique_labels(classes)
+            return True
+
+    # classes is None and clf.classes_ has already previously been set:
+    # nothing to do
+    return False
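The modified helper's contract, shown against a bare stand-in object (a sketch; any object with a settable `classes_` attribute works for illustration):

````python
import types

from incremental_trees.add_ins.sklearn_overloads import _check_partial_fit_first_call

clf = types.SimpleNamespace(classes_=None)

print(_check_partial_fit_first_call(clf, classes=[0, 1, 2]))  # True: first call, sets clf.classes_
print(clf.classes_)                                           # [0 1 2]
print(_check_partial_fit_first_call(clf, classes=[0, 1, 2]))  # False: consistent, nothing to do
print(_check_partial_fit_first_call(clf, classes=[0, 1]))     # False: warns, keeps first-call classes
print(_check_partial_fit_first_call(clf, classes=None))       # False: classes_ already set
````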
diff --git a/incremental_trees/models/__init__.py b/incremental_trees/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/incremental_trees/models/classification/__init__.py b/incremental_trees/models/classification/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/incremental_trees/models/classification/streaming_extc.py b/incremental_trees/models/classification/streaming_extc.py
new file mode 100644
index 0000000..9f82686
--- /dev/null
+++ b/incremental_trees/models/classification/streaming_extc.py
@@ -0,0 +1,74 @@
+import numpy as np
+from sklearn.ensemble import ExtraTreesClassifier
+from sklearn.tree import ExtraTreeClassifier
+
+from incremental_trees.add_ins.classifier_additions import ClassifierAdditions
+from incremental_trees.add_ins.classifier_overloads import ClassifierOverloads
+
+
+class StreamingEXTC(ClassifierAdditions, ClassifierOverloads, ExtraTreesClassifier):
+    """Overload sklearn.ensemble.ExtraTreesClassifier to add a partial fit method and new params."""
+
+    def __init__(self,
+                 n_estimators_per_chunk: int = 1,
+                 n_estimators: int = None,
+                 max_n_estimators=np.inf,
+                 criterion="gini",
+                 max_depth=None,
+                 min_samples_split=2,
+                 min_samples_leaf=1,
+                 min_weight_fraction_leaf=0.,
+                 max_features="auto",
+                 max_leaf_nodes=None,
+                 min_impurity_decrease=0.,
+                 min_impurity_split=None,
+                 bootstrap=False,
+                 oob_score=False,
+                 n_jobs=None,
+                 random_state=None,
+                 verbose=0,
+                 warm_start=True,
+                 class_weight=None,
+                 dask_feeding: bool = True,
+                 spf_n_fits=100,
+                 spf_sample_prop: float = 0.1):
+        super(ExtraTreesClassifier, self).__init__(
+            base_estimator=ExtraTreeClassifier(),
+            n_estimators=n_estimators_per_chunk,
+            estimator_params=("criterion", "max_depth", "min_samples_split",
+                              "min_samples_leaf", "min_weight_fraction_leaf",
+                              "max_features", "max_leaf_nodes",
+                              "min_impurity_decrease", "min_impurity_split",
+                              "random_state"),
+            bootstrap=bootstrap,
+            oob_score=oob_score,
+            n_jobs=n_jobs,
+            random_state=random_state,
+            verbose=verbose,
+            warm_start=warm_start,
+            class_weight=class_weight)
+
+        self.max_n_estimators: int = None
+        self._fit_estimators: int = 0
+        self.classes_: np.array = None  # NB: Needs to be array, not list.
+        self.n_classes_: int = None
+
+        self._fit_estimators = 0
+        self.max_n_estimators = max_n_estimators
+        self.n_estimators_per_chunk = n_estimators
+        self.criterion = criterion
+        self.max_depth = max_depth
+        self.min_samples_split = min_samples_split
+        self.min_samples_leaf = min_samples_leaf
+        self.min_weight_fraction_leaf = min_weight_fraction_leaf
+        self.max_features = max_features
+        self.max_leaf_nodes = max_leaf_nodes
+        self.min_impurity_decrease = min_impurity_decrease
+        self.min_impurity_split = min_impurity_split
+
+        # Set additional params.
+        self.set_params(n_estimators_per_chunk=n_estimators_per_chunk,
+                        max_n_estimators=max_n_estimators,
+                        spf_n_fits=spf_n_fits,
+                        spf_sample_prop=spf_sample_prop,
+                        dask_feeding=dask_feeding)
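Usage mirrors the README's StreamingRFC example; a brief sketch of feeding StreamingEXTC chunk by chunk (illustrative parameter values):

````python
import numpy as np
from sklearn.datasets import make_blobs
from incremental_trees.models.classification.streaming_extc import StreamingEXTC

x, y = make_blobs(n_samples=int(1e4), n_features=20, centers=3, random_state=0)

sext = StreamingEXTC(n_estimators_per_chunk=4, max_n_estimators=40)

chunk_size = 1000
for _ in range(10):
    idx = np.random.randint(0, x.shape[0], chunk_size)
    sext.partial_fit(x[idx, :], y[idx], classes=np.unique(y))

print(len(sext.estimators_), sext.score(x, y))  # 40 estimators
````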
diff --git a/incremental_trees/models/classification/streaming_rfc.py b/incremental_trees/models/classification/streaming_rfc.py
new file mode 100644
index 0000000..54199b3
--- /dev/null
+++ b/incremental_trees/models/classification/streaming_rfc.py
@@ -0,0 +1,92 @@
+import warnings
+
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+
+from incremental_trees.add_ins.classifier_additions import ClassifierAdditions
+from incremental_trees.add_ins.classifier_overloads import ClassifierOverloads
+
+
+class StreamingRFC(ClassifierAdditions, ClassifierOverloads, RandomForestClassifier):
+    """
+    Overload sklearn.ensemble.RandomForestClassifier to add a partial fit method and new params.
+
+    Note this init has a slightly different structure to ExtraTreesClassifier/Regressor and RandomForestRegressor.
+    """
+
+    def __init__(self,
+                 bootstrap=True,
+                 class_weight=None,
+                 criterion='gini',
+                 max_depth=None,
+                 max_features='auto',
+                 max_leaf_nodes=None,
+                 min_impurity_decrease=0.0,
+                 min_impurity_split=None,
+                 min_samples_leaf=1,
+                 min_samples_split=2,
+                 min_weight_fraction_leaf=0.0,
+                 n_estimators_per_chunk: int = 1,
+                 n_jobs=None,
+                 oob_score=False,
+                 random_state=None,
+                 verbose=0,
+                 warm_start: bool = True,
+                 dask_feeding: bool = True,
+                 max_n_estimators=10,
+                 spf_n_fits=100,
+                 spf_sample_prop=0.1) -> None:
+        """
+        :param bootstrap:
+        :param class_weight:
+        :param criterion:
+        :param max_depth:
+        :param max_features:
+        :param max_leaf_nodes:
+        :param min_impurity_decrease:
+        :param min_impurity_split:
+        :param min_samples_leaf:
+        :param min_samples_split:
+        :param min_weight_fraction_leaf:
+        :param n_estimators_per_chunk: Estimators per chunk to fit.
+        :param n_jobs:
+        :param oob_score:
+        :param random_state:
+        :param verbose:
+        :param warm_start:
+        :param max_n_estimators: Total max number of estimators to fit.
+        :param verb: If > 0, display debugging info during fit.
+        """
+
+        # Run the super init, which also calls other parent inits to handle other params (like base estimator)
+        super().__init__()
+
+        self.max_n_estimators: int = None
+        self._fit_estimators: int = 0
+        self.classes_: np.array = None  # NB: Needs to be array, not list.
+        self.n_classes_: int = None
+
+        self.set_params(bootstrap=bootstrap,
+                        class_weight=class_weight,
+                        criterion=criterion,
+                        max_depth=max_depth,
+                        max_features=max_features,
+                        max_leaf_nodes=max_leaf_nodes,
+                        min_impurity_decrease=min_impurity_decrease,
+                        min_impurity_split=min_impurity_split,
+                        min_samples_leaf=min_samples_leaf,
+                        min_samples_split=min_samples_split,
+                        min_weight_fraction_leaf=min_weight_fraction_leaf,
+                        n_estimators_per_chunk=n_estimators_per_chunk,
+                        n_estimators=n_estimators_per_chunk,
+                        n_jobs=n_jobs,
+                        oob_score=oob_score,
+                        random_state=random_state,
+                        verbose=verbose,
+                        warm_start=warm_start,
+                        _fit_estimators=0,
+                        dask_feeding=dask_feeding,
+                        max_n_estimators=max_n_estimators,
+                        verb=0,
+                        spf_n_fits=spf_n_fits,
+                        spf_sample_prop=spf_sample_prop)
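The regressors that follow work the same way, minus the classes bookkeeping. A sketch with the StreamingRFR defined just below (illustrative values):

````python
import numpy as np
from sklearn.datasets import make_regression
from incremental_trees.models.regression.streaming_rfr import StreamingRFR

x, y = make_regression(n_samples=int(1e4), random_state=0, n_features=40)

srfr = StreamingRFR(n_estimators_per_chunk=5, max_n_estimators=100)

chunk_size = 2000
for _ in range(20):
    idx = np.random.randint(0, x.shape[0], chunk_size)
    srfr.partial_fit(x[idx], y[idx])  # no classes needed for regressors

print(len(srfr.estimators_), srfr.score(x, y))  # 100 estimators
````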
diff --git a/incremental_trees/models/regression/__init__.py b/incremental_trees/models/regression/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/incremental_trees/models/regression/streaming_extr.py b/incremental_trees/models/regression/streaming_extr.py
new file mode 100644
index 0000000..6d565d2
--- /dev/null
+++ b/incremental_trees/models/regression/streaming_extr.py
@@ -0,0 +1,65 @@
+import numpy as np
+from sklearn.ensemble import ExtraTreesRegressor
+from sklearn.tree import ExtraTreeRegressor
+
+from incremental_trees.add_ins.regressor_additions import RegressorAdditions
+from incremental_trees.add_ins.regressor_overloads import RegressorOverloads
+
+
+class StreamingEXTR(RegressorAdditions, RegressorOverloads, ExtraTreesRegressor):
+    def __init__(self,
+                 n_estimators_per_chunk: int = 1,
+                 n_estimators='warn',
+                 max_n_estimators=np.inf,
+                 criterion="mse",
+                 max_depth=None,
+                 min_samples_split=2,
+                 min_samples_leaf=1,
+                 min_weight_fraction_leaf=0.,
+                 max_features="auto",
+                 max_leaf_nodes=None,
+                 min_impurity_decrease=0.,
+                 min_impurity_split=None,
+                 bootstrap=False,
+                 oob_score=False,
+                 n_jobs=None,
+                 random_state=None,
+                 verbose=0,
+                 warm_start=True,
+                 dask_feeding: bool = True,
+                 spf_n_fits: int = 100,
+                 spf_sample_prop: float = 0.1):
+        super(ExtraTreesRegressor, self).__init__(
+            base_estimator=ExtraTreeRegressor(),
+            n_estimators=n_estimators_per_chunk,
+            estimator_params=("criterion", "max_depth", "min_samples_split",
+                              "min_samples_leaf", "min_weight_fraction_leaf",
+                              "max_features", "max_leaf_nodes",
+                              "min_impurity_decrease", "min_impurity_split",
+                              "random_state"),
+            bootstrap=bootstrap,
+            oob_score=oob_score,
+            n_jobs=n_jobs,
+            random_state=random_state,
+            verbose=verbose,
+            warm_start=warm_start)
+
+        self._fit_estimators = 0
+        self.max_n_estimators = max_n_estimators
+        self.n_estimators_per_chunk = n_estimators
+        self.criterion = criterion
+        self.max_depth = max_depth
+        self.min_samples_split = min_samples_split
+        self.min_samples_leaf = min_samples_leaf
+        self.min_weight_fraction_leaf = min_weight_fraction_leaf
+        self.max_features = max_features
+        self.max_leaf_nodes = max_leaf_nodes
+        self.min_impurity_decrease = min_impurity_decrease
+        self.min_impurity_split = min_impurity_split
+
+        # Set additional params.
+        self.set_params(n_estimators_per_chunk=n_estimators_per_chunk,
+                        max_n_estimators=max_n_estimators,
+                        spf_n_fits=spf_n_fits,
+                        spf_sample_prop=spf_sample_prop,
+                        dask_feeding=dask_feeding)
diff --git a/incremental_trees/models/regression/streaming_rfr.py b/incremental_trees/models/regression/streaming_rfr.py
new file mode 100644
index 0000000..a028c93
--- /dev/null
+++ b/incremental_trees/models/regression/streaming_rfr.py
@@ -0,0 +1,65 @@
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.tree import DecisionTreeRegressor
+
+from incremental_trees.add_ins.regressor_additions import RegressorAdditions
+from incremental_trees.add_ins.regressor_overloads import RegressorOverloads
+
+
+class StreamingRFR(RegressorAdditions, RegressorOverloads, RandomForestRegressor):
+    """Overload sklearn.ensemble.RandomForestRegressor to add a partial fit method and new params."""
+
+    def __init__(self,
+                 n_estimators='warn',
+                 criterion="mse",
+                 max_depth=None,
+                 min_samples_split=2,
+                 min_samples_leaf=1,
+                 min_weight_fraction_leaf=0.,
+                 max_features="auto",
+                 max_leaf_nodes=None,
+                 min_impurity_decrease=0.,
+                 min_impurity_split=None,
+                 bootstrap=True,
+                 oob_score=False,
+                 n_jobs=None,
+                 random_state=None,
+                 verbose=0,
+                 n_estimators_per_chunk: int = 1,
+                 warm_start: bool = True,
+                 dask_feeding: bool = True,
+                 max_n_estimators=10,
+                 spf_n_fits=100,
+                 spf_sample_prop=0.1):
+        super(RandomForestRegressor, self).__init__(
+            base_estimator=DecisionTreeRegressor(),
+            n_estimators=n_estimators_per_chunk,
+            estimator_params=("criterion", "max_depth", "min_samples_split",
+                              "min_samples_leaf", "min_weight_fraction_leaf",
+                              "max_features", "max_leaf_nodes",
+                              "min_impurity_decrease", "min_impurity_split",
+                              "random_state"),
+            bootstrap=bootstrap,
+            oob_score=oob_score,
+            n_jobs=n_jobs,
+            random_state=random_state,
+            verbose=verbose,
+            warm_start=warm_start)
+
+        self._fit_estimators = 0
+        self.max_n_estimators = max_n_estimators
+        self.n_estimators_per_chunk = n_estimators
+        self.criterion = criterion
+        self.max_depth = max_depth
+        self.min_samples_split = min_samples_split
+        self.min_samples_leaf = min_samples_leaf
+        self.min_weight_fraction_leaf = min_weight_fraction_leaf
+        self.max_features = max_features
+        self.max_leaf_nodes = max_leaf_nodes
+        self.min_impurity_decrease = min_impurity_decrease
+        self.min_impurity_split = min_impurity_split
+
+        # Set additional params.
+        self.set_params(n_estimators_per_chunk=n_estimators_per_chunk,
+                        spf_n_fits=spf_n_fits,
+                        spf_sample_prop=spf_sample_prop,
+                        dask_feeding=dask_feeding)
diff --git a/incremental_trees/trees.py b/incremental_trees/trees.py
index d92539e..4d29adf 100644
--- a/incremental_trees/trees.py
+++ b/incremental_trees/trees.py
@@ -1,553 +1,12 @@
-import time
-import warnings
-from typing import Union, List
-
 import numpy as np
-import pandas as pd
-from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, RandomForestRegressor, ExtraTreesRegressor
-from sklearn.tree import ExtraTreeClassifier, ExtraTreeRegressor, DecisionTreeRegressor
-from sklearn.utils.multiclass import unique_labels
-
-
-def _check_partial_fit_first_call(clf,
-                                  classes=None):
-    """
-    Modified sklearn function. If classes are inconsistent on second call, warn and reuse previous
-    on assumption first call specification was correct. Don't raise error.
- - Private helper function for factorizing common classes param logic - - Estimators that implement the ``partial_fit`` API need to be provided with - the list of possible classes at the first call to partial_fit. - - Modification: - Subsequent calls to partial_fit do not check () that ``classes`` is still - consistent with a previous value of ``clf.classes_`` when provided. - - This function returns True if it detects that this was the first call to - ``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also - set on ``clf``. - """ - - if getattr(clf, 'classes_', None) is None and classes is None: - raise ValueError("classes must be passed on the first call " - "to partial_fit.") - - elif classes is not None: - if getattr(clf, 'classes_', None) is not None: - if not np.array_equal(clf.classes_, unique_labels(classes)): - # Don't error here: - # Instead, use the previous classes setting, which must be correct on first setting - warnings.warn(f"Classes differ on this call, ignoring on the assumption first call was correct.") - return False - # raise ValueError( - # "`classes=%r` is not the same as on last call " - # "to partial_fit, was: %r" % (classes, clf.classes_)) - - else: - # This is the first call to partial_fit - clf.classes_ = unique_labels(classes) - return True - - # classes is None and clf.classes_ has already previously been set: - # nothing to do - return False - - -class ForestAdditions: - def partial_fit(self, X: Union[np.array, pd.DataFrame], y: Union[np.array, pd.Series], - classes: Union[list, np.ndarray] = None): - """ - Fit a single DTC using the given subset of x and y. -​ - This calls .fit, which is overloaded. However flags pf_call=True, so .fit() will handle calling super .fit(). -​ - For classifiers; - - First call needs to be supplied with the expected classes (similar to existing models with .partial_fit()) - in case not all classes are present in the first subset. - - This object sets classes_ and n_classes_ depending on the supplied classes. The Individual trees set theirs - depending on the data available in the subset. The predict_proba method is modified to standardise shape to the - dimensions defined in this object. - - For regressors: - - self._check_classes is overloaded with dummy method. -​ - :param x: - :param y: - :return: - """ - if self.verbose > 1: - print(f"PF Call with set classes: " - f"{getattr(self, 'classes_', '[no classes attr]')} and input classes {classes}") - - self._check_classes(classes=classes) - - # Fit the next estimator, if not done - if self._fit_estimators < self.max_n_estimators: - t0 = time.time() - self.fit(X, y, - pf_call=True, - classes_=getattr(self, 'classes_', None)) # Pass classes for enforcement, if classifier. - t1 = time.time() - - if self.verbose > 1: - print(f"Fit estimators {self._fit_estimators} - {self._fit_estimators + self.n_estimators_per_chunk} " - f"/ {self.max_n_estimators}") - print(f"Model reports {len(self.estimators_)}") - print(f"Fit time: {round(t1 - t0, 2)}") - print(len(self.estimators_)) - self._fit_estimators += self.n_estimators_per_chunk - - # If still not done, prep to fit next - if self._fit_estimators < self.max_n_estimators: - self.n_estimators += self.n_estimators_per_chunk - - else: - if self.verbose > 0: - print('Done') - - return self - - def _sampled_partial_fit(self, - x: Union[np.array, pd.DataFrame], y: [np.ndarray, pd.Series]): - """ - This feeds partial_fit with random samples based on the spf_ parameters. Used by .fit() when not using dask. - :param x: Data. 
- :param y: Labels. - :return: - """ - - n_samples = int(self.spf_sample_prop * x.shape[0]) - - for _ in range(self.spf_n_fits): - idx = np.random.randint(0, x.shape[0], n_samples) - - if self.verbose > 0: - print(f"_sampled_partial_fit size: {idx.shape}") - - self.partial_fit(x[idx, :], y[idx], - classes=np.unique(y)) - - return self - - -class ClassifierAdditions(ForestAdditions): - """ - Additional functions specific to classifiers. - """ - - def _check_classes(self, classes: List[int]): - """Set classes if they haven't been set yet, otherwise do nothing.""" - - # Set classes for forest (this only needs to be done once). - # Not for each individual tree, these will be set by .fit() using the classes available in the subset. - # Check classes_ is set, or provided - # Returns false if nothing to do - classes_need_setting = _check_partial_fit_first_call(self, classes) - - # If classes not set, set - # Above will error if not set and classes = None - if classes_need_setting: - self.classes_ = np.array(classes) - self.n_classes_ = len(classes) - - -class RegressorAdditions(ForestAdditions): - def _check_classes(self, **kwargs) -> None: - """ - Don't need to check classes with the regressor. - """ - pass - - -class ForestOverloads: - def set_params(self, - **kwargs): - """ - Ensure warm_Start is set to true, otherwise set other params as usual. - - :param kwargs: Params to set. - """ - # Warm start should be true to get .fit() to keep existing estimators. - kwargs['warm_start'] = True - - for key, value in kwargs.items(): - setattr(self, key, value) - - return self - - def fit(self, *args, - pf_call: bool = False, - classes_: np.ndarray = None): - """ - This fit handles calling either super().fit or partial_fit depending on the caller. - - :param pf_call: True if called from partial fit, in this case super.fit() is called, instead of getting stuck in - a recursive loop. - :param classes_: On pf calls, classes is passed from self.classes which will have already been set. These are - re-set after the call to super's fit, which will change them based on observed data. - """ - if not self.dask_feeding and not pf_call: - if self.verbose > 0: - print('Feeding with spf') - self._sampled_partial_fit(*args) +from incremental_trees.models.classification.streaming_extc import StreamingEXTC +from incremental_trees.models.classification.streaming_rfc import StreamingRFC +from incremental_trees.models.regression.streaming_extr import StreamingEXTR +from incremental_trees.models.regression.streaming_rfr import StreamingRFR - else: - if self.verbose > 0: - print('Fitting from a partial_fit call') - super().fit(*args) - if classes_ is not None: - self.classes_ = classes_ - self.n_classes_ = len(classes_) - - return self - - -class ClassifierOverloads(ForestOverloads): - """ - Overloaded methods specific to classifiers. - """ - - def predict_proba(self, x: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: - """ - Call each predict proba from tree, and accumulate. This handle possibly inconsistent shapes, but isn't - parallel? - ​ - Cases where not all classes are presented in the first or subsequent subsets needs to be - handled. For the RandomForestClassifier, tree predictions are averaged in - sklearn.ensemble.forest.accumulate_prediction function. This sums the output matrix with dimensions - n rows x n classes and fails if the class dimension differs. 
- The class dimension is defined at the individual estimator level during the .fit() call, which sets the - following attributes: - - self.n_outputs_ = y.shape[1], which is then used by _validate_y_class_weight()), always called in .fit() - to set: - - self.classes_ - - self.n_classes_ - - The .predict() method (sklearn.tree.tree.BaseDecisionTree.predict()) sets the output shape using: - # Classification - if is_classifier(self): - if self.n_outputs_ == 1: - return self.classes_.take(np.argmax(proba, axis=1), axis=0) - else: - [Not considering this yet] - - :param x: - :return: - """ - # Prepare expected output shape - preds = np.zeros(shape=(x.shape[0], self.n_classes_), - dtype=np.float32) - counts = np.zeros(shape=(x.shape[0], self.n_classes_), - dtype=np.int16) - - for e in self.estimators_: - # Get the prediction from the tree - est_preds = e.predict_proba(x) - # Get the indexes of the classes present - present_classes = e.classes_.astype(int) - # Sum these in to the correct array columns - preds[:, present_classes] += est_preds - counts[:, present_classes] += 1 - - # Normalise predictions against counts - with warnings.catch_warnings(): - warnings.simplefilter("ignore", RuntimeWarning) - norm_prob = preds / counts - - # And remove nans (0/0) and infs (n/0) - norm_prob[np.isnan(norm_prob) | np.isinf(norm_prob)] = 0 - - return norm_prob - - -class RegressorOverloads(ForestOverloads): - """ - Nothing specific to overload for the Regressors. Predict doesn't need to deal with classes. - """ - pass - - -class StreamingRFR(RegressorAdditions, RegressorOverloads, RandomForestRegressor): - """Overload sklearn.ensemble.RandomForestClassifier to add partial fit method and new params.""" - - def __init__(self, - n_estimators='warn', - criterion="mse", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0., - max_features="auto", - max_leaf_nodes=None, - min_impurity_decrease=0., - min_impurity_split=None, - bootstrap=True, - oob_score=False, - n_jobs=None, - random_state=None, - verbose=0, - n_estimators_per_chunk: int = 1, - warm_start: bool = True, - dask_feeding: bool = True, - max_n_estimators=10, - spf_n_fits=100, - spf_sample_prop=0.1): - super(RandomForestRegressor, self).__init__( - base_estimator=DecisionTreeRegressor(), - n_estimators=n_estimators_per_chunk, - estimator_params=("criterion", "max_depth", "min_samples_split", - "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", - "min_impurity_decrease", "min_impurity_split", - "random_state"), - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose, - warm_start=warm_start) - - self._fit_estimators = 0 - self.max_n_estimators = max_n_estimators - self.n_estimators_per_chunk = n_estimators - self.criterion = criterion - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes - self.min_impurity_decrease = min_impurity_decrease - self.min_impurity_split = min_impurity_split - - # Set additional params. 
- self.set_params(n_estimators_per_chunk=n_estimators_per_chunk, - spf_n_fits=spf_n_fits, - spf_sample_prop=spf_sample_prop, - dask_feeding=dask_feeding) - - -class StreamingRFC(ClassifierAdditions, ClassifierOverloads, RandomForestClassifier): - """ - Overload sklearn.ensemble.RandomForestClassifier to add partial fit method and new params. - - Note this init is a slightly different structure to ExtraTressClassifier/Regressor and RandomForestRegressor. - """ - - def __init__(self, - bootstrap=True, - class_weight=None, - criterion='gini', - max_depth=None, - max_features='auto', - max_leaf_nodes=None, - min_impurity_decrease=0.0, - min_impurity_split=None, - min_samples_leaf=1, - min_samples_split=2, - min_weight_fraction_leaf=0.0, - n_estimators_per_chunk: int = 1, - n_estimators: bool = None, - n_jobs=None, - oob_score=False, - random_state=None, - verbose=0, - warm_start: bool = True, - dask_feeding: bool = True, - max_n_estimators=10, - spf_n_fits=100, - spf_sample_prop=0.1) -> None: - """ - :param bootstrap: - :param class_weight: - :param criterion: - :param max_depth: - :param max_features: - :param max_leaf_nodes: - :param min_impurity_decrease: - :param min_impurity_split: - :param min_samples_leaf: - :param min_samples_split: - :param min_weight_fraction_leaf: - :param n_estimators_per_chunk: Estimators per chunk to fit. - :param n_jobs: - :param oob_score: - :param random_state: - :param verbose: - :param warm_start: - :param max_n_estimators: Total max number of estimators to fit. - :param verb: If > 0 display debugging info during fit - """ - - # Run the super init, which also calls other parent inits to handle other params (like base estimator) - super().__init__() - - self.max_n_estimators: int = None - self._fit_estimators: int = 0 - self.classes_: np.array = None # NB: Needs to be array, not list. 
- self.n_classes_: int = None - - # n_estimators will be used by RFC to set how many ests are fit on each .fit() call - n_estimators = n_estimators_per_chunk - - self.set_params(bootstrap=bootstrap, - class_weight=class_weight, - criterion=criterion, - max_depth=max_depth, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - min_impurity_split=min_impurity_split, - min_samples_leaf=min_samples_leaf, - min_samples_split=min_samples_split, - min_weight_fraction_leaf=min_weight_fraction_leaf, - n_estimators_per_chunk=n_estimators_per_chunk, - n_estimators=n_estimators, - n_jobs=n_jobs, - oob_score=oob_score, - random_state=random_state, - verbose=verbose, - warm_start=warm_start, - _fit_estimators=0, - dask_feeding=dask_feeding, - max_n_estimators=max_n_estimators, - verb=0, - spf_n_fits=spf_n_fits, - spf_sample_prop=spf_sample_prop) - - -class StreamingEXTR(RegressorAdditions, RegressorOverloads, ExtraTreesRegressor): - def __init__(self, - n_estimators_per_chunk: int = 1, - n_estimators='warn', - max_n_estimators=np.inf, - criterion="mse", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0., - max_features="auto", - max_leaf_nodes=None, - min_impurity_decrease=0., - min_impurity_split=None, - bootstrap=False, - oob_score=False, - n_jobs=None, - random_state=None, - verbose=0, - warm_start=True, - dask_feeding: bool = True, - spf_n_fits: int = 100, - spf_sample_prop: float = 0.1): - super(ExtraTreesRegressor, self).__init__( - base_estimator=ExtraTreeRegressor(), - n_estimators=n_estimators_per_chunk, - estimator_params=("criterion", "max_depth", "min_samples_split", - "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", - "min_impurity_decrease", "min_impurity_split", - "random_state"), - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose, - warm_start=warm_start) - - self._fit_estimators = 0 - self.max_n_estimators = max_n_estimators - self.n_estimators_per_chunk = n_estimators - self.criterion = criterion - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes - self.min_impurity_decrease = min_impurity_decrease - self.min_impurity_split = min_impurity_split - - # Set additional params. 
- self.set_params(n_estimators_per_chunk=n_estimators_per_chunk, - max_n_estimators=max_n_estimators, - spf_n_fits=spf_n_fits, - spf_sample_prop=spf_sample_prop, - dask_feeding=dask_feeding) - - -class StreamingEXTC(ClassifierAdditions, ClassifierOverloads, ExtraTreesClassifier): - """Overload sklearn.ensemble.ExtraTreesClassifier to add partial fit method and new params.""" - - def __init__(self, - n_estimators_per_chunk: int = 1, - n_estimators: bool = None, - max_n_estimators=np.inf, - criterion="gini", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0., - max_features="auto", - max_leaf_nodes=None, - min_impurity_decrease=0., - min_impurity_split=None, - bootstrap=False, - oob_score=False, - n_jobs=None, - random_state=None, - verbose=0, - warm_start=True, - class_weight=None, - dask_feeding: bool = True, - spf_n_fits=100, - spf_sample_prop: float = 0.1): - super(ExtraTreesClassifier, self).__init__( - base_estimator=ExtraTreeClassifier(), - n_estimators=n_estimators_per_chunk, - estimator_params=("criterion", "max_depth", "min_samples_split", - "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", - "min_impurity_decrease", "min_impurity_split", - "random_state"), - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose, - warm_start=warm_start, - class_weight=class_weight) - - self.max_n_estimators: int = None - self._fit_estimators: int = 0 - self.classes_: np.array = None # NB: Needs to be array, not list. - self.n_classes_: int = None - - self._fit_estimators = 0 - self.max_n_estimators = max_n_estimators - self.n_estimators_per_chunk = n_estimators - self.criterion = criterion - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes - self.min_impurity_decrease = min_impurity_decrease - self.min_impurity_split = min_impurity_split - - # Set additional params. 
- self.set_params(n_estimators_per_chunk=n_estimators_per_chunk, - max_n_estimators=max_n_estimators, - spf_n_fits=spf_n_fits, - spf_sample_prop=spf_sample_prop, - dask_feeding=dask_feeding) - - -if __name__ == '__main__': +def bunch_of_examples(): from sklearn.datasets import make_blobs, make_regression x, y = make_regression(n_samples=int(2e5), @@ -560,6 +19,8 @@ def __init__(self, verbose=0, n_jobs=2) + srfr.fit(x, y) + # Fit 10 regressors for _ in range(10): x, y = make_regression(n_samples=int(2e5), @@ -572,7 +33,7 @@ def __init__(self, n_jobs=5) chunk_size = int(2e3) - for i in range(20): + for _ in range(20): sample_idx = np.random.randint(0, x.shape[0], chunk_size) srfr.partial_fit(x[sample_idx], y[sample_idx], classes=np.unique(y)) @@ -584,7 +45,7 @@ def __init__(self, verbose=0, n_jobs=5) - for i in range(20): + for _ in range(20): sample_idx = np.random.randint(0, x.shape[0], chunk_size) sext.partial_fit(x[sample_idx], y[sample_idx], classes=np.unique(y)) @@ -605,7 +66,7 @@ def __init__(self, n_jobs=5) chunk_size = int(2e3) - for i in range(20): + for _ in range(20): sample_idx = np.random.randint(0, x.shape[0], chunk_size) srfc.partial_fit(x[sample_idx], y[sample_idx], classes=np.unique(y)) @@ -617,9 +78,13 @@ def __init__(self, verbose=0, n_jobs=5) - for i in range(20): + for _ in range(20): sample_idx = np.random.randint(0, x.shape[0], chunk_size) sext.partial_fit(x[sample_idx], y[sample_idx], classes=np.unique(y)) print(f"SEXTC: {sext.score(x, y)}") + + +if __name__ == '__main__': + bunch_of_examples() diff --git a/requirements.txt b/requirements.txt index cd38d29..0ce12d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,14 @@ -scikit-learn==0.22.2 -pandas==1.0.1 -dask==2.12.0 +scikit-learn>=0.22 +pandas +numpy +dask>=2 dask-glm==0.2.0 -dask-ml==1.2.0 -distributed==2.12.0 +dask-ml>=1 +distributed>=2 bokeh pytest jupyter jupyterlab ipykernel matplotlib -fsspec>=0.3.3 +fsspec diff --git a/setup.py b/setup.py index 2201664..60af819 100644 --- a/setup.py +++ b/setup.py @@ -14,8 +14,8 @@ long_description_content_type="text/markdown", packages=setuptools.find_packages(), url="https://github.com/garethjns/IncrementalTrees", - install_requires=["scikit-learn==0.22.2", "pandas==1.0.1", - "dask==2.12.0", + install_requires=["scikit-learn>=0.22", "pandas", + "dask>=2", "dask-glm==0.2.0", - "dask-ml==1.2.0", + "dask-ml>=1", "bokeh"]) diff --git a/tests/common/__init__.py b/tests/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data.py b/tests/common/data_fixture.py similarity index 98% rename from tests/data.py rename to tests/common/data_fixture.py index 791fb9d..68bbd61 100644 --- a/tests/data.py +++ b/tests/common/data_fixture.py @@ -1,10 +1,10 @@ from sklearn.datasets import make_blobs from sklearn.metrics import roc_auc_score -from sklearn.model_selection import train_test_split from sklearn.metrics.classification import classification_report +from sklearn.model_selection import train_test_split -class Data: +class DataFixture: def _prep_data(self): x, y = make_blobs(n_samples=int(2e4), random_state=0, @@ -18,7 +18,6 @@ def _prep_data(self): return self def _mod_report(self, mod): - report = classification_report(self.y_test, mod.predict(self.x_test)) train_auc = roc_auc_score(self.y_train, mod.predict_proba(self.x_train)[:, 1]) test_auc = roc_auc_score(self.y_test, mod.predict_proba(self.x_test)[:, 1]) diff --git a/tests/params.py b/tests/common/param_fixtures.py similarity index 100% rename from tests/params.py rename to 
tests/common/param_fixtures.py diff --git a/tests/integration/base.py b/tests/integration/base.py index b39e250..80135b2 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -1,7 +1,6 @@ -import dask_ml -from dask_ml.datasets import make_blobs, make_regression import numpy as np import sklearn +from dask_ml.datasets import make_blobs, make_regression from distributed import LocalCluster, Client from sklearn import clone from sklearn.model_selection import RandomizedSearchCV diff --git a/tests/integration/incremental_trees/test_trees.py b/tests/integration/incremental_trees/test_trees.py index d0b179b..ad7c2d9 100644 --- a/tests/integration/incremental_trees/test_trees.py +++ b/tests/integration/incremental_trees/test_trees.py @@ -4,16 +4,20 @@ import sklearn import sklearn.datasets -from incremental_trees.trees import StreamingRFC, StreamingRFR, StreamingEXTC, StreamingEXTR +from incremental_trees.models.classification.streaming_extc import StreamingEXTC +from incremental_trees.models.regression.streaming_extr import StreamingEXTR +from incremental_trees.models.regression.streaming_rfr import StreamingRFR +from incremental_trees.trees import StreamingRFC from tests.integration.base import PartialFitTests, FitTests -class TestStreamingRFC_1(PartialFitTests, unittest.TestCase): +class TestStreamingRFC1(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset. No limit on the total number of trees. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -35,12 +39,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFC_2(PartialFitTests, unittest.TestCase): +class TestStreamingRFC2(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset. Total models limited to 39. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -61,12 +66,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFC_3(PartialFitTests, unittest.TestCase): +class TestStreamingRFC3(PartialFitTests, unittest.TestCase): """ Test SRFC with multiple estimators per chunk with "random forest style" max features. ie, subset. No limit on total models, 3 estimators per row subset (each with different feature subset) """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -88,12 +94,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFC_4(PartialFitTests, unittest.TestCase): +class TestStreamingRFC4(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 1 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -115,12 +122,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFC_5(PartialFitTests, unittest.TestCase): +class TestStreamingRFC5(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 3 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -143,12 +151,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFC_6(PartialFitTests, unittest.TestCase): +class TestStreamingRFC6(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "decision tree style" max features. 
ie, all available to each tree. No limit on total models, 3 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -171,7 +180,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreaming_RFC_7(FitTests, unittest.TestCase): +class TestStreamingRFC7(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -188,11 +197,10 @@ def setUpClass(cls): spf_sample_prop=cls.spf_sample_prop, spf_n_fits=cls.spf_n_fits) - super().setUpClass() -class TestStreaming_RFC_8(FitTests, unittest.TestCase): +class TestStreamingRFC8(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -208,11 +216,10 @@ def setUpClass(cls): spf_sample_prop=cls.spf_sample_prop, spf_n_fits=cls.spf_n_fits) - super().setUpClass() -class TestStreaming_RFC_9(FitTests, unittest.TestCase): +class TestStreamingRFC9(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -232,12 +239,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_1(PartialFitTests, unittest.TestCase): +class TestStreamingRFR1(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset. No limit on the total number of trees. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -257,12 +265,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_2(PartialFitTests, unittest.TestCase): +class TestStreamingRFR2(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset. Total models limited to 39. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -281,12 +290,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_3(PartialFitTests, unittest.TestCase): +class TestStreamingRFR3(PartialFitTests, unittest.TestCase): """ Test SRFC with multiple estimators per chunk with "random forest style" max features. ie, subset. No limit on total models, 3 estimators per row subset (each with different feature subset) """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -306,12 +316,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_4(PartialFitTests, unittest.TestCase): +class TestStreamingRFR4(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 1 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -331,12 +342,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_5(PartialFitTests, unittest.TestCase): +class TestStreamingRFR5(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 3 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -357,12 +369,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_6(PartialFitTests, unittest.TestCase): +class TestStreamingRFR6(PartialFitTests, unittest.TestCase): """ Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 3 estimators per row subset. 
""" + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -383,7 +396,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_7(FitTests, unittest.TestCase): +class TestStreamingRFR7(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -403,7 +416,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_8(FitTests, unittest.TestCase): +class TestStreamingRFR8(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -422,7 +435,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingRFR_9(FitTests, unittest.TestCase): +class TestStreamingRFR9(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -442,12 +455,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_1(PartialFitTests, unittest.TestCase): +class TestStreamingEXTC1(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset. No limit on the total number of trees. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -468,12 +482,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_2(PartialFitTests, unittest.TestCase): +class TestStreamingEXTC2(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset. Total models limited to 39. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -494,12 +509,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_3(PartialFitTests, unittest.TestCase): +class TestStreamingEXTC3(PartialFitTests, unittest.TestCase): """ Test SEXT with multiple estimators per chunk with "random forest style" max features. ie, subset. No limit on total models, 3 estimators per row subset (each with different feature subset) """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -521,12 +537,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_4(PartialFitTests, unittest.TestCase): +class TestStreamingEXTC4(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 1 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -548,12 +565,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_5(PartialFitTests, unittest.TestCase): +class TestStreamingEXTC5(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 3 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -576,12 +594,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_6(PartialFitTests, unittest.TestCase): +class TestStreamingEXTC6(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 3 estimators per row subset. 
""" + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -604,7 +623,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_7(FitTests, unittest.TestCase): +class TestStreamingEXTC7(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -624,7 +643,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_8(FitTests, unittest.TestCase): +class TestStreamingEXTC8(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -643,7 +662,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTC_9(FitTests, unittest.TestCase): +class TestStreamingEXTC9(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -663,12 +682,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_1(PartialFitTests, unittest.TestCase): +class TestStreamingEXTR1(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset. No limit on the total number of trees. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -687,12 +707,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_2(PartialFitTests, unittest.TestCase): +class TestStreamingEXTR2(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset. Total models limited to 39. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -711,12 +732,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_3(PartialFitTests, unittest.TestCase): +class TestStreamingEXTR3(PartialFitTests, unittest.TestCase): """ Test SEXT with multiple estimators per chunk with "random forest style" max features. ie, subset. No limit on total models, 3 estimators per row subset (each with different feature subset) """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -736,12 +758,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_4(PartialFitTests, unittest.TestCase): +class TestStreamingEXTR4(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 1 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -761,12 +784,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_5(PartialFitTests, unittest.TestCase): +class TestStreamingEXTR5(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 3 estimators per row subset. """ + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -787,12 +811,13 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_6(PartialFitTests, unittest.TestCase): +class TestStreamingEXTR6(PartialFitTests, unittest.TestCase): """ Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree. No limit on total models, 3 estimators per row subset. 
""" + @classmethod def setUpClass(cls): """Set up model to test.""" @@ -813,7 +838,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_7(FitTests, unittest.TestCase): +class TestStreamingEXTR7(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -833,7 +858,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_8(FitTests, unittest.TestCase): +class TestStreamingEXTR8(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -852,7 +877,7 @@ def setUpClass(cls): super().setUpClass() -class TestStreamingEXTR_9(FitTests, unittest.TestCase): +class TestStreamingEXTR9(FitTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" diff --git a/tests/integration/incremental_trees/test_trees_benchmarks.py b/tests/integration/incremental_trees/test_trees_benchmarks.py index dc4f0cc..33e9b7f 100644 --- a/tests/integration/incremental_trees/test_trees_benchmarks.py +++ b/tests/integration/incremental_trees/test_trees_benchmarks.py @@ -3,20 +3,21 @@ import numpy as np from distributed import LocalCluster, Client from sklearn import clone -from sklearn.linear_model import LogisticRegression from sklearn.ensemble.forest import RandomForestClassifier +from sklearn.linear_model import LogisticRegression from incremental_trees.trees import StreamingRFC -from tests.data import Data +from tests.common.data_fixture import DataFixture -class PerformanceComparisons(Data): +class PerformanceComparisons(DataFixture): """ Compare srfc to benchmark rfc and logistic regression. TODO: Generalise naming, report functions. TODO: Set sensible parameters and add performance assets in child tests. """ + @classmethod def setUpClass(cls): """Prepare comparable models""" @@ -55,7 +56,6 @@ def _assert_same_n_rows(self): Assert that given the training settings, the rfc and srfc will, overall, see the same number of rows of data. This is a more direct comparison than len(mod.estimators_) as the individual estimators see much less data in the srfc case. - :return: """ # Will be available in actual test. n_rows = self.x_train.shape[0] @@ -66,8 +66,8 @@ def _assert_same_n_rows(self): int(n_rows / self.srfc_n_partial_fit_calls))) def _fit_srfc(self, - sequential: bool=True, - n_prop: float=0.1) -> StreamingRFC: + sequential: bool = True, + n_prop: float = 0.1) -> StreamingRFC: """ Fit the streaming RFC. Total number of rows used in training varies depending on sequential. 
@@ -100,7 +100,7 @@ def _fit_srfc(self, n_sample_rows = int(n_rows / self.srfc_n_partial_fit_calls) sidx = 0 eidx = n_sample_rows - for i in range(self.srfc_n_partial_fit_calls): + for _ in range(self.srfc_n_partial_fit_calls): idx = np.arange(sidx, eidx) srfc.partial_fit(self.x_train[idx, :], self.y_train[idx], classes=[0, 1]) @@ -109,7 +109,7 @@ def _fit_srfc(self, else: # Sample n_prop of data self.srfc_n_partial_fit_calls times n_sample_rows = int(n_rows * n_prop) - for i in range(self.srfc_n_partial_fit_calls): + for _ in range(self.srfc_n_partial_fit_calls): # Sample indexes with replacement idx = np.random.randint(0, n_rows, n_sample_rows) srfc.partial_fit(self.x_train[idx, :], self.y_train[idx], @@ -143,13 +143,6 @@ def test_benchmark_manual_random(self): print(f"self.rfc score test AUC: {self.rfc_test_auc}") print(f"self.srfc_sam score test AUC: {self.srfc_sam_test_auc}") - # self.assertTrue(math.isclose(self.srfc_sam_test_auc, self.rfc_test_auc, - # rel_tol=0.05)) - # self.assertTrue(math.isclose(self.srfc_sam_test_auc, self.log_reg_test_auc, - # rel_tol=0.05)) - - # self._assert_same_n_rows() - def test_benchmark_auto_spf(self): self._fit_with_spf() self.srfc_spf_report, self.srfc_spf_train_auc, self.srfc_spf_test_auc = self._mod_report(mod=self.srfc_spf) @@ -162,7 +155,7 @@ def test_benchmark_auto_spf(self): def test_benchmark_auto_dask(self): self._fit_with_dask() self.srfc_dask_report, self.srfc_dask_train_auc, self.srfc_dask_test_auc = \ - self._mod_report(mod=self.srfc_dask) + self._mod_report(mod=self.srfc_dask) print("==Auto feeding partial_fit with dask==") print(f"self.log_reg score test AUC: {self.log_reg_test_auc}") @@ -181,18 +174,11 @@ def test_benchmark_manual_sequential(self): print(f"self.rfc_once score test AUC: {self.rfc_once_test_auc}") print(f"self.srfc score test AUC: {self.srfc_seq_test_auc}") - # self.assertTrue(math.isclose(self.srfc_seq_test_auc, self.rfc_test_auc, - # rel_tol=0.05)) - # self.assertTrue(math.isclose(self.srfc_seq_test_auc, self.log_reg_test_auc, - # rel_tol=0.05)) - - # self._assert_same_n_rows() - def _generate_comparable_models(self, srfc_n_estimators_per_chunk: int, srfc_n_partial_fit_calls: int, srfc_sample_prop: float, - n_jobs: int=4): + n_jobs: int = 4): """ Set values for streaming models and different set ups. Create two comparable rfcs designed to see equivalent numbers of rows. 
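The next hunk touches `_generate_comparable_models()`, which also builds an "auto-spf" model. As I read the spf parameters used throughout these tests, `dask_feeding=False` switches the estimator to feeding itself: a single `.fit()` call performs `spf_n_fits` internal `partial_fit` passes, each over `spf_sample_prop` of the rows, so no manual loop is needed. A hedged sketch under that assumption (data and values illustrative):

````python
from sklearn.datasets import make_blobs

from incremental_trees.trees import StreamingRFC

# Stand-in data, not the benchmark fixture.
x, y = make_blobs(n_samples=10000, centers=2, n_features=40, random_state=0)

# Self-feeding ("spf") set-up: .fit() samples the data spf_n_fits times
# internally instead of requiring an external partial_fit loop.
srfc_spf = StreamingRFC(dask_feeding=False,
                        n_estimators_per_chunk=10,
                        spf_n_fits=30,        # internal partial_fit calls
                        spf_sample_prop=0.3,  # proportion of rows per call
                        n_jobs=4)
srfc_spf.fit(x, y)

# Total rows seen is then roughly n_rows * spf_n_fits * spf_sample_prop,
# which is what makes it comparable to an RFC fit once on all rows.
````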
@@ -242,7 +228,7 @@ def _generate_comparable_models(self, # "Auto-spf" srfc self.srfc_spf = StreamingRFC(dask_feeding=False, - n_estimators_per_chunk=self.srfc_n_estimators_per_chunk,\ + n_estimators_per_chunk=self.srfc_n_estimators_per_chunk, \ spf_n_fits=self.srfc_n_partial_fit_calls, spf_sample_prop=self.srfc_sample_prop, n_jobs=n_jobs) diff --git a/tests/integration/incremental_trees/test_trees_dask.py b/tests/integration/incremental_trees/test_trees_dask.py index 6b48129..123313e 100644 --- a/tests/integration/incremental_trees/test_trees_dask.py +++ b/tests/integration/incremental_trees/test_trees_dask.py @@ -3,11 +3,14 @@ import numpy as np from dask_ml.wrappers import Incremental -from incremental_trees.trees import StreamingEXTC, StreamingEXTR, StreamingRFC, StreamingRFR +from incremental_trees.models.classification.streaming_extc import StreamingEXTC +from incremental_trees.models.regression.streaming_extr import StreamingEXTR +from incremental_trees.models.regression.streaming_rfr import StreamingRFR +from incremental_trees.trees import StreamingRFC from tests.integration.base import DaskTests -class TestDaskModel_1(DaskTests, unittest.TestCase): +class TestDaskModel1(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -24,7 +27,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskModel_2(DaskTests, unittest.TestCase): +class TestDaskModel2(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -41,7 +44,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskModel_3(DaskTests, unittest.TestCase): +class TestDaskModel3(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -58,7 +61,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskModel_4(DaskTests, unittest.TestCase): +class TestDaskModel4(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -76,7 +79,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskModel_5(DaskTests, unittest.TestCase): +class TestDaskModel5(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -94,7 +97,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskModel_6(DaskTests, unittest.TestCase): +class TestDaskModel6(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -112,7 +115,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskModel_7(DaskTests, unittest.TestCase): +class TestDaskModel7(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -130,7 +133,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskModel_8(DaskTests, unittest.TestCase): +class TestDaskModel8(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -149,7 +152,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskRFC_1(DaskTests, unittest.TestCase): +class TestDaskRFC1(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -166,7 +169,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskRFC_2(DaskTests, unittest.TestCase): +class TestDaskRFC2(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -183,7 +186,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskRFC_3(DaskTests, unittest.TestCase): +class TestDaskRFC3(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): 
"""Set up model to test.""" @@ -200,7 +203,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskRFC_4(DaskTests, unittest.TestCase): +class TestDaskRFC4(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -218,7 +221,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskRFR_1(DaskTests, unittest.TestCase): +class TestDaskRFR1(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -236,7 +239,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskRFR_2(DaskTests, unittest.TestCase): +class TestDaskRFR2(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -254,7 +257,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskRFR_3(DaskTests, unittest.TestCase): +class TestDaskRFR3(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" @@ -272,7 +275,7 @@ def setUpClass(cls): super().setUpClass() -class TestDaskRFR_4(DaskTests, unittest.TestCase): +class TestDaskRFR4(DaskTests, unittest.TestCase): @classmethod def setUpClass(cls): """Set up model to test.""" diff --git a/tests/integration/incremental_trees/test_trees_grids.py b/tests/integration/incremental_trees/test_trees_grids.py index e62117b..4a0ac0e 100644 --- a/tests/integration/incremental_trees/test_trees_grids.py +++ b/tests/integration/incremental_trees/test_trees_grids.py @@ -1,88 +1,90 @@ -# TODO: These tests aren't finished. Need to generalise, add EXTC, regressors, etc. - -import unittest -import warnings -from incremental_trees.trees import StreamingRFC, StreamingEXTC -from sklearn.ensemble.forest import RandomForestClassifier, ExtraTreesClassifier -from sklearn.model_selection import RandomizedSearchCV - -from tests.data import Data -from tests.params import RFCGRID, SRFCGRID - - -class GridBenchmarks: - def test_fit_all(self): - """ - Fit grids and compare. - - TODO: Generalise naming. 
- """ - - with warnings.catch_warnings(): - warnings.simplefilter('ignore', FutureWarning) - self.rfc_grid.fit(self.x_train, self.y_train) - self.srfc_grid.fit(self.x_train, self.y_train) - - self.rfc_report, self.rfc_train_auc, self.rfc_test_auc = self._mod_report(mod=self.rfc_grid.best_estimator_) - self.srfc_report, self.srfc_train_auc, self.srfc_test_auc = self._mod_report(mod=self.srfc_grid.best_estimator_) - - print("==Not-necessarily fair grid comparison==") - print(f"self.rfc grid score test AUC: {self.rfc_test_auc}") - print(f"self.srfc grid score test AUC: {self.srfc_test_auc}") - print(self.srfc_grid.get_params()) - - -class RFCBenchmarkGrid(GridBenchmarks, Data, unittest.TestCase): - """ - Check a grid runs, assert performance (not added yet) - """ - - @classmethod - def setUpClass(cls): - cls._prep_data(cls) - - n_iter = 3 - cls.srfc_grid = RandomizedSearchCV(StreamingRFC(n_jobs=2, - verbose=1), - param_distributions=SRFCGRID, - scoring='roc_auc', - n_iter=n_iter * 2, - verbose=2, - n_jobs=3, - cv=4) - - cls.rfc_grid = RandomizedSearchCV(RandomForestClassifier(n_jobs=2), - param_distributions=RFCGRID, - scoring='roc_auc', - n_iter=n_iter, - verbose=2, - n_jobs=3, - cv=4) - - -class EXTCBenchmarkGrid(GridBenchmarks, Data, unittest.TestCase): - """ - Check a grid runs, assert performance (not added yet) - """ - - @classmethod - def setUpClass(cls): - cls._prep_data(cls) - - n_iter = 2 - cls.srfc_grid = RandomizedSearchCV(StreamingEXTC(n_jobs=2, - verbose=1), - param_distributions=SRFCGRID, - scoring='roc_auc', - n_iter=n_iter * 10, - verbose=2, - n_jobs=3, - cv=4) - - cls.rfc_grid = RandomizedSearchCV(ExtraTreesClassifier(n_jobs=2), - param_distributions=RFCGRID, - scoring='roc_auc', - n_iter=n_iter, - verbose=2, - n_jobs=3, - cv=4) +# TODO: These tests aren't finished. Need to generalise, add EXTC, regressors, etc. + +import unittest +import warnings + +from sklearn.ensemble.forest import RandomForestClassifier, ExtraTreesClassifier +from sklearn.model_selection import RandomizedSearchCV + +from incremental_trees.models.classification.streaming_extc import StreamingEXTC +from incremental_trees.trees import StreamingRFC +from tests.common.data_fixture import DataFixture +from tests.common.param_fixtures import RFCGRID, SRFCGRID + + +class GridBenchmarks: + def test_fit_all(self): + """ + Fit grids and compare. + + TODO: Generalise naming. 
+ """ + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', FutureWarning) + self.rfc_grid.fit(self.x_train, self.y_train) + self.srfc_grid.fit(self.x_train, self.y_train) + + self.rfc_report, self.rfc_train_auc, self.rfc_test_auc = self._mod_report(mod=self.rfc_grid.best_estimator_) + self.srfc_report, self.srfc_train_auc, self.srfc_test_auc = self._mod_report(mod=self.srfc_grid.best_estimator_) + + print("==Not-necessarily fair grid comparison==") + print(f"self.rfc grid score test AUC: {self.rfc_test_auc}") + print(f"self.srfc grid score test AUC: {self.srfc_test_auc}") + print(self.srfc_grid.get_params()) + + +class RFCBenchmarkGrid(GridBenchmarks, DataFixture, unittest.TestCase): + """ + Check a grid runs, assert performance (not added yet) + """ + + @classmethod + def setUpClass(cls): + cls._prep_data(cls) + + n_iter = 3 + cls.srfc_grid = RandomizedSearchCV(StreamingRFC(n_jobs=2, + verbose=1), + param_distributions=SRFCGRID, + scoring='roc_auc', + n_iter=n_iter * 2, + verbose=2, + n_jobs=3, + cv=4) + + cls.rfc_grid = RandomizedSearchCV(RandomForestClassifier(n_jobs=2), + param_distributions=RFCGRID, + scoring='roc_auc', + n_iter=n_iter, + verbose=2, + n_jobs=3, + cv=4) + + +class EXTCBenchmarkGrid(GridBenchmarks, DataFixture, unittest.TestCase): + """ + Check a grid runs, assert performance (not added yet) + """ + + @classmethod + def setUpClass(cls): + cls._prep_data(cls) + + n_iter = 2 + cls.srfc_grid = RandomizedSearchCV(StreamingEXTC(n_jobs=2, + verbose=1), + param_distributions=SRFCGRID, + scoring='roc_auc', + n_iter=n_iter * 10, + verbose=2, + n_jobs=3, + cv=4) + + cls.rfc_grid = RandomizedSearchCV(ExtraTreesClassifier(n_jobs=2), + param_distributions=RFCGRID, + scoring='roc_auc', + n_iter=n_iter, + verbose=2, + n_jobs=3, + cv=4) diff --git a/tests/integration/incremental_trees/test_trees_inconsistent_classes.py b/tests/integration/incremental_trees/test_trees_inconsistent_classes.py index 4b77a5d..86dfd35 100644 --- a/tests/integration/incremental_trees/test_trees_inconsistent_classes.py +++ b/tests/integration/incremental_trees/test_trees_inconsistent_classes.py @@ -1,94 +1,95 @@ -import unittest - -import numpy as np -import pandas as pd - -from incremental_trees.trees import StreamingRFC, StreamingEXTC - - -class ClassConsistencyTests: - @classmethod - def setUpClass(cls): - data = pd.DataFrame({'a': (1, 2, 3, 4, 5), - 'b': (1, 2, 3, 4, 5), - 'c': (1, 2, 3, 4, 5), - 'target': (1, 1, 2, 2, 3)}) - - data = pd.concat((data, data), - axis=0).sort_values('target').reset_index(drop=True) - - cls.x = data[[c for c in data if c != 'target']] - cls.y = data['target'] - - def test_none_on_second_call(self): - # Fit with 2 classes - self.mod.partial_fit(self.x[0:6], self.y[0:6], - classes=np.array([1, 2, 3])) - self.mod.predict(self.x[0:6]) - - self.assertEqual(self.mod.n_classes_, 3) - self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) - - # Fit with 3 classes - self.mod.partial_fit(self.x, self.y) - self.mod.predict(self.x) - - self.assertEqual(self.mod.n_classes_, 3) - self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) - - def test_correct_on_second_call(self): - # Fit with 2 classes - self.mod.partial_fit(self.x[0:6], self.y[0:6], - classes=np.array([1, 2, 3])) - self.mod.predict(self.x[0:6]) - - self.assertEqual(self.mod.n_classes_, 3) - self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) - - # Fit with 3 classes - self.mod.partial_fit(self.x, self.y, - classes=np.array([1, 2, 3])) - self.mod.predict(self.x) - - 
self.assertEqual(self.mod.n_classes_, 3) - self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) - - def test_incorrect_on_second_call(self): - """Incorrect on second call - can happen when dask passes classes.""" - - # Fit with 3 classes - self.mod.partial_fit(self.x, self.y, - classes=np.array([1, 2, 3])) - self.mod.predict(self.x) - - self.assertEqual(self.mod.n_classes_, 3) - self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) - - # Fit with 2 classes - self.mod.partial_fit(self.x[0:6], self.y[0:6], - classes=np.array([1, 2])) - self.mod.predict(self.x[0:6]) - - self.assertEqual(self.mod.n_classes_, 3) - self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) - - self.mod.partial_fit(self.x[5:7], self.y[5:7], - classes=np.array([2, 3])) - self.mod.predict(self.x) - - self.assertEqual(self.mod.n_classes_, 3) - self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) - - -class TestInconsistentClassesRFC(ClassConsistencyTests, unittest.TestCase): - def setUp(self): - self.mod = StreamingRFC(n_estimators_per_chunk=1, - max_n_estimators=np.inf, - verbose=2) - - -class TestInconsistentClassesEXT(ClassConsistencyTests, unittest.TestCase): - def setUp(self): - self.mod = StreamingEXTC(n_estimators_per_chunk=1, - max_n_estimators=np.inf, - verbose=2) +import unittest + +import numpy as np +import pandas as pd + +from incremental_trees.models.classification.streaming_extc import StreamingEXTC +from incremental_trees.trees import StreamingRFC + + +class ClassConsistencyTests: + @classmethod + def setUpClass(cls): + data = pd.DataFrame({'a': (1, 2, 3, 4, 5), + 'b': (1, 2, 3, 4, 5), + 'c': (1, 2, 3, 4, 5), + 'target': (1, 1, 2, 2, 3)}) + + data = pd.concat((data, data), + axis=0).sort_values('target').reset_index(drop=True) + + cls.x = data[[c for c in data if c != 'target']] + cls.y = data['target'] + + def test_none_on_second_call(self): + # Fit with 2 classes + self.mod.partial_fit(self.x[0:6], self.y[0:6], + classes=np.array([1, 2, 3])) + self.mod.predict(self.x[0:6]) + + self.assertEqual(self.mod.n_classes_, 3) + self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) + + # Fit with 3 classes + self.mod.partial_fit(self.x, self.y) + self.mod.predict(self.x) + + self.assertEqual(self.mod.n_classes_, 3) + self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) + + def test_correct_on_second_call(self): + # Fit with 2 classes + self.mod.partial_fit(self.x[0:6], self.y[0:6], + classes=np.array([1, 2, 3])) + self.mod.predict(self.x[0:6]) + + self.assertEqual(self.mod.n_classes_, 3) + self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) + + # Fit with 3 classes + self.mod.partial_fit(self.x, self.y, + classes=np.array([1, 2, 3])) + self.mod.predict(self.x) + + self.assertEqual(self.mod.n_classes_, 3) + self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) + + def test_incorrect_on_second_call(self): + """Incorrect on second call - can happen when dask passes classes.""" + + # Fit with 3 classes + self.mod.partial_fit(self.x, self.y, + classes=np.array([1, 2, 3])) + self.mod.predict(self.x) + + self.assertEqual(self.mod.n_classes_, 3) + self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) + + # Fit with 2 classes + self.mod.partial_fit(self.x[0:6], self.y[0:6], + classes=np.array([1, 2])) + self.mod.predict(self.x[0:6]) + + self.assertEqual(self.mod.n_classes_, 3) + self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) + + self.mod.partial_fit(self.x[5:7], self.y[5:7], + classes=np.array([2, 3])) + self.mod.predict(self.x) + + self.assertEqual(self.mod.n_classes_, 3) + 
self.assertListEqual(list(self.mod.classes_), [1, 2, 3]) + + +class TestInconsistentClassesRFC(ClassConsistencyTests, unittest.TestCase): + def setUp(self): + self.mod = StreamingRFC(n_estimators_per_chunk=1, + max_n_estimators=np.inf, + verbose=2) + + +class TestInconsistentClassesEXT(ClassConsistencyTests, unittest.TestCase): + def setUp(self): + self.mod = StreamingEXTC(n_estimators_per_chunk=1, + max_n_estimators=np.inf, + verbose=2)
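For reference, a minimal sketch of the contract these class-consistency tests pin down (illustrative data, similar in shape to the fixture's frame): declare the full class set on the first `partial_fit` call, and later chunks may then carry only a subset of classes, or a stale `classes` argument as Dask sometimes passes, without shrinking `classes_` or `n_classes_`.

````python
import numpy as np
import pandas as pd

from incremental_trees.trees import StreamingRFC

# Illustrative data: rows 0:6 contain classes 1 and 2 only,
# rows 4:10 contain classes 2 and 3 only.
x = pd.DataFrame({'a': range(10), 'b': range(10)})
y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2, 3, 3])

mod = StreamingRFC(n_estimators_per_chunk=1, max_n_estimators=np.inf)

# First call: declare all classes up front, even though this chunk
# only contains classes 1 and 2.
mod.partial_fit(x[0:6], y[0:6], classes=np.array([1, 2, 3]))

# A later call passing a subset (or nothing) should leave the forest's
# class bookkeeping at the full set.
mod.partial_fit(x[4:10], y[4:10], classes=np.array([2, 3]))

assert mod.n_classes_ == 3
assert list(mod.classes_) == [1, 2, 3]
````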