Failed attempt at ordered gradients/hessians #87

Closed
benchmarks/bench_higgs_boson.py (2 changes: 1 addition & 1 deletion)
@@ -85,7 +85,7 @@ def load_data():
     max_leaf_nodes=n_leaf_nodes,
     n_iter_no_change=None,
     random_state=0,
-    verbose=1)
+    verbose=1, parallel_splitting=False)
 pygbm_model.fit(data_train, target_train)
 toc = time()
 predicted_test = pygbm_model.predict(data_test)
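To see what the new flag changes in practice, a minimal A/B timing sketch (the toy data and iteration counts are made up; only the `parallel_splitting` parameter comes from this PR):

```python
from time import time

import numpy as np
from pygbm import GradientBoostingRegressor

# Toy data standing in for the Higgs boson set used by the benchmark.
rng = np.random.RandomState(0)
X = rng.uniform(size=(10_000, 8)).astype(np.float32)
y = rng.uniform(size=10_000).astype(np.float32)

for parallel_splitting in (True, False):
    model = GradientBoostingRegressor(max_iter=10, random_state=0,
                                      parallel_splitting=parallel_splitting)
    tic = time()
    model.fit(X, y)
    print(f'parallel_splitting={parallel_splitting}: {time() - tic:.3f}s')
```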
pygbm/gradient_boosting.py (30 changes: 18 additions & 12 deletions)
@@ -26,7 +26,7 @@ class BaseGradientBoostingMachine(BaseEstimator, ABC):
     def __init__(self, loss, learning_rate, max_iter, max_leaf_nodes,
                  max_depth, min_samples_leaf, l2_regularization, max_bins,
                  scoring, validation_split, n_iter_no_change, tol, verbose,
-                 random_state):
+                 random_state, parallel_splitting):
         self.loss = loss
         self.learning_rate = learning_rate
         self.max_iter = max_iter
@@ -41,6 +41,7 @@ def __init__(self, loss, learning_rate, max_iter, max_leaf_nodes,
         self.tol = tol
         self.verbose = verbose
         self.random_state = random_state
+        self.parallel_splitting = parallel_splitting

     def _validate_parameters(self):
         """Validate parameters passed to __init__.
@@ -148,11 +149,14 @@ def fit(self, X, y):
         # Subsample the training set for score-based monitoring.
         if do_early_stopping:
             subsample_size = 10000
-            indices = np.arange(X_binned_train.shape[0])
-            if X_binned_train.shape[0] > subsample_size:
-                indices = rng.choice(indices, subsample_size)
-            X_binned_small_train = X_binned_train[indices]
-            y_small_train = y_train[indices]
+            n_samples_train = X_binned_train.shape[0]
+            if n_samples_train > subsample_size:
+                indices = rng.choice(X_binned_train.shape[0], subsample_size)
+                X_binned_small_train = X_binned_train[indices]
+                y_small_train = y_train[indices]
+            else:
+                X_binned_small_train = X_binned_train
+                y_small_train = y_train
             # Predicting is faster on C-contiguous arrays.
             X_binned_small_train = np.ascontiguousarray(X_binned_small_train)

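The refactor keeps the fancy indexing inside the `if` branch, so when the training set is already small no indexing copy is made at all. One detail worth flagging: `rng.choice` draws with replacement by default, before and after this change. A standalone sketch of the new logic, with a hypothetical helper name:

```python
import numpy as np

def subsample_for_scoring(X_binned_train, y_train, rng, subsample_size=10000):
    # Hypothetical helper mirroring the fit() logic above.
    if X_binned_train.shape[0] > subsample_size:
        # Note: rng.choice samples with replacement unless replace=False is given.
        indices = rng.choice(X_binned_train.shape[0], subsample_size)
        X_small, y_small = X_binned_train[indices], y_train[indices]
    else:
        X_small, y_small = X_binned_train, y_train
    # Predicting is faster on C-contiguous arrays, hence the cast.
    return np.ascontiguousarray(X_small), y_small
```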
@@ -218,14 +222,15 @@ def fit(self, X, y):
             # whole array.

             grower = TreeGrower(
-                X_binned_train, gradients_at_k, hessians_at_k,
+                X_binned_train, gradients_at_k.copy(), hessians_at_k,
                 max_bins=self.max_bins,
                 n_bins_per_feature=self.bin_mapper_.n_bins_per_feature_,
                 max_leaf_nodes=self.max_leaf_nodes,
                 max_depth=self.max_depth,
                 min_samples_leaf=self.min_samples_leaf,
                 l2_regularization=self.l2_regularization,
-                shrinkage=self.learning_rate)
+                shrinkage=self.learning_rate,
+                parallel_splitting=self.parallel_splitting)
             grower.grow()

             acc_apply_split_time += grower.total_apply_split_time
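The switch to `gradients_at_k.copy()` is presumably required because the grower now reorders its gradient buffer in place at each split: handing it a view into the loss's per-class gradient array would silently reorder that array too. A toy demonstration of the write-through hazard a view would cause:

```python
import numpy as np

gradients = np.arange(12, dtype=np.float32).reshape(3, 4)  # (n_classes, n_samples)

row_view = gradients[1]         # a view: reordering it mutates `gradients`
row_copy = gradients[1].copy()  # a private buffer the grower may permute freely

row_copy[::-1].sort()           # stand-in for an in-place partition at a split
assert gradients[1, 0] == 4.0   # original row untouched

row_view[::-1].sort()
assert gradients[1, 0] == 7.0   # the view wrote through and reordered the row
```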
@@ -492,15 +497,16 @@ def __init__(self, loss='least_squares', learning_rate=0.1,
                  max_iter=100, max_leaf_nodes=31, max_depth=None,
                  min_samples_leaf=20, l2_regularization=0., max_bins=256,
                  scoring=None, validation_split=0.1, n_iter_no_change=5,
-                 tol=1e-7, verbose=0, random_state=None):
+                 tol=1e-7, verbose=0, random_state=None,
+                 parallel_splitting=True):
         super(GradientBoostingRegressor, self).__init__(
             loss=loss, learning_rate=learning_rate, max_iter=max_iter,
             max_leaf_nodes=max_leaf_nodes, max_depth=max_depth,
             min_samples_leaf=min_samples_leaf,
             l2_regularization=l2_regularization, max_bins=max_bins,
             scoring=scoring, validation_split=validation_split,
             n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose,
-            random_state=random_state)
+            random_state=random_state, parallel_splitting=parallel_splitting)

     def predict(self, X):
         """Predict values for X.
@@ -611,15 +617,15 @@ def __init__(self, loss='auto', learning_rate=0.1, max_iter=100,
                  max_leaf_nodes=31, max_depth=None, min_samples_leaf=20,
                  l2_regularization=0., max_bins=256, scoring=None,
                  validation_split=0.1, n_iter_no_change=5, tol=1e-7,
-                 verbose=0, random_state=None):
+                 verbose=0, random_state=None, parallel_splitting=True):
         super(GradientBoostingClassifier, self).__init__(
             loss=loss, learning_rate=learning_rate, max_iter=max_iter,
             max_leaf_nodes=max_leaf_nodes, max_depth=max_depth,
             min_samples_leaf=min_samples_leaf,
             l2_regularization=l2_regularization, max_bins=max_bins,
             scoring=scoring, validation_split=validation_split,
             n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose,
-            random_state=random_state)
+            random_state=random_state, parallel_splitting=parallel_splitting)

     def predict(self, X):
         """Predict classes for X.
pygbm/grower.py (38 changes: 26 additions & 12 deletions)
@@ -8,7 +8,8 @@
 import numpy as np
 from time import time

-from .splitting import (SplittingContext, split_indices, find_node_split,
+from .splitting import (SplittingContext, split_indices_parallel,
+                        split_indices_single_thread, find_node_split,
                         find_node_split_subtraction)
 from .predictor import TreePredictor, PREDICTOR_RECORD_DTYPE

Expand Down Expand Up @@ -77,10 +78,12 @@ class TreeNode:
apply_split_time = 0.
hist_subtraction = False

def __init__(self, depth, sample_indices, sum_gradients,
def __init__(self, depth, sample_indices, gradients, hessians, sum_gradients,
sum_hessians, parent=None):
self.depth = depth
self.sample_indices = sample_indices
self.gradients = gradients
self.hessians = hessians
self.n_samples = sample_indices.shape[0]
self.sum_gradients = sum_gradients
self.sum_hessians = sum_hessians
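TreeNode now carries node-local gradient and hessian buffers alongside `sample_indices`: the intent, as far as this diff shows, is that entry `i` of `gradients` belongs to sample `sample_indices[i]`, so per-node kernels can read gradients with unit stride instead of gathering through indices. A toy construction under that assumption:

```python
import numpy as np

from pygbm.grower import TreeNode  # the class defined in this file

# Entry i of gradients/hessians belongs to sample_indices[i].
sample_indices = np.array([7, 2, 9], dtype=np.uint32)
gradients = np.array([0.5, -1.2, 0.3], dtype=np.float32)
hessians = np.ones(3, dtype=np.float32)

node = TreeNode(depth=0, sample_indices=sample_indices,
                gradients=gradients, hessians=hessians,
                sum_gradients=float(gradients.sum()),
                sum_hessians=float(hessians.sum()))
```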
@@ -163,7 +166,8 @@ class TreeGrower:
     def __init__(self, X_binned, gradients, hessians, max_leaf_nodes=None,
                  max_depth=None, min_samples_leaf=20, min_gain_to_split=0.,
                  max_bins=256, n_bins_per_feature=None, l2_regularization=0.,
-                 min_hessian_to_split=1e-3, shrinkage=1.):
+                 min_hessian_to_split=1e-3, shrinkage=1.,
+                 parallel_splitting=True):

         self._validate_parameters(X_binned, max_leaf_nodes, max_depth,
                                   min_samples_leaf, min_gain_to_split,
@@ -180,13 +184,14 @@ def __init__(self, X_binned, gradients, hessians, max_leaf_nodes=None,
         self.splitting_context = SplittingContext(
             X_binned, max_bins, n_bins_per_feature, gradients,
             hessians, l2_regularization, min_hessian_to_split,
-            min_samples_leaf, min_gain_to_split)
+            min_samples_leaf, min_gain_to_split, parallel_splitting)
         self.max_leaf_nodes = max_leaf_nodes
         self.max_depth = max_depth
         self.min_samples_leaf = min_samples_leaf
         self.X_binned = X_binned
         self.min_gain_to_split = min_gain_to_split
         self.shrinkage = shrinkage
+        self.parallel_splitting = parallel_splitting
         self.splittable_nodes = []
         self.finalized_leaves = []
         self.total_find_split_time = 0.  # time spent finding the best splits
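splitting.py is not part of this diff, but the `ordered_gradients` / `ordered_hessians` attributes used below suggest SplittingContext pre-allocates node-ordered scratch buffers at construction. A speculative sketch of that layout, not the actual class:

```python
import numpy as np

class OrderedBuffers:
    """Speculative stand-in for buffers SplittingContext likely holds."""

    def __init__(self, gradients, hessians):
        n_samples = gradients.shape[0]
        # partition lists all samples; splits reorder slices of it in place so
        # each node's samples end up occupying one contiguous chunk.
        self.partition = np.arange(n_samples, dtype=np.uint32)
        # ordered_* start out in sample order and are rearranged alongside
        # partition, keeping gradients/hessians aligned with it.
        self.ordered_gradients = gradients.copy()
        self.ordered_hessians = hessians.copy()
```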
@@ -238,14 +243,16 @@ def _intilialize_root(self):
         n_samples = self.X_binned.shape[0]
         depth = 0
         if self.splitting_context.constant_hessian:
-            hessian = self.splitting_context.hessians[0] * n_samples
+            sum_hessian = self.splitting_context.ordered_hessians[0] * n_samples
         else:
-            hessian = self.splitting_context.hessians.sum()
+            sum_hessian = self.splitting_context.ordered_hessians.sum()
         self.root = TreeNode(
             depth=depth,
-            sample_indices=self.splitting_context.partition.view(),
-            sum_gradients=self.splitting_context.gradients.sum(),
-            sum_hessians=hessian
+            gradients=self.splitting_context.ordered_gradients,
+            hessians=self.splitting_context.ordered_hessians,
+            sample_indices=self.splitting_context.partition,  # .view()
+            sum_gradients=self.splitting_context.ordered_gradients.sum(),
+            sum_hessians=sum_hessian
         )
         if (self.max_leaf_nodes is not None and self.max_leaf_nodes == 1):
             self._finalize_leaf(self.root)
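For losses with a constant second derivative (least squares has a hessian identically equal to 1), the root's hessian sum reduces to a single multiply instead of an O(n) reduction, which is what the `constant_hessian` branch exploits. A small check of the equivalence, assuming the constant-hessian case stores a single value:

```python
import numpy as np

n_samples = 1_000_000
ordered_hessians = np.ones(1, dtype=np.float32)  # assumed one-element storage

# Equivalent results; the first is O(1), the second O(n_samples).
sum_fast = ordered_hessians[0] * n_samples
sum_slow = np.full(n_samples, ordered_hessians[0], dtype=np.float32).sum()
assert np.isclose(sum_fast, sum_slow)
```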
@@ -296,7 +303,8 @@ def _compute_spittability(self, node, only_hist=False):
                 node.parent.histograms, node.sibling.histograms)
         else:
             split_info, histograms = find_node_split(
-                self.splitting_context, node.sample_indices)
+                self.splitting_context, node.sample_indices, node.gradients,
+                node.hessians)
         toc = time()
         node.find_split_time = toc - tic
         self.total_find_split_time += node.find_split_time
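The payoff of passing node-ordered gradients to `find_node_split` is in the histogram kernel: it can walk the gradients and hessians sequentially and only gather through `sample_indices` for the binned feature values. A simplified single-feature sketch of such a kernel, not the actual numba implementation in splitting.py:

```python
import numpy as np

def build_histogram(n_bins, sample_indices, binned_feature,
                    ordered_gradients, ordered_hessians):
    # ordered_gradients[i] / ordered_hessians[i] belong to sample_indices[i],
    # so both are read with unit stride; only binned_feature is gathered.
    hist_grad = np.zeros(n_bins, dtype=np.float32)
    hist_hess = np.zeros(n_bins, dtype=np.float32)
    for i, sample in enumerate(sample_indices):
        bin_ = binned_feature[sample]
        hist_grad[bin_] += ordered_gradients[i]
        hist_hess[bin_] += ordered_hessians[i]
    return hist_grad, hist_hess
```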
@@ -336,8 +344,10 @@ def split_next(self):
         node = heappop(self.splittable_nodes)

         tic = time()
-        (sample_indices_left, sample_indices_right) = split_indices(
-            self.splitting_context, node.split_info, node.sample_indices)
+        split_indices = split_indices_parallel if self.parallel_splitting else split_indices_single_thread
+        (sample_indices_left, gradients_left, hessians_left), \
+            (sample_indices_right, gradients_right, hessians_right) = split_indices(
+                self.splitting_context, node.split_info, node.sample_indices, node.gradients, node.hessians)
         toc = time()
         node.apply_split_time = toc - tic
         self.total_apply_split_time += node.apply_split_time
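`split_indices_single_thread` is not shown in this diff, but the unpacking above implies it returns, for each side, the child's sample indices together with matching reordered gradient and hessian slices. A plausible pure-numpy rendition (the real code is a numba kernel working in place on the partition, and the `feature_idx` / `bin_idx` attribute names are assumptions):

```python
import numpy as np

def split_indices_single_thread_sketch(context, split_info, sample_indices,
                                       gradients, hessians):
    # Boolean mask: True where the sample goes to the left child.
    binned_feature = context.X_binned[:, split_info.feature_idx]
    goes_left = binned_feature[sample_indices] <= split_info.bin_idx

    left = (sample_indices[goes_left], gradients[goes_left],
            hessians[goes_left])
    right = (sample_indices[~goes_left], gradients[~goes_left],
             hessians[~goes_left])
    return left, right
```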
Expand All @@ -348,11 +358,15 @@ def split_next(self):

left_child_node = TreeNode(depth,
sample_indices_left,
gradients_left,
hessians_left,
node.split_info.gradient_left,
node.split_info.hessian_left,
parent=node)
right_child_node = TreeNode(depth,
sample_indices_right,
gradients_right,
hessians_right,
node.split_info.gradient_right,
node.split_info.hessian_right,
parent=node)