From 8dfc4155edd6cce18bb93b0673907df7f66c88e6 Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Wed, 27 Sep 2023 04:50:47 -0700
Subject: [PATCH] Consistently add cover to daal APIs, add output parameters to
 end of function arguments

---
 ...sion_forest_classification_model_builder.h |  12 +-
 .../gbt_classification_model_builder.h        |  16 +-
 .../gbt_regression_model_builder.h            |   2 +-
 .../src/algorithms/dtrees/dtrees_model.cpp    |   2 +-
 .../src/algorithms/dtrees/dtrees_model_impl.h |   2 +-
 .../df_classification_model_builder.cpp       |   4 +-
 .../gbt_classification_model_builder.cpp      |   8 +-
 .../gbt_regression_model_builder.cpp          |   6 +-
 ...ression_predict_dense_default_batch_impl.i |  17 +-
 .../src/algorithms/dtrees/gbt/treeshap.cpp    |   2 -
 cpp/daal/src/algorithms/dtrees/gbt/treeshap.h | 490 +-----------------
 11 files changed, 34 insertions(+), 527 deletions(-)

diff --git a/cpp/daal/include/algorithms/decision_forest/decision_forest_classification_model_builder.h b/cpp/daal/include/algorithms/decision_forest/decision_forest_classification_model_builder.h
index 74f564aeac5..f656885c538 100644
--- a/cpp/daal/include/algorithms/decision_forest/decision_forest_classification_model_builder.h
+++ b/cpp/daal/include/algorithms/decision_forest/decision_forest_classification_model_builder.h
@@ -109,10 +109,9 @@ class DAAL_EXPORT ModelBuilder
      * \param[in] classLabel   Class label to be predicted
      * \return Node identifier
      */
-    NodeId addLeafNode(const TreeId treeId, const NodeId parentId, const size_t position, const size_t classLabel)
+    NodeId addLeafNode(const TreeId treeId, const NodeId parentId, const size_t position, const size_t classLabel, const double cover)
     {
         NodeId resId;
-        const double cover = 0.0; // TODO: Add cover
         _status |= addLeafNodeInternal(treeId, parentId, position, classLabel, cover, resId);
         services::throwIfPossible(_status);
         return resId;
@@ -126,10 +125,9 @@ class DAAL_EXPORT ModelBuilder
      * \param[in] proba        Array with probability values for each class
      * \return Node identifier
      */
-    NodeId addLeafNodeByProba(const TreeId treeId, const NodeId parentId, const size_t position, const double * const proba)
+    NodeId addLeafNodeByProba(const TreeId treeId, const NodeId parentId, const size_t position, const double * const proba, const double cover)
     {
         NodeId resId;
-        const double cover = 0.0; // TODO: Add cover
         _status |= addLeafNodeByProbaInternal(treeId, parentId, position, proba, cover, resId);
         services::throwIfPossible(_status);
         return resId;
@@ -144,10 +142,10 @@ class DAAL_EXPORT ModelBuilder
      * \param[in] featureValue Feature value for splitting
      * \return Node identifier
      */
-    NodeId addSplitNode(const TreeId treeId, const NodeId parentId, const size_t position, const size_t featureIndex, const double featureValue)
+    NodeId addSplitNode(const TreeId treeId, const NodeId parentId, const size_t position, const size_t featureIndex, const double featureValue,
+                        const double cover)
     {
         NodeId resId;
-        const double cover = 0.0; // TODO: Add cover
         _status |= addSplitNodeInternal(treeId, parentId, position, featureIndex, featureValue, cover, resId);
         services::throwIfPossible(_status);
         return resId;
@@ -192,7 +190,7 @@ class DAAL_EXPORT ModelBuilder
     services::Status addLeafNodeByProbaInternal(const TreeId treeId, const NodeId parentId, const size_t position, const double * const proba,
                                                 const double cover, NodeId & res);
     services::Status addSplitNodeInternal(const TreeId treeId, const NodeId parentId, const size_t position, const size_t featureIndex,
-                                          const double featureValue, const double cover, NodeId & res);
+                                          const double featureValue, const double cover, const int defaultLeft, NodeId & res);
 
 private:
     size_t _nClasses;
diff --git a/cpp/daal/include/algorithms/gradient_boosted_trees/gbt_classification_model_builder.h b/cpp/daal/include/algorithms/gradient_boosted_trees/gbt_classification_model_builder.h
index d72cff6e9f4..71dfdbfb3a5 100644
--- a/cpp/daal/include/algorithms/gradient_boosted_trees/gbt_classification_model_builder.h
+++ b/cpp/daal/include/algorithms/gradient_boosted_trees/gbt_classification_model_builder.h
@@ -109,12 +109,13 @@ class DAAL_EXPORT ModelBuilder
      * \param[in] parentId     Parent node to which new node is added (use noParent for root node)
      * \param[in] position     Position in parent (e.g. 0 for left and 1 for right child in a binary tree)
      * \param[in] response     Response value for leaf node to be predicted
+     * \param[in] cover        Cover (Hessian sum) of the node
      * \return Node identifier
      */
-    NodeId addLeafNode(TreeId treeId, NodeId parentId, size_t position, double response)
+    NodeId addLeafNode(TreeId treeId, NodeId parentId, size_t position, double response, double cover)
     {
         NodeId resId;
-        _status |= addLeafNodeInternal(treeId, parentId, position, response, resId);
+        _status |= addLeafNodeInternal(treeId, parentId, position, response, cover, resId);
         services::throwIfPossible(_status);
         return resId;
     }
@@ -127,12 +128,13 @@ class DAAL_EXPORT ModelBuilder
      * \param[in] featureIndex Feature index for splitting
      * \param[in] featureValue Feature value for splitting
      * \param[in] defaultLeft  Behaviour in case of missing values
+     * \param[in] cover        Cover (Hessian sum) of the node
      * \return Node identifier
      */
-    NodeId addSplitNode(TreeId treeId, NodeId parentId, size_t position, size_t featureIndex, double featureValue, int defaultLeft = 0)
+    NodeId addSplitNode(TreeId treeId, NodeId parentId, size_t position, size_t featureIndex, double featureValue, int defaultLeft, double cover)
     {
         NodeId resId;
-        _status |= addSplitNodeInternal(treeId, parentId, position, featureIndex, featureValue, resId, defaultLeft);
+        _status |= addSplitNodeInternal(treeId, parentId, position, featureIndex, featureValue, defaultLeft, cover, resId);
         services::throwIfPossible(_status);
         return resId;
     }
@@ -159,9 +161,9 @@ class DAAL_EXPORT ModelBuilder
     services::Status _status;
     services::Status initialize(size_t nFeatures, size_t nIterations, size_t nClasses);
     services::Status createTreeInternal(size_t nNodes, size_t classLabel, TreeId & resId);
-    services::Status addLeafNodeInternal(TreeId treeId, NodeId parentId, size_t position, double response, NodeId & res);
-    services::Status addSplitNodeInternal(TreeId treeId, NodeId parentId, size_t position, size_t featureIndex, double featureValue, NodeId & res,
-                                          int defaultLeft);
+    services::Status addLeafNodeInternal(TreeId treeId, NodeId parentId, size_t position, double response, const double cover, NodeId & res);
+    services::Status addSplitNodeInternal(TreeId treeId, NodeId parentId, size_t position, size_t featureIndex, double featureValue, int defaultLeft,
+                                          const double cover, NodeId & res);
     services::Status convertModelInternal();
     size_t _nClasses;
     size_t _nIterations;
diff --git a/cpp/daal/include/algorithms/gradient_boosted_trees/gbt_regression_model_builder.h b/cpp/daal/include/algorithms/gradient_boosted_trees/gbt_regression_model_builder.h
index 731f06567a7..bcde75d847a 100644
--- a/cpp/daal/include/algorithms/gradient_boosted_trees/gbt_regression_model_builder.h
+++ b/cpp/daal/include/algorithms/gradient_boosted_trees/gbt_regression_model_builder.h
@@ -161,7
+161,7 @@ class DAAL_EXPORT ModelBuilder services::Status createTreeInternal(size_t nNodes, TreeId & resId); services::Status addLeafNodeInternal(TreeId treeId, NodeId parentId, size_t position, double response, double cover, NodeId & res); services::Status addSplitNodeInternal(TreeId treeId, NodeId parentId, size_t position, size_t featureIndex, double featureValue, double cover, - NodeId & res, int defaultLeft); + int defaultLeft, NodeId & res); services::Status convertModelInternal(); }; /** @} */ diff --git a/cpp/daal/src/algorithms/dtrees/dtrees_model.cpp b/cpp/daal/src/algorithms/dtrees/dtrees_model.cpp index c2362a6f20a..a7d5286933c 100644 --- a/cpp/daal/src/algorithms/dtrees/dtrees_model.cpp +++ b/cpp/daal/src/algorithms/dtrees/dtrees_model.cpp @@ -224,7 +224,7 @@ void setProbabilities(const size_t treeId, const size_t nodeId, const size_t res } services::Status addSplitNodeInternal(data_management::DataCollectionPtr & serializationData, size_t treeId, size_t parentId, size_t position, - size_t featureIndex, double featureValue, double cover, size_t & res, int defaultLeft) + size_t featureIndex, double featureValue, double cover, int defaultLeft, size_t & res) { const size_t noParent = static_cast(-1); services::Status s; diff --git a/cpp/daal/src/algorithms/dtrees/dtrees_model_impl.h b/cpp/daal/src/algorithms/dtrees/dtrees_model_impl.h index 63fa01ce53b..d383a4ddc7b 100644 --- a/cpp/daal/src/algorithms/dtrees/dtrees_model_impl.h +++ b/cpp/daal/src/algorithms/dtrees/dtrees_model_impl.h @@ -346,7 +346,7 @@ void setNode(DecisionTreeNode & node, int featureIndex, size_t classLabel, doubl void setNode(DecisionTreeNode & node, int featureIndex, double response, double cover); services::Status addSplitNodeInternal(data_management::DataCollectionPtr & serializationData, size_t treeId, size_t parentId, size_t position, - size_t featureIndex, double featureValue, double cover, size_t & res, int defaultLeft = 0); + size_t featureIndex, double featureValue, double cover, int defaultLeft, size_t & res); void setProbabilities(const size_t treeId, const size_t nodeId, const size_t response, const data_management::DataCollectionPtr probTbl, const double * const prob); diff --git a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_model_builder.cpp b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_model_builder.cpp index 140b5ca6790..fffc3dadd70 100644 --- a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_model_builder.cpp +++ b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_model_builder.cpp @@ -117,12 +117,12 @@ services::Status ModelBuilder::addLeafNodeByProbaInternal(const TreeId treeId, c } services::Status ModelBuilder::addSplitNodeInternal(const TreeId treeId, const NodeId parentId, const size_t position, const size_t featureIndex, - const double featureValue, const double cover, NodeId & res) + const double featureValue, const double cover, const int defaultLeft, NodeId & res) { decision_forest::classification::internal::ModelImpl & modelImplRef = daal::algorithms::dtrees::internal::getModelRef(_model); return daal::algorithms::dtrees::internal::addSplitNodeInternal(modelImplRef._serializationData, treeId, parentId, position, featureIndex, - featureValue, cover, res); + featureValue, cover, defaultLeft, res); } } // namespace interface2 diff --git a/cpp/daal/src/algorithms/dtrees/gbt/classification/gbt_classification_model_builder.cpp 
b/cpp/daal/src/algorithms/dtrees/gbt/classification/gbt_classification_model_builder.cpp index 8b5d9afedfd..d13f4e665d0 100644 --- a/cpp/daal/src/algorithms/dtrees/gbt/classification/gbt_classification_model_builder.cpp +++ b/cpp/daal/src/algorithms/dtrees/gbt/classification/gbt_classification_model_builder.cpp @@ -120,23 +120,21 @@ services::Status ModelBuilder::createTreeInternal(size_t nNodes, size_t clasLabe } } -services::Status ModelBuilder::addLeafNodeInternal(TreeId treeId, NodeId parentId, size_t position, double response, NodeId & res) +services::Status ModelBuilder::addLeafNodeInternal(TreeId treeId, NodeId parentId, size_t position, double response, double cover, NodeId & res) { gbt::classification::internal::ModelImpl & modelImplRef = daal::algorithms::dtrees::internal::getModelRef(_model); - const double cover = 0.0; // TODO: Add cover return daal::algorithms::dtrees::internal::addLeafNodeInternal(modelImplRef._serializationData, treeId, parentId, position, response, cover, res); } services::Status ModelBuilder::addSplitNodeInternal(TreeId treeId, NodeId parentId, size_t position, size_t featureIndex, double featureValue, - NodeId & res, int defaultLeft) + int defaultLeft, const double cover, NodeId & res) { gbt::classification::internal::ModelImpl & modelImplRef = daal::algorithms::dtrees::internal::getModelRef(_model); - const double cover = 0.0; // TODO: Add cover return daal::algorithms::dtrees::internal::addSplitNodeInternal(modelImplRef._serializationData, treeId, parentId, position, featureIndex, - featureValue, cover, res, defaultLeft); + featureValue, cover, defaultLeft, res); } } // namespace interface1 diff --git a/cpp/daal/src/algorithms/dtrees/gbt/regression/gbt_regression_model_builder.cpp b/cpp/daal/src/algorithms/dtrees/gbt/regression/gbt_regression_model_builder.cpp index 7b5df3313fa..90d66ec943a 100644 --- a/cpp/daal/src/algorithms/dtrees/gbt/regression/gbt_regression_model_builder.cpp +++ b/cpp/daal/src/algorithms/dtrees/gbt/regression/gbt_regression_model_builder.cpp @@ -72,7 +72,7 @@ services::Status ModelBuilder::createTreeInternal(size_t nNodes, TreeId & resId) } services::Status ModelBuilder::addLeafNodeInternal(TreeId treeId, NodeId parentId, size_t position, double response, double cover, NodeId & res) -{ +{ gbt::regression::internal::ModelImpl & modelImplRef = daal::algorithms::dtrees::internal::getModelRef(_model); return daal::algorithms::dtrees::internal::addLeafNodeInternal(modelImplRef._serializationData, treeId, parentId, position, response, @@ -80,12 +80,12 @@ services::Status ModelBuilder::addLeafNodeInternal(TreeId treeId, NodeId parentI } services::Status ModelBuilder::addSplitNodeInternal(TreeId treeId, NodeId parentId, size_t position, size_t featureIndex, double featureValue, - double cover, NodeId & res, int defaultLeft) + double cover, int defaultLeft, NodeId & res) { gbt::regression::internal::ModelImpl & modelImplRef = daal::algorithms::dtrees::internal::getModelRef(_model); return daal::algorithms::dtrees::internal::addSplitNodeInternal(modelImplRef._serializationData, treeId, parentId, position, featureIndex, - featureValue, cover, res, defaultLeft); + featureValue, cover, defaultLeft, res); } } // namespace interface1 diff --git a/cpp/daal/src/algorithms/dtrees/gbt/regression/gbt_regression_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/dtrees/gbt/regression/gbt_regression_predict_dense_default_batch_impl.i index 0d9d06b564b..fd7e49bf77c 100644 --- 
a/cpp/daal/src/algorithms/dtrees/gbt/regression/gbt_regression_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/dtrees/gbt/regression/gbt_regression_predict_dense_default_batch_impl.i @@ -337,19 +337,6 @@ services::Status PredictRegressionTask::predictContributio const size_t nColumnsPhi = nColumnsData + 1; const size_t biasTermIndex = nColumnsPhi - 1; - // some model details (populated only for Fast TreeSHAP v2) - gbt::treeshap::ModelDetails modelDetails(_aTree.get(), iTree, nTrees); - if (modelDetails.requiresPrecompute) - { - for (size_t currentTreeIndex = iTree; currentTreeIndex < iTree + nTrees; ++currentTreeIndex) - { - // regression model builder tree 0 contains only the base_score and must be skipped - if (currentTreeIndex == 0) continue; - const gbt::internal::GbtDecisionTree * currentTree = _aTree[currentTreeIndex]; - gbt::treeshap::computeCombinationSum(currentTree, currentTreeIndex, modelDetails); - } - } - for (size_t iRow = 0; iRow < nRowsData; ++iRow) { const algorithmFPType * currentX = x + (iRow * nColumnsData); @@ -360,8 +347,8 @@ services::Status PredictRegressionTask::predictContributio if (currentTreeIndex == 0) continue; const gbt::internal::GbtDecisionTree * currentTree = _aTree[currentTreeIndex]; - st |= gbt::treeshap::treeShap( - currentTree, currentTreeIndex, currentX, phi, &_featHelper, condition, conditionFeature, modelDetails); + st |= gbt::treeshap::treeShap(currentTree, currentX, phi, &_featHelper, condition, + conditionFeature); } if (condition == 0) diff --git a/cpp/daal/src/algorithms/dtrees/gbt/treeshap.cpp b/cpp/daal/src/algorithms/dtrees/gbt/treeshap.cpp index b0aa81081fe..e8d5390af3c 100644 --- a/cpp/daal/src/algorithms/dtrees/gbt/treeshap.cpp +++ b/cpp/daal/src/algorithms/dtrees/gbt/treeshap.cpp @@ -215,8 +215,6 @@ float unwoundPathSumZero(const float * partialWeights, unsigned uniqueDepth, uns } } // namespace v1 -namespace v2 -{} } // namespace internal } // namespace treeshap } // namespace gbt diff --git a/cpp/daal/src/algorithms/dtrees/gbt/treeshap.h b/cpp/daal/src/algorithms/dtrees/gbt/treeshap.h index 8f76159bd54..762db0de73b 100644 --- a/cpp/daal/src/algorithms/dtrees/gbt/treeshap.h +++ b/cpp/daal/src/algorithms/dtrees/gbt/treeshap.h @@ -70,83 +70,6 @@ struct PathElement PathElement(const PathElement &) = default; }; -/** - * Model details required for TreeSHAP algorithms -*/ -template -struct ModelDetails -{ - size_t maxDepth = 0; - size_t maxLeafs = 0; - size_t maxNodes = 0; - size_t maxCombinations = 0; - size_t nTreesToUse = 0; - bool requiresPrecompute = false; - algorithmFPType * combinationSum = nullptr; - int * duplicatedNode = nullptr; - ModelDetails() = default; - ModelDetails(const gbt::internal::GbtDecisionTree ** trees, size_t firstTreeIndex, size_t nTrees) - { - const uint8_t shapVersion = getRequestedAlgorithmVersion(); - requiresPrecompute = shapVersion == 2; - if (!requiresPrecompute) - { - // only Fast TreeSHAP v2.2 requires what we do here - return; - } - - nTreesToUse = nTrees; - for (size_t i = firstTreeIndex; i < firstTreeIndex + nTreesToUse; ++i) - { - const gbt::internal::GbtDecisionTree * tree = trees[i]; - const size_t nNodes = tree->getNumberOfNodes(); - const size_t tDepth = tree->getMaxLvl(); - // this is over-estimating number of leafs, but that's okay because - // we're only reserving memory - // TODO: Add nLeafs to the tree structure - // (allocating space for the theoretical max is wasting space for sparse trees) - const size_t nLeafs = static_cast(1 << tDepth); - - maxDepth = maxDepth > tDepth 
? maxDepth : tDepth; - maxLeafs = maxLeafs > nLeafs ? maxLeafs : nLeafs; - maxNodes = maxNodes > nNodes ? maxNodes : nNodes; - } - - maxCombinations = static_cast(1 << maxDepth); - - // allocate combinationSum buffer for Fast TreeSHAP v2.2 - const size_t nCombinationSum = maxLeafs * maxCombinations * nTreesToUse; - combinationSum = static_cast(daal_malloc(sizeof(algorithmFPType) * nCombinationSum)); - DAAL_ASSERT(combinationSum); - for (size_t i = 0; i < nCombinationSum; ++i) - { - combinationSum[i] = 0.0; - } - - // allocate duplicatedNode buffer for Fast TreeSHAP v2.2 - const size_t nDuplicatedNode = maxNodes * nTreesToUse; - duplicatedNode = static_cast(daal_malloc(sizeof(int) * nDuplicatedNode)); - DAAL_ASSERT(duplicatedNode); - for (size_t i = 0; i < nDuplicatedNode; ++i) - { - duplicatedNode[i] = -1; - } - } - ~ModelDetails() - { - if (combinationSum) - { - daal_free(combinationSum); - combinationSum = nullptr; - } - if (duplicatedNode) - { - daal_free(duplicatedNode); - duplicatedNode = nullptr; - } - } -}; - namespace internal { @@ -430,7 +353,7 @@ inline void treeShap(const gbt::internal::GbtDecisionTree * tree, const algorith incomingZeroFraction = uniquePath[pathIndex].zeroFraction; incomingOneFraction = uniquePath[pathIndex].oneFraction; unwindPath(uniquePath, pWeights, uniqueDepth, uniqueDepthPWeights, pathIndex); - uniqueDepth -= 1; + --uniqueDepth; // update pWeightsResidual iff the duplicated feature does not satisfy the threshold if (incomingOneFraction != 0.) { @@ -448,15 +371,15 @@ inline void treeShap(const gbt::internal::GbtDecisionTree * tree, const algorith if (condition > 0 && splitIndex == conditionFeature) { coldConditionFraction = 0; - uniqueDepth -= 1; - uniqueDepthPWeights -= 1; + --uniqueDepth; + --uniqueDepthPWeights; } else if (condition < 0 && splitIndex == conditionFeature) { hotConditionFraction *= hotZeroFraction; coldConditionFraction *= coldZeroFraction; - uniqueDepth -= 1; - uniqueDepthPWeights -= 1; + --uniqueDepth; + --uniqueDepthPWeights; } treeShap( @@ -511,403 +434,8 @@ inline services::Status treeShap(const gbt::internal::GbtDecisionTree * tree, co } } // namespace v1 -namespace v2 -{ -template -inline void computeCombinationSum(const gbt::internal::GbtDecisionTree * tree, algorithmFPType * combinationSum, int * duplicatedNode, - size_t maxDepth, unsigned nodeIndex, unsigned depth, unsigned uniqueDepth, int * parentUniqueDepthPWeights, - PathElement * parentUniquePath, float * parentPWeights, float parentZeroFraction, int parentFeatureIndex, - int & leafCount) -{ - // extend the unique path - PathElement * uniquePath = parentUniquePath + uniqueDepth; - size_t nBytes = uniqueDepth * sizeof(PathElement); - int copyStatus = daal::services::internal::daal_memcpy_s(uniquePath, nBytes, parentUniquePath, nBytes); - DAAL_ASSERT(copyStatus == 0); - - uniquePath[uniqueDepth].featureIndex = parentFeatureIndex; - uniquePath[uniqueDepth].zeroFraction = parentZeroFraction; - - unsigned l; - int * uniqueDepthPWeights; - float * pWeights; - float * tPWeights; - - // extend pWeights and update uniqueDepthPWeights - if (uniqueDepth == 0) - { - l = 1; - uniqueDepthPWeights = parentUniqueDepthPWeights; - uniqueDepthPWeights[0] = 0; - pWeights = parentPWeights; - pWeights[0] = 1.0f; - } - else - { - l = static_cast(1 << (uniqueDepth - 1)); - uniqueDepthPWeights = parentUniqueDepthPWeights + l; - nBytes = l * sizeof(int); - copyStatus = daal::services::internal::daal_memcpy_s(uniqueDepthPWeights, nBytes, parentUniqueDepthPWeights, nBytes); - DAAL_ASSERT(copyStatus == 
0); - copyStatus = daal::services::internal::daal_memcpy_s(uniqueDepthPWeights + l, nBytes, parentUniqueDepthPWeights, nBytes); - DAAL_ASSERT(copyStatus == 0); - - pWeights = parentPWeights + l * (maxDepth + 1); - nBytes = l * (maxDepth + 1) * sizeof(float); - copyStatus = daal::services::internal::daal_memcpy_s(pWeights, nBytes, parentPWeights, nBytes); - DAAL_ASSERT(copyStatus == 0); - copyStatus = daal::services::internal::daal_memcpy_s(pWeights + l * (maxDepth + 1), nBytes, parentPWeights, nBytes); - DAAL_ASSERT(copyStatus == 0); - - for (unsigned t = 0; t < l; t++) - { - tPWeights = pWeights + t * (maxDepth + 1); - for (int i = uniqueDepthPWeights[t] - 1; i >= 0; i--) - { - tPWeights[i] *= (uniqueDepth - i) / static_cast(uniqueDepth + 1); - } - uniqueDepthPWeights[t] -= 1; - } - for (unsigned t = l; t < 2 * l; t++) - { - tPWeights = pWeights + t * (maxDepth + 1); - tPWeights[uniqueDepthPWeights[t]] = 0.0f; - for (int i = uniqueDepthPWeights[t] - 1; i >= 0; i--) - { - tPWeights[i + 1] += tPWeights[i] * (i + 1) / static_cast(uniqueDepth + 1); - tPWeights[i] *= parentZeroFraction * (uniqueDepth - i) / static_cast(uniqueDepth + 1); - } - } - } - - const bool isLeaf = gbt::internal::ModelImpl::nodeIsLeaf(nodeIndex, *tree, depth); - if (isLeaf) - { - // calculate one row of combinationSum for the current path - algorithmFPType * leafCombinationSum = combinationSum + leafCount * static_cast(1 << maxDepth); - for (unsigned t = 0; t < 2 * l - 1; t++) - { - leafCombinationSum[t] = 0; - tPWeights = pWeights + t * (maxDepth + 1); - for (int i = uniqueDepthPWeights[t]; i >= 0; i--) - { - float value = tPWeights[i] / static_cast(uniqueDepth - i); - leafCombinationSum[t] += value; - } - leafCombinationSum[t] *= (uniqueDepth + 1); - } - ++leafCount; - - return; - } - - const gbt::prediction::internal::ModelFPType * const nodeCoverValues = tree->getNodeCoverValues() - 1; - const gbt::prediction::internal::FeatureIndexType * const fIndexes = tree->getFeatureIndexesForSplit() - 1; - const FeatureIndexType splitIndex = fIndexes[nodeIndex]; - const unsigned leftIndex = 2 * nodeIndex; - const unsigned rightIndex = 2 * nodeIndex + 1; - const algorithmFPType w = nodeCoverValues[nodeIndex]; - const algorithmFPType leftZeroFraction = nodeCoverValues[leftIndex] / w; - const algorithmFPType rightZeroFraction = nodeCoverValues[rightIndex] / w; - algorithmFPType incomingZeroFraction = 1; - - // see if we have already split on this feature, - // if so we undo that split so we can redo it for this node - unsigned pathIndex = 0; - for (; pathIndex <= uniqueDepth; ++pathIndex) - { - if (static_cast(uniquePath[pathIndex].featureIndex) == splitIndex) break; - } - if (pathIndex != uniqueDepth + 1) - { - duplicatedNode[nodeIndex] = pathIndex; // record node index of duplicated feature - incomingZeroFraction = uniquePath[pathIndex].zeroFraction; - - // shrink pWeights and uniquePath, and update uniqueDepthPWeights, given the duplicated feature - unsigned p = static_cast(1 << (pathIndex - 1)); - unsigned t = 0; - float * kPWeights; - for (unsigned j = 0; j < 2 * l; j += 2 * p) - { - for (unsigned k = j; k < j + p; k++) - { - tPWeights = pWeights + t * (maxDepth + 1); - kPWeights = pWeights + k * (maxDepth + 1); - for (int i = uniqueDepthPWeights[k]; i >= 0; i--) - { - tPWeights[i] = kPWeights[i] * (uniqueDepth + 1) / static_cast(uniqueDepth - i); - } - uniqueDepthPWeights[t] = uniqueDepthPWeights[k]; - t += 1; - } - } - for (unsigned i = pathIndex; i < uniqueDepth; ++i) - { - uniquePath[i].featureIndex = uniquePath[i + 
1].featureIndex; - uniquePath[i].zeroFraction = uniquePath[i + 1].zeroFraction; - } - uniqueDepth -= 1; - } - else - { - duplicatedNode[nodeIndex] = -1; - } - - PRAGMA_IVDEP - PRAGMA_VECTOR_ALWAYS - for (unsigned t = 0; t < 2 * l; ++t) - { - ++(uniqueDepthPWeights[t]); - } - - computeCombinationSum(tree, combinationSum, duplicatedNode, maxDepth, leftIndex, depth + 1, uniqueDepth + 1, uniqueDepthPWeights, - uniquePath, pWeights, incomingZeroFraction * leftZeroFraction, splitIndex, leafCount); - - computeCombinationSum(tree, combinationSum, duplicatedNode, maxDepth, rightIndex, depth + 1, uniqueDepth + 1, - uniqueDepthPWeights, uniquePath, pWeights, incomingZeroFraction * rightZeroFraction, splitIndex, - leafCount); -} - -template -inline services::Status computeCombinationSum(const gbt::internal::GbtDecisionTree * tree, algorithmFPType * combinationSum, int * duplicatedNode, - size_t maxDepth) -{ - services::Status st; - - const size_t maxCombinations = 1 << maxDepth; - const size_t nUniqueDepthPWeights = 2 * maxCombinations; - const size_t nPWeights = 2 * maxCombinations * (maxDepth + 1); - const size_t nUniquePath = (maxDepth + 1) * (maxDepth + 2) / 2; - - // Pre-allocate space for the unique path data, pWeights and uniqueDepthPWeights - int * uniqueDepthPWeights = static_cast(daal_malloc(sizeof(int) * nUniqueDepthPWeights)); - DAAL_CHECK_MALLOC(uniqueDepthPWeights) - for (size_t i = 0; i < nUniqueDepthPWeights; ++i) - { - uniqueDepthPWeights[i] = 0; - } - - float * pWeights = static_cast(daal_malloc(sizeof(float) * nPWeights)); - DAAL_CHECK_MALLOC(pWeights) - for (size_t i = 0; i < nPWeights; ++i) - { - pWeights[i] = 0.0f; - } - - PathElement * uniquePathData = static_cast(daal_malloc(sizeof(PathElement) * nUniquePath)); - DAAL_CHECK_MALLOC(uniquePathData) - PathElement init; - for (size_t i = 0; i < nUniquePath; ++i) - { - DAAL_ASSERT(0 == daal::services::internal::daal_memcpy_s(uniquePathData + i, sizeof(PathElement), &init, sizeof(PathElement))); - } - - int leafCount = 0; - - computeCombinationSum(tree, combinationSum, duplicatedNode, maxDepth, 1, 0, 0, uniqueDepthPWeights, uniquePathData, pWeights, 1, - -1, leafCount); - - daal_free(uniquePathData); - daal_free(pWeights); - daal_free(uniqueDepthPWeights); - - return st; -} - -/** - * Recursive part of Fast TreeSHAP v2 -*/ -template -inline void treeShap(const gbt::internal::GbtDecisionTree * tree, const algorithmFPType * combinationSum, const int * duplicatedNode, - const int maxDepth, const algorithmFPType * x, algorithmFPType * phi, unsigned nodeIndex, size_t depth, unsigned uniqueDepth, - PathElement * parentUniquePath, float pWeightsResidual, float parentZeroFraction, float parentOneFraction, - int parentFeatureIndex, int & leafCount) -{ - // TODO: Add support for multi-class output - const size_t numOutputs = 1; - - const gbt::prediction::internal::ModelFPType * const splitValues = tree->getSplitPoints() - 1; - const gbt::prediction::internal::FeatureIndexType * const fIndexes = tree->getFeatureIndexesForSplit() - 1; - const gbt::prediction::internal::ModelFPType * const nodeCoverValues = tree->getNodeCoverValues() - 1; - const int * const defaultLeft = tree->getDefaultLeftForSplit() - 1; - - // extend the unique path - PathElement * uniquePath = parentUniquePath + uniqueDepth; - const size_t nBytes = uniqueDepth * sizeof(PathElement); - const int copyStatus = daal::services::internal::daal_memcpy_s(uniquePath, nBytes, parentUniquePath, nBytes); - DAAL_ASSERT(copyStatus == 0); - - uniquePath[uniqueDepth].featureIndex = 
parentFeatureIndex; - uniquePath[uniqueDepth].zeroFraction = parentZeroFraction; - uniquePath[uniqueDepth].oneFraction = parentOneFraction; - // update pWeightsResidual if the feature of the last split does not satisfy the threshold - if (parentOneFraction != 1) - { - pWeightsResidual *= parentZeroFraction; - } - - const bool isLeaf = gbt::internal::ModelImpl::nodeIsLeaf(nodeIndex, *tree, depth); - // leaf node - if (isLeaf) - { - const algorithmFPType * leafCombinationSum = combinationSum + leafCount * (1 << maxDepth); - // use combinationSumInd to search in the row of combinationSum corresponding to the current path - unsigned combinationSumInd = 0; - for (unsigned i = 1; i <= uniqueDepth; ++i) - { - if (uniquePath[i].oneFraction != 0) - { - combinationSumInd += 1 << (i - 1); - } - } - // update contributions to SHAP values for features satisfying the thresholds and not satisfying the thresholds separately - const unsigned valuesOffset = nodeIndex * numOutputs; - unsigned valuesNonZeroInd = 0; - unsigned valuesNonZeroCount = 0; - for (unsigned j = 0; j < numOutputs; ++j) - { - if (splitValues[valuesOffset + j] != 0) - { - valuesNonZeroInd = j; - valuesNonZeroCount++; - } - } - const algorithmFPType scaleZero = -leafCombinationSum[combinationSumInd] * pWeightsResidual; - for (unsigned i = 1; i <= uniqueDepth; ++i) - { - const PathElement & el = uniquePath[i]; - const unsigned phiOffset = el.featureIndex * numOutputs; - const algorithmFPType scale = - (el.oneFraction != 0) ? leafCombinationSum[combinationSumInd - (1 << (i - 1))] * pWeightsResidual * (1 - el.zeroFraction) : scaleZero; - if (valuesNonZeroCount == 1) - { - phi[phiOffset + valuesNonZeroInd] += scale * splitValues[valuesOffset + valuesNonZeroInd]; - } - else - { - for (unsigned j = 0; j < numOutputs; ++j) - { - phi[phiOffset + j] += scale * splitValues[valuesOffset + j]; - } - } - } - ++leafCount; - - return; - } - - const unsigned leftIndex = 2 * nodeIndex; - const unsigned rightIndex = 2 * nodeIndex + 1; - const algorithmFPType w = nodeCoverValues[nodeIndex]; - const algorithmFPType leftZeroFraction = nodeCoverValues[leftIndex] / w; - const algorithmFPType rightZeroFraction = nodeCoverValues[rightIndex] / w; - algorithmFPType incomingZeroFraction = 1; - algorithmFPType incomingOneFraction = 1; - - // see if we have already split on this feature, - // if so we undo that split so we can redo it for this node - const int pathIndex = duplicatedNode[nodeIndex]; - if (pathIndex >= 0) - { - incomingZeroFraction = uniquePath[pathIndex].zeroFraction; - incomingOneFraction = uniquePath[pathIndex].oneFraction; - - for (unsigned i = pathIndex; i < uniqueDepth; ++i) - { - uniquePath[i].featureIndex = uniquePath[i + 1].featureIndex; - uniquePath[i].zeroFraction = uniquePath[i + 1].zeroFraction; - uniquePath[i].oneFraction = uniquePath[i + 1].oneFraction; - } - --uniqueDepth; - // update pWeightsResidual iff the duplicated feature does not satisfy the threshold - if (incomingOneFraction != 1.) 
- { - pWeightsResidual /= incomingZeroFraction; - } - } - - const FeatureIndexType splitIndex = fIndexes[nodeIndex]; - bool isLeftSplit = x[splitIndex] <= splitValues[nodeIndex]; - - treeShap( - tree, combinationSum, duplicatedNode, maxDepth, x, phi, leftIndex, depth + 1, uniqueDepth + 1, uniquePath, pWeightsResidual, - leftZeroFraction * incomingZeroFraction, incomingOneFraction * isLeftSplit, splitIndex, leafCount); - - treeShap( - tree, combinationSum, duplicatedNode, maxDepth, x, phi, rightIndex, depth + 1, uniqueDepth + 1, uniquePath, pWeightsResidual, - rightZeroFraction * incomingZeroFraction, incomingOneFraction * (!isLeftSplit), splitIndex, leafCount); -} - -/** - * \brief Version 2, i.e. second Fast TreeSHAP algorithm - * \param tree current tree - * \param treeIndex index of current tree - * \param x dense data matrix - * \param phi dense output matrix of feature attributions - * \param featureHelper pointer to a FeatureTypes object (required to traverse tree) - * \param condition fix one feature to either off (-1) on (1) or not fixed (0 default) - * \param conditionFeature the index of the feature to fix - */ -template -inline services::Status treeShap(const gbt::internal::GbtDecisionTree * tree, size_t treeIndex, const algorithmFPType * x, algorithmFPType * phi, - const FeatureTypes * featureHelper, int condition, FeatureIndexType conditionFeature, - const ModelDetails & modelDetails) -{ - services::Status st; - - // // update the reference value with the expected value of the tree's predictions - // for (unsigned j = 0; j < tree.numOutputs; ++j) - // { - // phi[data.M * tree.numOutputs + j] += tree.values[j]; - // } - - const int depth = tree->getMaxLvl(); - const size_t nElements = (depth + 1) * (depth + 2) / 2; - PathElement * uniquePathData = static_cast(daal_malloc(sizeof(PathElement) * nElements)); - DAAL_CHECK_MALLOC(uniquePathData) - PathElement init; - for (size_t i = 0; i < nElements; ++i) - { - DAAL_ASSERT(0 == daal::services::internal::daal_memcpy_s(uniquePathData + i, sizeof(PathElement), &init, sizeof(PathElement))); - } - int leafCount = 0; - - algorithmFPType * combinationSum = modelDetails.combinationSum + treeIndex * modelDetails.maxLeafs * modelDetails.maxCombinations; - int * duplicatedNode = modelDetails.duplicatedNode + treeIndex * modelDetails.maxNodes; - // hand over duplicatedNode - 1 because we use 1-based node indexing - treeShap(tree, combinationSum, duplicatedNode - 1, modelDetails.maxDepth, x, phi, 1, 0, 0, - uniquePathData, 1, 1, 1, -1, leafCount); - - daal_free(uniquePathData); - return st; -} -} // namespace v2 } // namespace internal -/** - * \brief Return the combination sum, required for Fast TreeSHAP v2 -*/ -template -services::Status computeCombinationSum(const gbt::internal::GbtDecisionTree * tree, const size_t treeIndex, - const ModelDetails & modelDetails) -{ - if (!modelDetails.requiresPrecompute) - { - // nothing to be done - return services::Status(); - } - if (!modelDetails.combinationSum || !modelDetails.duplicatedNode) - { - // buffer wasn't properly allocated - return services::Status(ErrorMemoryAllocationFailed); - } - - algorithmFPType * combinationSum = modelDetails.combinationSum + treeIndex * modelDetails.maxLeafs * modelDetails.maxCombinations; - int * duplicatedNode = modelDetails.duplicatedNode + treeIndex * modelDetails.maxNodes; - // hand over duplicatedNode - 1 because we use 1-based node indexing - return treeshap::internal::v2::computeCombinationSum(tree, combinationSum, duplicatedNode - 1, modelDetails.maxDepth); -} 
- /** * \brief Recursive function that computes the feature attributions for a single tree. * \param tree current tree @@ -918,9 +446,8 @@ services::Status computeCombinationSum(const gbt::internal::GbtDecisionTree * tr * \param conditionFeature the index of the feature to fix */ template -inline services::Status treeShap(const gbt::internal::GbtDecisionTree * tree, size_t treeIndex, const algorithmFPType * x, algorithmFPType * phi, - const FeatureTypes * featureHelper, int condition, FeatureIndexType conditionFeature, - const ModelDetails & modelDetails) +inline services::Status treeShap(const gbt::internal::GbtDecisionTree * tree, const algorithmFPType * x, algorithmFPType * phi, + const FeatureTypes * featureHelper, int condition, FeatureIndexType conditionFeature) { DAAL_ASSERT(x); DAAL_ASSERT(phi); @@ -936,9 +463,6 @@ inline services::Status treeShap(const gbt::internal::GbtDecisionTree * tree, si case 1: return treeshap::internal::v1::treeShap(tree, x, phi, featureHelper, condition, conditionFeature); - case 2: - return treeshap::internal::v2::treeShap(tree, treeIndex, x, phi, featureHelper, - condition, conditionFeature, modelDetails); default: return services::Status(ErrorMethodNotImplemented); } }
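
Note (not part of the patch): a minimal, hypothetical sketch of how a caller would drive the updated gbt::classification::ModelBuilder API after this change. Only the addLeafNode/addSplitNode signatures are taken from the diff above; the constructor, createTree, noParent and getModel calls are assumed from the existing builder interface, and every numeric value (node count, feature index, split value, responses, covers) is made-up illustration data.

// Hypothetical usage sketch: one depth-1 tree built through the updated
// model-builder API, where cover (the Hessian sum of a node) is now a real
// trailing input argument instead of the old hard-coded 0.0 placeholder.
#include "daal.h"

using daal::algorithms::gbt::classification::ModelBuilder;

int main()
{
    const size_t nFeatures   = 2;
    const size_t nIterations = 1;
    const size_t nClasses    = 2;
    ModelBuilder builder(nFeatures, nIterations, nClasses); // assumed existing constructor

    // one tree with three nodes: a split on feature 0 and two leaves
    const ModelBuilder::TreeId tree = builder.createTree(/* nNodes = */ 3, /* classLabel = */ 0); // assumed existing method

    // new argument order: ..., featureValue, defaultLeft, cover; the NodeId result is returned
    const ModelBuilder::NodeId root = builder.addSplitNode(tree, ModelBuilder::noParent, /* position = */ 0,
                                                           /* featureIndex = */ 0, /* featureValue = */ 0.5,
                                                           /* defaultLeft = */ 0, /* cover = */ 100.0);

    // leaf nodes now also carry their cover (Hessian sum) as the last input argument
    builder.addLeafNode(tree, root, /* position = */ 0, /* response = */ -0.3, /* cover = */ 60.0);
    builder.addLeafNode(tree, root, /* position = */ 1, /* response = */ 0.4, /* cover = */ 40.0);

    daal::algorithms::gbt::classification::ModelPtr model = builder.getModel(); // assumed accessor
    return model.get() ? 0 : 1;
}

Storing a real cover per node is what lets the TreeSHAP code in treeshap.h derive its zero fractions from getNodeCoverValues() rather than from the 0.0 placeholder that this patch removes.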