diff --git a/articles/stackgbm.html b/articles/stackgbm.html
index 713be0d..5db8ab1 100644
--- a/articles/stackgbm.html
+++ b/articles/stackgbm.html
@@ -289,18 +289,19 @@

Performance benchmarking

Plot the ROC curves on the independent test set:

-pal <- c("#e69f00", "#56b4e9", "#009e73", "#f0e442")
+pal <- c("#e15759", "#f28e2c", "#59a14f", "#4e79a7", "#76b7b2")
 
-plot(smooth(roc_stack_te), col = pal[1])
-plot(smooth(roc_xgb_te), col = pal[2], add = TRUE)
-plot(smooth(roc_lgb_te), col = pal[3], add = TRUE)
-plot(smooth(roc_cat_te), col = pal[4], add = TRUE)
+plot(smooth(roc_stack_te), col = pal[1], lwd = 1)
+plot(smooth(roc_xgb_te), col = pal[2], lwd = 1, add = TRUE)
+plot(smooth(roc_lgb_te), col = pal[3], lwd = 1, add = TRUE)
+plot(smooth(roc_cat_te), col = pal[4], lwd = 1, add = TRUE)
 legend(
   "bottomright",
-  col = pal, lwd = 2,
+  col = pal,
+  lwd = 2,
   legend = c("stackgbm", "xgboost", "lightgbm", "catboost")
 )
-

+

Notes on categorical features
diff --git a/articles/stackgbm_files/figure-html/roc-curves-1.png b/articles/stackgbm_files/figure-html/roc-curves-1.png
index 9557ff4..9fe440f 100644
Binary files a/articles/stackgbm_files/figure-html/roc-curves-1.png and b/articles/stackgbm_files/figure-html/roc-curves-1.png differ
diff --git a/pkgdown.yml b/pkgdown.yml
index dad9753..06d773c 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -3,7 +3,7 @@ pkgdown: 2.0.9
 pkgdown_sha: ~
 articles:
   stackgbm: stackgbm.html
-last_built: 2024-04-27T03:22Z
+last_built: 2024-04-27T17:31Z
 urls:
   reference: https://nanx.me/stackgbm/reference
   article: https://nanx.me/stackgbm/articles
diff --git a/reference/xgboost_train.html b/reference/xgboost_train.html
index a7bf0d5..27c5eb8 100644
--- a/reference/xgboost_train.html
+++ b/reference/xgboost_train.html
@@ -113,7 +113,7 @@

Examples#> raw: 100.1 Kb #> call: #> xgboost::xgb.train(params = list(objective = "binary:logistic", -#> eval_metric = "auc", max_depth = 3, eta = 0.1), data = <pointer: 0x55a5b5d38ae0>, +#> eval_metric = "auc", max_depth = 3, eta = 0.1), data = <pointer: 0x562338b51de0>, #> nrounds = 100, nthread = 1) #> params (as set within xgb.train): #> objective = "binary:logistic", eval_metric = "auc", max_depth = "3", eta = "0.1", nthread = "1", validate_parameters = "TRUE" diff --git a/search.json b/search.json index 3f07303..ad4528b 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":[]},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement @nanx.. 
complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://nanx.me/stackgbm/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to stackgbm","title":"Contributing to stackgbm","text":"πŸ‘πŸŽ‰ First , thanks taking time contribute! πŸŽ‰πŸ‘ contribute project : Filing bug report feature request issue. 
Suggesting change via pull request.","code":""},{"path":"https://nanx.me/stackgbm/CONTRIBUTING.html","id":"issues","dir":"","previous_headings":"","what":"Issues","title":"Contributing to stackgbm","text":"file issue possible bug, please try include: Relevant package versions Necessary code data reproduce issue","code":""},{"path":"https://nanx.me/stackgbm/CONTRIBUTING.html","id":"pull-requests","dir":"","previous_headings":"","what":"Pull requests","title":"Contributing to stackgbm","text":"suggest change via pull requests, please: Fork repository GitHub account. Clone forked repository local machine, make changes. Commit push changes GitHub. Create pull request.","code":""},{"path":"https://nanx.me/stackgbm/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 stackgbm authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (β€œSoftware”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED β€œβ€, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Model stacking for boosted trees","text":"Model stacking (Wolpert 1992) method ensemble learning combines strength multiple base learners drive predictive performance. particularly popular effective strategy used machine learning competitions. stackgbm implements two-layer stacking model: first layer generates β€œfeatures” produced gradient boosting trees. boosted tree models built xgboost (Chen Guestrin 2016), lightgbm (Ke et al. 2017), catboost (Prokhorenkova et al. 2018). second layer logistic regression uses features inputs.","code":"library(\"stackgbm\") library(\"msaenet\") library(\"pROC\")"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"generate-data","dir":"Articles","previous_headings":"","what":"Generate data","title":"Model stacking for boosted trees","text":"Let’s generate data demo. simulated data \\(1000 \\times 50\\) predictor matrix binary outcome vector. 800 samples training set rest 200 (independent) test set. 25 50 features informative follows \\(N(0, 10)\\).","code":"sim_data <- msaenet.sim.binomial( n = 1000, p = 50, rho = 0.6, coef = rnorm(25, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) x_train <- sim_data$x.tr x_test <- sim_data$x.te y_train <- as.vector(sim_data$y.tr) y_test <- as.vector(sim_data$y.te)"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"parameter-tuning","dir":"Articles","previous_headings":"","what":"Parameter tuning","title":"Model stacking for boosted trees","text":"cv_xgboost(), cv_lightgbm() cv_catboost() provide wrappers tuning essential hyperparameters type boosted tree models k-fold cross-validation. 
β€œoptimal” parameters used fit stacking model later.","code":"params_xgb <- cv_xgboost(x_train, y_train) params_lgb <- cv_lightgbm(x_train, y_train) params_cat <- cv_catboost(x_train, y_train)"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"training","dir":"Articles","previous_headings":"","what":"Training","title":"Model stacking for boosted trees","text":"","code":"model_stack <- stackgbm( sim_data$x.tr, sim_data$y.tr, params = list( xgb.nrounds = params_xgb$nrounds, xgb.eta = params_xgb$eta, xgb.max_depth = params_xgb$max_depth, lgb.num_iterations = params_lgb$num_iterations, lgb.max_depth = params_lgb$max_depth, lgb.learning_rate = params_lgb$learning_rate, cat.iterations = params_cat$iterations, cat.depth = params_cat$depth ) ) #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads."},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"inference","dir":"Articles","previous_headings":"","what":"Inference","title":"Model stacking for boosted trees","text":"","code":"roc_stack_tr <- roc(y_train, predict(model_stack, x_train)$prob, quiet = TRUE) roc_stack_te <- roc(y_test, predict(model_stack, x_test)$prob, quiet = TRUE) roc_stack_tr$auc #> Area under the curve: 0.9663 roc_stack_te$auc #> Area under the curve: 0.7835"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"performance-benchmarking","dir":"Articles","previous_headings":"","what":"Performance benchmarking","title":"Model stacking for boosted trees","text":"Let’s compare predictive performance stacking model three types tree boosting models (base learners) fitted individually: xgboost: lightgbm: catboost: Summarize AUC values table: AUC values four models training testing set Plot ROC curves independent test set:","code":"model_xgb <- xgboost_train( params = list( objective = \"binary:logistic\", eval_metric = \"auc\", max_depth = params_xgb$max_depth, eta = params_xgb$eta ), data = xgboost_dmatrix(x_train, label = y_train), nrounds = params_xgb$nrounds ) model_lgb <- lightgbm_train( data = x_train, label = y_train, params = list( objective = \"binary\", learning_rate = params_lgb$learning_rate, num_iterations = params_lgb$num_iterations, max_depth = params_lgb$max_depth, num_leaves = 2^params_lgb$max_depth - 1 ), verbose = -1 ) #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. 
model_cat <- catboost_train( catboost_load_pool(data = x_train, label = y_train), NULL, params = list( loss_function = \"Logloss\", iterations = params_cat$iterations, depth = params_cat$depth, logging_level = \"Silent\" ) ) roc_xgb_tr <- roc(y_train, predict(model_xgb, x_train), quiet = TRUE) roc_xgb_te <- roc(y_test, predict(model_xgb, x_test), quiet = TRUE) roc_xgb_tr$auc #> Area under the curve: 0.9931 roc_xgb_te$auc #> Area under the curve: 0.7827 roc_lgb_tr <- roc(y_train, predict(model_lgb, x_train), quiet = TRUE) roc_lgb_te <- roc(y_test, predict(model_lgb, x_test), quiet = TRUE) roc_lgb_tr$auc #> Area under the curve: 0.9915 roc_lgb_te$auc #> Area under the curve: 0.784 roc_cat_tr <- roc( y_train, catboost_predict( model_cat, catboost_load_pool(data = x_train, label = NULL) ), quiet = TRUE ) roc_cat_te <- roc( y_test, catboost_predict( model_cat, catboost_load_pool(data = x_test, label = NULL) ), quiet = TRUE ) roc_cat_tr$auc #> Area under the curve: 0.9328 roc_cat_te$auc #> Area under the curve: 0.7751 pal <- c(\"#e69f00\", \"#56b4e9\", \"#009e73\", \"#f0e442\") plot(smooth(roc_stack_te), col = pal[1]) plot(smooth(roc_xgb_te), col = pal[2], add = TRUE) plot(smooth(roc_lgb_te), col = pal[3], add = TRUE) plot(smooth(roc_cat_te), col = pal[4], add = TRUE) legend( \"bottomright\", col = pal, lwd = 2, legend = c(\"stackgbm\", \"xgboost\", \"lightgbm\", \"catboost\") )"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"notes-on-categorical-features","dir":"Articles","previous_headings":"","what":"Notes on categorical features","title":"Model stacking for boosted trees","text":"xgboost lightgbm prefer categorical features encoded integers. catboost, categorical features can encoded character factors. avoid possible confusions, data categorical features, recommend converting integers use one-hot encoding, use numerical matrix input.","code":""},{"path":[]},{"path":"https://nanx.me/stackgbm/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Nan Xiao. Author, maintainer.","code":""},{"path":"https://nanx.me/stackgbm/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Xiao N (2024). stackgbm: Stacked Gradient Boosting Machines. 
R package version 0.1.0, https://github.com/nanxstats/stackgbm, https://nanx./stackgbm/.","code":"@Manual{, title = {stackgbm: Stacked Gradient Boosting Machines}, author = {Nan Xiao}, year = {2024}, note = {R package version 0.1.0, https://github.com/nanxstats/stackgbm}, url = {https://nanx.me/stackgbm/}, }"},{"path":"https://nanx.me/stackgbm/index.html","id":"stackgbm-","dir":"","previous_headings":"","what":"Stacked Gradient Boosting Machines","title":"Stacked Gradient Boosting Machines","text":"stackgbm offers minimalist, research-oriented implementation model stacking (Wolpert, 1992) gradient boosted tree models built xgboost (Chen Guestrin, 2016), lightgbm (Ke et al., 2017), catboost (Prokhorenkova et al., 2018).","code":""},{"path":"https://nanx.me/stackgbm/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Stacked Gradient Boosting Machines","text":"Install GitHub: install dependencies, check instructions manage dependencies.","code":"remotes::install_github(\"nanxstats/stackgbm\")"},{"path":"https://nanx.me/stackgbm/index.html","id":"model","dir":"","previous_headings":"","what":"Model","title":"Stacked Gradient Boosting Machines","text":"stackgbm implements classic two-layer stacking model: first layer generates β€œfeatures” produced gradient boosting trees. second layer logistic regression uses features inputs.","code":""},{"path":"https://nanx.me/stackgbm/index.html","id":"related-projects","dir":"","previous_headings":"","what":"Related projects","title":"Stacked Gradient Boosting Machines","text":"comprehensive flexible implementation model stacking, see stacks tidymodels StackingClassifier scikit-learn.","code":""},{"path":"https://nanx.me/stackgbm/index.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Stacked Gradient Boosting Machines","text":"Please note stackgbm project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a dataset β€” catboost_load_pool","title":"Create a dataset β€” catboost_load_pool","text":"Create dataset","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a dataset β€” catboost_load_pool","text":"","code":"catboost_load_pool(data, label = NULL, ...)"},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a dataset β€” catboost_load_pool","text":"data Predictors. label Labels. ... 
Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a dataset β€” catboost_load_pool","text":"catboost.Pool object.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a dataset β€” catboost_load_pool","text":"","code":"# Example code"},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict based on the model β€” catboost_predict","title":"Predict based on the model β€” catboost_predict","text":"Predict based model","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict based on the model β€” catboost_predict","text":"","code":"catboost_predict(model, pool, prediction_type = \"Probability\", ...)"},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict based on the model β€” catboost_predict","text":"model trained model. pool dataset predict . prediction_type Prediction type. ... Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict based on the model β€” catboost_predict","text":"Predicted values.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Predict based on the model β€” catboost_predict","text":"","code":"# Example code"},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":null,"dir":"Reference","previous_headings":"","what":"Train the model β€” catboost_train","title":"Train the model β€” catboost_train","text":"Train model","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Train the model β€” catboost_train","text":"","code":"catboost_train(learn_pool, test_pool = NULL, params = list())"},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Train the model β€” catboost_train","text":"learn_pool Training dataset. test_pool Testing dataset. 
params list training parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Train the model β€” catboost_train","text":"model object.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Train the model β€” catboost_train","text":"","code":"# Example code"},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":null,"dir":"Reference","previous_headings":"","what":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"catboost - parameter tuning model selection k-fold cross-validation grid search","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"","code":"cv_catboost( x, y, params = cv_param_grid(), n_folds = 5, n_threads = 1, seed = 42, verbose = TRUE )"},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"x Predictor matrix. y Response vector. params Parameter grid generated cv_param_grid(). n_folds Number folds. Default 5. n_threads number parallel threads. optimal speed, match number physical CPU cores, threads. See respective model documentation details. Default 1. seed Random seed reproducibility. 
verbose Show progress?","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"data frame containing complete tuning grid AUC values, best parameter combination highest AUC value.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"","code":"# check the vignette for code examples"},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":null,"dir":"Reference","previous_headings":"","what":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"lightgbm - parameter tuning model selection k-fold cross-validation grid search","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"","code":"cv_lightgbm( x, y, params = cv_param_grid(), n_folds = 5, n_threads = 1, seed = 42, verbose = TRUE )"},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"x Predictor matrix. y Response vector. params Parameter grid generated cv_param_grid(). n_folds Number folds. Default 5. n_threads number parallel threads. optimal speed, match number physical CPU cores, threads. See respective model documentation details. Default 1. seed Random seed reproducibility. 
verbose Show progress?","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"data frame containing complete tuning grid AUC values, best parameter combination highest AUC value.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) params <- suppressWarnings( cv_lightgbm( sim_data$x.tr, sim_data$y.tr, params = cv_param_grid( n_iterations = c(100, 200), max_depth = c(3, 5), learning_rate = c(0.1, 0.5) ), n_folds = 5, n_threads = 1, seed = 42, verbose = FALSE ) ) params$df #> num_iterations max_depth learning_rate metric #> 1 100 3 0.1 0.8421053 #> 2 200 3 0.1 0.8320802 #> 3 100 5 0.1 0.8421053 #> 4 200 5 0.1 0.8320802 #> 5 100 3 0.5 0.8082707 #> 6 200 3 0.5 0.8076441 #> 7 100 5 0.5 0.8082707 #> 8 200 5 0.5 0.8076441"},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":null,"dir":"Reference","previous_headings":"","what":"Generate a parameter grid for cross-validation β€” cv_param_grid","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"function generates parameter grid used cross-validation gradient boosting decision tree (GBDT) models.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"","code":"cv_param_grid( n_iterations = c(100, 200, 500, 1000), max_depth = c(3, 5, 7, 9), learning_rate = c(0.01, 0.05, 0.1, 0.2) )"},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"n_iterations numeric vector number iterations (trees) GBDT model. equivalent nrounds XGBoost, num_iterations LightGBM, iterations CatBoost. max_depth numeric vector maximum tree depths. parameter equivalent max_depth XGBoost LightGBM, depth CatBoost. learning_rate numeric vector learning rates GBDT model. 
parameter equivalent eta XGBoost, learning_rate LightGBM, ignored CatBoost.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"list names parameter names values vectors possible values parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"","code":"params <- cv_param_grid( n_iterations = c(10, 100), max_depth = c(3, 5), learning_rate = c(0.01, 0.1) )"},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":null,"dir":"Reference","previous_headings":"","what":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"xgboost - parameter tuning model selection k-fold cross-validation grid search","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"","code":"cv_xgboost( x, y, params = cv_param_grid(), n_folds = 5, n_threads = 1, seed = 42, verbose = TRUE )"},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"x Predictor matrix. y Response vector. params Parameter grid generated cv_param_grid(). n_folds Number folds. Default 5. n_threads number parallel threads. optimal speed, match number physical CPU cores, threads. See respective model documentation details. Default 1. seed Random seed reproducibility. verbose Show progress?","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"data frame containing complete tuning grid AUC values, best parameter combination highest AUC value.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) params <- cv_xgboost( sim_data$x.tr, sim_data$y.tr, params = cv_param_grid( n_iterations = c(100, 200), max_depth = c(3, 5), learning_rate = c(0.1, 0.5) ), n_folds = 5, n_threads = 1, seed = 42, verbose = FALSE ) params$df #> nrounds max_depth eta metric #> 1 100 3 0.1 0.7694236 #> 2 200 3 0.1 0.7888471 #> 3 100 5 0.1 0.7675439 #> 4 200 5 0.1 0.7775689 #> 5 100 3 0.5 0.7882206 #> 6 200 3 0.5 0.7957393 #> 7 100 5 0.5 0.7606516 #> 8 200 5 0.5 0.7662907"},{"path":"https://nanx.me/stackgbm/reference/is_installed_catboost.html","id":null,"dir":"Reference","previous_headings":"","what":"Is catboost installed? 
β€” is_installed_catboost","title":"Is catboost installed? β€” is_installed_catboost","text":"catboost installed?","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_catboost.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is catboost installed? β€” is_installed_catboost","text":"","code":"is_installed_catboost()"},{"path":"https://nanx.me/stackgbm/reference/is_installed_catboost.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is catboost installed? β€” is_installed_catboost","text":"TRUE installed, FALSE .","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_catboost.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is catboost installed? β€” is_installed_catboost","text":"","code":"is_installed_catboost() #> [1] TRUE"},{"path":"https://nanx.me/stackgbm/reference/is_installed_lightgbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Is lightgbm installed? β€” is_installed_lightgbm","title":"Is lightgbm installed? β€” is_installed_lightgbm","text":"lightgbm installed?","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_lightgbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is lightgbm installed? β€” is_installed_lightgbm","text":"","code":"is_installed_lightgbm()"},{"path":"https://nanx.me/stackgbm/reference/is_installed_lightgbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is lightgbm installed? β€” is_installed_lightgbm","text":"TRUE installed, FALSE .","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_lightgbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is lightgbm installed? β€” is_installed_lightgbm","text":"","code":"is_installed_lightgbm() #> [1] TRUE"},{"path":"https://nanx.me/stackgbm/reference/is_installed_xgboost.html","id":null,"dir":"Reference","previous_headings":"","what":"Is xgboost installed? β€” is_installed_xgboost","title":"Is xgboost installed? β€” is_installed_xgboost","text":"xgboost installed?","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_xgboost.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is xgboost installed? β€” is_installed_xgboost","text":"","code":"is_installed_xgboost()"},{"path":"https://nanx.me/stackgbm/reference/is_installed_xgboost.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is xgboost installed? β€” is_installed_xgboost","text":"TRUE installed, FALSE .","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_xgboost.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is xgboost installed? 
β€” is_installed_xgboost","text":"","code":"is_installed_xgboost() #> [1] TRUE"},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":null,"dir":"Reference","previous_headings":"","what":"Train lightgbm model β€” lightgbm_train","title":"Train lightgbm model β€” lightgbm_train","text":"Train lightgbm model","code":""},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Train lightgbm model β€” lightgbm_train","text":"","code":"lightgbm_train(data, label, params, ...)"},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Train lightgbm model β€” lightgbm_train","text":"data Training data. label Labels. params list parameters. ... Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Train lightgbm model β€” lightgbm_train","text":"model object.","code":""},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Train lightgbm model β€” lightgbm_train","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) fit <- suppressWarnings( lightgbm_train( data = sim_data$x.tr, label = sim_data$y.tr, params = list( objective = \"binary\", learning_rate = 0.1, num_iterations = 100, max_depth = 3, num_leaves = 2^3 - 1, num_threads = 1 ), verbose = -1 ) ) fit #> LightGBM Model (100 trees) #> Objective: binary #> Fitted to dataset with 10 columns"},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Make predictions from a stackgbm model object β€” predict.stackgbm","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"Make predictions stackgbm model object","code":""},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"","code":"# S3 method for stackgbm predict(object, newx, threshold = 0.5, classes = c(1L, 0L), ...)"},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"object stackgbm model object newx New predictor matrix threshold Decision threshold. Default 0.5. classes class encoding vector predicted outcome. naming order respected. ... 
unused","code":""},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"list two vectors presenting predicted classification probabilities predicted response.","code":""},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"","code":"# Check the vignette for code examples"},{"path":"https://nanx.me/stackgbm/reference/stackgbm-package.html","id":null,"dir":"Reference","previous_headings":"","what":"stackgbm: Stacked Gradient Boosting Machines β€” stackgbm-package","title":"stackgbm: Stacked Gradient Boosting Machines β€” stackgbm-package","text":"minimalist implementation model stacking boosted tree models built 'xgboost', 'lightgbm', 'catboost'.","code":""},{"path":[]},{"path":"https://nanx.me/stackgbm/reference/stackgbm-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"stackgbm: Stacked Gradient Boosting Machines β€” stackgbm-package","text":"Maintainer: Nan Xiao @nanx.(ORCID)","code":""},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Model stacking for boosted trees β€” stackgbm","title":"Model stacking for boosted trees β€” stackgbm","text":"Model stacking two-layer architecture: first layer boosted tree models fitted xgboost, lightgbm, catboost; second layer logistic regression model.","code":""},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model stacking for boosted trees β€” stackgbm","text":"","code":"stackgbm(x, y, params, n_folds = 5L, seed = 42, verbose = TRUE)"},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model stacking for boosted trees β€” stackgbm","text":"x Predictor matrix. y Response vector. params list optimal parameters boosted tree models. Can derived cv_xgboost(), cv_lightgbm(), cv_catboost(). n_folds Number folds. Default 5. seed Random seed reproducibility. 
verbose Show progress?","code":""},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model stacking for boosted trees β€” stackgbm","text":"Fitted boosted tree models stacked tree model.","code":""},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model stacking for boosted trees β€” stackgbm","text":"","code":"# Check the vignette for code examples"},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Create xgb.DMatrix object β€” xgboost_dmatrix","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"Create xgb.DMatrix object","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"","code":"xgboost_dmatrix(data, label = NULL, ...)"},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"data Matrix file. label Labels (optional). ... Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"xgb.DMatrix object.","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) x_train <- xgboost_dmatrix(sim_data$x.tr, label = sim_data$y.tr) x_train #> xgb.DMatrix dim: 80 x 10 info: label colnames: no x_test <- xgboost_dmatrix(sim_data$x.te) x_test #> xgb.DMatrix dim: 20 x 10 info: NA colnames: no"},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":null,"dir":"Reference","previous_headings":"","what":"Train xgboost model β€” xgboost_train","title":"Train xgboost model β€” xgboost_train","text":"Train xgboost model","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Train xgboost model β€” xgboost_train","text":"","code":"xgboost_train(params, data, nrounds, ...)"},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Train xgboost model β€” xgboost_train","text":"params list parameters. data Training data. nrounds Maximum number boosting iterations. ... 
Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Train xgboost model β€” xgboost_train","text":"model object.","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Train xgboost model β€” xgboost_train","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) x_train <- xgboost_dmatrix(sim_data$x.tr, label = sim_data$y.tr) fit <- xgboost_train( params = list( objective = \"binary:logistic\", eval_metric = \"auc\", max_depth = 3, eta = 0.1 ), data = x_train, nrounds = 100, nthread = 1 ) fit #> ##### xgb.Booster #> raw: 100.1 Kb #> call: #> xgboost::xgb.train(params = list(objective = \"binary:logistic\", #> eval_metric = \"auc\", max_depth = 3, eta = 0.1), data = , #> nrounds = 100, nthread = 1) #> params (as set within xgb.train): #> objective = \"binary:logistic\", eval_metric = \"auc\", max_depth = \"3\", eta = \"0.1\", nthread = \"1\", validate_parameters = \"TRUE\" #> xgb.attributes: #> niter #> callbacks: #> cb.print.evaluation(period = print_every_n) #> niter: 100 #> nfeatures : 10"},{"path":[]},{"path":"https://nanx.me/stackgbm/news/index.html","id":"new-features-0-1-0","dir":"Changelog","previous_headings":"","what":"New Features","title":"stackgbm 0.1.0","text":"First public release.","code":""}] +[{"path":[]},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. 
Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement @nanx.. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. 
Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://nanx.me/stackgbm/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://nanx.me/stackgbm/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to stackgbm","title":"Contributing to stackgbm","text":"πŸ‘πŸŽ‰ First , thanks taking time contribute! πŸŽ‰πŸ‘ contribute project : Filing bug report feature request issue. Suggesting change via pull request.","code":""},{"path":"https://nanx.me/stackgbm/CONTRIBUTING.html","id":"issues","dir":"","previous_headings":"","what":"Issues","title":"Contributing to stackgbm","text":"file issue possible bug, please try include: Relevant package versions Necessary code data reproduce issue","code":""},{"path":"https://nanx.me/stackgbm/CONTRIBUTING.html","id":"pull-requests","dir":"","previous_headings":"","what":"Pull requests","title":"Contributing to stackgbm","text":"suggest change via pull requests, please: Fork repository GitHub account. Clone forked repository local machine, make changes. Commit push changes GitHub. Create pull request.","code":""},{"path":"https://nanx.me/stackgbm/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 stackgbm authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (β€œSoftware”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED β€œβ€, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Model stacking for boosted trees","text":"Model stacking (Wolpert 1992) method ensemble learning combines strength multiple base learners drive predictive performance. particularly popular effective strategy used machine learning competitions. stackgbm implements two-layer stacking model: first layer generates β€œfeatures” produced gradient boosting trees. boosted tree models built xgboost (Chen Guestrin 2016), lightgbm (Ke et al. 2017), catboost (Prokhorenkova et al. 2018). 
second layer logistic regression uses features inputs.","code":"library(\"stackgbm\") library(\"msaenet\") library(\"pROC\")"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"generate-data","dir":"Articles","previous_headings":"","what":"Generate data","title":"Model stacking for boosted trees","text":"Let’s generate data demo. simulated data \\(1000 \\times 50\\) predictor matrix binary outcome vector. 800 samples training set rest 200 (independent) test set. 25 50 features informative follows \\(N(0, 10)\\).","code":"sim_data <- msaenet.sim.binomial( n = 1000, p = 50, rho = 0.6, coef = rnorm(25, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) x_train <- sim_data$x.tr x_test <- sim_data$x.te y_train <- as.vector(sim_data$y.tr) y_test <- as.vector(sim_data$y.te)"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"parameter-tuning","dir":"Articles","previous_headings":"","what":"Parameter tuning","title":"Model stacking for boosted trees","text":"cv_xgboost(), cv_lightgbm() cv_catboost() provide wrappers tuning essential hyperparameters type boosted tree models k-fold cross-validation. β€œoptimal” parameters used fit stacking model later.","code":"params_xgb <- cv_xgboost(x_train, y_train) params_lgb <- cv_lightgbm(x_train, y_train) params_cat <- cv_catboost(x_train, y_train)"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"training","dir":"Articles","previous_headings":"","what":"Training","title":"Model stacking for boosted trees","text":"","code":"model_stack <- stackgbm( sim_data$x.tr, sim_data$y.tr, params = list( xgb.nrounds = params_xgb$nrounds, xgb.eta = params_xgb$eta, xgb.max_depth = params_xgb$max_depth, lgb.num_iterations = params_lgb$num_iterations, lgb.max_depth = params_lgb$max_depth, lgb.learning_rate = params_lgb$learning_rate, cat.iterations = params_cat$iterations, cat.depth = params_cat$depth ) ) #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. 
Will use default number of OpenMP threads."},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"inference","dir":"Articles","previous_headings":"","what":"Inference","title":"Model stacking for boosted trees","text":"","code":"roc_stack_tr <- roc(y_train, predict(model_stack, x_train)$prob, quiet = TRUE) roc_stack_te <- roc(y_test, predict(model_stack, x_test)$prob, quiet = TRUE) roc_stack_tr$auc #> Area under the curve: 0.9663 roc_stack_te$auc #> Area under the curve: 0.7835"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"performance-benchmarking","dir":"Articles","previous_headings":"","what":"Performance benchmarking","title":"Model stacking for boosted trees","text":"Let’s compare predictive performance stacking model three types tree boosting models (base learners) fitted individually: xgboost: lightgbm: catboost: Summarize AUC values table: AUC values four models training testing set Plot ROC curves independent test set:","code":"model_xgb <- xgboost_train( params = list( objective = \"binary:logistic\", eval_metric = \"auc\", max_depth = params_xgb$max_depth, eta = params_xgb$eta ), data = xgboost_dmatrix(x_train, label = y_train), nrounds = params_xgb$nrounds ) model_lgb <- lightgbm_train( data = x_train, label = y_train, params = list( objective = \"binary\", learning_rate = params_lgb$learning_rate, num_iterations = params_lgb$num_iterations, max_depth = params_lgb$max_depth, num_leaves = 2^params_lgb$max_depth - 1 ), verbose = -1 ) #> Warning in .get_default_num_threads(): Optional package 'RhpcBLASctl' not #> found. Will use default number of OpenMP threads. model_cat <- catboost_train( catboost_load_pool(data = x_train, label = y_train), NULL, params = list( loss_function = \"Logloss\", iterations = params_cat$iterations, depth = params_cat$depth, logging_level = \"Silent\" ) ) roc_xgb_tr <- roc(y_train, predict(model_xgb, x_train), quiet = TRUE) roc_xgb_te <- roc(y_test, predict(model_xgb, x_test), quiet = TRUE) roc_xgb_tr$auc #> Area under the curve: 0.9931 roc_xgb_te$auc #> Area under the curve: 0.7827 roc_lgb_tr <- roc(y_train, predict(model_lgb, x_train), quiet = TRUE) roc_lgb_te <- roc(y_test, predict(model_lgb, x_test), quiet = TRUE) roc_lgb_tr$auc #> Area under the curve: 0.9915 roc_lgb_te$auc #> Area under the curve: 0.784 roc_cat_tr <- roc( y_train, catboost_predict( model_cat, catboost_load_pool(data = x_train, label = NULL) ), quiet = TRUE ) roc_cat_te <- roc( y_test, catboost_predict( model_cat, catboost_load_pool(data = x_test, label = NULL) ), quiet = TRUE ) roc_cat_tr$auc #> Area under the curve: 0.9328 roc_cat_te$auc #> Area under the curve: 0.7751 pal <- c(\"#e15759\", \"#f28e2c\", \"#59a14f\", \"#4e79a7\", \"#76b7b2\") plot(smooth(roc_stack_te), col = pal[1], lwd = 1) plot(smooth(roc_xgb_te), col = pal[2], lwd = 1, add = TRUE) plot(smooth(roc_lgb_te), col = pal[3], lwd = 1, add = TRUE) plot(smooth(roc_cat_te), col = pal[4], lwd = 1, add = TRUE) legend( \"bottomright\", col = pal, lwd = 2, legend = c(\"stackgbm\", \"xgboost\", \"lightgbm\", \"catboost\") )"},{"path":"https://nanx.me/stackgbm/articles/stackgbm.html","id":"notes-on-categorical-features","dir":"Articles","previous_headings":"","what":"Notes on categorical features","title":"Model stacking for boosted trees","text":"xgboost lightgbm prefer categorical features encoded integers. catboost, categorical features can encoded character factors. 
avoid possible confusions, data categorical features, recommend converting integers use one-hot encoding, use numerical matrix input.","code":""},{"path":[]},{"path":"https://nanx.me/stackgbm/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Nan Xiao. Author, maintainer.","code":""},{"path":"https://nanx.me/stackgbm/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Xiao N (2024). stackgbm: Stacked Gradient Boosting Machines. R package version 0.1.0, https://github.com/nanxstats/stackgbm, https://nanx./stackgbm/.","code":"@Manual{, title = {stackgbm: Stacked Gradient Boosting Machines}, author = {Nan Xiao}, year = {2024}, note = {R package version 0.1.0, https://github.com/nanxstats/stackgbm}, url = {https://nanx.me/stackgbm/}, }"},{"path":"https://nanx.me/stackgbm/index.html","id":"stackgbm-","dir":"","previous_headings":"","what":"Stacked Gradient Boosting Machines","title":"Stacked Gradient Boosting Machines","text":"stackgbm offers minimalist, research-oriented implementation model stacking (Wolpert, 1992) gradient boosted tree models built xgboost (Chen Guestrin, 2016), lightgbm (Ke et al., 2017), catboost (Prokhorenkova et al., 2018).","code":""},{"path":"https://nanx.me/stackgbm/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Stacked Gradient Boosting Machines","text":"Install GitHub: install dependencies, check instructions manage dependencies.","code":"remotes::install_github(\"nanxstats/stackgbm\")"},{"path":"https://nanx.me/stackgbm/index.html","id":"model","dir":"","previous_headings":"","what":"Model","title":"Stacked Gradient Boosting Machines","text":"stackgbm implements classic two-layer stacking model: first layer generates β€œfeatures” produced gradient boosting trees. second layer logistic regression uses features inputs.","code":""},{"path":"https://nanx.me/stackgbm/index.html","id":"related-projects","dir":"","previous_headings":"","what":"Related projects","title":"Stacked Gradient Boosting Machines","text":"comprehensive flexible implementation model stacking, see stacks tidymodels StackingClassifier scikit-learn.","code":""},{"path":"https://nanx.me/stackgbm/index.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Stacked Gradient Boosting Machines","text":"Please note stackgbm project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a dataset β€” catboost_load_pool","title":"Create a dataset β€” catboost_load_pool","text":"Create dataset","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a dataset β€” catboost_load_pool","text":"","code":"catboost_load_pool(data, label = NULL, ...)"},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a dataset β€” catboost_load_pool","text":"data Predictors. label Labels. ... 
Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a dataset β€” catboost_load_pool","text":"catboost.Pool object.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_load_pool.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a dataset β€” catboost_load_pool","text":"","code":"# Example code"},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict based on the model β€” catboost_predict","title":"Predict based on the model β€” catboost_predict","text":"Predict based model","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict based on the model β€” catboost_predict","text":"","code":"catboost_predict(model, pool, prediction_type = \"Probability\", ...)"},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict based on the model β€” catboost_predict","text":"model trained model. pool dataset predict . prediction_type Prediction type. ... Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict based on the model β€” catboost_predict","text":"Predicted values.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_predict.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Predict based on the model β€” catboost_predict","text":"","code":"# Example code"},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":null,"dir":"Reference","previous_headings":"","what":"Train the model β€” catboost_train","title":"Train the model β€” catboost_train","text":"Train model","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Train the model β€” catboost_train","text":"","code":"catboost_train(learn_pool, test_pool = NULL, params = list())"},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Train the model β€” catboost_train","text":"learn_pool Training dataset. test_pool Testing dataset. 
params list training parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Train the model β€” catboost_train","text":"model object.","code":""},{"path":"https://nanx.me/stackgbm/reference/catboost_train.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Train the model β€” catboost_train","text":"","code":"# Example code"},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":null,"dir":"Reference","previous_headings":"","what":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"catboost - parameter tuning model selection k-fold cross-validation grid search","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"","code":"cv_catboost( x, y, params = cv_param_grid(), n_folds = 5, n_threads = 1, seed = 42, verbose = TRUE )"},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"x Predictor matrix. y Response vector. params Parameter grid generated cv_param_grid(). n_folds Number folds. Default 5. n_threads number parallel threads. optimal speed, match number physical CPU cores, threads. See respective model documentation details. Default 1. seed Random seed reproducibility. 
verbose Show progress?","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"data frame containing complete tuning grid AUC values, best parameter combination highest AUC value.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_catboost.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"catboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_catboost","text":"","code":"# check the vignette for code examples"},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":null,"dir":"Reference","previous_headings":"","what":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"lightgbm - parameter tuning model selection k-fold cross-validation grid search","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"","code":"cv_lightgbm( x, y, params = cv_param_grid(), n_folds = 5, n_threads = 1, seed = 42, verbose = TRUE )"},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"x Predictor matrix. y Response vector. params Parameter grid generated cv_param_grid(). n_folds Number folds. Default 5. n_threads number parallel threads. optimal speed, match number physical CPU cores, threads. See respective model documentation details. Default 1. seed Random seed reproducibility. 
verbose Show progress?","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"data frame containing complete tuning grid AUC values, best parameter combination highest AUC value.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_lightgbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"lightgbm - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_lightgbm","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) params <- suppressWarnings( cv_lightgbm( sim_data$x.tr, sim_data$y.tr, params = cv_param_grid( n_iterations = c(100, 200), max_depth = c(3, 5), learning_rate = c(0.1, 0.5) ), n_folds = 5, n_threads = 1, seed = 42, verbose = FALSE ) ) params$df #> num_iterations max_depth learning_rate metric #> 1 100 3 0.1 0.8421053 #> 2 200 3 0.1 0.8320802 #> 3 100 5 0.1 0.8421053 #> 4 200 5 0.1 0.8320802 #> 5 100 3 0.5 0.8082707 #> 6 200 3 0.5 0.8076441 #> 7 100 5 0.5 0.8082707 #> 8 200 5 0.5 0.8076441"},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":null,"dir":"Reference","previous_headings":"","what":"Generate a parameter grid for cross-validation β€” cv_param_grid","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"function generates parameter grid used cross-validation gradient boosting decision tree (GBDT) models.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"","code":"cv_param_grid( n_iterations = c(100, 200, 500, 1000), max_depth = c(3, 5, 7, 9), learning_rate = c(0.01, 0.05, 0.1, 0.2) )"},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"n_iterations numeric vector number iterations (trees) GBDT model. equivalent nrounds XGBoost, num_iterations LightGBM, iterations CatBoost. max_depth numeric vector maximum tree depths. parameter equivalent max_depth XGBoost LightGBM, depth CatBoost. learning_rate numeric vector learning rates GBDT model. 
parameter equivalent eta XGBoost, learning_rate LightGBM, ignored CatBoost.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"list names parameter names values vectors possible values parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_param_grid.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Generate a parameter grid for cross-validation β€” cv_param_grid","text":"","code":"params <- cv_param_grid( n_iterations = c(10, 100), max_depth = c(3, 5), learning_rate = c(0.01, 0.1) )"},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":null,"dir":"Reference","previous_headings":"","what":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"xgboost - parameter tuning model selection k-fold cross-validation grid search","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"","code":"cv_xgboost( x, y, params = cv_param_grid(), n_folds = 5, n_threads = 1, seed = 42, verbose = TRUE )"},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"x Predictor matrix. y Response vector. params Parameter grid generated cv_param_grid(). n_folds Number folds. Default 5. n_threads number parallel threads. optimal speed, match number physical CPU cores, threads. See respective model documentation details. Default 1. seed Random seed reproducibility. verbose Show progress?","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"data frame containing complete tuning grid AUC values, best parameter combination highest AUC value.","code":""},{"path":"https://nanx.me/stackgbm/reference/cv_xgboost.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"xgboost - parameter tuning and model selection with k-fold cross-validation and grid search β€” cv_xgboost","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) params <- cv_xgboost( sim_data$x.tr, sim_data$y.tr, params = cv_param_grid( n_iterations = c(100, 200), max_depth = c(3, 5), learning_rate = c(0.1, 0.5) ), n_folds = 5, n_threads = 1, seed = 42, verbose = FALSE ) params$df #> nrounds max_depth eta metric #> 1 100 3 0.1 0.7694236 #> 2 200 3 0.1 0.7888471 #> 3 100 5 0.1 0.7675439 #> 4 200 5 0.1 0.7775689 #> 5 100 3 0.5 0.7882206 #> 6 200 3 0.5 0.7957393 #> 7 100 5 0.5 0.7606516 #> 8 200 5 0.5 0.7662907"},{"path":"https://nanx.me/stackgbm/reference/is_installed_catboost.html","id":null,"dir":"Reference","previous_headings":"","what":"Is catboost installed? 
β€” is_installed_catboost","title":"Is catboost installed? β€” is_installed_catboost","text":"catboost installed?","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_catboost.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is catboost installed? β€” is_installed_catboost","text":"","code":"is_installed_catboost()"},{"path":"https://nanx.me/stackgbm/reference/is_installed_catboost.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is catboost installed? β€” is_installed_catboost","text":"TRUE installed, FALSE .","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_catboost.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is catboost installed? β€” is_installed_catboost","text":"","code":"is_installed_catboost() #> [1] TRUE"},{"path":"https://nanx.me/stackgbm/reference/is_installed_lightgbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Is lightgbm installed? β€” is_installed_lightgbm","title":"Is lightgbm installed? β€” is_installed_lightgbm","text":"lightgbm installed?","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_lightgbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is lightgbm installed? β€” is_installed_lightgbm","text":"","code":"is_installed_lightgbm()"},{"path":"https://nanx.me/stackgbm/reference/is_installed_lightgbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is lightgbm installed? β€” is_installed_lightgbm","text":"TRUE installed, FALSE .","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_lightgbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is lightgbm installed? β€” is_installed_lightgbm","text":"","code":"is_installed_lightgbm() #> [1] TRUE"},{"path":"https://nanx.me/stackgbm/reference/is_installed_xgboost.html","id":null,"dir":"Reference","previous_headings":"","what":"Is xgboost installed? β€” is_installed_xgboost","title":"Is xgboost installed? β€” is_installed_xgboost","text":"xgboost installed?","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_xgboost.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is xgboost installed? β€” is_installed_xgboost","text":"","code":"is_installed_xgboost()"},{"path":"https://nanx.me/stackgbm/reference/is_installed_xgboost.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is xgboost installed? β€” is_installed_xgboost","text":"TRUE installed, FALSE .","code":""},{"path":"https://nanx.me/stackgbm/reference/is_installed_xgboost.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is xgboost installed? 
β€” is_installed_xgboost","text":"","code":"is_installed_xgboost() #> [1] TRUE"},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":null,"dir":"Reference","previous_headings":"","what":"Train lightgbm model β€” lightgbm_train","title":"Train lightgbm model β€” lightgbm_train","text":"Train lightgbm model","code":""},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Train lightgbm model β€” lightgbm_train","text":"","code":"lightgbm_train(data, label, params, ...)"},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Train lightgbm model β€” lightgbm_train","text":"data Training data. label Labels. params list parameters. ... Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Train lightgbm model β€” lightgbm_train","text":"model object.","code":""},{"path":"https://nanx.me/stackgbm/reference/lightgbm_train.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Train lightgbm model β€” lightgbm_train","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) fit <- suppressWarnings( lightgbm_train( data = sim_data$x.tr, label = sim_data$y.tr, params = list( objective = \"binary\", learning_rate = 0.1, num_iterations = 100, max_depth = 3, num_leaves = 2^3 - 1, num_threads = 1 ), verbose = -1 ) ) fit #> LightGBM Model (100 trees) #> Objective: binary #> Fitted to dataset with 10 columns"},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Make predictions from a stackgbm model object β€” predict.stackgbm","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"Make predictions stackgbm model object","code":""},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"","code":"# S3 method for stackgbm predict(object, newx, threshold = 0.5, classes = c(1L, 0L), ...)"},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"object stackgbm model object newx New predictor matrix threshold Decision threshold. Default 0.5. classes class encoding vector predicted outcome. naming order respected. ... 
unused","code":""},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"list two vectors presenting predicted classification probabilities predicted response.","code":""},{"path":"https://nanx.me/stackgbm/reference/predict.stackgbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Make predictions from a stackgbm model object β€” predict.stackgbm","text":"","code":"# Check the vignette for code examples"},{"path":"https://nanx.me/stackgbm/reference/stackgbm-package.html","id":null,"dir":"Reference","previous_headings":"","what":"stackgbm: Stacked Gradient Boosting Machines β€” stackgbm-package","title":"stackgbm: Stacked Gradient Boosting Machines β€” stackgbm-package","text":"minimalist implementation model stacking boosted tree models built 'xgboost', 'lightgbm', 'catboost'.","code":""},{"path":[]},{"path":"https://nanx.me/stackgbm/reference/stackgbm-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"stackgbm: Stacked Gradient Boosting Machines β€” stackgbm-package","text":"Maintainer: Nan Xiao @nanx.(ORCID)","code":""},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":null,"dir":"Reference","previous_headings":"","what":"Model stacking for boosted trees β€” stackgbm","title":"Model stacking for boosted trees β€” stackgbm","text":"Model stacking two-layer architecture: first layer boosted tree models fitted xgboost, lightgbm, catboost; second layer logistic regression model.","code":""},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Model stacking for boosted trees β€” stackgbm","text":"","code":"stackgbm(x, y, params, n_folds = 5L, seed = 42, verbose = TRUE)"},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Model stacking for boosted trees β€” stackgbm","text":"x Predictor matrix. y Response vector. params list optimal parameters boosted tree models. Can derived cv_xgboost(), cv_lightgbm(), cv_catboost(). n_folds Number folds. Default 5. seed Random seed reproducibility. 
verbose Show progress?","code":""},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Model stacking for boosted trees β€” stackgbm","text":"Fitted boosted tree models stacked tree model.","code":""},{"path":"https://nanx.me/stackgbm/reference/stackgbm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Model stacking for boosted trees β€” stackgbm","text":"","code":"# Check the vignette for code examples"},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Create xgb.DMatrix object β€” xgboost_dmatrix","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"Create xgb.DMatrix object","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"","code":"xgboost_dmatrix(data, label = NULL, ...)"},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"data Matrix file. label Labels (optional). ... Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"xgb.DMatrix object.","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_dmatrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create xgb.DMatrix object β€” xgboost_dmatrix","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) x_train <- xgboost_dmatrix(sim_data$x.tr, label = sim_data$y.tr) x_train #> xgb.DMatrix dim: 80 x 10 info: label colnames: no x_test <- xgboost_dmatrix(sim_data$x.te) x_test #> xgb.DMatrix dim: 20 x 10 info: NA colnames: no"},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":null,"dir":"Reference","previous_headings":"","what":"Train xgboost model β€” xgboost_train","title":"Train xgboost model β€” xgboost_train","text":"Train xgboost model","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Train xgboost model β€” xgboost_train","text":"","code":"xgboost_train(params, data, nrounds, ...)"},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Train xgboost model β€” xgboost_train","text":"params list parameters. data Training data. nrounds Maximum number boosting iterations. ... 
Additional parameters.","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Train xgboost model β€” xgboost_train","text":"model object.","code":""},{"path":"https://nanx.me/stackgbm/reference/xgboost_train.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Train xgboost model β€” xgboost_train","text":"","code":"sim_data <- msaenet::msaenet.sim.binomial( n = 100, p = 10, rho = 0.6, coef = rnorm(5, mean = 0, sd = 10), snr = 1, p.train = 0.8, seed = 42 ) x_train <- xgboost_dmatrix(sim_data$x.tr, label = sim_data$y.tr) fit <- xgboost_train( params = list( objective = \"binary:logistic\", eval_metric = \"auc\", max_depth = 3, eta = 0.1 ), data = x_train, nrounds = 100, nthread = 1 ) fit #> ##### xgb.Booster #> raw: 100.1 Kb #> call: #> xgboost::xgb.train(params = list(objective = \"binary:logistic\", #> eval_metric = \"auc\", max_depth = 3, eta = 0.1), data = , #> nrounds = 100, nthread = 1) #> params (as set within xgb.train): #> objective = \"binary:logistic\", eval_metric = \"auc\", max_depth = \"3\", eta = \"0.1\", nthread = \"1\", validate_parameters = \"TRUE\" #> xgb.attributes: #> niter #> callbacks: #> cb.print.evaluation(period = print_every_n) #> niter: 100 #> nfeatures : 10"},{"path":[]},{"path":"https://nanx.me/stackgbm/news/index.html","id":"new-features-0-1-0","dir":"Changelog","previous_headings":"","what":"New Features","title":"stackgbm 0.1.0","text":"First public release.","code":""}]