diff --git a/benchmarks/regressor.jl b/benchmarks/regressor.jl index e0d35b90..ecc39d0a 100644 --- a/benchmarks/regressor.jl +++ b/benchmarks/regressor.jl @@ -19,11 +19,11 @@ import CUDA #threads # laptop depth 6: 12.717845 seconds (2.08 M allocations: 466.228 MiB) -nobs = Int(10e6) +nobs = Int(1e6) num_feat = Int(100) nrounds = 200 max_depth = 6 -tree_type = "binary" +tree_type = "oblivious" T = Float64 nthread = Base.Threads.nthreads() @info "testing with: $nobs observations | $num_feat features. nthread: $nthread | tree_type : $tree_type | max_depth : $max_depth" @@ -55,7 +55,7 @@ params_xgb = Dict( :print_every_n => 5, :subsample => 0.5, :colsample_bytree => 0.5, - :tree_method => "gpu_hist", # hist/gpu_hist + :tree_method => "hist", # hist/gpu_hist :max_bin => 64, ) @@ -149,15 +149,3 @@ CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_e CUDA.@time pred_evo = m_evo(x_train; device); CUDA.@time pred_evo = m_evo(x_train; device); # @btime m_evo($x_train; device); - -# CUDA v4: 0.439869 seconds (29.93 k CPU allocations: 181.869 MiB, 2.28% gc time) (16 GPU allocations: 138.430 MiB, 0.06% memmgmt time) -# CUDA v5: 0.365197 seconds (27.93 k CPU allocations: 181.763 MiB, 3.17% gc time) (16 GPU allocations: 138.430 MiB, 0.03% memmgmt time) -CUDA.@time m, cache = EvoTrees.init(params_evo, x_train, y_train, EvoTrees.GPU); -CUDA.@time EvoTrees.grow_evotree!(m, cache, params_evo, EvoTrees.GPU); -# CUDA v4: 3.345438 seconds (3.57 M CPU allocations: 151.870 MiB, 0.60% gc time) (38.74 k GPU allocations: 1.227 GiB, 1.39% memmgmt time) -# CUDA v5: 5.275562 seconds (6.92 M CPU allocations: 296.344 MiB, 1.04% gc time) (38.73 k GPU allocations: 1.227 GiB, 1.58% memmgmt time) -CUDA.@time begin - for i in 1:200 - EvoTrees.grow_evotree!(m, cache, params_evo, EvoTrees.GPU) - end -end diff --git a/experiments/readme_plots-df-cpu.jl b/experiments/readme_plots-df-cpu.jl index 878c6d7f..8e7a8df7 100644 --- a/experiments/readme_plots-df-cpu.jl +++ b/experiments/readme_plots-df-cpu.jl @@ -121,535 +121,3 @@ plot!( linewidth=1.5, label="Linear - C", ) - -# logistic / cross-entropy -params1 = EvoTreeRegressor( - loss=:logistic, - nrounds=200, - nbins=64, - lambda=0.1, - gamma=0.1, - eta=0.05, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1, - dtrain; - fnames=["x_num", "x_cat"], - target_name="y", - deval, - metric=:logloss, - print_every_n=25, - early_stopping_rounds=20, - verbosity=0 -); -# 218.040 ms (123372 allocations: 34.71 MiB) -# @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval) -plot( - dtrain.x_num, - dtrain.y, - msize=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -dinfer = dtrain[dtrain.x_cat.=="A", :] -pred = model(dinfer) -x_perm = sortperm(dinfer.x_num) -plot!( - dinfer.x_num[x_perm], - pred[x_perm], - color="lightblue", - linewidth=1.5, - label="Linear - A", -) -dinfer = dtrain[dtrain.x_cat.=="B", :] -pred = model(dinfer); -x_perm = sortperm(dinfer.x_num) -plot!( - dinfer.x_num[x_perm], - pred[x_perm], - color="blue", - linewidth=1.5, - label="Linear - B", -) -dinfer = dtrain[dtrain.x_cat.=="C", :] -pred = model(dinfer); -x_perm = sortperm(dinfer.x_num) -plot!( - dinfer.x_num[x_perm], - pred[x_perm], - color="navy", - linewidth=1.5, - label="Linear - C", -) - -# L1 -params1 = EvoTreeRegressor( - loss=:L1, - alpha=0.5, - nrounds=500, - nbins=64, - lambda=0.0, - gamma=0.0, - eta=0.1, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:mae -); -@time pred_train_L1 = predict(model, x_train) -@time pred_eval_L1 = predict(model, x_eval) -sqrt(mean((pred_train_L1 .- y_train) .^ 2)) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train, - y_train, - msize=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train_linear[x_perm], - color="navy", - linewidth=1.5, - label="Linear", -) -plot!( - x_train[:, 1][x_perm], - pred_train_linear_w[x_perm], - color="lightblue", - linewidth=1.5, - label="LinearW", -) -plot!( - x_train[:, 1][x_perm], - pred_train_logistic[x_perm], - color="darkred", - linewidth=1.5, - label="Logistic", -) -plot!( - x_train[:, 1][x_perm], - pred_train_L1[x_perm], - color="darkgreen", - linewidth=1.5, - label="L1", -) -savefig("figures/regression_sinus.png") - -# Poisson -params1 = EvoTreeCount( - loss=:poisson, - nrounds=500, - nbins=64, - lambda=0.1, - gamma=0.1, - eta=0.1, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:poisson -); -@time pred_train_poisson = predict(model, x_train); -sqrt(mean((pred_train_poisson .- y_train) .^ 2)) - -# Gamma -params1 = EvoTreeRegressor( - loss=:gamma, - nrounds=500, - nbins=64, - lambda=0.1, - gamma=0.1, - eta=0.02, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:gamma -); -@time pred_train_gamma = predict(model, x_train); -sqrt(mean((pred_train_gamma .- y_train) .^ 2)) - -# Tweedie -params1 = EvoTreeRegressor( - loss=:tweedie, - nrounds=500, - nbins=64, - lambda=0.5, - gamma=0.1, - eta=0.1, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:tweedie -); -@time pred_train_tweedie = predict(model, x_train); -sqrt(mean((pred_train_tweedie .- y_train) .^ 2)) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train, - y_train, - msize=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train_poisson[x_perm], - color="navy", - linewidth=1.5, - label="Poisson", -) -plot!( - x_train[:, 1][x_perm], - pred_train_gamma[x_perm], - color="lightblue", - linewidth=1.5, - label="Gamma", -) -plot!( - x_train[:, 1][x_perm], - pred_train_tweedie[x_perm], - color="darkred", - linewidth=1.5, - label="Tweedie", -) -savefig("figures/regression_sinus2.png") - - -############################### -## Quantiles -############################### -# q50 -params1 = EvoTreeRegressor( - loss=:quantile, - alpha=0.5, - nrounds=500, - nbins=64, - lambda=0.1, - gamma=0.0, - eta=0.05, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:mae -); -# 116.822 ms (74496 allocations: 36.41 MiB) for 100 iterations -# @btime model = grow_gbtree($X_train, $Y_train, $params1, X_eval = $X_eval, Y_eval = $Y_eval) -@time pred_train_q50 = predict(model, x_train) -sum(pred_train_q50 .< y_train) / length(y_train) - -# q20 -params1 = EvoTreeRegressor( - loss=:quantile, - alpha=0.2, - nrounds=300, - nbins=64, - lambda=0.1, - gamma=0.0, - eta=0.05, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); -@time pred_train_q20 = predict(model, x_train) -sum(pred_train_q20 .< y_train) / length(y_train) - -# q80 -params1 = EvoTreeRegressor( - loss=:quantile, - alpha=0.8, - nrounds=300, - nbins=64, - lambda=0.1, - gamma=0.0, - eta=0.05, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) -@time pred_train_q80 = predict(model, x_train) -sum(pred_train_q80 .< y_train) / length(y_train) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train, - y_train, - ms=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train_q50[x_perm], - color="navy", - linewidth=1.5, - label="Median", -) -plot!( - x_train[:, 1][x_perm], - pred_train_q20[x_perm], - color="darkred", - linewidth=1.5, - label="Q20", -) -plot!( - x_train[:, 1][x_perm], - pred_train_q80[x_perm], - color="darkgreen", - linewidth=1.5, - label="Q80", -) -savefig("figures/quantiles_sinus.png") - - -############################### -## gaussian -############################### -params1 = EvoTreeMLE( - loss=:gaussian, - nrounds=500, - nbins=64, - lambda=0.1, - gamma=0.1, - eta=0.05, - max_depth=6, - min_weight=10.0, - rowsample=1.0, - colsample=1.0, - rng=123, -) - -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:gaussian -); -# @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); -@time pred_train = EvoTrees.predict(model, x_train); -# @btime pred_train = EvoTrees.predict(model, X_train); - -pred_gauss = - [Distributions.Normal(pred_train[i, 1], pred_train[i, 2]) for i in axes(pred_train, 1)] -pred_q80 = quantile.(pred_gauss, 0.8) -pred_q20 = quantile.(pred_gauss, 0.2) - -mean(y_train .< pred_q80) -mean(y_train .< pred_q20) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train[:, 1], - y_train, - ms=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train[x_perm, 1], - color="navy", - linewidth=1.5, - label="mu", -) -plot!( - x_train[:, 1][x_perm], - pred_train[x_perm, 2], - color="darkred", - linewidth=1.5, - label="sigma", -) -plot!( - x_train[:, 1][x_perm], - pred_q20[x_perm, 1], - color="darkgreen", - linewidth=1.5, - label="q20", -) -plot!( - x_train[:, 1][x_perm], - pred_q80[x_perm, 1], - color="darkgreen", - linewidth=1.5, - label="q80", -) -savefig("figures/gaussian-sinus.png") - - -############################### -## Logistic -############################### -params1 = EvoTrees.EvoTreeMLE( - loss=:logistic, - nrounds=500, - nbins=64, - lambda=1.0, - gamma=0.1, - eta=0.03, - max_depth=6, - min_weight=1.0, - rowsample=1.0, - colsample=1.0, - rng=123, -) - -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:logistic_mle -); -# @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); -@time pred_train = EvoTrees.predict(model, x_train); -# @btime pred_train = EvoTrees.predict(model, X_train); - -pred_logistic = [ - Distributions.Logistic(pred_train[i, 1], pred_train[i, 2]) for i in axes(pred_train, 1) -] -pred_q80 = quantile.(pred_logistic, 0.8) -pred_q20 = quantile.(pred_logistic, 0.2) - -mean(y_train .< pred_q80) -mean(y_train .< pred_q20) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train[:, 1], - y_train, - ms=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train[x_perm, 1], - color="navy", - linewidth=1.5, - label="mu", -) -plot!( - x_train[:, 1][x_perm], - pred_train[x_perm, 2], - color="darkred", - linewidth=1.5, - label="s", -) -plot!( - x_train[:, 1][x_perm], - pred_q20[x_perm, 1], - color="darkgreen", - linewidth=1.5, - label="q20", -) -plot!( - x_train[:, 1][x_perm], - pred_q80[x_perm, 1], - color="darkgreen", - linewidth=1.5, - label="q80", -) -savefig("figures/logistic-sinus.png") diff --git a/experiments/readme_plots-df-gpu.jl b/experiments/readme_plots-df-gpu.jl index bff06ae4..0e623a28 100644 --- a/experiments/readme_plots-df-gpu.jl +++ b/experiments/readme_plots-df-gpu.jl @@ -5,6 +5,7 @@ using StatsBase: sample, quantile using Distributions using Random using Plots +using CUDA using EvoTrees using DataFrames using CategoricalArrays @@ -124,495 +125,3 @@ plot!( linewidth=1.5, label="Linear - C", ) - -# logistic / cross-entropy -params1 = EvoTreeRegressor( - loss=:logistic, - nrounds=200, - nbins=64, - lambda=0.1, - gamma=0.1, - eta=0.05, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) - -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:logloss -); -# 218.040 ms (123372 allocations: 34.71 MiB) -# @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval) -@time pred_train_logistic = predict(model, x_train); -@time pred_eval_logistic = predict(model, x_eval) -sqrt(mean((pred_train_logistic .- y_train) .^ 2)) - -# L1 -params1 = EvoTreeRegressor( - loss=:L1, - alpha=0.5, - nrounds=500, - nbins=64, - lambda=0.0, - gamma=0.0, - eta=0.1, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:mae -); -@time pred_train_L1 = predict(model, x_train) -@time pred_eval_L1 = predict(model, x_eval) -sqrt(mean((pred_train_L1 .- y_train) .^ 2)) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train, - y_train, - msize=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train_linear[x_perm], - color="navy", - linewidth=1.5, - label="Linear", -) -plot!( - x_train[:, 1][x_perm], - pred_train_linear_w[x_perm], - color="lightblue", - linewidth=1.5, - label="LinearW", -) -plot!( - x_train[:, 1][x_perm], - pred_train_logistic[x_perm], - color="darkred", - linewidth=1.5, - label="Logistic", -) -plot!( - x_train[:, 1][x_perm], - pred_train_L1[x_perm], - color="darkgreen", - linewidth=1.5, - label="L1", -) -savefig("figures/regression_sinus.png") - -# Poisson -params1 = EvoTreeCount( - loss=:poisson, - nrounds=500, - nbins=64, - lambda=0.1, - gamma=0.1, - eta=0.1, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:poisson -); -@time pred_train_poisson = predict(model, x_train); -sqrt(mean((pred_train_poisson .- y_train) .^ 2)) - -# Gamma -params1 = EvoTreeRegressor( - loss=:gamma, - nrounds=500, - nbins=64, - lambda=0.1, - gamma=0.1, - eta=0.02, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:gamma -); -@time pred_train_gamma = predict(model, x_train); -sqrt(mean((pred_train_gamma .- y_train) .^ 2)) - -# Tweedie -params1 = EvoTreeRegressor( - loss=:tweedie, - nrounds=500, - nbins=64, - lambda=0.5, - gamma=0.1, - eta=0.1, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:tweedie -); -@time pred_train_tweedie = predict(model, x_train); -sqrt(mean((pred_train_tweedie .- y_train) .^ 2)) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train, - y_train, - msize=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train_poisson[x_perm], - color="navy", - linewidth=1.5, - label="Poisson", -) -plot!( - x_train[:, 1][x_perm], - pred_train_gamma[x_perm], - color="lightblue", - linewidth=1.5, - label="Gamma", -) -plot!( - x_train[:, 1][x_perm], - pred_train_tweedie[x_perm], - color="darkred", - linewidth=1.5, - label="Tweedie", -) -savefig("figures/regression_sinus2.png") - - -############################### -## Quantiles -############################### -# q50 -params1 = EvoTreeRegressor( - loss=:quantile, - alpha=0.5, - nrounds=500, - nbins=64, - lambda=0.1, - gamma=0.0, - eta=0.05, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:mae -); -# 116.822 ms (74496 allocations: 36.41 MiB) for 100 iterations -# @btime model = grow_gbtree($X_train, $Y_train, $params1, X_eval = $X_eval, Y_eval = $Y_eval) -@time pred_train_q50 = predict(model, x_train) -sum(pred_train_q50 .< y_train) / length(y_train) - -# q20 -params1 = EvoTreeRegressor( - loss=:quantile, - alpha=0.2, - nrounds=300, - nbins=64, - lambda=0.1, - gamma=0.0, - eta=0.05, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); -@time pred_train_q20 = predict(model, x_train) -sum(pred_train_q20 .< y_train) / length(y_train) - -# q80 -params1 = EvoTreeRegressor( - loss=:quantile, - alpha=0.8, - nrounds=300, - nbins=64, - lambda=0.1, - gamma=0.0, - eta=0.05, - max_depth=6, - min_weight=1.0, - rowsample=0.5, - colsample=1.0, -) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) -@time pred_train_q80 = predict(model, x_train) -sum(pred_train_q80 .< y_train) / length(y_train) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train, - y_train, - ms=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train_q50[x_perm], - color="navy", - linewidth=1.5, - label="Median", -) -plot!( - x_train[:, 1][x_perm], - pred_train_q20[x_perm], - color="darkred", - linewidth=1.5, - label="Q20", -) -plot!( - x_train[:, 1][x_perm], - pred_train_q80[x_perm], - color="darkgreen", - linewidth=1.5, - label="Q80", -) -savefig("figures/quantiles_sinus.png") - - -############################### -## gaussian -############################### -params1 = EvoTreeMLE( - loss=:gaussian, - nrounds=500, - nbins=64, - lambda=0.1, - gamma=0.1, - eta=0.05, - max_depth=6, - min_weight=10.0, - rowsample=1.0, - colsample=1.0, - rng=123, -) - -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:gaussian -); -# @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); -@time pred_train = EvoTrees.predict(model, x_train); -# @btime pred_train = EvoTrees.predict(model, X_train); - -pred_gauss = - [Distributions.Normal(pred_train[i, 1], pred_train[i, 2]) for i in axes(pred_train, 1)] -pred_q80 = quantile.(pred_gauss, 0.8) -pred_q20 = quantile.(pred_gauss, 0.2) - -mean(y_train .< pred_q80) -mean(y_train .< pred_q20) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train[:, 1], - y_train, - ms=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train[x_perm, 1], - color="navy", - linewidth=1.5, - label="mu", -) -plot!( - x_train[:, 1][x_perm], - pred_train[x_perm, 2], - color="darkred", - linewidth=1.5, - label="sigma", -) -plot!( - x_train[:, 1][x_perm], - pred_q20[x_perm, 1], - color="darkgreen", - linewidth=1.5, - label="q20", -) -plot!( - x_train[:, 1][x_perm], - pred_q80[x_perm, 1], - color="darkgreen", - linewidth=1.5, - label="q80", -) -savefig("figures/gaussian-sinus.png") - - -############################### -## Logistic -############################### -params1 = EvoTrees.EvoTreeMLE( - loss=:logistic, - nrounds=500, - nbins=64, - lambda=1.0, - gamma=0.1, - eta=0.03, - max_depth=6, - min_weight=1.0, - rowsample=1.0, - colsample=1.0, - rng=123, -) - -@time model = fit_evotree( - params1; - x_train, - y_train, - x_eval, - y_eval, - print_every_n=25, - early_stopping_rounds=50, - metric=:logistic_mle -); -# @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); -@time pred_train = EvoTrees.predict(model, x_train); -# @btime pred_train = EvoTrees.predict(model, X_train); - -pred_logistic = [ - Distributions.Logistic(pred_train[i, 1], pred_train[i, 2]) for i in axes(pred_train, 1) -] -pred_q80 = quantile.(pred_logistic, 0.8) -pred_q20 = quantile.(pred_logistic, 0.2) - -mean(y_train .< pred_q80) -mean(y_train .< pred_q20) - -x_perm = sortperm(x_train[:, 1]) -plot( - x_train[:, 1], - y_train, - ms=0.5, - mcolor="darkgray", - mswidth=0, - background_color=RGB(1, 1, 1), - seriestype=:scatter, - xaxis=("feature"), - yaxis=("target"), - legend=true, - label="", -) -plot!( - x_train[:, 1][x_perm], - pred_train[x_perm, 1], - color="navy", - linewidth=1.5, - label="mu", -) -plot!( - x_train[:, 1][x_perm], - pred_train[x_perm, 2], - color="darkred", - linewidth=1.5, - label="s", -) -plot!( - x_train[:, 1][x_perm], - pred_q20[x_perm, 1], - color="darkgreen", - linewidth=1.5, - label="q20", -) -plot!( - x_train[:, 1][x_perm], - pred_q80[x_perm, 1], - color="darkgreen", - linewidth=1.5, - label="q80", -) -savefig("figures/logistic-sinus.png") diff --git a/experiments/readme_plots_gpu.jl b/experiments/readme_plots_gpu.jl index 33b89c92..89dc1f07 100644 --- a/experiments/readme_plots_gpu.jl +++ b/experiments/readme_plots_gpu.jl @@ -5,12 +5,13 @@ using Distributions using Random using Plots using Revise +using CUDA using EvoTrees using EvoTrees: predict, sigmoid, logit # using ProfileView # prepare a dataset -tree_type = "binary" +tree_type = "oblivious" # binary/oblivious device = "gpu" Random.seed!(123) diff --git a/ext/EvoTreesCUDAExt/eval.jl b/ext/EvoTreesCUDAExt/eval.jl index 6ea10153..f041c18e 100644 --- a/ext/EvoTreesCUDAExt/eval.jl +++ b/ext/EvoTreesCUDAExt/eval.jl @@ -46,7 +46,7 @@ end function eval_logloss_kernel!(eval::CuDeviceVector{T}, p::CuDeviceMatrix{T}, y::CuDeviceVector{T}, w::CuDeviceVector{T}) where {T<:AbstractFloat} i = threadIdx().x + (blockIdx().x - 1) * blockDim().x if i <= length(y) - @inbounds pred = sigmoid(p[1, i]) + @inbounds pred = EvoTrees.sigmoid(p[1, i]) @inbounds eval[i] = w[i] * (-y[i] * log(pred) + (y[i] - 1) * log(1 - pred)) end return nothing diff --git a/ext/EvoTreesCUDAExt/fit.jl b/ext/EvoTreesCUDAExt/fit.jl index e4f4e330..1f454254 100644 --- a/ext/EvoTreesCUDAExt/fit.jl +++ b/ext/EvoTreesCUDAExt/fit.jl @@ -162,6 +162,7 @@ function grow_otree!( out, left, right, + h∇_cpu::Array{Float64,3}, h∇::CuArray{Float64,3}, x_bin::CuMatrix, feattypes::Vector{Bool}, diff --git a/figures/gaussian-sinus-oblivious-gpu.png b/figures/gaussian-sinus-oblivious-gpu.png index abde7cdd..c62c1267 100644 Binary files a/figures/gaussian-sinus-oblivious-gpu.png and b/figures/gaussian-sinus-oblivious-gpu.png differ diff --git a/figures/regression-sinus-oblivious-gpu.png b/figures/regression-sinus-oblivious-gpu.png index 44644279..eb60970d 100644 Binary files a/figures/regression-sinus-oblivious-gpu.png and b/figures/regression-sinus-oblivious-gpu.png differ