From 3ccd5f0f6d46ec9c614e79f3229dbd696808eeec Mon Sep 17 00:00:00 2001 From: Baran Karakus Date: Sun, 6 Sep 2020 21:39:18 +0100 Subject: [PATCH 1/4] =?UTF-8?q?Correctly=20handling=20the=20case=20=CE=BBm?= =?UTF-8?q?ax=20=3D=200.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes: 1) Change to computeλ to ensure λmax = 0 leads to an output of [0] and not [NaN, ..., NaN]. 2) Change to fit! to ensure the case where autoλ = true and λmax = 0 is handled correctly (rather than throwing an error). --- src/Lasso.jl | 4 ++++ src/coordinate_descent.jl | 40 +++++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/Lasso.jl b/src/Lasso.jl index a3625d8..10d0e04 100644 --- a/src/Lasso.jl +++ b/src/Lasso.jl @@ -209,6 +209,10 @@ const MAX_DEV_FRAC = 0.999 # Compute automatic λ values based on λmax and λminratio function computeλ(λmax, λminratio, α, nλ) λmax /= α + if λmax == 0 + @info "The penalized coefficients equal zero for all values of the regularisation parameter λ." + return [λmax] + end logλmax = log(λmax) exp.(range(logλmax, stop=logλmax + log(λminratio), length=nλ)) end diff --git a/src/coordinate_descent.jl b/src/coordinate_descent.jl index f8d71ee..67f69ea 100644 --- a/src/coordinate_descent.jl +++ b/src/coordinate_descent.jl @@ -685,7 +685,7 @@ function StatsBase.fit!(path::RegularizationPath{S,T}; verbose::Bool=false, irls niter = 0 if nλ == 0 i = 0 - else + elseif i <= nλ # need this check because it is possible that autoλ is true and nλ is 1 while true # outer loop obj = convert(T, Inf) last_dev_ratio = dev_ratio @@ -776,6 +776,7 @@ function StatsBase.fit!(path::RegularizationPath{S,T}; verbose::Bool=false, irls end end + i = min(i, nλ) path.λ = path.λ[1:i] path.pct_dev = pct_dev[1:i] path.coefs = coefs[:, 1:i] @@ -819,29 +820,32 @@ function StatsBase.fit!(path::RegularizationPath{S,T}; verbose::Bool=false, i = 1 end - while true # outer loop - last_dev_ratio = dev_ratio - curλ = λ[i] + if i <= nλ # need this check because it is possible that autoλ is true and nλ is 1 + while true # outer loop + last_dev_ratio = dev_ratio + curλ = λ[i] - # Run coordinate descent - niter += cdfit!(newcoef, cd, curλ, criterion) + # Run coordinate descent + niter += cdfit!(newcoef, cd, curλ, criterion) - dev_ratio = cd.dev/nulldev - pct_dev[i] = 1 - dev_ratio - addcoefs!(coefs, newcoef, i) - b0s[i] = intercept(newcoef, cd) + dev_ratio = cd.dev/nulldev + pct_dev[i] = 1 - dev_ratio + addcoefs!(coefs, newcoef, i) + b0s[i] = intercept(newcoef, cd) - # Test whether we should continue - if i == nλ || (stopearly && autoλ && (last_dev_ratio - dev_ratio < MIN_DEV_FRAC_DIFF || - pct_dev[i] > MAX_DEV_FRAC)) - break - end + # Test whether we should continue + if i == nλ || (stopearly && autoλ && (last_dev_ratio - dev_ratio < MIN_DEV_FRAC_DIFF || + pct_dev[i] > MAX_DEV_FRAC)) + break + end - verbose && println("$i: λ=$curλ, pct_dev=$(pct_dev[i])") - poststep(path, cd, i, newcoef) - i += 1 + verbose && println("$i: λ=$curλ, pct_dev=$(pct_dev[i])") + poststep(path, cd, i, newcoef) + i += 1 + end end + i = min(i, nλ) path.λ = path.λ[1:i] path.pct_dev = pct_dev[1:i] path.coefs = coefs[:, 1:i] From f4a3923e183899c5b5faf741aac90da2f13270f1 Mon Sep 17 00:00:00 2001 From: barankarakus <34224042+barankarakus@users.noreply.github.com> Date: Sun, 20 Sep 2020 21:44:16 +0100 Subject: [PATCH 2/4] Update src/Lasso.jl Changing spelling of 'regularisation'. --- src/Lasso.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Lasso.jl b/src/Lasso.jl index 10d0e04..760fecf 100644 --- a/src/Lasso.jl +++ b/src/Lasso.jl @@ -210,7 +210,7 @@ const MAX_DEV_FRAC = 0.999 function computeλ(λmax, λminratio, α, nλ) λmax /= α if λmax == 0 - @info "The penalized coefficients equal zero for all values of the regularisation parameter λ." + @info "The penalized coefficients equal zero for all values of the regularization parameter λ." return [λmax] end logλmax = log(λmax) From 5636fed75e9ddda2a9ed757b8bf326b78ff78cb5 Mon Sep 17 00:00:00 2001 From: Baran Karakus Date: Sun, 20 Sep 2020 23:00:59 +0100 Subject: [PATCH 3/4] Replacing equality with approximate equality. --- src/Lasso.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Lasso.jl b/src/Lasso.jl index 760fecf..df8981a 100644 --- a/src/Lasso.jl +++ b/src/Lasso.jl @@ -209,9 +209,9 @@ const MAX_DEV_FRAC = 0.999 # Compute automatic λ values based on λmax and λminratio function computeλ(λmax, λminratio, α, nλ) λmax /= α - if λmax == 0 + if isapprox(λmax, 0; atol=1e-10) # then assuming λmax = 0 @info "The penalized coefficients equal zero for all values of the regularization parameter λ." - return [λmax] + return [0] end logλmax = log(λmax) exp.(range(logλmax, stop=logλmax + log(λminratio), length=nλ)) From 5ac04f7befcd797043b3b882d0a6b092d3c9dd51 Mon Sep 17 00:00:00 2001 From: Baran Karakus Date: Sun, 20 Sep 2020 23:02:56 +0100 Subject: [PATCH 4/4] Added test for case: zero variation in y. --- test/lasso.jl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/lasso.jl b/test/lasso.jl index 0b4917b..4f6d7ed 100644 --- a/test/lasso.jl +++ b/test/lasso.jl @@ -154,6 +154,24 @@ end end end +# Test case with zero variation in y is handled correctly +function zero_variation_test() + X = [ + 0.5472502169628388 0.37660447632078875 0.06669114126498532 0.4950818154768257; + 0.5142931961160688 0.520205941129849 0.4052730635141131 0.6700530909562794; + 0.5831846867316071 0.3174143498124731 0.772131243876973 0.03386847158881201; + 0.8802489459954292 0.6742158685234003 0.3849775799923969 0.7773264968613842; + 0.9216786846192617 0.7888303438159934 0.09788865152005011 0.34950775139369905 + ] + y = 0.2937233091452627 .+ zeros(size(X, 1)) + path = fit(LassoPath, X, y) + (path.λ == eltype(path.λ)[0]) || return false + (length(path.coefs.nzval) == 0) || return false + return true +end + +@test zero_variation_test() == true + # Test for sparse matrices # @testset "LassoPath Zero in" begin