diff --git a/Project.toml b/Project.toml
index ec3193c..7750b76 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "Lasso"
 uuid = "b4fcebef-c861-5a0f-a7e2-ba9dc32b180a"
-version = "0.4.1"
+version = "0.5.0"

 [deps]
 DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
@@ -15,19 +15,18 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"

 [compat]
-GLM = "1.2"
-StatsModels = "0.4, 0.5"
+GLM = "1.3"
+StatsModels = "0.6"
 julia = "0.7, 1"

 [extras]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
-InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [targets]
-test = ["CSV", "InteractiveUtils", "LinearAlgebra", "GLMNet", "DataFrames", "Random", "SparseArrays", "Test"]
+test = ["CSV", "LinearAlgebra", "GLMNet", "DataFrames", "Random", "SparseArrays", "Test"]
diff --git a/docs/src/index.md b/docs/src/index.md
index 0d3a3a1..237ec08 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -41,9 +41,9 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7])
 │ 3   │ 3     │ 7     │

 julia> m = fit(LassoModel, @formula(Y ~ X), data)
-StatsModels.DataFrameRegressionModel{LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}},Array{Float64,2}}
+StatsModels.TableRegressionModel{LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}},Array{Float64,2}}

-Formula: Y ~ +X
+Y ~ X

 Coefficients:
 ──────────────────────────────────────────────────────────────────
diff --git a/docs/src/lasso.md b/docs/src/lasso.md
index 3d20d90..ddab76e 100644
--- a/docs/src/lasso.md
+++ b/docs/src/lasso.md
@@ -73,9 +73,9 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7])
 │ 3   │ 3     │ 7     │

 julia> m = fit(LassoModel, @formula(Y ~ X), data; select=MinCVmse(Kfold(3,2)))
-StatsModels.DataFrameRegressionModel{LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}},Array{Float64,2}}
+StatsModels.TableRegressionModel{LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}},Array{Float64,2}}

-Formula: Y ~ +X
+Y ~ X

 Coefficients:
 ──────────────────────────────────────────────────────────────────
diff --git a/src/segselect.jl b/src/segselect.jl
index 41c641f..1f63a8a 100644
--- a/src/segselect.jl
+++ b/src/segselect.jl
@@ -284,7 +284,7 @@ newglm(m::GeneralizedLinearModel, pp) = GeneralizedLinearModel(m.rr, pp, true)
 # don't add an intercept when using a @formula because we use the intercept keyword arg to add an intercept
 StatsModels.drop_intercept(::Type{R}) where R<:RegularizedModel = true
-StatsModels.@delegate StatsModels.DataFrameRegressionModel.model [segselect, MinCVmse, MinCV1se]
+StatsModels.@delegate StatsModels.TableRegressionModel.model [segselect, MinCVmse, MinCV1se]

 for modeltype in (:LassoModel, :GammaLassoModel)
     @eval begin
         StatsModels.@delegate $modeltype.lpm [StatsBase.coef, StatsBase.confint,
diff --git a/test/gammalasso.jl b/test/gammalasso.jl
index d461b05..7d71aa8 100644
--- a/test/gammalasso.jl
+++ b/test/gammalasso.jl
@@ -41,7 +41,7 @@ Random.seed!(243214)
     gcoefs = readcsvmat(joinpath(datapath,"gamlr.$family.$fitname.coefs.csv");types=[Float64 for i=1:100])
     family = prms[1,Symbol("fit.family")]
     γ = prms[1,Symbol("fit.gamma")]
-    λ = nothing #convert(Vector{Float64},fittable[Symbol("fit.lambda")]) # should be set to nothing evenatually
+    λ = nothing #convert(Vector{Float64},fittable[!, Symbol("fit.lambda")]) # should be set to nothing eventually

     # fit julia version
     glp = fit(GammaLassoPath, X, y, dist, link; γ=γ, stopearly=false,
@@ -49,23 +49,23 @@ Random.seed!(243214)
         standardize=false, standardizeω=false)

     # compare
-    @test true==issimilarhead(glp.λ,fittable[Symbol("fit.lambda")];rtol=rtol)
-    @test true==issimilarhead(glp.b0,fittable[Symbol("fit.alpha")];rtol=rtol)
+    @test true==issimilarhead(glp.λ,fittable[!, Symbol("fit.lambda")];rtol=rtol)
+    @test true==issimilarhead(glp.b0,fittable[!, Symbol("fit.alpha")];rtol=rtol)
     @test true==issimilarhead(convert(Matrix{Float64},glp.coefs'),gcoefs';rtol=rtol)
     # we follow GLM.jl convention where deviance is scaled by nobs, while in gamlr it is not
-    @test true==issimilarhead(deviance(glp),fittable[Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
-    @test true==issimilarhead(deviance(glp,X,y),fittable[Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
+    @test true==issimilarhead(deviance(glp),fittable[!, Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
+    @test true==issimilarhead(deviance(glp,X,y),fittable[!, Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
     # @test true==issimilarhead(round(df(glp)[2:end]),round(fittable[2:end,Symbol("fit.df")]))
-    @test true==issimilarhead(loglikelihood(glp),fittable[Symbol("fit.logLik")];rtol=rtol)
-    @test true==issimilarhead(aicc(glp),fittable[Symbol("fit.AICc")];rtol=rtol)
+    @test true==issimilarhead(loglikelihood(glp),fittable[!, Symbol("fit.logLik")];rtol=rtol)
+    @test true==issimilarhead(aicc(glp),fittable[!, Symbol("fit.AICc")];rtol=rtol)

     # TODO: figure out why these are so off, maybe because most are corner solutions
     # and stopping rules for lambda are different
     # # what we really need all these stats for is that the AICc identifies the same minima:
-    # if argmin(aicc(glp)) != lastindex(aicc(glp)) && argmin(fittable[Symbol("fit.AICc")]) != lastindex(fittable[Symbol("fit.AICc")])
+    # if argmin(aicc(glp)) != lastindex(aicc(glp)) && argmin(fittable[!, Symbol("fit.AICc")]) != lastindex(fittable[!, Symbol("fit.AICc")])
     # # interior minima
     # println("comparing intereior AICc")
-    # @test argmin(aicc(glp)) == argmin(fittable[Symbol("fit.AICc")])
+    # @test argmin(aicc(glp)) == argmin(fittable[!, Symbol("fit.AICc")])
     # end

     # comparse CV, NOTE: this involves a random choice of train subsamples
@@ -116,7 +116,7 @@ end
 # gcoefs = readcsvmat(joinpath(datapath,"gamlr.$family.$fitname.coefs.csv");types=[Float64 for i=1:100])
 # family = prms[1,Symbol("fit.family")]
 # γ = prms[1,Symbol("fit.gamma")]
-# λ = nothing #convert(Vector{Float64},fittable[Symbol("fit.lambda")]) # should be set to nothing evenatually
+# λ = nothing #convert(Vector{Float64},fittable[!, Symbol("fit.lambda")]) # should be set to nothing eventually
 #
 # # fit julia version
 # glp = fit(GammaLassoPath, X, y, dist, link; γ=γ, stopearly=false,
@@ -156,23 +156,23 @@ end
 # predict(m)
 #
 # # compare
-# @test true==issimilarhead(glp.λ,fittable[Symbol("fit.lambda")];rtol=rtol)
-# @test true==issimilarhead(glp.b0,fittable[Symbol("fit.alpha")];rtol=rtol)
+# @test true==issimilarhead(glp.λ,fittable[!, Symbol("fit.lambda")];rtol=rtol)
+# @test true==issimilarhead(glp.b0,fittable[!, Symbol("fit.alpha")];rtol=rtol)
 # @test true==issimilarhead(convert(Matrix{Float64},glp.coefs'),gcoefs';rtol=rtol)
 # # we follow GLM.jl convention where deviance is scaled by nobs, while in gamlr it is not
-# @test true==issimilarhead(deviance(glp),fittable[Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
-# @test true==issimilarhead(deviance(glp,X,y),fittable[Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
+# @test true==issimilarhead(deviance(glp),fittable[!, Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
+# @test true==issimilarhead(deviance(glp,X,y),fittable[!, Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
 # # @test true==issimilarhead(round(df(glp)[2:end]),round(fittable[2:end,Symbol("fit.df")]))
-# @test true==issimilarhead(loglikelihood(glp),fittable[Symbol("fit.logLik")];rtol=rtol)
-# @test true==issimilarhead(aicc(glp),fittable[Symbol("fit.AICc")];rtol=rtol)
+# @test true==issimilarhead(loglikelihood(glp),fittable[!, Symbol("fit.logLik")];rtol=rtol)
+# @test true==issimilarhead(aicc(glp),fittable[!, Symbol("fit.AICc")];rtol=rtol)
 #
 # # TODO: figure out why these are so off, maybe because most are corner solutions
 # # and stopping rules for lambda are different
 # # # what we really need all these stats for is that the AICc identifies the same minima:
-# # if argmin(aicc(glp)) != lastindex(aicc(glp)) && argmin(fittable[Symbol("fit.AICc")]) != lastindex(fittable[Symbol("fit.AICc")])
+# # if argmin(aicc(glp)) != lastindex(aicc(glp)) && argmin(fittable[!, Symbol("fit.AICc")]) != lastindex(fittable[!, Symbol("fit.AICc")])
 # # # interior minima
 # # println("comparing intereior AICc")
-# # @test argmin(aicc(glp)) == argmin(fittable[Symbol("fit.AICc")])
+# # @test argmin(aicc(glp)) == argmin(fittable[!, Symbol("fit.AICc")])
 # # end
 #
 # # comparse CV, NOTE: this involves a random choice of train subsamples
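
Editor's note (not part of the patch): the changes above track two upstream API
migrations, sketched below in Julia for context. The `fittable` name mirrors the
test fixture in test/gammalasso.jl; the example data are invented for
illustration, and the snippet assumes DataFrames >= 0.19 and StatsModels 0.6.

    using DataFrames

    # gamlr fixture columns have dots in their names, hence Symbol("fit.lambda")
    fittable = DataFrame(Symbol("fit.lambda") => [0.5, 0.25, 0.125])

    # DataFrames deprecated df[col] in favor of explicit row-selector forms:
    lambda      = fittable[!, Symbol("fit.lambda")]  # no copy; replaces fittable[Symbol("fit.lambda")]
    lambda_copy = fittable[:, Symbol("fit.lambda")]  # copying variant

    # StatsModels 0.6 renamed the wrapper returned by formula fits, so code that
    # dispatched or delegated on the old name must be updated:
    #   fit(LassoModel, @formula(Y ~ X), data) isa StatsModels.TableRegressionModel
    # It was StatsModels.DataFrameRegressionModel in 0.4/0.5, which is why the
    # @delegate call in src/segselect.jl and the doc outputs change.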