Commit 9d099f5: Upgraded to StatsModels v0.6 syntax (#34)
AsafManela authored Aug 3, 2019
1 parent d3c02c5
Showing 5 changed files with 27 additions and 28 deletions.
Project.toml: 9 changes (4 additions, 5 deletions)
@@ -1,6 +1,6 @@
name = "Lasso"
uuid = "b4fcebef-c861-5a0f-a7e2-ba9dc32b180a"
version = "0.4.1"
version = "0.5.0"

[deps]
DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
@@ -15,19 +15,18 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"

[compat]
GLM = "1.2"
StatsModels = "0.4, 0.5"
GLM = "1.3"
StatsModels = "0.6"
julia = "0.7, 1"

[extras]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
-InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["CSV", "InteractiveUtils", "LinearAlgebra", "GLMNet", "DataFrames", "Random", "SparseArrays", "Test"]
test = ["CSV", "LinearAlgebra", "GLMNet", "DataFrames", "Random", "SparseArrays", "Test"]
docs/src/index.md: 4 changes (2 additions, 2 deletions)
@@ -41,9 +41,9 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7])
│ 3 │ 3 │ 7 │
julia> m = fit(LassoModel, @formula(Y ~ X), data)
-StatsModels.DataFrameRegressionModel{LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}},Array{Float64,2}}
+StatsModels.TableRegressionModel{LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}},Array{Float64,2}}
-Formula: Y ~ +X
+Y ~ X
Coefficients:
──────────────────────────────────────────────────────────────────
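
For reference, a self-contained version of the example this hunk documents, reflecting the new StatsModels 0.6 wrapper type (a minimal sketch; coef is assumed available via the loaded packages, as in the docs):

using DataFrames, Lasso, StatsModels
data = DataFrame(X = [1, 2, 3], Y = [2, 4, 7])
m = fit(LassoModel, @formula(Y ~ X), data)  # prints as a StatsModels.TableRegressionModel
coef(m)  # intercept and slope chosen by the default segment selector
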
docs/src/lasso.md: 4 changes (2 additions, 2 deletions)
@@ -73,9 +73,9 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7])
│ 3 │ 3 │ 7 │
julia> m = fit(LassoModel, @formula(Y ~ X), data; select=MinCVmse(Kfold(3,2)))
-StatsModels.DataFrameRegressionModel{LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}},Array{Float64,2}}
+StatsModels.TableRegressionModel{LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}},Array{Float64,2}}
-Formula: Y ~ +X
+Y ~ X
Coefficients:
──────────────────────────────────────────────────────────────────
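
The same fit with explicit cross-validated selection, as this hunk documents; a sketch assuming Kfold comes from MLBase, which Lasso.jl uses for fold generation:

using DataFrames, Lasso, MLBase, StatsModels
data = DataFrame(X = [1, 2, 3], Y = [2, 4, 7])
# MinCVmse picks the segment with minimum cross-validated MSE;
# Kfold(3, 2) partitions the 3 observations into 2 folds
m = fit(LassoModel, @formula(Y ~ X), data; select = MinCVmse(Kfold(3, 2)))
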
src/segselect.jl: 2 changes (1 addition, 1 deletion)
@@ -284,7 +284,7 @@ newglm(m::GeneralizedLinearModel, pp) = GeneralizedLinearModel(m.rr, pp, true)
# don't add an intercept when using a @formula because we use the intercept keyword arg to add an intercept
StatsModels.drop_intercept(::Type{R}) where R<:RegularizedModel = true

-StatsModels.@delegate StatsModels.DataFrameRegressionModel.model [segselect, MinCVmse, MinCV1se]
+StatsModels.@delegate StatsModels.TableRegressionModel.model [segselect, MinCVmse, MinCV1se]
for modeltype in (:LassoModel, :GammaLassoModel)
@eval begin
StatsModels.@delegate $modeltype.lpm [StatsBase.coef, StatsBase.confint,
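
The @delegate line forwards the listed selection helpers from the formula-level wrapper to the model it wraps. Roughly, it is assumed to expand into forwarding methods like this hand-written sketch (illustrative, not the macro's literal output):

# forward each listed function by unwrapping the TableRegressionModel
segselect(w::StatsModels.TableRegressionModel, args...; kwargs...) =
    segselect(w.model, args...; kwargs...)
# ...and likewise for MinCVmse and MinCV1se
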
test/gammalasso.jl: 36 changes (18 additions, 18 deletions)
@@ -41,31 +41,31 @@ Random.seed!(243214)
gcoefs = readcsvmat(joinpath(datapath,"gamlr.$family.$fitname.coefs.csv");types=[Float64 for i=1:100])
family = prms[1,Symbol("fit.family")]
γ = prms[1,Symbol("fit.gamma")]
-λ = nothing #convert(Vector{Float64},fittable[Symbol("fit.lambda")]) # should be set to nothing eventually
+λ = nothing #convert(Vector{Float64},fittable[!, Symbol("fit.lambda")]) # should be set to nothing eventually

# fit julia version
glp = fit(GammaLassoPath, X, y, dist, link; γ=γ, stopearly=false,
λminratio=0.001, penalty_factor=penalty_factor, λ=λ,
standardize=false, standardizeω=false)

# compare
-@test true==issimilarhead(glp.λ,fittable[Symbol("fit.lambda")];rtol=rtol)
-@test true==issimilarhead(glp.b0,fittable[Symbol("fit.alpha")];rtol=rtol)
+@test true==issimilarhead(glp.λ,fittable[!, Symbol("fit.lambda")];rtol=rtol)
+@test true==issimilarhead(glp.b0,fittable[!, Symbol("fit.alpha")];rtol=rtol)
@test true==issimilarhead(convert(Matrix{Float64},glp.coefs'),gcoefs';rtol=rtol)
# we follow GLM.jl convention where deviance is scaled by nobs, while in gamlr it is not
-@test true==issimilarhead(deviance(glp),fittable[Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
-@test true==issimilarhead(deviance(glp,X,y),fittable[Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
+@test true==issimilarhead(deviance(glp),fittable[!, Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
+@test true==issimilarhead(deviance(glp,X,y),fittable[!, Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
# @test true==issimilarhead(round(df(glp)[2:end]),round(fittable[2:end,Symbol("fit.df")]))
-@test true==issimilarhead(loglikelihood(glp),fittable[Symbol("fit.logLik")];rtol=rtol)
-@test true==issimilarhead(aicc(glp),fittable[Symbol("fit.AICc")];rtol=rtol)
+@test true==issimilarhead(loglikelihood(glp),fittable[!, Symbol("fit.logLik")];rtol=rtol)
+@test true==issimilarhead(aicc(glp),fittable[!, Symbol("fit.AICc")];rtol=rtol)

# TODO: figure out why these are so off, maybe because most are corner solutions
# and stopping rules for lambda are different
# # what we really need all these stats for is that the AICc identifies the same minima:
-# if argmin(aicc(glp)) != lastindex(aicc(glp)) && argmin(fittable[Symbol("fit.AICc")]) != lastindex(fittable[Symbol("fit.AICc")])
+# if argmin(aicc(glp)) != lastindex(aicc(glp)) && argmin(fittable[!, Symbol("fit.AICc")]) != lastindex(fittable[!, Symbol("fit.AICc")])
# # interior minima
# println("comparing intereior AICc")
-# @test argmin(aicc(glp)) == argmin(fittable[Symbol("fit.AICc")])
+# @test argmin(aicc(glp)) == argmin(fittable[!, Symbol("fit.AICc")])
# end

# compare CV, NOTE: this involves a random choice of train subsamples
@@ -116,7 +116,7 @@ end
# gcoefs = readcsvmat(joinpath(datapath,"gamlr.$family.$fitname.coefs.csv");types=[Float64 for i=1:100])
# family = prms[1,Symbol("fit.family")]
# γ = prms[1,Symbol("fit.gamma")]
-# λ = nothing #convert(Vector{Float64},fittable[Symbol("fit.lambda")]) # should be set to nothing eventually
+# λ = nothing #convert(Vector{Float64},fittable[!, Symbol("fit.lambda")]) # should be set to nothing eventually
#
# # fit julia version
# glp = fit(GammaLassoPath, X, y, dist, link; γ=γ, stopearly=false,
@@ -156,23 +156,23 @@ end
# predict(m)
#
# # compare
-# @test true==issimilarhead(glp.λ,fittable[Symbol("fit.lambda")];rtol=rtol)
-# @test true==issimilarhead(glp.b0,fittable[Symbol("fit.alpha")];rtol=rtol)
+# @test true==issimilarhead(glp.λ,fittable[!, Symbol("fit.lambda")];rtol=rtol)
+# @test true==issimilarhead(glp.b0,fittable[!, Symbol("fit.alpha")];rtol=rtol)
# @test true==issimilarhead(convert(Matrix{Float64},glp.coefs'),gcoefs';rtol=rtol)
# # we follow GLM.jl convention where deviance is scaled by nobs, while in gamlr it is not
-# @test true==issimilarhead(deviance(glp),fittable[Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
-# @test true==issimilarhead(deviance(glp,X,y),fittable[Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
+# @test true==issimilarhead(deviance(glp),fittable[!, Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
+# @test true==issimilarhead(deviance(glp,X,y),fittable[!, Symbol("fit.deviance")]/nobs(glp);rtol=rtol)
# # @test true==issimilarhead(round(df(glp)[2:end]),round(fittable[2:end,Symbol("fit.df")]))
-# @test true==issimilarhead(loglikelihood(glp),fittable[Symbol("fit.logLik")];rtol=rtol)
-# @test true==issimilarhead(aicc(glp),fittable[Symbol("fit.AICc")];rtol=rtol)
+# @test true==issimilarhead(loglikelihood(glp),fittable[!, Symbol("fit.logLik")];rtol=rtol)
+# @test true==issimilarhead(aicc(glp),fittable[!, Symbol("fit.AICc")];rtol=rtol)
#
# # TODO: figure out why these are so off, maybe because most are corner solutions
# # and stopping rules for lambda are different
# # # what we really need all these stats for is that the AICc identifies the same minima:
-# # if argmin(aicc(glp)) != lastindex(aicc(glp)) && argmin(fittable[Symbol("fit.AICc")]) != lastindex(fittable[Symbol("fit.AICc")])
+# # if argmin(aicc(glp)) != lastindex(aicc(glp)) && argmin(fittable[!, Symbol("fit.AICc")]) != lastindex(fittable[!, Symbol("fit.AICc")])
# # # interior minima
# # println("comparing interior AICc")
-# # @test argmin(aicc(glp)) == argmin(fittable[Symbol("fit.AICc")])
+# # @test argmin(aicc(glp)) == argmin(fittable[!, Symbol("fit.AICc")])
# # end
#
# # compare CV, NOTE: this involves a random choice of train subsamples
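
Most of this file's churn tracks the DataFrames column-indexing deprecation that accompanied the StatsModels 0.6 ecosystem update: the single-argument df[col] form gave way to explicit selectors. A minimal sketch of the distinction:

using DataFrames
df = DataFrame(a = [1.0, 2.0, 3.0])
v = df[!, :a]  # the stored column vector itself, no copy
c = df[:, :a]  # a copy of the column
# df[:a]       # old single-argument form, deprecated in favor of the above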

2 comments on commit 9d099f5

@AsafManela (Collaborator, Author)

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/2513

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.5.0 -m "<description of version>" 9d099f512afb6042b1094714e0ab280cf10f3863
git push origin v0.5.0
