From b980a36e47eb436d805380fb60b7c7d307f67061 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 17 Nov 2023 10:33:46 +0000 Subject: [PATCH 01/71] WIP --- docs/src/tmle_estimation.md | 2 +- estimands_test.yaml | 102 +++++++++++ scripts/tmle.jl | 44 +++-- src/TargetedEstimation.jl | 6 +- src/cache_managers.jl | 65 +++++++ src/merge.jl | 4 +- src/runner.jl | 98 +++++++++++ src/sieve_variance.jl | 4 +- src/tmle.jl | 94 ---------- src/utils.jl | 151 ++++++++-------- test/cache_managers.jl | 86 ++++++++++ test/config/failing_parameters.yaml | 10 +- .../{tmle_config_2.jl => ose_config.jl} | 11 +- test/config/parameters.yaml | 60 +++---- test/config/sieve_tests_parameters_1.yaml | 12 +- test/config/sieve_tests_parameters_2.yaml | 4 +- test/config/tmle_config.jl | 16 +- test/data/merge/empty_sieve.csv | 2 +- test/data/merge/sieve_output_1.csv | 14 +- test/data/merge/sieve_output_2.csv | 6 +- test/data/merge/tmle_output_1.csv | 14 +- test/data/merge/tmle_output_2.csv | 6 +- test/load_tmle_spec.jl | 113 ------------ test/merge.jl | 6 +- test/resampling.jl | 1 - test/{tmle.jl => runner.jl} | 89 ++++++---- test/runtests.jl | 5 +- test/sieve_variance.jl | 14 +- test/testutils.jl | 57 ++++++ test/utils.jl | 162 +++++++++++------- 30 files changed, 759 insertions(+), 499 deletions(-) create mode 100644 estimands_test.yaml create mode 100644 src/cache_managers.jl create mode 100644 src/runner.jl delete mode 100644 src/tmle.jl create mode 100644 test/cache_managers.jl rename test/config/{tmle_config_2.jl => ose_config.jl} (83%) delete mode 100644 test/load_tmle_spec.jl rename test/{tmle.jl => runner.jl} (70%) create mode 100644 test/testutils.jl diff --git a/docs/src/tmle_estimation.md b/docs/src/tmle_estimation.md index 4917922..78984cb 100644 --- a/docs/src/tmle_estimation.md +++ b/docs/src/tmle_estimation.md @@ -34,7 +34,7 @@ The output file is a plain CSV file containing one line per estimand in the inpu - `TREATMENTS`: A "_&_" separated string containing all treatment 
variables associated with the estimand. - `CASE`: A "_&_" separated string containing the treatment variables' case values in the same order as `TREATMENTS`. - `CONTROL`: A "_&_" separated string containing the treatment variables' control values in the same order as `TREATMENTS`. -- `TARGET`: The outcome variable. +- `OUTCOME`: The outcome variable. - `CONFOUNDERS`: A "_&_" separated string containing the confounding variables. - `COVARIATES`: A "_&_" separated string containing the extra covariates used to estimate the outcome's mean. - `INITIAL_ESTIMATE`: The initial estimate before the targeting step. diff --git a/estimands_test.yaml b/estimands_test.yaml new file mode 100644 index 0000000..34ae410 --- /dev/null +++ b/estimands_test.yaml @@ -0,0 +1,102 @@ +type: "Configuration" +estimands: + - outcome_extra_covariates: + - C1 + type: "IATE" + treatment_values: + T2: + case: true + control: false + T1: + case: true + control: false + outcome: CONTINUOUS, outcome + treatment_confounders: + T2: + - W1 + - W2 + T1: + - W1 + - W2 + - outcome_extra_covariates: + - C1 + type: "IATE" + treatment_values: + T2: + case: true + control: false + T1: + case: true + control: false + outcome: BINARY/outcome + treatment_confounders: + T2: + - W1 + - W2 + T1: + - W1 + - W2 + - outcome_extra_covariates: [] + type: "ATE" + treatment_values: + T1: + case: true + control: false + outcome: CONTINUOUS, outcome + treatment_confounders: + T1: + - W1 + - W2 + - outcome_extra_covariates: [] + type: "IATE" + treatment_values: + T2: + case: false + control: true + T1: + case: true + control: false + outcome: CONTINUOUS, outcome + treatment_confounders: + T2: + - W1 + - W2 + T1: + - W1 + - W2 + - outcome_extra_covariates: + - C1 + type: "IATE" + treatment_values: + T2: + case: false + control: true + T1: + case: true + control: false + outcome: BINARY/outcome + treatment_confounders: + T2: + - W1 + - W2 + T1: + - W1 + - W2 + - outcome_extra_covariates: + - C1 + type: "ATE" + 
treatment_values: + T2: + case: true + control: false + T1: + case: true + control: false + outcome: CONTINUOUS, outcome + treatment_confounders: + T2: + - W1 + - W2 + T1: + - W1 + - W2 diff --git a/scripts/tmle.jl b/scripts/tmle.jl index d16849f..40df722 100644 --- a/scripts/tmle.jl +++ b/scripts/tmle.jl @@ -3,37 +3,57 @@ using TargetedEstimation function parse_commandline() s = ArgParseSettings( - description = "Targeted Learning estimation", + description = "Targeted Learning Estimation", commands_are_required = false, version = "0.2", add_version = true) @add_arg_table s begin - "data" + "dataset" help = "Path to dataset file (.csv|.arrow)" required = true - "param-file" - help = "A file (.yaml|.bin) listing all parameters to estimate." + "estimands-config" + help = "A .yaml file listing all parameters to estimate." required = true - "csv-out" - help = "Path to output `.csv` file" - required = true - "--estimator-file" - help = "A file (.jl) describing the tmle estimator to use, README.md" + "--estimators-config" + help = "A file (.jl) defining the estimators to be used." arg_type= String required = false "--hdf5-out" - help = "If the influence curves also need to be stored (see also: --pval-threshold)" + help = "Stores the results in a HDF5 file format (see also: --pval-threshold)." arg_type = String default = nothing + "--csv-out" + help = "Path to an output `.csv` file." + required = true "--pval-threshold" - help = "Only those parameters passing the threshold will have their influence curve saved." + help = """In order to save disk space, only estimation results with a p-value lesser than + the threshold will have their influence curve saved. (default = 1., i.e. all influence curves are saved). + """ default = 1. arg_type = Float64 + "--sort-estimands" + help = "If estimands should be sorted to minimize memory usage, see also: cache-strategy." 
+ default = false + arg_type = Bool + "--cache-strategy" + help = string("Nuisance functions are stored in the cache during estimation. The cache can be released from these", + " functions to limit memory consumption. There are currently 3 caching management strategies: ", + "'release_unusable' (default): Will release the cache from nuisance functions that won't be used in the future. ", + "'K': Will keep the cache size under K nuisance functions. ", + "'no_cache': Disables caching. ", + "Note that caching strategies are better used in conjunction with `--sort-estimands` to minimized memory usage." + ) + default = "release_unusable" + arg_type = String "--chunksize" - help = "Results will be appended to outfiles every chunk" + help = "Results are appended to outfiles in chunks." default = 100 arg_type = Int + "--rng" + help = "Random seed" + default = 123 + arg_type = Int "--verbosity", "-v" help = "Verbosity level" arg_type = Int diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index 6c40623..88a6d4e 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -24,10 +24,12 @@ using MultipleTesting using Combinatorics using Tables using Random +using YAML import MLJModelInterface -include("tmle.jl") +include("cache_managers.jl") +include("runner.jl") include("utils.jl") include("sieve_variance.jl") include("merge.jl") @@ -36,7 +38,7 @@ include(joinpath("models", "glmnet.jl")) include(joinpath("models", "adaptive_interaction_transformer.jl")) include(joinpath("models", "biallelic_snp_encoder.jl")) -export tmle_estimation, sieve_variance_plateau, merge_csv_files +export run_estimation, sieve_variance_plateau, merge_csv_files export GLMNetRegressor, GLMNetClassifier export RestrictedInteractionTransformer, BiAllelicSNPEncoder export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV diff --git a/src/cache_managers.jl b/src/cache_managers.jl new file mode 100644 index 0000000..4f0a3f6 --- /dev/null +++ b/src/cache_managers.jl @@ -0,0 
+1,121 @@
+"""
+    CacheManager
+
+Abstract supertype of the cache eviction strategies. Each concrete manager below owns a
+`cache::Dict` and implements `release!(cache_manager, Ψ)`, which the runner calls once
+the estimand `Ψ` has been processed.
+"""
+abstract type CacheManager end
+
+"""
+    ReleaseUnusableCacheManager(η_counts)
+
+Evicts a nuisance function as soon as its remaining use count in `η_counts` reaches zero.
+"""
+struct ReleaseUnusableCacheManager <: CacheManager
+    cache::Dict
+    η_counts::Dict
+    ReleaseUnusableCacheManager(η_counts) = new(Dict(), η_counts)
+end
+
+"""
+    release_if_unusable!(cache_manager, η)
+
+Decrements the remaining use count of `η` and evicts it from the cache once that count
+reaches zero. `delete!` is used rather than `pop!` because `pop!` throws a `KeyError`
+when the entry is absent, e.g. when the estimation failed before `η` was ever cached.
+"""
+function release_if_unusable!(cache_manager::ReleaseUnusableCacheManager, η)
+    cache_manager.η_counts[η] -= 1
+    if cache_manager.η_counts[η] == 0
+        delete!(cache_manager.cache, η)
+    end
+    return nothing
+end
+
+"""
+    release!(cache_manager::ReleaseUnusableCacheManager, Ψ)
+
+Drops the last fluctuation, then updates the use counts of the nuisance functions of `Ψ`
+and evicts those whose count reaches zero.
+"""
+function release!(cache_manager::ReleaseUnusableCacheManager, Ψ)
+    # Always drop fluctuations
+    delete!(cache_manager.cache, :last_fluctuation)
+
+    η = TMLE.get_relevant_factors(Ψ)
+    # Propensity scores
+    for ps in η.propensity_score
+        release_if_unusable!(cache_manager, ps)
+    end
+    # Outcome Mean
+    release_if_unusable!(cache_manager, η.outcome_mean)
+end
+
+"""
+    MaxSizeCacheManager(max_size)
+
+Keeps the cache at or below `max_size` entries.
+"""
+struct MaxSizeCacheManager <: CacheManager
+    cache::Dict
+    max_size::Int
+    MaxSizeCacheManager(max_size) = new(Dict(), max_size)
+end
+
+"""
+    release!(cache_manager::MaxSizeCacheManager, Ψ)
+
+Evicts entries until the cache holds at most `max_size` of them. `Ψ` is not used.
+"""
+function release!(cache_manager::MaxSizeCacheManager, Ψ)
+    # A negative `max_size` is treated as 0 so that `pop!` is never called on an empty cache
+    max_size = max(cache_manager.max_size, 0)
+    while length(cache_manager.cache) > max_size
+        # Prioritize the release of the last fluctuation
+        if haskey(cache_manager.cache, :last_fluctuation)
+            delete!(cache_manager.cache, :last_fluctuation)
+        else
+            pop!(cache_manager.cache)
+        end
+    end
+end
+
+"""
+    NoCacheManager()
+
+Disables caching: the cache is emptied after every estimand.
+"""
+struct NoCacheManager <: CacheManager
+    cache::Dict
+    NoCacheManager() = new(Dict())
+end
+
+"""
+    release!(cache_manager::NoCacheManager, Ψ)
+
+Empties the cache. `Ψ` is not used.
+"""
+function release!(cache_manager::NoCacheManager, Ψ)
+    empty!(cache_manager.cache)
+end
+
+"""
+    make_cache_manager(estimands, string)
+
+Builds the cache manager matching the `--cache-strategy` value `string`:
+`"release_unusable"`, `"no_cache"`, or a non-negative integer `K` (maximum cache size).
+Throws an `ArgumentError` for any other value.
+"""
+function make_cache_manager(estimands, string)
+    if string == "release_unusable"
+        return ReleaseUnusableCacheManager(TMLE.nuisance_counts(estimands))
+    elseif string == "no_cache"
+        return NoCacheManager()
+    end
+    max_size = tryparse(Int, string)
+    if max_size === nothing || max_size < 0
+        throw(ArgumentError("Unknown cache strategy: '$string'. Expected 'release_unusable', 'no_cache' or a non-negative integer."))
+    end
+    return MaxSizeCacheManager(max_size)
+end
diff --git a/src/merge.jl b/src/merge.jl
index 3c34649..4fd4b1f 100644
--- a/src/merge.jl
+++ b/src/merge.jl
@@ -22,7 +22,7 @@ function load_csv_files(data, files)
     return data
 end
 
-joining_keys() = ["PARAMETER_TYPE", "TREATMENTS", "CASE", "CONTROL", "TARGET", "CONFOUNDERS", "COVARIATES"]
+joining_keys() = 
["PARAMETER_TYPE", "TREATMENTS", "CASE", "CONTROL", "OUTCOME", "CONFOUNDERS", "COVARIATES"]
 
 function merge_csv_files(parsed_args)
     tmle_files = files_matching_prefix_and_suffix(
@@ -45,7 +45,7 @@ function merge_csv_files(parsed_args)
     end
 
     # Pvalue Adjustment by Target
-    for gp in groupby(data, :TARGET)
+    for gp in groupby(data, :OUTCOME)
         gp.TRAIT_ADJUSTED_TMLE_PVALUE = gp[:, :TMLE_PVALUE]
         pvalues = collect(skipmissing(gp.TMLE_PVALUE))
         if length(pvalues) > 0
diff --git a/src/runner.jl b/src/runner.jl
new file mode 100644
index 0000000..b9127cc
--- /dev/null
+++ b/src/runner.jl
@@ -0,0 +1,129 @@
+"""
+    FailedEstimation(message)
+
+Placeholder result stored when an estimator throws; `message` holds the stringified exception.
+"""
+struct FailedEstimation
+    message::String
+end
+
+"""
+    Runner(parsed_args)
+
+Holds everything needed to run the estimation described by the command line arguments
+`parsed_args`: the estimators, the estimands, the dataset, the cache manager and the
+output settings.
+"""
+mutable struct Runner
+    estimators::NamedTuple
+    estimands::Vector{TMLE.Estimand}
+    dataset::DataFrame
+    cache_manager::CacheManager
+    chunksize::Int
+    pvalue_threshold::Float64
+    output_ios::NamedTuple
+    verbosity::Int
+    function Runner(parsed_args)
+        datafile = parsed_args["dataset"]
+        paramfile = parsed_args["estimands-config"]
+        estimatorfile = parsed_args["estimators-config"]
+        verbosity = parsed_args["verbosity"]
+        csv_filename = parsed_args["csv-out"]
+        hdf5_filename = parsed_args["hdf5-out"]
+        pvalue_threshold = parsed_args["pval-threshold"]
+        chunksize = parsed_args["chunksize"]
+        rng = parsed_args["rng"]
+        cache_strategy = parsed_args["cache-strategy"]
+        sort_estimands = parsed_args["sort-estimands"]
+
+        # Output IOs
+        output_ios = (CSV=csv_filename, HDF5=hdf5_filename)
+        # Retrieve TMLE specifications
+        estimators = TargetedEstimation.load_tmle_spec(estimatorfile)
+        # Load dataset
+        dataset = TargetedEstimation.instantiate_dataset(datafile)
+        # Read parameter files
+        estimands = TargetedEstimation.proofread_estimands_from_yaml(paramfile, dataset)
+        if sort_estimands
+            estimands = groups_ordering(estimands;
+                brute_force=true,
+                do_shuffle=true,
+                rng=MersenneTwister(rng),
+                verbosity=verbosity
+            )
+        end
+        cache_manager = make_cache_manager(estimands, cache_strategy)
+
+        return new(estimators, estimands, dataset, cache_manager, chunksize, pvalue_threshold, output_ios, verbosity)
+    end
+end
+
+"""
+    (runner::Runner)(partition)
+
+Runs every estimator on each estimand whose index is in `partition` and returns one
+`NamedTuple` of results per estimand, keyed like `runner.estimators`. An estimator that
+throws yields a `FailedEstimation` instead of aborting the run.
+"""
+function (runner::Runner)(partition)
+    results = Vector{NamedTuple}(undef, size(partition, 1))
+    for (partition_index, param_index) in enumerate(partition)
+        Ψ = runner.estimands[param_index]
+        # Make sure data types are appropriate for the estimand
+        TargetedEstimation.coerce_types!(runner.dataset, Ψ)
+        estimators_results = []
+        for estimator in runner.estimators
+            # `try` opens its own scope: the result is returned as the value of the
+            # `try` expression so that it remains defined after the block.
+            result = try
+                estimate, _ = estimator(Ψ, runner.dataset,
+                    cache=runner.cache_manager.cache,
+                    verbosity=runner.verbosity,
+                )
+                estimate
+            catch e
+                # On error, keep the message and carry on with the next estimator
+                FailedEstimation(string(e))
+            end
+            push!(estimators_results, result)
+        end
+        # Update results
+        results[partition_index] = NamedTuple{keys(runner.estimators)}(estimators_results)
+        # Release cache
+        release!(runner.cache_manager, Ψ)
+        # Try clean C memory
+        GC.gc()
+        if Sys.islinux()
+            ccall(:malloc_trim, Cvoid, (Cint,), 0)
+        end
+    end
+    return results
+end
+
+"""
+    (runner::Runner)()
+
+Processes all estimands in chunks of `runner.chunksize`, appending each chunk of results
+to the output files. Returns 0.
+"""
+function (runner::Runner)()
+    # Split worklist in partitions
+    nparams = size(runner.estimands, 1)
+    for partition in Iterators.partition(1:nparams, runner.chunksize)
+        results = runner(partition)
+        # Append CSV result with partition
+        append_csv(runner.output_ios.CSV, results)
+        # Append HDF5 result if save-ic is true
+        update_jld2_output(runner.output_ios.HDF5, partition, results, runner.dataset; pval_threshold=runner.pvalue_threshold)
+    end
+
+    runner.verbosity >= 1 && @info "Done."
+    return 0
+end
+
+"""
+    run_estimation(parsed_args)
+
+Entry point: builds a `Runner` from the parsed command line arguments and runs it.
+"""
+run_estimation(parsed_args) = Runner(parsed_args)()
\ No newline at end of file
diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index 27c4c04..f6dfeb6 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -28,7 +28,7 @@ sieve_dataframe() = DataFrame(
     TREATMENTS=String[],
     CASE=String[],
     CONTROL=Union{String, Missing}[],
-    TARGET=String[],
+    OUTCOME=String[],
     CONFOUNDERS=String[],
     COVARIATES=Union{String, Missing}[],
     TMLE_ESTIMATE=Float64[],
@@ -39,7 +39,7 @@ empty_sieve_output() = DataFrame(
     TREATMENTS=String[],
     CASE=String[],
     CONTROL=Union{String, Missing}[],
-    TARGET=String[],
+    OUTCOME=String[],
     CONFOUNDERS=String[],
     COVARIATES=Union{String, Missing}[],
     SIEVE_STD = Float64[],
diff --git a/src/tmle.jl b/src/tmle.jl
deleted file mode 100644
index 9f0a62e..0000000
--- a/src/tmle.jl
+++ /dev/null
@@ -1,94 +0,0 @@
-struct MissingTMLEResult
-    parameter::TMLE.Parameter
-end
-
-function try_tmle!(cache; verbosity=1, threshold=1e-8, weighted_fluctuation=false)
-    try
-        tmle_result, _ = tmle!(cache; verbosity=verbosity, threshold=threshold, weighted_fluctuation=weighted_fluctuation)
-        return tmle_result, missing
-    catch e
-        @warn string("Failed to run Targeted Estimation for parameter:", cache.Ψ)
-        return MissingTMLEResult(cache.Ψ), string(e)
-    end
-end
-
-
-function partition_tmle!(
-    cache,
-    tmle_results,
-    logs,
-    partition,
-    tmle_spec,
-    parameters,
-    variables;
-    verbosity=0)
-    for (partition_index, param_index) in enumerate(partition)
-        previous_target_is_binary = isdefined(cache, :Ψ) ? cache.Ψ.target ∈ variables.binarytargets : nothing
-        Ψ = parameters[param_index]
-        # Update cache with new Ψ
-        update!(cache, Ψ)
-        # Maybe update cache with new η_spec
-        target_is_binary = Ψ.target ∈ variables.binarytargets
-        if !isdefined(cache, :η_spec) || !(target_is_binary === previous_target_is_binary)
-            Q_spec = target_is_binary ?
tmle_spec.Q_binary : tmle_spec.Q_continuous - η_spec = NuisanceSpec(Q_spec, tmle_spec.G, cache=tmle_spec.cache) - update!(cache, η_spec) - end - # Run TMLE - tmle_result, log = TargetedEstimation.try_tmle!( - cache; - verbosity=verbosity, - threshold=tmle_spec.threshold, - weighted_fluctuation=tmle_spec.weighted_fluctuation - ) - # Update results - tmle_results[partition_index] = tmle_result - logs[partition_index] = log - - # Try clean C memory - GC.gc() - if Sys.islinux() - ccall(:malloc_trim, Cvoid, (Cint,), 0) - end - end -end - -function tmle_estimation(parsed_args) - datafile = parsed_args["data"] - paramfile = parsed_args["param-file"] - estimatorfile = parsed_args["estimator-file"] - verbosity = parsed_args["verbosity"] - csv_file = parsed_args["csv-out"] - jld2_file = parsed_args["hdf5-out"] - pval_threshold = parsed_args["pval-threshold"] - chunksize = parsed_args["chunksize"] - - # Load dataset - dataset = TargetedEstimation.instantiate_dataset(datafile) - # Read parameter files - parameters = TargetedEstimation.read_parameters(paramfile, dataset) - optimize_ordering!(parameters) - - # Get covariate, confounder and treatment columns - variables = TargetedEstimation.variables(parameters, dataset) - TargetedEstimation.coerce_types!(dataset, variables) - - # Retrieve TMLE specifications - tmle_spec = TargetedEstimation.load_tmle_spec(estimatorfile) - - cache = TMLECache(dataset) - nparams = size(parameters, 1) - for partition in Iterators.partition(1:nparams, chunksize) - partition_size = size(partition, 1) - tmle_results = Vector{Union{TMLE.TMLEResult, MissingTMLEResult}}(undef, partition_size) - logs = Vector{Union{String, Missing}}(undef, partition_size) - partition_tmle!(cache, tmle_results, logs, partition, tmle_spec, parameters, variables; verbosity=verbosity) - # Append CSV result with partition - append_csv(csv_file, tmle_results, logs) - # Append HDF5 result if save-ic is true - update_jld2_output(jld2_file, partition, tmle_results, dataset; 
pval_threshold=pval_threshold) - end - - verbosity >= 1 && @info "Done." - return 0 -end diff --git a/src/utils.jl b/src/utils.jl index 9ca8dea..f76b3a2 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -10,7 +10,7 @@ empty_tmle_output(;size=0) = DataFrame( TREATMENTS=Vector{String}(undef, size), CASE=Vector{String}(undef, size), CONTROL=Vector{Union{Missing, String}}(undef, size), - TARGET=Vector{String}(undef, size), + OUTCOME=Vector{String}(undef, size), CONFOUNDERS=Vector{String}(undef, size), COVARIATES=Vector{Union{Missing, String}}(undef, size), INITIAL_ESTIMATE=Vector{Union{Missing, Float64}}(undef, size), @@ -28,9 +28,9 @@ empty_tmle_output(;size=0) = DataFrame( ) covariates_string(Ψ; join_string="_&_") = - length(Ψ.covariates) != 0 ? join(Ψ.covariates, join_string) : missing + length(Ψ.outcome_extra_covariates) != 0 ? join(Ψ.outcome_extra_covariates, join_string) : missing -function param_string(param::T) where T <: TMLE.Parameter +function param_string(param::T) where T <: TMLE.Estimand str = string(T) return startswith(str, "TMLE.") ? 
str[6:end] : str end @@ -44,7 +44,7 @@ control_string(t::Tuple{Vararg{NamedTuple}}; join_string="_&_") = control_string(t; join_string="_&_") = missing -control_string(Ψ::TMLE.Parameter; join_string="_&_") = +control_string(Ψ::TMLE.Estimand; join_string="_&_") = control_string(values(Ψ.treatment); join_string=join_string) treatment_string(Ψ; join_string="_&_") = join(keys(Ψ.treatment), join_string) @@ -60,7 +60,7 @@ function statistics_from_estimator(estimator) return (Ψ̂, std, pval, l, u) end -function statistics_from_result(result::TMLE.TMLEResult) +function statistics_from_result(result::TMLE.Estimate) Ψ̂₀ = result.initial # TMLE stats tmle_stats = statistics_from_estimator(result.tmle) @@ -69,7 +69,7 @@ function statistics_from_result(result::TMLE.TMLEResult) return Ψ̂₀, tmle_stats, onestep_stats end -statistics_from_result(result::MissingTMLEResult) = +statistics_from_result(result::FailedEstimation) = missing, (missing, missing, missing, missing, missing), (missing, missing, missing, missing, missing) @@ -109,7 +109,7 @@ function update_jld2_output(jld2_file::String, partition, tmle_results, dataset; for (partition_index, param_index) in enumerate(partition) r = tmle_results[partition_index] - if (r isa TMLE.TMLEResult) && (pvalue(OneSampleZTest(r.tmle)) <= pval_threshold) + if (r isa TMLE.Estimate) && (pvalue(OneSampleZTest(r.tmle)) <= pval_threshold) current_variables = variables(r.parameter) if previous_variables != current_variables sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables) @@ -127,52 +127,60 @@ function update_jld2_output(jld2_file::String, partition, tmle_results, dataset; end ##################################################################### -#####  Read Parameters #### +#####  Read Estimands #### ##################################################################### -function treatment_values(Ψ::Union{IATE, ATE}, treatment_names, treatment_types) +function convert_treatment_values(treatment_levels::NamedTuple{names, 
<:Tuple{Vararg{NamedTuple}}}, treatment_types) where names return [( - case = convert(treatment_types[tn], Ψ.treatment[tn].case), - control = convert(treatment_types[tn], Ψ.treatment[tn].control) + case = convert(treatment_types[tn], treatment_levels[tn].case), + control = convert(treatment_types[tn], treatment_levels[tn].control) ) - for tn in treatment_names] + for tn in names] end -treatment_values(Ψ::CM, treatment_names, treatment_types) = - [convert(treatment_types[tn], Ψ.treatment[tn]) for tn in treatment_names] +convert_treatment_values(treatment_levels::NamedTuple{names,}, treatment_types) where names = + [convert(treatment_types[tn], treatment_levels[tn]) for tn in names] -""" - parameters_from_yaml(param_file, dataset) +MissingSCMError() = ArgumentError(string("A Structural Causal Model should be provided in the configuration file in order to identify causal estimands.")) + +get_identification_method(method::Nothing) = BackdoorAdjustment() +get_identification_method(method) = method + +maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::SCM, method) = + identify(get_identification_method(method), Ψ, scm) + +maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::Nothing, method) = throw(MissingSCMError()) + +maybe_identify(Ψ, scm, method) = Ψ -Reads parameters from file and ensures that the parameters treatment in the config file -respect the treatment types in the dataset. """ -function read_parameters(param_file, dataset) - parameters = if any(endswith(param_file, ext) for ext in ("yaml", "yml")) - parameters_from_yaml(param_file) - else - deserialize(param_file) - end + read_estimands(param_file, dataset) +Reads estimands from file and ensures that the treatment values in the config file +respects the treatment types in the dataset. 
+""" +function proofread_estimands_from_yaml(filename, dataset) + config = configuration_from_yaml(filename) + estimands = Vector{TMLE.Estimand}(undef, length(config.estimands)) treatment_types = Dict() - for index in eachindex(parameters) - Ψ = parameters[index] - treatment_names = keys(Ψ.treatment) + for (index, Ψ) in enumerate(config.estimands) + statisticalΨ = TargetedEstimation.maybe_identify(Ψ, config.scm, config.adjustment) + treatment_names = keys(statisticalΨ.treatment_values) for tn in treatment_names haskey(treatment_types, tn) ? nothing : treatment_types[tn] = eltype(dataset[!, tn]) end new_treatment = NamedTuple{treatment_names}( - treatment_values(Ψ, treatment_names, treatment_types) + TargetedEstimation.convert_treatment_values(statisticalΨ.treatment_values, treatment_types) ) - parameters[index] = typeof(Ψ)( - target = Ψ.target, - treatment = new_treatment, - confounders = Ψ.confounders, - covariates = Ψ.covariates + estimands[index] = typeof(Ψ)( + outcome = Ψ.outcome, + treatment_values = new_treatment, + treatment_confounders = statisticalΨ.treatment_confounders, + outcome_extra_covariates = statisticalΨ.outcome_extra_covariates ) end - return collect(parameters) + return estimands end ##################################################################### @@ -194,13 +202,15 @@ instantiate_dataset(path::String) = isbinary(col, dataset) = Set(unique(skipmissing(dataset[!, col]))) == Set([0, 1]) +make_categorical(x::CategoricalVector, ordered) = x +make_categorical(x, ordered) = categorical(x, ordered=ordered) function make_categorical!(dataset, colname::Union{String, Symbol}; infer_ordered=false) ordered = false if infer_ordered ordered = eltype(dataset[!, colname]) <: Real end - dataset[!, colname] = categorical(dataset[!, colname], ordered=ordered) + dataset[!, colname] = make_categorical(dataset[!, colname], ordered) end function make_categorical!(dataset, colnames; infer_ordered=false) @@ -209,8 +219,10 @@ function make_categorical!(dataset, 
colnames; infer_ordered=false) end end +make_float(x) = float(x) + make_float!(dataset, colname::Union{String, Symbol}) = - dataset[!, colname] = float(dataset[!, colname]) + dataset[!, colname] = make_float(dataset[!, colname]) function make_float!(dataset, colnames) for colname in colnames @@ -218,55 +230,36 @@ function make_float!(dataset, colnames) end end -function coerce_types!(dataset, variables) - # Treatment columns are converted to categorical - make_categorical!(dataset, variables.treatments, infer_ordered=true) - # Confounders and Covariates are converted to Float64 - make_float!(dataset, vcat(variables.confounders, variables.covariates)) - # Binary targets are converted to categorical - make_categorical!(dataset, variables.binarytargets, infer_ordered=false) - # Continuous targets are converted to Float64 - make_float!(dataset, variables.continuoustargets) +function coerce_types!(dataset, Ψ) + categorical_variables = Set(keys(Ψ.treatment_values)) + continuous_variables = Set(Iterators.flatten(values(Ψ.treatment_confounders))) + union!(continuous_variables, Ψ.outcome_extra_covariates) + TMLE.is_binary(dataset, Ψ.outcome) ? + push!(categorical_variables, Ψ.outcome) : + push!(continuous_variables, Ψ.outcome) + make_categorical!(dataset, categorical_variables, infer_ordered=true) + make_float!(dataset, continuous_variables) end -variables(Ψ::TMLE.Parameter) = ( - target = Ψ.target, - covariates = Ψ.covariates, - confounders = Ψ.confounders, - treatments = keys(Ψ.treatment) - ) - -function variables(parameters::Vector{<:TMLE.Parameter}, dataset) - treatments = Set{Symbol}() - confounders = Set{Symbol}() - covariates = Set{Symbol}() - binarytargets = Set{Symbol}() - continuoustargets = Set{Symbol}() - for Ψ in parameters - push!(treatments, keys(Ψ.treatment)...) - push!(confounders, Ψ.confounders...) - length(Ψ.covariates) > 0 && push!(covariates, Ψ.covariates...) - isbinary(Ψ.target, dataset) ? 
push!(binarytargets, Ψ.target) : push!(continuoustargets, Ψ.target) - end - return ( - treatments=treatments, - confounders=confounders, - covariates=covariates, - binarytargets=binarytargets, - continuoustargets +variables(Ψ::TMLE.Estimand) = ( + outcome = Ψ.outcome, + covariates = Ψ.outcome_extra_covariates, + confounders = Ψ.treatment_confounders, + treatments = keys(Ψ.treatment_values) ) -end load_tmle_spec(file::Nothing) = ( - cache = false, - weighted_fluctuation = false, - threshold = 1e-8, - Q_continuous = LinearRegressor(), - Q_binary = LogisticClassifier(lambda=0.), - G = LogisticClassifier(lambda=0.) - ) + TMLE = TMLEE( + models = TMLE.default_models( + Q_binary = LogisticClassifier(lambda=0.), + Q_continuous = LinearRegressor(), + G = LogisticClassifier(lambda=0.) + ), + weighted = true, + ), + ) function load_tmle_spec(file) include(abspath(file)) - return merge(load_tmle_spec(nothing), tmle_spec::NamedTuple) + return ESTIMATORS end \ No newline at end of file diff --git a/test/cache_managers.jl b/test/cache_managers.jl new file mode 100644 index 0000000..6574680 --- /dev/null +++ b/test/cache_managers.jl @@ -0,0 +1,86 @@ +module TestRunner + +using TargetedEstimation +using Test +using TMLE + +@testset "Test NoCacheManager" begin + cache_manager = TargetedEstimation.NoCacheManager() + cache_manager.cache["Toto"] = 1 + cache_manager.cache["Tata"] = 2 + TargetedEstimation.release!(cache_manager, nothing) + @test cache_manager.cache == Dict() +end + +@testset "Test MaxSizeCacheManager" begin + cache_manager = TargetedEstimation.MaxSizeCacheManager(3) + cache_manager.cache["Toto"] = 1 + cache_manager.cache["Tata"] = 2 + TargetedEstimation.release!(cache_manager, nothing) + @test cache_manager.cache == Dict("Toto" => 1, "Tata" => 2) + cache_manager.cache["Titi"] = 3 + cache_manager.cache["Tutu"] = 4 + @test length(cache_manager.cache) == 4 + TargetedEstimation.release!(cache_manager, nothing) + @test length(cache_manager.cache) == 3 +end + +@testset "Test 
ReleaseUnusableCacheManager" begin + estimands = [ + ATE( + outcome=:Y, + treatment_values=(T₁=(case=1, control=0), T₂=(case=1, control=0)), + treatment_confounders=(T₁=[:W], T₂=[:W]) + ), + ATE( + outcome=:Y, + treatment_values=(T₁=(case=1, control=0), T₂=(case=2, control=0)), + treatment_confounders=(T₁=[:W], T₂=[:W]) + ), + ATE( + outcome=:Y, + treatment_values=(T₁=(case=1, control=0),), + treatment_confounders=(T₁=[:W],) + ), + ATE( + outcome=:Ynew, + treatment_values=(T₃=(case=1, control=0),), + treatment_confounders=(T₃=[:W],) + ) + ] + η_counts = TMLE.nuisance_counts(estimands) + cache_manager = TargetedEstimation.ReleaseUnusableCacheManager(η_counts) + # Estimation of the first estimand will fill the cache with the following + Y_T₁T₂ = TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) + cache_manager.cache[Y_T₁T₂] = 1 + T₁_W = TMLE.ConditionalDistribution(:T₁, (:W,)) + cache_manager.cache[T₁_W] = 1 + T₂_W = TMLE.ConditionalDistribution(:T₂, (:W,)) + cache_manager.cache[T₂_W] = 1 + cache_manager.cache[:last_fluctuation] = 1 + @test length(cache_manager.cache) == 4 + # After estimation of the first estimand, only the fluctuation is released + TargetedEstimation.release!(cache_manager, estimands[1]) + @test length(cache_manager.cache) == 3 + + # Estimation of the second estimand will not result in further nuisance functions + # Y_T₁T₂ and T₂_W are no longer needed + TargetedEstimation.release!(cache_manager, estimands[2]) + @test length(cache_manager.cache) == 1 + @test !haskey(cache_manager.cache, T₂_W) + @test !haskey(cache_manager.cache, Y_T₁T₂) + @test haskey(cache_manager.cache, T₁_W) + + # Estimation of the third estimand will fill the cache with the following + Y_T₁ = TMLE.ConditionalDistribution(:Y, (:T₁, :W)) + cache_manager.cache[Y_T₁] = 1 + # Y_T₁ and T₁_W are no longer needed + TargetedEstimation.release!(cache_manager, estimands[3]) + @test cache_manager.cache == Dict() + + +end + +end + +true \ No newline at end of file diff --git 
a/test/config/failing_parameters.yaml b/test/config/failing_parameters.yaml index 9991cd1..92fdeff 100644 --- a/test/config/failing_parameters.yaml +++ b/test/config/failing_parameters.yaml @@ -1,6 +1,6 @@ - Parameters: + Estimands: - type: ATE - target: EXTREME_BINARY - treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - confounders: [W1, W2] - covariates: [C1] \ No newline at end of file + outcome: EXTREME_BINARY + treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) + treatment_confounders: (T1 = [W1, W2], T2 = [W1, W2]) + outcome_extra_covariates: [C1] \ No newline at end of file diff --git a/test/config/tmle_config_2.jl b/test/config/ose_config.jl similarity index 83% rename from test/config/tmle_config_2.jl rename to test/config/ose_config.jl index d77aaa0..5462955 100644 --- a/test/config/tmle_config_2.jl +++ b/test/config/ose_config.jl @@ -1,13 +1,7 @@ evotree = EvoTreeClassifier(nrounds=10) -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true`` may result in faster execution but higher memory usage - cache = true, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = true, - # Propensity score threshold - threshold = 1e-8, +default_models = TMLE.default_models( # For the estimation of E[Y|W, T]: continuous target Q_continuous = Stack( metalearner = LinearRegressor(fit_intercept=false), @@ -42,3 +36,6 @@ tmle_spec = ( ) ) +ESTIMATORS = ( + OSE = OSE(models=default_models), +) \ No newline at end of file diff --git a/test/config/parameters.yaml b/test/config/parameters.yaml index 8399487..4dea179 100644 --- a/test/config/parameters.yaml +++ b/test/config/parameters.yaml @@ -1,29 +1,31 @@ -Parameters: - - type: IATE - target: CONTINUOUS, TARGET - treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - confounders: [W1, W2] - covariates: [C1] - - type: IATE - target: "BINARY/TARGET" - treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, 
case = 1)) - confounders: [W1, W2] - covariates: [C1] - - type: ATE - target: CONTINUOUS, TARGET - treatment: (T1 = (control = 0, case = 1),) - confounders: [W1, W2] - - type: IATE - target: CONTINUOUS, TARGET - treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) - confounders: [W1, W2] - - type: IATE - target: "BINARY/TARGET" - treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) - confounders: [W1, W2] - covariates: [C1] - - type: ATE - target: CONTINUOUS, TARGET - treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - confounders: [W1, W2] - covariates: [C1] \ No newline at end of file +Estimands: + - type: TMLE.StatisticalIATE + outcome: CONTINUOUS, outcome + treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) + treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) + outcome_extra_covariates: (:C1,) + - type: TMLE.StatisticalIATE + outcome: "BINARY/outcome" + treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) + treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) + outcome_extra_covariates: (:C1,) + - type: TMLE.StatisticalATE + outcome: CONTINUOUS, outcome + treatment_values: (T1 = (control = 0, case = 1),) + treatment_confounders: (T1 = (:W1, :W2),) + outcome_extra_covariates: () + - type: TMLE.StatisticalIATE + outcome: CONTINUOUS, outcome + treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) + treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) + outcome_extra_covariates: () + - type: TMLE.StatisticalIATE + outcome: "BINARY/outcome" + treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) + treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) + outcome_extra_covariates: (:C1,) + - type: TMLE.StatisticalATE + outcome: CONTINUOUS, outcome + treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) + treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) + 
outcome_extra_covariates: (:C1,) \ No newline at end of file diff --git a/test/config/sieve_tests_parameters_1.yaml b/test/config/sieve_tests_parameters_1.yaml index 510b500..9edf5fe 100644 --- a/test/config/sieve_tests_parameters_1.yaml +++ b/test/config/sieve_tests_parameters_1.yaml @@ -1,31 +1,31 @@ Parameters: - type: IATE - target: CONTINUOUS, TARGET + target: CONTINUOUS, OUTCOME treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) confounders: [W1, W2] covariates: [C1] - type: IATE - target: CONTINUOUS, TARGET + target: CONTINUOUS, OUTCOME treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) confounders: [W1, W2] covariates: [C1] - type: ATE - target: CONTINUOUS, TARGET + target: CONTINUOUS, OUTCOME treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) confounders: [W1, W2] covariates: [C1] - type: IATE - target: "BINARY/TARGET" + target: "BINARY/OUTCOME" treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) confounders: [W1, W2] covariates: [C1] - type: IATE - target: "BINARY/TARGET" + target: "BINARY/OUTCOME" treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) confounders: [W1, W2] covariates: [C1] - type: ATE - target: "BINARY/TARGET" + target: "BINARY/OUTCOME" treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) confounders: [W1, W2] covariates: [C1] diff --git a/test/config/sieve_tests_parameters_2.yaml b/test/config/sieve_tests_parameters_2.yaml index 371868c..0d147be 100644 --- a/test/config/sieve_tests_parameters_2.yaml +++ b/test/config/sieve_tests_parameters_2.yaml @@ -1,9 +1,9 @@ Parameters: - type: ATE - target: CONTINUOUS, TARGET + target: CONTINUOUS, OUTCOME treatment: (T1 = (control = 0, case = 1),) confounders: [W1] - type: CM - target: CONTINUOUS, TARGET + target: CONTINUOUS, OUTCOME treatment: (T1 = 0,) confounders: [W1] \ No newline at end of file diff --git a/test/config/tmle_config.jl b/test/config/tmle_config.jl index 
4281e80..8649d9f 100644 --- a/test/config/tmle_config.jl +++ b/test/config/tmle_config.jl @@ -1,16 +1,10 @@ evotree = EvoTreeClassifier(nrounds=10) -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache=false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 0.001, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( Q_continuous = Stack( metalearner = LinearRegressor(fit_intercept=false), resampling = CV(nfolds=2), + cache = false, interaction_glmnet = Pipeline( interaction_transformer = RestrictedInteractionTransformer(order=3, primary_variables_patterns=[r"^rs[0-9]+"]), glmnet = GLMNetRegressor(), @@ -46,6 +40,7 @@ tmle_spec = ( G = Stack( metalearner = LogisticClassifier(lambda=0., fit_intercept=false), resampling = StratifiedCV(nfolds=2), + cache = false, interaction_glmnet = Pipeline( interaction_transformer = RestrictedInteractionTransformer( order=2, @@ -58,4 +53,9 @@ tmle_spec = ( constant = ConstantClassifier(), evo = EvoTreeClassifier(nrounds=10) ) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=0.001), + OSE = OSE(models=default_models) ) \ No newline at end of file diff --git a/test/data/merge/empty_sieve.csv b/test/data/merge/empty_sieve.csv index 4b160ac..3241e3c 100644 --- a/test/data/merge/empty_sieve.csv +++ b/test/data/merge/empty_sieve.csv @@ -1 +1 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES +PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES diff --git a/test/data/merge/sieve_output_1.csv b/test/data/merge/sieve_output_1.csv index 119c5fa..cfe77b9 100644 --- a/test/data/merge/sieve_output_1.csv +++ b/test/data/merge/sieve_output_1.csv @@ -1,7 +1,7 @@ 
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB -IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,0.10514479130506516,0.0,0.39844897646996624,0.42804034052713935 -IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,0.10514479130506516,0.0,-0.42804034052713935,-0.39844897646996624 -ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,0.14041906595230103,0.0,-0.6661267914170061,-0.6266080320986587 -IATE,T2_&_T1,1_&_1,0_&_0,BINARY/TARGET,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04427033991279833,0.04393992135752185 -IATE,T2_&_T1,0_&_1,1_&_0,BINARY/TARGET,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04393992135752185,0.04427033991279833 -ATE,T2_&_T1,1_&_1,0_&_0,BINARY/TARGET,W1_&_W2,C1,0.3183199465274811,2.0186741955776768e-7,-0.16400271059341004,-0.07418473022532235 \ No newline at end of file +PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB +IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.10514479130506516,0.0,0.39844897646996624,0.42804034052713935 +IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.10514479130506516,0.0,-0.42804034052713935,-0.39844897646996624 +ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.14041906595230103,0.0,-0.6661267914170061,-0.6266080320986587 +IATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04427033991279833,0.04393992135752185 +IATE,T2_&_T1,0_&_1,1_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04393992135752185,0.04427033991279833 +ATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3183199465274811,2.0186741955776768e-7,-0.16400271059341004,-0.07418473022532235 \ No newline at end of file diff --git a/test/data/merge/sieve_output_2.csv b/test/data/merge/sieve_output_2.csv index 1de809d..ad536a5 100644 --- a/test/data/merge/sieve_output_2.csv +++ 
b/test/data/merge/sieve_output_2.csv @@ -1,3 +1,3 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB -ATE,T1,1,0,"CONTINUOUS, TARGET",W1,,0.17398861050605774,0.0,-1.1780933630666999,-1.1291269782236455 -CM,T1,0,,"CONTINUOUS, TARGET",W1,,0.09048639982938766,0.0,3.4078416054701566,3.433307593526622 +PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB +ATE,T1,1,0,"CONTINUOUS, OUTCOME",W1,,0.17398861050605774,0.0,-1.1780933630666999,-1.1291269782236455 +CM,T1,0,,"CONTINUOUS, OUTCOME",W1,,0.09048639982938766,0.0,3.4078416054701566,3.433307593526622 diff --git a/test/data/merge/tmle_output_1.csv b/test/data/merge/tmle_output_1.csv index 21d2ae0..574764e 100644 --- a/test/data/merge/tmle_output_1.csv +++ b/test/data/merge/tmle_output_1.csv @@ -1,7 +1,7 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG -IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,0.31289224196843934,0.4132446584985528,0.11215083413905078,0.0002973305980956673,0.19204601585918746,0.6344433011379181,0.847922052214297,0.020796900602100377,0.1808979087784819,0.935635289898083,0.26988547749823344, -IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,-0.31289224196843934,-0.4132446584985528,0.11215083413905078,0.0002973305980956673,-0.6344433011379181,-0.19204601585918746,0.1132683682000456,0.007992877453115943,0.05298134725065751,0.3761329000024115,0.8446783494259822, -ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,-0.6913496525247373,-0.6463674117578324,0.14465023358495563,1.340594727468874e-5,-0.9316656493686948,-0.36106917414697,0.5750731876257001,0.6616018441386402,0.6626823260683342,0.9943324985582943,0.379330384132208, 
-IATE,T2_&_T1,1_&_1,0_&_0,BINARY/TARGET,W1_&_W2,C1,0.015114902768326591,-0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6129084528900649,0.6125780343347885,0.18524882713929447,0.6791824198934945,0.375539677029601,0.2563919842828919,0.48004747095683487, -IATE,T2_&_T1,0_&_1,1_&_0,BINARY/TARGET,W1_&_W2,C1,-0.015114902768326591,0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6125780343347885,0.6129084528900649,0.8483223420477747,0.6377742233856645,0.653653091532227,0.6594583118531032,0.3862219522578093, -ATE,T2_&_T1,1_&_1,0_&_0,BINARY/TARGET,W1_&_W2,C1,-0.07124029524113125,-0.1190937204093662,0.3182495428000389,0.7086573850781657,-0.7468080019909507,0.5086205611722183,0.7833975115669672,0.13752408975674002,0.8906874812178406,0.7407394467826026,0.6391102550858685, +PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG +IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.31289224196843934,0.4132446584985528,0.11215083413905078,0.0002973305980956673,0.19204601585918746,0.6344433011379181,0.847922052214297,0.020796900602100377,0.1808979087784819,0.935635289898083,0.26988547749823344, +IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,-0.31289224196843934,-0.4132446584985528,0.11215083413905078,0.0002973305980956673,-0.6344433011379181,-0.19204601585918746,0.1132683682000456,0.007992877453115943,0.05298134725065751,0.3761329000024115,0.8446783494259822, +ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,-0.6913496525247373,-0.6463674117578324,0.14465023358495563,1.340594727468874e-5,-0.9316656493686948,-0.36106917414697,0.5750731876257001,0.6616018441386402,0.6626823260683342,0.9943324985582943,0.379330384132208, 
+IATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.015114902768326591,-0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6129084528900649,0.6125780343347885,0.18524882713929447,0.6791824198934945,0.375539677029601,0.2563919842828919,0.48004747095683487, +IATE,T2_&_T1,0_&_1,1_&_0,BINARY/OUTCOME,W1_&_W2,C1,-0.015114902768326591,0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6125780343347885,0.6129084528900649,0.8483223420477747,0.6377742233856645,0.653653091532227,0.6594583118531032,0.3862219522578093, +ATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,-0.07124029524113125,-0.1190937204093662,0.3182495428000389,0.7086573850781657,-0.7468080019909507,0.5086205611722183,0.7833975115669672,0.13752408975674002,0.8906874812178406,0.7407394467826026,0.6391102550858685, diff --git a/test/data/merge/tmle_output_2.csv b/test/data/merge/tmle_output_2.csv index 4e76aa3..a7d02aa 100644 --- a/test/data/merge/tmle_output_2.csv +++ b/test/data/merge/tmle_output_2.csv @@ -1,3 +1,3 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG -ATE,T1,1,0,"CONTINUOUS, TARGET",W1,,-1.170325854136744,,,,,,,,,,,"Error" -CM,T1,0,,"CONTINUOUS, TARGET",W1,,3.4304882451014653,3.4205745994983894,0.08649674229047534,1.6698354099787253e-94,3.249974334825743,3.5911748641710357,0.11925931782610122,0.1908267610598129,0.3548787761302413,0.6543239505251285,0.8668053182115685, +PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG +ATE,T1,1,0,"CONTINUOUS, OUTCOME",W1,,-1.170325854136744,,,,,,,,,,,"Error" +CM,T1,0,,"CONTINUOUS, 
OUTCOME",W1,,3.4304882451014653,3.4205745994983894,0.08649674229047534,1.6698354099787253e-94,3.249974334825743,3.5911748641710357,0.11925931782610122,0.1908267610598129,0.3548787761302413,0.6543239505251285,0.8668053182115685, diff --git a/test/load_tmle_spec.jl b/test/load_tmle_spec.jl deleted file mode 100644 index 71988cc..0000000 --- a/test/load_tmle_spec.jl +++ /dev/null @@ -1,113 +0,0 @@ -module TestsStackBuilding - -using Test -using TargetedEstimation -using MLJ -using MLJGLMInterface -using MLJLinearModels -using EvoTrees - -@testset "Test tmle_spec_from_yaml: Only Stacks" begin - tmle_spec = TargetedEstimation.load_tmle_spec(joinpath("config", "tmle_config.jl")) - - @test tmle_spec.threshold == 0.001 - @test tmle_spec.weighted_fluctuation == false - # Test binary target TMLE's Qstack - Q_binary = tmle_spec.Q_binary - @test Q_binary.cache == false - ## Checking Qstack.metalearner - @test Q_binary.metalearner isa LogisticClassifier - @test Q_binary.metalearner.fit_intercept == false - ## Checking Qstack.resampling - @test Q_binary.resampling isa StratifiedCV - @test Q_binary.resampling.nfolds == 2 - ## Checking Qstack EvoTree models - @test Q_binary.gridsearch_evo.tuning.goal == 5 - @test Q_binary.gridsearch_evo.cache == false - @test Q_binary.gridsearch_evo.model.nrounds == 10 - @test Q_binary.gridsearch_evo.resampling isa CV - ranges = Q_binary.gridsearch_evo.range - @test ranges[2].lower == 1e-5 - @test ranges[2].upper == 10 - @test ranges[2].scale == :log - @test ranges[1].lower == 3 - @test ranges[1].upper == 5 - @test ranges[1].scale == :linear - ## Checking Qstack Interaction Logistic models - @test Q_binary.interaction_glmnet isa MLJ.ProbabilisticPipeline - @test Q_binary.interaction_glmnet.interaction_transformer.order == 2 - ## Checking Qstack HAL model - @test Q_binary.hal.lambda == 10 - @test Q_binary.hal.smoothness_orders == 1 - @test Q_binary.hal.cv_select == false - @test Q_binary.hal.num_knots == [10, 5] - - # Test continuous target TMLE's 
Qstack - Q_continuous = tmle_spec.Q_continuous - ## Checking Qstack.metalearner - @test Q_continuous.metalearner isa MLJLinearModels.LinearRegressor - @test Q_continuous.metalearner.fit_intercept == false - - ## Checking Qstack.resampling - @test Q_continuous.resampling isa CV - @test Q_continuous.resampling.nfolds == 2 - ## Checking Qstack EvoTree models - @test Q_continuous.evo_10.nrounds == 10 - @test Q_continuous.evo_20.nrounds == 20 - ## Checking Qstack Interaction Linear model - @test Q_continuous.interaction_glmnet isa MLJ.DeterministicPipeline - @test Q_continuous.interaction_glmnet.interaction_transformer.order == 3 - @test Q_continuous.interaction_glmnet.interaction_transformer.primary_variables == [] - @test Q_continuous.interaction_glmnet.interaction_transformer.primary_variables_patterns == [r"^rs[0-9]+"] - ## Checking Qstack HAL model - @test Q_continuous.hal.lambda == 10 - @test Q_continuous.hal.smoothness_orders == 1 - @test Q_continuous.hal.cv_select == false - @test Q_continuous.hal.num_knots == [10, 5] - - # TMLE G Stack - G = tmle_spec.G - ## Checking Gstack.metalearner - @test G.metalearner isa LogisticClassifier - @test G.metalearner.fit_intercept == false - ## Checking Gstack.resampling - @test G.resampling isa StratifiedCV - @test G.resampling.nfolds == 2 - ## Checking Gstack models - @test G.interaction_glmnet.interaction_transformer.order == 2 - @test G.interaction_glmnet.interaction_transformer.primary_variables == [:T1, :T2] - @test G.interaction_glmnet.interaction_transformer.primary_variables_patterns == [r"C"] - @test G.evo.nrounds == 10 - - @test tmle_spec.cache == false -end - -@testset "Test tmle_spec_from_yaml: Simple models and GridSearch" begin - tmle_spec = TargetedEstimation.load_tmle_spec(joinpath("config", "tmle_config_2.jl")) - @test tmle_spec.G.cache == true - @test tmle_spec.weighted_fluctuation == true - @test tmle_spec.G.measure isa LogLoss - @test tmle_spec.G.tuning.goal == 5 - @test tmle_spec.G.model.nrounds == 10 - 
lambda_range = tmle_spec.G.range[2] - @test lambda_range.lower == 1e-5 - @test lambda_range.upper == 10 - @test lambda_range.scale == :log - depth_range = tmle_spec.G.range[1] - @test depth_range.lower == 3 - @test depth_range.upper == 5 - @test depth_range.scale == :linear - - @test tmle_spec.Q_binary isa MLJ.ProbabilisticPipeline - @test tmle_spec.threshold == 1e-8 - - @test tmle_spec.Q_continuous.cache == true - @test tmle_spec.Q_continuous.interaction_glmnet.cache == true - - @test tmle_spec.cache == true -end - -end; - -true - diff --git a/test/merge.jl b/test/merge.jl index 038ed03..d8967ca 100644 --- a/test/merge.jl +++ b/test/merge.jl @@ -15,7 +15,7 @@ using DataFrames output = CSV.read(parsed_args["out"], DataFrame) @test names(output) == [ "PARAMETER_TYPE", "TREATMENTS", "CASE", - "CONTROL", "TARGET", "CONFOUNDERS", + "CONTROL", "OUTCOME", "CONFOUNDERS", "COVARIATES", "INITIAL_ESTIMATE", "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB", "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", @@ -42,7 +42,7 @@ end @testset "Test merge_csv_files, sieve file" begin sieve_colnames = [ "PARAMETER_TYPE", "TREATMENTS", "CASE", - "CONTROL", "TARGET", "CONFOUNDERS", + "CONTROL", "OUTCOME", "CONFOUNDERS", "COVARIATES", "INITIAL_ESTIMATE", "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB", "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", @@ -88,7 +88,7 @@ end output = CSV.read(parsed_args["out"], DataFrame) @test names(output) == [ "PARAMETER_TYPE", "TREATMENTS", "CASE", - "CONTROL", "TARGET", "CONFOUNDERS", + "CONTROL", "OUTCOME", "CONFOUNDERS", "COVARIATES", "INITIAL_ESTIMATE", "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB", "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", diff --git a/test/resampling.jl b/test/resampling.jl index 1e13e8d..9032ad7 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -76,7 
+76,6 @@ end @test stratification_col == ["_0", "_0", "_1", "_0", "_1", "_0", "_missing"] end - @testset "Test JointStratifiedCV" begin X = ( X1 = [0, 0, 1, 0, 1, 0, missing], diff --git a/test/tmle.jl b/test/runner.jl similarity index 70% rename from test/tmle.jl rename to test/runner.jl index 281041b..5f61bca 100644 --- a/test/tmle.jl +++ b/test/runner.jl @@ -10,23 +10,31 @@ using LogExpFunctions using CategoricalArrays using DataFrames using CSV +using Serialization using Arrow +using YAML + +PKGDIR = pkgdir(TargetedEstimation) + +CONFIGDIR = joinpath(PKGDIR, "test", "config") + +include(joinpath(PKGDIR, "test", "testutils.jl")) function test_tmle_output(param_index, jldio, data, expected_param, sample_ids_idx) jld2_res = jldio[string(param_index)] csv_row = data[param_index, :] Ψ = jld2_res["result"].parameter - @test jld2_res["result"] isa TMLE.TMLEResult + @test jld2_res["result"] isa TMLE.Estimate @test jld2_res["result"].tmle.Ψ̂ isa Float64 @test Ψ == expected_param @test jld2_res["sample_ids_idx"] == sample_ids_idx sample_ids = jldio[string(jld2_res["sample_ids_idx"])]["sample_ids"] - if expected_param.target == Symbol("BINARY/TARGET") + if expected_param.target == Symbol("BINARY/OUTCOME") @test sample_ids == 2:1000 else @test sample_ids == 1:1000 end - @test jld2_res["result"] isa TMLE.TMLEResult + @test jld2_res["result"] isa TMLE.Estimate if csv_row.COVARIATES === missing @test TargetedEstimation.covariates_string(Ψ) === csv_row.COVARIATES @@ -42,11 +50,11 @@ function test_tmle_output(param_index, jldio, data, expected_param, sample_ids_i end """ -CONTINUOUS_TARGET: +CONTINUOUS_OUTCOME: - IATE(0->1, 0->1) = E[W₂] = 0.5 - ATE(0->1, 0->1) = -4 E[C₁] + 1 + E[W₂] = -2 + 1 + 0.5 = -0.5 -BINARY_TARGET: +BINARY_OUTCOME: - IATE(0->1, 0->1) = - ATE(0->1, 0->1) = @@ -77,9 +85,9 @@ function build_dataset(;n=1000, format="csv") C1 = C₁, ) # Comma in name - dataset[!, "CONTINUOUS, TARGET"] = categorical(y₁) + dataset[!, "CONTINUOUS, OUTCOME"] = categorical(y₁) # 
Slash in name - dataset[!, "BINARY/TARGET"] = categorical(y₂) + dataset[!, "BINARY/OUTCOME"] = categorical(y₂) dataset[!, "EXTREME_BINARY"] = categorical(vcat(0, ones(n-1))) format == "csv" ? CSV.write("data.csv", dataset) : Arrow.write("data.arrow", dataset) @@ -88,13 +96,13 @@ end @testset "Test partition_tmle!" begin build_dataset(;n=1000, format="csv") dataset = TargetedEstimation.instantiate_dataset("data.csv") - parameters = TargetedEstimation.read_parameters(joinpath("config", "parameters.yaml"), dataset) - variables = TargetedEstimation.variables(parameters, dataset) + estimands = TargetedEstimation.read_estimands(joinpath(CONFIGDIR, "parameters.yaml"), dataset) + variables = TargetedEstimation.variables(estimands, dataset) TargetedEstimation.coerce_types!(dataset, variables) tmle_spec = TargetedEstimation.load_tmle_spec(joinpath("config", "tmle_config.jl")) cache = TMLECache(dataset) - tmle_results = Vector{Union{TMLE.TMLEResult, TargetedEstimation.MissingTMLEResult}}(undef, 3) + tmle_results = Vector{Union{TMLE.Estimate, TargetedEstimation.FailedEstimation}}(undef, 3) logs = Vector{Union{String, Missing}}(undef, 3) part = 4:6 TargetedEstimation.partition_tmle!(cache, tmle_results, logs, part, tmle_spec, parameters, variables; verbosity=0) @@ -107,31 +115,35 @@ end @testset "Test tmle_estimation" begin expected_parameters = [ - ATE(Symbol("CONTINUOUS, TARGET"), (T1 = (case = true, control = false),), [:W1, :W2], Symbol[]), - IATE(Symbol("BINARY/TARGET"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]), - IATE(Symbol("BINARY/TARGET"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], [:C1]), - IATE(Symbol("CONTINUOUS, TARGET"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], Symbol[]), - IATE(Symbol("CONTINUOUS, TARGET"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]), - 
ATE(Symbol("CONTINUOUS, TARGET"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]) + ATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false),), [:W1, :W2], Symbol[]), + IATE(Symbol("BINARY/OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]), + IATE(Symbol("BINARY/OUTCOME"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], [:C1]), + IATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], Symbol[]), + IATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]), + ATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]) ] + outfilename = "statistical_estimands.yml" + configuration_to_yaml(outfilename, statistical_estimands_only_config()) expected_param_sample_ids_idx = [1, 2, 2, 4, 5, 5] # Run tests over CSV and Arrow data formats for format in ("csv", "arrow") build_dataset(;n=1000, format=format) parsed_args = Dict( - "data" => string("data.", format), - "param-file" => nothing, - "estimator-file" => joinpath("config", "tmle_config.jl"), + "dataset" => string("data.", format), + "estimands-config" => nothing, + "estimators-config" => joinpath(CONFIGDIR, "tmle_config.jl"), "csv-out" => "output.csv", "verbosity" => 0, "hdf5-out" => "output.hdf5", "pval-threshold" => 1., "chunksize" => nothing ) + runner = TargetedEstimation.Runner(parsed_args) for param_file in ("parameters.yaml", "parameters.bin") for chunksize in (4, 10) # Only one continuous phenotype / machines not saved / no adaptive cv - parsed_args["param-file"] = joinpath("config", param_file) + + parsed_args["estimands-config"] = outfilename parsed_args["chunksize"] = chunksize tmle_estimation(parsed_args) @@ -151,26 +163,29 @@ end 
rm(parsed_args["hdf5-out"]) end end - rm(parsed_args["data"]) + rm(parsed_args["dataset"]) end end @testset "Test tmle_estimation: No hdf5 file" begin build_dataset(;n=1000, format="csv") + estimands_filename = "estimands_test.yaml" + configuration_to_yaml(estimands_filename, statistical_estimands_only_config()) # Only one continuous phenotype / machines not saved / no adaptive cv - param_file = "parameters.yaml" parsed_args = Dict( - "data" => "data.csv", - "param-file" => joinpath("config", param_file), - "estimator-file" => joinpath("config", "tmle_config_2.jl"), + "dataset" => "data.csv", + "estimands-config" => estimands_filename, + "estimators-config" => joinpath(CONFIGDIR, "ose_config.jl"), "csv-out" => "output.csv", "verbosity" => 0, "hdf5-out" => nothing, "pval-threshold" => 1., - "chunksize" => 10 + "chunksize" => 10, + "rng" => 123, + "sort-estimands" => false, + "cache-strategy" => "release_unusable" ) - - tmle_estimation(parsed_args) + run_estimation(parsed_args) ## Check CSV file data = CSV.read(parsed_args["csv-out"], DataFrame) @@ -179,16 +194,16 @@ end all(x === missing for x in data.LOG) # Clean rm(parsed_args["csv-out"]) - rm(parsed_args["data"]) + rm(parsed_args["dataset"]) end @testset "Test tmle_estimation: lower p-value threhsold" begin build_dataset(;n=1000, format="csv") parsed_args = Dict( - "data" => "data.csv", - "param-file" => joinpath("config", "parameters.yaml"), - "estimator-file" => joinpath("config", "tmle_config.jl"), + "dataset" => "data.csv", + "estimands-config" => joinpath("config", "parameters.yaml"), + "estimators-config" => joinpath("config", "tmle_config.jl"), "csv-out" => "output.csv", "verbosity" => 0, "hdf5-out" => "output.hdf5", @@ -209,7 +224,7 @@ end @test jldio["1"]["result"].tmle.Ψ̂ == data[1, :TMLE_ESTIMATE] - rm(parsed_args["data"]) + rm(parsed_args["dataset"]) rm(parsed_args["csv-out"]) rm(parsed_args["hdf5-out"]) end @@ -217,9 +232,9 @@ end @testset "Test tmle_estimation: Failing parameters" begin 
build_dataset(;n=1000, format="csv") parsed_args = Dict( - "data" => "data.csv", - "param-file" => joinpath("config", "failing_parameters.yaml"), - "estimator-file" => joinpath("config", "tmle_config.jl"), + "dataset" => "data.csv", + "estimands-config" => joinpath("config", "failing_parameters.yaml"), + "estimators-config" => joinpath("config", "tmle_config.jl"), "csv-out" => "output.csv", "verbosity" => 0, "hdf5-out" => nothing, @@ -234,7 +249,7 @@ end @test size(data) == (1, 19) @test data[1, :TMLE_ESTIMATE] === missing - rm(parsed_args["data"]) + rm(parsed_args["dataset"]) rm(parsed_args["csv-out"]) end diff --git a/test/runtests.jl b/test/runtests.jl index 7c461fe..0a34f46 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,8 @@ -include("tmle.jl") -include("load_tmle_spec.jl") + +include("cache_managers.jl") include("utils.jl") include("sieve_variance.jl") +include("runner.jl") include("merge.jl") include("resampling.jl") include(joinpath("models", "glmnet.jl")) diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index b90153d..901a286 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -38,8 +38,8 @@ function build_dataset(sample_ids) C1 = C₁, ) - dataset[!, "CONTINUOUS, TARGET"] = y₁ - dataset[!, "BINARY/TARGET"] = categorical(y₂) + dataset[!, "CONTINUOUS, OUTCOME"] = y₁ + dataset[!, "BINARY/OUTCOME"] = categorical(y₂) CSV.write("data.csv", dataset) end @@ -91,7 +91,7 @@ end function test_initial_output(output, expected_output) # Metadata columns - for col in [:PARAMETER_TYPE, :TREATMENTS, :CASE, :CONTROL, :TARGET, :CONFOUNDERS, :COVARIATES] + for col in [:PARAMETER_TYPE, :TREATMENTS, :CASE, :CONTROL, :OUTCOME, :CONFOUNDERS, :COVARIATES] for index in eachindex(output[!, col]) if expected_output[index, col] === missing @test expected_output[index, col] === output[index, col] @@ -126,7 +126,7 @@ end result = io[key]["result"] IC = result.tmle.IC # missing sample - if result.parameter.target == Symbol("BINARY/TARGET") + if 
result.parameter.target == Symbol("BINARY/OUTCOME") IC = vcat(0, IC) end @test convert(Vector{Float32}, IC) == influence_curves[parse(Int, key), :] @@ -138,7 +138,7 @@ end TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2"], CASE=["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true"], CONTROL=["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false"], - TARGET = ["BINARY/TARGET", "BINARY/TARGET", "BINARY/TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET"], + OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"], CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2"], COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1"] ) @@ -159,7 +159,7 @@ end TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1", "T1"], CASE = ["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true", "true", "false"], CONTROL = ["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false", "false", missing], - TARGET = ["BINARY/TARGET", "BINARY/TARGET", "BINARY/TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET"], + OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"], CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1", "W1"], COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1", missing, missing] ) @@ -319,7 +319,7 @@ end TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1", "T1"], CASE = ["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true", "true", "false"], CONTROL = 
["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false", "false", missing], - TARGET = ["BINARY/TARGET", "BINARY/TARGET", "BINARY/TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET"], + OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"], CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1", "W1"], COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1", missing, missing] ) diff --git a/test/testutils.jl b/test/testutils.jl new file mode 100644 index 0000000..76d6ab4 --- /dev/null +++ b/test/testutils.jl @@ -0,0 +1,57 @@ +using TMLE + +function statistical_estimands_only_config() + configuration = Configuration( + estimands=[ + IATE( + outcome = Symbol("CONTINUOUS, outcome"), + treatment_values = ( + T1 = (case = true, control = false), + T2 = (case = true, control = false)), + treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), + outcome_extra_covariates = (:C1,) + ), + IATE( + outcome = Symbol("BINARY/outcome"), + treatment_values = ( + T1 = (case = true, control = false), + T2 = (case = true, control = false)), + treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), + outcome_extra_covariates = (:C1,) + ), + ATE( + outcome = Symbol("CONTINUOUS, outcome"), + treatment_values = (T1 = (case = true, control = false),), + treatment_confounders = (T1 = (:W1, :W2),), + outcome_extra_covariates = () + ), + IATE( + outcome = Symbol("CONTINUOUS, outcome"), + treatment_values = ( + T1 = (case = true, control = false), + T2 = (case = false, control = true) + ), + treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), + outcome_extra_covariates = () + ), + IATE( + outcome = Symbol("BINARY/outcome"), + treatment_values = ( + T1 = (case = true, control = false), + T2 = (case = false, control = true) 
+ ), + treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), + outcome_extra_covariates = (:C1,) + ), + ATE( + outcome = Symbol("CONTINUOUS, outcome"), + treatment_values = ( + T1 = (case = true, control = false), + T2 = (case = true, control = false)), + treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), + outcome_extra_covariates = (:C1,) + ) + ] + ) + return configuration +end diff --git a/test/utils.jl b/test/utils.jl index 480fb24..386f413 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -5,13 +5,69 @@ using TargetedEstimation using TMLE using DataFrames using CSV +using MLJBase +using MLJLinearModels using CategoricalArrays +PROJECT_DIR = dirname(dirname(pathof(TargetedEstimation))) + +include(joinpath(PROJECT_DIR, "test", "testutils.jl")) + +@testset "Test load_tmle_spec: with configuration file" begin + estimators = TargetedEstimation.load_tmle_spec(joinpath(PROJECT_DIR, "test", "config", "tmle_config.jl")) + @test estimators.TMLE isa TMLE.TMLEE + @test estimators.OSE isa TMLE.OSE + @test estimators.TMLE.weighted === true + @test estimators.TMLE.models.G_default === estimators.OSE.models.G_default + @test estimators.TMLE.models.G_default isa MLJBase.ProbabilisticStack +end + +@testset "Test load_tmle_spec: no configuration file" begin + estimators = TargetedEstimation.load_tmle_spec(nothing) + @test !haskey(estimators, :OSE) + @test haskey(estimators, :TMLE) + @test estimators.TMLE.weighted === true + @test estimators.TMLE.models.G_default isa LogisticClassifier +end + +@testset "Test convert_treatment_values" begin + treatment_types = Dict(:T₁=> Union{Missing, Bool}, :T₂=> Int) + newT = TargetedEstimation.convert_treatment_values((T₁=1,), treatment_types) + @test newT isa Vector{Bool} + @test newT == [1] + + newT = TargetedEstimation.convert_treatment_values((T₁=(case=1, control=0.),), treatment_types) + @test newT isa Vector{NamedTuple{(:case, :control), Tuple{Bool, Bool}}} + @test newT == [(case = true, control = false)] + + newT = 
TargetedEstimation.convert_treatment_values((T₁=(case=1, control=0.), T₂=(case=true, control=0)), treatment_types) + @test newT isa Vector{NamedTuple{(:case, :control)}} + @test newT == [(case = true, control = false), (case = 1, control = 0)] +end + +@testset "Test proofread_estimands_from_yaml" begin + filename = "statistical_estimands.yml" + configuration_to_yaml(filename, statistical_estimands_only_config()) + dataset = DataFrame(T1 = [1., 0.], T2=[true, false]) + estimands = TargetedEstimation.proofread_estimands_from_yaml(filename, dataset) + for estimand in estimands + if haskey(estimand.treatment_values, :T1) + @test estimand.treatment_values.T1.case isa Float64 + @test estimand.treatment_values.T1.control isa Float64 + end + if haskey(estimand.treatment_values, :T2) + @test estimand.treatment_values.T2.case isa Bool + @test estimand.treatment_values.T2.control isa Bool + end + end + rm(filename) +end + @testset "Test CSV writing" begin Ψ = IATE( - target=:Y, - treatment=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")), - confounders=[:W₁, :W₂] + outcome=:Y, + treatment_values=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")), + treatment_confounders=(T₁=[:W₁, :W₂], T₂=[:W₁, :W₂]) ) @test TargetedEstimation.covariates_string(Ψ) === missing @test TargetedEstimation.param_string(Ψ) == "IATE" @@ -33,38 +89,43 @@ using CategoricalArrays @test TargetedEstimation.control_string(Ψ) === missing @test TargetedEstimation.treatment_string(Ψ) == "T₁_&_T₂" @test TargetedEstimation.confounders_string(Ψ) == "W₁_&_W₂" - end -@testset "Test variables" begin - parameters = [ - IATE( - target=:Y, - treatment=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")), - confounders=[:W₁, :W₂]), - CM( - target=:Y₂, - treatment=(T₁=1, T₃="AC"), - confounders=[:W₃, :W₂], - covariates=[:C₁]) - ] - dataset = DataFrame(Y=[1.1, 2.2, missing], Y₂=[1, 0, missing]) - variables = TargetedEstimation.variables(parameters, dataset) - @test variables == ( - treatments = Set([:T₃, :T₁, 
:T₂]), - confounders = Set([:W₁, :W₃, :W₂]), - covariates = Set([:C₁]), - binarytargets = Set([:Y₂]), - continuoustargets = Set([:Y]) +@testset "Test coerce_types!" begin + Ψ = IATE( + outcome=:Ycont, + treatment_values=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")), + treatment_confounders=(T₁=[:W₁, :W₂], T₂=[:W₁, :W₂]), ) - variables = TargetedEstimation.variables(parameters[1]) - @test variables == ( - target = :Y, - covariates = Symbol[], - confounders = [:W₁, :W₂], - treatments = (:T₁, :T₂) + dataset = DataFrame( + Ycont = [1.1, 2.2, missing], + Ycat = [1., 0., missing], + T₁ = [1, 0, missing], + T₂ = [missing, "AC", "CC"], + W₁ = [1., 0., 0.], + W₂ = [missing, 0., 0.], + C = [1, 2, 3] ) + TargetedEstimation.coerce_types!(dataset, Ψ) + + @test dataset.T₁ isa CategoricalArray + @test dataset.T₂ isa CategoricalArray + for var in [:W₁, :W₂, :Ycont] + @test eltype(dataset[!, var]) <: Union{Missing, Float64} + end + + Ψ = IATE( + outcome=:Ycat, + treatment_values=(T₂=(case="AC", control="CC"), ), + treatment_confounders=(T₂=[:W₂],), + outcome_extra_covariates=[:C] + ) + TargetedEstimation.coerce_types!(dataset, Ψ) + + @test dataset.Ycat isa CategoricalArray + @test eltype(dataset.C) <: Union{Missing, Float64} + end @testset "Test get_sample_ids" begin @@ -89,41 +150,6 @@ end @test sample_ids == [2] end -@testset "Test treatment_values" begin - treatment_types = Dict(:T₁=> Union{Missing, Bool}, :T₂=> Int) - Ψ = CM(target=:Y, treatment=(T₁=1,), confounders=[:W₁]) - newT = TargetedEstimation.treatment_values(Ψ, (:T₁,), treatment_types) - @test newT isa Vector{Bool} - @test newT == [1] - - Ψ = ATE(target=:Y, treatment=(T₁=(case=1, control=0.),), confounders=[:W₁]) - newT = TargetedEstimation.treatment_values(Ψ, (:T₁,), treatment_types) - @test newT isa Vector{NamedTuple{(:case, :control), Tuple{Bool, Bool}}} - @test newT == [(case = true, control = false)] - - Ψ = ATE(target=:Y, treatment=(T₁=(case=1, control=0.), T₂=(case=true, control=0)), confounders=[:W₁]) 
- newT = TargetedEstimation.treatment_values(Ψ, (:T₁, :T₂), treatment_types) - @test newT isa Vector{NamedTuple{(:case, :control)}} - @test newT == [(case = true, control = false), (case = 1, control = 0)] -end - -@testset "Test read_parameters" for param_file in ("parameters.yaml", "parameters.bin") - param_file = joinpath("config", param_file) - dataset = DataFrame(T1 = [1., 0.], T2=[true, false]) - params = TargetedEstimation.read_parameters(param_file, dataset) - for param in params - if haskey(param.treatment, :T1) - @test param.treatment.T1.case isa Float64 - @test param.treatment.T1.control isa Float64 - end - if haskey(param.treatment, :T2) - @test param.treatment.T2.case isa Bool - @test param.treatment.T2.control isa Bool - end - end -end - - @testset "Test write_target_results with missing values" begin filename = "test.csv" parameters = [ @@ -133,7 +159,7 @@ end confounders=[:W₁, :W₂], covariates=[:C₁] )] - tmle_results = [TargetedEstimation.MissingTMLEResult(parameters[1])] + tmle_results = [TargetedEstimation.FailedEstimation(parameters[1])] logs = ["Error X"] TargetedEstimation.append_csv(filename, tmle_results, logs) out = CSV.read(filename, DataFrame) @@ -176,6 +202,10 @@ end TargetedEstimation.make_float!(dataset, [:C₁]) @test eltype(dataset.C₁) == Float64 + # If the type is already coerced then no-operation is applied + TargetedEstimation.make_float(dataset.C₁) === dataset.C₁ + TargetedEstimation.make_categorical(dataset.T₁, true) === dataset.T₁ + end end; From 667968fbab9dac4b488ca9fba53f5e3050f0869e Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Mon, 20 Nov 2023 18:36:12 +0000 Subject: [PATCH 02/71] add WIP --- Project.toml | 16 +- deps/build.jl | 1 + estimands_test.yaml | 12 +- src/TargetedEstimation.jl | 5 +- src/cache_managers.jl | 10 +- src/runner.jl | 154 +++++++++++++----- src/utils.jl | 136 +++++++++++----- .../{tmle_config.jl => tmle_ose_config.jl} | 0 test/runner.jl | 60 +++++-- test/testutils.jl | 12 +- test/utils.jl | 44 +++-- 
11 files changed, 325 insertions(+), 125 deletions(-) create mode 100644 deps/build.jl rename test/config/{tmle_config.jl => tmle_ose_config.jl} (100%) diff --git a/Project.toml b/Project.toml index 1c787d9..4d74093 100644 --- a/Project.toml +++ b/Project.toml @@ -4,16 +4,19 @@ authors = ["Olivier Labayle"] version = "0.7.4" [deps] -ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" +Comonicon = "863f3e99-da2a-4334-8734-de3dacbe5542" +Configurations = "5218b696-f38b-4ac9-8b61-a12ec717816d" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" HighlyAdaptiveLasso = "c5dac772-1445-43c4-b698-9440de7877f6" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" @@ -28,27 +31,30 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" TMLE = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" [compat] -ArgParse = "1.1.4" Arrow = "2.5.2" CSV = "0.10" CategoricalArrays = "0.10" Combinatorics = "1.0.2" +Comonicon = "1.0.6" +Configurations = "0.17.6" +JSON = "0.21.4" DataFrames = "1.3.4" EvoTrees = "0.14.6" GLMNet = "0.7" HighlyAdaptiveLasso = "0.2.0" JLD2 = "0.4.22" MKL = "0.6" -MLJ = "0.19" -MLJBase = "0.21" +MLJ = "0.20.0" +MLJBase = "1.0.1" MLJLinearModels = "0.9" MLJModelInterface = "1.8.0" MLJModels = "0.16" MLJXGBoostInterface = "0.3.4" MultipleTesting = "0.5.1" Optim = "1.7" -TMLE = "0.11.4" Tables = "1.10.1" +YAML = 
"0.4.9" julia = "1.7, 1" diff --git a/deps/build.jl b/deps/build.jl new file mode 100644 index 0000000..e0a9f11 --- /dev/null +++ b/deps/build.jl @@ -0,0 +1 @@ +using TargetedEstimation; TargetedEstimation.comonicon_install() \ No newline at end of file diff --git a/estimands_test.yaml b/estimands_test.yaml index 34ae410..8901313 100644 --- a/estimands_test.yaml +++ b/estimands_test.yaml @@ -10,7 +10,7 @@ estimands: T1: case: true control: false - outcome: CONTINUOUS, outcome + outcome: CONTINUOUS, OUTCOME treatment_confounders: T2: - W1 @@ -28,7 +28,7 @@ estimands: T1: case: true control: false - outcome: BINARY/outcome + outcome: BINARY/OUTCOME treatment_confounders: T2: - W1 @@ -42,7 +42,7 @@ estimands: T1: case: true control: false - outcome: CONTINUOUS, outcome + outcome: CONTINUOUS, OUTCOME treatment_confounders: T1: - W1 @@ -56,7 +56,7 @@ estimands: T1: case: true control: false - outcome: CONTINUOUS, outcome + outcome: CONTINUOUS, OUTCOME treatment_confounders: T2: - W1 @@ -74,7 +74,7 @@ estimands: T1: case: true control: false - outcome: BINARY/outcome + outcome: BINARY/OUTCOME treatment_confounders: T2: - W1 @@ -92,7 +92,7 @@ estimands: T1: case: true control: false - outcome: CONTINUOUS, outcome + outcome: CONTINUOUS, OUTCOME treatment_confounders: T2: - W1 diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index 88a6d4e..6ade605 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -25,6 +25,9 @@ using Combinatorics using Tables using Random using YAML +using JSON +using Comonicon +using Configurations import MLJModelInterface @@ -38,7 +41,7 @@ include(joinpath("models", "glmnet.jl")) include(joinpath("models", "adaptive_interaction_transformer.jl")) include(joinpath("models", "biallelic_snp_encoder.jl")) -export run_estimation, sieve_variance_plateau, merge_csv_files +export Runner, run_estimation, sieve_variance_plateau, merge_csv_files export GLMNetRegressor, GLMNetClassifier export 
RestrictedInteractionTransformer, BiAllelicSNPEncoder export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV diff --git a/src/cache_managers.jl b/src/cache_managers.jl index 4f0a3f6..a9908ee 100644 --- a/src/cache_managers.jl +++ b/src/cache_managers.jl @@ -52,12 +52,16 @@ function release!(cache_manager::NoCacheManager, Ψ) end function make_cache_manager(estimands, string) - if string == "release_unusable" + if string == "release-unusable" return ReleaseUnusableCacheManager(TMLE.nuisance_counts(estimands)) - elseif string == "no_cache" + elseif string == "no-cache" return NoCacheManager() else - return MaxSizeCacheManager(parse(Int, string)) + maxsize = try parse(Int, string) + catch E + throw(ArgumentError(string("Could not convert the provided cache value to an integer: ", string))) + end + return MaxSizeCacheManager(maxsize) end end diff --git a/src/runner.jl b/src/runner.jl index b9127cc..0ed6220 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -2,35 +2,57 @@ struct FailedEstimation message::String end + +@option struct JSONOutput + filename::Union{Nothing, String} = nothing + pval_threshold::Union{Nothing, Float64} = nothing +end + +initialize(output::JSONOutput) = initialize_json(output.filename) + + +@option struct HDF5Output + filename::Union{Nothing, String} = nothing + pval_threshold::Union{Nothing, Float64} = nothing +end + +initialize_hdf5(x) = nothing + +initialize(output::HDF5Output) = initialize_hdf5(output.filename) + +@option struct Outputs + json::JSONOutput = JSONOutput() + hdf5::HDF5Output = HDF5Output() + std::Bool = true +end + +function initialize(outputs::Outputs) + initialize(outputs.json) + initialize(outputs.hdf5) +end + mutable struct Runner estimators::NamedTuple estimands::Vector{TMLE.Estimand} dataset::DataFrame cache_manager::CacheManager chunksize::Int - pvalue_threshold::Float64 - output_ios::NamedTuple - function Runner(parsed_args) - datafile = parsed_args["dataset"] - paramfile = parsed_args["estimands-config"] - 
estimatorfile = parsed_args["estimators-config"] - verbosity = parsed_args["verbosity"] - csv_filename = parsed_args["csv-out"] - hdf5_filename = parsed_args["hdf5-out"] - pvalue_threshold = parsed_args["pval-threshold"] - chunksize = parsed_args["chunksize"] - rng = parsed_args["rng"] - cache_strategy = parsed_args["cache-strategy"] - sort_estimands = parsed_args["sort-estimands"] - - # Output IOs - output_ios = (CSV=csv_filename, HDF5=hdf5_filename) + outputs::Outputs + verbosity::Int + function Runner(dataset, estimands, estimators; + verbosity=0, + outputs=Outputs(), + chunksize=100, + rng=123, + cache_strategy="release-unusable", + sort_estimands=false + ) # Retrieve TMLE specifications - estimators = TargetedEstimation.load_tmle_spec(estimatorfile) + estimators = TargetedEstimation.load_tmle_spec(estimators) # Load dataset - dataset = TargetedEstimation.instantiate_dataset(datafile) + dataset = TargetedEstimation.instantiate_dataset(dataset) # Read parameter files - estimands = TargetedEstimation.proofread_estimands_from_yaml(paramfile, dataset) + estimands = TargetedEstimation.proofread_estimands(estimands, dataset) if sort_estimands estimands = groups_ordering(estimands; brute_force=true, @@ -41,10 +63,33 @@ mutable struct Runner end cache_manager = make_cache_manager(estimands, cache_strategy) - return new(estimators, estimands, dataset, cache_manager, chunksize, pvalue_threshold, output_ios) + return new(estimators, estimands, dataset, cache_manager, chunksize, outputs, verbosity) end end +function save(runner::Runner, results, partition, finalize) + # Append STD Out + update(runner.outputs.std, results) + # Append JSON result with partition + update(runner.outputs.json, results; finalize=finalize) + # Append HDF5 result if save-ic is true + # update_jld2_output(runner.output_ios.HDF5, partition, results, runner.dataset) +end + + +function try_estimation(runner, Ψ, estimator) + try + result, _ = estimator(Ψ, runner.dataset, + 
cache=runner.cache_manager.cache, + verbosity=runner.verbosity, + ) + return result + catch e + # On Error, store the nuisance function where the error occured + # to fail fast the next estimands + return FailedEstimation(string(e)) + end +end function (runner::Runner)(partition) results = Vector{NamedTuple}(undef, size(partition, 1)) @@ -54,17 +99,8 @@ function (runner::Runner)(partition) TargetedEstimation.coerce_types!(runner.dataset, Ψ) # Maybe update cache with new η_spec estimators_results = [] - for estimator in estimators - try - result, _ = estimator(Ψ, runner.dataset, - cache=runner.cache, - verbosity=runner.verbosity, - ) - catch e - # On Error, store the nuisance function where the error occured - # to fail fast the next estimands - result = FailedEstimation(string(e)) - end + for estimator in runner.estimators + result = try_estimation(runner, Ψ, estimator) push!(estimators_results, result) end # Update results @@ -81,18 +117,58 @@ function (runner::Runner)(partition) end function (runner::Runner)() + # Initialize output files + initialize_outputs(runner.output_ios) # Split worklist in partitions nparams = size(runner.estimands, 1) - for partition in Iterators.partition(1:nparams, runner.chunksize) + partitions = collect(Iterators.partition(1:nparams, runner.chunksize)) + for partition in partitions results = runner(partition) - # Append CSV result with partition - append_csv(csv_file, results) - # Append HDF5 result if save-ic is true - update_jld2_output(jld2_file, partition, results, dataset; pval_threshold=pval_threshold) + save(runner, results, partition, partition===partitions[end]) end - verbosity >= 1 && @info "Done." return 0 end -run_estimation(parsed_args) = Runner(parsed_args)() \ No newline at end of file + +""" +TMLE CLI. + +# Args + +- `dataset`: Data file (either .csv or .arrow) +- `estimands`: Estimands file (either .json or .yaml) +- `estimators`: A julia file containing the estimators to use. 
+ +# Options + +- `-v, --verbosity`: Verbosity level. +- `-j, --json_out`: JSON output filename. +- `--hdf5_out`: HDF5 output filename. +- `--chunksize`: Results are written in batches of size chunksize. +- `-r, --rng`: Random seed (Only used for estimands ordering at the moment). +- `-c, --cache_strategy`: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size"). + +# Flags + +- `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time). +""" +@main function tmle(dataset, estimands, estimators; + verbosity=0, + outputs=Outputs(), + chunksize=100, + rng=123, + cache_strategy="release-unusable", + sort_estimands=false + ) + runner = Runner(dataset, estimands, estimators; + verbosity=verbosity, + outputs=outputs, + chunksize=chunksize, + rng=rng, + cache_strategy=cache_strategy, + sort_estimands=sort_estimands + ) + runner() + return +end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl index f76b3a2..2c31c6e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -30,14 +30,12 @@ empty_tmle_output(;size=0) = DataFrame( covariates_string(Ψ; join_string="_&_") = length(Ψ.outcome_extra_covariates) != 0 ? join(Ψ.outcome_extra_covariates, join_string) : missing -function param_string(param::T) where T <: TMLE.Estimand - str = string(T) - return startswith(str, "TMLE.") ? 
str[6:end] : str -end +param_string(param::T) where T <: TMLE.Estimand = replace(string(T), "TMLE.Statistical" => "") + case(nt::NamedTuple) = nt.case case(x) = x -case_string(Ψ; join_string="_&_") = join((case(x) for x in values(Ψ.treatment)), join_string) +case_string(Ψ; join_string="_&_") = join((case(x) for x in values(Ψ.treatment_values)), join_string) control_string(t::Tuple{Vararg{NamedTuple}}; join_string="_&_") = join((val.control for val in t), join_string) @@ -45,10 +43,10 @@ control_string(t::Tuple{Vararg{NamedTuple}}; join_string="_&_") = control_string(t; join_string="_&_") = missing control_string(Ψ::TMLE.Estimand; join_string="_&_") = - control_string(values(Ψ.treatment); join_string=join_string) + control_string(values(Ψ.treatment_values); join_string=join_string) -treatment_string(Ψ; join_string="_&_") = join(keys(Ψ.treatment), join_string) -confounders_string(Ψ; join_string="_&_") = join(Ψ.confounders, join_string) +treatment_string(Ψ; join_string="_&_") = join(keys(Ψ.treatment_values), join_string) +confounders_string(Ψ; join_string="_&_") = join(Ψ.confounders_values, join_string) function statistics_from_estimator(estimator) @@ -74,9 +72,9 @@ statistics_from_result(result::FailedEstimation) = (missing, missing, missing, missing, missing), (missing, missing, missing, missing, missing) -function append_csv(filename, tmle_results, logs) - data = empty_tmle_output(size=size(tmle_results, 1)) - for (i, (result, log)) in enumerate(zip(tmle_results, logs)) +function append_csv(filename, results) + data = empty_tmle_output(size=size(results, 1)) + for (i, result) in enumerate(results) Ψ = result.parameter param_type = param_string(Ψ) treatments = treatment_string(Ψ) @@ -93,41 +91,83 @@ function append_csv(filename, tmle_results, logs) CSV.write(filename, data, append=true, header=!isfile(filename)) end +##################################################################### +#####  JSON OUTPUT #### 
+##################################################################### + +initialize_json(filename::Nothing) = nothing + +initialize_json(filename::String) = open(filename, "w") do io + print(io, '[') +end + +function update(output::JSONOutput, results; finalize=false) + output.filename === nothing && return + open(output.filename, "a") do io + for result in results + result = TMLE.emptyIC(result, output.pval_threshold) + JSON.print(io, TMLE.to_dict(result)) + print(io, ',') + end + if finalize + skip(io, -1) # get rid of the last comma which JSON doesn't allow + print(io, ']') + end + end +end + +##################################################################### +#####  STD OUTPUT #### +##################################################################### + +function update(doprint, results) + if doprint + mimetext = MIME"text/plain"() + index = 1 + for result in results + for (key, val) ∈ zip(keys(result), result) + show(stdout, mimetext, string("⋆⋆⋆ Estimand ", index, " ⋆⋆⋆")) + show(stdout, mimetext, val.estimand) + show(stdout, mimetext, string("Estimation Result From: ", key, )) + show(stdout, mimetext, val) + index += 1 + end + end + end +end ##################################################################### #####  JLD2 OUTPUT #### ##################################################################### -update_jld2_output(jld2_file::Nothing, partition, tmle_results, dataset; pval_threshold=0.05) = nothing - -function update_jld2_output(jld2_file::String, partition, tmle_results, dataset; pval_threshold=0.05) - if jld2_file !== nothing - jldopen(jld2_file, "a+", compress=true) do io - # Append only with results passing the threshold - previous_variables = nothing - sample_ids_idx = nothing - - for (partition_index, param_index) in enumerate(partition) - r = tmle_results[partition_index] - if (r isa TMLE.Estimate) && (pvalue(OneSampleZTest(r.tmle)) <= pval_threshold) - current_variables = variables(r.parameter) - if previous_variables != current_variables 
- sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables) - io["$param_index/sample_ids"] = sample_ids - sample_ids_idx = param_index - end - io["$param_index/result"] = r - io["$param_index/sample_ids_idx"] = sample_ids_idx - - previous_variables = current_variables - end + +function update(output::HDF5Output, partition, results, dataset) + output.filename === nothing && return + + jldopen(output.filename, "a+", compress=true) do io + # Append only with results passing the threshold + previous_variables = nothing + sample_ids_idx = nothing + for (partition_index, param_index) in enumerate(partition) + estimator_results = TMLE.emptyIC(results[partition_index], output.pval_threshold) + current_variables = variables(r.parameter) + if previous_variables != current_variables + sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables) + io["$param_index/sample_ids"] = sample_ids + sample_ids_idx = param_index end + io["$param_index/result"] = r + io["$param_index/sample_ids_idx"] = sample_ids_idx + + previous_variables = current_variables end + end + end ##################################################################### -#####  Read Estimands #### +#####  Read TMLE Estimands Configuration #### ##################################################################### @@ -154,14 +194,17 @@ maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::Nothing, method) = thro maybe_identify(Ψ, scm, method) = Ψ +read_method(extension) = extension == ".json" ? read_json : read_yaml + """ - read_estimands(param_file, dataset) + proofread_estimands(param_file, dataset) Reads estimands from file and ensures that the treatment values in the config file respects the treatment types in the dataset. 
""" -function proofread_estimands_from_yaml(filename, dataset) - config = configuration_from_yaml(filename) +function proofread_estimands(filename, dataset) + extension = filename[findlast(isequal('.'), filename):end] + config = read_method(extension)(filename) estimands = Vector{TMLE.Estimand}(undef, length(config.estimands)) treatment_types = Dict() for (index, Ψ) in enumerate(config.estimands) @@ -187,6 +230,18 @@ end #####  ADDITIONAL METHODS #### ##################################################################### +TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Nothing) where names = + NamedTuple{names}([TMLE.emptyIC(r) for r in result]) + +function TMLE.emptyIC(result, pval_threshold::Float64) + pval = pvalue(OneSampleZTest(result)) + return pval < pval_threshold ? result : TMLE.emptyIC(result) +end + +TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names = + NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result]) + + function get_sample_ids(data, variables) cols = [:SAMPLE_ID, variables.target, variables.treatments..., variables.confounders..., variables.covariates...] return dropmissing(data[!, cols]).SAMPLE_ID @@ -262,4 +317,7 @@ load_tmle_spec(file::Nothing) = ( function load_tmle_spec(file) include(abspath(file)) return ESTIMATORS -end \ No newline at end of file +end + +TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{TMLE.EICEstimate}}}) where names = + Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt)) \ No newline at end of file diff --git a/test/config/tmle_config.jl b/test/config/tmle_ose_config.jl similarity index 100% rename from test/config/tmle_config.jl rename to test/config/tmle_ose_config.jl diff --git a/test/runner.jl b/test/runner.jl index 5f61bca..3b7779c 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -93,24 +93,52 @@ function build_dataset(;n=1000, format="csv") format == "csv" ? 
CSV.write("data.csv", dataset) : Arrow.write("data.arrow", dataset) end -@testset "Test partition_tmle!" begin +@testset "Integration Test" begin build_dataset(;n=1000, format="csv") - dataset = TargetedEstimation.instantiate_dataset("data.csv") - estimands = TargetedEstimation.read_estimands(joinpath(config_dir, "parameters.yaml"), dataset) - variables = TargetedEstimation.variables(estimands, dataset) - TargetedEstimation.coerce_types!(dataset, variables) - tmle_spec = TargetedEstimation.load_tmle_spec(joinpath("config", "tmle_config.jl")) - cache = TMLECache(dataset) - - tmle_results = Vector{Union{TMLE.Estimate, TargetedEstimation.FailedEstimation}}(undef, 3) - logs = Vector{Union{String, Missing}}(undef, 3) - part = 4:6 - TargetedEstimation.partition_tmle!(cache, tmle_results, logs, part, tmle_spec, parameters, variables; verbosity=0) - @test [x.tmle.Ψ̂ for x in tmle_results] isa Vector{Float64} - @test [x.parameter for x in tmle_results] == parameters[part] - @test [x.onestep.Ψ̂ for x in tmle_results] isa Vector{Float64} - @test all(x === missing for x in logs) + tmpdir = mktempdir(cleanup=true) + estimands_filename = joinpath(tmpdir, "configuration.yaml") + TMLE.write_json(estimands_filename, statistical_estimands_only_config()) + outputs = TargetedEstimation.Outputs( + json=TargetedEstimation.JSONOutput(filename="output.json"), + std=true, + ) + runner = Runner( + "data.csv", + estimands_filename, + joinpath(CONFIGDIR, "tmle_ose_config.jl"); + outputs=outputs, + cache_strategy="release-unusable", + ) + partition = 1:3 + results = runner(partition) + for result in results + @test result.TMLE isa TMLE.TMLEstimate + @test result.OSE isa TMLE.OSEstimate + end + + output_txt = "output.txt" + TargetedEstimation.initialize(outputs) + open(output_txt, "w") do io + redirect_stdout(io) do + TargetedEstimation.save(runner, results, partition, true) + end + end + # Read STDOUT + stdout_content = split(read(output_txt, String), "\n") + @test length(stdout_content) > 20 
+ + # Read JSON + loaded_results = TMLE.read_json(outputs.json.filename) + for (result, loaded_result) in zip(results, loaded_results) + @test loaded_result[:TMLE] isa TMLE.TMLEstimate + @test result.TMLE.estimate == loaded_result[:TMLE].estimate + @test loaded_result[:OSE] isa TMLE.OSEstimate + @test result.OSE.estimate == loaded_result[:OSE].estimate + end + rm("data.csv") + rm(output_txt) + rm(outputs.json.filename) end @testset "Test tmle_estimation" begin diff --git a/test/testutils.jl b/test/testutils.jl index 76d6ab4..c41ad33 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -4,7 +4,7 @@ function statistical_estimands_only_config() configuration = Configuration( estimands=[ IATE( - outcome = Symbol("CONTINUOUS, outcome"), + outcome = Symbol("CONTINUOUS, OUTCOME"), treatment_values = ( T1 = (case = true, control = false), T2 = (case = true, control = false)), @@ -12,7 +12,7 @@ function statistical_estimands_only_config() outcome_extra_covariates = (:C1,) ), IATE( - outcome = Symbol("BINARY/outcome"), + outcome = Symbol("BINARY/OUTCOME"), treatment_values = ( T1 = (case = true, control = false), T2 = (case = true, control = false)), @@ -20,13 +20,13 @@ function statistical_estimands_only_config() outcome_extra_covariates = (:C1,) ), ATE( - outcome = Symbol("CONTINUOUS, outcome"), + outcome = Symbol("CONTINUOUS, OUTCOME"), treatment_values = (T1 = (case = true, control = false),), treatment_confounders = (T1 = (:W1, :W2),), outcome_extra_covariates = () ), IATE( - outcome = Symbol("CONTINUOUS, outcome"), + outcome = Symbol("CONTINUOUS, OUTCOME"), treatment_values = ( T1 = (case = true, control = false), T2 = (case = false, control = true) @@ -35,7 +35,7 @@ function statistical_estimands_only_config() outcome_extra_covariates = () ), IATE( - outcome = Symbol("BINARY/outcome"), + outcome = Symbol("BINARY/OUTCOME"), treatment_values = ( T1 = (case = true, control = false), T2 = (case = false, control = true) @@ -44,7 +44,7 @@ function 
statistical_estimands_only_config() outcome_extra_covariates = (:C1,) ), ATE( - outcome = Symbol("CONTINUOUS, outcome"), + outcome = Symbol("CONTINUOUS, OUTCOME"), treatment_values = ( T1 = (case = true, control = false), T2 = (case = true, control = false)), diff --git a/test/utils.jl b/test/utils.jl index 386f413..8c4bf5e 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -14,7 +14,7 @@ PROJECT_DIR = dirname(dirname(pathof(TargetedEstimation))) include(joinpath(PROJECT_DIR, "test", "testutils.jl")) @testset "Test load_tmle_spec: with configuration file" begin - estimators = TargetedEstimation.load_tmle_spec(joinpath(PROJECT_DIR, "test", "config", "tmle_config.jl")) + estimators = TargetedEstimation.load_tmle_spec(joinpath(PROJECT_DIR, "test", "config", "tmle_ose_config.jl")) @test estimators.TMLE isa TMLE.TMLEE @test estimators.OSE isa TMLE.OSE @test estimators.TMLE.weighted === true @@ -45,11 +45,13 @@ end @test newT == [(case = true, control = false), (case = 1, control = 0)] end -@testset "Test proofread_estimands_from_yaml" begin - filename = "statistical_estimands.yml" - configuration_to_yaml(filename, statistical_estimands_only_config()) +@testset "Test proofread_estimands" for extension in ("yaml", "json") + # Write estimands file + filename = "statistical_estimands.$extension" + eval(Meta.parse("write_$extension"))(filename, statistical_estimands_only_config()) + dataset = DataFrame(T1 = [1., 0.], T2=[true, false]) - estimands = TargetedEstimation.proofread_estimands_from_yaml(filename, dataset) + estimands = TargetedEstimation.proofread_estimands(filename, dataset) for estimand in estimands if haskey(estimand.treatment_values, :T1) @test estimand.treatment_values.T1.case isa Float64 @@ -60,6 +62,7 @@ end @test estimand.treatment_values.T2.control isa Bool end end + # Clean estimands file rm(filename) end @@ -77,10 +80,10 @@ end @test TargetedEstimation.confounders_string(Ψ) == "W₁_&_W₂" Ψ = CM( - target=:Y, - treatment=(T₁=1, T₂="AC"), - confounders=[:W₁, 
:W₂], - covariates=[:C₁] + outcome=:Y, + treatment_values=(T₁=1, T₂="AC"), + treatment_confounders=(T₁=[:W₁, :W₂], T₂ = [:W₁, :W₂]), + outcome_extra_covariates=[:C₁] ) @test TargetedEstimation.covariates_string(Ψ) === "C₁" @@ -125,7 +128,6 @@ end @test dataset.Ycat isa CategoricalArray @test eltype(dataset.C) <: Union{Missing, Float64} - end @testset "Test get_sample_ids" begin @@ -205,7 +207,29 @@ end # If the type is already coerced then no-operation is applied TargetedEstimation.make_float(dataset.C₁) === dataset.C₁ TargetedEstimation.make_categorical(dataset.T₁, true) === dataset.T₁ +end +@tetset "Test JSON writing" begin + results = [] + for Ψ in statistical_estimands_only_config().estimands + push!(results, ( + TMLE=TMLE.TMLEstimate(Ψ, rand(), rand(), 10, Float64[]), + OSE=TMLE.OSEstimate(Ψ, rand(), rand(), 10, Float64[]) + )) + end + tmpdir = mktempdir(cleanup=true) + filename = joinpath(tmpdir, "output_test.json") + TargetedEstimation.initialize_json(filename) + TargetedEstimation.update(filename, results[1:3]) + TargetedEstimation.update(filename, results[4:end]; finalize=true) + loaded_results = TMLE.read_json(filename) + @test size(loaded_results) == size(results) + for (result, loaded_result) in zip(results, loaded_results) + @test result.TMLE.estimate == loaded_result[:TMLE].estimate + @test result.TMLE.std == loaded_result[:TMLE].std + @test result.OSE.estimate == loaded_result[:OSE].estimate + @test result.OSE.std == loaded_result[:OSE].std + end end end; From e61d7c84c1f606901f3c1b953f79a3d6feadfbc9 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 21 Nov 2023 12:07:09 +0000 Subject: [PATCH 03/71] fix more tests --- src/TargetedEstimation.jl | 2 +- src/runner.jl | 21 ++-- src/utils.jl | 37 +++--- test/runner.jl | 229 ++++++++++++++++---------------------- test/testutils.jl | 20 ++-- test/utils.jl | 31 +++++- 6 files changed, 157 insertions(+), 183 deletions(-) diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index 
6ade605..2f809ec 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -41,7 +41,7 @@ include(joinpath("models", "glmnet.jl")) include(joinpath("models", "adaptive_interaction_transformer.jl")) include(joinpath("models", "biallelic_snp_encoder.jl")) -export Runner, run_estimation, sieve_variance_plateau, merge_csv_files +export Runner, tmle, sieve_variance_plateau, merge_csv_files export GLMNetRegressor, GLMNetClassifier export RestrictedInteractionTransformer, BiAllelicSNPEncoder export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV diff --git a/src/runner.jl b/src/runner.jl index 0ed6220..2120cfa 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -2,7 +2,6 @@ struct FailedEstimation message::String end - @option struct JSONOutput filename::Union{Nothing, String} = nothing pval_threshold::Union{Nothing, Float64} = nothing @@ -10,25 +9,19 @@ end initialize(output::JSONOutput) = initialize_json(output.filename) - @option struct HDF5Output filename::Union{Nothing, String} = nothing pval_threshold::Union{Nothing, Float64} = nothing end -initialize_hdf5(x) = nothing - -initialize(output::HDF5Output) = initialize_hdf5(output.filename) - @option struct Outputs json::JSONOutput = JSONOutput() hdf5::HDF5Output = HDF5Output() - std::Bool = true + std::Bool = false end function initialize(outputs::Outputs) initialize(outputs.json) - initialize(outputs.hdf5) end mutable struct Runner @@ -69,11 +62,11 @@ end function save(runner::Runner, results, partition, finalize) # Append STD Out - update(runner.outputs.std, results) - # Append JSON result with partition + update(runner.outputs.std, results, partition) + # Append JSON Output update(runner.outputs.json, results; finalize=finalize) - # Append HDF5 result if save-ic is true - # update_jld2_output(runner.output_ios.HDF5, partition, results, runner.dataset) + # Append HDF5 Output + update(runner.outputs.hdf5, partition, results, runner.dataset) end @@ -118,7 +111,7 @@ end function (runner::Runner)() # 
Initialize output files - initialize_outputs(runner.output_ios) + initialize(runner.outputs) # Split worklist in partitions nparams = size(runner.estimands, 1) partitions = collect(Iterators.partition(1:nparams, runner.chunksize)) @@ -126,7 +119,7 @@ function (runner::Runner)() results = runner(partition) save(runner, results, partition, partition===partitions[end]) end - verbosity >= 1 && @info "Done." + runner.verbosity >= 1 && @info "Done." return 0 end diff --git a/src/utils.jl b/src/utils.jl index 2c31c6e..cecc9ed 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -120,15 +120,17 @@ end #####  STD OUTPUT #### ##################################################################### -function update(doprint, results) +function update(doprint, results, partition) if doprint mimetext = MIME"text/plain"() index = 1 - for result in results + for (result, estimand_index) in zip(results, partition) + show(stdout, mimetext, string("⋆⋆⋆ Estimand ", estimand_index, " ⋆⋆⋆")) + println(stdout) + show(stdout, mimetext, first(result).estimand) for (key, val) ∈ zip(keys(result), result) - show(stdout, mimetext, string("⋆⋆⋆ Estimand ", index, " ⋆⋆⋆")) - show(stdout, mimetext, val.estimand) - show(stdout, mimetext, string("Estimation Result From: ", key, )) + show(stdout, mimetext, string("→ Estimation Result From: ", key, )) + println(stdout) show(stdout, mimetext, val) index += 1 end @@ -145,25 +147,22 @@ function update(output::HDF5Output, partition, results, dataset) output.filename === nothing && return jldopen(output.filename, "a+", compress=true) do io - # Append only with results passing the threshold previous_variables = nothing sample_ids_idx = nothing for (partition_index, param_index) in enumerate(partition) estimator_results = TMLE.emptyIC(results[partition_index], output.pval_threshold) - current_variables = variables(r.parameter) + current_variables = variables(first(estimator_results).estimand) if previous_variables != current_variables sample_ids = 
TargetedEstimation.get_sample_ids(dataset, current_variables) io["$param_index/sample_ids"] = sample_ids sample_ids_idx = param_index end - io["$param_index/result"] = r + io["$param_index/result"] = estimator_results io["$param_index/sample_ids_idx"] = sample_ids_idx previous_variables = current_variables end - end - end ##################################################################### @@ -242,10 +241,8 @@ TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names = NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result]) -function get_sample_ids(data, variables) - cols = [:SAMPLE_ID, variables.target, variables.treatments..., variables.confounders..., variables.covariates...] - return dropmissing(data[!, cols]).SAMPLE_ID -end +get_sample_ids(data, variables) = dropmissing(data[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID + """ instantiate_dataset(path::String) @@ -296,12 +293,12 @@ function coerce_types!(dataset, Ψ) make_float!(dataset, continuous_variables) end -variables(Ψ::TMLE.Estimand) = ( - outcome = Ψ.outcome, - covariates = Ψ.outcome_extra_covariates, - confounders = Ψ.treatment_confounders, - treatments = keys(Ψ.treatment_values) - ) +variables(Ψ::TMLE.Estimand) = Set([ + Ψ.outcome, + keys(Ψ.treatment_values)..., + Ψ.outcome_extra_covariates..., + Iterators.flatten(values(Ψ.treatment_confounders))... 
+ ]) load_tmle_spec(file::Nothing) = ( TMLE = TMLEE( diff --git a/test/runner.jl b/test/runner.jl index 3b7779c..cde4dbb 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -20,33 +20,18 @@ CONFIGDIR = joinpath(PKGDIR, "test", "config") include(joinpath(PKGDIR, "test", "testutils.jl")) -function test_tmle_output(param_index, jldio, data, expected_param, sample_ids_idx) - jld2_res = jldio[string(param_index)] - csv_row = data[param_index, :] - Ψ = jld2_res["result"].parameter - @test jld2_res["result"] isa TMLE.Estimate - @test jld2_res["result"].tmle.Ψ̂ isa Float64 - @test Ψ == expected_param - @test jld2_res["sample_ids_idx"] == sample_ids_idx - sample_ids = jldio[string(jld2_res["sample_ids_idx"])]["sample_ids"] - if expected_param.target == Symbol("BINARY/OUTCOME") - @test sample_ids == 2:1000 - else - @test sample_ids == 1:1000 +sort_nt_by_key(nt::NamedTuple{names}) where names = NamedTuple{sort(names)}(nt) +sort_nt_by_key(x) = x + +function test_estimands_match(Ψ₁::T1, Ψ₂::T2) where {T1, T2} + @test T1 == T2 + @test Ψ₁.outcome == Ψ₂.outcome + @test Ψ₁.outcome_extra_covariates == Ψ₂.outcome_extra_covariates + @test sort_nt_by_key(Ψ₁.treatment_confounders) == sort_nt_by_key(Ψ₂.treatment_confounders) + @test sort(keys(Ψ₁.treatment_values)) == sort(keys(Ψ₂.treatment_values)) + for key in keys(Ψ₁.treatment_values) + @test sort_nt_by_key(Ψ₁.treatment_values[key]) == sort_nt_by_key(Ψ₂.treatment_values[key]) end - @test jld2_res["result"] isa TMLE.Estimate - - if csv_row.COVARIATES === missing - @test TargetedEstimation.covariates_string(Ψ) === csv_row.COVARIATES - else - @test TargetedEstimation.covariates_string(Ψ) == csv_row.COVARIATES - end - @test TargetedEstimation.param_string(Ψ) == csv_row.PARAMETER_TYPE - @test TargetedEstimation.case_string(Ψ) == csv_row.CASE - @test TargetedEstimation.control_string(Ψ) == csv_row.CONTROL - @test TargetedEstimation.treatment_string(Ψ) == csv_row.TREATMENTS - @test TargetedEstimation.confounders_string(Ψ) == 
csv_row.CONFOUNDERS - @test csv_row.TMLE_ESTIMATE == jld2_res["result"].tmle.Ψ̂ end """ @@ -100,6 +85,7 @@ end TMLE.write_json(estimands_filename, statistical_estimands_only_config()) outputs = TargetedEstimation.Outputs( json=TargetedEstimation.JSONOutput(filename="output.json"), + hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1.), std=true, ) runner = Runner( @@ -109,13 +95,14 @@ end outputs=outputs, cache_strategy="release-unusable", ) - partition = 1:3 + partition = 4:6 results = runner(partition) for result in results @test result.TMLE isa TMLE.TMLEstimate @test result.OSE isa TMLE.OSEstimate end + # Test Save to STDOUT output_txt = "output.txt" TargetedEstimation.initialize(outputs) open(output_txt, "w") do io @@ -123,138 +110,116 @@ end TargetedEstimation.save(runner, results, partition, true) end end - # Read STDOUT - stdout_content = split(read(output_txt, String), "\n") - @test length(stdout_content) > 20 + stdout_content = read(output_txt, String) + @test all(occursin("Estimand $i", stdout_content) for i in partition) - # Read JSON + # Test Save to JSON loaded_results = TMLE.read_json(outputs.json.filename) for (result, loaded_result) in zip(results, loaded_results) @test loaded_result[:TMLE] isa TMLE.TMLEstimate @test result.TMLE.estimate == loaded_result[:TMLE].estimate + @test loaded_result[:TMLE].IC == [] + @test loaded_result[:OSE] isa TMLE.OSEstimate @test result.OSE.estimate == loaded_result[:OSE].estimate + @test loaded_result[:OSE].IC == [] end + # Test Save to HDF5 + hdf5file = jldopen(outputs.hdf5.filename, "r") + for (result_index, param_index) in enumerate(4:6) + result = hdf5file[string(param_index, "/result")] + @test result.TMLE isa TMLE.TMLEstimate + @test results[result_index].TMLE.estimate == result.TMLE.estimate + + @test result.OSE isa TMLE.OSEstimate + @test results[result_index].OSE.estimate == result.OSE.estimate + end + @test hdf5file["4/sample_ids"] == collect(2:1000) + @test 
hdf5file["4/sample_ids_idx"] == 4 + @test size(hdf5file["4/result"].TMLE.IC, 1) == 999 + + @test !haskey(hdf5file, "5/sample_ids") + @test hdf5file["5/sample_ids_idx"] == 4 + @test size(hdf5file["5/result"].TMLE.IC, 1) == 999 + + @test hdf5file["6/sample_ids"] == collect(1:1000) + @test hdf5file["6/sample_ids_idx"] == 6 + @test size(hdf5file["6/result"].TMLE.IC, 1) == 1000 + + close(hdf5file) + + # Clean rm("data.csv") rm(output_txt) rm(outputs.json.filename) + rm(outputs.hdf5.filename) end -@testset "Test tmle_estimation" begin - expected_parameters = [ - ATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false),), [:W1, :W2], Symbol[]), - IATE(Symbol("BINARY/OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]), - IATE(Symbol("BINARY/OUTCOME"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], [:C1]), - IATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], Symbol[]), - IATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]), - ATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]) - ] - outfilename = "statistical_estimands.yml" - configuration_to_yaml(outfilename, statistical_estimands_only_config()) - expected_param_sample_ids_idx = [1, 2, 2, 4, 5, 5] +@testset "Test tmle" begin + tmpdir = mktempdir(cleanup=true) + estimands_filename = joinpath(tmpdir, "configuration.json") + configuration = statistical_estimands_only_config() + TMLE.write_json(estimands_filename, configuration) + outputs = TargetedEstimation.Outputs( + json=TargetedEstimation.JSONOutput(filename="output.json"), + hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1.), + ) + estimatorfile = joinpath(CONFIGDIR, "tmle_ose_config.jl") # Run tests over CSV 
and Arrow data formats for format in ("csv", "arrow") + datafile = string("data.", format) build_dataset(;n=1000, format=format) - parsed_args = Dict( - "dataset" => string("data.", format), - "estimands-config" => nothing, - "estimators-config" => joinpath(config_dir, "tmle_config.jl"), - "csv-out" => "output.csv", - "verbosity" => 0, - "hdf5-out" => "output.hdf5", - "pval-threshold" => 1., - "chunksize" => nothing - ) - runner = TargetedEstimation.Runner(parsed_args) - for param_file in ("parameters.yaml", "parameters.bin") - for chunksize in (4, 10) - # Only one continuous phenotype / machines not saved / no adaptive cv - - parsed_args["estimands-config"] = outfilename - parsed_args["chunksize"] = chunksize - - tmle_estimation(parsed_args) - - # Given the threshold is 1, all - # estimation results will make the threshold - jldio = jldopen(parsed_args["hdf5-out"]) - data = CSV.read(parsed_args["csv-out"], DataFrame) - - @test all(data[i, :TMLE_ESTIMATE] != data[j, :TMLE_ESTIMATE] for i in 1:5 for j in i+1:6) - - for (param_index, (Ψ, sample_ids_idx)) in enumerate(zip(expected_parameters, expected_param_sample_ids_idx)) - test_tmle_output(param_index, jldio, data, Ψ, sample_ids_idx) - end - # Clean - rm(parsed_args["csv-out"]) - rm(parsed_args["hdf5-out"]) + for chunksize in (4, 10) + tmle(datafile, estimands_filename, estimatorfile; + outputs=outputs, + chunksize=chunksize, + ) + + hdf5file = jldopen(outputs.hdf5.filename) + results_from_json = TMLE.read_json(outputs.json.filename) + + for i in 1:6 + Ψ = configuration.estimands[i] + test_estimands_match(Ψ, results_from_json[i][:TMLE].estimand) + hdf5result = hdf5file[string(i, "/result")] + @test results_from_json[i][:TMLE].estimate == hdf5result.TMLE.estimate + @test results_from_json[i][:OSE].estimate == hdf5result.OSE.estimate end + + # Clean + rm(outputs.hdf5.filename) + rm(outputs.json.filename) end - rm(parsed_args["dataset"]) + rm(datafile) end end -@testset "Test tmle_estimation: No hdf5 file" begin 
+@testset "Test tmle: lower p-value threshold only JSON output" begin build_dataset(;n=1000, format="csv") - estimands_filename = "estimands_test.yaml" - configuration_to_yaml(estimands_filename, statistical_estimands_only_config()) - # Only one continuous phenotype / machines not saved / no adaptive cv - parsed_args = Dict( - "dataset" => "data.csv", - "estimands-config" => estimands_filename, - "estimators-config" => joinpath(CONFIGDIR, "ose_config.jl"), - "csv-out" => "output.csv", - "verbosity" => 0, - "hdf5-out" => nothing, - "pval-threshold" => 1., - "chunksize" => 10, - "rng" => 123, - "sort-estimands" => false, - "cache-strategy" => "release_unusable" - ) - @enter run_estimation(parsed_args) - - ## Check CSV file - data = CSV.read(parsed_args["csv-out"], DataFrame) - @test names(TargetedEstimation.empty_tmle_output()) == names(data) - @test size(data) == (6, 19) - all(x === missing for x in data.LOG) - # Clean - rm(parsed_args["csv-out"]) - rm(parsed_args["dataset"]) -end - - -@testset "Test tmle_estimation: lower p-value threhsold" begin - build_dataset(;n=1000, format="csv") - parsed_args = Dict( - "dataset" => "data.csv", - "estimands-config" => joinpath("config", "parameters.yaml"), - "estimators-config" => joinpath("config", "tmle_config.jl"), - "csv-out" => "output.csv", - "verbosity" => 0, - "hdf5-out" => "output.hdf5", - "pval-threshold" => 1e-15, - "chunksize" => 10 + outputs = TargetedEstimation.Outputs( + json=TargetedEstimation.JSONOutput(filename="output.json", pval_threshold=1e-15) ) - - tmle_estimation(parsed_args) + tmpdir = mktempdir(cleanup=true) + estimandsfile = joinpath(tmpdir, "configuration.json") + configuration = statistical_estimands_only_config() + TMLE.write_json(estimandsfile, configuration) + estimatorfile = joinpath(CONFIGDIR, "ose_config.jl") + datafile = "data.csv" + tmle(datafile, estimandsfile, estimatorfile; outputs=outputs) # Essential results - data = CSV.read(parsed_args["csv-out"], DataFrame) - jldio = 
jldopen(parsed_args["hdf5-out"]) - @test !haskey(jldio, "2") - @test !haskey(jldio, "3") - @test !haskey(jldio, "4") - @test !haskey(jldio, "5") - @test !haskey(jldio, "6") - - @test jldio["1"]["result"].tmle.Ψ̂ == data[1, :TMLE_ESTIMATE] + results_from_json = TMLE.read_json(outputs.json.filename) + n_IC_empties = 0 + for result in results_from_json + if result[:OSE].IC != [] + n_IC_empties += 1 + end + end + @test n_IC_empties > 0 - rm(parsed_args["dataset"]) - rm(parsed_args["csv-out"]) - rm(parsed_args["hdf5-out"]) + rm(datafile) + rm(outputs.json.filename) end @testset "Test tmle_estimation: Failing parameters" begin diff --git a/test/testutils.jl b/test/testutils.jl index c41ad33..c9bc500 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -11,14 +11,6 @@ function statistical_estimands_only_config() treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), outcome_extra_covariates = (:C1,) ), - IATE( - outcome = Symbol("BINARY/OUTCOME"), - treatment_values = ( - T1 = (case = true, control = false), - T2 = (case = true, control = false)), - treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), - outcome_extra_covariates = (:C1,) - ), ATE( outcome = Symbol("CONTINUOUS, OUTCOME"), treatment_values = (T1 = (case = true, control = false),), @@ -43,11 +35,19 @@ function statistical_estimands_only_config() treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), outcome_extra_covariates = (:C1,) ), - ATE( - outcome = Symbol("CONTINUOUS, OUTCOME"), + IATE( + outcome = Symbol("BINARY/OUTCOME"), treatment_values = ( T1 = (case = true, control = false), T2 = (case = true, control = false)), + treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), + outcome_extra_covariates = (:C1,) + ), + CM( + outcome = Symbol("CONTINUOUS, OUTCOME"), + treatment_values = ( + T1 = true, + T2 = false), treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), outcome_extra_covariates = (:C1,) ) diff --git a/test/utils.jl b/test/utils.jl index 8c4bf5e..1859622 
100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -130,13 +130,32 @@ end @test eltype(dataset.C) <: Union{Missing, Float64} end -@testset "Test get_sample_ids" begin - variables = ( - target = :Y, - covariates = Symbol[], - confounders = [:W₁, :W₂], - treatments = (:T₁, :T₂) +@testset "Test misc" begin + Ψ = ATE( + outcome = :Y, + treatment_values = ( + T₁ = (case=1, control=0), + T₂ = (case=1, control=0)), + treatment_confounders = ( + T₁=[:W₁, :W₂], + T₂=[:W₂, :W₃] + ), + outcome_extra_covariates = [:C] ) + variables = TargetedEstimation.variables(Ψ) + @test variables == Set([:Y, :C, :T₁, :T₂, :W₁, :W₂, :W₃]) + Ψ = ATE( + outcome = :Y, + treatment_values = ( + T₁ = (case=1, control=0), + T₂ = (case=1, control=0)), + treatment_confounders = ( + T₁=[:W₁, :W₂], + T₂=[:W₁, :W₂] + ), + ) + variables = TargetedEstimation.variables(Ψ) + @test variables == Set([:Y, :T₁, :T₂, :W₁, :W₂]) data = DataFrame( SAMPLE_ID = [1, 2, 3, 4, 5], Y = [1, 2, 3, missing, 5], From 07e0c7f33e47e1207ca9a36bca1aa23688713309 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 21 Nov 2023 17:16:11 +0000 Subject: [PATCH 04/71] add new tmle function --- src/cache_managers.jl | 4 +- src/runner.jl | 47 +++++++-- src/utils.jl | 113 +++------------------ test/cache_managers.jl | 8 +- test/config/problematic_tmle_ose_config.jl | 14 +++ test/models/biallelic_snp_encoder.jl | 1 + test/resampling.jl | 4 +- test/runner.jl | 60 +++++++---- test/runtests.jl | 23 +++-- test/utils.jl | 79 +++----------- 10 files changed, 144 insertions(+), 209 deletions(-) create mode 100644 test/config/problematic_tmle_ose_config.jl diff --git a/src/cache_managers.jl b/src/cache_managers.jl index a9908ee..64d5004 100644 --- a/src/cache_managers.jl +++ b/src/cache_managers.jl @@ -15,13 +15,13 @@ function release!(cache_manager::ReleaseUnusableCacheManager, Ψ) for ps in η.propensity_score cache_manager.η_counts[ps] -= 1 if cache_manager.η_counts[ps] == 0 - pop!(cache_manager.cache, ps) + delete!(cache_manager.cache, ps) 
end end # Outcome Mean cache_manager.η_counts[η.outcome_mean] -= 1 if cache_manager.η_counts[η.outcome_mean] == 0 - pop!(cache_manager.cache, η.outcome_mean) + delete!(cache_manager.cache, η.outcome_mean) end end diff --git a/src/runner.jl b/src/runner.jl index 2120cfa..24cff63 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -1,7 +1,14 @@ struct FailedEstimation - message::String + estimand::TMLE.Estimand + msg::String end +TMLE.to_dict(x::FailedEstimation) = Dict( + :estimand => TMLE.to_dict(x.estimand), + :error => x.msg + ) + + @option struct JSONOutput filename::Union{Nothing, String} = nothing pval_threshold::Union{Nothing, Float64} = nothing @@ -32,6 +39,7 @@ mutable struct Runner chunksize::Int outputs::Outputs verbosity::Int + failed_nuisance::Set function Runner(dataset, estimands, estimators; verbosity=0, outputs=Outputs(), @@ -55,21 +63,22 @@ mutable struct Runner ) end cache_manager = make_cache_manager(estimands, cache_strategy) + + failed_nuisance = Set([]) - return new(estimators, estimands, dataset, cache_manager, chunksize, outputs, verbosity) + return new(estimators, estimands, dataset, cache_manager, chunksize, outputs, verbosity, failed_nuisance) end end function save(runner::Runner, results, partition, finalize) # Append STD Out - update(runner.outputs.std, results, partition) + update_file(runner.outputs.std, results, partition) # Append JSON Output - update(runner.outputs.json, results; finalize=finalize) + update_file(runner.outputs.json, results; finalize=finalize) # Append HDF5 Output - update(runner.outputs.hdf5, partition, results, runner.dataset) + update_file(runner.outputs.hdf5, partition, results, runner.dataset) end - function try_estimation(runner, Ψ, estimator) try result, _ = estimator(Ψ, runner.dataset, @@ -78,16 +87,33 @@ function try_estimation(runner, Ψ, estimator) ) return result catch e - # On Error, store the nuisance function where the error occured - # to fail fast the next estimands - return 
FailedEstimation(string(e)) + # Some nuisance function fits may fail. We do not interrupt on them but log instead. + # This also allows to skip fast the next estimands requiring the same nuisance functions. + if e isa TMLE.FitFailedError + push!(runner.failed_nuisance, e.estimand) + return FailedEstimation(Ψ, e.msg) + # On other errors, rethrow + else + rethrow(e) + end end end +function skip_fast(runner, Ψ) + ηs = TMLE.get_relevant_factors(Ψ) + ηs.propensity_score + any(η ∈ runner.failed_nuisance for η in (ηs.outcome_mean, ηs.propensity_score...)) && return true + return false +end + function (runner::Runner)(partition) results = Vector{NamedTuple}(undef, size(partition, 1)) for (partition_index, param_index) in enumerate(partition) Ψ = runner.estimands[param_index] + if skip_fast(runner, Ψ) + results[partition_index] = NamedTuple{keys(runner.estimators)}([FailedEstimation(Ψ, "Skipped due to shared failed nuisance fit.") for _ in 1:length(runner.estimators)]) + continue + end # Make sure data types are appropriate for the estimand TargetedEstimation.coerce_types!(runner.dataset, Ψ) # Maybe update cache with new η_spec @@ -119,8 +145,6 @@ function (runner::Runner)() results = runner(partition) save(runner, results, partition, partition===partitions[end]) end - runner.verbosity >= 1 && @info "Done." - return 0 end @@ -163,5 +187,6 @@ TMLE CLI. sort_estimands=sort_estimands ) runner() + verbosity >= 1 && @info "Done." 
return end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl index cecc9ed..ce62c37 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,96 +1,3 @@ - - -##################################################################### -#####  CSV OUTPUT #### -##################################################################### - - -empty_tmle_output(;size=0) = DataFrame( - PARAMETER_TYPE=Vector{String}(undef, size), - TREATMENTS=Vector{String}(undef, size), - CASE=Vector{String}(undef, size), - CONTROL=Vector{Union{Missing, String}}(undef, size), - OUTCOME=Vector{String}(undef, size), - CONFOUNDERS=Vector{String}(undef, size), - COVARIATES=Vector{Union{Missing, String}}(undef, size), - INITIAL_ESTIMATE=Vector{Union{Missing, Float64}}(undef, size), - TMLE_ESTIMATE=Vector{Union{Missing, Float64}}(undef, size), - TMLE_STD=Vector{Union{Missing, Float64}}(undef, size), - TMLE_PVALUE=Vector{Union{Missing, Float64}}(undef, size), - TMLE_LWB=Vector{Union{Missing, Float64}}(undef, size), - TMLE_UPB=Vector{Union{Missing, Float64}}(undef, size), - ONESTEP_ESTIMATE=Vector{Union{Missing, Float64}}(undef, size), - ONESTEP_STD=Vector{Union{Missing, Float64}}(undef, size), - ONESTEP_PVALUE=Vector{Union{Missing, Float64}}(undef, size), - ONESTEP_LWB=Vector{Union{Missing, Float64}}(undef, size), - ONESTEP_UPB=Vector{Union{Missing, Float64}}(undef, size), - LOG=Vector{Union{Missing, String}}(undef, size) -) - -covariates_string(Ψ; join_string="_&_") = - length(Ψ.outcome_extra_covariates) != 0 ? 
join(Ψ.outcome_extra_covariates, join_string) : missing - -param_string(param::T) where T <: TMLE.Estimand = replace(string(T), "TMLE.Statistical" => "") - - -case(nt::NamedTuple) = nt.case -case(x) = x -case_string(Ψ; join_string="_&_") = join((case(x) for x in values(Ψ.treatment_values)), join_string) - -control_string(t::Tuple{Vararg{NamedTuple}}; join_string="_&_") = - join((val.control for val in t), join_string) - -control_string(t; join_string="_&_") = missing - -control_string(Ψ::TMLE.Estimand; join_string="_&_") = - control_string(values(Ψ.treatment_values); join_string=join_string) - -treatment_string(Ψ; join_string="_&_") = join(keys(Ψ.treatment_values), join_string) -confounders_string(Ψ; join_string="_&_") = join(Ψ.confounders_values, join_string) - - -function statistics_from_estimator(estimator) - Ψ̂ = TMLE.estimate(estimator) - std = √(var(estimator)) - testresult = OneSampleTTest(estimator) - pval = pvalue(testresult) - l, u = confint(testresult) - return (Ψ̂, std, pval, l, u) -end - -function statistics_from_result(result::TMLE.Estimate) - Ψ̂₀ = result.initial - # TMLE stats - tmle_stats = statistics_from_estimator(result.tmle) - # OneStep stats - onestep_stats = statistics_from_estimator(result.onestep) - return Ψ̂₀, tmle_stats, onestep_stats -end - -statistics_from_result(result::FailedEstimation) = - missing, - (missing, missing, missing, missing, missing), - (missing, missing, missing, missing, missing) - -function append_csv(filename, results) - data = empty_tmle_output(size=size(results, 1)) - for (i, result) in enumerate(results) - Ψ = result.parameter - param_type = param_string(Ψ) - treatments = treatment_string(Ψ) - case = case_string(Ψ) - control = control_string(Ψ) - confounders = confounders_string(Ψ) - covariates = covariates_string(Ψ) - Ψ̂₀, tmle_stats, onestep_stats = statistics_from_result(result) - data[i, :] = ( - param_type, treatments, case, control, string(Ψ.target), confounders, covariates, - Ψ̂₀, tmle_stats..., 
onestep_stats..., log - ) - end - CSV.write(filename, data, append=true, header=!isfile(filename)) -end - ##################################################################### #####  JSON OUTPUT #### ##################################################################### @@ -101,7 +8,7 @@ initialize_json(filename::String) = open(filename, "w") do io print(io, '[') end -function update(output::JSONOutput, results; finalize=false) +function update_file(output::JSONOutput, results; finalize=false) output.filename === nothing && return open(output.filename, "a") do io for result in results @@ -120,7 +27,7 @@ end #####  STD OUTPUT #### ##################################################################### -function update(doprint, results, partition) +function update_file(doprint, results, partition) if doprint mimetext = MIME"text/plain"() index = 1 @@ -143,7 +50,7 @@ end ##################################################################### -function update(output::HDF5Output, partition, results, dataset) +function update_file(output::HDF5Output, partition, results, dataset) output.filename === nothing && return jldopen(output.filename, "a+", compress=true) do io @@ -169,7 +76,6 @@ end #####  Read TMLE Estimands Configuration #### ##################################################################### - function convert_treatment_values(treatment_levels::NamedTuple{names, <:Tuple{Vararg{NamedTuple}}}, treatment_types) where names return [( case = convert(treatment_types[tn], treatment_levels[tn].case), @@ -229,17 +135,21 @@ end #####  ADDITIONAL METHODS #### ##################################################################### +TMLE.emptyIC(result::FailedEstimation) = result + +TMLE.emptyIC(result::FailedEstimation, pval_threshold::Float64) = result + TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Nothing) where names = NamedTuple{names}([TMLE.emptyIC(r) for r in result]) +TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names = + 
NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result]) + function TMLE.emptyIC(result, pval_threshold::Float64) pval = pvalue(OneSampleZTest(result)) return pval < pval_threshold ? result : TMLE.emptyIC(result) end -TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names = - NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result]) - get_sample_ids(data, variables) = dropmissing(data[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID @@ -317,4 +227,7 @@ function load_tmle_spec(file) end TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{TMLE.EICEstimate}}}) where names = + Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt)) + +TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{FailedEstimation}}}) where names = Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt)) \ No newline at end of file diff --git a/test/cache_managers.jl b/test/cache_managers.jl index 6574680..0252f13 100644 --- a/test/cache_managers.jl +++ b/test/cache_managers.jl @@ -10,6 +10,8 @@ using TMLE cache_manager.cache["Tata"] = 2 TargetedEstimation.release!(cache_manager, nothing) @test cache_manager.cache == Dict() + # Check this does not throw + TargetedEstimation.release!(cache_manager, nothing) end @testset "Test MaxSizeCacheManager" begin @@ -23,6 +25,8 @@ end @test length(cache_manager.cache) == 4 TargetedEstimation.release!(cache_manager, nothing) @test length(cache_manager.cache) == 3 + TargetedEstimation.release!(cache_manager, nothing) + @test length(cache_manager.cache) == 3 end @testset "Test ReleaseUnusableCacheManager" begin @@ -77,8 +81,8 @@ end # Y_T₁ and T₁_W are no longer needed TargetedEstimation.release!(cache_manager, estimands[3]) @test cache_manager.cache == Dict() - - + # Check this does not throw + TargetedEstimation.release!(cache_manager, estimands[1]) end end diff --git a/test/config/problematic_tmle_ose_config.jl b/test/config/problematic_tmle_ose_config.jl new file mode 100644 index 0000000..a517cf4 --- 
/dev/null +++ b/test/config/problematic_tmle_ose_config.jl @@ -0,0 +1,14 @@ +default_models = TMLE.default_models( + Q_continuous = LinearRegressor(), + # For the estimation of E[Y|W, T]: binary target + Q_binary = LogisticClassifier(), + # This will fail + G = LogisticClassifier() +) + +models = merge(default_models, (T2 = LinearRegressor(),)) + +ESTIMATORS = ( + TMLE = TMLEE(models=models, weighted=true, ps_lowerbound=0.001), + OSE = OSE(models=models) +) \ No newline at end of file diff --git a/test/models/biallelic_snp_encoder.jl b/test/models/biallelic_snp_encoder.jl index f929508..5eb7406 100644 --- a/test/models/biallelic_snp_encoder.jl +++ b/test/models/biallelic_snp_encoder.jl @@ -49,3 +49,4 @@ end end +true \ No newline at end of file diff --git a/test/resampling.jl b/test/resampling.jl index 9032ad7..4976dc3 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -104,6 +104,6 @@ end @test length(ttp) == 5 end -true +end -end \ No newline at end of file +true \ No newline at end of file diff --git a/test/runner.jl b/test/runner.jl index cde4dbb..6782d22 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -222,29 +222,55 @@ end rm(outputs.json.filename) end -@testset "Test tmle_estimation: Failing parameters" begin +@testset "Test tmle: Failing estimands" begin build_dataset(;n=1000, format="csv") - parsed_args = Dict( - "dataset" => "data.csv", - "estimands-config" => joinpath("config", "failing_parameters.yaml"), - "estimators-config" => joinpath("config", "tmle_config.jl"), - "csv-out" => "output.csv", - "verbosity" => 0, - "hdf5-out" => nothing, - "pval-threshold" => 1e-10, - "chunksize" => 10 + outputs = TargetedEstimation.Outputs( + json=TargetedEstimation.JSONOutput(filename="output.json"), + hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5") ) + tmpdir = mktempdir(cleanup=true) + estimandsfile = joinpath(tmpdir, "configuration.json") + configuration = statistical_estimands_only_config() + TMLE.write_json(estimandsfile, configuration) + 
estimatorfile = joinpath(CONFIGDIR, "problematic_tmle_ose_config.jl") + datafile = "data.csv" - tmle_estimation(parsed_args) + runner = Runner(datafile, estimandsfile, estimatorfile; outputs=outputs); + runner() - # Essential results - data = CSV.read(parsed_args["csv-out"], DataFrame) - @test size(data) == (1, 19) - @test data[1, :TMLE_ESTIMATE] === missing + # Test failed nuisance estimates (T2 model) + @test runner.failed_nuisance == Set([ + TMLE.ConditionalDistribution(:T2, (:W1, :W2)) + ]) - rm(parsed_args["dataset"]) - rm(parsed_args["csv-out"]) + # Check results from JSON + results_from_json = TMLE.read_json(outputs.json.filename) + for estimator in (:OSE, :TMLE) + @test results_from_json[1][estimator][:error] == "Could not fit the following propensity score model: P₀(T2 | W1, W2)" + @test results_from_json[1][estimator][:estimand] isa TMLE.Estimand + @test results_from_json[2][estimator] isa TMLE.EICEstimate + for i in 3:6 + @test results_from_json[i][estimator][:error] == "Skipped due to shared failed nuisance fit." 
+ @test results_from_json[i][estimator][:estimand] isa TMLE.Estimand + end + end + + # Check results from HDF5 + hdf5file = jldopen(outputs.hdf5.filename) + for estimator in (:OSE, :TMLE) + @test hdf5file["1/result"][estimator] isa TargetedEstimation.FailedEstimation + @test hdf5file["2/result"][estimator] isa TMLE.EICEstimate + for i in 3:6 + @test hdf5file[string(i, "/result")][estimator] isa TargetedEstimation.FailedEstimation + @test hdf5file[string(i, "/result")][estimator].estimand isa TMLE.Estimand + end + end + close(hdf5file) + # Clean + rm(outputs.json.filename) + rm(outputs.hdf5.filename) + rm(datafile) end end; diff --git a/test/runtests.jl b/test/runtests.jl index 0a34f46..13cf36f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,10 +1,15 @@ +using TargetedEstimation -include("cache_managers.jl") -include("utils.jl") -include("sieve_variance.jl") -include("runner.jl") -include("merge.jl") -include("resampling.jl") -include(joinpath("models", "glmnet.jl")) -include(joinpath("models", "adaptive_interaction_transformer.jl")) -include(joinpath("models", "biallelic_snp_encoder.jl")) +TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") + +@time begin + @test include(joinpath(TESTDIR, "cache_managers.jl")) + @test include(joinpath(TESTDIR, "utils.jl")) + @test include(joinpath(TESTDIR, "sieve_variance.jl")) + @test include(joinpath(TESTDIR, "runner.jl")) + @test include(joinpath(TESTDIR, "merge.jl")) + @test include(joinpath(TESTDIR, "resampling.jl")) + @test include(joinpath(TESTDIR, "models", "glmnet.jl")) + @test include(joinpath(TESTDIR, "models", "adaptive_interaction_transformer.jl")) + @test include(joinpath(TESTDIR, "models", "biallelic_snp_encoder.jl")) +end \ No newline at end of file diff --git a/test/utils.jl b/test/utils.jl index 1859622..10d6237 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -9,6 +9,11 @@ using MLJBase using MLJLinearModels using CategoricalArrays +check_type(treatment_value, ::Type{T}) where T = @test 
treatment_value isa T + +check_type(treatment_values::NamedTuple, ::Type{T}) where T = + @test treatment_values.case isa T && treatment_values.control isa T + PROJECT_DIR = dirname(dirname(pathof(TargetedEstimation))) include(joinpath(PROJECT_DIR, "test", "testutils.jl")) @@ -54,46 +59,15 @@ end estimands = TargetedEstimation.proofread_estimands(filename, dataset) for estimand in estimands if haskey(estimand.treatment_values, :T1) - @test estimand.treatment_values.T1.case isa Float64 - @test estimand.treatment_values.T1.control isa Float64 + check_type(estimand.treatment_values.T1, Float64) end if haskey(estimand.treatment_values, :T2) - @test estimand.treatment_values.T2.case isa Bool - @test estimand.treatment_values.T2.control isa Bool + check_type(estimand.treatment_values.T2, Bool) end end # Clean estimands file rm(filename) end - -@testset "Test CSV writing" begin - Ψ = IATE( - outcome=:Y, - treatment_values=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")), - treatment_confounders=(T₁=[:W₁, :W₂], T₂=[:W₁, :W₂]) - ) - @test TargetedEstimation.covariates_string(Ψ) === missing - @test TargetedEstimation.param_string(Ψ) == "IATE" - @test TargetedEstimation.case_string(Ψ) == "1_&_AC" - @test TargetedEstimation.control_string(Ψ) == "0_&_CC" - @test TargetedEstimation.treatment_string(Ψ) == "T₁_&_T₂" - @test TargetedEstimation.confounders_string(Ψ) == "W₁_&_W₂" - - Ψ = CM( - outcome=:Y, - treatment_values=(T₁=1, T₂="AC"), - treatment_confounders=(T₁=[:W₁, :W₂], T₂ = [:W₁, :W₂]), - outcome_extra_covariates=[:C₁] - ) - - @test TargetedEstimation.covariates_string(Ψ) === "C₁" - @test TargetedEstimation.param_string(Ψ) == "CM" - @test TargetedEstimation.case_string(Ψ) == "1_&_AC" - @test TargetedEstimation.control_string(Ψ) === missing - @test TargetedEstimation.treatment_string(Ψ) == "T₁_&_T₂" - @test TargetedEstimation.confounders_string(Ψ) == "W₁_&_W₂" -end - @testset "Test coerce_types!" 
begin Ψ = IATE( outcome=:Ycont, @@ -171,33 +145,6 @@ end @test sample_ids == [2] end -@testset "Test write_target_results with missing values" begin - filename = "test.csv" - parameters = [ - CM( - target=:Y, - treatment=(T₁=1, T₂="AC"), - confounders=[:W₁, :W₂], - covariates=[:C₁] - )] - tmle_results = [TargetedEstimation.FailedEstimation(parameters[1])] - logs = ["Error X"] - TargetedEstimation.append_csv(filename, tmle_results, logs) - out = CSV.read(filename, DataFrame) - expected_out = ["CM", "T₁_&_T₂", "1_&_AC", missing, "Y", "W₁_&_W₂", "C₁", - missing, missing, missing, missing, missing, missing, - missing, missing, missing, missing, missing, - "Error X"] - for (x, y) in zip(first(out), expected_out) - if x === missing - @test x === y - else - @test x == y - end - end - rm(filename) -end - @testset "Test make_categorical! and make_float!" begin dataset = DataFrame( T₁ = [1, 1, 0, 0], @@ -228,7 +175,7 @@ end TargetedEstimation.make_categorical(dataset.T₁, true) === dataset.T₁ end -@tetset "Test JSON writing" begin +@testset "Test JSON writing" begin results = [] for Ψ in statistical_estimands_only_config().estimands push!(results, ( @@ -237,11 +184,11 @@ end )) end tmpdir = mktempdir(cleanup=true) - filename = joinpath(tmpdir, "output_test.json") - TargetedEstimation.initialize_json(filename) - TargetedEstimation.update(filename, results[1:3]) - TargetedEstimation.update(filename, results[4:end]; finalize=true) - loaded_results = TMLE.read_json(filename) + jsonoutput = TargetedEstimation.JSONOutput(filename=joinpath(tmpdir, "output_test.json")) + TargetedEstimation.initialize_json(jsonoutput.filename) + TargetedEstimation.update_file(jsonoutput, results[1:3]) + TargetedEstimation.update_file(jsonoutput, results[4:end]; finalize=true) + loaded_results = TMLE.read_json(jsonoutput.filename) @test size(loaded_results) == size(results) for (result, loaded_result) in zip(results, loaded_results) @test result.TMLE.estimate == loaded_result[:TMLE].estimate From 
77a8f3f53da34057a090cd3f79f90c5279b7bbc1 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 21 Nov 2023 18:11:39 +0000 Subject: [PATCH 05/71] start to fix sieve variance --- src/sieve_variance.jl | 26 ++++----- test/sieve_variance.jl | 126 +++++++++++++++++------------------------ test/utils.jl | 6 +- 3 files changed, 69 insertions(+), 89 deletions(-) diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl index f6dfeb6..34154d0 100644 --- a/src/sieve_variance.jl +++ b/src/sieve_variance.jl @@ -88,32 +88,32 @@ function build_work_list(prefix, grm_ids) x -> startswith(x, prefix_) && endswith(x, ".hdf5"), readdir(dirname__) ) - hdf5files = [joinpath(dirname_, x) for x in hdf5files] + hdf5files = sort([joinpath(dirname_, x) for x in hdf5files]) influence_curves = Vector{Float32}[] n_obs = Int[] - sieve_df = sieve_dataframe() + tmle_results = [] for hdf5file in hdf5files jldopen(hdf5file) do io # templateΨs = io["parameters"] # results = io["results"] for key in keys(io) result_group = io[key] - tmleresult = io[key]["result"] - Ψ = tmleresult.parameter - sample_ids = haskey(result_group, "sample_ids") ? result_group["sample_ids"] : - io[string(result_group["sample_ids_idx"])]["sample_ids"] - sample_ids = string.(sample_ids) - Ψ̂ = TMLE.estimate(tmleresult.tmle) - - push!(influence_curves, align_ic(tmleresult.tmle.IC, sample_ids, grm_ids)) - push!(n_obs, size(sample_ids, 1)) - push_sieveless!(sieve_df, Ψ, Ψ̂) + tmleresult = io[key]["result"].TMLE + if size(tmleresult.IC, 1) > 0 + sample_ids = haskey(result_group, "sample_ids") ? result_group["sample_ids"] : + io[string(result_group["sample_ids_idx"])]["sample_ids"] + sample_ids = string.(sample_ids) + + push!(influence_curves, align_ic(tmleresult.IC, sample_ids, grm_ids)) + push!(n_obs, size(sample_ids, 1)) + push!(tmle_results, tmleresult) + end end end end influence_curves = length(influence_curves) > 0 ? 
reduce(vcat, transpose(influence_curves)) : Matrix{Float32}(undef, 0, 0) - return sieve_df, influence_curves, n_obs + return tmle_results, influence_curves, n_obs end diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index 901a286..daf683e 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -11,6 +11,8 @@ using StableRNGs using Distributions using LogExpFunctions +TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") + function build_dataset(sample_ids) rng = StableRNG(123) n = size(sample_ids, 1) @@ -44,24 +46,14 @@ function build_dataset(sample_ids) CSV.write("data.csv", dataset) end -function build_tmle_output_file(sample_ids, param_file, outprefix) +function build_tmle_output_file(sample_ids, estimandfile, outprefix, pval) build_dataset(sample_ids) - # Only one continuous phenotype / machines not saved / no adaptive cv - parsed_args = Dict( - "data" => "data.csv", - "param-file" => param_file, - "estimator-file" => joinpath("config", "tmle_config.jl"), - "csv-out" => string(outprefix, ".csv"), - "verbosity" => 0, - "hdf5-out" => string(outprefix, ".hdf5"), - "pval-threshold" => 1., - "chunksize" => 100 + outputs = TargetedEstimation.Outputs( + hdf5=TargetedEstimation.HDF5Output(filename=string(outprefix, ".hdf5"), pval_threshold=pval), ) - - TargetedEstimation.tmle_estimation(parsed_args) + tmle("data.csv", estimandfile, joinpath(TESTDIR, "config", "tmle_ose_config.jl"), outputs=outputs) end - function basic_variance_implementation(matrix_distance, influence_curve, n_obs) variance = 0.f0 n_samples = size(influence_curve, 1) @@ -101,9 +93,8 @@ function test_initial_output(output, expected_output) end end end - @testset "Test readGRM" begin - prefix = joinpath("data", "grm", "test.grm") + prefix = joinpath(TESTDIR, "data", "grm", "test.grm") GRM, ids = TargetedEstimation.readGRM(prefix) @test eltype(ids.SAMPLE_ID) == String @test size(GRM, 1) == 18915 @@ -111,65 +102,55 @@ end end @testset "Test build_work_list" begin - grm_ids = 
TargetedEstimation.GRMIDs(joinpath("data", "grm", "test.grm.id")) - param_file_1 = joinpath("config", "sieve_tests_parameters_1.yaml") - outprefix_1 = "tmle_output_1" - prefix = "tmle_output" - # CASE_1: only one file - build_tmle_output_file(grm_ids.SAMPLE_ID, param_file_1, outprefix_1) - # Since pval = 1., all parameters are considered for sieve variance - sieve_df, influence_curves, n_obs = TargetedEstimation.build_work_list(prefix, grm_ids) - @test n_obs == [193, 193, 193, 194, 194, 194] + grm_ids = TargetedEstimation.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) + tmpdir = mktempdir(cleanup=true) + configuration = statistical_estimands_only_config() + + # CASE_1: Since pval = 1. + # Simulate multiple runs that occured + pval = 1. + config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3]) + estimandsfile_1 = joinpath(tmpdir, "configuration_1.json") + TMLE.write_json(estimandsfile_1, config_1) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1", pval) + + config_2 = TMLE.Configuration(estimands=configuration.estimands[4:end]) + estimandsfile_2 = joinpath(tmpdir, "configuration_2.json") + TMLE.write_json(estimandsfile_2, config_2) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2", pval) + + results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids) + # Check n_obs + @test n_obs == [194, 194, 194, 193, 193, 194] # Check influence curves - io = jldopen(string(outprefix_1, ".hdf5")) - for key in keys(io) - result = io[key]["result"] - IC = result.tmle.IC - # missing sample - if result.parameter.target == Symbol("BINARY/OUTCOME") - IC = vcat(0, IC) - end - @test convert(Vector{Float32}, IC) == influence_curves[parse(Int, key), :] + expected_influence_curves = [size(r.IC, 1) == 194 ? 
r.IC : vcat(0, r.IC) for r in results] + for rowindex in 1:6 + @test convert(Vector{Float32}, expected_influence_curves[rowindex]) == influence_curves[rowindex, :] end - close(io) - # Check output - some_expected_cols = DataFrame( - PARAMETER_TYPE = ["IATE", "IATE", "ATE", "IATE", "IATE", "ATE"], - TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2"], - CASE=["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true"], - CONTROL=["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false"], - OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"], - CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2"], - COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1"] - ) - test_initial_output(sieve_df, some_expected_cols) - - # CASE_2: add another file - param_file_2 = joinpath("config", "sieve_tests_parameters_2.yaml") - outprefix_2 = "tmle_output_2" - build_tmle_output_file(grm_ids.SAMPLE_ID, param_file_2, outprefix_2) - # This p-value filters the influence curves for the binary outcome - sieve_df, influence_curves, n_obs = TargetedEstimation.build_work_list(prefix, grm_ids) - @test size(influence_curves) == (8, 194) - @test n_obs == [193, 193, 193, 194, 194, 194, 194, 194] - - # Check output - some_expected_cols = DataFrame( - PARAMETER_TYPE = ["IATE", "IATE", "ATE", "IATE", "IATE", "ATE", "ATE", "CM"], - TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1", "T1"], - CASE = ["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true", "true", "false"], - CONTROL = ["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false", "false", missing], - OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, 
OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"], - CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1", "W1"], - COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1", missing, missing] - ) - test_initial_output(sieve_df, some_expected_cols) - + # Check results + all(x isa TMLE.TMLEstimate for x in results) + all(size(x.IC, 1) > 0 for x in results) # clean - rm(string(outprefix_1, ".hdf5")) - rm(string(outprefix_1, ".csv")) - rm(string(outprefix_2, ".hdf5")) - rm(string(outprefix_2, ".csv")) + rm("tmle_output_1.hdf5") + rm("tmle_output_2.hdf5") + + # CASE_2: Since pval = 0.8 + pval = 0.8 + estimandsfile = joinpath(tmpdir, "configuration.json") + TMLE.write_json(estimandsfile, configuration) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output", pval) + results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids) + # Check n_obs + @test n_obs == [194, 194, 194] + # Check influence curves + for rowindex in 1:3 + @test convert(Vector{Float32}, results[rowindex].IC) == influence_curves[rowindex, :] + end + # Check results + all(x isa TMLE.TMLEstimate for x in results) + all(size(x.IC, 1) > 0 for x in results) + # Clean + rm("tmle_output.hdf5") rm("data.csv") end @@ -257,7 +238,6 @@ end # Check by hand for a single τ=0.5 @test variances[2, :] ≈ Float32[0.03666667, 0.045, 0.045] - end @testset "Test grm_rows_bounds" begin diff --git a/test/utils.jl b/test/utils.jl index 10d6237..f98d1bc 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -14,12 +14,12 @@ check_type(treatment_value, ::Type{T}) where T = @test treatment_value isa T check_type(treatment_values::NamedTuple, ::Type{T}) where T = @test treatment_values.case isa T && treatment_values.control isa T -PROJECT_DIR = dirname(dirname(pathof(TargetedEstimation))) +TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") -include(joinpath(PROJECT_DIR, "test", "testutils.jl")) +include(joinpath(TESTDIR, "testutils.jl")) @testset "Test 
load_tmle_spec: with configuration file" begin - estimators = TargetedEstimation.load_tmle_spec(joinpath(PROJECT_DIR, "test", "config", "tmle_ose_config.jl")) + estimators = TargetedEstimation.load_tmle_spec(joinpath(TESTDIR, "config", "tmle_ose_config.jl")) @test estimators.TMLE isa TMLE.TMLEE @test estimators.OSE isa TMLE.OSE @test estimators.TMLE.weighted === true From 4e7c24ec4171aa9bbba5e40fa07136e163a02204 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 21 Nov 2023 18:12:57 +0000 Subject: [PATCH 06/71] fix another test --- test/sieve_variance.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index daf683e..6057fa7 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -251,7 +251,7 @@ end end @testset "Test corrected_stderrors" begin - io = jldopen(joinpath("data", "sieve_variances.hdf5")) + io = jldopen(joinpath(TESTDIR, "data", "sieve_variances.hdf5")) variances = io["variances"] n_obs = [10, 10, 10, 10, 10, 100, 100, 1000, 1000, 1000] stderrors = TargetedEstimation.corrected_stderrors(variances, n_obs) From f5cd1d40a0e11de4eb2c7af9b40b83fccf74473b Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 22 Nov 2023 14:20:22 +0000 Subject: [PATCH 07/71] fix svp --- Comonicon.toml | 8 +++ src/TargetedEstimation.jl | 3 + src/runner.jl | 20 +++++-- src/sieve_variance.jl | 88 +++++++++++++++++------------ test/sieve_variance.jl | 116 ++++++++++++++++++-------------------- 5 files changed, 131 insertions(+), 104 deletions(-) create mode 100644 Comonicon.toml diff --git a/Comonicon.toml b/Comonicon.toml new file mode 100644 index 0000000..bd97f0f --- /dev/null +++ b/Comonicon.toml @@ -0,0 +1,8 @@ +name = "TargetedEstimation" + +[install] +completion = true +quiet = false +optimize = 2 + +[sysimg] \ No newline at end of file diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index 2f809ec..5dd5011 100644 --- a/src/TargetedEstimation.jl +++ 
b/src/TargetedEstimation.jl @@ -41,6 +41,9 @@ include(joinpath("models", "glmnet.jl")) include(joinpath("models", "adaptive_interaction_transformer.jl")) include(joinpath("models", "biallelic_snp_encoder.jl")) +"""TL CLI.""" +@main + export Runner, tmle, sieve_variance_plateau, merge_csv_files export GLMNetRegressor, GLMNetClassifier export RestrictedInteractionTransformer, BiAllelicSNPEncoder diff --git a/src/runner.jl b/src/runner.jl index 24cff63..edf3812 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -149,6 +149,15 @@ end """ + tmle(dataset, estimands, estimators; + verbosity=0, + outputs=Outputs(), + chunksize=100, + rng=123, + cache_strategy="release-unusable", + sort_estimands=false + ) + TMLE CLI. # Args @@ -160,23 +169,22 @@ TMLE CLI. # Options - `-v, --verbosity`: Verbosity level. -- `-j, --json_out`: JSON output filename. -- `--hdf5_out`: HDF5 output filename. +- `-o, --outputs`: Ouputs to be genrated. - `--chunksize`: Results are written in batches of size chunksize. - `-r, --rng`: Random seed (Only used for estimands ordering at the moment). -- `-c, --cache_strategy`: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size"). +- `-c, --cache-strategy`: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size"). # Flags - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time). """ -@main function tmle(dataset, estimands, estimators; +@cast function tmle(dataset, estimands, estimators; verbosity=0, outputs=Outputs(), chunksize=100, rng=123, cache_strategy="release-unusable", - sort_estimands=false + sort_estimands::Bool=false ) runner = Runner(dataset, estimands, estimators; verbosity=verbosity, @@ -189,4 +197,4 @@ TMLE CLI. runner() verbosity >= 1 && @info "Done." 
return -end \ No newline at end of file +end diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl index 34154d0..cc953e9 100644 --- a/src/sieve_variance.jl +++ b/src/sieve_variance.jl @@ -99,7 +99,7 @@ function build_work_list(prefix, grm_ids) # results = io["results"] for key in keys(io) result_group = io[key] - tmleresult = io[key]["result"].TMLE + tmleresult = first(io[key]["result"]) if size(tmleresult.IC, 1) > 0 sample_ids = haskey(result_group, "sample_ids") ? result_group["sample_ids"] : io[string(result_group["sample_ids_idx"])]["sample_ids"] @@ -184,7 +184,6 @@ function compute_variances(influence_curves, grm, τs, n_obs) return variances end - function grm_rows_bounds(n_samples) bounds = Pair{Int, Int}[] start_idx = 1 @@ -198,59 +197,74 @@ function grm_rows_bounds(n_samples) return bounds end - -function save_results(outprefix, output, τs, variances) - CSV.write(string(outprefix, ".csv"), output) +function save_results(outprefix, results, τs, variances) + TMLE.write_json(string(outprefix, ".json"), results) jldopen(string(outprefix, ".hdf5"), "w") do io io["taus"] = τs io["variances"] = variances end end - -corrected_stderrors(variances, n_obs) = - sqrt.(view(maximum(variances, dims=1), 1, :) ./ n_obs) - -function update_sieve_df!(df, stds) - n = size(stds, 1) - df.SIEVE_STD = Vector{Float64}(undef, n) - df.SIEVE_PVALUE = Vector{Float64}(undef, n) - df.SIEVE_LWB = Vector{Float64}(undef, n) - df.SIEVE_UPB = Vector{Float64}(undef, n) - - for index in 1:n - std = stds[index] - estimate = df.TMLE_ESTIMATE[index] - testresult = OneSampleZTest(estimate, std, 1) - lwb, upb = confint(testresult) - df.SIEVE_STD[index] = std - df.SIEVE_PVALUE[index] = pvalue(testresult) - df.SIEVE_LWB[index] = lwb - df.SIEVE_UPB[index] = upb +corrected_stderrors(variances) = + sqrt.(view(maximum(variances, dims=1), 1, :)) + +function update_with_sieve_estimate!(results, stds) + for index in eachindex(results) + old = results[index] + results[index] = typeof(old)( + 
old.estimand, + old.estimate, + convert(Float64, stds[index]), + old.n, + Float64[] + ) end - - select!(df, Not(:TMLE_ESTIMATE)) end -function sieve_variance_plateau(parsed_args) - prefix = parsed_args["prefix"] - outprefix = parsed_args["out-prefix"] - verbosity = parsed_args["verbosity"] +""" + sieve_variance_plateau(input_prefix; + output_prefix="svp", + grm_prefix="GRM", + verbosity=0, + n_estimators=10, + max_tau=0.8 + ) + +Sieve Variance Plateau CLI. + +# Args + +- `input-prefix`: Input prefix to HDF5 files generated by the tmle CLI. - τs = default_τs(parsed_args["nb-estimators"];max_τ=parsed_args["max-tau"]) - grm, grm_ids = readGRM(parsed_args["grm-prefix"]) +# Options + +- `-o, --output-prefix`: Output prefix. +- `-g, --grm-prefix`: Prefix to the aggregated GRM. +- `-v, --verbosity`: Verbosity level. +- `-n, --n_estimators`: Number of variance estimators to build for each estimate. +- `-m, --max_tau`: Maximum distance between any two individuals. +""" +@cast function sieve_variance_plateau(input_prefix; + output_prefix="svp", + grm_prefix="GRM", + verbosity=0, + n_estimators=10, + max_tau=0.8 + ) + τs = default_τs(n_estimators;max_τ=max_tau) + grm, grm_ids = readGRM(grm_prefix) verbosity > 0 && @info "Preparing work list." - sieve_df, influence_curves, n_obs = build_work_list(prefix, grm_ids) + results, influence_curves, n_obs = build_work_list(input_prefix, grm_ids) if length(influence_curves) > 0 verbosity > 0 && @info "Computing variance estimates." variances = compute_variances(influence_curves, grm, τs, n_obs) - std_errors = corrected_stderrors(variances, n_obs) - update_sieve_df!(sieve_df, std_errors) + std_errors = corrected_stderrors(variances) + update_with_sieve_estimate!(results, std_errors) else variances = Float32[] end - save_results(outprefix, sieve_df, τs, variances) + save_results(output_prefix, results, τs, variances) verbosity > 0 && @info "Done." 
return 0 diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index 6057fa7..3359f2a 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -13,6 +13,8 @@ using LogExpFunctions TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") +include(joinpath(TESTDIR, "testutils.jl")) + function build_dataset(sample_ids) rng = StableRNG(123) n = size(sample_ids, 1) @@ -106,7 +108,7 @@ end tmpdir = mktempdir(cleanup=true) configuration = statistical_estimands_only_config() - # CASE_1: Since pval = 1. + # CASE_1: pval = 1. # Simulate multiple runs that occured pval = 1. config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3]) @@ -134,17 +136,18 @@ end rm("tmle_output_1.hdf5") rm("tmle_output_2.hdf5") - # CASE_2: Since pval = 0.8 - pval = 0.8 + # CASE_2: pval = 0.1 + pval = 0.1 estimandsfile = joinpath(tmpdir, "configuration.json") TMLE.write_json(estimandsfile, configuration) - build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output", pval) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile, "tmle_output", pval) results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids) # Check n_obs - @test n_obs == [194, 194, 194] + @test n_obs == [194, 193, 193, 194] # Check influence curves - for rowindex in 1:3 - @test convert(Vector{Float32}, results[rowindex].IC) == influence_curves[rowindex, :] + expected_influence_curves = [size(r.IC, 1) == 194 ? 
r.IC : vcat(0, r.IC) for r in results] + for rowindex in 1:4 + @test convert(Vector{Float32}, expected_influence_curves[rowindex]) == influence_curves[rowindex, :] end # Check results all(x isa TMLE.TMLEstimate for x in results) @@ -253,76 +256,67 @@ end @testset "Test corrected_stderrors" begin io = jldopen(joinpath(TESTDIR, "data", "sieve_variances.hdf5")) variances = io["variances"] - n_obs = [10, 10, 10, 10, 10, 100, 100, 1000, 1000, 1000] - stderrors = TargetedEstimation.corrected_stderrors(variances, n_obs) + stderrors = TargetedEstimation.corrected_stderrors(variances) # sanity check @test size(stderrors, 1) == 10 # check for the first curve - stderrors[1] == sqrt(maximum(variances[:,1])/n_obs[1]) + stderrors[1] == sqrt(maximum(variances[:,1])) close(io) end @testset "Test sieve_variance_plateau" begin # Generate data - nb_estimators = 10 - grm_ids = TargetedEstimation.GRMIDs(joinpath("data", "grm", "test.grm.id")) - param_file_1 = joinpath("config", "sieve_tests_parameters_1.yaml") - tmle_outprefix_1 = "tmle_output_1" - param_file_2 = joinpath("config", "sieve_tests_parameters_2.yaml") - tmle_outprefix_2 = "tmle_output_2" - build_tmle_output_file(grm_ids.SAMPLE_ID, param_file_1, tmle_outprefix_1) - build_tmle_output_file(grm_ids.SAMPLE_ID, param_file_2, tmle_outprefix_2) - - outprefix = "sieve_output" - parsed_args = Dict( - "prefix" => "tmle_output", - "pval" => 1e-10, - "grm-prefix" => "data/grm/test.grm", - "out-prefix" => outprefix, - "nb-estimators" => nb_estimators, - "max-tau" => 0.75, - "verbosity" => 0 + grm_ids = TargetedEstimation.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) + tmpdir = mktempdir(cleanup=true) + configuration = statistical_estimands_only_config() + pval = 0.1 + config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3]) + estimandsfile_1 = joinpath(tmpdir, "configuration_1.json") + TMLE.write_json(estimandsfile_1, config_1) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1", pval) + + 
config_2 = TMLE.Configuration(estimands=configuration.estimands[4:end]) + estimandsfile_2 = joinpath(tmpdir, "configuration_2.json") + TMLE.write_json(estimandsfile_2, config_2) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2", pval) + + sieve_variance_plateau("tmle_output"; + grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"), + max_tau=0.75 ) - sieve_variance_plateau(parsed_args) - # check hdf5 file - io = jldopen(string(outprefix, ".hdf5")) - @test io["taus"] == TargetedEstimation.default_τs(nb_estimators; max_τ=parsed_args["max-tau"]) - @test size(io["variances"]) == (10, 8) + # Check HDF5 file + io = jldopen("svp.hdf5") + @test io["taus"] == TargetedEstimation.default_τs(10; max_τ=0.75) + @test size(io["variances"]) == (10, 4) close(io) - # check csv file - output = TargetedEstimation.read_output_with_types(string(outprefix, ".csv")) - some_expected_cols = DataFrame( - PARAMETER_TYPE = ["IATE", "IATE", "ATE", "IATE", "IATE", "ATE", "ATE", "CM"], - TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1", "T1"], - CASE = ["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true", "true", "false"], - CONTROL = ["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false", "false", missing], - OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"], - CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1", "W1"], - COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1", missing, missing] - ) - test_initial_output(output, some_expected_cols) - @test output.SIEVE_PVALUE isa Vector{Float64} - @test output.SIEVE_LWB isa Vector{Float64} - @test output.SIEVE_UPB isa Vector{Float64} - @test output.SIEVE_STD isa Vector{Float64} - - tmle_output = TargetedEstimation.load_csv_files( - 
TargetedEstimation.empty_tmle_output(), - ["tmle_output_1.csv", "tmle_output_2.csv"] + + # Check JSON file + svp_results = TMLE.read_json("svp.json") + tmleout1 = jldopen("tmle_output_1.hdf5") + tmleout2 = jldopen("tmle_output_2.hdf5") + + src_results = vcat( + [tmleout1[string(i, "/result")].TMLE for i in 1:3], + [tmleout2[string(i, "/result")].TMLE for i in 1:3], ) - joined = leftjoin(tmle_output, output, on=TargetedEstimation.joining_keys(), matchmissing=:equal) - @test all(joined.SIEVE_PVALUE .> 0 ) + for svp_result in svp_results + src_result_index = findall(x.estimand == svp_result.estimand for x in src_results) + src_result = src_results[only(src_result_index)] + @test src_result.std != svp_result.std + @test src_result.estimate == svp_result.estimate + @test src_result.n == svp_result.n + @test svp_result.IC == [] + end + # clean - rm(string(outprefix, ".csv")) - rm(string(outprefix, ".hdf5")) - rm(string(tmle_outprefix_1, ".hdf5")) - rm(string(tmle_outprefix_1, ".csv")) - rm(string(tmle_outprefix_2, ".hdf5")) - rm(string(tmle_outprefix_2, ".csv")) + rm("svp.json") + rm("svp.hdf5") + rm("tmle_output_1.hdf5") + rm("tmle_output_2.hdf5") rm("data.csv") end From 78c4ee92708aaae0c7b66e564cfae64494d57a43 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 23 Nov 2023 17:58:54 +0000 Subject: [PATCH 08/71] start to work on composed estimands --- src/runner.jl | 10 ++++++- src/utils.jl | 73 +++++++++++++++++++++++++++++++++++++---------- test/runner.jl | 38 +++++++++++++++++++++++- test/testutils.jl | 22 ++++++++++++++ test/utils.jl | 30 +++++++++++++++++++ 5 files changed, 156 insertions(+), 17 deletions(-) diff --git a/src/runner.jl b/src/runner.jl index edf3812..0cf30e5 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -21,10 +21,16 @@ initialize(output::JSONOutput) = initialize_json(output.filename) pval_threshold::Union{Nothing, Float64} = nothing end +@option struct JLSOutput + filename::Union{Nothing, String} = nothing + pval_threshold::Union{Nothing, 
Float64} = nothing +end + @option struct Outputs json::JSONOutput = JSONOutput() hdf5::HDF5Output = HDF5Output() - std::Bool = false + jls::JLSOutput = JLSOutput() + std::Bool = false end function initialize(outputs::Outputs) @@ -75,6 +81,8 @@ function save(runner::Runner, results, partition, finalize) update_file(runner.outputs.std, results, partition) # Append JSON Output update_file(runner.outputs.json, results; finalize=finalize) + # Append JLS Output + update_file(runner.outputs.jls, results) # Append HDF5 Output update_file(runner.outputs.hdf5, partition, results, runner.dataset) end diff --git a/src/utils.jl b/src/utils.jl index ce62c37..dfd376b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -46,7 +46,7 @@ function update_file(doprint, results, partition) end ##################################################################### -#####  JLD2 OUTPUT #### +#####  HDF5 OUTPUT #### ##################################################################### @@ -72,6 +72,21 @@ function update_file(output::HDF5Output, partition, results, dataset) end end +##################################################################### +#####  JLS OUTPUT #### +##################################################################### + +function update_file(output::JLSOutput, results) + output.filename === nothing && return + + open(output.filename, "a") do io + for result in results + result = TMLE.emptyIC(result, output.pval_threshold) + serialize(io, result) + end + end +end + ##################################################################### #####  Read TMLE Estimands Configuration #### ##################################################################### @@ -97,9 +112,49 @@ maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::SCM, method) = maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::Nothing, method) = throw(MissingSCMError()) +function maybe_identify(Ψ::TMLE.ComposedEstimand, scm, method) + method = get_identification_method(method) + return 
TMLE.ComposedEstimand(Ψ.f, Tuple(maybe_identify(arg, scm, method) for arg ∈ Ψ.args)) +end + maybe_identify(Ψ, scm, method) = Ψ -read_method(extension) = extension == ".json" ? read_json : read_yaml +function read_method(extension) + method = if extension == ".json" + TMLE.read_json + elseif extension == ".yaml" + TMLE.read_yaml + elseif extension == ".jls" + deserialize + else + throw(ArgumentError(string("Can't read from ", extension, " file"))) + end + return method +end + +function fix_treatment_values!(treatment_types::AbstractDict, Ψ::ComposedEstimand, dataset) + new_args = Tuple(fix_treatment_values!(treatment_types, arg, dataset) for arg in Ψ.args) + return ComposedEstimand(Ψ.f, new_args) +end + +""" +Uses the values found in the dataset to create a new estimand with adjusted values. +""" +function fix_treatment_values!(treatment_types::AbstractDict, Ψ, dataset) + treatment_names = keys(Ψ.treatment_values) + for tn in treatment_names + haskey(treatment_types, tn) ? nothing : treatment_types[tn] = eltype(dataset[!, tn]) + end + new_treatment = NamedTuple{treatment_names}( + convert_treatment_values(Ψ.treatment_values, treatment_types) + ) + return typeof(Ψ)( + outcome = Ψ.outcome, + treatment_values = new_treatment, + treatment_confounders = Ψ.treatment_confounders, + outcome_extra_covariates = Ψ.outcome_extra_covariates + ) +end """ proofread_estimands(param_file, dataset) @@ -114,19 +169,7 @@ function proofread_estimands(filename, dataset) treatment_types = Dict() for (index, Ψ) in enumerate(config.estimands) statisticalΨ = TargetedEstimation.maybe_identify(Ψ, config.scm, config.adjustment) - treatment_names = keys(statisticalΨ.treatment_values) - for tn in treatment_names - haskey(treatment_types, tn) ? 
nothing : treatment_types[tn] = eltype(dataset[!, tn]) - end - new_treatment = NamedTuple{treatment_names}( - TargetedEstimation.convert_treatment_values(statisticalΨ.treatment_values, treatment_types) - ) - estimands[index] = typeof(Ψ)( - outcome = Ψ.outcome, - treatment_values = new_treatment, - treatment_confounders = statisticalΨ.treatment_confounders, - outcome_extra_covariates = statisticalΨ.outcome_extra_covariates - ) + estimands[index] = fix_treatment_values!(treatment_types, statisticalΨ, dataset) end return estimands end diff --git a/test/runner.jl b/test/runner.jl index 6782d22..33bacc8 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -86,6 +86,7 @@ end outputs = TargetedEstimation.Outputs( json=TargetedEstimation.JSONOutput(filename="output.json"), hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1.), + jls=TargetedEstimation.JLSOutput(filename="output.jls"), std=true, ) runner = Runner( @@ -125,6 +126,23 @@ end @test loaded_result[:OSE].IC == [] end + # Test Save to JLS + loaded_results = [] + open(outputs.jls.filename) do io + while !eof(io) + push!(loaded_results, deserialize(io)) + end + end + for (result, loaded_result) in zip(results, loaded_results) + @test loaded_result[:TMLE] isa TMLE.TMLEstimate + @test result.TMLE.estimate == loaded_result[:TMLE].estimate + @test loaded_result[:TMLE].IC == [] + + @test loaded_result[:OSE] isa TMLE.OSEstimate + @test result.OSE.estimate == loaded_result[:OSE].estimate + @test loaded_result[:OSE].IC == [] + end + # Test Save to HDF5 hdf5file = jldopen(outputs.hdf5.filename, "r") for (result_index, param_index) in enumerate(4:6) @@ -156,7 +174,7 @@ end rm(outputs.hdf5.filename) end -@testset "Test tmle" begin +@testset "Test tmle: varying dataset format and chunksize" begin tmpdir = mktempdir(cleanup=true) estimands_filename = joinpath(tmpdir, "configuration.json") configuration = statistical_estimands_only_config() @@ -273,6 +291,24 @@ end rm(datafile) end +@testset "Test tmle: 
Causal and Composed Estimands" begin + build_dataset(;n=1000, format="csv") + outputs = TargetedEstimation.Outputs( + jls=TargetedEstimation.JLSOutput(filename="output.jls") + ) + tmpdir = mktempdir(cleanup=true) + estimandsfile = joinpath(tmpdir, "configuration.jls") + + configuration = causal_and_composed_estimands_config() + serialize(estimandsfile, configuration) + estimatorfile = joinpath(CONFIGDIR, "ose_config.jl") + datafile = "data.csv" + tmle(datafile, estimandsfile, estimatorfile; outputs=outputs) + + rm(datafile) +end + + end; true \ No newline at end of file diff --git a/test/testutils.jl b/test/testutils.jl index c9bc500..4f286c0 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -55,3 +55,25 @@ function statistical_estimands_only_config() ) return configuration end + +function causal_and_composed_estimands_config() + ATE₁ = ATE( + outcome = Symbol("CONTINUOUS, OUTCOME"), + treatment_values = (T1 = (case = true, control = false),), + ) + ATE₂ = ATE( + outcome = Symbol("CONTINUOUS, OUTCOME"), + treatment_values = (T1 = (case = false, control = true),), + ) + diff = ComposedEstimand(-, (ATE₁, ATE₂)) + scm = StaticSCM( + outcomes = ["CONTINUOUS, OUTCOME"], + treatments = ["T1"], + confounders = [:W1, :W2] + ) + configuration = Configuration( + estimands = [ATE₁, ATE₂, diff], + scm = scm + ) + return configuration +end diff --git a/test/utils.jl b/test/utils.jl index f98d1bc..2a3b5c0 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -198,6 +198,36 @@ end end end +@testset "Test maybe_identify" begin + scm = StaticSCM( + outcomes = [:Y], + treatments = [:T₁, :T₂], + confounders = [:W] + ) + adjustment = BackdoorAdjustment() + causalATE = ATE( + outcome = :Y, + treatment_values = (T₁ =(case=1, control=0),) + ) + statisticalATE = ATE( + outcome = :Y, + treatment_values = (T₁ =(case=1, control=0),), + treatment_confounders = (T₁=[:W],) + ) + # Correctly identifies the estimand + identifiedATE = TargetedEstimation.maybe_identify(causalATE, scm, nothing) 
+ @test statisticalATE == identifiedATE + # Just returns the estimand + @test TargetedEstimation.maybe_identify(statisticalATE, scm, nothing) === statisticalATE + # Throws if can't identify + @test_throws TargetedEstimation.MissingSCMError() TargetedEstimation.maybe_identify(causalATE, nothing, nothing) + # Composed Estimand with a weird mixture of statistical/causal estimands + diff = ComposedEstimand(-, (causalATE, statisticalATE)) + identified_diff = TargetedEstimation.maybe_identify(diff, scm, nothing) + statistical_diff = ComposedEstimand(-, (statisticalATE, statisticalATE)) + @test identified_diff == statistical_diff +end + end; true \ No newline at end of file From 57267deac9bcf0526a80c69da633e1db6321bed5 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 28 Nov 2023 11:31:25 +0000 Subject: [PATCH 09/71] add test for causal and composite estimands --- src/cache_managers.jl | 17 +++++------------ src/runner.jl | 5 ++--- src/utils.jl | 22 +++++++++------------- test/cache_managers.jl | 2 +- test/models/biallelic_snp_encoder.jl | 2 +- test/runner.jl | 28 +++++++++++++++++++++++++--- test/utils.jl | 2 +- 7 files changed, 44 insertions(+), 34 deletions(-) diff --git a/src/cache_managers.jl b/src/cache_managers.jl index 64d5004..56acd3f 100644 --- a/src/cache_managers.jl +++ b/src/cache_managers.jl @@ -10,19 +10,12 @@ function release!(cache_manager::ReleaseUnusableCacheManager, Ψ) # Always drop fluctuations haskey(cache_manager.cache, :last_fluctuation) && pop!(cache_manager.cache, :last_fluctuation) - η = TMLE.get_relevant_factors(Ψ) - # Propensity scores - for ps in η.propensity_score - cache_manager.η_counts[ps] -= 1 - if cache_manager.η_counts[ps] == 0 - delete!(cache_manager.cache, ps) + for η in TMLE.nuisance_functions_iterator(Ψ) + cache_manager.η_counts[η] -= 1 + if cache_manager.η_counts[η] == 0 + delete!(cache_manager.cache, η) end end - # Outcome Mean - cache_manager.η_counts[η.outcome_mean] -= 1 - if cache_manager.η_counts[η.outcome_mean] == 0 
- delete!(cache_manager.cache, η.outcome_mean) - end end struct MaxSizeCacheManager <: CacheManager @@ -53,7 +46,7 @@ end function make_cache_manager(estimands, string) if string == "release-unusable" - return ReleaseUnusableCacheManager(TMLE.nuisance_counts(estimands)) + return ReleaseUnusableCacheManager(TMLE.nuisance_function_counts(estimands)) elseif string == "no-cache" return NoCacheManager() else diff --git a/src/runner.jl b/src/runner.jl index 0cf30e5..0523489 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -108,9 +108,8 @@ function try_estimation(runner, Ψ, estimator) end function skip_fast(runner, Ψ) - ηs = TMLE.get_relevant_factors(Ψ) - ηs.propensity_score - any(η ∈ runner.failed_nuisance for η in (ηs.outcome_mean, ηs.propensity_score...)) && return true + ηs = TMLE.nuisance_functions_iterator(Ψ) + any(η ∈ runner.failed_nuisance for η in ηs) && return true return false end diff --git a/src/utils.jl b/src/utils.jl index dfd376b..2e188c8 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -178,20 +178,10 @@ end #####  ADDITIONAL METHODS #### ##################################################################### -TMLE.emptyIC(result::FailedEstimation) = result +TMLE.emptyIC(result::FailedEstimation, pval_threshold) = result -TMLE.emptyIC(result::FailedEstimation, pval_threshold::Float64) = result - -TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Nothing) where names = - NamedTuple{names}([TMLE.emptyIC(r) for r in result]) - -TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names = - NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result]) - -function TMLE.emptyIC(result, pval_threshold::Float64) - pval = pvalue(OneSampleZTest(result)) - return pval < pval_threshold ? 
result : TMLE.emptyIC(result) -end +TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names = + NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt]) get_sample_ids(data, variables) = dropmissing(data[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID @@ -235,6 +225,12 @@ function make_float!(dataset, colnames) end end +function coerce_types!(dataset, Ψ::ComposedEstimand) + for arg in Ψ.args + coerce_types!(dataset, arg) + end +end + function coerce_types!(dataset, Ψ) categorical_variables = Set(keys(Ψ.treatment_values)) continuous_variables = Set(Iterators.flatten(values(Ψ.treatment_confounders))) diff --git a/test/cache_managers.jl b/test/cache_managers.jl index 0252f13..294ccd2 100644 --- a/test/cache_managers.jl +++ b/test/cache_managers.jl @@ -52,7 +52,7 @@ end treatment_confounders=(T₃=[:W],) ) ] - η_counts = TMLE.nuisance_counts(estimands) + η_counts = TMLE.nuisance_function_counts(estimands) cache_manager = TargetedEstimation.ReleaseUnusableCacheManager(η_counts) # Estimation of the first estimand will fill the cache with the following Y_T₁T₂ = TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) diff --git a/test/models/biallelic_snp_encoder.jl b/test/models/biallelic_snp_encoder.jl index 5eb7406..27d544b 100644 --- a/test/models/biallelic_snp_encoder.jl +++ b/test/models/biallelic_snp_encoder.jl @@ -17,7 +17,7 @@ using MLJBase fit!(mach, verbosity=0) fitresult = fitted_params(mach).fitresult @test fitresult == Dict(:rs1234 => 'A', :rs4567 => 'C') - Xt = transform(mach) + Xt = MLJBase.transform(mach) @test Xt.rs1234[1:3] == [1, 0, 0] @test Xt.rs1234[4] === missing @test Xt.rs4567 == [0, 1, 2, 2] diff --git a/test/runner.jl b/test/runner.jl index 33bacc8..013cd41 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -14,11 +14,11 @@ using Serialization using Arrow using YAML -PKGDIR = pkgdir(TargetedEstimation) +TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") -CONFIGDIR = joinpath(PKGDIR, "test", "config") +CONFIGDIR = joinpath(TESTDIR, 
"config") -include(joinpath(PKGDIR, "test", "testutils.jl")) +include(joinpath(TESTDIR, "testutils.jl")) sort_nt_by_key(nt::NamedTuple{names}) where names = NamedTuple{sort(names)}(nt) sort_nt_by_key(x) = x @@ -169,6 +169,7 @@ end # Clean rm("data.csv") + rm(outputs.jls.filename) rm(output_txt) rm(outputs.json.filename) rm(outputs.hdf5.filename) @@ -305,7 +306,28 @@ end datafile = "data.csv" tmle(datafile, estimandsfile, estimatorfile; outputs=outputs) + results = [] + open(outputs.jls.filename) do io + while !eof(io) + push!(results, deserialize(io)) + end + end + + for (index, Ψ) ∈ enumerate(configuration.estimands) + @test results[index].OSE.estimand == identify(Ψ, configuration.scm) + end + # The components of the diff should match the estimands 1 and 2 + for index in 1:2 + ATE_from_diff = results[3].OSE.estimates[index] + ATE_standalone = results[index].OSE + @test ATE_from_diff.estimand == ATE_standalone.estimand + @test ATE_from_diff.estimate == ATE_standalone.estimate + @test ATE_from_diff.std == ATE_standalone.std + end + @test results[3].OSE isa TMLE.ComposedEstimate + rm(datafile) + rm(outputs.jls.filename) end diff --git a/test/utils.jl b/test/utils.jl index 2a3b5c0..569171f 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -53,7 +53,7 @@ end @testset "Test proofread_estimands" for extension in ("yaml", "json") # Write estimands file filename = "statistical_estimands.$extension" - eval(Meta.parse("write_$extension"))(filename, statistical_estimands_only_config()) + eval(Meta.parse("TMLE.write_$extension"))(filename, statistical_estimands_only_config()) dataset = DataFrame(T1 = [1., 0.], T2=[true, false]) estimands = TargetedEstimation.proofread_estimands(filename, dataset) From 23484dd94968fed90112fe77cf2c03d6c944ec35 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 28 Nov 2023 16:46:47 +0000 Subject: [PATCH 10/71] add new fields to output managers --- src/runner.jl | 7 +++++-- src/utils.jl | 55 
++++++++++++++++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/runner.jl b/src/runner.jl index 0523489..3b25053 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -19,11 +19,14 @@ initialize(output::JSONOutput) = initialize_json(output.filename) @option struct HDF5Output filename::Union{Nothing, String} = nothing pval_threshold::Union{Nothing, Float64} = nothing + sample_ids::Bool = false + compress::Bool = false end @option struct JLSOutput filename::Union{Nothing, String} = nothing pval_threshold::Union{Nothing, Float64} = nothing + sample_ids::Bool = false end @option struct Outputs @@ -82,9 +85,9 @@ function save(runner::Runner, results, partition, finalize) # Append JSON Output update_file(runner.outputs.json, results; finalize=finalize) # Append JLS Output - update_file(runner.outputs.jls, results) + update_file(runner.outputs.jls, results, runner.dataset) # Append HDF5 Output - update_file(runner.outputs.hdf5, partition, results, runner.dataset) + update_file(runner.outputs.hdf5, results, runner.dataset) end function try_estimation(runner, Ψ, estimator) diff --git a/src/utils.jl b/src/utils.jl index 2e188c8..c15724e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -49,26 +49,12 @@ end #####  HDF5 OUTPUT #### ##################################################################### - -function update_file(output::HDF5Output, partition, results, dataset) +function update_file(output::HDF5Output, results, dataset) output.filename === nothing && return - - jldopen(output.filename, "a+", compress=true) do io - previous_variables = nothing - sample_ids_idx = nothing - for (partition_index, param_index) in enumerate(partition) - estimator_results = TMLE.emptyIC(results[partition_index], output.pval_threshold) - current_variables = variables(first(estimator_results).estimand) - if previous_variables != current_variables - sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables) - 
io["$param_index/sample_ids"] = sample_ids - sample_ids_idx = param_index - end - io["$param_index/result"] = estimator_results - io["$param_index/sample_ids_idx"] = sample_ids_idx - - previous_variables = current_variables - end + results = post_process(results, dataset, output.pval_threshold, output.save_sample_ids) + jldopen(output.filename, "a+", compress=output.compress) do io + latest_index = maximum(parse(Int, split(key, "_")[2]) for key in keys(io)) + io[string("Batch_", latest_index + 1)] = results end end @@ -76,12 +62,12 @@ end #####  JLS OUTPUT #### ##################################################################### -function update_file(output::JLSOutput, results) +function update_file(output::JLSOutput, results, dataset) output.filename === nothing && return + results = post_process(results, dataset, output.pval_threshold, output.save_sample_ids) open(output.filename, "a") do io for result in results - result = TMLE.emptyIC(result, output.pval_threshold) serialize(io, result) end end @@ -183,9 +169,32 @@ TMLE.emptyIC(result::FailedEstimation, pval_threshold) = result TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names = NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt]) +function post_process(results, dataset, pval_threshold, save_sample_ids) + results = [TMLE.emptyIC(result, pval_threshold) for result ∈ results] + if save_sample_ids + sample_ids = get_sample_ids(dataset, results) + results = [(result..., SAMPLE_IDS=s_ids) for (result, s_ids) in zip(results, sample_ids)] + end + return results +end -get_sample_ids(data, variables) = dropmissing(data[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID - +sample_ids_from_variables(dataset, variables) = dropmissing(dataset[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID + +function get_sample_ids(dataset, results) + previous_variables = nothing + sample_ids = [] + current_ref_id = 0 + for (index, result) in enumerate(results) + current_variables = variables(first(result).estimand) + 
if previous_variables != current_variables + push!(sample_ids, sample_ids_from_variables(dataset, current_variables)) + current_ref_id = index + else + push!(sample_ids, current_ref_id) + end + end + return sample_ids +end """ instantiate_dataset(path::String) From 7da3375a3fc383c4276257cd99dc60ba8a09a892 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 29 Nov 2023 15:47:24 +0000 Subject: [PATCH 11/71] up tests --- Project.toml | 2 +- src/runner.jl | 1 - src/utils.jl | 15 ++++--- test/runner.jl | 103 +++++++++++++++++++++++++++---------------------- 4 files changed, 64 insertions(+), 57 deletions(-) diff --git a/Project.toml b/Project.toml index 4d74093..a4f5a0d 100644 --- a/Project.toml +++ b/Project.toml @@ -40,12 +40,12 @@ CategoricalArrays = "0.10" Combinatorics = "1.0.2" Comonicon = "1.0.6" Configurations = "0.17.6" -JSON = "0.21.4" DataFrames = "1.3.4" EvoTrees = "0.14.6" GLMNet = "0.7" HighlyAdaptiveLasso = "0.2.0" JLD2 = "0.4.22" +JSON = "0.21.4" MKL = "0.6" MLJ = "0.20.0" MLJBase = "1.0.1" diff --git a/src/runner.jl b/src/runner.jl index 3b25053..eb991e9 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -8,7 +8,6 @@ TMLE.to_dict(x::FailedEstimation) = Dict( :error => x.msg ) - @option struct JSONOutput filename::Union{Nothing, String} = nothing pval_threshold::Union{Nothing, Float64} = nothing diff --git a/src/utils.jl b/src/utils.jl index c15724e..e2aeb91 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -51,9 +51,10 @@ end function update_file(output::HDF5Output, results, dataset) output.filename === nothing && return - results = post_process(results, dataset, output.pval_threshold, output.save_sample_ids) + results = post_process(results, dataset, output.pval_threshold, output.sample_ids) jldopen(output.filename, "a+", compress=output.compress) do io - latest_index = maximum(parse(Int, split(key, "_")[2]) for key in keys(io)) + batches_keys = keys(io) + latest_index = isempty(batches_keys) ? 
0 : maximum(parse(Int, split(key, "_")[2]) for key in batches_keys) io[string("Batch_", latest_index + 1)] = results end end @@ -64,7 +65,7 @@ end function update_file(output::JLSOutput, results, dataset) output.filename === nothing && return - results = post_process(results, dataset, output.pval_threshold, output.save_sample_ids) + results = post_process(results, dataset, output.pval_threshold, output.sample_ids) open(output.filename, "a") do io for result in results @@ -74,7 +75,7 @@ function update_file(output::JLSOutput, results, dataset) end ##################################################################### -#####  Read TMLE Estimands Configuration #### +#####  Read TMLE Estimands Configuration #### ##################################################################### function convert_treatment_values(treatment_levels::NamedTuple{names, <:Tuple{Vararg{NamedTuple}}}, treatment_types) where names @@ -189,6 +190,7 @@ function get_sample_ids(dataset, results) if previous_variables != current_variables push!(sample_ids, sample_ids_from_variables(dataset, current_variables)) current_ref_id = index + previous_variables = current_variables else push!(sample_ids, current_ref_id) end @@ -274,8 +276,5 @@ function load_tmle_spec(file) return ESTIMATORS end -TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{TMLE.EICEstimate}}}) where names = - Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt)) - -TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{FailedEstimation}}}) where names = +TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{Union{TMLE.EICEstimate, FailedEstimation, TMLE.ComposedEstimate}}}}) where names = Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt)) \ No newline at end of file diff --git a/test/runner.jl b/test/runner.jl index 013cd41..bb415bf 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -13,6 +13,7 @@ using CSV using Serialization using Arrow using YAML +using JSON TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") 
@@ -20,20 +21,6 @@ CONFIGDIR = joinpath(TESTDIR, "config") include(joinpath(TESTDIR, "testutils.jl")) -sort_nt_by_key(nt::NamedTuple{names}) where names = NamedTuple{sort(names)}(nt) -sort_nt_by_key(x) = x - -function test_estimands_match(Ψ₁::T1, Ψ₂::T2) where {T1, T2} - @test T1 == T2 - @test Ψ₁.outcome == Ψ₂.outcome - @test Ψ₁.outcome_extra_covariates == Ψ₂.outcome_extra_covariates - @test sort_nt_by_key(Ψ₁.treatment_confounders) == sort_nt_by_key(Ψ₂.treatment_confounders) - @test sort(keys(Ψ₁.treatment_values)) == sort(keys(Ψ₂.treatment_values)) - for key in keys(Ψ₁.treatment_values) - @test sort_nt_by_key(Ψ₁.treatment_values[key]) == sort_nt_by_key(Ψ₂.treatment_values[key]) - end -end - """ CONTINUOUS_OUTCOME: - IATE(0->1, 0->1) = E[W₂] = 0.5 @@ -85,8 +72,8 @@ end TMLE.write_json(estimands_filename, statistical_estimands_only_config()) outputs = TargetedEstimation.Outputs( json=TargetedEstimation.JSONOutput(filename="output.json"), - hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1.), - jls=TargetedEstimation.JLSOutput(filename="output.jls"), + hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1., sample_ids=true), + jls=TargetedEstimation.JLSOutput(filename="output.jls", pval_threshold=1e-5), std=true, ) runner = Runner( @@ -133,37 +120,40 @@ end push!(loaded_results, deserialize(io)) end end - for (result, loaded_result) in zip(results, loaded_results) + for (index, (result, loaded_result)) in enumerate(zip(results, loaded_results)) @test loaded_result[:TMLE] isa TMLE.TMLEstimate @test result.TMLE.estimate == loaded_result[:TMLE].estimate - @test loaded_result[:TMLE].IC == [] - @test loaded_result[:OSE] isa TMLE.OSEstimate @test result.OSE.estimate == loaded_result[:OSE].estimate - @test loaded_result[:OSE].IC == [] + @test !haskey(loaded_result, :SAMPLE_IDS) + if index ∈ (1, 2) + @test loaded_result[:TMLE].IC == [] + @test loaded_result[:OSE].IC == [] + else + @test length(loaded_result[:TMLE].IC) > 0 + 
@test length(loaded_result[:OSE].IC) > 0 + end end # Test Save to HDF5 hdf5file = jldopen(outputs.hdf5.filename, "r") - for (result_index, param_index) in enumerate(4:6) - result = hdf5file[string(param_index, "/result")] + loaded_results = hdf5file[string("Batch_1")] + for (param_index, result) in enumerate(loaded_results) @test result.TMLE isa TMLE.TMLEstimate - @test results[result_index].TMLE.estimate == result.TMLE.estimate + @test results[param_index].TMLE.estimate == result.TMLE.estimate @test result.OSE isa TMLE.OSEstimate - @test results[result_index].OSE.estimate == result.OSE.estimate + @test results[param_index].OSE.estimate == result.OSE.estimate end - @test hdf5file["4/sample_ids"] == collect(2:1000) - @test hdf5file["4/sample_ids_idx"] == 4 - @test size(hdf5file["4/result"].TMLE.IC, 1) == 999 - @test !haskey(hdf5file, "5/sample_ids") - @test hdf5file["5/sample_ids_idx"] == 4 - @test size(hdf5file["5/result"].TMLE.IC, 1) == 999 + @test loaded_results[1].SAMPLE_IDS == collect(2:1000) + @test size(loaded_results[1].TMLE.IC, 1) == 999 + + @test loaded_results[2].SAMPLE_IDS == 1 + @test size(loaded_results[2].TMLE.IC, 1) == 999 - @test hdf5file["6/sample_ids"] == collect(1:1000) - @test hdf5file["6/sample_ids_idx"] == 6 - @test size(hdf5file["6/result"].TMLE.IC, 1) == 1000 + @test loaded_results[3].SAMPLE_IDS == collect(1:1000) + @test size(loaded_results[3].TMLE.IC, 1) == 1000 close(hdf5file) @@ -195,15 +185,20 @@ end chunksize=chunksize, ) - hdf5file = jldopen(outputs.hdf5.filename) + results_from_hdf5 = jldopen(outputs.hdf5.filename) do io + map(keys(io)) do key + io[key] + end + end + results_from_hdf5 = vcat(results_from_hdf5...) 
results_from_json = TMLE.read_json(outputs.json.filename) for i in 1:6 Ψ = configuration.estimands[i] - test_estimands_match(Ψ, results_from_json[i][:TMLE].estimand) - hdf5result = hdf5file[string(i, "/result")] - @test results_from_json[i][:TMLE].estimate == hdf5result.TMLE.estimate - @test results_from_json[i][:OSE].estimate == hdf5result.OSE.estimate + for estimator_name in (:OSE, :TMLE) + @test Ψ == results_from_hdf5[i][estimator_name].estimand == results_from_json[i][estimator_name].estimand + @test results_from_hdf5[i][estimator_name].estimate == results_from_json[i][estimator_name].estimate + end end # Clean @@ -275,16 +270,15 @@ end end # Check results from HDF5 - hdf5file = jldopen(outputs.hdf5.filename) + results_from_hdf5 = jldopen(outputs.hdf5.filename)["Batch_1"] for estimator in (:OSE, :TMLE) - @test hdf5file["1/result"][estimator] isa TargetedEstimation.FailedEstimation - @test hdf5file["2/result"][estimator] isa TMLE.EICEstimate + @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimation + @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate for i in 3:6 - @test hdf5file[string(i, "/result")][estimator] isa TargetedEstimation.FailedEstimation - @test hdf5file[string(i, "/result")][estimator].estimand isa TMLE.Estimand + @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimation + @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand end end - close(hdf5file) # Clean rm(outputs.json.filename) @@ -295,7 +289,9 @@ end @testset "Test tmle: Causal and Composed Estimands" begin build_dataset(;n=1000, format="csv") outputs = TargetedEstimation.Outputs( - jls=TargetedEstimation.JLSOutput(filename="output.jls") + json = TargetedEstimation.JSONOutput(filename="output.json"), + jls = TargetedEstimation.JLSOutput(filename="output.jls"), + hdf5 = TargetedEstimation.HDF5Output(filename="output.hdf5") ) tmpdir = mktempdir(cleanup=true) estimandsfile = joinpath(tmpdir, "configuration.jls") @@ -304,15 +300,16 
@@ end serialize(estimandsfile, configuration) estimatorfile = joinpath(CONFIGDIR, "ose_config.jl") datafile = "data.csv" - tmle(datafile, estimandsfile, estimatorfile; outputs=outputs) + + tmle(datafile, estimandsfile, estimatorfile; outputs=outputs, chunksize=2) + # JLS Output results = [] open(outputs.jls.filename) do io while !eof(io) push!(results, deserialize(io)) end end - for (index, Ψ) ∈ enumerate(configuration.estimands) @test results[index].OSE.estimand == identify(Ψ, configuration.scm) end @@ -326,8 +323,20 @@ end end @test results[3].OSE isa TMLE.ComposedEstimate + # JSON Output + results_from_json = TMLE.read_json(outputs.json.filename) + @test length(results_from_json) == 3 + + # HDF5 + results_from_json = jldopen(outputs.hdf5.filename) + @test length(results_from_json["Batch_1"]) == 2 + composed_result = only(results_from_json["Batch_2"]) + @test composed_result.OSE.cov == results[3].OSE.cov + rm(datafile) rm(outputs.jls.filename) + rm(outputs.json.filename) + rm(outputs.hdf5.filename) end From a3469cbd9bf36e7e78f4536e688c67e9a8463775 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 29 Nov 2023 15:57:15 +0000 Subject: [PATCH 12/71] reorg outputs code --- src/TargetedEstimation.jl | 1 + src/outputs.jl | 138 ++++++++++++++++++++++++++++++++++++++ src/runner.jl | 31 --------- src/utils.jl | 100 --------------------------- 4 files changed, 139 insertions(+), 131 deletions(-) create mode 100644 src/outputs.jl diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index 5dd5011..6e11e08 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -32,6 +32,7 @@ using Configurations import MLJModelInterface include("cache_managers.jl") +include("outputs.jl") include("runner.jl") include("utils.jl") include("sieve_variance.jl") diff --git a/src/outputs.jl b/src/outputs.jl new file mode 100644 index 0000000..9563d12 --- /dev/null +++ b/src/outputs.jl @@ -0,0 +1,138 @@ 
+##################################################################### +#####  JSON OUTPUT #### +##################################################################### + +@option struct JSONOutput + filename::Union{Nothing, String} = nothing + pval_threshold::Union{Nothing, Float64} = nothing +end + +initialize(output::JSONOutput) = initialize_json(output.filename) + +initialize_json(filename::Nothing) = nothing + +initialize_json(filename::String) = open(filename, "w") do io + print(io, '[') +end + +function update_file(output::JSONOutput, results; finalize=false) + output.filename === nothing && return + open(output.filename, "a") do io + for result in results + result = TMLE.emptyIC(result, output.pval_threshold) + JSON.print(io, TMLE.to_dict(result)) + print(io, ',') + end + if finalize + skip(io, -1) # get rid of the last comma which JSON doesn't allow + print(io, ']') + end + end +end + +##################################################################### +#####  HDF5 OUTPUT #### +##################################################################### + +@option struct HDF5Output + filename::Union{Nothing, String} = nothing + pval_threshold::Union{Nothing, Float64} = nothing + sample_ids::Bool = false + compress::Bool = false +end + +function update_file(output::HDF5Output, results, dataset) + output.filename === nothing && return + results = post_process(results, dataset, output.pval_threshold, output.sample_ids) + jldopen(output.filename, "a+", compress=output.compress) do io + batches_keys = keys(io) + latest_index = isempty(batches_keys) ? 
0 : maximum(parse(Int, split(key, "_")[2]) for key in batches_keys) + io[string("Batch_", latest_index + 1)] = results + end +end + +##################################################################### +#####  JLS OUTPUT #### +##################################################################### + +@option struct JLSOutput + filename::Union{Nothing, String} = nothing + pval_threshold::Union{Nothing, Float64} = nothing + sample_ids::Bool = false +end + +function update_file(output::JLSOutput, results, dataset) + output.filename === nothing && return + results = post_process(results, dataset, output.pval_threshold, output.sample_ids) + + open(output.filename, "a") do io + for result in results + serialize(io, result) + end + end +end + +##################################################################### +#####  STD OUTPUT #### +##################################################################### + +function update_file(doprint, results, partition) + if doprint + mimetext = MIME"text/plain"() + index = 1 + for (result, estimand_index) in zip(results, partition) + show(stdout, mimetext, string("⋆⋆⋆ Estimand ", estimand_index, " ⋆⋆⋆")) + println(stdout) + show(stdout, mimetext, first(result).estimand) + for (key, val) ∈ zip(keys(result), result) + show(stdout, mimetext, string("→ Estimation Result From: ", key, )) + println(stdout) + show(stdout, mimetext, val) + index += 1 + end + end + end +end + +##################################################################### +#####  OUTPUTS #### +##################################################################### + +@option struct Outputs + json::JSONOutput = JSONOutput() + hdf5::HDF5Output = HDF5Output() + jls::JLSOutput = JLSOutput() + std::Bool = false +end + +function initialize(outputs::Outputs) + initialize(outputs.json) +end + +function post_process(results, dataset, pval_threshold, save_sample_ids) + results = [TMLE.emptyIC(result, pval_threshold) for result ∈ results] + if save_sample_ids + sample_ids = 
get_sample_ids(dataset, results) + results = [(result..., SAMPLE_IDS=s_ids) for (result, s_ids) in zip(results, sample_ids)] + end + return results +end + +sample_ids_from_variables(dataset, variables) = dropmissing(dataset[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID + +function get_sample_ids(dataset, results) + previous_variables = nothing + sample_ids = [] + current_ref_id = 0 + for (index, result) in enumerate(results) + current_variables = variables(first(result).estimand) + if previous_variables != current_variables + push!(sample_ids, sample_ids_from_variables(dataset, current_variables)) + current_ref_id = index + previous_variables = current_variables + else + push!(sample_ids, current_ref_id) + end + end + return sample_ids +end \ No newline at end of file diff --git a/src/runner.jl b/src/runner.jl index eb991e9..873cf82 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -8,37 +8,6 @@ TMLE.to_dict(x::FailedEstimation) = Dict( :error => x.msg ) -@option struct JSONOutput - filename::Union{Nothing, String} = nothing - pval_threshold::Union{Nothing, Float64} = nothing -end - -initialize(output::JSONOutput) = initialize_json(output.filename) - -@option struct HDF5Output - filename::Union{Nothing, String} = nothing - pval_threshold::Union{Nothing, Float64} = nothing - sample_ids::Bool = false - compress::Bool = false -end - -@option struct JLSOutput - filename::Union{Nothing, String} = nothing - pval_threshold::Union{Nothing, Float64} = nothing - sample_ids::Bool = false -end - -@option struct Outputs - json::JSONOutput = JSONOutput() - hdf5::HDF5Output = HDF5Output() - jls::JLSOutput = JLSOutput() - std::Bool = false -end - -function initialize(outputs::Outputs) - initialize(outputs.json) -end - mutable struct Runner estimators::NamedTuple estimands::Vector{TMLE.Estimand} diff --git a/src/utils.jl b/src/utils.jl index e2aeb91..0fc4f9e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,78 +1,5 @@ 
-##################################################################### -#####  JSON OUTPUT #### -##################################################################### - -initialize_json(filename::Nothing) = nothing - -initialize_json(filename::String) = open(filename, "w") do io - print(io, '[') -end - -function update_file(output::JSONOutput, results; finalize=false) - output.filename === nothing && return - open(output.filename, "a") do io - for result in results - result = TMLE.emptyIC(result, output.pval_threshold) - JSON.print(io, TMLE.to_dict(result)) - print(io, ',') - end - if finalize - skip(io, -1) # get rid of the last comma which JSON doesn't allow - print(io, ']') - end - end -end - -##################################################################### -#####  STD OUTPUT #### -##################################################################### - -function update_file(doprint, results, partition) - if doprint - mimetext = MIME"text/plain"() - index = 1 - for (result, estimand_index) in zip(results, partition) - show(stdout, mimetext, string("⋆⋆⋆ Estimand ", estimand_index, " ⋆⋆⋆")) - println(stdout) - show(stdout, mimetext, first(result).estimand) - for (key, val) ∈ zip(keys(result), result) - show(stdout, mimetext, string("→ Estimation Result From: ", key, )) - println(stdout) - show(stdout, mimetext, val) - index += 1 - end - end - end -end - -##################################################################### -#####  HDF5 OUTPUT #### -##################################################################### - -function update_file(output::HDF5Output, results, dataset) - output.filename === nothing && return - results = post_process(results, dataset, output.pval_threshold, output.sample_ids) - jldopen(output.filename, "a+", compress=output.compress) do io - batches_keys = keys(io) - latest_index = isempty(batches_keys) ? 
0 : maximum(parse(Int, split(key, "_")[2]) for key in batches_keys) - io[string("Batch_", latest_index + 1)] = results - end -end -##################################################################### -#####  JLS OUTPUT #### -##################################################################### -function update_file(output::JLSOutput, results, dataset) - output.filename === nothing && return - results = post_process(results, dataset, output.pval_threshold, output.sample_ids) - - open(output.filename, "a") do io - for result in results - serialize(io, result) - end - end -end ##################################################################### #####  Read TMLE Estimands Configuration #### @@ -170,33 +97,6 @@ TMLE.emptyIC(result::FailedEstimation, pval_threshold) = result TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names = NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt]) -function post_process(results, dataset, pval_threshold, save_sample_ids) - results = [TMLE.emptyIC(result, pval_threshold) for result ∈ results] - if save_sample_ids - sample_ids = get_sample_ids(dataset, results) - results = [(result..., SAMPLE_IDS=s_ids) for (result, s_ids) in zip(results, sample_ids)] - end - return results -end - -sample_ids_from_variables(dataset, variables) = dropmissing(dataset[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID - -function get_sample_ids(dataset, results) - previous_variables = nothing - sample_ids = [] - current_ref_id = 0 - for (index, result) in enumerate(results) - current_variables = variables(first(result).estimand) - if previous_variables != current_variables - push!(sample_ids, sample_ids_from_variables(dataset, current_variables)) - current_ref_id = index - previous_variables = current_variables - else - push!(sample_ids, current_ref_id) - end - end - return sample_ids -end """ instantiate_dataset(path::String) From 3f3751443a01633d27223c4bc539d778fdb62d1b Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 29 
Nov 2023 16:00:32 +0000 Subject: [PATCH 13/71] add failed estimate file --- src/TargetedEstimation.jl | 2 ++ src/failed_estimate.jl | 11 +++++++++++ src/runner.jl | 14 ++------------ src/utils.jl | 4 +--- test/runner.jl | 4 ++-- 5 files changed, 18 insertions(+), 17 deletions(-) create mode 100644 src/failed_estimate.jl diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index 6e11e08..7d94897 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -31,6 +31,7 @@ using Configurations import MLJModelInterface +include("failed_estimate.jl") include("cache_managers.jl") include("outputs.jl") include("runner.jl") @@ -42,6 +43,7 @@ include(joinpath("models", "glmnet.jl")) include(joinpath("models", "adaptive_interaction_transformer.jl")) include(joinpath("models", "biallelic_snp_encoder.jl")) + """TL CLI.""" @main diff --git a/src/failed_estimate.jl b/src/failed_estimate.jl new file mode 100644 index 0000000..b512e93 --- /dev/null +++ b/src/failed_estimate.jl @@ -0,0 +1,11 @@ +struct FailedEstimate + estimand::TMLE.Estimand + msg::String +end + +TMLE.to_dict(x::FailedEstimate) = Dict( + :estimand => TMLE.to_dict(x.estimand), + :error => x.msg +) + +TMLE.emptyIC(result::FailedEstimate, pval_threshold) = result diff --git a/src/runner.jl b/src/runner.jl index 873cf82..0817526 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -1,13 +1,3 @@ -struct FailedEstimation - estimand::TMLE.Estimand - msg::String -end - -TMLE.to_dict(x::FailedEstimation) = Dict( - :estimand => TMLE.to_dict(x.estimand), - :error => x.msg - ) - mutable struct Runner estimators::NamedTuple estimands::Vector{TMLE.Estimand} @@ -70,7 +60,7 @@ function try_estimation(runner, Ψ, estimator) # This also allows to skip fast the next estimands requiring the same nuisance functions. 
if e isa TMLE.FitFailedError push!(runner.failed_nuisance, e.estimand) - return FailedEstimation(Ψ, e.msg) + return FailedEstimate(Ψ, e.msg) # On other errors, rethrow else rethrow(e) @@ -89,7 +79,7 @@ function (runner::Runner)(partition) for (partition_index, param_index) in enumerate(partition) Ψ = runner.estimands[param_index] if skip_fast(runner, Ψ) - results[partition_index] = NamedTuple{keys(runner.estimators)}([FailedEstimation(Ψ, "Skipped due to shared failed nuisance fit.") for _ in 1:length(runner.estimators)]) + results[partition_index] = NamedTuple{keys(runner.estimators)}([FailedEstimate(Ψ, "Skipped due to shared failed nuisance fit.") for _ in 1:length(runner.estimators)]) continue end # Make sure data types are appropriate for the estimand diff --git a/src/utils.jl b/src/utils.jl index 0fc4f9e..4c534f4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -92,8 +92,6 @@ end #####  ADDITIONAL METHODS #### ##################################################################### -TMLE.emptyIC(result::FailedEstimation, pval_threshold) = result - TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names = NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt]) @@ -176,5 +174,5 @@ function load_tmle_spec(file) return ESTIMATORS end -TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{Union{TMLE.EICEstimate, FailedEstimation, TMLE.ComposedEstimate}}}}) where names = +TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{Union{TMLE.EICEstimate, FailedEstimate, TMLE.ComposedEstimate}}}}) where names = Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt)) \ No newline at end of file diff --git a/test/runner.jl b/test/runner.jl index bb415bf..fd52bc3 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -272,10 +272,10 @@ end # Check results from HDF5 results_from_hdf5 = jldopen(outputs.hdf5.filename)["Batch_1"] for estimator in (:OSE, :TMLE) - @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimation + @test 
results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimate @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate for i in 3:6 - @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimation + @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimate @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand end end From 77f93b6e00c59e8014f2e55a0961c7ab3ae3401d Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 29 Nov 2023 16:12:14 +0000 Subject: [PATCH 14/71] fremove some deprecate methods and simplify ose config --- src/utils.jl | 18 ++---------------- test/config/ose_config.jl | 34 +++------------------------------- test/runtests.jl | 1 + test/utils.jl | 34 ++-------------------------------- 4 files changed, 8 insertions(+), 79 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 4c534f4..a045168 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,6 +1,3 @@ - - - ##################################################################### #####  Read TMLE Estimands Configuration #### ##################################################################### @@ -21,18 +18,6 @@ MissingSCMError() = ArgumentError(string("A Structural Causal Model should be pr get_identification_method(method::Nothing) = BackdoorAdjustment() get_identification_method(method) = method -maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::SCM, method) = - identify(get_identification_method(method), Ψ, scm) - -maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::Nothing, method) = throw(MissingSCMError()) - -function maybe_identify(Ψ::TMLE.ComposedEstimand, scm, method) - method = get_identification_method(method) - return TMLE.ComposedEstimand(Ψ.f, Tuple(maybe_identify(arg, scm, method) for arg ∈ Ψ.args)) -end - -maybe_identify(Ψ, scm, method) = Ψ - function read_method(extension) method = if extension == ".json" TMLE.read_json @@ -79,10 +64,11 @@ respects the treatment types in the dataset. 
function proofread_estimands(filename, dataset) extension = filename[findlast(isequal('.'), filename):end] config = read_method(extension)(filename) + adjustment_method = get_identification_method(config.adjustment) estimands = Vector{TMLE.Estimand}(undef, length(config.estimands)) treatment_types = Dict() for (index, Ψ) in enumerate(config.estimands) - statisticalΨ = TargetedEstimation.maybe_identify(Ψ, config.scm, config.adjustment) + statisticalΨ = identify(Ψ, config.scm, method=adjustment_method) estimands[index] = fix_treatment_values!(treatment_types, statisticalΨ, dataset) end return estimands diff --git a/test/config/ose_config.jl b/test/config/ose_config.jl index 5462955..28f2367 100644 --- a/test/config/ose_config.jl +++ b/test/config/ose_config.jl @@ -1,39 +1,11 @@ -evotree = EvoTreeClassifier(nrounds=10) - default_models = TMLE.default_models( # For the estimation of E[Y|W, T]: continuous target - Q_continuous = Stack( - metalearner = LinearRegressor(fit_intercept=false), - cache = true, - resampling = AdaptiveCV(), - interaction_glmnet = Pipeline( - interaction_transformer = InteractionTransformer(order=3), - glmnet = GLMNetRegressor(), - cache = true - ), - evo_1 = EvoTreeRegressor(nrounds=10, lambda=0., gamma=0.3), - evo_2 = EvoTreeRegressor(nrounds=10, lambda=1., gamma=0.3), - evo_3 = EvoTreeRegressor(nrounds=20, lambda=0., gamma=0.3), - evo_4 = EvoTreeRegressor(nrounds=20, lambda=1., gamma=0.3), - constant = ConstantRegressor(), - hal = HALRegressor(max_degree=1, smoothness_orders=1, num_knots=[10, 5], lambda=10, cv_select=false) - ), + Q_continuous = LinearRegressor(), # For the estimation of E[Y|W, T]: binary target - Q_binary = Pipeline( - interaction_transformer = InteractionTransformer(order=2), - glmnet = GLMNetClassifier(), - cache = false - ), + Q_binary = LogisticClassifier(), # For the estimation of p(T| W) - G = TunedModel( - model = evotree, - resampling = CV(), - tuning = Grid(goal=5), - range = [range(evotree, :max_depth, lower=3, 
upper=5), range(evotree, :lambda, lower=1e-5, upper=10, scale=:log)], - measure = log_loss, - cache=true - ) + G = LogisticClassifier() ) ESTIMATORS = ( diff --git a/test/runtests.jl b/test/runtests.jl index 13cf36f..e8f5280 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using TargetedEstimation +using Test TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") diff --git a/test/utils.jl b/test/utils.jl index 569171f..1355b29 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -138,10 +138,10 @@ end T₁ = [1, 2, 3, 4, 5], T₂ = [1, 2, 3, 4, missing], ) - sample_ids = TargetedEstimation.get_sample_ids(data, variables) + sample_ids = TargetedEstimation.sample_ids_from_variables(data, variables) @test sample_ids == [2, 3] data.W₁ = [1, 2, missing, 4, 5] - sample_ids = TargetedEstimation.get_sample_ids(data, variables) + sample_ids = TargetedEstimation.sample_ids_from_variables(data, variables) @test sample_ids == [2] end @@ -198,36 +198,6 @@ end end end -@testset "Test maybe_identify" begin - scm = StaticSCM( - outcomes = [:Y], - treatments = [:T₁, :T₂], - confounders = [:W] - ) - adjustment = BackdoorAdjustment() - causalATE = ATE( - outcome = :Y, - treatment_values = (T₁ =(case=1, control=0),) - ) - statisticalATE = ATE( - outcome = :Y, - treatment_values = (T₁ =(case=1, control=0),), - treatment_confounders = (T₁=[:W],) - ) - # Correctly identifies the estimand - identifiedATE = TargetedEstimation.maybe_identify(causalATE, scm, nothing) - @test statisticalATE == identifiedATE - # Just returns the estimand - @test TargetedEstimation.maybe_identify(statisticalATE, scm, nothing) === statisticalATE - # Throws if can't identify - @test_throws TargetedEstimation.MissingSCMError() TargetedEstimation.maybe_identify(causalATE, nothing, nothing) - # Composed Estimand with a weird mixture of statistical/causal estimands - diff = ComposedEstimand(-, (causalATE, statisticalATE)) - identified_diff = TargetedEstimation.maybe_identify(diff, scm, nothing) - 
statistical_diff = ComposedEstimand(-, (statisticalATE, statisticalATE)) - @test identified_diff == statistical_diff -end - end; true \ No newline at end of file From 060b0901f514005e7eecb1a4db0ac831f02c29d1 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 29 Nov 2023 16:29:34 +0000 Subject: [PATCH 15/71] add initialize tests --- src/outputs.jl | 29 ++++++++++++++++++++++- test/outputs.jl | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + test/utils.jl | 23 ------------------- 4 files changed, 89 insertions(+), 24 deletions(-) create mode 100644 test/outputs.jl diff --git a/src/outputs.jl b/src/outputs.jl index 9563d12..1044fe3 100644 --- a/src/outputs.jl +++ b/src/outputs.jl @@ -1,3 +1,15 @@ +FileExistsError(filename) = ArgumentError(string("File ", filename, " already exists.")) + +check_file_exists(filename::Nothing) = nothing +check_file_exists(filename) = !isfile(filename) || throw(FileExistsError(filename)) + +""" + initialize(output) + +Default intialization procedure only checks that file does not exist. +""" +initialize(output) = check_file_exists(output.filename) + ##################################################################### #####  JSON OUTPUT #### ##################################################################### @@ -7,7 +19,15 @@ pval_threshold::Union{Nothing, Float64} = nothing end -initialize(output::JSONOutput) = initialize_json(output.filename) +""" + initialize(output::JSONOutput) + +Checks that file does not exist and inialize the json file +""" +function initialize(output::JSONOutput) + check_file_exists(output.filename) + initialize_json(output.filename) +end initialize_json(filename::Nothing) = nothing @@ -105,8 +125,15 @@ end std::Bool = false end +""" + initialize(output::Outputs) + +Initializes all outputs in output. 
+""" function initialize(outputs::Outputs) initialize(outputs.json) + initialize(outputs.jls) + initialize(outputs.hdf5) end function post_process(results, dataset, pval_threshold, save_sample_ids) diff --git a/test/outputs.jl b/test/outputs.jl new file mode 100644 index 0000000..9b13bb0 --- /dev/null +++ b/test/outputs.jl @@ -0,0 +1,60 @@ +module TestOutputs + +using TargetedEstimation +using Test +using JSON + +TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") + +include(joinpath(TESTDIR, "testutils.jl")) + +@testset "Test initialize" begin + outputs = TargetedEstimation.Outputs( + json = TargetedEstimation.JSONOutput(filename="output.json"), + jls = TargetedEstimation.JLSOutput(filename="output.jls"), + hdf5 = TargetedEstimation.HDF5Output(filename="output.hdf5"), + ) + + TargetedEstimation.initialize(outputs) + + @test isfile(outputs.json.filename) + @test_throws TargetedEstimation.FileExistsError(outputs.json.filename) TargetedEstimation.initialize(outputs) + rm(outputs.json.filename) + + touch(outputs.jls.filename) + @test_throws TargetedEstimation.FileExistsError(outputs.jls.filename) TargetedEstimation.initialize(outputs) + rm(outputs.jls.filename) + rm(outputs.json.filename) + + touch(outputs.hdf5.filename) + @test_throws TargetedEstimation.FileExistsError(outputs.hdf5.filename) TargetedEstimation.initialize(outputs) + rm(outputs.hdf5.filename) + rm(outputs.json.filename) +end + +@testset "Test JSON update_file" begin + results = [] + for Ψ in statistical_estimands_only_config().estimands + push!(results, ( + TMLE=TMLE.TMLEstimate(Ψ, rand(), rand(), 10, Float64[]), + OSE=TMLE.OSEstimate(Ψ, rand(), rand(), 10, Float64[]) + )) + end + tmpdir = mktempdir(cleanup=true) + jsonoutput = TargetedEstimation.JSONOutput(filename=joinpath(tmpdir, "output_test.json")) + TargetedEstimation.initialize_json(jsonoutput.filename) + TargetedEstimation.update_file(jsonoutput, results[1:3]) + TargetedEstimation.update_file(jsonoutput, results[4:end]; finalize=true) + 
loaded_results = TMLE.read_json(jsonoutput.filename) + @test size(loaded_results) == size(results) + for (result, loaded_result) in zip(results, loaded_results) + @test result.TMLE.estimate == loaded_result[:TMLE].estimate + @test result.TMLE.std == loaded_result[:TMLE].std + @test result.OSE.estimate == loaded_result[:OSE].estimate + @test result.OSE.std == loaded_result[:OSE].std + end +end + +end + +true \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index e8f5280..e8b741a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using Test TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") @time begin + @test include(joinpath(TESTDIR, "outputs.jl")) @test include(joinpath(TESTDIR, "cache_managers.jl")) @test include(joinpath(TESTDIR, "utils.jl")) @test include(joinpath(TESTDIR, "sieve_variance.jl")) diff --git a/test/utils.jl b/test/utils.jl index 1355b29..7525168 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -175,29 +175,6 @@ end TargetedEstimation.make_categorical(dataset.T₁, true) === dataset.T₁ end -@testset "Test JSON writing" begin - results = [] - for Ψ in statistical_estimands_only_config().estimands - push!(results, ( - TMLE=TMLE.TMLEstimate(Ψ, rand(), rand(), 10, Float64[]), - OSE=TMLE.OSEstimate(Ψ, rand(), rand(), 10, Float64[]) - )) - end - tmpdir = mktempdir(cleanup=true) - jsonoutput = TargetedEstimation.JSONOutput(filename=joinpath(tmpdir, "output_test.json")) - TargetedEstimation.initialize_json(jsonoutput.filename) - TargetedEstimation.update_file(jsonoutput, results[1:3]) - TargetedEstimation.update_file(jsonoutput, results[4:end]; finalize=true) - loaded_results = TMLE.read_json(jsonoutput.filename) - @test size(loaded_results) == size(results) - for (result, loaded_result) in zip(results, loaded_results) - @test result.TMLE.estimate == loaded_result[:TMLE].estimate - @test result.TMLE.std == loaded_result[:TMLE].std - @test result.OSE.estimate == loaded_result[:OSE].estimate - @test 
result.OSE.std == loaded_result[:OSE].std - end -end - end; true \ No newline at end of file From c59ba1305990b6031cb5ae79a9e8d0ff67ce05a1 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 29 Nov 2023 17:52:18 +0000 Subject: [PATCH 16/71] update svp test to support composedestimate --- src/sieve_variance.jl | 97 +++++++++++++++++++++++++---------------- src/utils.jl | 2 + test/sieve_variance.jl | 98 +++++++++++++++++++++++++++++------------- 3 files changed, 130 insertions(+), 67 deletions(-) diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl index cc953e9..290e672 100644 --- a/src/sieve_variance.jl +++ b/src/sieve_variance.jl @@ -80,8 +80,26 @@ end default_τs(nτs;max_τ=2) = Float32[max_τ*(i-1)/(nτs-1) for i in 1:nτs] +retrieve_sample_ids(sample_ids::AbstractVector, batch_results) = sample_ids -function build_work_list(prefix, grm_ids) +retrieve_sample_ids(index::Int, batch_results) = batch_results[index].SAMPLE_IDS + +function update_work_lists_with!(result::TMLE.ComposedEstimate, sample_ids, batch_results, grm_ids, results, influence_curves, n_obs) + for estimate in result.estimates + update_work_lists_with!(estimate, sample_ids, batch_results, grm_ids, results, influence_curves, n_obs) + end +end + +function update_work_lists_with!(result, sample_ids, batch_results, grm_ids, results, influence_curves, n_obs) + if length(result.IC) > 0 + sample_ids = string.(retrieve_sample_ids(sample_ids, batch_results)) + push!(influence_curves, align_ic(result.IC, sample_ids, grm_ids)) + push!(n_obs, size(sample_ids, 1)) + push!(results, result) + end +end + +function build_work_list(prefix, grm_ids; estimator_key=:TMLE) dirname_, prefix_ = splitdir(prefix) dirname__ = dirname_ == "" ? "." 
: dirname_ hdf5files = filter( @@ -92,28 +110,28 @@ function build_work_list(prefix, grm_ids) influence_curves = Vector{Float32}[] n_obs = Int[] - tmle_results = [] + results = [] for hdf5file in hdf5files jldopen(hdf5file) do io - # templateΨs = io["parameters"] - # results = io["results"] for key in keys(io) - result_group = io[key] - tmleresult = first(io[key]["result"]) - if size(tmleresult.IC, 1) > 0 - sample_ids = haskey(result_group, "sample_ids") ? result_group["sample_ids"] : - io[string(result_group["sample_ids_idx"])]["sample_ids"] - sample_ids = string.(sample_ids) - - push!(influence_curves, align_ic(tmleresult.IC, sample_ids, grm_ids)) - push!(n_obs, size(sample_ids, 1)) - push!(tmle_results, tmleresult) + batch_results = io[key] + for nt_result in batch_results + result = nt_result[estimator_key] + sample_ids = nt_result.SAMPLE_IDS + update_work_lists_with!( + result, + sample_ids, + batch_results, + grm_ids, results, + influence_curves, + n_obs + ) end end end end influence_curves = length(influence_curves) > 0 ? 
reduce(vcat, transpose(influence_curves)) : Matrix{Float32}(undef, 0, 0) - return tmle_results, influence_curves, n_obs + return results, influence_curves, n_obs end @@ -197,37 +215,37 @@ function grm_rows_bounds(n_samples) return bounds end -function save_results(outprefix, results, τs, variances) - TMLE.write_json(string(outprefix, ".json"), results) - jldopen(string(outprefix, ".hdf5"), "w") do io +function save_results(filename, results, τs, variances) + jldopen(filename, "w") do io io["taus"] = τs io["variances"] = variances + io["results"] = results end end corrected_stderrors(variances) = sqrt.(view(maximum(variances, dims=1), 1, :)) -function update_with_sieve_estimate!(results, stds) - for index in eachindex(results) - old = results[index] - results[index] = typeof(old)( - old.estimand, - old.estimate, - convert(Float64, stds[index]), - old.n, - Float64[] - ) - end -end +with_updated_std(estimate::T, std) where T = T( + estimate.estimand, + estimate.estimate, + convert(Float64, std), + estimate.n, + Float64[] +) + +with_updated_std(results, stds, estimator_key) = + [NamedTuple{(estimator_key,)}([with_updated_std(result, std)]) for (result, std) in zip(results, stds)] + """ sieve_variance_plateau(input_prefix; - output_prefix="svp", + out="svp.hdf5", grm_prefix="GRM", verbosity=0, n_estimators=10, - max_tau=0.8 + max_tau=0.8, + estimator_key="TMLE" ) Sieve Variance Plateau CLI. @@ -238,33 +256,36 @@ Sieve Variance Plateau CLI. # Options -- `-o, --output-prefix`: Output prefix. +- `-o, --out`: Output filename. - `-g, --grm-prefix`: Prefix to the aggregated GRM. - `-v, --verbosity`: Verbosity level. - `-n, --n_estimators`: Number of variance estimators to build for each estimate. - `-m, --max_tau`: Maximum distance between any two individuals. +- `-e, --estimator-key`: Estimator to use to proceed with sieve variance correction. 
""" @cast function sieve_variance_plateau(input_prefix; - output_prefix="svp", + out="svp.hdf5", grm_prefix="GRM", verbosity=0, n_estimators=10, - max_tau=0.8 + max_tau=0.8, + estimator_key="TMLE" ) + estimator_key = Symbol(estimator_key) τs = default_τs(n_estimators;max_τ=max_tau) grm, grm_ids = readGRM(grm_prefix) verbosity > 0 && @info "Preparing work list." - results, influence_curves, n_obs = build_work_list(input_prefix, grm_ids) + results, influence_curves, n_obs = build_work_list(input_prefix, grm_ids, estimator_key=estimator_key) if length(influence_curves) > 0 verbosity > 0 && @info "Computing variance estimates." variances = compute_variances(influence_curves, grm, τs, n_obs) std_errors = corrected_stderrors(variances) - update_with_sieve_estimate!(results, std_errors) + results = with_updated_std(results, std_errors, estimator_key) else variances = Float32[] end - save_results(output_prefix, results, τs, variances) + save_results(out, results, τs, variances) verbosity > 0 && @info "Done." return 0 diff --git a/src/utils.jl b/src/utils.jl index a045168..a5c4c24 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -137,6 +137,8 @@ function coerce_types!(dataset, Ψ) make_float!(dataset, continuous_variables) end +variables(Ψ::TMLE.ComposedEstimand) = union((variables(arg) for arg in Ψ.args)...) 
+ variables(Ψ::TMLE.Estimand) = Set([ Ψ.outcome, keys(Ψ.treatment_values)..., diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index 3359f2a..3ffaad7 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -48,12 +48,15 @@ function build_dataset(sample_ids) CSV.write("data.csv", dataset) end -function build_tmle_output_file(sample_ids, estimandfile, outprefix, pval) +function build_tmle_output_file(sample_ids, estimandfile, outprefix; + pval=1., + estimatorfile=joinpath(TESTDIR, "config", "tmle_ose_config.jl") + ) build_dataset(sample_ids) outputs = TargetedEstimation.Outputs( - hdf5=TargetedEstimation.HDF5Output(filename=string(outprefix, ".hdf5"), pval_threshold=pval), + hdf5=TargetedEstimation.HDF5Output(filename=string(outprefix, ".hdf5"), pval_threshold=pval, sample_ids=true), ) - tmle("data.csv", estimandfile, joinpath(TESTDIR, "config", "tmle_ose_config.jl"), outputs=outputs) + tmle("data.csv", estimandfile, estimatorfile, outputs=outputs) end function basic_variance_implementation(matrix_distance, influence_curve, n_obs) @@ -110,16 +113,15 @@ end # CASE_1: pval = 1. # Simulate multiple runs that occured - pval = 1. 
config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3]) estimandsfile_1 = joinpath(tmpdir, "configuration_1.json") TMLE.write_json(estimandsfile_1, config_1) - build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1", pval) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1") config_2 = TMLE.Configuration(estimands=configuration.estimands[4:end]) estimandsfile_2 = joinpath(tmpdir, "configuration_2.json") TMLE.write_json(estimandsfile_2, config_2) - build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2", pval) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2") results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids) # Check n_obs @@ -140,7 +142,7 @@ end pval = 0.1 estimandsfile = joinpath(tmpdir, "configuration.json") TMLE.write_json(estimandsfile, configuration) - build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile, "tmle_output", pval) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile, "tmle_output"; pval=pval) results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids) # Check n_obs @test n_obs == [194, 193, 193, 194] @@ -266,7 +268,7 @@ end close(io) end -@testset "Test sieve_variance_plateau" begin +@testset "Test SVP" begin # Generate data grm_ids = TargetedEstimation.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) tmpdir = mktempdir(cleanup=true) @@ -275,49 +277,87 @@ end config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3]) estimandsfile_1 = joinpath(tmpdir, "configuration_1.json") TMLE.write_json(estimandsfile_1, config_1) - build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1", pval) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1"; pval=pval) config_2 = TMLE.Configuration(estimands=configuration.estimands[4:end]) estimandsfile_2 = joinpath(tmpdir, "configuration_2.json") 
TMLE.write_json(estimandsfile_2, config_2) - build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2", pval) + build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2"; pval=pval) sieve_variance_plateau("tmle_output"; grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"), max_tau=0.75 ) - # Check HDF5 file io = jldopen("svp.hdf5") + # Check τs @test io["taus"] == TargetedEstimation.default_τs(10; max_τ=0.75) + # Check variances @test size(io["variances"]) == (10, 4) - close(io) - - # Check JSON file - svp_results = TMLE.read_json("svp.json") - tmleout1 = jldopen("tmle_output_1.hdf5") - tmleout2 = jldopen("tmle_output_2.hdf5") - - src_results = vcat( - [tmleout1[string(i, "/result")].TMLE for i in 1:3], - [tmleout2[string(i, "/result")].TMLE for i in 1:3], - ) + # Check results + svp_results = io["results"] + + tmleout1 = jldopen("tmle_output_1.hdf5")["Batch_1"] + tmleout2 = jldopen("tmle_output_2.hdf5")["Batch_1"] + src_results = [tmleout1..., tmleout2...] 
for svp_result in svp_results - src_result_index = findall(x.estimand == svp_result.estimand for x in src_results) + src_result_index = findall(x.TMLE.estimand == svp_result.TMLE.estimand for x in src_results) src_result = src_results[only(src_result_index)] - @test src_result.std != svp_result.std - @test src_result.estimate == svp_result.estimate - @test src_result.n == svp_result.n - @test svp_result.IC == [] + @test src_result.TMLE.std != svp_result.TMLE.std + @test src_result.TMLE.estimate == svp_result.TMLE.estimate + @test src_result.TMLE.n == svp_result.TMLE.n + @test svp_result.TMLE.IC == [] end - + close(io) # clean - rm("svp.json") rm("svp.hdf5") rm("tmle_output_1.hdf5") rm("tmle_output_2.hdf5") rm("data.csv") end +@testset "Test SVP: causal and composed estimands" begin + # Generate data + grm_ids = TargetedEstimation.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) + tmpdir = mktempdir(cleanup=true) + configuration = causal_and_composed_estimands_config() + pval = 1. + configfile = joinpath(tmpdir, "configuration.json") + TMLE.write_json(configfile, configuration) + build_tmle_output_file( + grm_ids.SAMPLE_ID, + configfile, + "tmle_output"; + estimatorfile=joinpath(TESTDIR, "config", "ose_config.jl") + ) + sieve_variance_plateau("tmle_output"; + grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"), + max_tau=0.75, + estimator_key="OSE" + ) + # The ComposedEstimate std is not updated but each component is. 
+ src_results = jldopen("tmle_output.hdf5")["Batch_1"] + io = jldopen("svp.hdf5") + svp_results = io["results"] + standalone_estimates = svp_results[1:2] + from_composite = svp_results[3:4] + @test standalone_estimates[1].OSE.estimand == from_composite[1].OSE.estimand + @test standalone_estimates[2].OSE.estimand == from_composite[2].OSE.estimand + + # Check std has been updated + for i in 1:2 + @test standalone_estimates[i].OSE.estimand == src_results[i].OSE.estimand + @test standalone_estimates[i].OSE.estimate == src_results[i].OSE.estimate + @test standalone_estimates[i].OSE.std != src_results[i].OSE.std + end + + close(io) + + # clean + rm("svp.hdf5") + rm("tmle_output.hdf5") + rm("data.csv") +end + end From 25d0496707646a27d5f59790393c341a6bdf340b Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 29 Nov 2023 18:11:51 +0000 Subject: [PATCH 17/71] start working on merge --- src/merge.jl | 77 ++++++++++++++++++---------------------- test/merge.jl | 97 +-------------------------------------------------- 2 files changed, 35 insertions(+), 139 deletions(-) diff --git a/src/merge.jl b/src/merge.jl index 4fd4b1f..3bcce8e 100644 --- a/src/merge.jl +++ b/src/merge.jl @@ -12,55 +12,46 @@ end read_output_with_types(file) = CSV.read(file, DataFrame, types=Dict(key => String for key in joining_keys())) -function load_csv_files(data, files) - for file in files - new_data = read_output_with_types(file) - if size(new_data, 1) > 0 - data = vcat(data, new_data) - end - end - return data -end -joining_keys() = ["PARAMETER_TYPE", "TREATMENTS", "CASE", "CONTROL", "OUTCOME", "CONFOUNDERS", "COVARIATES"] -function merge_csv_files(parsed_args) - tmle_files = files_matching_prefix_and_suffix( - parsed_args["tmle-prefix"], - ".csv" + +""" + make_summary(prefix; out="summary.json") + +# Args + +- `prefix`: Prefix to .hdf5 files to be used to create the summary file + +# Options + +- `-o, --out`: Ouptut JSON file +""" +@task function make_summary(prefix; 
output=JSONOutput(filename="summary.json")) + dirname_, prefix_ = splitdir(prefix) + dirname__ = dirname_ == "" ? "." : dirname_ + files = filter( + x -> startswith(x, prefix_), + readdir(dirname__) ) - # Load tmle data - data = load_csv_files(empty_tmle_output(), tmle_files) - # Load sieve data - sieveprefix = parsed_args["sieve-prefix"] - if sieveprefix !== nothing - sieve_files = files_matching_prefix_and_suffix( - parsed_args["sieve-prefix"], - ".csv" - ) - sieve_data = load_csv_files(empty_sieve_output(), sieve_files) - if size(sieve_data, 1) > 0 - data = leftjoin(data, sieve_data, on=joining_keys(), matchmissing=:equal) + # Initialize JSON output + initialize(output) + # Write all but last batch + for filename in files[1:end-1] + filepath = joinpath(dirname_, filename) + jldopen(filepath) do io + for batch_key in keys(io) + update_file(output, io[batch_key]) + end end end - - # Pvalue Adjustment by Target - for gp in groupby(data, :OUTCOME) - gp.TRAIT_ADJUSTED_TMLE_PVALUE = gp[:, :TMLE_PVALUE] - pvalues = collect(skipmissing(gp.TMLE_PVALUE)) - if length(pvalues) > 0 - adjusted_pvalues = adjust(pvalues, BenjaminiHochberg()) - adjusted_pval_index = 1 - for index in eachindex(gp.TRAIT_ADJUSTED_TMLE_PVALUE) - gp.TRAIT_ADJUSTED_TMLE_PVALUE[index] === missing && continue - gp.TRAIT_ADJUSTED_TMLE_PVALUE[index] = adjusted_pvalues[adjusted_pval_index] - adjusted_pval_index += 1 - end + # Write last batch + filepath = joinpath(dirname_, files[end]) + jldopen(filepath) do io + nkeys = length(keys(io)) + for (batch_index, batch_key) in enumerate(keys(io)) + finalize = batch_index == nkeys ? 
true : false + update_file(output, io[batch_key], finalize=finalize) end end - - # Write to output file - CSV.write(parsed_args["out"], data) - return 0 end \ No newline at end of file diff --git a/test/merge.jl b/test/merge.jl index d8967ca..946e84c 100644 --- a/test/merge.jl +++ b/test/merge.jl @@ -6,102 +6,7 @@ using CSV using DataFrames @testset "Test merge_csv_files, no sieve file" begin - parsed_args = Dict( - "tmle-prefix" => joinpath("data", "merge", "tmle"), - "sieve-prefix" => nothing, - "out" => "output.csv" - ) - merge_csv_files(parsed_args) - output = CSV.read(parsed_args["out"], DataFrame) - @test names(output) == [ - "PARAMETER_TYPE", "TREATMENTS", "CASE", - "CONTROL", "OUTCOME", "CONFOUNDERS", - "COVARIATES", "INITIAL_ESTIMATE", - "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB", - "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", - "LOG", "TRAIT_ADJUSTED_TMLE_PVALUE" - ] - @test size(output, 1) == 8 - - for (pval, adjusted_pval) in zip(output.TMLE_PVALUE, output.TRAIT_ADJUSTED_TMLE_PVALUE) - if pval === missing - @test adjusted_pval === missing - else - @test pval <= adjusted_pval - end - end - - @test output.PARAMETER_TYPE == [ - "IATE", "IATE", "ATE", - "IATE", "IATE", "ATE", - "ATE", "CM" - ] - rm(parsed_args["out"]) -end - -@testset "Test merge_csv_files, sieve file" begin - sieve_colnames = [ - "PARAMETER_TYPE", "TREATMENTS", "CASE", - "CONTROL", "OUTCOME", "CONFOUNDERS", - "COVARIATES", "INITIAL_ESTIMATE", - "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB", - "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", - "LOG", "SIEVE_STD", "SIEVE_PVALUE", "SIEVE_LWB", "SIEVE_UPB", "TRAIT_ADJUSTED_TMLE_PVALUE" - ] - parsed_args = Dict( - "tmle-prefix" => joinpath("data", "merge", "tmle"), - "sieve-prefix" => joinpath("data", "merge", "sieve"), - "out" => "output.csv" - ) - merge_csv_files(parsed_args) - output = CSV.read(parsed_args["out"], DataFrame) - 
@test names(output) == sieve_colnames - @test size(output, 1) == 8 - @test output.SIEVE_STD isa Vector{Float64} - @test output.PARAMETER_TYPE == [ - "IATE", "IATE", "ATE", - "IATE", "IATE", "ATE", - "ATE", "CM" - ] - - parsed_args = Dict( - "tmle-prefix" => joinpath("data", "merge", "tmle"), - "sieve-prefix" => joinpath("data", "merge", "sieve_output_2"), - "out" => "output.csv" - ) - merge_csv_files(parsed_args) - output = CSV.read(parsed_args["out"], DataFrame) - @test names(output) == sieve_colnames - @test size(output, 1) == 8 - @test all(x===missing for x in output.SIEVE_STD[3:end]) - - rm(parsed_args["out"]) -end - -@testset "Test merge_csv_files, empty sieve file" begin - parsed_args = Dict( - "tmle-prefix" => joinpath("data", "merge", "tmle"), - "sieve-prefix" => joinpath("data", "merge", "empty"), - "out" => "output.csv" - ) - merge_csv_files(parsed_args) - output = CSV.read(parsed_args["out"], DataFrame) - @test names(output) == [ - "PARAMETER_TYPE", "TREATMENTS", "CASE", - "CONTROL", "OUTCOME", "CONFOUNDERS", - "COVARIATES", "INITIAL_ESTIMATE", - "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB", - "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", - "LOG", "TRAIT_ADJUSTED_TMLE_PVALUE" - ] - @test size(output, 1) == 8 - @test output.PARAMETER_TYPE == [ - "IATE", "IATE", "ATE", - "IATE", "IATE", "ATE", - "ATE", "CM" - ] - - rm(parsed_args["out"]) + make_summary("tmle_out") end From 6b128fd2bbe76305812257dc3d3956da54f4fca2 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 30 Nov 2023 17:33:32 +0000 Subject: [PATCH 18/71] fix all tests --- src/TargetedEstimation.jl | 4 +-- src/merge.jl | 57 ----------------------------- src/outputs.jl | 41 +++++++-------------- src/runner.jl | 2 -- src/sieve_variance.jl | 38 -------------------- src/summary.jl | 67 ++++++++++++++++++++++++++++++++++ test/merge.jl | 15 -------- test/runner.jl | 63 ++------------------------------ test/runtests.jl | 2 +- 
test/sieve_variance.jl | 2 ++ test/summary.jl | 75 +++++++++++++++++++++++++++++++++++++++ test/testutils.jl | 51 ++++++++++++++++++++++++++ 12 files changed, 213 insertions(+), 204 deletions(-) delete mode 100644 src/merge.jl create mode 100644 src/summary.jl delete mode 100644 test/merge.jl create mode 100644 test/summary.jl diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index 7d94897..e38087c 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -37,7 +37,7 @@ include("outputs.jl") include("runner.jl") include("utils.jl") include("sieve_variance.jl") -include("merge.jl") +include("summary.jl") include("resampling.jl") include(joinpath("models", "glmnet.jl")) include(joinpath("models", "adaptive_interaction_transformer.jl")) @@ -47,7 +47,7 @@ include(joinpath("models", "biallelic_snp_encoder.jl")) """TL CLI.""" @main -export Runner, tmle, sieve_variance_plateau, merge_csv_files +export Runner, tmle, sieve_variance_plateau, make_summary export GLMNetRegressor, GLMNetClassifier export RestrictedInteractionTransformer, BiAllelicSNPEncoder export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV diff --git a/src/merge.jl b/src/merge.jl deleted file mode 100644 index 3bcce8e..0000000 --- a/src/merge.jl +++ /dev/null @@ -1,57 +0,0 @@ - -function files_matching_prefix_and_suffix(prefix, suffix) - dirname_, prefix_ = splitdir(prefix) - dirname__ = dirname_ == "" ? "." 
: dirname_ - files = filter( - x -> startswith(x, prefix_) && endswith(x, suffix), - readdir(dirname__) - ) - return [joinpath(dirname_, x) for x in files] -end - -read_output_with_types(file) = - CSV.read(file, DataFrame, types=Dict(key => String for key in joining_keys())) - - - - -""" - make_summary(prefix; out="summary.json") - -# Args - -- `prefix`: Prefix to .hdf5 files to be used to create the summary file - -# Options - -- `-o, --out`: Ouptut JSON file -""" -@task function make_summary(prefix; output=JSONOutput(filename="summary.json")) - dirname_, prefix_ = splitdir(prefix) - dirname__ = dirname_ == "" ? "." : dirname_ - files = filter( - x -> startswith(x, prefix_), - readdir(dirname__) - ) - # Initialize JSON output - initialize(output) - # Write all but last batch - for filename in files[1:end-1] - filepath = joinpath(dirname_, filename) - jldopen(filepath) do io - for batch_key in keys(io) - update_file(output, io[batch_key]) - end - end - end - # Write last batch - filepath = joinpath(dirname_, files[end]) - jldopen(filepath) do io - nkeys = length(keys(io)) - for (batch_index, batch_key) in enumerate(keys(io)) - finalize = batch_index == nkeys ? true : false - update_file(output, io[batch_key], finalize=finalize) - end - end - return 0 -end \ No newline at end of file diff --git a/src/outputs.jl b/src/outputs.jl index 1044fe3..4302919 100644 --- a/src/outputs.jl +++ b/src/outputs.jl @@ -61,9 +61,7 @@ end compress::Bool = false end -function update_file(output::HDF5Output, results, dataset) - output.filename === nothing && return - results = post_process(results, dataset, output.pval_threshold, output.sample_ids) +function update_file(output::HDF5Output, results; finalize=false) jldopen(output.filename, "a+", compress=output.compress) do io batches_keys = keys(io) latest_index = isempty(batches_keys) ? 
0 : maximum(parse(Int, split(key, "_")[2]) for key in batches_keys) @@ -71,6 +69,12 @@ function update_file(output::HDF5Output, results, dataset) end end +function update_file(output::HDF5Output, results, dataset) + output.filename === nothing && return + results = post_process(results, dataset, output.pval_threshold, output.sample_ids) + update_file(output, results) +end + ##################################################################### #####  JLS OUTPUT #### ##################################################################### @@ -81,10 +85,7 @@ end sample_ids::Bool = false end -function update_file(output::JLSOutput, results, dataset) - output.filename === nothing && return - results = post_process(results, dataset, output.pval_threshold, output.sample_ids) - +function update_file(output::JLSOutput, results; finalize=false) open(output.filename, "a") do io for result in results serialize(io, result) @@ -92,28 +93,13 @@ function update_file(output::JLSOutput, results, dataset) end end -##################################################################### -#####  STD OUTPUT #### -##################################################################### - -function update_file(doprint, results, partition) - if doprint - mimetext = MIME"text/plain"() - index = 1 - for (result, estimand_index) in zip(results, partition) - show(stdout, mimetext, string("⋆⋆⋆ Estimand ", estimand_index, " ⋆⋆⋆")) - println(stdout) - show(stdout, mimetext, first(result).estimand) - for (key, val) ∈ zip(keys(result), result) - show(stdout, mimetext, string("→ Estimation Result From: ", key, )) - println(stdout) - show(stdout, mimetext, val) - index += 1 - end - end - end +function update_file(output::JLSOutput, results, dataset) + output.filename === nothing && return + results = post_process(results, dataset, output.pval_threshold, output.sample_ids) + update_file(output, results) end + ##################################################################### #####  OUTPUTS #### 
##################################################################### @@ -122,7 +108,6 @@ end json::JSONOutput = JSONOutput() hdf5::HDF5Output = HDF5Output() jls::JLSOutput = JLSOutput() - std::Bool = false end """ diff --git a/src/runner.jl b/src/runner.jl index 0817526..473e873 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -38,8 +38,6 @@ mutable struct Runner end function save(runner::Runner, results, partition, finalize) - # Append STD Out - update_file(runner.outputs.std, results, partition) # Append JSON Output update_file(runner.outputs.json, results; finalize=finalize) # Append JLS Output diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl index 290e672..d113cc7 100644 --- a/src/sieve_variance.jl +++ b/src/sieve_variance.jl @@ -23,44 +23,6 @@ function align_ic(ic, sample_ids, grm_ids) return coalesce.(aligned_ic, 0) end -sieve_dataframe() = DataFrame( - PARAMETER_TYPE=String[], - TREATMENTS=String[], - CASE=String[], - CONTROL=Union{String, Missing}[], - OUTCOME=String[], - CONFOUNDERS=String[], - COVARIATES=Union{String, Missing}[], - TMLE_ESTIMATE=Float64[], -) - -empty_sieve_output() = DataFrame( - PARAMETER_TYPE=String[], - TREATMENTS=String[], - CASE=String[], - CONTROL=Union{String, Missing}[], - OUTCOME=String[], - CONFOUNDERS=String[], - COVARIATES=Union{String, Missing}[], - SIEVE_STD = Float64[], - SIEVE_PVALUE = Float64[], - SIEVE_LWB = Float64[], - SIEVE_UPB = Float64[], -) - -function push_sieveless!(output, Ψ, Ψ̂) - target = string(Ψ.target) - param_type = param_string(Ψ) - treatments = treatment_string(Ψ) - case = case_string(Ψ) - control = control_string(Ψ) - confounders = confounders_string(Ψ) - covariates = covariates_string(Ψ) - push!(output, ( - param_type, treatments, case, control, target, confounders, covariates, Ψ̂ - )) -end - """ bit_distances(sample_grm, nτs) diff --git a/src/summary.jl b/src/summary.jl new file mode 100644 index 0000000..6ebf4c5 --- /dev/null +++ b/src/summary.jl @@ -0,0 +1,67 @@ + +function 
files_matching_prefix_and_suffix(prefix, suffix) + dirname_, prefix_ = splitdir(prefix) + dirname__ = dirname_ == "" ? "." : dirname_ + files = filter( + x -> startswith(x, prefix_) && endswith(x, suffix), + readdir(dirname__) + ) + return [joinpath(dirname_, x) for x in files] +end + +read_output_with_types(file) = + CSV.read(file, DataFrame, types=Dict(key => String for key in joining_keys())) + +""" + make_summary( + prefix; + outputs=Outputs(json=JSONOutput(filename="summary.json")) + ) + +Combines multiple TMLE .hdf5 output files in a single file. Multiple formats can be output at once. + +# Args + +- `prefix`: Prefix to .hdf5 files to be used to create the summary file + +# Options + +- `-o, --outputs`: Ouptuts configuration. +""" +@cast function make_summary( + prefix; + outputs=Outputs(json=JSONOutput(filename="summary.json")) + ) + + # Initialize output files + initialize(outputs) + actual_outputs = [getfield(outputs, field) for field ∈ fieldnames(Outputs) + if getfield(outputs, field).filename !== nothing] + + # Get all input .hdf5 files + dirname_, prefix_ = splitdir(prefix) + dirname__ = dirname_ == "" ? "." 
: dirname_ + files = sort(filter( + x -> startswith(x, prefix_), + readdir(dirname__) + )) + nfiles = length(files) + + # Write to files + for (file_index, filename) in enumerate(files) + filepath = joinpath(dirname_, filename) + jldopen(filepath) do io + batch_keys = collect(keys(io)) + nbatches = length(batch_keys) + for (batch_index, batch_key) in enumerate(batch_keys) + results = io[batch_key] + finalize = file_index == nfiles && batch_index == nbatches + for output in actual_outputs + update_file(output, results; finalize=finalize) + end + end + end + end + + return 0 +end \ No newline at end of file diff --git a/test/merge.jl b/test/merge.jl deleted file mode 100644 index 946e84c..0000000 --- a/test/merge.jl +++ /dev/null @@ -1,15 +0,0 @@ -module TestMergeCSVFiles - -using TargetedEstimation -using Test -using CSV -using DataFrames - -@testset "Test merge_csv_files, no sieve file" begin - make_summary("tmle_out") -end - - -end - -true \ No newline at end of file diff --git a/test/runner.jl b/test/runner.jl index fd52bc3..3acccfc 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -4,14 +4,8 @@ using Test using TargetedEstimation using TMLE using JLD2 -using StableRNGs -using Distributions -using LogExpFunctions -using CategoricalArrays -using DataFrames using CSV using Serialization -using Arrow using YAML using JSON @@ -21,50 +15,6 @@ CONFIGDIR = joinpath(TESTDIR, "config") include(joinpath(TESTDIR, "testutils.jl")) -""" -CONTINUOUS_OUTCOME: -- IATE(0->1, 0->1) = E[W₂] = 0.5 -- ATE(0->1, 0->1) = -4 E[C₁] + 1 + E[W₂] = -2 + 1 + 0.5 = -0.5 - -BINARY_OUTCOME: -- IATE(0->1, 0->1) = -- ATE(0->1, 0->1) = - -""" -function build_dataset(;n=1000, format="csv") - rng = StableRNG(123) - # Confounders - W₁ = rand(rng, Uniform(), n) - W₂ = rand(rng, Uniform(), n) - # Covariates - C₁ = rand(rng, n) - # Treatment | Confounders - T₁ = rand(rng, Uniform(), n) .< logistic.(0.5sin.(W₁) .- 1.5W₂) - T₂ = rand(rng, Uniform(), n) .< logistic.(-3W₁ - 1.5W₂) - # target | 
Confounders, Covariates, Treatments - μ = 1 .+ 2W₁ .+ 3W₂ .- 4C₁.*T₁ .+ T₁ + T₂.*W₂.*T₁ - y₁ = μ .+ rand(rng, Normal(0, 0.01), n) - y₂ = rand(rng, Uniform(), n) .< logistic.(μ) - # Add some missingness - y₂ = vcat(missing, y₂[2:end]) - - dataset = DataFrame( - SAMPLE_ID = 1:n, - T1 = categorical(T₁), - T2 = categorical(T₂), - W1 = W₁, - W2 = W₂, - C1 = C₁, - ) - # Comma in name - dataset[!, "CONTINUOUS, OUTCOME"] = categorical(y₁) - # Slash in name - dataset[!, "BINARY/OUTCOME"] = categorical(y₂) - dataset[!, "EXTREME_BINARY"] = categorical(vcat(0, ones(n-1))) - - format == "csv" ? CSV.write("data.csv", dataset) : Arrow.write("data.arrow", dataset) -end - @testset "Integration Test" begin build_dataset(;n=1000, format="csv") tmpdir = mktempdir(cleanup=true) @@ -74,7 +24,6 @@ end json=TargetedEstimation.JSONOutput(filename="output.json"), hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1., sample_ids=true), jls=TargetedEstimation.JLSOutput(filename="output.jls", pval_threshold=1e-5), - std=true, ) runner = Runner( "data.csv", @@ -90,16 +39,9 @@ end @test result.OSE isa TMLE.OSEstimate end - # Test Save to STDOUT - output_txt = "output.txt" + # Save outputs TargetedEstimation.initialize(outputs) - open(output_txt, "w") do io - redirect_stdout(io) do - TargetedEstimation.save(runner, results, partition, true) - end - end - stdout_content = read(output_txt, String) - @test all(occursin("Estimand $i", stdout_content) for i in partition) + TargetedEstimation.save(runner, results, partition, true) # Test Save to JSON loaded_results = TMLE.read_json(outputs.json.filename) @@ -160,7 +102,6 @@ end # Clean rm("data.csv") rm(outputs.jls.filename) - rm(output_txt) rm(outputs.json.filename) rm(outputs.hdf5.filename) end diff --git a/test/runtests.jl b/test/runtests.jl index e8b741a..50cc785 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,7 @@ TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") @test include(joinpath(TESTDIR, 
"utils.jl")) @test include(joinpath(TESTDIR, "sieve_variance.jl")) @test include(joinpath(TESTDIR, "runner.jl")) - @test include(joinpath(TESTDIR, "merge.jl")) + @test include(joinpath(TESTDIR, "summary.jl")) @test include(joinpath(TESTDIR, "resampling.jl")) @test include(joinpath(TESTDIR, "models", "glmnet.jl")) @test include(joinpath(TESTDIR, "models", "adaptive_interaction_transformer.jl")) diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index 3ffaad7..c357950 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -361,3 +361,5 @@ end end end + +true diff --git a/test/summary.jl b/test/summary.jl new file mode 100644 index 0000000..6903e88 --- /dev/null +++ b/test/summary.jl @@ -0,0 +1,75 @@ +module TestMergeCSVFiles + +using TargetedEstimation +using Test +using CSV +using DataFrames +using Serialization +using JLD2 + +TESTDIR = joinpath(pkgdir(TargetedEstimation), "test") + +CONFIGDIR = joinpath(TESTDIR, "config") + +include(joinpath(TESTDIR, "testutils.jl")) + +@testset "Test make_summary" begin + build_dataset() + datafile = "data.csv" + estimatorfile = joinpath(CONFIGDIR, "ose_config.jl") + tmpdir = mktempdir(cleanup=true) + # First Run + tmle_output_1 = TargetedEstimation.Outputs(hdf5=TargetedEstimation.HDF5Output(filename="tmle_output_1.hdf5")) + config_1 = statistical_estimands_only_config() + configfile_1 = joinpath(tmpdir, "configuration_1.json") + TMLE.write_json(configfile_1, config_1) + tmle(datafile, configfile_1, estimatorfile; outputs=tmle_output_1, chunksize=3) + + # Second Run + tmle_output_2 = TargetedEstimation.Outputs(hdf5=TargetedEstimation.HDF5Output(filename="tmle_output_2.hdf5")) + config_2 = causal_and_composed_estimands_config() + configfile_2 = joinpath(tmpdir, "configuration_2.json") + TMLE.write_json(configfile_2, config_2) + tmle(datafile, configfile_2, estimatorfile; outputs=tmle_output_2) + + # Make summary files + outputs = TargetedEstimation.Outputs( + 
json=TargetedEstimation.JSONOutput(filename="summary.json"), + hdf5=TargetedEstimation.HDF5Output(filename="summary.hdf5"), + jls=TargetedEstimation.JLSOutput(filename="summary.jls") + ) + make_summary("tmle_output", outputs=outputs) + + # Test correctness + hdf5file_1 = jldopen("tmle_output_1.hdf5") + hdf5file_2 = jldopen("tmle_output_2.hdf5") + inputs = vcat(hdf5file_1["Batch_1"], hdf5file_1["Batch_2"], hdf5file_2["Batch_1"]) + + json_outputs = TMLE.read_json(outputs.json.filename) + jls_outputs = [] + open(outputs.jls.filename) do io + while !eof(io) + push!(jls_outputs, deserialize(io)) + end + end + hdf5_output = jldopen(outputs.hdf5.filename) + hdf5_outputs = vcat((hdf5_output[key] for key in keys(hdf5_output))...) + + @test length(inputs) == 9 + for (input, jls_output, hdf5_out, json_output) in zip(inputs, jls_outputs, hdf5_outputs, json_outputs) + @test input.OSE.estimand == jls_output.OSE.estimand == hdf5_out.OSE.estimand == json_output[:OSE].estimand + end + + # cleanup + rm("tmle_output_1.hdf5") + rm("tmle_output_2.hdf5") + rm(outputs.json.filename) + rm(outputs.jls.filename) + rm(outputs.hdf5.filename) + rm(datafile) +end + + +end + +true \ No newline at end of file diff --git a/test/testutils.jl b/test/testutils.jl index 4f286c0..ef5b992 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -1,4 +1,11 @@ using TMLE +using StableRNGs +using DataFrames +using Distributions +using LogExpFunctions +using CSV +using Arrow +using CategoricalArrays function statistical_estimands_only_config() configuration = Configuration( @@ -77,3 +84,47 @@ function causal_and_composed_estimands_config() ) return configuration end + +""" +CONTINUOUS_OUTCOME: +- IATE(0->1, 0->1) = E[W₂] = 0.5 +- ATE(0->1, 0->1) = -4 E[C₁] + 1 + E[W₂] = -2 + 1 + 0.5 = -0.5 + +BINARY_OUTCOME: +- IATE(0->1, 0->1) = +- ATE(0->1, 0->1) = + +""" +function build_dataset(;n=1000, format="csv") + rng = StableRNG(123) + # Confounders + W₁ = rand(rng, Uniform(), n) + W₂ = rand(rng, Uniform(), n) + # 
Covariates + C₁ = rand(rng, n) + # Treatment | Confounders + T₁ = rand(rng, Uniform(), n) .< logistic.(0.5sin.(W₁) .- 1.5W₂) + T₂ = rand(rng, Uniform(), n) .< logistic.(-3W₁ - 1.5W₂) + # target | Confounders, Covariates, Treatments + μ = 1 .+ 2W₁ .+ 3W₂ .- 4C₁.*T₁ .+ T₁ + T₂.*W₂.*T₁ + y₁ = μ .+ rand(rng, Normal(0, 0.01), n) + y₂ = rand(rng, Uniform(), n) .< logistic.(μ) + # Add some missingness + y₂ = vcat(missing, y₂[2:end]) + + dataset = DataFrame( + SAMPLE_ID = 1:n, + T1 = categorical(T₁), + T2 = categorical(T₂), + W1 = W₁, + W2 = W₂, + C1 = C₁, + ) + # Comma in name + dataset[!, "CONTINUOUS, OUTCOME"] = categorical(y₁) + # Slash in name + dataset[!, "BINARY/OUTCOME"] = categorical(y₂) + dataset[!, "EXTREME_BINARY"] = categorical(vcat(0, ones(n-1))) + + format == "csv" ? CSV.write("data.csv", dataset) : Arrow.write("data.arrow", dataset) +end \ No newline at end of file From 72fd116c294a8e6b5180b5be45ee7da0247af030 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 30 Nov 2023 17:51:18 +0000 Subject: [PATCH 19/71] fix typo --- src/runner.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runner.jl b/src/runner.jl index 473e873..e7961c8 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -135,7 +135,7 @@ TMLE CLI. # Options - `-v, --verbosity`: Verbosity level. -- `-o, --outputs`: Ouputs to be genrated. +- `-o, --outputs`: Ouputs to be generated. - `--chunksize`: Results are written in batches of size chunksize. - `-r, --rng`: Random seed (Only used for estimands ordering at the moment). - `-c, --cache-strategy`: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size"). 
From a459a1c8da783e3aad7f99c338d00d4ca447aa20 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 30 Nov 2023 18:24:21 +0000 Subject: [PATCH 20/71] add more commonicon stuff --- Comonicon.toml | 16 +++++++-- deps/execute.jl | 7 ++++ scripts/merge_summaries.jl | 26 -------------- scripts/sieve_variance.jl | 41 ---------------------- scripts/tmle.jl | 70 -------------------------------------- 5 files changed, 21 insertions(+), 139 deletions(-) create mode 100644 deps/execute.jl delete mode 100644 scripts/merge_summaries.jl delete mode 100644 scripts/sieve_variance.jl delete mode 100644 scripts/tmle.jl diff --git a/Comonicon.toml b/Comonicon.toml index bd97f0f..19f0cb6 100644 --- a/Comonicon.toml +++ b/Comonicon.toml @@ -1,8 +1,20 @@ -name = "TargetedEstimation" +name = "fasttmle" [install] completion = true quiet = false optimize = 2 -[sysimg] \ No newline at end of file +[sysimg] +incremental=true +filter_stdlibs=false + +[sysimg.precompile] +execution_file = ["deps/execute.jl"] + +[application] +incremental=true +filter_stdlibs=false + +[application.precompile] +execution_file = ["deps/execute.jl"] \ No newline at end of file diff --git a/deps/execute.jl b/deps/execute.jl new file mode 100644 index 0000000..666bc56 --- /dev/null +++ b/deps/execute.jl @@ -0,0 +1,7 @@ +using TargetedEstimation + +TargetedEstimation.command_main(["-h"]) +TargetedEstimation.command_main(["tmle", "-h"]) +TargetedEstimation.command_main(["make-summary", "-h"]) +TargetedEstimation.command_main(["sieve-variance-plateau", "-h"]) + diff --git a/scripts/merge_summaries.jl b/scripts/merge_summaries.jl deleted file mode 100644 index 34fd604..0000000 --- a/scripts/merge_summaries.jl +++ /dev/null @@ -1,26 +0,0 @@ -using ArgParse -using TargetedEstimation - -function parse_commandline() - s = ArgParseSettings( - description = "Merge files outputs by tmle.jl and sieve_variance.jl in a single file.", - commands_are_required = false) - - @add_arg_table s begin - "tmle-prefix" - help = "Prefix 
to files output by tmle.jl" - required = true - "out" - help = "Output file to be generated" - required = true - "--sieve-prefix" - help = "Prefix to files output by sieve_variance.jl" - required = false - arg_type = String - end - - return parse_args(s) -end - -parsed_args = parse_commandline() -merge_csv_files(parsed_args) \ No newline at end of file diff --git a/scripts/sieve_variance.jl b/scripts/sieve_variance.jl deleted file mode 100644 index f6a551e..0000000 --- a/scripts/sieve_variance.jl +++ /dev/null @@ -1,41 +0,0 @@ -using TargetedEstimation -using ArgParse - -function parse_commandline() - s = ArgParseSettings(description="Compute the Sieve Variance Plateau estimate for each phenotype in the result file") - - @add_arg_table s begin - "prefix" - help = "Prefix to the .hdf5 files generated by the `tmle.jl` script" - arg_type = String - required = true - "grm-prefix" - arg_type = String - help = "Prefix of the aggregated GRM" - required = true - "out-prefix" - arg_type = String - help = "output filename" - required = true - "--nb-estimators", "-n" - arg_type = Int - help = "Number of variance estimators to compute" - default = 10 - "--max-tau", "-m" - arg_type = Float64 - help = "Maximum distance of individuals to take into account (maximum=2)"* - "It was witnessed that beyond 0.9, weird limit effects happen" - default = 0.8 - "--verbosity", "-v" - arg_type = Int - help = "Verbosity level" - default = 1 - end - - return parse_args(s) -end - - -parsed_args = parse_commandline() - -sieve_variance_plateau(parsed_args) diff --git a/scripts/tmle.jl b/scripts/tmle.jl deleted file mode 100644 index 40df722..0000000 --- a/scripts/tmle.jl +++ /dev/null @@ -1,70 +0,0 @@ -using ArgParse -using TargetedEstimation - -function parse_commandline() - s = ArgParseSettings( - description = "Targeted Learning Estimation", - commands_are_required = false, - version = "0.2", - add_version = true) - - @add_arg_table s begin - "dataset" - help = "Path to dataset file 
(.csv|.arrow)" - required = true - "estimands-config" - help = "A .yaml file listing all parameters to estimate." - required = true - "--estimators-config" - help = "A file (.jl) defining the estimators to be used." - arg_type= String - required = false - "--hdf5-out" - help = "Stores the results in a HDF5 file format (see also: --pval-threshold)." - arg_type = String - default = nothing - "--csv-out" - help = "Path to an output `.csv` file." - required = true - "--pval-threshold" - help = """In order to save disk space, only estimation results with a p-value lesser than - the threshold will have their influence curve saved. (default = 1., i.e. all influence curves are saved). - """ - default = 1. - arg_type = Float64 - "--sort-estimands" - help = "If estimands should be sorted to minimize memory usage, see also: cache-strategy." - default = false - arg_type = Bool - "--cache-strategy" - help = string("Nuisance functions are stored in the cache during estimation. The cache can be released from these", - " functions to limit memory consumption. There are currently 3 caching management strategies: ", - "'release_unusable' (default): Will release the cache from nuisance functions that won't be used in the future. ", - "'K': Will keep the cache size under K nuisance functions. ", - "'no_cache': Disables caching. ", - "Note that caching strategies are better used in conjunction with `--sort-estimands` to minimized memory usage." - ) - default = "release_unusable" - arg_type = String - "--chunksize" - help = "Results are appended to outfiles in chunks." 
- default = 100 - arg_type = Int - "--rng" - help = "Random seed" - default = 123 - arg_type = Int - "--verbosity", "-v" - help = "Verbosity level" - arg_type = Int - default = 1 - end - - return parse_args(s) -end - -parsed_args = parse_commandline() - -tmle_estimation(parsed_args) - - From c45cb82a9e545b6400ed5ca9ecfddc5101d74def Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 30 Nov 2023 18:41:02 +0000 Subject: [PATCH 21/71] update compats --- Project.toml | 6 +++--- test/Project.toml | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index a4f5a0d..2f187ac 100644 --- a/Project.toml +++ b/Project.toml @@ -41,7 +41,7 @@ Combinatorics = "1.0.2" Comonicon = "1.0.6" Configurations = "0.17.6" DataFrames = "1.3.4" -EvoTrees = "0.14.6" +EvoTrees = "0.16.5" GLMNet = "0.7" HighlyAdaptiveLasso = "0.2.0" JLD2 = "0.4.22" @@ -49,11 +49,11 @@ JSON = "0.21.4" MKL = "0.6" MLJ = "0.20.0" MLJBase = "1.0.1" -MLJLinearModels = "0.9" +MLJLinearModels = "0.10.0" MLJModelInterface = "1.8.0" MLJModels = "0.16" MLJXGBoostInterface = "0.3.4" -MultipleTesting = "0.5.1" +MultipleTesting = "0.6.0" Optim = "1.7" Tables = "1.10.1" YAML = "0.4.9" diff --git a/test/Project.toml b/test/Project.toml index 465caa7..440b866 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -6,14 +6,14 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c" MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" -RCall = "6f49c342-dc21-5d91-9882-a32aef131414" Random = 
"9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" TMLE = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" @@ -23,6 +23,4 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" [compat] Distributions = "0.25" LogExpFunctions = "0.3" -MLJGLMInterface = "0.3" -RCall = "0.13" StableRNGs = "1.0" From 2ec3ac57f876af4d02d7089a157f99143f3146f6 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 30 Nov 2023 18:41:24 +0000 Subject: [PATCH 22/71] remove deprecated files --- test/config/failing_parameters.yaml | 6 ---- test/config/parameters.bin | Bin 487 -> 0 bytes test/config/parameters.yaml | 31 -------------------- test/config/problematic_tmle_ose_config.jl | 14 --------- test/config/sieve_tests_parameters_1.yaml | 32 --------------------- test/config/sieve_tests_parameters_2.yaml | 9 ------ test/data/merge/empty_sieve.csv | 1 - test/data/merge/sieve_output_1.csv | 7 ----- test/data/merge/sieve_output_2.csv | 3 -- test/data/merge/tmle_output_1.csv | 7 ----- test/data/merge/tmle_output_2.csv | 3 -- test/data/sieve_variances.hdf5 | Bin 4790 -> 0 bytes 12 files changed, 113 deletions(-) delete mode 100644 test/config/failing_parameters.yaml delete mode 100644 test/config/parameters.bin delete mode 100644 test/config/parameters.yaml delete mode 100644 test/config/problematic_tmle_ose_config.jl delete mode 100644 test/config/sieve_tests_parameters_1.yaml delete mode 100644 test/config/sieve_tests_parameters_2.yaml delete mode 100644 test/data/merge/empty_sieve.csv delete mode 100644 test/data/merge/sieve_output_1.csv delete mode 100644 test/data/merge/sieve_output_2.csv delete mode 100644 test/data/merge/tmle_output_1.csv delete mode 100644 test/data/merge/tmle_output_2.csv delete mode 100644 test/data/sieve_variances.hdf5 diff --git a/test/config/failing_parameters.yaml b/test/config/failing_parameters.yaml deleted file mode 100644 index 
92fdeff..0000000 --- a/test/config/failing_parameters.yaml +++ /dev/null @@ -1,6 +0,0 @@ - Estimands: - - type: ATE - outcome: EXTREME_BINARY - treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - treatment_confounders: (T1 = [W1, W2], T2 = [W1, W2]) - outcome_extra_covariates: [C1] \ No newline at end of file diff --git a/test/config/parameters.bin b/test/config/parameters.bin deleted file mode 100644 index 3fe2558a5906a110f484958b93a56a08c0b32505..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 487 zcmXr_@)2ZVU|+(cVK6~)CL_oj5VtV`y#;p} hnw#!}+yl~#;szgItjc%@DTDfuAJar%A2bueN&y8sYxV#D diff --git a/test/config/parameters.yaml b/test/config/parameters.yaml deleted file mode 100644 index 4dea179..0000000 --- a/test/config/parameters.yaml +++ /dev/null @@ -1,31 +0,0 @@ -Estimands: - - type: TMLE.StatisticalIATE - outcome: CONTINUOUS, outcome - treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) - outcome_extra_covariates: (:C1,) - - type: TMLE.StatisticalIATE - outcome: "BINARY/outcome" - treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) - outcome_extra_covariates: (:C1,) - - type: TMLE.StatisticalATE - outcome: CONTINUOUS, outcome - treatment_values: (T1 = (control = 0, case = 1),) - treatment_confounders: (T1 = (:W1, :W2),) - outcome_extra_covariates: () - - type: TMLE.StatisticalIATE - outcome: CONTINUOUS, outcome - treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) - treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) - outcome_extra_covariates: () - - type: TMLE.StatisticalIATE - outcome: "BINARY/outcome" - treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) - treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) - outcome_extra_covariates: (:C1,) - - type: 
TMLE.StatisticalATE - outcome: CONTINUOUS, outcome - treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2)) - outcome_extra_covariates: (:C1,) \ No newline at end of file diff --git a/test/config/problematic_tmle_ose_config.jl b/test/config/problematic_tmle_ose_config.jl deleted file mode 100644 index a517cf4..0000000 --- a/test/config/problematic_tmle_ose_config.jl +++ /dev/null @@ -1,14 +0,0 @@ -default_models = TMLE.default_models( - Q_continuous = LinearRegressor(), - # For the estimation of E[Y|W, T]: binary target - Q_binary = LogisticClassifier(), - # This will fail - G = LogisticClassifier() -) - -models = merge(default_models, (T2 = LinearRegressor(),)) - -ESTIMATORS = ( - TMLE = TMLEE(models=models, weighted=true, ps_lowerbound=0.001), - OSE = OSE(models=models) -) \ No newline at end of file diff --git a/test/config/sieve_tests_parameters_1.yaml b/test/config/sieve_tests_parameters_1.yaml deleted file mode 100644 index 9edf5fe..0000000 --- a/test/config/sieve_tests_parameters_1.yaml +++ /dev/null @@ -1,32 +0,0 @@ -Parameters: - - type: IATE - target: CONTINUOUS, OUTCOME - treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - confounders: [W1, W2] - covariates: [C1] - - type: IATE - target: CONTINUOUS, OUTCOME - treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) - confounders: [W1, W2] - covariates: [C1] - - type: ATE - target: CONTINUOUS, OUTCOME - treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - confounders: [W1, W2] - covariates: [C1] - - type: IATE - target: "BINARY/OUTCOME" - treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1)) - confounders: [W1, W2] - covariates: [C1] - - type: IATE - target: "BINARY/OUTCOME" - treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0)) - confounders: [W1, W2] - covariates: [C1] - - type: ATE - target: "BINARY/OUTCOME" - treatment: (T1 = 
(control = 0, case = 1), T2 = (control = 0, case = 1)) - confounders: [W1, W2] - covariates: [C1] - \ No newline at end of file diff --git a/test/config/sieve_tests_parameters_2.yaml b/test/config/sieve_tests_parameters_2.yaml deleted file mode 100644 index 0d147be..0000000 --- a/test/config/sieve_tests_parameters_2.yaml +++ /dev/null @@ -1,9 +0,0 @@ -Parameters: - - type: ATE - target: CONTINUOUS, OUTCOME - treatment: (T1 = (control = 0, case = 1),) - confounders: [W1] - - type: CM - target: CONTINUOUS, OUTCOME - treatment: (T1 = 0,) - confounders: [W1] \ No newline at end of file diff --git a/test/data/merge/empty_sieve.csv b/test/data/merge/empty_sieve.csv deleted file mode 100644 index 3241e3c..0000000 --- a/test/data/merge/empty_sieve.csv +++ /dev/null @@ -1 +0,0 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES diff --git a/test/data/merge/sieve_output_1.csv b/test/data/merge/sieve_output_1.csv deleted file mode 100644 index cfe77b9..0000000 --- a/test/data/merge/sieve_output_1.csv +++ /dev/null @@ -1,7 +0,0 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB -IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.10514479130506516,0.0,0.39844897646996624,0.42804034052713935 -IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.10514479130506516,0.0,-0.42804034052713935,-0.39844897646996624 -ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.14041906595230103,0.0,-0.6661267914170061,-0.6266080320986587 -IATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04427033991279833,0.04393992135752185 -IATE,T2_&_T1,0_&_1,1_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04393992135752185,0.04427033991279833 -ATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3183199465274811,2.0186741955776768e-7,-0.16400271059341004,-0.07418473022532235 \ No newline at end of file diff --git 
a/test/data/merge/sieve_output_2.csv b/test/data/merge/sieve_output_2.csv deleted file mode 100644 index ad536a5..0000000 --- a/test/data/merge/sieve_output_2.csv +++ /dev/null @@ -1,3 +0,0 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB -ATE,T1,1,0,"CONTINUOUS, OUTCOME",W1,,0.17398861050605774,0.0,-1.1780933630666999,-1.1291269782236455 -CM,T1,0,,"CONTINUOUS, OUTCOME",W1,,0.09048639982938766,0.0,3.4078416054701566,3.433307593526622 diff --git a/test/data/merge/tmle_output_1.csv b/test/data/merge/tmle_output_1.csv deleted file mode 100644 index 574764e..0000000 --- a/test/data/merge/tmle_output_1.csv +++ /dev/null @@ -1,7 +0,0 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG -IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.31289224196843934,0.4132446584985528,0.11215083413905078,0.0002973305980956673,0.19204601585918746,0.6344433011379181,0.847922052214297,0.020796900602100377,0.1808979087784819,0.935635289898083,0.26988547749823344, -IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,-0.31289224196843934,-0.4132446584985528,0.11215083413905078,0.0002973305980956673,-0.6344433011379181,-0.19204601585918746,0.1132683682000456,0.007992877453115943,0.05298134725065751,0.3761329000024115,0.8446783494259822, -ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,-0.6913496525247373,-0.6463674117578324,0.14465023358495563,1.340594727468874e-5,-0.9316656493686948,-0.36106917414697,0.5750731876257001,0.6616018441386402,0.6626823260683342,0.9943324985582943,0.379330384132208, 
-IATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.015114902768326591,-0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6129084528900649,0.6125780343347885,0.18524882713929447,0.6791824198934945,0.375539677029601,0.2563919842828919,0.48004747095683487, -IATE,T2_&_T1,0_&_1,1_&_0,BINARY/OUTCOME,W1_&_W2,C1,-0.015114902768326591,0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6125780343347885,0.6129084528900649,0.8483223420477747,0.6377742233856645,0.653653091532227,0.6594583118531032,0.3862219522578093, -ATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,-0.07124029524113125,-0.1190937204093662,0.3182495428000389,0.7086573850781657,-0.7468080019909507,0.5086205611722183,0.7833975115669672,0.13752408975674002,0.8906874812178406,0.7407394467826026,0.6391102550858685, diff --git a/test/data/merge/tmle_output_2.csv b/test/data/merge/tmle_output_2.csv deleted file mode 100644 index a7d02aa..0000000 --- a/test/data/merge/tmle_output_2.csv +++ /dev/null @@ -1,3 +0,0 @@ -PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG -ATE,T1,1,0,"CONTINUOUS, OUTCOME",W1,,-1.170325854136744,,,,,,,,,,,"Error" -CM,T1,0,,"CONTINUOUS, OUTCOME",W1,,3.4304882451014653,3.4205745994983894,0.08649674229047534,1.6698354099787253e-94,3.249974334825743,3.5911748641710357,0.11925931782610122,0.1908267610598129,0.3548787761302413,0.6543239505251285,0.8668053182115685, diff --git a/test/data/sieve_variances.hdf5 b/test/data/sieve_variances.hdf5 deleted file mode 100644 index ea0776cc2bb95e30c5891d2fdfc4702fbad254fa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4790 zcmeI$d2oz(7zgm(O%dN05dV!BBe4CKjoyMNo0I?2lkcI&KQwyHzwN0*^OOCj~Qtn*Ty*BF=lM~=ut+C*=n|Gj4dB-v6|z| zZH=+fk*Vq9j6J%v()|DU>@Sd&l-RYdzL8$%=cm!=9=^u?>StTv!*L_S!^{~GIIw3@ 
zVv0^%8X#bAjXK(3H0d;2ef9efuFe|48m-kBrm6X0Y}ADMX+o!=_LBoJwm0PX zy`g9(M`V7$(@wehkF#zb_w`>2Q2!I@C2n@WE9cyt_LZ9jXmZKTh8i+oj*>YyLFTg) zWbU;@X6G`Q>n)PGaF1c8oMa#yo>g27*Ca3%Zr6Z`YparJff8{8l09g z5(N5TlR#JC>QksdudrF529WbHY!qnL27%rx6zJr7fzA~OR1THZZX@r&eW;r61SRVP zngnZ5ABW3u4U%PnV)6yb$`feNN`V5p2{fXtCS~e$H1c51&ZcKG6km7dSHX62`XO6z zfY+A0+H1@E55(gP{VK2R!b-0#sJfnvIs(Gsy%kN$hFI|;-lgzv-;-Y L@l(<}q%Hju(I*3L From 563c550d0a8bcef30e57b8e78e51ecb297399646 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 30 Nov 2023 18:46:22 +0000 Subject: [PATCH 23/71] fix test --- test/sieve_variance.jl | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index c357950..b6c66c3 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -256,16 +256,13 @@ end end @testset "Test corrected_stderrors" begin - io = jldopen(joinpath(TESTDIR, "data", "sieve_variances.hdf5")) - variances = io["variances"] + variances = [ + 1. 2. 6. + 4. 5. 3. 
+ ] stderrors = TargetedEstimation.corrected_stderrors(variances) # sanity check - @test size(stderrors, 1) == 10 - - # check for the first curve - stderrors[1] == sqrt(maximum(variances[:,1])) - - close(io) + stderrors == sqrt.([4., 5., 6.]) end @testset "Test SVP" begin From afc7eda8c498296596884ac4f1675e4bc04bcbaa Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 1 Dec 2023 10:24:19 +0000 Subject: [PATCH 24/71] restore config file --- test/config/problematic_tmle_ose_config.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 test/config/problematic_tmle_ose_config.jl diff --git a/test/config/problematic_tmle_ose_config.jl b/test/config/problematic_tmle_ose_config.jl new file mode 100644 index 0000000..a517cf4 --- /dev/null +++ b/test/config/problematic_tmle_ose_config.jl @@ -0,0 +1,14 @@ +default_models = TMLE.default_models( + Q_continuous = LinearRegressor(), + # For the estimation of E[Y|W, T]: binary target + Q_binary = LogisticClassifier(), + # This will fail + G = LogisticClassifier() +) + +models = merge(default_models, (T2 = LinearRegressor(),)) + +ESTIMATORS = ( + TMLE = TMLEE(models=models, weighted=true, ps_lowerbound=0.001), + OSE = OSE(models=models) +) \ No newline at end of file From 8c8515c3894baf59e9d9f6d3560cfc45f9b41688 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 1 Dec 2023 16:08:10 +0000 Subject: [PATCH 25/71] add precompile file --- deps/execute.jl | 7 +++ estimands_test.yaml | 102 -------------------------------------------- 2 files changed, 7 insertions(+), 102 deletions(-) delete mode 100644 estimands_test.yaml diff --git a/deps/execute.jl b/deps/execute.jl index 666bc56..bde1b1b 100644 --- a/deps/execute.jl +++ b/deps/execute.jl @@ -1,7 +1,14 @@ using TargetedEstimation +@info "Running precompilation script." 
+ +# Run help messages TargetedEstimation.command_main(["-h"]) TargetedEstimation.command_main(["tmle", "-h"]) TargetedEstimation.command_main(["make-summary", "-h"]) TargetedEstimation.command_main(["sieve-variance-plateau", "-h"]) +# Run workload +TEST_DIR = joinpath(pkgdir(TargetedEstimation), "test") +push!(LOAD_PATH, TEST_DIR) +include(joinpath(TEST_DIR, "runtests.jl")) \ No newline at end of file diff --git a/estimands_test.yaml b/estimands_test.yaml deleted file mode 100644 index 8901313..0000000 --- a/estimands_test.yaml +++ /dev/null @@ -1,102 +0,0 @@ -type: "Configuration" -estimands: - - outcome_extra_covariates: - - C1 - type: "IATE" - treatment_values: - T2: - case: true - control: false - T1: - case: true - control: false - outcome: CONTINUOUS, OUTCOME - treatment_confounders: - T2: - - W1 - - W2 - T1: - - W1 - - W2 - - outcome_extra_covariates: - - C1 - type: "IATE" - treatment_values: - T2: - case: true - control: false - T1: - case: true - control: false - outcome: BINARY/OUTCOME - treatment_confounders: - T2: - - W1 - - W2 - T1: - - W1 - - W2 - - outcome_extra_covariates: [] - type: "ATE" - treatment_values: - T1: - case: true - control: false - outcome: CONTINUOUS, OUTCOME - treatment_confounders: - T1: - - W1 - - W2 - - outcome_extra_covariates: [] - type: "IATE" - treatment_values: - T2: - case: false - control: true - T1: - case: true - control: false - outcome: CONTINUOUS, OUTCOME - treatment_confounders: - T2: - - W1 - - W2 - T1: - - W1 - - W2 - - outcome_extra_covariates: - - C1 - type: "IATE" - treatment_values: - T2: - case: false - control: true - T1: - case: true - control: false - outcome: BINARY/OUTCOME - treatment_confounders: - T2: - - W1 - - W2 - T1: - - W1 - - W2 - - outcome_extra_covariates: - - C1 - type: "ATE" - treatment_values: - T2: - case: true - control: false - T1: - case: true - control: false - outcome: CONTINUOUS, OUTCOME - treatment_confounders: - T2: - - W1 - - W2 - T1: - - W1 - - W2 From 
0f9932911ac86115d9ee47f8c39d3bf942b9137e Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Sun, 3 Dec 2023 13:46:37 +0000 Subject: [PATCH 26/71] fix cli --- src/TargetedEstimation.jl | 1 + src/outputs.jl | 4 ++++ src/runner.jl | 12 ++++++------ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index e38087c..fce10ec 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -30,6 +30,7 @@ using Comonicon using Configurations import MLJModelInterface +import Base.tryparse include("failed_estimate.jl") include("cache_managers.jl") diff --git a/src/outputs.jl b/src/outputs.jl index 4302919..09e223a 100644 --- a/src/outputs.jl +++ b/src/outputs.jl @@ -3,6 +3,10 @@ FileExistsError(filename) = ArgumentError(string("File ", filename, " already ex check_file_exists(filename::Nothing) = nothing check_file_exists(filename) = !isfile(filename) || throw(FileExistsError(filename)) +Base.tryparse(::Type{Union{String, Nothing}}, x::AbstractString) = x +Base.tryparse(::Type{Union{Float64, Nothing}}, x::AbstractString) = tryparse(Float64, x) +Base.tryparse(::Type{Union{T, Nothing}}, x::Nothing) where T = nothing + """ initialize(output) diff --git a/src/runner.jl b/src/runner.jl index e7961c8..f25e5e8 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -144,12 +144,12 @@ TMLE CLI. - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time). 
""" -@cast function tmle(dataset, estimands, estimators; - verbosity=0, - outputs=Outputs(), - chunksize=100, - rng=123, - cache_strategy="release-unusable", +@cast function tmle(dataset::String, estimands::String, estimators::String; + verbosity::Int=0, + outputs::Outputs=Outputs(), + chunksize::Int=100, + rng::Int=123, + cache_strategy::String="release-unusable", sort_estimands::Bool=false ) runner = Runner(dataset, estimands, estimators; From 65a1a7002400828a024364148ae49afc6ba77e8c Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Mon, 4 Dec 2023 10:23:53 +0000 Subject: [PATCH 27/71] some more addons --- .gitignore | 2 ++ Comonicon.toml | 2 +- docker/Dockerfile | 9 +++++---- test/sieve_variance.jl | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 0a77c2f..2d2d75e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ test/Manifest.toml sysimage/Manifest.toml Manifest.toml + +build/ diff --git a/Comonicon.toml b/Comonicon.toml index 19f0cb6..48c2a41 100644 --- a/Comonicon.toml +++ b/Comonicon.toml @@ -1,4 +1,4 @@ -name = "fasttmle" +name = "tmle" [install] completion = true diff --git a/docker/Dockerfile b/docker/Dockerfile index 8634ee2..7464f91 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -38,10 +38,11 @@ COPY . 
/TargetedEstimation.jl WORKDIR /TargetedEstimation.jl # Precompile project -RUN julia -q --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' +RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' # Precompile Sysimage project -RUN julia -q --project=/TargetedEstimation.jl/sysimage -e'using Pkg;Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' +RUN julia --project -t auto --startup-file=no deps/build.jl app -# Build Sysimage -RUN julia --project -t auto --startup-file=no sysimage/create_sysimage.jl +ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" + +RUN tmle --help diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index b6c66c3..7317854 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -262,7 +262,7 @@ end ] stderrors = TargetedEstimation.corrected_stderrors(variances) # sanity check - stderrors == sqrt.([4., 5., 6.]) + @test stderrors == sqrt.([4., 5., 6.]) end @testset "Test SVP" begin From 0c4c82d34fbb69873c9cb8b484f1fdc009c3aaa5 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 5 Dec 2023 09:33:18 +0000 Subject: [PATCH 28/71] update docker and add Manifest --- .gitignore | 3 +- Manifest.toml | 1812 ++++++++++++++++++++++++++++++ docker/Dockerfile | 2 +- docs/Project.toml | 4 + sysimage/Project.toml | 2 - sysimage/create_sysimage.jl | 11 - sysimage/precompile_exec_file.jl | 6 - 7 files changed, 1818 insertions(+), 22 deletions(-) create mode 100644 Manifest.toml delete mode 100644 sysimage/Project.toml delete mode 100644 sysimage/create_sysimage.jl delete mode 100644 sysimage/precompile_exec_file.jl diff --git a/.gitignore b/.gitignore index 2d2d75e..648f644 100644 --- a/.gitignore +++ b/.gitignore @@ -16,10 +16,9 @@ deps/src/ # Build artifacts for creating documentation generated by the Documenter package docs/build/ docs/site/ +docs/Manifest.toml test/Manifest.toml sysimage/Manifest.toml -Manifest.toml - build/ diff --git a/Manifest.toml b/Manifest.toml new 
file mode 100644 index 0000000..eb59e18 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,1812 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.2" +manifest_format = "2.0" +project_hash = "79b338af0999710186711c6c1c568ae8891f4dc2" + +[[deps.ARFFFiles]] +deps = ["CategoricalArrays", "Dates", "Parsers", "Tables"] +git-tree-sha1 = "e8c8e0a2be6eb4f56b1672e46004463033daa409" +uuid = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8" +version = "1.4.1" + +[[deps.AbstractDifferentiation]] +deps = ["ExprTools", "LinearAlgebra", "Requires"] +git-tree-sha1 = "6a5e61dc899ab116035c18ead4ec890269f3c478" +uuid = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" +version = "0.6.0" + + [deps.AbstractDifferentiation.extensions] + AbstractDifferentiationChainRulesCoreExt = "ChainRulesCore" + AbstractDifferentiationFiniteDifferencesExt = "FiniteDifferences" + AbstractDifferentiationForwardDiffExt = ["DiffResults", "ForwardDiff"] + AbstractDifferentiationReverseDiffExt = ["DiffResults", "ReverseDiff"] + AbstractDifferentiationTrackerExt = "Tracker" + AbstractDifferentiationZygoteExt = "Zygote" + + [deps.AbstractDifferentiation.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" + FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[[deps.AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "1.5.0" +weakdeps = ["ChainRulesCore", "Test"] + + [deps.AbstractFFTs.extensions] + AbstractFFTsChainRulesCoreExt = "ChainRulesCore" + AbstractFFTsTestExt = "Test" + +[[deps.AbstractTrees]] +git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c" +uuid = 
"1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.4" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "02f731463748db57cc2ebfbd9fbc9ce8280d3433" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "3.7.1" +weakdeps = ["StaticArrays"] + + [deps.Adapt.extensions] + AdaptStaticArraysExt = "StaticArrays" + +[[deps.ArgCheck]] +git-tree-sha1 = "a3a402a35a2f7e0b87828ccabbd5ebfbebe356b4" +uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197" +version = "2.3.0" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.ArnoldiMethod]] +deps = ["LinearAlgebra", "Random", "StaticArrays"] +git-tree-sha1 = "62e51b39331de8911e4a7ff6f5aaf38a5f4cc0ae" +uuid = "ec485272-7323-5ecc-a04f-4719b315124d" +version = "0.2.0" + +[[deps.ArrayInterface]] +deps = ["Adapt", "LinearAlgebra", "Requires", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "247efbccf92448be332d154d6ca56b9fcdd93c31" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "7.6.1" + + [deps.ArrayInterface.extensions] + ArrayInterfaceBandedMatricesExt = "BandedMatrices" + ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" + ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" + ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore" + ArrayInterfaceTrackerExt = "Tracker" + + [deps.ArrayInterface.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" + StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.Arrow]] +deps = ["ArrowTypes", "BitIntegers", "CodecLz4", "CodecZstd", "ConcurrentUtilities", "DataAPI", "Dates", "EnumX", "LoggingExtras", "Mmap", "PooledArrays", "SentinelArrays", "Tables", "TimeZones", "TranscodingStreams", "UUIDs"] +git-tree-sha1 = 
"954666e252835c4cf8819ce4ffaf31073c1b7233" +uuid = "69666777-d1a9-59fb-9406-91d4454c9d45" +version = "2.6.2" + +[[deps.ArrowTypes]] +deps = ["Sockets", "UUIDs"] +git-tree-sha1 = "8c37bfdf1b689c6677bbfc8986968fe641f6a299" +uuid = "31f734f8-188a-4ce0-8406-c8a06bd891cd" +version = "2.2.2" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Atomix]] +deps = ["UnsafeAtomics"] +git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" +uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" +version = "0.1.0" + +[[deps.BSON]] +git-tree-sha1 = "2208958832d6e1b59e49f53697483a84ca8d664e" +uuid = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +version = "0.3.7" + +[[deps.BangBang]] +deps = ["Compat", "ConstructionBase", "InitialValues", "LinearAlgebra", "Requires", "Setfield", "Tables"] +git-tree-sha1 = "e28912ce94077686443433c2800104b061a827ed" +uuid = "198e06fe-97b7-11e9-32a5-e1d131e6ad66" +version = "0.3.39" + + [deps.BangBang.extensions] + BangBangChainRulesCoreExt = "ChainRulesCore" + BangBangDataFramesExt = "DataFrames" + BangBangStaticArraysExt = "StaticArrays" + BangBangStructArraysExt = "StructArrays" + BangBangTypedTablesExt = "TypedTables" + + [deps.BangBang.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.Baselet]] +git-tree-sha1 = "aebf55e6d7795e02ca500a689d326ac979aaf89e" +uuid = "9718e550-a3fa-408a-8086-8db961cd8217" +version = "0.1.1" + +[[deps.BitFlags]] +git-tree-sha1 = "2dc09997850d68179b69dafb58ae806167a32b1b" +uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" +version = "0.1.8" + +[[deps.BitIntegers]] +deps = ["Random"] +git-tree-sha1 = "a55462dfddabc34bc97d3a7403a2ca2802179ae6" +uuid = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1" +version = 
"0.3.1" + +[[deps.CEnum]] +git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.2" + +[[deps.CSV]] +deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] +git-tree-sha1 = "44dbf560808d49041989b8a96cae4cffbeb7966a" +uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +version = "0.10.11" + +[[deps.CUDA_Driver_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] +git-tree-sha1 = "1e42ef1bdb45487ff28de16182c0df4920181dc3" +uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc" +version = "0.7.0+0" + +[[deps.CUDA_Runtime_jll]] +deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "9704e50c9158cf8896c2776b8dbc5edd136caf80" +uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" +version = "0.10.1+0" + +[[deps.Calculus]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad" +uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" +version = "0.5.1" + +[[deps.CategoricalArrays]] +deps = ["DataAPI", "Future", "Missings", "Printf", "Requires", "Statistics", "Unicode"] +git-tree-sha1 = "1568b28f91293458345dabba6a5ea3f183250a61" +uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" +version = "0.10.8" +weakdeps = ["JSON", "RecipesBase", "SentinelArrays", "StructTypes"] + + [deps.CategoricalArrays.extensions] + CategoricalArraysJSONExt = "JSON" + CategoricalArraysRecipesBaseExt = "RecipesBase" + CategoricalArraysSentinelArraysExt = "SentinelArrays" + CategoricalArraysStructTypesExt = "StructTypes" + +[[deps.CategoricalDistributions]] +deps = ["CategoricalArrays", "Distributions", "Missings", "OrderedCollections", "Random", "ScientificTypes"] +git-tree-sha1 = "3124343a1b0c9a2f5fdc1d9bcc633ba11735a4c4" +uuid = "af321ab8-2d2e-40a6-b165-3d674595d28e" +version = "0.1.13" + + 
[deps.CategoricalDistributions.extensions] + UnivariateFiniteDisplayExt = "UnicodePlots" + + [deps.CategoricalDistributions.weakdeps] + UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" + +[[deps.ChainRules]] +deps = ["Adapt", "ChainRulesCore", "Compat", "Distributed", "GPUArraysCore", "IrrationalConstants", "LinearAlgebra", "Random", "RealDot", "SparseArrays", "SparseInverseSubset", "Statistics", "StructArrays", "SuiteSparse"] +git-tree-sha1 = "006cc7170be3e0fa02ccac6d4164a1eee1fc8c27" +uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" +version = "1.58.0" + +[[deps.ChainRulesCore]] +deps = ["Compat", "LinearAlgebra"] +git-tree-sha1 = "e0af648f0692ec1691b5d094b8724ba1346281cf" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.18.0" +weakdeps = ["SparseArrays"] + + [deps.ChainRulesCore.extensions] + ChainRulesCoreSparseArraysExt = "SparseArrays" + +[[deps.CodecLz4]] +deps = ["Lz4_jll", "TranscodingStreams"] +git-tree-sha1 = "8bf4f9e2ee52b5e217451a7cd9171fcd4e16ae23" +uuid = "5ba52731-8f18-5e0d-9241-30f10d1ec561" +version = "0.4.1" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "cd67fc487743b2f0fd4380d4cbd3a24660d0eec8" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.3" + +[[deps.CodecZstd]] +deps = ["CEnum", "TranscodingStreams", "Zstd_jll"] +git-tree-sha1 = "849470b337d0fa8449c21061de922386f32949d9" +uuid = "6b39b394-51ab-5f42-8807-6242bab2b4c2" +version = "0.7.2" + +[[deps.ColorTypes]] +deps = ["FixedPointNumbers", "Random"] +git-tree-sha1 = "eb7f0f8307f71fac7c606984ea5fb2817275d6e4" +uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" +version = "0.11.4" + +[[deps.Combinatorics]] +git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" +uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" +version = "1.0.2" + +[[deps.CommonSolve]] +git-tree-sha1 = "0eee5eb66b1cf62cd6ad1b460238e60e4b09400c" +uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" +version = "0.2.4" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools", 
"Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[deps.Comonicon]] +deps = ["Configurations", "ExproniconLite", "Libdl", "Logging", "Markdown", "OrderedCollections", "PackageCompiler", "Pkg", "Scratch", "TOML", "UUIDs"] +git-tree-sha1 = "552667002fdd5602ca72e0aeac8bd099daa0e040" +uuid = "863f3e99-da2a-4334-8734-de3dacbe5542" +version = "1.0.6" + +[[deps.Compat]] +deps = ["UUIDs"] +git-tree-sha1 = "886826d76ea9e72b35fcd000e535588f7b60f21d" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.10.1" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.0.5+0" + +[[deps.CompositionsBase]] +git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" +uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b" +version = "0.1.2" + + [deps.CompositionsBase.extensions] + CompositionsBaseInverseFunctionsExt = "InverseFunctions" + + [deps.CompositionsBase.weakdeps] + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.ComputationalResources]] +git-tree-sha1 = "52cb3ec90e8a8bea0e62e275ba577ad0f74821f7" +uuid = "ed09eef8-17a6-5b46-8889-db040fac31e3" +version = "0.3.2" + +[[deps.ConcurrentUtilities]] +deps = ["Serialization", "Sockets"] +git-tree-sha1 = "8cfa272e8bdedfa88b6aefbbca7c19f1befac519" +uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" +version = "2.3.0" + +[[deps.Conda]] +deps = ["Downloads", "JSON", "VersionParsing"] +git-tree-sha1 = "51cab8e982c5b598eea9c8ceaced4b58d9dd37c9" +uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" +version = "1.10.0" + +[[deps.Configurations]] +deps = ["ExproniconLite", "OrderedCollections", "TOML"] +git-tree-sha1 = "4358750bb58a3caefd5f37a4a0c5bfdbbf075252" +uuid = "5218b696-f38b-4ac9-8b61-a12ec717816d" +version = "0.17.6" + +[[deps.ConstructionBase]] +deps = 
["LinearAlgebra"] +git-tree-sha1 = "c53fc348ca4d40d7b371e71fd52251839080cbc9" +uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" +version = "1.5.4" + + [deps.ConstructionBase.extensions] + ConstructionBaseIntervalSetsExt = "IntervalSets" + ConstructionBaseStaticArraysExt = "StaticArrays" + + [deps.ConstructionBase.weakdeps] + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.ContextVariablesX]] +deps = ["Compat", "Logging", "UUIDs"] +git-tree-sha1 = "25cc3803f1030ab855e383129dcd3dc294e322cc" +uuid = "6add18c4-b38d-439d-96f6-d6bc489c04c5" +version = "0.1.3" + +[[deps.Crayons]] +git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" +uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" +version = "4.1.1" + +[[deps.DataAPI]] +git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.15.0" + +[[deps.DataFrames]] +deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "1.6.1" + +[[deps.DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "3dbd312d370723b6bb43ba9d02fc36abade4518d" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.15" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.DefineSingletons]] +git-tree-sha1 = "0fba8b706d0178b4dc7fd44a96a92382c9065c2c" +uuid = 
"244e2a9f-e319-4986-a169-4d1fe445cd52" +version = "0.1.2" + +[[deps.DelimitedFiles]] +deps = ["Mmap"] +git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae" +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" +version = "1.9.1" + +[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.Distances]] +deps = ["LinearAlgebra", "Statistics", "StatsAPI"] +git-tree-sha1 = "66c4c81f259586e8f002eacebc177e1fb06363b0" +uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +version = "0.10.11" +weakdeps = ["ChainRulesCore", "SparseArrays"] + + [deps.Distances.extensions] + DistancesChainRulesCoreExt = "ChainRulesCore" + DistancesSparseArraysExt = "SparseArrays" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[deps.Distributions]] +deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] +git-tree-sha1 = "a6c00f894f24460379cb7136633cef54ac9f6f4a" +uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" +version = "0.25.103" + + [deps.Distributions.extensions] + DistributionsChainRulesCoreExt = "ChainRulesCore" + DistributionsDensityInterfaceExt = "DensityInterface" + DistributionsTestExt = "Test" + + [deps.Distributions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" + Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" 
+ +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.DualNumbers]] +deps = ["Calculus", "NaNMath", "SpecialFunctions"] +git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566" +uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74" +version = "0.6.8" + +[[deps.EarCut_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "e3290f2d49e661fbd94046d7e3726ffcb2d41053" +uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" +version = "2.2.4+0" + +[[deps.EarlyStopping]] +deps = ["Dates", "Statistics"] +git-tree-sha1 = "98fdf08b707aaf69f524a6cd0a67858cefe0cfb6" +uuid = "792122b4-ca99-40de-a6bc-6742525f08b6" +version = "0.3.0" + +[[deps.EnumX]] +git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237" +uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56" +version = "1.0.4" + +[[deps.EvoTrees]] +deps = ["BSON", "CategoricalArrays", "Distributions", "MLJModelInterface", "NetworkLayout", "Random", "RecipesBase", "Statistics", "StatsBase", "Tables"] +git-tree-sha1 = "f08d64339d7259b0c69a00a1e321dc6da79672ea" +uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +version = "0.16.5" + + [deps.EvoTrees.extensions] + EvoTreesCUDAExt = "CUDA" + + [deps.EvoTrees.weakdeps] + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + +[[deps.ExceptionUnwrapping]] +deps = ["Test"] +git-tree-sha1 = "e90caa41f5a86296e014e148ee061bd6c3edec96" +uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" +version = "0.1.9" + +[[deps.ExprTools]] +git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" +uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" +version = "0.1.10" + +[[deps.ExproniconLite]] +git-tree-sha1 = "fbc390c2f896031db5484bc152a7e805ecdfb01f" +uuid = "55351af7-c7e9-48d6-89ff-24e801d99491" +version = "0.10.5" + +[[deps.Extents]] +git-tree-sha1 = "2140cd04483da90b2da7f99b2add0750504fc39c" +uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910" +version = "0.1.2" + +[[deps.FLoops]] +deps = ["BangBang", "Compat", 
"FLoopsBase", "InitialValues", "JuliaVariables", "MLStyle", "Serialization", "Setfield", "Transducers"] +git-tree-sha1 = "ffb97765602e3cbe59a0589d237bf07f245a8576" +uuid = "cc61a311-1640-44b5-9fba-1b764f453329" +version = "0.2.1" + +[[deps.FLoopsBase]] +deps = ["ContextVariablesX"] +git-tree-sha1 = "656f7a6859be8673bf1f35da5670246b923964f7" +uuid = "b9860ae5-e623-471e-878b-f6a53c775ea6" +version = "0.1.1" + +[[deps.FileIO]] +deps = ["Pkg", "Requires", "UUIDs"] +git-tree-sha1 = "299dc33549f68299137e51e6d49a13b5b1da9673" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.16.1" + +[[deps.FilePathsBase]] +deps = ["Compat", "Dates", "Mmap", "Printf", "Test", "UUIDs"] +git-tree-sha1 = "9f00e42f8d99fdde64d40c8ea5d14269a2e2c1aa" +uuid = "48062228-2e41-5def-b9a4-89aafe57970f" +version = "0.9.21" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.FillArrays]] +deps = ["LinearAlgebra", "Random"] +git-tree-sha1 = "28e4e9c4b7b162398ec8004bdabe9a90c78c122d" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "1.8.0" +weakdeps = ["PDMats", "SparseArrays", "Statistics"] + + [deps.FillArrays.extensions] + FillArraysPDMatsExt = "PDMats" + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStatisticsExt = "Statistics" + +[[deps.FiniteDiff]] +deps = ["ArrayInterface", "LinearAlgebra", "Requires", "Setfield", "SparseArrays"] +git-tree-sha1 = "c6e4a1fbe73b31a3dea94b1da449503b8830c306" +uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" +version = "2.21.1" + + [deps.FiniteDiff.extensions] + FiniteDiffBandedMatricesExt = "BandedMatrices" + FiniteDiffBlockBandedMatricesExt = "BlockBandedMatrices" + FiniteDiffStaticArraysExt = "StaticArrays" + + [deps.FiniteDiff.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.FixedPointNumbers]] +deps = ["Statistics"] +git-tree-sha1 = 
"335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" +uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +version = "0.8.4" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "cf0fe81336da9fb90944683b8c41984b08793dad" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.36" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" + +[[deps.GLM]] +deps = ["Distributions", "LinearAlgebra", "Printf", "Reexport", "SparseArrays", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns", "StatsModels"] +git-tree-sha1 = "273bd1cd30768a2fddfa3fd63bbc746ed7249e5f" +uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +version = "1.9.0" + +[[deps.GLMNet]] +deps = ["DataFrames", "Distributed", "Distributions", "Printf", "Random", "SparseArrays", "StatsBase", "glmnet_jll"] +git-tree-sha1 = "7ea4e2bbb84183fe52a488d05e16c152b2387b95" +uuid = "8d5ece8b-de18-5317-b113-243142960cc6" +version = "0.7.2" + +[[deps.GPUArrays]] +deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] +git-tree-sha1 = "85d7fb51afb3def5dcb85ad31c3707795c8bccc1" +uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +version = "9.1.0" + +[[deps.GPUArraysCore]] +deps = ["Adapt"] +git-tree-sha1 = "2d6ca471a6c7b536127afccfa7564b5b39227fe0" +uuid = "46192b85-c4d5-4398-a991-12ede77f4527" +version = "0.1.5" + +[[deps.GeoInterface]] +deps = ["Extents"] +git-tree-sha1 = "d53480c0793b13341c40199190f92c611aa2e93c" +uuid = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +version = "1.3.2" + +[[deps.GeometryBasics]] +deps = ["EarCut_jll", "Extents", "GeoInterface", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] +git-tree-sha1 = 
"424a5a6ce7c5d97cca7bcc4eac551b97294c54af" +uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +version = "0.4.9" + +[[deps.Glob]] +git-tree-sha1 = "97285bbd5230dd766e9ef6749b80fc617126d496" +uuid = "c27321d9-0574-5035-807b-f59d2c89b15c" +version = "1.3.1" + +[[deps.Graphs]] +deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] +git-tree-sha1 = "899050ace26649433ef1af25bc17a815b3db52b7" +uuid = "86223c79-3864-5bf0-83f7-82e725a168b6" +version = "1.9.0" + +[[deps.HDF5]] +deps = ["Compat", "HDF5_jll", "Libdl", "MPIPreferences", "Mmap", "Preferences", "Printf", "Random", "Requires", "UUIDs"] +git-tree-sha1 = "26407bd1c60129062cec9da63dc7d08251544d53" +uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +version = "0.17.1" + + [deps.HDF5.extensions] + MPIExt = "MPI" + + [deps.HDF5.weakdeps] + MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" + +[[deps.HDF5_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"] +git-tree-sha1 = "38c8874692d48d5440d5752d6c74b0c6b0b60739" +uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" +version = "1.14.2+1" + +[[deps.HTTP]] +deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] +git-tree-sha1 = "abbbb9ec3afd783a7cbd82ef01dcd088ea051398" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "1.10.1" + +[[deps.HighlyAdaptiveLasso]] +deps = ["DataFrames", "MLJModelInterface", "RCall"] +git-tree-sha1 = "40f12ec0130659287a3d1b7e1a8ffc4fcf7249ba" +uuid = "c5dac772-1445-43c4-b698-9440de7877f6" +version = "0.2.0" + +[[deps.Hwloc_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] 
+git-tree-sha1 = "8ecb0b34472a3c98f945e3c75fc7d5428d165511" +uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" +version = "2.9.3+0" + +[[deps.HypergeometricFunctions]] +deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] +git-tree-sha1 = "f218fe3736ddf977e0e772bc9a586b2383da2685" +uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a" +version = "0.3.23" + +[[deps.HypothesisTests]] +deps = ["Combinatorics", "Distributions", "LinearAlgebra", "Printf", "Random", "Rmath", "Roots", "Statistics", "StatsAPI", "StatsBase"] +git-tree-sha1 = "4b5d5ba51f5f473737ed9de6d8a7aa190ad8c72f" +uuid = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" +version = "0.11.0" + +[[deps.IRTools]] +deps = ["InteractiveUtils", "MacroTools", "Test"] +git-tree-sha1 = "8aa91235360659ca7560db43a7d57541120aa31d" +uuid = "7869d1d1-7146-5819-86e3-90919afe41df" +version = "0.4.11" + +[[deps.Inflate]] +git-tree-sha1 = "ea8031dea4aff6bd41f1df8f2fdfb25b33626381" +uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" +version = "0.1.4" + +[[deps.InitialValues]] +git-tree-sha1 = "4da0f88e9a39111c2fa3add390ab15f3a44f3ca3" +uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c" +version = "0.3.1" + +[[deps.InlineStrings]] +deps = ["Parsers"] +git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461" +uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" +version = "1.4.0" + +[[deps.IntelOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ad37c091f7d7daf900963171600d7c1c5c3ede32" +uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" +version = "2023.2.0+0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.InvertedIndices]] +git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.3.0" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.2" + +[[deps.IterTools]] +git-tree-sha1 = 
"4ced6667f9974fc5c5943fa5e2ef1ca43ea9e450" +uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +version = "1.8.0" + +[[deps.IterationControl]] +deps = ["EarlyStopping", "InteractiveUtils"] +git-tree-sha1 = "d7df9a6fdd82a8cfdfe93a94fcce35515be634da" +uuid = "b3c1a2ee-3fec-4384-bf48-272ea71de57c" +version = "0.5.3" + +[[deps.IterativeSolvers]] +deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"] +git-tree-sha1 = "b435d190ef8369cf4d79cc9dd5fba88ba0165307" +uuid = "42fd0dbc-a981-5370-80f2-aaf504508153" +version = "0.9.3" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.JLD2]] +deps = ["FileIO", "MacroTools", "Mmap", "OrderedCollections", "Pkg", "PrecompileTools", "Printf", "Reexport", "Requires", "TranscodingStreams", "UUIDs"] +git-tree-sha1 = "9bbb5130d3b4fa52846546bca4791ecbdfb52730" +uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +version = "0.4.38" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.5.0" + +[[deps.JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.4" + +[[deps.JSON3]] +deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"] +git-tree-sha1 = "95220473901735a0f4df9d1ca5b171b568b2daa3" +uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +version = "1.13.2" + +[[deps.JuliaVariables]] +deps = ["MLStyle", "NameResolution"] +git-tree-sha1 = "49fb3cb53362ddadb4415e9b73926d6b40709e70" +uuid = "b14d175d-62b4-44ba-8fb7-3064adc8c3ec" +version = "0.2.4" + +[[deps.KernelAbstractions]] +deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", 
"UnsafeAtomicsLLVM"] +git-tree-sha1 = "b0737cbbe1c8da6f1139d1c23e35e7cea129c0af" +uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +version = "0.9.13" + + [deps.KernelAbstractions.extensions] + EnzymeExt = "EnzymeCore" + + [deps.KernelAbstractions.weakdeps] + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"] +git-tree-sha1 = "c879e47398a7ab671c782e02b51a4456794a7fa3" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "6.4.0" + + [deps.LLVM.extensions] + BFloat16sExt = "BFloat16s" + + [deps.LLVM.weakdeps] + BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "98eaee04d96d973e79c25d49167668c5c8fb50e2" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.27+1" + +[[deps.LLVMOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f689897ccbe049adb19a065c495e75f372ecd42b" +uuid = "1d63c593-3942-5779-bab2-d838dc0a180e" +version = "15.0.4+0" + +[[deps.LaTeXStrings]] +git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.3.1" + +[[deps.LatinHypercubeSampling]] +deps = ["Random", "StableRNGs", "StatsBase", "Test"] +git-tree-sha1 = "825289d43c753c7f1bf9bed334c253e9913997f8" +uuid = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d" +version = "1.9.0" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[deps.LearnAPI]] +deps = ["InteractiveUtils", "Statistics"] +git-tree-sha1 = "ec695822c1faaaa64cee32d0b21505e1977b4809" +uuid = "92ad9a40-7767-427a-9ee6-6e577f1266cb" +version = "0.1.0" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", 
"nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.84.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+0" + +[[deps.LineSearches]] +deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] +git-tree-sha1 = "7bbea35cec17305fc70a0e5b4641477dc0789d9d" +uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" +version = "7.2.0" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.LinearMaps]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "9df2ab050ffefe870a09c7b6afdb0cde381703f2" +uuid = "7a12625a-238d-50fd-b39a-03d52299707e" +version = "3.11.1" +weakdeps = ["ChainRulesCore", "SparseArrays", "Statistics"] + + [deps.LinearMaps.extensions] + LinearMapsChainRulesCoreExt = "ChainRulesCore" + LinearMapsSparseArraysExt = "SparseArrays" + LinearMapsStatisticsExt = "Statistics" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "7d6dd4e9212aebaeed356de34ccf262a3cd415aa" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.26" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = 
"3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.LoggingExtras]] +deps = ["Dates", "Logging"] +git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" +uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" +version = "1.0.3" + +[[deps.Lz4_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6c26c5e8a4203d43b5497be3ec5d4e0c3cde240a" +uuid = "5ced341a-0733-55b8-9ab6-a4889d929147" +version = "1.9.4+0" + +[[deps.MKL]] +deps = ["Artifacts", "Libdl", "LinearAlgebra", "MKL_jll"] +git-tree-sha1 = "100521a1d2181cb39036ee1a6955d6b9686bb363" +uuid = "33e6dc65-8f57-5167-99aa-e5a354878fb2" +version = "0.6.1" + +[[deps.MKL_jll]] +deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] +git-tree-sha1 = "eb006abbd7041c28e0d16260e50a24f8f9104913" +uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" +version = "2023.2.0+0" + +[[deps.MLFlowClient]] +deps = ["Dates", "FilePathsBase", "HTTP", "JSON", "ShowCases", "URIs", "UUIDs"] +git-tree-sha1 = "32cee10a6527476bef0c6484ff4c60c2cead5d3e" +uuid = "64a0f543-368b-4a9a-827a-e71edb2a0b83" +version = "0.4.4" + +[[deps.MLJ]] +deps = ["CategoricalArrays", "ComputationalResources", "Distributed", "Distributions", "LinearAlgebra", "MLJBalancing", "MLJBase", "MLJEnsembles", "MLJFlow", "MLJIteration", "MLJModels", "MLJTuning", "OpenML", "Pkg", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "StatisticalMeasures", "Statistics", "StatsBase", "Tables"] +git-tree-sha1 = "981196c41a23cbc1befbad190558b1f0ebb97910" +uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" +version = "0.20.2" + +[[deps.MLJBalancing]] +deps = ["MLJBase", "MLJModelInterface", "MLUtils", "OrderedCollections", "Random", "StatsBase"] +git-tree-sha1 = "e4be85602f010291f49b6a6464ccde1708ce5d62" +uuid = "45f359ea-796d-4f51-95a5-deb1a414c586" +version = "0.1.3" + +[[deps.MLJBase]] +deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Dates", 
"DelimitedFiles", "Distributed", "Distributions", "InteractiveUtils", "InvertedIndices", "LearnAPI", "LinearAlgebra", "MLJModelInterface", "Missings", "OrderedCollections", "Parameters", "PrettyTables", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "Serialization", "StatisticalMeasuresBase", "StatisticalTraits", "Statistics", "StatsBase", "Tables"] +git-tree-sha1 = "6d433d34a1764324cf37a1ddc47dcc42ec05340f" +uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +version = "1.0.1" +weakdeps = ["StatisticalMeasures"] + + [deps.MLJBase.extensions] + DefaultMeasuresExt = "StatisticalMeasures" + +[[deps.MLJEnsembles]] +deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Distributed", "Distributions", "MLJModelInterface", "ProgressMeter", "Random", "ScientificTypesBase", "StatisticalMeasuresBase", "StatsBase"] +git-tree-sha1 = "94403b2c8f692011df6731913376e0e37f6c0fe9" +uuid = "50ed68f4-41fd-4504-931a-ed422449fee0" +version = "0.4.0" + +[[deps.MLJFlow]] +deps = ["MLFlowClient", "MLJBase", "MLJModelInterface"] +git-tree-sha1 = "89d0e7a7e08359476482f20b2d8ff12080d171ee" +uuid = "7b7b8358-b45c-48ea-a8ef-7ca328ad328f" +version = "0.3.0" + +[[deps.MLJGLMInterface]] +deps = ["Distributions", "GLM", "MLJModelInterface", "StatsModels", "Tables"] +git-tree-sha1 = "06aba1c96b19f31744f7e97d96fcf66b79739e05" +uuid = "caf8df21-4939-456d-ac9c-5fefbfb04c0c" +version = "0.3.5" + +[[deps.MLJIteration]] +deps = ["IterationControl", "MLJBase", "Random", "Serialization"] +git-tree-sha1 = "991e10d4c8da49d534e312e8a4fbe56b7ac6f70c" +uuid = "614be32b-d00c-4edb-bd02-1eb411ab5e55" +version = "0.6.0" + +[[deps.MLJLinearModels]] +deps = ["DocStringExtensions", "IterativeSolvers", "LinearAlgebra", "LinearMaps", "MLJModelInterface", "Optim", "Parameters"] +git-tree-sha1 = "7f517fd840ca433a8fae673edb31678ff55d969c" +uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692" +version = "0.10.0" + +[[deps.MLJModelInterface]] +deps = ["Random", "ScientificTypesBase", 
"StatisticalTraits"] +git-tree-sha1 = "381d99f0af76d98f50bd5512dcf96a99c13f8223" +uuid = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" +version = "1.9.3" + +[[deps.MLJModels]] +deps = ["CategoricalArrays", "CategoricalDistributions", "Combinatorics", "Dates", "Distances", "Distributions", "InteractiveUtils", "LinearAlgebra", "MLJModelInterface", "Markdown", "OrderedCollections", "Parameters", "Pkg", "PrettyPrinting", "REPL", "Random", "RelocatableFolders", "ScientificTypes", "StatisticalTraits", "Statistics", "StatsBase", "Tables"] +git-tree-sha1 = "10d221910fc3f3eedad567178ddbca3cc0f776a3" +uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +version = "0.16.12" + +[[deps.MLJTuning]] +deps = ["ComputationalResources", "Distributed", "Distributions", "LatinHypercubeSampling", "MLJBase", "ProgressMeter", "Random", "RecipesBase", "StatisticalMeasuresBase"] +git-tree-sha1 = "44dc126646a15018d7829f020d121b85b4def9bc" +uuid = "03970b2e-30c4-11ea-3135-d1576263f10f" +version = "0.8.0" + +[[deps.MLJXGBoostInterface]] +deps = ["MLJModelInterface", "SparseArrays", "Tables", "XGBoost"] +git-tree-sha1 = "988c399a352f0b49bc1345c509d8a4800cb468c5" +uuid = "54119dfa-1dab-4055-a167-80440f4f7a91" +version = "0.3.10" + +[[deps.MLStyle]] +git-tree-sha1 = "bc38dff0548128765760c79eb7388a4b37fae2c8" +uuid = "d8e11817-5142-5d16-987a-aa16d5891078" +version = "0.4.17" + +[[deps.MLUtils]] +deps = ["ChainRulesCore", "Compat", "DataAPI", "DelimitedFiles", "FLoops", "NNlib", "Random", "ShowCases", "SimpleTraits", "Statistics", "StatsBase", "Tables", "Transducers"] +git-tree-sha1 = "3504cdb8c2bc05bde4d4b09a81b01df88fcbbba0" +uuid = "f1d291b0-491e-4a28-83b9-f70985020b54" +version = "0.4.3" + +[[deps.MPICH_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] +git-tree-sha1 = "8a5b4d2220377d1ece13f49438d71ad20cf1ba83" +uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4" +version = "4.1.2+0" + +[[deps.MPIPreferences]] +deps = ["Libdl", 
"Preferences"] +git-tree-sha1 = "8f6af051b9e8ec597fa09d8885ed79fd582f33c9" +uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" +version = "0.1.10" + +[[deps.MPItrampoline_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] +git-tree-sha1 = "6979eccb6a9edbbb62681e158443e79ecc0d056a" +uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748" +version = "5.3.1+0" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "9ee1618cbf5240e6d4e0371d6f24065083f60c48" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.11" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS]] +deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] +git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" +uuid = "739be429-bea8-5141-9913-cc70e7f3736d" +version = "1.1.9" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+0" + +[[deps.MetaGraphsNext]] +deps = ["Graphs", "JLD2", "SimpleTraits"] +git-tree-sha1 = "8dd4f3f8a643d53e61ff9115749f522c35a38f3f" +uuid = "fa8bd995-216d-47f1-8a91-f3b68fbeb377" +version = "0.6.0" + +[[deps.MicroCollections]] +deps = ["BangBang", "InitialValues", "Setfield"] +git-tree-sha1 = "629afd7d10dbc6935ec59b32daeb33bc4460a42e" +uuid = "128add7d-3638-4c79-886c-908ea0c25c34" +version = "0.1.4" + +[[deps.MicrosoftMPI_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "b01beb91d20b0d1312a9471a36017b5b339d26de" +uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf" +version = "10.1.4+1" + +[[deps.Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.1.0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[deps.Mocking]] +deps = ["Compat", "ExprTools"] +git-tree-sha1 = 
"4cc0c5a83933648b615c36c2b956d94fda70641e" +uuid = "78c3b35d-d492-501b-9361-3d52fe80e533" +version = "0.7.7" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.10.11" + +[[deps.MultipleTesting]] +deps = ["Distributions", "SpecialFunctions", "StatsBase"] +git-tree-sha1 = "1e98f8f732e7035c4333135b75605b74f3462b9b" +uuid = "f8716d33-7c4a-5097-896f-ce0ecbd3ef6b" +version = "0.6.0" + +[[deps.NLSolversBase]] +deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] +git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c" +uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" +version = "7.8.3" + +[[deps.NNlib]] +deps = ["Adapt", "Atomix", "ChainRulesCore", "GPUArraysCore", "KernelAbstractions", "LinearAlgebra", "Pkg", "Random", "Requires", "Statistics"] +git-tree-sha1 = "ac86d2944bf7a670ac8bf0f7ec099b5898abcc09" +uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +version = "0.9.8" + + [deps.NNlib.extensions] + NNlibAMDGPUExt = "AMDGPU" + NNlibCUDACUDNNExt = ["CUDA", "cuDNN"] + NNlibCUDAExt = "CUDA" + NNlibEnzymeCoreExt = "EnzymeCore" + + [deps.NNlib.weakdeps] + AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.0.2" + +[[deps.NameResolution]] +deps = ["PrettyPrint"] +git-tree-sha1 = "1a0fa0e9613f46c9b8c11eee38ebb4f590013c5e" +uuid = "71a1bf82-56d0-4bbc-8a3c-48b961074391" +version = "0.1.5" + +[[deps.NetworkLayout]] +deps = ["GeometryBasics", "LinearAlgebra", "Random", "Requires", "StaticArrays"] +git-tree-sha1 = "91bb2fedff8e43793650e7a677ccda6e6e6e166b" +uuid = "46757867-2c16-5918-afeb-47bfcb05e46a" +version = "0.4.6" +weakdeps = ["Graphs"] + + [deps.NetworkLayout.extensions] + NetworkLayoutGraphsExt = "Graphs" + 
+[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.21+4" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+0" + +[[deps.OpenML]] +deps = ["ARFFFiles", "HTTP", "JSON", "Markdown", "Pkg", "Scratch"] +git-tree-sha1 = "6efb039ae888699d5a74fb593f6f3e10c7193e33" +uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66" +version = "0.3.1" + +[[deps.OpenMPI_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "PMIx_jll", "TOML", "Zlib_jll", "libevent_jll", "prrte_jll"] +git-tree-sha1 = "694458ae803b684f09c07f90459cb79655fb377d" +uuid = "fe0851c0-eecd-5654-98d4-656369965a5c" +version = "5.0.0+0" + +[[deps.OpenSSL]] +deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] +git-tree-sha1 = "51901a49222b09e3743c65b8847687ae5fc78eb2" +uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" +version = "1.4.1" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.12+0" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[deps.Optim]] +deps = ["Compat", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] +git-tree-sha1 = "01f85d9269b13fedc61e63cc72ee2213565f7a72" +uuid = "429524aa-4258-5aef-a3af-852621145aeb" +version = "1.7.8" + +[[deps.OrderedCollections]] +git-tree-sha1 = 
"dfdf5519f235516220579f949664f1bf44e741c5" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.6.3" + +[[deps.PDMats]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "4e5be6bb265d33669f98eb55d2a57addd1eeb72c" +uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" +version = "0.11.30" + +[[deps.PMIx_jll]] +deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "Zlib_jll", "libevent_jll"] +git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541" +uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab" +version = "4.2.7+0" + +[[deps.PackageCompiler]] +deps = ["Artifacts", "Glob", "LazyArtifacts", "Libdl", "Pkg", "Printf", "RelocatableFolders", "TOML", "UUIDs", "p7zip_jll"] +git-tree-sha1 = "f9392ab72832f4315220a853747ff3dba758c9d1" +uuid = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" +version = "2.1.15" + +[[deps.Parameters]] +deps = ["OrderedCollections", "UnPack"] +git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" +uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" +version = "0.12.3" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "a935806434c9d4c506ba941871b327b96d41f2bf" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.8.0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.9.2" + +[[deps.PooledArrays]] +deps = ["DataAPI", "Future"] +git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "1.4.3" + +[[deps.PositiveFactorizations]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "17275485f373e6673f7e7f97051f703ed5b15b20" +uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" +version = "0.2.4" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f" +uuid = 
"aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.0" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.1" + +[[deps.PrettyPrint]] +git-tree-sha1 = "632eb4abab3449ab30c5e1afaa874f0b98b586e4" +uuid = "8162dcfd-2161-5ef2-ae6c-7681170c5f98" +version = "0.2.0" + +[[deps.PrettyPrinting]] +git-tree-sha1 = "22a601b04a154ca38867b991d5017469dc75f2db" +uuid = "54e16d92-306c-5ea0-a30b-337be88ac337" +version = "0.4.1" + +[[deps.PrettyTables]] +deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] +git-tree-sha1 = "88b895d13d53b5577fd53379d913b9ab9ac82660" +uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +version = "2.3.1" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.ProgressMeter]] +deps = ["Distributed", "Printf"] +git-tree-sha1 = "00099623ffee15972c16111bcf84c58a0051257c" +uuid = "92933f4c-e287-5a05-a399-4b506db050ca" +version = "1.9.0" + +[[deps.QuadGK]] +deps = ["DataStructures", "LinearAlgebra"] +git-tree-sha1 = "9ebcd48c498668c7fa0e97a9cae873fbee7bfee1" +uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" +version = "2.9.1" + +[[deps.RCall]] +deps = ["CategoricalArrays", "Conda", "DataFrames", "DataStructures", "Dates", "Libdl", "Missings", "REPL", "Random", "Requires", "StatsModels", "WinReg"] +git-tree-sha1 = "3084689b18f9e5e817a6ce9a83a7654d8ad0f2f6" +uuid = "6f49c342-dc21-5d91-9882-a32aef131414" +version = "0.13.18" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.RealDot]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "9f0a1b71baaf7650f4fa8a1d168c7fb6ee41f0c9" +uuid = "c1ae055f-0cd5-4b69-90a6-9a35b1a98df9" +version = "0.1.0" + +[[deps.RecipesBase]] +deps = 
["PrecompileTools"] +git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.3.4" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.RelocatableFolders]] +deps = ["SHA", "Scratch"] +git-tree-sha1 = "ffdaf70d81cf6ff22c2b6e733c900c3321cab864" +uuid = "05181044-ff0b-4ac5-8273-598c1e38db00" +version = "1.0.1" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.Rmath]] +deps = ["Random", "Rmath_jll"] +git-tree-sha1 = "f65dcb5fa46aee0cf9ed6274ccbd597adc49aa7b" +uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" +version = "0.7.1" + +[[deps.Rmath_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6ed52fdd3382cf21947b15e8870ac0ddbff736da" +uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" +version = "0.4.0+0" + +[[deps.Roots]] +deps = ["ChainRulesCore", "CommonSolve", "Printf", "Setfield"] +git-tree-sha1 = "0f1d92463a020321983d04c110f476c274bafe2e" +uuid = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" +version = "2.0.22" + + [deps.Roots.extensions] + RootsForwardDiffExt = "ForwardDiff" + RootsIntervalRootFindingExt = "IntervalRootFinding" + RootsSymPyExt = "SymPy" + RootsSymPyPythonCallExt = "SymPyPythonCall" + + [deps.Roots.weakdeps] + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + IntervalRootFinding = "d2bf35a9-74e0-55ec-b149-d360ff49b807" + SymPy = "24249f21-da20-56a4-8eb1-6a02cf4ae2e6" + SymPyPythonCall = "bc8888f7-b21e-4b7c-a06a-5d9c9496438c" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.ScientificTypes]] +deps = ["CategoricalArrays", "ColorTypes", "Dates", "Distributions", "PrettyTables", "Reexport", "ScientificTypesBase", "StatisticalTraits", "Tables"] +git-tree-sha1 = "75ccd10ca65b939dab03b812994e571bf1e3e1da" 
+uuid = "321657f4-b219-11e9-178b-2701a2544e81" +version = "3.0.2" + +[[deps.ScientificTypesBase]] +git-tree-sha1 = "a8e18eb383b5ecf1b5e6fc237eb39255044fd92b" +uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161" +version = "3.0.0" + +[[deps.Scratch]] +deps = ["Dates"] +git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.2.1" + +[[deps.SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.4.1" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Setfield]] +deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] +git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" +uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" +version = "1.1.1" + +[[deps.SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[deps.ShiftedArrays]] +git-tree-sha1 = "503688b59397b3307443af35cd953a13e8005c16" +uuid = "1277b4bf-5013-50f5-be3d-901d8477a67a" +version = "2.0.0" + +[[deps.ShowCases]] +git-tree-sha1 = "7f534ad62ab2bd48591bdeac81994ea8c445e4a5" +uuid = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" +version = "0.1.0" + +[[deps.SimpleBufferStream]] +git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" +uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" +version = "1.1.0" + +[[deps.SimpleTraits]] +deps = ["InteractiveUtils", "MacroTools"] +git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" +uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" +version = "0.9.4" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "5165dfb9fd131cf0c6957a3a7605dede376e7b63" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.2.0" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] 
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[deps.SparseInverseSubset]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "91402087fd5d13b2d97e3ef29bbdf9d7859e678a" +uuid = "dc90abb0-5640-4711-901d-7e5b23a2fada" +version = "0.1.1" + +[[deps.SparseMatricesCSR]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "38677ca58e80b5cad2382e5a1848f93b054ad28d" +uuid = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" +version = "0.6.7" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "e2cfc4012a19088254b3950b85c3c1d8882d864d" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.3.1" +weakdeps = ["ChainRulesCore"] + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + +[[deps.SplittablesBase]] +deps = ["Setfield", "Test"] +git-tree-sha1 = "e08a62abc517eb79667d0a29dc08a3b589516bb5" +uuid = "171d559e-b47b-412a-8079-5efa626c420e" +version = "0.1.15" + +[[deps.StableRNGs]] +deps = ["Random", "Test"] +git-tree-sha1 = "3be7d49667040add7ee151fefaf1f8c04c8c8276" +uuid = "860ef19b-820b-49d6-a774-d7a799459cd3" +version = "1.0.0" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "5ef59aea6f18c25168842bded46b16662141ab87" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.7.0" +weakdeps = ["Statistics"] + + [deps.StaticArrays.extensions] + StaticArraysStatisticsExt = "Statistics" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.2" + +[[deps.StatisticalMeasures]] +deps = ["CategoricalArrays", "CategoricalDistributions", "Distributions", "LearnAPI", "LinearAlgebra", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "StatisticalMeasuresBase", "Statistics", "StatsBase"] +git-tree-sha1 = 
"b58c7cc3d7de6c0d75d8437b81481af924970123" +uuid = "a19d573c-0a75-4610-95b3-7071388c7541" +version = "0.1.3" + + [deps.StatisticalMeasures.extensions] + LossFunctionsExt = "LossFunctions" + ScientificTypesExt = "ScientificTypes" + + [deps.StatisticalMeasures.weakdeps] + LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" + ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" + +[[deps.StatisticalMeasuresBase]] +deps = ["CategoricalArrays", "InteractiveUtils", "MLUtils", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "Statistics"] +git-tree-sha1 = "17dfb22e2e4ccc9cd59b487dce52883e0151b4d3" +uuid = "c062fc1d-0d66-479b-b6ac-8b44719de4cc" +version = "0.1.1" + +[[deps.StatisticalTraits]] +deps = ["ScientificTypesBase"] +git-tree-sha1 = "30b9236691858e13f167ce829490a68e1a597782" +uuid = "64bff920-2084-43da-a3e6-9bb72801c0c9" +version = "3.2.0" + +[[deps.Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.9.0" + +[[deps.StatsAPI]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.7.0" + +[[deps.StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "1d77abd07f617c4868c33d4f5b9e1dbb2643c9cf" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.34.2" + +[[deps.StatsFuns]] +deps = ["HypergeometricFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"] +git-tree-sha1 = "f625d686d5a88bcd2b15cd81f18f98186fdc0c9a" +uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +version = "1.3.0" + + [deps.StatsFuns.extensions] + StatsFunsChainRulesCoreExt = "ChainRulesCore" + StatsFunsInverseFunctionsExt = "InverseFunctions" + + [deps.StatsFuns.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + 
InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.StatsModels]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Printf", "REPL", "ShiftedArrays", "SparseArrays", "StatsAPI", "StatsBase", "StatsFuns", "Tables"] +git-tree-sha1 = "5cf6c4583533ee38639f73b880f35fc85f2941e0" +uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d" +version = "0.7.3" + +[[deps.StringEncodings]] +deps = ["Libiconv_jll"] +git-tree-sha1 = "b765e46ba27ecf6b44faf70df40c57aa3a547dcb" +uuid = "69024149-9ee7-55f6-a4c4-859efe599b68" +version = "0.3.7" + +[[deps.StringManipulation]] +deps = ["PrecompileTools"] +git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5" +uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" +version = "0.3.4" + +[[deps.StructArrays]] +deps = ["Adapt", "ConstructionBase", "DataAPI", "GPUArraysCore", "StaticArraysCore", "Tables"] +git-tree-sha1 = "0a3db38e4cce3c54fe7a71f831cd7b6194a54213" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.6.16" + +[[deps.StructTypes]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "ca4bccb03acf9faaf4137a9abc1881ed1841aa70" +uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" +version = "1.10.0" + +[[deps.SuiteSparse]] +deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "5.10.1+6" + +[[deps.TMLE]] +deps = ["AbstractDifferentiation", "CategoricalArrays", "Combinatorics", "Distributions", "GLM", "Graphs", "HypothesisTests", "LogExpFunctions", "MLJBase", "MLJGLMInterface", "MLJModels", "MetaGraphsNext", "Missings", "PrecompileTools", "PrettyTables", "Random", "Statistics", "TableOperations", "Tables", "Zygote"] +git-tree-sha1 = "fe31c10325f1e911dae33a5d521cc07c2c7eeecd" +repo-rev = "cvtmle" +repo-url = "https://github.com/TARGENE/TMLE.jl.git" +uuid = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf" +version = "0.12.0" + + 
[deps.TMLE.extensions] + GraphMakieExt = ["GraphMakie", "CairoMakie"] + JSONExt = "JSON" + YAMLExt = "YAML" + + [deps.TMLE.weakdeps] + CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" + GraphMakie = "1ecd5474-83a3-4783-bb4f-06765db800d2" + JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" + YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.TZJData]] +deps = ["Artifacts"] +git-tree-sha1 = "d39314cdbaf5b90a047db33858626f8d1cc973e1" +uuid = "dc5dba14-91b3-4cab-a142-028a31da12f7" +version = "1.0.0+2023c" + +[[deps.TableOperations]] +deps = ["SentinelArrays", "Tables", "Test"] +git-tree-sha1 = "e383c87cf2a1dc41fa30c093b2a19877c83e1bc1" +uuid = "ab02a1b2-a7df-11e8-156e-fb1833f50b87" +version = "1.2.0" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.11.1" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.TimeZones]] +deps = ["Artifacts", "Dates", "Downloads", "InlineStrings", "LazyArtifacts", "Mocking", "Printf", "Scratch", "TZJData", "Unicode", "p7zip_jll"] +git-tree-sha1 = "89e64d61ef3cd9e80f7fc12b7d13db2d75a23c03" +uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" +version = "1.13.0" +weakdeps = ["RecipesBase"] + + [deps.TimeZones.extensions] + TimeZonesRecipesBaseExt = "RecipesBase" + +[[deps.TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = 
"9a6ae7ed916312b41236fcef7e0af564ef934769" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.13" + +[[deps.Transducers]] +deps = ["Adapt", "ArgCheck", "BangBang", "Baselet", "CompositionsBase", "ConstructionBase", "DefineSingletons", "Distributed", "InitialValues", "Logging", "Markdown", "MicroCollections", "Requires", "Setfield", "SplittablesBase", "Tables"] +git-tree-sha1 = "e579d3c991938fecbb225699e8f611fa3fbf2141" +uuid = "28d57a85-8fef-5791-bfe6-a80928e7c999" +version = "0.4.79" + + [deps.Transducers.extensions] + TransducersBlockArraysExt = "BlockArrays" + TransducersDataFramesExt = "DataFrames" + TransducersLazyArraysExt = "LazyArrays" + TransducersOnlineStatsBaseExt = "OnlineStatsBase" + TransducersReferenceablesExt = "Referenceables" + + [deps.Transducers.weakdeps] + BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" + DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" + OnlineStatsBase = "925886fa-5bf2-5e8e-b522-a9147a512338" + Referenceables = "42d2dcc6-99eb-4e98-b66c-637b7d73030e" + +[[deps.URIs]] +git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = "1.5.1" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.UnPack]] +git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" +uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" +version = "1.0.2" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.UnsafeAtomics]] +git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" +uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" +version = "0.2.1" + +[[deps.UnsafeAtomicsLLVM]] +deps = ["LLVM", "UnsafeAtomics"] +git-tree-sha1 = "323e3d0acf5e78a56dfae7bd8928c989b4f3083e" +uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" +version = "0.1.3" + +[[deps.VersionParsing]] +git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868" +uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" 
+version = "1.3.0" + +[[deps.WeakRefStrings]] +deps = ["DataAPI", "InlineStrings", "Parsers"] +git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23" +uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" +version = "1.4.2" + +[[deps.WinReg]] +git-tree-sha1 = "cd910906b099402bcc50b3eafa9634244e5ec83b" +uuid = "1b915085-20d7-51cf-bf83-8f477d6f5128" +version = "1.0.0" + +[[deps.WorkerUtilities]] +git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7" +uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60" +version = "1.6.1" + +[[deps.XGBoost]] +deps = ["AbstractTrees", "CEnum", "JSON3", "LinearAlgebra", "OrderedCollections", "SparseArrays", "SparseMatricesCSR", "Statistics", "Tables", "XGBoost_jll"] +git-tree-sha1 = "bacb62e07d104630094c8dac2fd070f5d4b9b305" +uuid = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" +version = "2.5.1" + + [deps.XGBoost.extensions] + XGBoostCUDAExt = "CUDA" + XGBoostTermExt = "Term" + + [deps.XGBoost.weakdeps] + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + Term = "22787eb5-b846-44ae-b979-8e399b8463ab" + +[[deps.XGBoost_jll]] +deps = ["Artifacts", "CUDA_Runtime_jll", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "1c0aa2390a7ebb28a3d6c214f64e57a24091fbd7" +uuid = "a5c6f535-4255-5ca2-a466-0e519f119c46" +version = "2.0.1+0" + +[[deps.YAML]] +deps = ["Base64", "Dates", "Printf", "StringEncodings"] +git-tree-sha1 = "e6330e4b731a6af7959673621e91645eb1356884" +uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" +version = "0.4.9" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+0" + +[[deps.Zstd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "49ce682769cd5de6c72dcf1b94ed7790cd08974c" +uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" +version = "1.5.5+0" + +[[deps.Zygote]] +deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "GPUArrays", "GPUArraysCore", "IRTools", 
"InteractiveUtils", "LinearAlgebra", "LogExpFunctions", "MacroTools", "NaNMath", "PrecompileTools", "Random", "Requires", "SparseArrays", "SpecialFunctions", "Statistics", "ZygoteRules"] +git-tree-sha1 = "5ded212acd815612df112bb895ef3910c5a03f57" +uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" +version = "0.6.67" + + [deps.Zygote.extensions] + ZygoteColorsExt = "Colors" + ZygoteDistancesExt = "Distances" + ZygoteTrackerExt = "Tracker" + + [deps.Zygote.weakdeps] + Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" + Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.ZygoteRules]] +deps = ["ChainRulesCore", "MacroTools"] +git-tree-sha1 = "9d749cd449fb448aeca4feee9a2f4186dbb5d184" +uuid = "700de1a5-db45-46bc-99cf-38207098b444" +version = "0.2.4" + +[[deps.glmnet_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "31adae3b983b579a1fbd7cfd43a4bc0d224c2f5a" +uuid = "78c6b45d-5eaf-5d68-bcfb-a5a2cb06c27f" +version = "2.0.13+0" + +[[deps.libaec_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "eddd19a8dea6b139ea97bdc8a0e2667d4b661720" +uuid = "477f73a3-ac25-53e9-8cc3-50b2fa2566f0" +version = "1.0.6+1" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.8.0+0" + +[[deps.libevent_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "OpenSSL_jll"] +git-tree-sha1 = "f04ec6d9a186115fb38f858f05c0c4e1b7fc9dcb" +uuid = "1080aeaf-3a6a-583e-a51c-c537b09f60ec" +version = "2.1.13+1" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.48.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" + +[[deps.prrte_jll]] +deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "PMIx_jll", "libevent_jll"] +git-tree-sha1 = 
"5adb2d7a18a30280feb66cad6f1a1dfdca2dc7b0" +uuid = "eb928a42-fffd-568d-ab9c-3f5d54fc65b9" +version = "3.0.2+0" diff --git a/docker/Dockerfile b/docker/Dockerfile index 7464f91..4302c27 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.9.0-bullseye +FROM julia:1.9.4-bullseye ARG DEBIAN_FRONTEND=noninteractive diff --git a/docs/Project.toml b/docs/Project.toml index dfa65cd..7e440b8 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,2 +1,6 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] + +Documenter = "1.2.1" \ No newline at end of file diff --git a/sysimage/Project.toml b/sysimage/Project.toml deleted file mode 100644 index fbb19f0..0000000 --- a/sysimage/Project.toml +++ /dev/null @@ -1,2 +0,0 @@ -[deps] -PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" diff --git a/sysimage/create_sysimage.jl b/sysimage/create_sysimage.jl deleted file mode 100644 index 2537aa8..0000000 --- a/sysimage/create_sysimage.jl +++ /dev/null @@ -1,11 +0,0 @@ - -SYSIMAGE_DIR = dirname(@__FILE__) -push!(LOAD_PATH, SYSIMAGE_DIR) - -using PackageCompiler - -create_sysimage( - ["TargetedEstimation"]; - sysimage_path="TargetedEstimationSysimage.so", - precompile_execution_file=joinpath(SYSIMAGE_DIR, "precompile_exec_file.jl") -) \ No newline at end of file diff --git a/sysimage/precompile_exec_file.jl b/sysimage/precompile_exec_file.jl deleted file mode 100644 index a45912e..0000000 --- a/sysimage/precompile_exec_file.jl +++ /dev/null @@ -1,6 +0,0 @@ -import TargetedEstimation - -TEST_DIR = joinpath(pkgdir(TargetedEstimation), "test") -push!(LOAD_PATH, TEST_DIR) -cd(TEST_DIR) -include(joinpath(TEST_DIR, "runtests.jl")) \ No newline at end of file From 47f9ce794c3a8d022428ade8a77b9fb19979607a Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 5 Dec 2023 10:52:04 +0000 Subject: [PATCH 29/71] rename build file build_app.jl --- deps/{build.jl => build_app.jl} | 0 docker/Dockerfile | 2 +- docs/Project.toml | 1 - 3 
files changed, 1 insertion(+), 2 deletions(-) rename deps/{build.jl => build_app.jl} (100%) diff --git a/deps/build.jl b/deps/build_app.jl similarity index 100% rename from deps/build.jl rename to deps/build_app.jl diff --git a/docker/Dockerfile b/docker/Dockerfile index 4302c27..bab40d7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -41,7 +41,7 @@ WORKDIR /TargetedEstimation.jl RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' # Precompile Sysimage project -RUN julia --project -t auto --startup-file=no deps/build.jl app +RUN julia --project -t auto --startup-file=no deps/build_app.jl app ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" diff --git a/docs/Project.toml b/docs/Project.toml index 7e440b8..5df1ad8 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -2,5 +2,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" [compat] - Documenter = "1.2.1" \ No newline at end of file From 84356680326cfe1a7db14cc30457da1b9b989125 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 6 Dec 2023 15:46:22 +0000 Subject: [PATCH 30/71] try remove precompilation --- Comonicon.toml | 20 ++++++++++---------- docker/Dockerfile | 3 +-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/Comonicon.toml b/Comonicon.toml index 48c2a41..4acd652 100644 --- a/Comonicon.toml +++ b/Comonicon.toml @@ -5,16 +5,16 @@ completion = true quiet = false optimize = 2 -[sysimg] -incremental=true -filter_stdlibs=false +# [sysimg] +# incremental=true +# filter_stdlibs=false -[sysimg.precompile] -execution_file = ["deps/execute.jl"] +# [sysimg.precompile] +# execution_file = ["deps/execute.jl"] -[application] -incremental=true -filter_stdlibs=false +# [application] +# incremental=true +# filter_stdlibs=false -[application.precompile] -execution_file = ["deps/execute.jl"] \ No newline at end of file +# [application.precompile] +# execution_file = ["deps/execute.jl"] \ No newline at end of file diff --git 
a/docker/Dockerfile b/docker/Dockerfile index bab40d7..99aa2c7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -41,8 +41,7 @@ WORKDIR /TargetedEstimation.jl RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' # Precompile Sysimage project -RUN julia --project -t auto --startup-file=no deps/build_app.jl app +RUN julia --project -t auto --startup-file=no deps/build_app.jl ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" -RUN tmle --help From 9899da68d0e7c0f4f45c8e2d5671cf099223a9c7 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 6 Dec 2023 21:51:06 +0000 Subject: [PATCH 31/71] up TMLE and remove Manifest from repo --- .gitignore | 2 +- Manifest.toml | 1812 ------------------------------------------------- Project.toml | 1 + 3 files changed, 2 insertions(+), 1813 deletions(-) delete mode 100644 Manifest.toml diff --git a/.gitignore b/.gitignore index 648f644..63898a4 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,6 @@ docs/site/ docs/Manifest.toml test/Manifest.toml -sysimage/Manifest.toml +Manifest.toml build/ diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index eb59e18..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,1812 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.9.2" -manifest_format = "2.0" -project_hash = "79b338af0999710186711c6c1c568ae8891f4dc2" - -[[deps.ARFFFiles]] -deps = ["CategoricalArrays", "Dates", "Parsers", "Tables"] -git-tree-sha1 = "e8c8e0a2be6eb4f56b1672e46004463033daa409" -uuid = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8" -version = "1.4.1" - -[[deps.AbstractDifferentiation]] -deps = ["ExprTools", "LinearAlgebra", "Requires"] -git-tree-sha1 = "6a5e61dc899ab116035c18ead4ec890269f3c478" -uuid = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" -version = "0.6.0" - - [deps.AbstractDifferentiation.extensions] - AbstractDifferentiationChainRulesCoreExt = "ChainRulesCore" - AbstractDifferentiationFiniteDifferencesExt = 
"FiniteDifferences" - AbstractDifferentiationForwardDiffExt = ["DiffResults", "ForwardDiff"] - AbstractDifferentiationReverseDiffExt = ["DiffResults", "ReverseDiff"] - AbstractDifferentiationTrackerExt = "Tracker" - AbstractDifferentiationZygoteExt = "Zygote" - - [deps.AbstractDifferentiation.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" - FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" - ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" - ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" - Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" - Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" - -[[deps.AbstractFFTs]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" -uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "1.5.0" -weakdeps = ["ChainRulesCore", "Test"] - - [deps.AbstractFFTs.extensions] - AbstractFFTsChainRulesCoreExt = "ChainRulesCore" - AbstractFFTsTestExt = "Test" - -[[deps.AbstractTrees]] -git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c" -uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.4.4" - -[[deps.Adapt]] -deps = ["LinearAlgebra", "Requires"] -git-tree-sha1 = "02f731463748db57cc2ebfbd9fbc9ce8280d3433" -uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.7.1" -weakdeps = ["StaticArrays"] - - [deps.Adapt.extensions] - AdaptStaticArraysExt = "StaticArrays" - -[[deps.ArgCheck]] -git-tree-sha1 = "a3a402a35a2f7e0b87828ccabbd5ebfbebe356b4" -uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197" -version = "2.3.0" - -[[deps.ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" - -[[deps.ArnoldiMethod]] -deps = ["LinearAlgebra", "Random", "StaticArrays"] -git-tree-sha1 = "62e51b39331de8911e4a7ff6f5aaf38a5f4cc0ae" -uuid = "ec485272-7323-5ecc-a04f-4719b315124d" -version = "0.2.0" - -[[deps.ArrayInterface]] -deps = ["Adapt", "LinearAlgebra", "Requires", "SparseArrays", "SuiteSparse"] 
-git-tree-sha1 = "247efbccf92448be332d154d6ca56b9fcdd93c31" -uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "7.6.1" - - [deps.ArrayInterface.extensions] - ArrayInterfaceBandedMatricesExt = "BandedMatrices" - ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" - ArrayInterfaceCUDAExt = "CUDA" - ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" - ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore" - ArrayInterfaceTrackerExt = "Tracker" - - [deps.ArrayInterface.weakdeps] - BandedMatrices = "aae01518-5342-5314-be14-df237901396f" - BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" - CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" - GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" - StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" - Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" - -[[deps.Arrow]] -deps = ["ArrowTypes", "BitIntegers", "CodecLz4", "CodecZstd", "ConcurrentUtilities", "DataAPI", "Dates", "EnumX", "LoggingExtras", "Mmap", "PooledArrays", "SentinelArrays", "Tables", "TimeZones", "TranscodingStreams", "UUIDs"] -git-tree-sha1 = "954666e252835c4cf8819ce4ffaf31073c1b7233" -uuid = "69666777-d1a9-59fb-9406-91d4454c9d45" -version = "2.6.2" - -[[deps.ArrowTypes]] -deps = ["Sockets", "UUIDs"] -git-tree-sha1 = "8c37bfdf1b689c6677bbfc8986968fe641f6a299" -uuid = "31f734f8-188a-4ce0-8406-c8a06bd891cd" -version = "2.2.2" - -[[deps.Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[deps.Atomix]] -deps = ["UnsafeAtomics"] -git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" -uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" -version = "0.1.0" - -[[deps.BSON]] -git-tree-sha1 = "2208958832d6e1b59e49f53697483a84ca8d664e" -uuid = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" -version = "0.3.7" - -[[deps.BangBang]] -deps = ["Compat", "ConstructionBase", "InitialValues", "LinearAlgebra", "Requires", "Setfield", "Tables"] -git-tree-sha1 = "e28912ce94077686443433c2800104b061a827ed" -uuid = "198e06fe-97b7-11e9-32a5-e1d131e6ad66" 
-version = "0.3.39" - - [deps.BangBang.extensions] - BangBangChainRulesCoreExt = "ChainRulesCore" - BangBangDataFramesExt = "DataFrames" - BangBangStaticArraysExt = "StaticArrays" - BangBangStructArraysExt = "StructArrays" - BangBangTypedTablesExt = "TypedTables" - - [deps.BangBang.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" - TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" - -[[deps.Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[deps.Baselet]] -git-tree-sha1 = "aebf55e6d7795e02ca500a689d326ac979aaf89e" -uuid = "9718e550-a3fa-408a-8086-8db961cd8217" -version = "0.1.1" - -[[deps.BitFlags]] -git-tree-sha1 = "2dc09997850d68179b69dafb58ae806167a32b1b" -uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" -version = "0.1.8" - -[[deps.BitIntegers]] -deps = ["Random"] -git-tree-sha1 = "a55462dfddabc34bc97d3a7403a2ca2802179ae6" -uuid = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1" -version = "0.3.1" - -[[deps.CEnum]] -git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.2" - -[[deps.CSV]] -deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] -git-tree-sha1 = "44dbf560808d49041989b8a96cae4cffbeb7966a" -uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -version = "0.10.11" - -[[deps.CUDA_Driver_jll]] -deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] -git-tree-sha1 = "1e42ef1bdb45487ff28de16182c0df4920181dc3" -uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc" -version = "0.7.0+0" - -[[deps.CUDA_Runtime_jll]] -deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] -git-tree-sha1 = "9704e50c9158cf8896c2776b8dbc5edd136caf80" -uuid = 
"76a88914-d11a-5bdc-97e0-2f5a05c973a2" -version = "0.10.1+0" - -[[deps.Calculus]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad" -uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" -version = "0.5.1" - -[[deps.CategoricalArrays]] -deps = ["DataAPI", "Future", "Missings", "Printf", "Requires", "Statistics", "Unicode"] -git-tree-sha1 = "1568b28f91293458345dabba6a5ea3f183250a61" -uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" -version = "0.10.8" -weakdeps = ["JSON", "RecipesBase", "SentinelArrays", "StructTypes"] - - [deps.CategoricalArrays.extensions] - CategoricalArraysJSONExt = "JSON" - CategoricalArraysRecipesBaseExt = "RecipesBase" - CategoricalArraysSentinelArraysExt = "SentinelArrays" - CategoricalArraysStructTypesExt = "StructTypes" - -[[deps.CategoricalDistributions]] -deps = ["CategoricalArrays", "Distributions", "Missings", "OrderedCollections", "Random", "ScientificTypes"] -git-tree-sha1 = "3124343a1b0c9a2f5fdc1d9bcc633ba11735a4c4" -uuid = "af321ab8-2d2e-40a6-b165-3d674595d28e" -version = "0.1.13" - - [deps.CategoricalDistributions.extensions] - UnivariateFiniteDisplayExt = "UnicodePlots" - - [deps.CategoricalDistributions.weakdeps] - UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" - -[[deps.ChainRules]] -deps = ["Adapt", "ChainRulesCore", "Compat", "Distributed", "GPUArraysCore", "IrrationalConstants", "LinearAlgebra", "Random", "RealDot", "SparseArrays", "SparseInverseSubset", "Statistics", "StructArrays", "SuiteSparse"] -git-tree-sha1 = "006cc7170be3e0fa02ccac6d4164a1eee1fc8c27" -uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" -version = "1.58.0" - -[[deps.ChainRulesCore]] -deps = ["Compat", "LinearAlgebra"] -git-tree-sha1 = "e0af648f0692ec1691b5d094b8724ba1346281cf" -uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "1.18.0" -weakdeps = ["SparseArrays"] - - [deps.ChainRulesCore.extensions] - ChainRulesCoreSparseArraysExt = "SparseArrays" - -[[deps.CodecLz4]] -deps = ["Lz4_jll", "TranscodingStreams"] 
-git-tree-sha1 = "8bf4f9e2ee52b5e217451a7cd9171fcd4e16ae23" -uuid = "5ba52731-8f18-5e0d-9241-30f10d1ec561" -version = "0.4.1" - -[[deps.CodecZlib]] -deps = ["TranscodingStreams", "Zlib_jll"] -git-tree-sha1 = "cd67fc487743b2f0fd4380d4cbd3a24660d0eec8" -uuid = "944b1d66-785c-5afd-91f1-9de20f533193" -version = "0.7.3" - -[[deps.CodecZstd]] -deps = ["CEnum", "TranscodingStreams", "Zstd_jll"] -git-tree-sha1 = "849470b337d0fa8449c21061de922386f32949d9" -uuid = "6b39b394-51ab-5f42-8807-6242bab2b4c2" -version = "0.7.2" - -[[deps.ColorTypes]] -deps = ["FixedPointNumbers", "Random"] -git-tree-sha1 = "eb7f0f8307f71fac7c606984ea5fb2817275d6e4" -uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" -version = "0.11.4" - -[[deps.Combinatorics]] -git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" -uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" -version = "1.0.2" - -[[deps.CommonSolve]] -git-tree-sha1 = "0eee5eb66b1cf62cd6ad1b460238e60e4b09400c" -uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" -version = "0.2.4" - -[[deps.CommonSubexpressions]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" -uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" -version = "0.3.0" - -[[deps.Comonicon]] -deps = ["Configurations", "ExproniconLite", "Libdl", "Logging", "Markdown", "OrderedCollections", "PackageCompiler", "Pkg", "Scratch", "TOML", "UUIDs"] -git-tree-sha1 = "552667002fdd5602ca72e0aeac8bd099daa0e040" -uuid = "863f3e99-da2a-4334-8734-de3dacbe5542" -version = "1.0.6" - -[[deps.Compat]] -deps = ["UUIDs"] -git-tree-sha1 = "886826d76ea9e72b35fcd000e535588f7b60f21d" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "4.10.1" -weakdeps = ["Dates", "LinearAlgebra"] - - [deps.Compat.extensions] - CompatLinearAlgebraExt = "LinearAlgebra" - -[[deps.CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.0.5+0" - -[[deps.CompositionsBase]] -git-tree-sha1 = 
"802bb88cd69dfd1509f6670416bd4434015693ad" -uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b" -version = "0.1.2" - - [deps.CompositionsBase.extensions] - CompositionsBaseInverseFunctionsExt = "InverseFunctions" - - [deps.CompositionsBase.weakdeps] - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - -[[deps.ComputationalResources]] -git-tree-sha1 = "52cb3ec90e8a8bea0e62e275ba577ad0f74821f7" -uuid = "ed09eef8-17a6-5b46-8889-db040fac31e3" -version = "0.3.2" - -[[deps.ConcurrentUtilities]] -deps = ["Serialization", "Sockets"] -git-tree-sha1 = "8cfa272e8bdedfa88b6aefbbca7c19f1befac519" -uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.3.0" - -[[deps.Conda]] -deps = ["Downloads", "JSON", "VersionParsing"] -git-tree-sha1 = "51cab8e982c5b598eea9c8ceaced4b58d9dd37c9" -uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" -version = "1.10.0" - -[[deps.Configurations]] -deps = ["ExproniconLite", "OrderedCollections", "TOML"] -git-tree-sha1 = "4358750bb58a3caefd5f37a4a0c5bfdbbf075252" -uuid = "5218b696-f38b-4ac9-8b61-a12ec717816d" -version = "0.17.6" - -[[deps.ConstructionBase]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "c53fc348ca4d40d7b371e71fd52251839080cbc9" -uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" -version = "1.5.4" - - [deps.ConstructionBase.extensions] - ConstructionBaseIntervalSetsExt = "IntervalSets" - ConstructionBaseStaticArraysExt = "StaticArrays" - - [deps.ConstructionBase.weakdeps] - IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - -[[deps.ContextVariablesX]] -deps = ["Compat", "Logging", "UUIDs"] -git-tree-sha1 = "25cc3803f1030ab855e383129dcd3dc294e322cc" -uuid = "6add18c4-b38d-439d-96f6-d6bc489c04c5" -version = "0.1.3" - -[[deps.Crayons]] -git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.1.1" - -[[deps.DataAPI]] -git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c" -uuid = 
"9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.15.0" - -[[deps.DataFrames]] -deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8" -uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.6.1" - -[[deps.DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "3dbd312d370723b6bb43ba9d02fc36abade4518d" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.15" - -[[deps.DataValueInterfaces]] -git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" -uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" -version = "1.0.0" - -[[deps.Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[deps.DefineSingletons]] -git-tree-sha1 = "0fba8b706d0178b4dc7fd44a96a92382c9065c2c" -uuid = "244e2a9f-e319-4986-a169-4d1fe445cd52" -version = "0.1.2" - -[[deps.DelimitedFiles]] -deps = ["Mmap"] -git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae" -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" -version = "1.9.1" - -[[deps.DiffResults]] -deps = ["StaticArraysCore"] -git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" -uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" -version = "1.1.0" - -[[deps.DiffRules]] -deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" -uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.15.1" - -[[deps.Distances]] -deps = ["LinearAlgebra", "Statistics", "StatsAPI"] -git-tree-sha1 = "66c4c81f259586e8f002eacebc177e1fb06363b0" -uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" -version = "0.10.11" -weakdeps = ["ChainRulesCore", 
"SparseArrays"] - - [deps.Distances.extensions] - DistancesChainRulesCoreExt = "ChainRulesCore" - DistancesSparseArraysExt = "SparseArrays" - -[[deps.Distributed]] -deps = ["Random", "Serialization", "Sockets"] -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[[deps.Distributions]] -deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] -git-tree-sha1 = "a6c00f894f24460379cb7136633cef54ac9f6f4a" -uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.25.103" - - [deps.Distributions.extensions] - DistributionsChainRulesCoreExt = "ChainRulesCore" - DistributionsDensityInterfaceExt = "DensityInterface" - DistributionsTestExt = "Test" - - [deps.Distributions.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" - Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[deps.DocStringExtensions]] -deps = ["LibGit2"] -git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.9.3" - -[[deps.Downloads]] -deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -version = "1.6.0" - -[[deps.DualNumbers]] -deps = ["Calculus", "NaNMath", "SpecialFunctions"] -git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566" -uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74" -version = "0.6.8" - -[[deps.EarCut_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "e3290f2d49e661fbd94046d7e3726ffcb2d41053" -uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" -version = "2.2.4+0" - -[[deps.EarlyStopping]] -deps = ["Dates", "Statistics"] -git-tree-sha1 = "98fdf08b707aaf69f524a6cd0a67858cefe0cfb6" -uuid = "792122b4-ca99-40de-a6bc-6742525f08b6" -version = "0.3.0" - -[[deps.EnumX]] -git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237" -uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56" -version = 
"1.0.4" - -[[deps.EvoTrees]] -deps = ["BSON", "CategoricalArrays", "Distributions", "MLJModelInterface", "NetworkLayout", "Random", "RecipesBase", "Statistics", "StatsBase", "Tables"] -git-tree-sha1 = "f08d64339d7259b0c69a00a1e321dc6da79672ea" -uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" -version = "0.16.5" - - [deps.EvoTrees.extensions] - EvoTreesCUDAExt = "CUDA" - - [deps.EvoTrees.weakdeps] - CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" - -[[deps.ExceptionUnwrapping]] -deps = ["Test"] -git-tree-sha1 = "e90caa41f5a86296e014e148ee061bd6c3edec96" -uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" -version = "0.1.9" - -[[deps.ExprTools]] -git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" -uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" -version = "0.1.10" - -[[deps.ExproniconLite]] -git-tree-sha1 = "fbc390c2f896031db5484bc152a7e805ecdfb01f" -uuid = "55351af7-c7e9-48d6-89ff-24e801d99491" -version = "0.10.5" - -[[deps.Extents]] -git-tree-sha1 = "2140cd04483da90b2da7f99b2add0750504fc39c" -uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910" -version = "0.1.2" - -[[deps.FLoops]] -deps = ["BangBang", "Compat", "FLoopsBase", "InitialValues", "JuliaVariables", "MLStyle", "Serialization", "Setfield", "Transducers"] -git-tree-sha1 = "ffb97765602e3cbe59a0589d237bf07f245a8576" -uuid = "cc61a311-1640-44b5-9fba-1b764f453329" -version = "0.2.1" - -[[deps.FLoopsBase]] -deps = ["ContextVariablesX"] -git-tree-sha1 = "656f7a6859be8673bf1f35da5670246b923964f7" -uuid = "b9860ae5-e623-471e-878b-f6a53c775ea6" -version = "0.1.1" - -[[deps.FileIO]] -deps = ["Pkg", "Requires", "UUIDs"] -git-tree-sha1 = "299dc33549f68299137e51e6d49a13b5b1da9673" -uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -version = "1.16.1" - -[[deps.FilePathsBase]] -deps = ["Compat", "Dates", "Mmap", "Printf", "Test", "UUIDs"] -git-tree-sha1 = "9f00e42f8d99fdde64d40c8ea5d14269a2e2c1aa" -uuid = "48062228-2e41-5def-b9a4-89aafe57970f" -version = "0.9.21" - -[[deps.FileWatching]] -uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" 
- -[[deps.FillArrays]] -deps = ["LinearAlgebra", "Random"] -git-tree-sha1 = "28e4e9c4b7b162398ec8004bdabe9a90c78c122d" -uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "1.8.0" -weakdeps = ["PDMats", "SparseArrays", "Statistics"] - - [deps.FillArrays.extensions] - FillArraysPDMatsExt = "PDMats" - FillArraysSparseArraysExt = "SparseArrays" - FillArraysStatisticsExt = "Statistics" - -[[deps.FiniteDiff]] -deps = ["ArrayInterface", "LinearAlgebra", "Requires", "Setfield", "SparseArrays"] -git-tree-sha1 = "c6e4a1fbe73b31a3dea94b1da449503b8830c306" -uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.21.1" - - [deps.FiniteDiff.extensions] - FiniteDiffBandedMatricesExt = "BandedMatrices" - FiniteDiffBlockBandedMatricesExt = "BlockBandedMatrices" - FiniteDiffStaticArraysExt = "StaticArrays" - - [deps.FiniteDiff.weakdeps] - BandedMatrices = "aae01518-5342-5314-be14-df237901396f" - BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - -[[deps.FixedPointNumbers]] -deps = ["Statistics"] -git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" -uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" -version = "0.8.4" - -[[deps.ForwardDiff]] -deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] -git-tree-sha1 = "cf0fe81336da9fb90944683b8c41984b08793dad" -uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.36" -weakdeps = ["StaticArrays"] - - [deps.ForwardDiff.extensions] - ForwardDiffStaticArraysExt = "StaticArrays" - -[[deps.Future]] -deps = ["Random"] -uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" - -[[deps.GLM]] -deps = ["Distributions", "LinearAlgebra", "Printf", "Reexport", "SparseArrays", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns", "StatsModels"] -git-tree-sha1 = "273bd1cd30768a2fddfa3fd63bbc746ed7249e5f" -uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a" 
-version = "1.9.0" - -[[deps.GLMNet]] -deps = ["DataFrames", "Distributed", "Distributions", "Printf", "Random", "SparseArrays", "StatsBase", "glmnet_jll"] -git-tree-sha1 = "7ea4e2bbb84183fe52a488d05e16c152b2387b95" -uuid = "8d5ece8b-de18-5317-b113-243142960cc6" -version = "0.7.2" - -[[deps.GPUArrays]] -deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] -git-tree-sha1 = "85d7fb51afb3def5dcb85ad31c3707795c8bccc1" -uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "9.1.0" - -[[deps.GPUArraysCore]] -deps = ["Adapt"] -git-tree-sha1 = "2d6ca471a6c7b536127afccfa7564b5b39227fe0" -uuid = "46192b85-c4d5-4398-a991-12ede77f4527" -version = "0.1.5" - -[[deps.GeoInterface]] -deps = ["Extents"] -git-tree-sha1 = "d53480c0793b13341c40199190f92c611aa2e93c" -uuid = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" -version = "1.3.2" - -[[deps.GeometryBasics]] -deps = ["EarCut_jll", "Extents", "GeoInterface", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] -git-tree-sha1 = "424a5a6ce7c5d97cca7bcc4eac551b97294c54af" -uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" -version = "0.4.9" - -[[deps.Glob]] -git-tree-sha1 = "97285bbd5230dd766e9ef6749b80fc617126d496" -uuid = "c27321d9-0574-5035-807b-f59d2c89b15c" -version = "1.3.1" - -[[deps.Graphs]] -deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] -git-tree-sha1 = "899050ace26649433ef1af25bc17a815b3db52b7" -uuid = "86223c79-3864-5bf0-83f7-82e725a168b6" -version = "1.9.0" - -[[deps.HDF5]] -deps = ["Compat", "HDF5_jll", "Libdl", "MPIPreferences", "Mmap", "Preferences", "Printf", "Random", "Requires", "UUIDs"] -git-tree-sha1 = "26407bd1c60129062cec9da63dc7d08251544d53" -uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" -version = "0.17.1" - - [deps.HDF5.extensions] - MPIExt = "MPI" - - [deps.HDF5.weakdeps] - MPI = 
"da04e1cc-30fd-572f-bb4f-1f8673147195" - -[[deps.HDF5_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"] -git-tree-sha1 = "38c8874692d48d5440d5752d6c74b0c6b0b60739" -uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" -version = "1.14.2+1" - -[[deps.HTTP]] -deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] -git-tree-sha1 = "abbbb9ec3afd783a7cbd82ef01dcd088ea051398" -uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "1.10.1" - -[[deps.HighlyAdaptiveLasso]] -deps = ["DataFrames", "MLJModelInterface", "RCall"] -git-tree-sha1 = "40f12ec0130659287a3d1b7e1a8ffc4fcf7249ba" -uuid = "c5dac772-1445-43c4-b698-9440de7877f6" -version = "0.2.0" - -[[deps.Hwloc_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "8ecb0b34472a3c98f945e3c75fc7d5428d165511" -uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" -version = "2.9.3+0" - -[[deps.HypergeometricFunctions]] -deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] -git-tree-sha1 = "f218fe3736ddf977e0e772bc9a586b2383da2685" -uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a" -version = "0.3.23" - -[[deps.HypothesisTests]] -deps = ["Combinatorics", "Distributions", "LinearAlgebra", "Printf", "Random", "Rmath", "Roots", "Statistics", "StatsAPI", "StatsBase"] -git-tree-sha1 = "4b5d5ba51f5f473737ed9de6d8a7aa190ad8c72f" -uuid = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" -version = "0.11.0" - -[[deps.IRTools]] -deps = ["InteractiveUtils", "MacroTools", "Test"] -git-tree-sha1 = "8aa91235360659ca7560db43a7d57541120aa31d" -uuid = "7869d1d1-7146-5819-86e3-90919afe41df" -version = "0.4.11" - -[[deps.Inflate]] -git-tree-sha1 = 
"ea8031dea4aff6bd41f1df8f2fdfb25b33626381" -uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" -version = "0.1.4" - -[[deps.InitialValues]] -git-tree-sha1 = "4da0f88e9a39111c2fa3add390ab15f3a44f3ca3" -uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c" -version = "0.3.1" - -[[deps.InlineStrings]] -deps = ["Parsers"] -git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461" -uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" -version = "1.4.0" - -[[deps.IntelOpenMP_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "ad37c091f7d7daf900963171600d7c1c5c3ede32" -uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" -version = "2023.2.0+0" - -[[deps.InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[deps.InvertedIndices]] -git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" -uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" -version = "1.3.0" - -[[deps.IrrationalConstants]] -git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" -uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" -version = "0.2.2" - -[[deps.IterTools]] -git-tree-sha1 = "4ced6667f9974fc5c5943fa5e2ef1ca43ea9e450" -uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" -version = "1.8.0" - -[[deps.IterationControl]] -deps = ["EarlyStopping", "InteractiveUtils"] -git-tree-sha1 = "d7df9a6fdd82a8cfdfe93a94fcce35515be634da" -uuid = "b3c1a2ee-3fec-4384-bf48-272ea71de57c" -version = "0.5.3" - -[[deps.IterativeSolvers]] -deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"] -git-tree-sha1 = "b435d190ef8369cf4d79cc9dd5fba88ba0165307" -uuid = "42fd0dbc-a981-5370-80f2-aaf504508153" -version = "0.9.3" - -[[deps.IteratorInterfaceExtensions]] -git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" -uuid = "82899510-4779-5014-852e-03e436cf321d" -version = "1.0.0" - -[[deps.JLD2]] -deps = ["FileIO", "MacroTools", "Mmap", "OrderedCollections", "Pkg", "PrecompileTools", "Printf", "Reexport", "Requires", "TranscodingStreams", "UUIDs"] -git-tree-sha1 = 
"9bbb5130d3b4fa52846546bca4791ecbdfb52730" -uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -version = "0.4.38" - -[[deps.JLLWrappers]] -deps = ["Artifacts", "Preferences"] -git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.5.0" - -[[deps.JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.4" - -[[deps.JSON3]] -deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"] -git-tree-sha1 = "95220473901735a0f4df9d1ca5b171b568b2daa3" -uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" -version = "1.13.2" - -[[deps.JuliaVariables]] -deps = ["MLStyle", "NameResolution"] -git-tree-sha1 = "49fb3cb53362ddadb4415e9b73926d6b40709e70" -uuid = "b14d175d-62b4-44ba-8fb7-3064adc8c3ec" -version = "0.2.4" - -[[deps.KernelAbstractions]] -deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] -git-tree-sha1 = "b0737cbbe1c8da6f1139d1c23e35e7cea129c0af" -uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" -version = "0.9.13" - - [deps.KernelAbstractions.extensions] - EnzymeExt = "EnzymeCore" - - [deps.KernelAbstractions.weakdeps] - EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" - -[[deps.LLVM]] -deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"] -git-tree-sha1 = "c879e47398a7ab671c782e02b51a4456794a7fa3" -uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "6.4.0" - - [deps.LLVM.extensions] - BFloat16sExt = "BFloat16s" - - [deps.LLVM.weakdeps] - BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" - -[[deps.LLVMExtra_jll]] -deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] -git-tree-sha1 = "98eaee04d96d973e79c25d49167668c5c8fb50e2" -uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = 
"0.0.27+1" - -[[deps.LLVMOpenMP_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "f689897ccbe049adb19a065c495e75f372ecd42b" -uuid = "1d63c593-3942-5779-bab2-d838dc0a180e" -version = "15.0.4+0" - -[[deps.LaTeXStrings]] -git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec" -uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" -version = "1.3.1" - -[[deps.LatinHypercubeSampling]] -deps = ["Random", "StableRNGs", "StatsBase", "Test"] -git-tree-sha1 = "825289d43c753c7f1bf9bed334c253e9913997f8" -uuid = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d" -version = "1.9.0" - -[[deps.LazyArtifacts]] -deps = ["Artifacts", "Pkg"] -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[[deps.LearnAPI]] -deps = ["InteractiveUtils", "Statistics"] -git-tree-sha1 = "ec695822c1faaaa64cee32d0b21505e1977b4809" -uuid = "92ad9a40-7767-427a-9ee6-6e577f1266cb" -version = "0.1.0" - -[[deps.LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" - -[[deps.LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.84.0+0" - -[[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[deps.LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" - -[[deps.Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[deps.Libiconv_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" -uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" -version = "1.17.0+0" - -[[deps.LineSearches]] -deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] -git-tree-sha1 = "7bbea35cec17305fc70a0e5b4641477dc0789d9d" -uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" -version = "7.2.0" - -[[deps.LinearAlgebra]] -deps = ["Libdl", 
"OpenBLAS_jll", "libblastrampoline_jll"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[deps.LinearMaps]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "9df2ab050ffefe870a09c7b6afdb0cde381703f2" -uuid = "7a12625a-238d-50fd-b39a-03d52299707e" -version = "3.11.1" -weakdeps = ["ChainRulesCore", "SparseArrays", "Statistics"] - - [deps.LinearMaps.extensions] - LinearMapsChainRulesCoreExt = "ChainRulesCore" - LinearMapsSparseArraysExt = "SparseArrays" - LinearMapsStatisticsExt = "Statistics" - -[[deps.LogExpFunctions]] -deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "7d6dd4e9212aebaeed356de34ccf262a3cd415aa" -uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.26" - - [deps.LogExpFunctions.extensions] - LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" - LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" - LogExpFunctionsInverseFunctionsExt = "InverseFunctions" - - [deps.LogExpFunctions.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - -[[deps.Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[deps.LoggingExtras]] -deps = ["Dates", "Logging"] -git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" -uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" -version = "1.0.3" - -[[deps.Lz4_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "6c26c5e8a4203d43b5497be3ec5d4e0c3cde240a" -uuid = "5ced341a-0733-55b8-9ab6-a4889d929147" -version = "1.9.4+0" - -[[deps.MKL]] -deps = ["Artifacts", "Libdl", "LinearAlgebra", "MKL_jll"] -git-tree-sha1 = "100521a1d2181cb39036ee1a6955d6b9686bb363" -uuid = "33e6dc65-8f57-5167-99aa-e5a354878fb2" -version = "0.6.1" - -[[deps.MKL_jll]] -deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] -git-tree-sha1 = "eb006abbd7041c28e0d16260e50a24f8f9104913" -uuid = 
"856f044c-d86e-5d09-b602-aeab76dc8ba7" -version = "2023.2.0+0" - -[[deps.MLFlowClient]] -deps = ["Dates", "FilePathsBase", "HTTP", "JSON", "ShowCases", "URIs", "UUIDs"] -git-tree-sha1 = "32cee10a6527476bef0c6484ff4c60c2cead5d3e" -uuid = "64a0f543-368b-4a9a-827a-e71edb2a0b83" -version = "0.4.4" - -[[deps.MLJ]] -deps = ["CategoricalArrays", "ComputationalResources", "Distributed", "Distributions", "LinearAlgebra", "MLJBalancing", "MLJBase", "MLJEnsembles", "MLJFlow", "MLJIteration", "MLJModels", "MLJTuning", "OpenML", "Pkg", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "StatisticalMeasures", "Statistics", "StatsBase", "Tables"] -git-tree-sha1 = "981196c41a23cbc1befbad190558b1f0ebb97910" -uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" -version = "0.20.2" - -[[deps.MLJBalancing]] -deps = ["MLJBase", "MLJModelInterface", "MLUtils", "OrderedCollections", "Random", "StatsBase"] -git-tree-sha1 = "e4be85602f010291f49b6a6464ccde1708ce5d62" -uuid = "45f359ea-796d-4f51-95a5-deb1a414c586" -version = "0.1.3" - -[[deps.MLJBase]] -deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Dates", "DelimitedFiles", "Distributed", "Distributions", "InteractiveUtils", "InvertedIndices", "LearnAPI", "LinearAlgebra", "MLJModelInterface", "Missings", "OrderedCollections", "Parameters", "PrettyTables", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "Serialization", "StatisticalMeasuresBase", "StatisticalTraits", "Statistics", "StatsBase", "Tables"] -git-tree-sha1 = "6d433d34a1764324cf37a1ddc47dcc42ec05340f" -uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -version = "1.0.1" -weakdeps = ["StatisticalMeasures"] - - [deps.MLJBase.extensions] - DefaultMeasuresExt = "StatisticalMeasures" - -[[deps.MLJEnsembles]] -deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Distributed", "Distributions", "MLJModelInterface", "ProgressMeter", "Random", "ScientificTypesBase", "StatisticalMeasuresBase", "StatsBase"] 
-git-tree-sha1 = "94403b2c8f692011df6731913376e0e37f6c0fe9" -uuid = "50ed68f4-41fd-4504-931a-ed422449fee0" -version = "0.4.0" - -[[deps.MLJFlow]] -deps = ["MLFlowClient", "MLJBase", "MLJModelInterface"] -git-tree-sha1 = "89d0e7a7e08359476482f20b2d8ff12080d171ee" -uuid = "7b7b8358-b45c-48ea-a8ef-7ca328ad328f" -version = "0.3.0" - -[[deps.MLJGLMInterface]] -deps = ["Distributions", "GLM", "MLJModelInterface", "StatsModels", "Tables"] -git-tree-sha1 = "06aba1c96b19f31744f7e97d96fcf66b79739e05" -uuid = "caf8df21-4939-456d-ac9c-5fefbfb04c0c" -version = "0.3.5" - -[[deps.MLJIteration]] -deps = ["IterationControl", "MLJBase", "Random", "Serialization"] -git-tree-sha1 = "991e10d4c8da49d534e312e8a4fbe56b7ac6f70c" -uuid = "614be32b-d00c-4edb-bd02-1eb411ab5e55" -version = "0.6.0" - -[[deps.MLJLinearModels]] -deps = ["DocStringExtensions", "IterativeSolvers", "LinearAlgebra", "LinearMaps", "MLJModelInterface", "Optim", "Parameters"] -git-tree-sha1 = "7f517fd840ca433a8fae673edb31678ff55d969c" -uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692" -version = "0.10.0" - -[[deps.MLJModelInterface]] -deps = ["Random", "ScientificTypesBase", "StatisticalTraits"] -git-tree-sha1 = "381d99f0af76d98f50bd5512dcf96a99c13f8223" -uuid = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" -version = "1.9.3" - -[[deps.MLJModels]] -deps = ["CategoricalArrays", "CategoricalDistributions", "Combinatorics", "Dates", "Distances", "Distributions", "InteractiveUtils", "LinearAlgebra", "MLJModelInterface", "Markdown", "OrderedCollections", "Parameters", "Pkg", "PrettyPrinting", "REPL", "Random", "RelocatableFolders", "ScientificTypes", "StatisticalTraits", "Statistics", "StatsBase", "Tables"] -git-tree-sha1 = "10d221910fc3f3eedad567178ddbca3cc0f776a3" -uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -version = "0.16.12" - -[[deps.MLJTuning]] -deps = ["ComputationalResources", "Distributed", "Distributions", "LatinHypercubeSampling", "MLJBase", "ProgressMeter", "Random", "RecipesBase", "StatisticalMeasuresBase"] 
-git-tree-sha1 = "44dc126646a15018d7829f020d121b85b4def9bc" -uuid = "03970b2e-30c4-11ea-3135-d1576263f10f" -version = "0.8.0" - -[[deps.MLJXGBoostInterface]] -deps = ["MLJModelInterface", "SparseArrays", "Tables", "XGBoost"] -git-tree-sha1 = "988c399a352f0b49bc1345c509d8a4800cb468c5" -uuid = "54119dfa-1dab-4055-a167-80440f4f7a91" -version = "0.3.10" - -[[deps.MLStyle]] -git-tree-sha1 = "bc38dff0548128765760c79eb7388a4b37fae2c8" -uuid = "d8e11817-5142-5d16-987a-aa16d5891078" -version = "0.4.17" - -[[deps.MLUtils]] -deps = ["ChainRulesCore", "Compat", "DataAPI", "DelimitedFiles", "FLoops", "NNlib", "Random", "ShowCases", "SimpleTraits", "Statistics", "StatsBase", "Tables", "Transducers"] -git-tree-sha1 = "3504cdb8c2bc05bde4d4b09a81b01df88fcbbba0" -uuid = "f1d291b0-491e-4a28-83b9-f70985020b54" -version = "0.4.3" - -[[deps.MPICH_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] -git-tree-sha1 = "8a5b4d2220377d1ece13f49438d71ad20cf1ba83" -uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4" -version = "4.1.2+0" - -[[deps.MPIPreferences]] -deps = ["Libdl", "Preferences"] -git-tree-sha1 = "8f6af051b9e8ec597fa09d8885ed79fd582f33c9" -uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" -version = "0.1.10" - -[[deps.MPItrampoline_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] -git-tree-sha1 = "6979eccb6a9edbbb62681e158443e79ecc0d056a" -uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748" -version = "5.3.1+0" - -[[deps.MacroTools]] -deps = ["Markdown", "Random"] -git-tree-sha1 = "9ee1618cbf5240e6d4e0371d6f24065083f60c48" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.11" - -[[deps.Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[deps.MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] -git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" 
-uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.1.9" - -[[deps.MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.2+0" - -[[deps.MetaGraphsNext]] -deps = ["Graphs", "JLD2", "SimpleTraits"] -git-tree-sha1 = "8dd4f3f8a643d53e61ff9115749f522c35a38f3f" -uuid = "fa8bd995-216d-47f1-8a91-f3b68fbeb377" -version = "0.6.0" - -[[deps.MicroCollections]] -deps = ["BangBang", "InitialValues", "Setfield"] -git-tree-sha1 = "629afd7d10dbc6935ec59b32daeb33bc4460a42e" -uuid = "128add7d-3638-4c79-886c-908ea0c25c34" -version = "0.1.4" - -[[deps.MicrosoftMPI_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "b01beb91d20b0d1312a9471a36017b5b339d26de" -uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf" -version = "10.1.4+1" - -[[deps.Missings]] -deps = ["DataAPI"] -git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272" -uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "1.1.0" - -[[deps.Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[deps.Mocking]] -deps = ["Compat", "ExprTools"] -git-tree-sha1 = "4cc0c5a83933648b615c36c2b956d94fda70641e" -uuid = "78c3b35d-d492-501b-9361-3d52fe80e533" -version = "0.7.7" - -[[deps.MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.10.11" - -[[deps.MultipleTesting]] -deps = ["Distributions", "SpecialFunctions", "StatsBase"] -git-tree-sha1 = "1e98f8f732e7035c4333135b75605b74f3462b9b" -uuid = "f8716d33-7c4a-5097-896f-ce0ecbd3ef6b" -version = "0.6.0" - -[[deps.NLSolversBase]] -deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] -git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c" -uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" -version = "7.8.3" - -[[deps.NNlib]] -deps = ["Adapt", "Atomix", "ChainRulesCore", "GPUArraysCore", "KernelAbstractions", "LinearAlgebra", "Pkg", "Random", "Requires", "Statistics"] -git-tree-sha1 = "ac86d2944bf7a670ac8bf0f7ec099b5898abcc09" -uuid = 
"872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.9.8" - - [deps.NNlib.extensions] - NNlibAMDGPUExt = "AMDGPU" - NNlibCUDACUDNNExt = ["CUDA", "cuDNN"] - NNlibCUDAExt = "CUDA" - NNlibEnzymeCoreExt = "EnzymeCore" - - [deps.NNlib.weakdeps] - AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" - CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" - EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" - cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" - -[[deps.NaNMath]] -deps = ["OpenLibm_jll"] -git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" -uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" -version = "1.0.2" - -[[deps.NameResolution]] -deps = ["PrettyPrint"] -git-tree-sha1 = "1a0fa0e9613f46c9b8c11eee38ebb4f590013c5e" -uuid = "71a1bf82-56d0-4bbc-8a3c-48b961074391" -version = "0.1.5" - -[[deps.NetworkLayout]] -deps = ["GeometryBasics", "LinearAlgebra", "Random", "Requires", "StaticArrays"] -git-tree-sha1 = "91bb2fedff8e43793650e7a677ccda6e6e6e166b" -uuid = "46757867-2c16-5918-afeb-47bfcb05e46a" -version = "0.4.6" -weakdeps = ["Graphs"] - - [deps.NetworkLayout.extensions] - NetworkLayoutGraphsExt = "Graphs" - -[[deps.NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -version = "1.2.0" - -[[deps.OpenBLAS_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.21+4" - -[[deps.OpenLibm_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "05823500-19ac-5b8b-9628-191a04bc5112" -version = "0.8.1+0" - -[[deps.OpenML]] -deps = ["ARFFFiles", "HTTP", "JSON", "Markdown", "Pkg", "Scratch"] -git-tree-sha1 = "6efb039ae888699d5a74fb593f6f3e10c7193e33" -uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66" -version = "0.3.1" - -[[deps.OpenMPI_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "PMIx_jll", "TOML", "Zlib_jll", "libevent_jll", "prrte_jll"] -git-tree-sha1 = "694458ae803b684f09c07f90459cb79655fb377d" -uuid = 
"fe0851c0-eecd-5654-98d4-656369965a5c" -version = "5.0.0+0" - -[[deps.OpenSSL]] -deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] -git-tree-sha1 = "51901a49222b09e3743c65b8847687ae5fc78eb2" -uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" -version = "1.4.1" - -[[deps.OpenSSL_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f" -uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "3.0.12+0" - -[[deps.OpenSpecFun_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" -uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" -version = "0.5.5+0" - -[[deps.Optim]] -deps = ["Compat", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] -git-tree-sha1 = "01f85d9269b13fedc61e63cc72ee2213565f7a72" -uuid = "429524aa-4258-5aef-a3af-852621145aeb" -version = "1.7.8" - -[[deps.OrderedCollections]] -git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.6.3" - -[[deps.PDMats]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "4e5be6bb265d33669f98eb55d2a57addd1eeb72c" -uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.11.30" - -[[deps.PMIx_jll]] -deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "Zlib_jll", "libevent_jll"] -git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541" -uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab" -version = "4.2.7+0" - -[[deps.PackageCompiler]] -deps = ["Artifacts", "Glob", "LazyArtifacts", "Libdl", "Pkg", "Printf", "RelocatableFolders", "TOML", "UUIDs", "p7zip_jll"] -git-tree-sha1 = "f9392ab72832f4315220a853747ff3dba758c9d1" -uuid = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" -version = "2.1.15" - -[[deps.Parameters]] -deps = ["OrderedCollections", "UnPack"] 
-git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" -uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" -version = "0.12.3" - -[[deps.Parsers]] -deps = ["Dates", "PrecompileTools", "UUIDs"] -git-tree-sha1 = "a935806434c9d4c506ba941871b327b96d41f2bf" -uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.8.0" - -[[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.9.2" - -[[deps.PooledArrays]] -deps = ["DataAPI", "Future"] -git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" -uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "1.4.3" - -[[deps.PositiveFactorizations]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "17275485f373e6673f7e7f97051f703ed5b15b20" -uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" -version = "0.2.4" - -[[deps.PrecompileTools]] -deps = ["Preferences"] -git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f" -uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" -version = "1.2.0" - -[[deps.Preferences]] -deps = ["TOML"] -git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e" -uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.4.1" - -[[deps.PrettyPrint]] -git-tree-sha1 = "632eb4abab3449ab30c5e1afaa874f0b98b586e4" -uuid = "8162dcfd-2161-5ef2-ae6c-7681170c5f98" -version = "0.2.0" - -[[deps.PrettyPrinting]] -git-tree-sha1 = "22a601b04a154ca38867b991d5017469dc75f2db" -uuid = "54e16d92-306c-5ea0-a30b-337be88ac337" -version = "0.4.1" - -[[deps.PrettyTables]] -deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] -git-tree-sha1 = "88b895d13d53b5577fd53379d913b9ab9ac82660" -uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -version = "2.3.1" - -[[deps.Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[deps.ProgressMeter]] -deps 
= ["Distributed", "Printf"] -git-tree-sha1 = "00099623ffee15972c16111bcf84c58a0051257c" -uuid = "92933f4c-e287-5a05-a399-4b506db050ca" -version = "1.9.0" - -[[deps.QuadGK]] -deps = ["DataStructures", "LinearAlgebra"] -git-tree-sha1 = "9ebcd48c498668c7fa0e97a9cae873fbee7bfee1" -uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" -version = "2.9.1" - -[[deps.RCall]] -deps = ["CategoricalArrays", "Conda", "DataFrames", "DataStructures", "Dates", "Libdl", "Missings", "REPL", "Random", "Requires", "StatsModels", "WinReg"] -git-tree-sha1 = "3084689b18f9e5e817a6ce9a83a7654d8ad0f2f6" -uuid = "6f49c342-dc21-5d91-9882-a32aef131414" -version = "0.13.18" - -[[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[deps.Random]] -deps = ["SHA", "Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[deps.RealDot]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "9f0a1b71baaf7650f4fa8a1d168c7fb6ee41f0c9" -uuid = "c1ae055f-0cd5-4b69-90a6-9a35b1a98df9" -version = "0.1.0" - -[[deps.RecipesBase]] -deps = ["PrecompileTools"] -git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" -uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.3.4" - -[[deps.Reexport]] -git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.2.2" - -[[deps.RelocatableFolders]] -deps = ["SHA", "Scratch"] -git-tree-sha1 = "ffdaf70d81cf6ff22c2b6e733c900c3321cab864" -uuid = "05181044-ff0b-4ac5-8273-598c1e38db00" -version = "1.0.1" - -[[deps.Requires]] -deps = ["UUIDs"] -git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.3.0" - -[[deps.Rmath]] -deps = ["Random", "Rmath_jll"] -git-tree-sha1 = "f65dcb5fa46aee0cf9ed6274ccbd597adc49aa7b" -uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" -version = "0.7.1" - -[[deps.Rmath_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = 
"6ed52fdd3382cf21947b15e8870ac0ddbff736da" -uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" -version = "0.4.0+0" - -[[deps.Roots]] -deps = ["ChainRulesCore", "CommonSolve", "Printf", "Setfield"] -git-tree-sha1 = "0f1d92463a020321983d04c110f476c274bafe2e" -uuid = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" -version = "2.0.22" - - [deps.Roots.extensions] - RootsForwardDiffExt = "ForwardDiff" - RootsIntervalRootFindingExt = "IntervalRootFinding" - RootsSymPyExt = "SymPy" - RootsSymPyPythonCallExt = "SymPyPythonCall" - - [deps.Roots.weakdeps] - ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" - IntervalRootFinding = "d2bf35a9-74e0-55ec-b149-d360ff49b807" - SymPy = "24249f21-da20-56a4-8eb1-6a02cf4ae2e6" - SymPyPythonCall = "bc8888f7-b21e-4b7c-a06a-5d9c9496438c" - -[[deps.SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -version = "0.7.0" - -[[deps.ScientificTypes]] -deps = ["CategoricalArrays", "ColorTypes", "Dates", "Distributions", "PrettyTables", "Reexport", "ScientificTypesBase", "StatisticalTraits", "Tables"] -git-tree-sha1 = "75ccd10ca65b939dab03b812994e571bf1e3e1da" -uuid = "321657f4-b219-11e9-178b-2701a2544e81" -version = "3.0.2" - -[[deps.ScientificTypesBase]] -git-tree-sha1 = "a8e18eb383b5ecf1b5e6fc237eb39255044fd92b" -uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161" -version = "3.0.0" - -[[deps.Scratch]] -deps = ["Dates"] -git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" -uuid = "6c6a2e73-6563-6170-7368-637461726353" -version = "1.2.1" - -[[deps.SentinelArrays]] -deps = ["Dates", "Random"] -git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f" -uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" -version = "1.4.1" - -[[deps.Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[deps.Setfield]] -deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] -git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" -uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" -version = "1.1.1" - -[[deps.SharedArrays]] -deps = ["Distributed", 
"Mmap", "Random", "Serialization"] -uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" - -[[deps.ShiftedArrays]] -git-tree-sha1 = "503688b59397b3307443af35cd953a13e8005c16" -uuid = "1277b4bf-5013-50f5-be3d-901d8477a67a" -version = "2.0.0" - -[[deps.ShowCases]] -git-tree-sha1 = "7f534ad62ab2bd48591bdeac81994ea8c445e4a5" -uuid = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" -version = "0.1.0" - -[[deps.SimpleBufferStream]] -git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" -uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" -version = "1.1.0" - -[[deps.SimpleTraits]] -deps = ["InteractiveUtils", "MacroTools"] -git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" -uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" -version = "0.9.4" - -[[deps.Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[deps.SortingAlgorithms]] -deps = ["DataStructures"] -git-tree-sha1 = "5165dfb9fd131cf0c6957a3a7605dede376e7b63" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "1.2.0" - -[[deps.SparseArrays]] -deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[deps.SparseInverseSubset]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "91402087fd5d13b2d97e3ef29bbdf9d7859e678a" -uuid = "dc90abb0-5640-4711-901d-7e5b23a2fada" -version = "0.1.1" - -[[deps.SparseMatricesCSR]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "38677ca58e80b5cad2382e5a1848f93b054ad28d" -uuid = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" -version = "0.6.7" - -[[deps.SpecialFunctions]] -deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] -git-tree-sha1 = "e2cfc4012a19088254b3950b85c3c1d8882d864d" -uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "2.3.1" -weakdeps = ["ChainRulesCore"] - - [deps.SpecialFunctions.extensions] - SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" - -[[deps.SplittablesBase]] -deps = ["Setfield", "Test"] 
-git-tree-sha1 = "e08a62abc517eb79667d0a29dc08a3b589516bb5" -uuid = "171d559e-b47b-412a-8079-5efa626c420e" -version = "0.1.15" - -[[deps.StableRNGs]] -deps = ["Random", "Test"] -git-tree-sha1 = "3be7d49667040add7ee151fefaf1f8c04c8c8276" -uuid = "860ef19b-820b-49d6-a774-d7a799459cd3" -version = "1.0.0" - -[[deps.StaticArrays]] -deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] -git-tree-sha1 = "5ef59aea6f18c25168842bded46b16662141ab87" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "1.7.0" -weakdeps = ["Statistics"] - - [deps.StaticArrays.extensions] - StaticArraysStatisticsExt = "Statistics" - -[[deps.StaticArraysCore]] -git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d" -uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" -version = "1.4.2" - -[[deps.StatisticalMeasures]] -deps = ["CategoricalArrays", "CategoricalDistributions", "Distributions", "LearnAPI", "LinearAlgebra", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "StatisticalMeasuresBase", "Statistics", "StatsBase"] -git-tree-sha1 = "b58c7cc3d7de6c0d75d8437b81481af924970123" -uuid = "a19d573c-0a75-4610-95b3-7071388c7541" -version = "0.1.3" - - [deps.StatisticalMeasures.extensions] - LossFunctionsExt = "LossFunctions" - ScientificTypesExt = "ScientificTypes" - - [deps.StatisticalMeasures.weakdeps] - LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" - ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" - -[[deps.StatisticalMeasuresBase]] -deps = ["CategoricalArrays", "InteractiveUtils", "MLUtils", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "Statistics"] -git-tree-sha1 = "17dfb22e2e4ccc9cd59b487dce52883e0151b4d3" -uuid = "c062fc1d-0d66-479b-b6ac-8b44719de4cc" -version = "0.1.1" - -[[deps.StatisticalTraits]] -deps = ["ScientificTypesBase"] -git-tree-sha1 = "30b9236691858e13f167ce829490a68e1a597782" -uuid = "64bff920-2084-43da-a3e6-9bb72801c0c9" -version = "3.2.0" - -[[deps.Statistics]] -deps = 
["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -version = "1.9.0" - -[[deps.StatsAPI]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" -uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" -version = "1.7.0" - -[[deps.StatsBase]] -deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] -git-tree-sha1 = "1d77abd07f617c4868c33d4f5b9e1dbb2643c9cf" -uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.34.2" - -[[deps.StatsFuns]] -deps = ["HypergeometricFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"] -git-tree-sha1 = "f625d686d5a88bcd2b15cd81f18f98186fdc0c9a" -uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "1.3.0" - - [deps.StatsFuns.extensions] - StatsFunsChainRulesCoreExt = "ChainRulesCore" - StatsFunsInverseFunctionsExt = "InverseFunctions" - - [deps.StatsFuns.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - -[[deps.StatsModels]] -deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Printf", "REPL", "ShiftedArrays", "SparseArrays", "StatsAPI", "StatsBase", "StatsFuns", "Tables"] -git-tree-sha1 = "5cf6c4583533ee38639f73b880f35fc85f2941e0" -uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d" -version = "0.7.3" - -[[deps.StringEncodings]] -deps = ["Libiconv_jll"] -git-tree-sha1 = "b765e46ba27ecf6b44faf70df40c57aa3a547dcb" -uuid = "69024149-9ee7-55f6-a4c4-859efe599b68" -version = "0.3.7" - -[[deps.StringManipulation]] -deps = ["PrecompileTools"] -git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5" -uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" -version = "0.3.4" - -[[deps.StructArrays]] -deps = ["Adapt", "ConstructionBase", "DataAPI", "GPUArraysCore", "StaticArraysCore", "Tables"] -git-tree-sha1 = "0a3db38e4cce3c54fe7a71f831cd7b6194a54213" -uuid = 
"09ab397b-f2b6-538f-b94a-2f83cf4a842a" -version = "0.6.16" - -[[deps.StructTypes]] -deps = ["Dates", "UUIDs"] -git-tree-sha1 = "ca4bccb03acf9faaf4137a9abc1881ed1841aa70" -uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" -version = "1.10.0" - -[[deps.SuiteSparse]] -deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] -uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" - -[[deps.SuiteSparse_jll]] -deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] -uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" -version = "5.10.1+6" - -[[deps.TMLE]] -deps = ["AbstractDifferentiation", "CategoricalArrays", "Combinatorics", "Distributions", "GLM", "Graphs", "HypothesisTests", "LogExpFunctions", "MLJBase", "MLJGLMInterface", "MLJModels", "MetaGraphsNext", "Missings", "PrecompileTools", "PrettyTables", "Random", "Statistics", "TableOperations", "Tables", "Zygote"] -git-tree-sha1 = "fe31c10325f1e911dae33a5d521cc07c2c7eeecd" -repo-rev = "cvtmle" -repo-url = "https://github.com/TARGENE/TMLE.jl.git" -uuid = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf" -version = "0.12.0" - - [deps.TMLE.extensions] - GraphMakieExt = ["GraphMakie", "CairoMakie"] - JSONExt = "JSON" - YAMLExt = "YAML" - - [deps.TMLE.weakdeps] - CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" - GraphMakie = "1ecd5474-83a3-4783-bb4f-06765db800d2" - JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" - YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" - -[[deps.TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.3" - -[[deps.TZJData]] -deps = ["Artifacts"] -git-tree-sha1 = "d39314cdbaf5b90a047db33858626f8d1cc973e1" -uuid = "dc5dba14-91b3-4cab-a142-028a31da12f7" -version = "1.0.0+2023c" - -[[deps.TableOperations]] -deps = ["SentinelArrays", "Tables", "Test"] -git-tree-sha1 = "e383c87cf2a1dc41fa30c093b2a19877c83e1bc1" -uuid = "ab02a1b2-a7df-11e8-156e-fb1833f50b87" -version = "1.2.0" - -[[deps.TableTraits]] -deps = ["IteratorInterfaceExtensions"] -git-tree-sha1 = 
"c06b2f539df1c6efa794486abfb6ed2022561a39" -uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -version = "1.0.1" - -[[deps.Tables]] -deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"] -git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d" -uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "1.11.1" - -[[deps.Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.0" - -[[deps.Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[deps.TimeZones]] -deps = ["Artifacts", "Dates", "Downloads", "InlineStrings", "LazyArtifacts", "Mocking", "Printf", "Scratch", "TZJData", "Unicode", "p7zip_jll"] -git-tree-sha1 = "89e64d61ef3cd9e80f7fc12b7d13db2d75a23c03" -uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" -version = "1.13.0" -weakdeps = ["RecipesBase"] - - [deps.TimeZones.extensions] - TimeZonesRecipesBaseExt = "RecipesBase" - -[[deps.TranscodingStreams]] -deps = ["Random", "Test"] -git-tree-sha1 = "9a6ae7ed916312b41236fcef7e0af564ef934769" -uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.13" - -[[deps.Transducers]] -deps = ["Adapt", "ArgCheck", "BangBang", "Baselet", "CompositionsBase", "ConstructionBase", "DefineSingletons", "Distributed", "InitialValues", "Logging", "Markdown", "MicroCollections", "Requires", "Setfield", "SplittablesBase", "Tables"] -git-tree-sha1 = "e579d3c991938fecbb225699e8f611fa3fbf2141" -uuid = "28d57a85-8fef-5791-bfe6-a80928e7c999" -version = "0.4.79" - - [deps.Transducers.extensions] - TransducersBlockArraysExt = "BlockArrays" - TransducersDataFramesExt = "DataFrames" - TransducersLazyArraysExt = "LazyArrays" - TransducersOnlineStatsBaseExt = "OnlineStatsBase" - TransducersReferenceablesExt = "Referenceables" - - [deps.Transducers.weakdeps] - BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" - DataFrames = 
"a93c6f00-e57d-5684-b7b6-d8193f3e46c0" - LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" - OnlineStatsBase = "925886fa-5bf2-5e8e-b522-a9147a512338" - Referenceables = "42d2dcc6-99eb-4e98-b66c-637b7d73030e" - -[[deps.URIs]] -git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" -uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -version = "1.5.1" - -[[deps.UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[deps.UnPack]] -git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" -uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" -version = "1.0.2" - -[[deps.Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[deps.UnsafeAtomics]] -git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" -uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" -version = "0.2.1" - -[[deps.UnsafeAtomicsLLVM]] -deps = ["LLVM", "UnsafeAtomics"] -git-tree-sha1 = "323e3d0acf5e78a56dfae7bd8928c989b4f3083e" -uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" -version = "0.1.3" - -[[deps.VersionParsing]] -git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868" -uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" -version = "1.3.0" - -[[deps.WeakRefStrings]] -deps = ["DataAPI", "InlineStrings", "Parsers"] -git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23" -uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" -version = "1.4.2" - -[[deps.WinReg]] -git-tree-sha1 = "cd910906b099402bcc50b3eafa9634244e5ec83b" -uuid = "1b915085-20d7-51cf-bf83-8f477d6f5128" -version = "1.0.0" - -[[deps.WorkerUtilities]] -git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7" -uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60" -version = "1.6.1" - -[[deps.XGBoost]] -deps = ["AbstractTrees", "CEnum", "JSON3", "LinearAlgebra", "OrderedCollections", "SparseArrays", "SparseMatricesCSR", "Statistics", "Tables", "XGBoost_jll"] -git-tree-sha1 = "bacb62e07d104630094c8dac2fd070f5d4b9b305" -uuid = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" -version = "2.5.1" - - [deps.XGBoost.extensions] - 
XGBoostCUDAExt = "CUDA" - XGBoostTermExt = "Term" - - [deps.XGBoost.weakdeps] - CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" - Term = "22787eb5-b846-44ae-b979-8e399b8463ab" - -[[deps.XGBoost_jll]] -deps = ["Artifacts", "CUDA_Runtime_jll", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "Libdl", "TOML"] -git-tree-sha1 = "1c0aa2390a7ebb28a3d6c214f64e57a24091fbd7" -uuid = "a5c6f535-4255-5ca2-a466-0e519f119c46" -version = "2.0.1+0" - -[[deps.YAML]] -deps = ["Base64", "Dates", "Printf", "StringEncodings"] -git-tree-sha1 = "e6330e4b731a6af7959673621e91645eb1356884" -uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" -version = "0.4.9" - -[[deps.Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.13+0" - -[[deps.Zstd_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "49ce682769cd5de6c72dcf1b94ed7790cd08974c" -uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" -version = "1.5.5+0" - -[[deps.Zygote]] -deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "GPUArrays", "GPUArraysCore", "IRTools", "InteractiveUtils", "LinearAlgebra", "LogExpFunctions", "MacroTools", "NaNMath", "PrecompileTools", "Random", "Requires", "SparseArrays", "SpecialFunctions", "Statistics", "ZygoteRules"] -git-tree-sha1 = "5ded212acd815612df112bb895ef3910c5a03f57" -uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" -version = "0.6.67" - - [deps.Zygote.extensions] - ZygoteColorsExt = "Colors" - ZygoteDistancesExt = "Distances" - ZygoteTrackerExt = "Tracker" - - [deps.Zygote.weakdeps] - Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" - Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" - Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" - -[[deps.ZygoteRules]] -deps = ["ChainRulesCore", "MacroTools"] -git-tree-sha1 = "9d749cd449fb448aeca4feee9a2f4186dbb5d184" -uuid = "700de1a5-db45-46bc-99cf-38207098b444" -version = "0.2.4" - -[[deps.glmnet_jll]] -deps = ["Artifacts", 
"CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "31adae3b983b579a1fbd7cfd43a4bc0d224c2f5a" -uuid = "78c6b45d-5eaf-5d68-bcfb-a5a2cb06c27f" -version = "2.0.13+0" - -[[deps.libaec_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "eddd19a8dea6b139ea97bdc8a0e2667d4b661720" -uuid = "477f73a3-ac25-53e9-8cc3-50b2fa2566f0" -version = "1.0.6+1" - -[[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.8.0+0" - -[[deps.libevent_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "OpenSSL_jll"] -git-tree-sha1 = "f04ec6d9a186115fb38f858f05c0c4e1b7fc9dcb" -uuid = "1080aeaf-3a6a-583e-a51c-c537b09f60ec" -version = "2.1.13+1" - -[[deps.nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.48.0+0" - -[[deps.p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" - -[[deps.prrte_jll]] -deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "PMIx_jll", "libevent_jll"] -git-tree-sha1 = "5adb2d7a18a30280feb66cad6f1a1dfdca2dc7b0" -uuid = "eb928a42-fffd-568d-ab9c-3f5d54fc65b9" -version = "3.0.2+0" diff --git a/Project.toml b/Project.toml index 2f187ac..84ec5f3 100644 --- a/Project.toml +++ b/Project.toml @@ -57,4 +57,5 @@ MultipleTesting = "0.6.0" Optim = "1.7" Tables = "1.10.1" YAML = "0.4.9" +TMLE = "0.12" julia = "1.7, 1" From 9051eb1c7f7399f09bca1d05082d42d693d99a4c Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 6 Dec 2023 23:42:13 +0000 Subject: [PATCH 32/71] try build app again --- .github/workflows/CI.yml | 2 ++ Comonicon.toml | 20 ++++++++++---------- docker/Dockerfile | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index fc1b284..a0dde5f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -45,6 +45,8 @@ jobs: - uses: julia-actions/cache@v1 - uses: 
julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + - name: Build app + run: julia --project -t auto deps/build_app.jl app tarball - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v2 with: diff --git a/Comonicon.toml b/Comonicon.toml index 4acd652..48c2a41 100644 --- a/Comonicon.toml +++ b/Comonicon.toml @@ -5,16 +5,16 @@ completion = true quiet = false optimize = 2 -# [sysimg] -# incremental=true -# filter_stdlibs=false +[sysimg] +incremental=true +filter_stdlibs=false -# [sysimg.precompile] -# execution_file = ["deps/execute.jl"] +[sysimg.precompile] +execution_file = ["deps/execute.jl"] -# [application] -# incremental=true -# filter_stdlibs=false +[application] +incremental=true +filter_stdlibs=false -# [application.precompile] -# execution_file = ["deps/execute.jl"] \ No newline at end of file +[application.precompile] +execution_file = ["deps/execute.jl"] \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index 99aa2c7..0523eeb 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -41,7 +41,7 @@ WORKDIR /TargetedEstimation.jl RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' # Precompile Sysimage project -RUN julia --project -t auto --startup-file=no deps/build_app.jl +RUN julia --project -t auto --startup-file=no deps/build_app.jl app ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" From 873095ad147b217da4581ed39f08fee78cb61261 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 7 Dec 2023 16:45:42 +0000 Subject: [PATCH 33/71] remove HAL --- .github/workflows/CI.yml | 18 +----------------- Project.toml | 4 +--- docker/Dockerfile | 25 ------------------------- docs/src/models.md | 1 - src/TargetedEstimation.jl | 1 - test/config/tmle_ose_config.jl | 2 -- 6 files changed, 2 insertions(+), 49 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index a0dde5f..19683a3 100644 --- a/.github/workflows/CI.yml +++ 
b/.github/workflows/CI.yml @@ -22,22 +22,6 @@ jobs: - x64 steps: - uses: actions/checkout@v2 - - name: Install curl - run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - r-version: 4.2.3 - - name: Install R dependencies - run: | - install.packages("devtools", repos="http://cran.us.r-project.org", dependecies=TRUE) - require(devtools) - install_version("hal9001", version = "0.4.1", repos = "http://cran.us.r-project.org") - shell: Rscript {0} - - name: Set R_HOME - run: echo "R_HOME=$(R RHOME)" >> $GITHUB_ENV - - name: Set LD_LIBRARY - run: echo "LD_LIBRARY_PATH=$R_HOME/lib" >> $GITHUB_ENV - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} @@ -45,7 +29,7 @@ jobs: - uses: julia-actions/cache@v1 - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - - name: Build app + - name: Build App run: julia --project -t auto deps/build_app.jl app tarball - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v2 diff --git a/Project.toml b/Project.toml index 84ec5f3..4518080 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,6 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" -HighlyAdaptiveLasso = "c5dac772-1445-43c4-b698-9440de7877f6" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" @@ -43,7 +42,6 @@ Configurations = "0.17.6" DataFrames = "1.3.4" EvoTrees = "0.16.5" GLMNet = "0.7" -HighlyAdaptiveLasso = "0.2.0" JLD2 = "0.4.22" JSON = "0.21.4" MKL = "0.6" @@ -55,7 +53,7 @@ MLJModels = "0.16" MLJXGBoostInterface = "0.3.4" MultipleTesting = "0.6.0" Optim = "1.7" +TMLE = "0.12" Tables = "1.10.1" YAML = "0.4.9" -TMLE = "0.12" julia = "1.7, 1" diff --git 
a/docker/Dockerfile b/docker/Dockerfile index 0523eeb..8016127 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,31 +6,6 @@ ENV TZ=Europe/Amsterdam ENV JULIA_DEPOT_PATH=/opt -RUN apt-get update && apt-get install -y wget unzip procps - -# Install R and hal9001 - -RUN apt-get install -y r-base \ - r-base-core \ - r-recommended \ - r-base-dev - -RUN apt-get install -y libssl-dev \ - libxml2-dev \ - libcurl4-openssl-dev \ - libgit2-dev \ - libharfbuzz-dev \ - libfribidi-dev \ - libfontconfig1-dev \ - libfreetype6-dev \ - libpng-dev \ - libtiff5-dev \ - libjpeg-dev - -RUN R -e "install.packages('devtools', repos='http://cran.us.r-project.org', dependecies=TRUE); \ - require(devtools);\ - install_version('hal9001', version = '0.4.1', repos = 'http://cran.us.r-project.org')" - # Import project, build and precompile COPY . /TargetedEstimation.jl diff --git a/docs/src/models.md b/docs/src/models.md index 4d978bd..5a15d88 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -11,7 +11,6 @@ Because [TMLE.jl](https://targene.github.io/TMLE.jl/stable/) is based on top of - [EvoTrees.jl](https://evovest.github.io/EvoTrees.jl/stable/): A pure Julia implementation of histogram based gradient boosting trees (subset of XGBoost) - [GLMNet](https://github.com/JuliaStats/GLMNet.jl): A Julia wrapper of the [glmnet](https://glmnet.stanford.edu/articles/glmnet.html) package. See the [GLMNet](@ref) section. - [MLJModels](https://github.com/JuliaAI/MLJModels.jl): General utilities such as the `OneHotEncoder` or `InteractionTransformer`. -- [HighlyAdaptiveLasso](https://github.com/olivierlabayle/HighlyAdaptiveLasso.jl): A Julia wrapper of the [HAL](https://tlverse.org/hal9001/) algorithm, experimental. Further support for more packages can be added on request, please fill an [issue](https://github.com/TARGENE/TargetedEstimation.jl/issues). 
diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index fce10ec..aa01fda 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -10,7 +10,6 @@ using MLJ using CSV using Arrow using TMLE -using HighlyAdaptiveLasso using EvoTrees using MLJXGBoostInterface using MLJLinearModels diff --git a/test/config/tmle_ose_config.jl b/test/config/tmle_ose_config.jl index 8649d9f..1997b70 100644 --- a/test/config/tmle_ose_config.jl +++ b/test/config/tmle_ose_config.jl @@ -13,7 +13,6 @@ default_models = TMLE.default_models( evo_10 = EvoTreeRegressor(nrounds=10), evo_20 = EvoTreeRegressor(nrounds=20), constant = ConstantRegressor(), - hal = HALRegressor(max_degree=1, smoothness_orders=1, num_knots=[10, 5], lambda=10, cv_select=false) ), # For the estimation of E[Y|W, T]: binary target Q_binary = Stack( @@ -26,7 +25,6 @@ default_models = TMLE.default_models( cache = false ), constant = ConstantClassifier(), - hal = HALClassifier(max_degree=1, smoothness_orders=1, num_knots=[10, 5], lambda=10, cv_select=false), gridsearch_evo = TunedModel( model = evotree, resampling = CV(), From b11921f82288aeaf2636bfe7cdd68b00ac0239cf Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 7 Dec 2023 17:02:17 +0000 Subject: [PATCH 34/71] add gcc to dockerfile --- docker/Dockerfile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 8016127..3f24ee7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,6 +2,10 @@ FROM julia:1.9.4-bullseye ARG DEBIAN_FRONTEND=noninteractive +RUN apt-get update && \ + apt-get -y install gcc mono-mcs && \ + rm -rf /var/lib/apt/lists/* + ENV TZ=Europe/Amsterdam ENV JULIA_DEPOT_PATH=/opt @@ -16,7 +20,7 @@ WORKDIR /TargetedEstimation.jl RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' # Precompile Sysimage project -RUN julia --project -t auto --startup-file=no deps/build_app.jl app +# RUN julia --project -t auto 
--startup-file=no deps/build_app.jl app -ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" +# ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" From 26e822366f56c6817b120752fd8ea2162b9ba3da Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 7 Dec 2023 17:07:58 +0000 Subject: [PATCH 35/71] add back app --- docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3f24ee7..5eac723 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,7 +20,7 @@ WORKDIR /TargetedEstimation.jl RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' # Precompile Sysimage project -# RUN julia --project -t auto --startup-file=no deps/build_app.jl app +RUN julia --project -t auto --startup-file=no deps/build_app.jl app -# ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" +ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" From df1594ca14f89080780f2d6904b1c490cba176ed Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 8 Dec 2023 10:47:11 +0000 Subject: [PATCH 36/71] add some doc updates --- docs/src/cli.md | 51 +++++++++++++++++++ docs/src/environment.md | 22 -------- docs/src/index.md | 7 +-- .../G-superlearning-Q-glm.jl | 0 .../G-superlearning-Q-glmnet.jl | 0 .../glm-with-interactions-for-Q.jl | 0 .../estimators => estimators-configs}/glm.jl | 0 .../glmnet-with-interactions-for-Q.jl | 0 .../glmnet.jl | 0 .../superlearning-with-interactions-for-Q.jl | 0 .../superlearning.jl | 0 .../tuned-xgboost.jl | 0 12 files changed, 53 insertions(+), 27 deletions(-) create mode 100644 docs/src/cli.md delete mode 100644 docs/src/environment.md rename {docs/src/estimators => estimators-configs}/G-superlearning-Q-glm.jl (100%) rename {docs/src/estimators => estimators-configs}/G-superlearning-Q-glmnet.jl (100%) rename {docs/src/estimators => estimators-configs}/glm-with-interactions-for-Q.jl (100%) rename {docs/src/estimators => estimators-configs}/glm.jl 
(100%) rename {docs/src/estimators => estimators-configs}/glmnet-with-interactions-for-Q.jl (100%) rename {docs/src/estimators => estimators-configs}/glmnet.jl (100%) rename {docs/src/estimators => estimators-configs}/superlearning-with-interactions-for-Q.jl (100%) rename {docs/src/estimators => estimators-configs}/superlearning.jl (100%) rename {docs/src/estimators => estimators-configs}/tuned-xgboost.jl (100%) diff --git a/docs/src/cli.md b/docs/src/cli.md new file mode 100644 index 0000000..b6127e2 --- /dev/null +++ b/docs/src/cli.md @@ -0,0 +1,51 @@ +# The Command Line Interface + +## Installing the CLI + +### Via Docker (requires Docker) + +While we are getting close to providing a standalone application, the most reliable way to use the app is still via the provided [Docker container](https://hub.docker.com/r/olivierlabayle/targeted-estimation/tags). In this container, the command line interface is accessible and can be used directly. For example via: + +```bash +docker run -it --rm -v HOST_DIR:CONTAINER_DIR olivierlabayle/targeted-estimation:TAG tmle --help +``` + +where `HOST_DIR:CONTAINER_DIR` will map the host directory `HOST_DIR` to the container's `CONTAINER_DIR` and `TAG` is the currently released version of the project. + +### Build (requires Julia) + +Alternatively, provided you have Julia installed, you can build the app via: + +```bash +julia --project deps/build_app.jl app +``` + +Be low is a description of the functionalities offered by the CLI. + +## CLI Description + +The CLI contains 3 sub-commands: + +- `tmle`: To Run TMLE on a dataset (see [tmle command](@ref)). +- `sieve-variance-plateau`: To correct the variance of an estimator for non i.i.d data via [Sieve Variance Plateau](https://biostats.bepress.com/ucbbiostat/paper322/) (see [sieve-variance-plateau command](@ref)). 
+- `make-summary`: Combines multiple outputs from a `tmle` run into one output file (see [make-summary command](@ref)) + +### tmle command + +Arguments: + +- dataset: A dataset either in .csv or .arrow format +- estimands: A file containing a serialized Configuration object. +- estimators: A custom julia file containing the estimators to use. Several examples are provided [here](https://github.com/TARGENE/TargetedEstimation.jl/estimators-configs). Alternatively, to point to any of them, the name of the file can be supplied without the ".jl" extension. (e.g. "superlearning"). + +Options: + +- -v, --verbosity: Verbosity level. +- -o, --outputs: Ouputs to be generated. +- --chunksize <100::Int>: Results are written in batches of size chunksize. +- -r, --rng <123::Int>: Random seed (Only used for estimands ordering at the moment). +- -c, --cache-strategy: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size"). + +Flags: + +- -s, --sort-estimands: Sort estimands to minimize cache usage. A brute force approach will be used, resulting in exponentially long sorting time (Only appropriate for small number of estimands). \ No newline at end of file diff --git a/docs/src/environment.md b/docs/src/environment.md deleted file mode 100644 index 24a40dc..0000000 --- a/docs/src/environment.md +++ /dev/null @@ -1,22 +0,0 @@ -# The Run Environment - -## General usage - -At this point in time, the package depends on several R dependencies which makes it difficult to package as a single Julia executable. We thus rely on a docker container for the execution of the various command line interfaces. Some familiarity with [Docker](https://docs.docker.com/get-started/) or [Singularity](https://docs.sylabs.io/guides/3.0/user-guide/quick_start.html) is thus beneficial. - -- The container is available for download from the [Docker registry](https://hub.docker.com/r/olivierlabayle/targeted-estimation/tags). 
-- In this container, the project is stored in `/TargetedEstimation.jl`, as such, any script can be run using the following template command: `julia --startup-file=no --project=/TargetedEstimation.jl /TargetedEstimation.jl/scripts/SCRIPT_NAME.jl`. Dont forget to mount the output directory in order to retrieve the output data. - -Example Docker command: - -```bash -docker run -it --rm -v HOST_DIR:CONTAINER_DIR olivierlabayle/targeted-estimation:0.7 \ -julia --project=/TargetedEstimation.jl /TargetedEstimation.jl/scripts/tmle.jl --help -``` - -## Alternatives - -Here are a couple alternatives to using the Docker container: - -- If you are not using the HAL algorithm, you can simply clone this repository and instantiate the project in order to use the scripts or any other functionality. -- If you are using the HAL algorithm you can use the `docker/Dockerfile` as a guide for your local installation. diff --git a/docs/src/index.md b/docs/src/index.md index 10952d5..4e032b5 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,11 +1,8 @@ # TargetedEstimation.jl -The goal of this package, eventually, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package. +The goal of this package, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package. 
-The various command line interfaces provided here are described in the following sections and can be run in the associated [Docker container](https://hub.docker.com/r/olivierlabayle/targeted-estimation/tags): - -- [Targeted Minimum Loss Based Estimation](@ref): The main command line interface provided in this project to run TMLE. -- [Sieve Variance Plateau Estimation](@ref): Variance correction for non i.i.d. data. +[The Command Line Interface](@ref) We also provide extensions to the [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) universe that are particularly useful in statistical genetics (but not restricted to it): diff --git a/docs/src/estimators/G-superlearning-Q-glm.jl b/estimators-configs/G-superlearning-Q-glm.jl similarity index 100% rename from docs/src/estimators/G-superlearning-Q-glm.jl rename to estimators-configs/G-superlearning-Q-glm.jl diff --git a/docs/src/estimators/G-superlearning-Q-glmnet.jl b/estimators-configs/G-superlearning-Q-glmnet.jl similarity index 100% rename from docs/src/estimators/G-superlearning-Q-glmnet.jl rename to estimators-configs/G-superlearning-Q-glmnet.jl diff --git a/docs/src/estimators/glm-with-interactions-for-Q.jl b/estimators-configs/glm-with-interactions-for-Q.jl similarity index 100% rename from docs/src/estimators/glm-with-interactions-for-Q.jl rename to estimators-configs/glm-with-interactions-for-Q.jl diff --git a/docs/src/estimators/glm.jl b/estimators-configs/glm.jl similarity index 100% rename from docs/src/estimators/glm.jl rename to estimators-configs/glm.jl diff --git a/docs/src/estimators/glmnet-with-interactions-for-Q.jl b/estimators-configs/glmnet-with-interactions-for-Q.jl similarity index 100% rename from docs/src/estimators/glmnet-with-interactions-for-Q.jl rename to estimators-configs/glmnet-with-interactions-for-Q.jl diff --git a/docs/src/estimators/glmnet.jl b/estimators-configs/glmnet.jl similarity index 100% rename from docs/src/estimators/glmnet.jl rename to 
estimators-configs/glmnet.jl diff --git a/docs/src/estimators/superlearning-with-interactions-for-Q.jl b/estimators-configs/superlearning-with-interactions-for-Q.jl similarity index 100% rename from docs/src/estimators/superlearning-with-interactions-for-Q.jl rename to estimators-configs/superlearning-with-interactions-for-Q.jl diff --git a/docs/src/estimators/superlearning.jl b/estimators-configs/superlearning.jl similarity index 100% rename from docs/src/estimators/superlearning.jl rename to estimators-configs/superlearning.jl diff --git a/docs/src/estimators/tuned-xgboost.jl b/estimators-configs/tuned-xgboost.jl similarity index 100% rename from docs/src/estimators/tuned-xgboost.jl rename to estimators-configs/tuned-xgboost.jl From 600439c99c712248d2bf6b4971717b7c6980e3d4 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 8 Dec 2023 15:00:11 +0000 Subject: [PATCH 37/71] update docs --- docs/src/cli.md | 26 +++----------------------- docs/src/index.md | 2 +- docs/src/make_summary.md | 21 +++++++++++++++++++++ docs/src/merge.md | 17 ----------------- docs/src/sieve_variance.md | 31 ++++++++++++++++++------------- docs/src/tmle_estimation.md | 36 +++++++++++++++++------------------- 6 files changed, 60 insertions(+), 73 deletions(-) create mode 100644 docs/src/make_summary.md delete mode 100644 docs/src/merge.md diff --git a/docs/src/cli.md b/docs/src/cli.md index b6127e2..820f3c2 100644 --- a/docs/src/cli.md +++ b/docs/src/cli.md @@ -1,6 +1,6 @@ -# The Command Line Interface +# The Command Line Interface (CLI) -## Installing the CLI +## CLI Installation ### Via Docker (requires Docker) @@ -20,7 +20,7 @@ Alternatively, provided you have Julia installed, you can build the app via: julia --project deps/build_app.jl app ``` -Be low is a description of the functionalities offered by the CLI. +Below is a description of the functionalities offered by the CLI.
## CLI Description @@ -29,23 +29,3 @@ The CLI contains 3 sub-commands: - `tmle`: To Run TMLE on a dataset (see [tmle command](@ref)). - `sieve-variance-plateau`: To correct the variance of an estimator for non i.i.d data via [Sieve Variance Plateau](https://biostats.bepress.com/ucbbiostat/paper322/) (see [sieve-variance-plateau command](@ref)). - `make-summary`: Combines multiple outputs from a `tmle` run into one output file (see [make-summary command](@ref)) - -### tmle command - -Arguments: - -- dataset: A dataset either in .csv or .arrow format -- estimands: A file containing a serialized Configuration object. -- estimators: A custom julia file containing the estimators to use. Several examples are provided [here](https://github.com/TARGENE/TargetedEstimation.jl/estimators-configs). Alternatively, to point to any of them, the name of the file can be supplied without the ".jl" extension. (e.g. "superlearning"). - -Options: - -- -v, --verbosity: Verbosity level. -- -o, --outputs: Ouputs to be generated. -- --chunksize <100::Int>: Results are written in batches of size chunksize. -- -r, --rng <123::Int>: Random seed (Only used for estimands ordering at the moment). -- -c, --cache-strategy: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size"). - -Flags: - -- -s, --sort-estimands: Sort estimands to minimize cache usage. A brute force approach will be used, resulting in exponentially long sorting time (Only appropriate for small number of estimands). \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index 4e032b5..2f78304 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,7 +2,7 @@ The goal of this package, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. 
To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package. -[The Command Line Interface](@ref) +[The Command Line Interface (CLI)](@ref) We also provide extensions to the [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) universe that are particularly useful in statistical genetics (but not restricted to it): diff --git a/docs/src/make_summary.md b/docs/src/make_summary.md new file mode 100644 index 0000000..2463498 --- /dev/null +++ b/docs/src/make_summary.md @@ -0,0 +1,21 @@ +# Merging TMLE outputs + +## Usage + +```bash +tmle make-summary --help +``` + +Merges tmle outputs in a single file. + +Args: + +- prefix: Prefix to .hdf5 files to be used to create the summary file + +Options: + +- -o, --outputs : Outputs configuration. + +Flags: + +- -h, --help: Print this help message. diff --git a/docs/src/merge.md b/docs/src/merge.md deleted file mode 100644 index 924bf6b..0000000 --- a/docs/src/merge.md +++ /dev/null @@ -1,17 +0,0 @@ -# Merging TMLE and SVP outputs - -If multiple `scripts/tmle.jl` and potentially `scripts/sieve_variance.jl` have been run, you may want to combine the generated CSV outputs in a single result file. This is the purpose of this command line interface. - -## Usage - -You can merge summary CSV files by running: - -```bash -julia scripts/merge_summaries.jl TMLE_PREFIX OUT --sieve-prefix=SIEVE_PREFIX -``` - -where: - -- `TMLE_PREFIX`: is a prefix to all output CSV files generated by the `scripts/tmle.jl` script. -- `OUT`: is a path to the output file that will be generated. -- `--sieve-prefix`: is an optional prefix to the CSV output of the `scripts/sieve_variance.jl` script. diff --git a/docs/src/sieve_variance.md b/docs/src/sieve_variance.md index 3abd1fa..d7c61a9 100644 --- a/docs/src/sieve_variance.md +++ b/docs/src/sieve_variance.md @@ -4,20 +4,25 @@ If the i.i.d.
(independent and identically distributed) hypothesis is not satisf ## Usage -At the moment, this script is restricted to the analysis of population genetics datasets mostly in the context of [TarGene](https://targene.github.io/targene-pipeline/stable/sieve_variance/). It can be run with the following command: - ```bash -julia scripts/sieve_variance.jl PREFIX GRM_PREFIX OUT_PREFIX - --nb-estimators=100 - --max-tau=1.0 - --verbosity=1 +tmle sieve-variance-plateau --help ``` -where: +Runs Sieve Variance Plateau correction. + +Args: + +- input_prefix: Prefix to outputs from the tmle command. + +Options: + +- -o, --out Output filename in hdf5 format. +- -g, --grm-prefix : Prefix to the aggregated GRM. +- -v, --verbosity <0>: Verbosity level. +- -n, --n-estimators <10>: Number of variance estimators to build for each estimate. +- -m, --max-tau <0.8>: Maximum distance between any two individuals. +- -e, --estimator-key : Estimator to use to proceed with sieve variance correction. + +Flags: -- `PREFIX`: A prefix to HDF5 files generated by `scripts/tmle.jl` (potentially multiple). -- `GRM_PREFIX`: A prefix to the aggregated Genetic Relationship Matrix. -- `OUT_PREFIX`: Output prefix to save SVP curves and final variance estimates. -- `--nb-estimators`: The number of points per SVP curve. -- `--max-tau`: Maximum distance between individuals to consider. -- `--verbosity`: Verbosity level. +- -h, --help: Print this help message. 
diff --git a/docs/src/tmle_estimation.md b/docs/src/tmle_estimation.md index 78984cb..67b6c7a 100644 --- a/docs/src/tmle_estimation.md +++ b/docs/src/tmle_estimation.md @@ -4,27 +4,25 @@ This is the main script in this package, it provides a command line interface fo ## Usage -Provided you have the package and all dependencies installed or in the provided docker container, you can run TMLE via the following command: - -```bash -julia scripts/tmle.jl DATAFILE PARAMFILE OUTFILE - --estimator-file=docs/estimators/glmnet.jl - --hdf5-out=output.hdf5 - --pval-threshold=0.05 - --chunksize=100 - --verbosity=1 -``` +Runs TMLE estimation. -where: +Args: + +- dataset: A dataset either in .csv or .arrow format +- estimands: A file containing a serialized Configuration object. +- estimators: A custom julia file containing the estimators to use. Several examples are provided [here](https://github.com/TARGENE/TargetedEstimation.jl/estimators-configs). Alternatively, to point to any of them, the name of the file can be supplied without the ".jl" extension. (e.g. "superlearning"). + +Options: + +- -v, --verbosity: Verbosity level. +- -o, --outputs: Outputs to be generated. +- --chunksize <100::Int>: Results are written in batches of size chunksize. +- -r, --rng <123::Int>: Random seed (Only used for estimands ordering at the moment). +- -c, --cache-strategy: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size"). + +Flags: -- `DATAFILE`: A CSV (.csv) or Arrow (.arrow) file containing the tabular data. The format will be infered from the extension. -- `PARAMFILE`: A serialized [YAML](https://targene.github.io/TMLE.jl/stable/user_guide/#Reading-Parameters-from-YAML-files) or [bin](https://docs.julialang.org/en/v1/stdlib/Serialization/) file containing the estimands to be estimated.
The YAML file can be written by hand or programmatically using the [TMLE.parameters_to_yaml](https://targene.github.io/TMLE.jl/stable/api/#TMLE.parameters_to_yaml-Tuple{Any,%20Any}) function. -- `OUTFILE`: The output .csv file (see [Output file](@ref)) -- `--estimator-file`: A Julia file describing the TMLE specifications (see [Estimator File](@ref)). -- `--hdf5-out`: if provided, a path to a file to save the influence curves. -- `--pval-threshold`: Only "significant" (< this threshold) estimates will actually have their influence curves stored in the previous file. -- `--chunksize`: To manage memory, the results are appended to the output files in batches the size of which can be controlled via this option. -- `--verbosity`: The verbosity level. +- -s, --sort-estimands: Sort estimands to minimize cache usage. A brute force approach will be used, resulting in exponentially long sorting time (Only appropriate for small number of estimands). ## Output file From 2c8959323abcd289b31b188ad45d1194a6d5b7c7 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Mon, 11 Dec 2023 13:58:04 +0000 Subject: [PATCH 38/71] make some args optional --- Project.toml | 2 +- docs/make.jl | 2 +- estimators-configs/G-superlearning-Q-glm.jl | 14 +++--- .../G-superlearning-Q-glmnet.jl | 14 +++--- .../glm-with-interactions-for-Q.jl | 14 +++--- estimators-configs/glm.jl | 14 +++--- .../glmnet-with-interactions-for-Q.jl | 14 +++--- estimators-configs/glmnet.jl | 16 +++---- .../superlearning-with-interactions-for-Q.jl | 14 +++--- estimators-configs/superlearning.jl | 16 +++---- estimators-configs/tuned-xgboost.jl | 14 +++--- src/runner.jl | 20 +++++--- src/utils.jl | 46 +++++++++++++------ test/runner.jl | 28 ++++++++--- test/sieve_variance.jl | 6 ++- test/summary.jl | 13 +++++- test/utils.jl | 46 ++++++++++++++----- 17 files changed, 176 insertions(+), 117 deletions(-) diff --git a/Project.toml b/Project.toml index 4518080..48583ae 100644 --- a/Project.toml +++ b/Project.toml @@ -53,7 +53,7 @@ 
MLJModels = "0.16" MLJXGBoostInterface = "0.3.4" MultipleTesting = "0.6.0" Optim = "1.7" -TMLE = "0.12" +TMLE = "0.12.1" Tables = "1.10.1" YAML = "0.4.9" julia = "1.7, 1" diff --git a/docs/make.jl b/docs/make.jl index 0280c0d..e6aee0b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,7 +15,7 @@ makedocs( modules = [TargetedEstimation], pages=[ "Home" => "index.md", - "Command Line Interfaces" => ["environment.md", "tmle_estimation.md", "sieve_variance.md", "merge.md"], + "Command Line Interfaces" => ["environment.md", "tmle_estimation.md", "sieve_variance.md", "make_summary.md"], "MLJ Extensions" => ["models.md", "resampling.md"], ] ) diff --git a/estimators-configs/G-superlearning-Q-glm.jl b/estimators-configs/G-superlearning-Q-glm.jl index 83d44d0..02da072 100644 --- a/estimators-configs/G-superlearning-Q-glm.jl +++ b/estimators-configs/G-superlearning-Q-glm.jl @@ -1,13 +1,7 @@ xgboost_classifier = XGBoostClassifier(tree_method="hist") -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = LinearRegressor(), # For the estimation of E[Y|W, T]: binary target Q_binary = LogisticClassifier(lambda=0.), @@ -30,4 +24,8 @@ tmle_spec = ( cache=false ) ) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/estimators-configs/G-superlearning-Q-glmnet.jl b/estimators-configs/G-superlearning-Q-glmnet.jl index 47094ef..bb8c495 100644 --- a/estimators-configs/G-superlearning-Q-glmnet.jl +++ b/estimators-configs/G-superlearning-Q-glmnet.jl @@ -1,13 +1,7 @@ xgboost_classifier = 
XGBoostClassifier(tree_method="hist") -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = GLMNetRegressor(resampling=CV(nfolds=3)), # For the estimation of E[Y|W, T]: binary target Q_binary = GLMNetClassifier(resampling=StratifiedCV(nfolds=3)), @@ -30,4 +24,8 @@ tmle_spec = ( cache=false ) ) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/estimators-configs/glm-with-interactions-for-Q.jl b/estimators-configs/glm-with-interactions-for-Q.jl index edaeca7..8959c76 100644 --- a/estimators-configs/glm-with-interactions-for-Q.jl +++ b/estimators-configs/glm-with-interactions-for-Q.jl @@ -1,11 +1,5 @@ -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = Pipeline( RestrictedInteractionTransformer(order=2, primary_variables_patterns=[r"^rs[0-9]+"]), LinearRegressor(), @@ -19,4 +13,8 @@ tmle_spec = ( ), # For the estimation of p(T| W) G = LogisticClassifier(lambda=0.) 
+) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/estimators-configs/glm.jl b/estimators-configs/glm.jl index 6aea32a..9a8b166 100644 --- a/estimators-configs/glm.jl +++ b/estimators-configs/glm.jl @@ -1,14 +1,12 @@ -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = LinearRegressor(), # For the estimation of E[Y|W, T]: binary target Q_binary = LogisticClassifier(lambda=0.), # For the estimation of p(T| W) G = LogisticClassifier(lambda=0.) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/estimators-configs/glmnet-with-interactions-for-Q.jl b/estimators-configs/glmnet-with-interactions-for-Q.jl index 003cd7e..b255974 100644 --- a/estimators-configs/glmnet-with-interactions-for-Q.jl +++ b/estimators-configs/glmnet-with-interactions-for-Q.jl @@ -1,11 +1,5 @@ -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = Pipeline( RestrictedInteractionTransformer(order=2, primary_variables_patterns=[r"^rs[0-9]+"]), GLMNetRegressor(resampling=CV(nfolds=3)), @@ -19,4 +13,8 @@ tmle_spec = ( ), # For the estimation of 
p(T| W) G = GLMNetClassifier(resampling=StratifiedCV(nfolds=3)) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/estimators-configs/glmnet.jl b/estimators-configs/glmnet.jl index 27a89f3..5fd2584 100644 --- a/estimators-configs/glmnet.jl +++ b/estimators-configs/glmnet.jl @@ -1,14 +1,12 @@ -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = GLMNetRegressor(resampling=CV(nfolds=3)), - # For the estimation of E[Y|W, T]: binary target + # For the estimation of E[Y|W, T]: binary outcome Q_binary = GLMNetClassifier(resampling=StratifiedCV(nfolds=3)), # For the estimation of p(T| W) G = GLMNetClassifier(resampling=StratifiedCV(nfolds=3)) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/estimators-configs/superlearning-with-interactions-for-Q.jl b/estimators-configs/superlearning-with-interactions-for-Q.jl index f3d75ce..df2372a 100644 --- a/estimators-configs/superlearning-with-interactions-for-Q.jl +++ b/estimators-configs/superlearning-with-interactions-for-Q.jl @@ -1,14 +1,8 @@ xgboost_regressor = XGBoostRegressor(tree_method="hist") xgboost_classifier = XGBoostClassifier(tree_method="hist") -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation 
of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = Stack( metalearner = LinearRegressor(fit_intercept=false), resampling = CV(nfolds=3), @@ -81,4 +75,8 @@ tmle_spec = ( cache=false ) ) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/estimators-configs/superlearning.jl b/estimators-configs/superlearning.jl index 4bb72f6..5f2ce4c 100644 --- a/estimators-configs/superlearning.jl +++ b/estimators-configs/superlearning.jl @@ -1,14 +1,8 @@ xgboost_regressor = XGBoostRegressor(tree_method="hist") xgboost_classifier = XGBoostClassifier(tree_method="hist") -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = Stack( metalearner = LinearRegressor(fit_intercept=false), resampling = CV(nfolds=3), @@ -27,7 +21,7 @@ tmle_spec = ( cache=false ) ), - # For the estimation of E[Y|W, T]: binary target + # For the estimation of E[Y|W, T]: binary outcome Q_binary = Stack( metalearner = LogisticClassifier(lambda=0., fit_intercept=false), resampling = StratifiedCV(nfolds=3), @@ -65,4 +59,8 @@ tmle_spec = ( cache=false ) ) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/estimators-configs/tuned-xgboost.jl b/estimators-configs/tuned-xgboost.jl index 6432206..d7318c5 100644 --- a/estimators-configs/tuned-xgboost.jl +++ b/estimators-configs/tuned-xgboost.jl @@ -1,14 +1,8 @@ xgboost_regressor = XGBoostRegressor(tree_method="hist") 
xgboost_classifier = XGBoostClassifier(tree_method="hist") -tmle_spec = ( - # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage - cache = false, - # Controls whether the fluctuation is weighted or not - weighted_fluctuation = false, - # Propensity score threshold - threshold = 1e-8, - # For the estimation of E[Y|W, T]: continuous target +default_models = TMLE.default_models( + # For the estimation of E[Y|W, T]: continuous outcome Q_continuous = TunedModel( model = xgboost_regressor, resampling = CV(nfolds=3), @@ -44,4 +38,8 @@ tmle_spec = ( measure = log_loss, cache=false ) +) + +ESTIMATORS = ( + TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8), ) \ No newline at end of file diff --git a/src/runner.jl b/src/runner.jl index f25e5e8..f7a8166 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -7,7 +7,9 @@ mutable struct Runner outputs::Outputs verbosity::Int failed_nuisance::Set - function Runner(dataset, estimands, estimators; + function Runner(dataset; + estimands="generateATEs", + estimators="glmnet", verbosity=0, outputs=Outputs(), chunksize=100, @@ -16,11 +18,11 @@ mutable struct Runner sort_estimands=false ) # Retrieve TMLE specifications - estimators = TargetedEstimation.load_tmle_spec(estimators) + estimators = TargetedEstimation.load_tmle_spec(file=estimators) # Load dataset dataset = TargetedEstimation.instantiate_dataset(dataset) # Read parameter files - estimands = TargetedEstimation.proofread_estimands(estimands, dataset) + estimands = TargetedEstimation.build_estimands_list(estimands, dataset) if sort_estimands estimands = groups_ordering(estimands; brute_force=true, @@ -115,7 +117,9 @@ end """ - tmle(dataset, estimands, estimators; + tmle(dataset; + estimands="generateATEs", + estimators="glmnet", verbosity=0, outputs=Outputs(), chunksize=100, @@ -144,7 +148,9 @@ TMLE CLI.
- `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time). """ -@cast function tmle(dataset::String, estimands::String, estimators::String; +@cast function tmle(dataset::String; + estimands::String="default_ATE", + estimators::String="glmnet", verbosity::Int=0, outputs::Outputs=Outputs(), chunksize::Int=100, @@ -152,7 +158,9 @@ TMLE CLI. cache_strategy::String="release-unusable", sort_estimands::Bool=false ) - runner = Runner(dataset, estimands, estimators; + runner = Runner(dataset; + estimands=estimands, + estimators=estimators, verbosity=verbosity, outputs=outputs, chunksize=chunksize, diff --git a/src/utils.jl b/src/utils.jl index a5c4c24..9ab403c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -74,6 +74,34 @@ function proofread_estimands(filename, dataset) return estimands end +""" +This explicitly requires that the following columns belong to the dataset: + +- `T`: for the treatment variable +- `Y`: for the outcome variable +- `^W`: for the confounding variables + +All ATE parameters are generated.
+""" +function TMLE.generateATEs(dataset) + colnames = names(dataset) + "T" ∈ colnames || throw(ArgumentError("No column 'T' found in the dataset for the treatment variable.")) + "Y" ∈ colnames || throw(ArgumentError("No column 'Y' found in the dataset for the outcome variable.")) + confounding_variables = Tuple(name for name in colnames if occursin(r"^W", name)) + length(confounding_variables) > 0 || throw(ArgumentError("Could not find any confounding variable (starting with 'W') in the dataset.")) + + return generateATEs(dataset, (:T, ), :Y; confounders=confounding_variables) +end + +function build_estimands_list(estimands_pattern, dataset) + estimands = if estimands_pattern == "generateATEs" + generateATEs(dataset) + else + proofread_estimands(estimands_pattern, dataset) + end + return estimands +end + ##################################################################### #####  ADDITIONAL METHODS #### ##################################################################### @@ -81,7 +109,6 @@ end TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names = NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt]) - """ instantiate_dataset(path::String) @@ -146,18 +173,11 @@ variables(Ψ::TMLE.Estimand) = Set([ Iterators.flatten(values(Ψ.treatment_confounders))... ]) -load_tmle_spec(file::Nothing) = ( - TMLE = TMLEE( - models = TMLE.default_models( - Q_binary = LogisticClassifier(lambda=0.), - Q_continuous = LinearRegressor(), - G = LogisticClassifier(lambda=0.) - ), - weighted = true, - ), - ) - -function load_tmle_spec(file) +function load_tmle_spec(;file="glmnet") + file = endswith(file, ".jl") ? 
file : joinpath( + pkgdir(TargetedEstimation), + "estimators-configs", + string(file, ".jl")) include(abspath(file)) return ESTIMATORS end diff --git a/test/runner.jl b/test/runner.jl index 3acccfc..f52ead4 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -26,9 +26,9 @@ include(joinpath(TESTDIR, "testutils.jl")) jls=TargetedEstimation.JLSOutput(filename="output.jls", pval_threshold=1e-5), ) runner = Runner( - "data.csv", - estimands_filename, - joinpath(CONFIGDIR, "tmle_ose_config.jl"); + "data.csv"; + estimands=estimands_filename, + estimators=joinpath(CONFIGDIR, "tmle_ose_config.jl"), outputs=outputs, cache_strategy="release-unusable", ) @@ -121,7 +121,9 @@ end datafile = string("data.", format) build_dataset(;n=1000, format=format) for chunksize in (4, 10) - tmle(datafile, estimands_filename, estimatorfile; + tmle(datafile; + estimands=estimands_filename, + estimators=estimatorfile, outputs=outputs, chunksize=chunksize, ) @@ -161,7 +163,10 @@ end TMLE.write_json(estimandsfile, configuration) estimatorfile = joinpath(CONFIGDIR, "ose_config.jl") datafile = "data.csv" - tmle(datafile, estimandsfile, estimatorfile; outputs=outputs) + tmle(datafile; + estimands=estimandsfile, + estimators=estimatorfile, + outputs=outputs) # Essential results results_from_json = TMLE.read_json(outputs.json.filename) @@ -190,7 +195,11 @@ end estimatorfile = joinpath(CONFIGDIR, "problematic_tmle_ose_config.jl") datafile = "data.csv" - runner = Runner(datafile, estimandsfile, estimatorfile; outputs=outputs); + runner = Runner(datafile; + estimands=estimandsfile, + estimators=estimatorfile, + outputs=outputs + ); runner() # Test failed nuisance estimates (T2 model) @@ -242,7 +251,12 @@ end estimatorfile = joinpath(CONFIGDIR, "ose_config.jl") datafile = "data.csv" - tmle(datafile, estimandsfile, estimatorfile; outputs=outputs, chunksize=2) + tmle(datafile; + estimands=estimandsfile, + estimators=estimatorfile, + outputs=outputs, + chunksize=2 + ) # JLS Output results = [] diff --git 
a/test/sieve_variance.jl b/test/sieve_variance.jl index 7317854..62fb7a9 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -56,7 +56,11 @@ function build_tmle_output_file(sample_ids, estimandfile, outprefix; outputs = TargetedEstimation.Outputs( hdf5=TargetedEstimation.HDF5Output(filename=string(outprefix, ".hdf5"), pval_threshold=pval, sample_ids=true), ) - tmle("data.csv", estimandfile, estimatorfile, outputs=outputs) + tmle("data.csv"; + estimands=estimandfile, + estimators=estimatorfile, + outputs=outputs + ) end function basic_variance_implementation(matrix_distance, influence_curve, n_obs) diff --git a/test/summary.jl b/test/summary.jl index 6903e88..1b8f5d8 100644 --- a/test/summary.jl +++ b/test/summary.jl @@ -23,14 +23,23 @@ include(joinpath(TESTDIR, "testutils.jl")) config_1 = statistical_estimands_only_config() configfile_1 = joinpath(tmpdir, "configuration_1.json") TMLE.write_json(configfile_1, config_1) - tmle(datafile, configfile_1, estimatorfile; outputs=tmle_output_1, chunksize=3) + tmle(datafile; + estimands=configfile_1, + estimators=estimatorfile, + outputs=tmle_output_1, + chunksize=3 + ) # Second Run tmle_output_2 = TargetedEstimation.Outputs(hdf5=TargetedEstimation.HDF5Output(filename="tmle_output_2.hdf5")) config_2 = causal_and_composed_estimands_config() configfile_2 = joinpath(tmpdir, "configuration_2.json") TMLE.write_json(configfile_2, config_2) - tmle(datafile, configfile_2, estimatorfile; outputs=tmle_output_2) + tmle(datafile; + estimands=configfile_2, + estimators=estimatorfile, + outputs=tmle_output_2 + ) # Make summary files outputs = TargetedEstimation.Outputs( diff --git a/test/utils.jl b/test/utils.jl index 7525168..9bedbb0 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -14,27 +14,33 @@ check_type(treatment_value, ::Type{T}) where T = @test treatment_value isa T check_type(treatment_values::NamedTuple, ::Type{T}) where T = @test treatment_values.case isa T && treatment_values.control isa T -TESTDIR = 
joinpath(pkgdir(TargetedEstimation), "test") +PKGDIR = pkgdir(TargetedEstimation) +TESTDIR = joinpath(PKGDIR, "test") include(joinpath(TESTDIR, "testutils.jl")) -@testset "Test load_tmle_spec: with configuration file" begin - estimators = TargetedEstimation.load_tmle_spec(joinpath(TESTDIR, "config", "tmle_ose_config.jl")) +@testset "Test load_tmle_spec" begin + # Default + noarg_estimators = TargetedEstimation.load_tmle_spec() + default_models = noarg_estimators.TMLE.models + @test noarg_estimators.TMLE isa TMLEE + @test default_models.Q_binary_default.glm_net_classifier isa GLMNetClassifier + @test default_models.Q_continuous_default.glm_net_regressor isa GLMNetRegressor + @test default_models.G_default isa GLMNetClassifier + # From template name + for file in readdir(joinpath(PKGDIR, "estimators-configs")) + configname = replace(file, ".jl" => "") + estimators = TargetedEstimation.load_tmle_spec(;file=configname) + @test estimators.TMLE isa TMLEE + end + # From explicit file + estimators = TargetedEstimation.load_tmle_spec(file=joinpath(TESTDIR, "config", "tmle_ose_config.jl")) @test estimators.TMLE isa TMLE.TMLEE @test estimators.OSE isa TMLE.OSE @test estimators.TMLE.weighted === true @test estimators.TMLE.models.G_default === estimators.OSE.models.G_default @test estimators.TMLE.models.G_default isa MLJBase.ProbabilisticStack end - -@testset "Test load_tmle_spec: no configuration file" begin - estimators = TargetedEstimation.load_tmle_spec(nothing) - @test !haskey(estimators, :OSE) - @test haskey(estimators, :TMLE) - @test estimators.TMLE.weighted === true - @test estimators.TMLE.models.G_default isa LogisticClassifier -end - @testset "Test convert_treatment_values" begin treatment_types = Dict(:T₁=> Union{Missing, Bool}, :T₂=> Int) newT = TargetedEstimation.convert_treatment_values((T₁=1,), treatment_types) @@ -68,6 +74,22 @@ end # Clean estimands file rm(filename) end + +@testset "Test generateATEs" begin + dataset = DataFrame(C=[1, 2, 3, 4],) + @test_throws 
ArgumentError TargetedEstimation.build_estimands_list("generateATEs", dataset) + dataset.T = [0, 1, missing, 2] + @test_throws ArgumentError TargetedEstimation.build_estimands_list("generateATEs", dataset) + dataset.Y = [0, 1, 2, 2] + dataset.W1 = [1, 1, 1, 1] + dataset.W_2 = [1, 1, 1, 1] + ATEs = TargetedEstimation.build_estimands_list("generateATEs", dataset) + @test ATEs == [ + TMLE.StatisticalATE(:Y, (T = (case = 1, control = 0),), (T = (:W1, :W_2),), ()), + TMLE.StatisticalATE(:Y, (T = (case = 2, control = 0),), (T = (:W1, :W_2),), ()), + TMLE.StatisticalATE(:Y, (T = (case = 2, control = 1),), (T = (:W1, :W_2),), ()) + ] +end @testset "Test coerce_types!" begin Ψ = IATE( outcome=:Ycont, From 46c914f110ae5a0f349055121235d45585bfe7f6 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Mon, 11 Dec 2023 14:01:55 +0000 Subject: [PATCH 39/71] update docstrings --- src/TargetedEstimation.jl | 2 -- src/runner.jl | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index aa01fda..74856d8 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -43,8 +43,6 @@ include(joinpath("models", "glmnet.jl")) include(joinpath("models", "adaptive_interaction_transformer.jl")) include(joinpath("models", "biallelic_snp_encoder.jl")) - -"""TL CLI.""" @main export Runner, tmle, sieve_variance_plateau, make_summary diff --git a/src/runner.jl b/src/runner.jl index f7a8166..b3d0ec9 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -133,11 +133,11 @@ TMLE CLI. # Args - `dataset`: Data file (either .csv or .arrow) -- `estimands`: Estimands file (either .json or .yaml) -- `estimators`: A julia file containing the estimators to use. # Options +- `--estimands`: A string ("generateATEs") or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls) +- `--estimators`: A julia file containing the estimators to use. - `-v, --verbosity`: Verbosity level. 
- `-o, --outputs`: Ouputs to be generated. - `--chunksize`: Results are written in batches of size chunksize. From 6c5e3404785aba7cf23677995f7585c484a305c8 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 12 Dec 2023 06:28:34 +0000 Subject: [PATCH 40/71] add sample dataset --- data/sample_dataset.csv | 101 ++++++++++++++++++++++++++++++++++++++++ src/runner.jl | 2 +- 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 data/sample_dataset.csv diff --git a/data/sample_dataset.csv b/data/sample_dataset.csv new file mode 100644 index 0000000..6dbdd98 --- /dev/null +++ b/data/sample_dataset.csv @@ -0,0 +1,101 @@ +T,W_1,W_2,Y +false,0.18102554215580358,0.5450806703063027,2.9940372099784027 +false,0.3674899461902501,0.6384329204193084,3.6556468914971756 +false,0.6690584311411027,0.43792082835867485,3.6566892558262984 +false,0.04273056581197765,0.7757285573916282,3.4160794440612667 +false,0.4379909608992427,0.3047310991183463,2.789125420976082 +true,0.4832901297478609,0.7451730217771686,1.5859557539740305 +false,0.7639737291557767,0.8904601966212045,5.188296343573969 +true,0.9244830959536434,0.13974660123511873,0.4982270316308855 +false,0.5150451217580685,0.6374757520926937,3.935855162247756 +false,0.49504171746863457,0.8565155620522915,4.561765097844799 +false,0.7013399928855184,0.8136974906450547,4.829305702798148 +false,0.6935693200259092,0.7770630474285287,4.708377370910661 +false,0.8762727975157072,0.6985801341473621,4.853277788606795 +true,0.9442193089864095,0.152194194900529,4.037983372295075 +false,0.7392125921999604,0.15113069121626244,2.928937148645259 +false,0.5595446681103335,0.32420235015472953,3.0840500870314176 +true,0.5068756305502102,0.7933086736255439,4.003336818622111 +true,0.6460238520948196,0.09241238685905295,1.8598644023282838 +false,0.26637285700482627,0.32928469720586406,2.5254756391406956 +true,0.25314126112406954,0.005639780239579784,2.0525644282124227 +false,0.3516179276178317,0.562947511861783,3.3891190015292962 
+true,0.5273890936851706,0.5791653992166597,2.236487556841665 +true,0.7210925534616786,0.0815702398426259,3.0907289031725105 +false,0.18851150251759496,0.4507062485489093,2.7103706395089318 +false,0.637996251650204,0.2796408144910678,3.1179620782873023 +false,0.42784339776115243,0.05608349255807643,2.021998670506748 +false,0.36344680674044993,0.8176529855849277,4.187058502288485 +false,0.5776470899697952,0.5038402847145775,3.664411058533266 +false,0.4552051132605446,0.80168727438732,4.324422128772599 +true,0.16582015869838074,0.6517102569667845,3.4493844928436257 +true,0.3485177133589714,0.6950429952352082,3.6465473336626584 +false,0.7370786697973803,0.38967172496607816,3.634455876824883 +false,0.8097943410644535,0.30785356657086305,3.5631085002530503 +true,0.17116662327378251,0.38189260775289746,1.4561452308524758 +true,0.2239059463776638,0.37754440958420843,2.9358196480011927 +false,0.8482049921559374,0.7931999164105743,5.073800536916948 +false,0.6129496106778634,0.5582656110841486,3.9006286335176275 +false,0.9525421237374148,0.7075371298070849,5.027958774493786 +false,0.329471340006501,0.8244276908646733,4.133141608706676 +true,0.9183731870724761,0.5155924027190455,2.1766450039509775 +true,0.5406318800132754,0.7964424508760488,4.684487622068989 +false,0.820474440393214,0.3278374872665033,3.6201478925383053 +false,0.22304578643880224,0.15454141257308707,1.9141989300490125 +true,0.5987759444612732,0.8176931599179378,4.883829492724324 +true,0.5391280234619427,0.0800996880924989,-0.15332305595018275 +true,0.6195348270893413,0.04758713076380294,1.5044413418755769 +false,0.4197760589260566,0.6919387484370496,3.913552132910235 +false,0.45325909384306007,0.8196586617380355,4.361479188698554 +false,0.458012070794656,0.16787410906435518,2.3890361585879396 +false,0.8360692316060747,0.12572716005598905,3.0494749469842053 +false,0.68704221750134,0.9336977783694771,5.162792856806991 +false,0.3539590866764071,0.4938068514848526,3.201878898498728 
+false,0.15146985093210463,0.9318499781184257,4.0825781392628055 +true,0.7036713552821277,0.3110022402796051,4.247461621524256 +false,0.7858058549340399,0.7913869099880062,4.9487597218140555 +false,0.5516353577049822,0.7651183843708445,4.394357280603648 +false,0.33689370624999193,0.8200595760169511,4.122787054444181 +false,0.7103550345192344,0.5218538906399544,3.9851980589961844 +false,0.3437537135972244,0.7082383555963896,3.8223674411348094 +true,0.40543796744015514,0.07340489667656125,-0.44867162036508734 +false,0.418787685820859,0.9537197956213714,4.671368501348916 +false,0.3461876693258523,0.17116028512837467,2.196479242014222 +false,0.256693308150987,0.7535261803886308,3.7658816939059676 +false,0.15717578481324845,0.9086295629550201,4.041940493212971 +false,0.06397027012871725,0.748570362698747,3.3694111418545805 +true,0.5960710257852946,0.6663504027833114,3.367016019904959 +true,0.3313524247810329,0.6591751071404244,1.591879202485502 +true,0.09653466861970061,0.6363397790684187,2.9197943852934563 +false,0.2444274100956212,0.2098958139673206,2.1388072276125003 +true,0.128071302925437,0.2212452403166849,0.2377448923043096 +true,0.42570257768498054,0.19674477205988938,1.387006507647053 +true,0.9265378351770237,0.2414821377114318,3.7979295737435383 +false,0.49186280724413045,0.12746729440853555,2.384783686379066 +false,0.13454928219280093,0.4781845302027954,2.710629287272798 +true,0.7767793160877585,0.524392439209832,2.5943698653629785 +false,0.5975255336244989,0.390410633670742,3.3711308436058625 +true,0.36037123968128437,0.08296139972284933,-0.759956439158037 +false,0.0346420282305675,0.0617778348993705,1.2471920828108296 +false,0.002639461613289207,0.7009785602029246,3.0993420302705896 +false,0.6043418433725678,0.6486688636856162,4.156769560508067 +false,0.34013328112005636,0.1345850741469954,2.08159579861953 +false,0.08884383382645145,0.3088038486943412,2.1056178972765487 +false,0.27042373335313585,0.006888583580566321,1.556777817484051 
+false,0.2906905645217257,0.541835309258762,3.2236471704885887 +false,0.1159966466957052,0.5135297293779133,2.783128796238891 +true,0.8470732321945746,0.42816797170836707,2.0301351712154556 +true,0.8139519778944555,0.24560986612792113,1.7245624951948406 +false,0.060180250784984235,0.49673727324525174,2.622413005826214 +true,0.42137186429269047,0.16330846948665134,0.8216594084825344 +true,0.6798160152993227,0.5456659244000286,2.8674322144135957 +false,0.7229464588051613,0.43577650941532386,3.7550371808310166 +false,0.7375979790215319,0.9110656955098189,5.216039115140225 +false,0.5523948722167735,0.9189451331877909,4.858501050384263 +false,0.970875486702566,0.06627679288609234,3.143553167360774 +true,0.7978957981860126,0.16648221966941223,0.3279875381813927 +false,0.6832983780571866,0.6493963093415174,4.318015004991571 +false,0.523122205661108,0.05689713675107577,2.230304870061633 +false,0.8553411083874956,0.7608458973060162,4.992917779986071 +false,0.2884613639525233,0.40667243126317154,2.79960851563243 +true,0.4312330027658198,0.24848292057152732,2.172648627086597 diff --git a/src/runner.jl b/src/runner.jl index b3d0ec9..3b37e3d 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -149,7 +149,7 @@ TMLE CLI. - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time). 
""" @cast function tmle(dataset::String; - estimands::String="default_ATE", + estimands::String="generateATEs", estimators::String="glmnet", verbosity::Int=0, outputs::Outputs=Outputs(), From 4f1c53788041df85b321e72de2ecedbe8c9836c1 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 12 Dec 2023 07:00:44 +0000 Subject: [PATCH 41/71] do not build sysimage for now --- Comonicon.toml | 20 ++++++++++---------- docker/Dockerfile | 15 +++++++++------ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Comonicon.toml b/Comonicon.toml index 48c2a41..4acd652 100644 --- a/Comonicon.toml +++ b/Comonicon.toml @@ -5,16 +5,16 @@ completion = true quiet = false optimize = 2 -[sysimg] -incremental=true -filter_stdlibs=false +# [sysimg] +# incremental=true +# filter_stdlibs=false -[sysimg.precompile] -execution_file = ["deps/execute.jl"] +# [sysimg.precompile] +# execution_file = ["deps/execute.jl"] -[application] -incremental=true -filter_stdlibs=false +# [application] +# incremental=true +# filter_stdlibs=false -[application.precompile] -execution_file = ["deps/execute.jl"] \ No newline at end of file +# [application.precompile] +# execution_file = ["deps/execute.jl"] \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index 5eac723..51da080 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,24 +3,27 @@ FROM julia:1.9.4-bullseye ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - apt-get -y install gcc mono-mcs && \ + apt-get -y install gcc mono-mcs vim && \ rm -rf /var/lib/apt/lists/* ENV TZ=Europe/Amsterdam ENV JULIA_DEPOT_PATH=/opt -# Import project, build and precompile +# Import the project COPY . 
/TargetedEstimation.jl WORKDIR /TargetedEstimation.jl -# Precompile project +# Precompile the project RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' -# Precompile Sysimage project -RUN julia --project -t auto --startup-file=no deps/build_app.jl app +# Build CLI +RUN julia --project --startup-file=no deps/build_app.jl -ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin" +# Add CLI to PATH +ENV PATH="${PATH}:/opt/bin/" +# Test the CLI runs +RUN tmle tmle data/sample_dataset.csv \ No newline at end of file From 4b1fe4be92d16533bf1438bab2c981c165109a7b Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 12 Dec 2023 18:15:57 +0100 Subject: [PATCH 42/71] make sure outcome is not OrderedFactor for now --- src/utils.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 9ab403c..ce6b854 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -154,14 +154,19 @@ function coerce_types!(dataset, Ψ::ComposedEstimand) end function coerce_types!(dataset, Ψ) + # Make Treatments categorical but preserve order categorical_variables = Set(keys(Ψ.treatment_values)) - continuous_variables = Set(Iterators.flatten(values(Ψ.treatment_confounders))) - union!(continuous_variables, Ψ.outcome_extra_covariates) - TMLE.is_binary(dataset, Ψ.outcome) ?
- push!(categorical_variables, Ψ.outcome) : - push!(continuous_variables, Ψ.outcome) make_categorical!(dataset, categorical_variables, infer_ordered=true) + # Make Confounders and extra covariates continuous + continuous_variables = Set(Iterators.flatten(values(Ψ.treatment_confounders))) + union!(continuous_variables, Ψ.outcome_extra_covariates) make_float!(dataset, continuous_variables) + # Make outcome categorical if binary but do not infer order + if TMLE.is_binary(dataset, Ψ.outcome) + make_categorical!(dataset, Ψ.outcome, infer_ordered=false) + else + make_float!(dataset, Ψ.outcome) + end end variables(Ψ::TMLE.ComposedEstimand) = union((variables(arg) for arg in Ψ.args)...) From 18075215662e50ba612c52f5d4609c5d9e78b9d1 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 13 Dec 2023 09:13:50 +0100 Subject: [PATCH 43/71] update docs --- docs/make.jl | 7 ++- docs/src/cli.md | 9 ++- docs/src/index.md | 2 +- docs/src/make_summary.md | 16 +---- docs/src/models.md | 2 +- docs/src/sieve_variance.md | 23 ++------ docs/src/tmle_estimation.md | 113 ++++++------------------------------ experiments/runtime.jl | 8 +-- 8 files changed, 41 insertions(+), 139 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index e6aee0b..e278c5d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,9 +15,12 @@ makedocs( modules = [TargetedEstimation], pages=[ "Home" => "index.md", - "Command Line Interfaces" => ["environment.md", "tmle_estimation.md", "sieve_variance.md", "make_summary.md"], + "Command Line Interface" => ["cli.md", "tmle_estimation.md", "sieve_variance.md", "make_summary.md"], "MLJ Extensions" => ["models.md", "resampling.md"], - ] + ], + pagesonly=true, + clean = true, + checkdocs=:exports ) @info "Deploying docs..." diff --git a/docs/src/cli.md b/docs/src/cli.md index 820f3c2..17dc2d6 100644 --- a/docs/src/cli.md +++ b/docs/src/cli.md @@ -24,8 +24,7 @@ Bellow is a description of the functionalities offered by the CLI. 
## CLI Description -The CLI contains 3 sub-commands: - -- `tmle`: To Run TMLE on a dataset (see [tmle command](@ref)). -- `sieve-variance-plateau`: To correct the variance of an estimator for non i.i.d data via [Sieve Variance Plateau](https://biostats.bepress.com/ucbbiostat/paper322/) (see [sieve-variance-plateau command](@ref)). -- `make-summary`: Combines multiple outputs from a `tmle` run into one output file (see [make-summary command](@ref)) +```@contents +Pages = ["tmle_estimation.md", "sieve_variance.md", "make_summary.md"] +Depth = 5 +``` diff --git a/docs/src/index.md b/docs/src/index.md index 2f78304..84cd4cb 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,7 +2,7 @@ The goal of this package, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package. -[The Command Line Interface (CLI)](@ref) +- Jump to [The Command Line Interface (CLI)](@ref) We also provide extensions to the [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) universe that are particularly useful in statistical genetics (but not restricted to it): diff --git a/docs/src/make_summary.md b/docs/src/make_summary.md index 2463498..d93db23 100644 --- a/docs/src/make_summary.md +++ b/docs/src/make_summary.md @@ -6,16 +6,6 @@ tmle make-summary --help ``` -Merges tmle outputs in a single file. - -Args: - -- prefix: Prefix to .hdf5 files to be used to create the summary file - -Options: - -- -o, --outputs : Ouptuts configuration. - -Flags: - -- -h, --help: Print this help message. 
+```@docs +make_summary +``` diff --git a/docs/src/models.md b/docs/src/models.md index 5a15d88..a660e13 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -14,7 +14,7 @@ Because [TMLE.jl](https://targene.github.io/TMLE.jl/stable/) is based on top of Further support for more packages can be added on request, please fill an [issue](https://github.com/TARGENE/TargetedEstimation.jl/issues). -Also, because the [Estimator File](@ref) is a pure Julia file, it is possible to use it in order to install additional package that can be used to define additional models. +Also, because the estimator file used by the TMLE CLI is a pure Julia file, it is possible to use it in order to install additional packages that can be used to define additional models. Finally, we also provide some additional models described in [Additional models provided by TargetedEstimation.jl](@ref). diff --git a/docs/src/sieve_variance.md b/docs/src/sieve_variance.md index d7c61a9..780812e 100644 --- a/docs/src/sieve_variance.md +++ b/docs/src/sieve_variance.md @@ -2,27 +2,12 @@ If the i.i.d. (independent and identically distributed) hypothesis is not satisfied, most of the traditional statistical inference theory falls apart. This is typically possible in population genetics where a study may contain related individuals. Here we leverage a non-parametric method called [Sieve Variance Plateau](https://biostats.bepress.com/ucbbiostat/paper322/) (SVP) estimation. The hypothesis is that the dependence between individuals is sufficiently small, so that our targeted estimator will still be asymptotically unbiased, but its variance will be under estimated. In brief, the SVP estimator computes a variance estimate for a range of thresholds 𝜏, by considering individuals to be independent if their distance exceeds 𝜏. As the distance threshold 𝜏 increases, fewer individuals are assumed to be independent.
The maximum of this curve is the most conservative estimate of the variance of the target parameter estimator and constitutes our SVP corrected variance estimator. -## Usage +## [Usage](@id svp_command) ```bash tmle sieve-variance-plateau --help ``` -Runs Sieve Variance Plateau correction. - -Args: - -- input_prefix: Prefix to outputs from the tmle command. - -Options: - -- -o, --out Output filename in hdf5 format. -- -g, --grm-prefix : Prefix to the aggregated GRM. -- -v, --verbosity <0>: Verbosity level. -- -n, --n-estimators <10>: Number of variance estimators to build for each estimate. -- -m, --max-tau <0.8>: Maximum distance between any two individuals. -- -e, --estimator-key : Estimator to use to proceed with sieve variance correction. - -Flags: - -- -h, --help: Print this help message. +```@docs +sieve_variance_plateau +``` diff --git a/docs/src/tmle_estimation.md b/docs/src/tmle_estimation.md index 67b6c7a..e7b6ae3 100644 --- a/docs/src/tmle_estimation.md +++ b/docs/src/tmle_estimation.md @@ -4,94 +4,19 @@ This is the main script in this package, it provides a command line interface fo ## Usage -Runs TMLE estimation. - -Args: - -- dataset: A dataset either in .csv or .arrow format -- estimands: A file containing a serialized Configuration object. -- estimators: A custom julia file containing the estimators to use. Several examples are provided [here](https://github.com/TARGENE/TargetedEstimation.jl/estimators-configs). Alternatively, to point to any of them, the name of the file can be supplied without the ".jl" extension. (e.g. "superlearning"). - -Options: - -- -v, --verbosity: Verbosity level. -- -o, --outputs: Ouputs to be generated. -- --chunksize <100::Int>: Results are written in batches of size chunksize. -- -r, --rng <123::Int>: Random seed (Only used for estimands ordering at the moment). -- -c, --cache-strategy: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size"). 
- -Flags: - -- -s, --sort-estimands: Sort estimands to minimize cache usage. A brute force approach will be used, resulting in exponentially long sorting time (Only appropriate for small number of estimands). - -## Output file - -The output file is a plain CSV file containing one line per estimand in the input `PARAMFILE`. The file contains the following columns: - -- `PARAMETER_TYPE`: The estimand type (e.g. "ATE", "IATE", ...). -- `TREATMENTS`: A "_&_" separated string containing all treatment variables associated with the estimand. -- `CASE`: A "_&_" separated string containing the treatment variables' case values in the same order as `TREATMENTS`. -- `CONTROL`: A "_&_" separated string containing the treatment variables' control values in the same order as `TREATMENTS`. -- `OUTCOME`: The outcome variable. -- `CONFOUNDERS`: A "_&_" separated string containing the confounding variables. -- `COVARIATES`: A "_&_" separated string containing the extra covariates used to estimate the outcome's mean. -- `INITIAL_ESTIMATE`: The initial estimate before the targeting step. -- `TMLE_ESTIMATE`: The targeted estimate. -- `TMLE_STD`: The standard deviation associated with the targeted estimate. -- `TMLE_PVALUE`: The p-value associated with the targeted estimate. -- `TMLE_LWB`: The 95% confidence interval lower bound associated with the targeted estimate. -- `TMLE_UPB`: The 95% confidence interval upper bound associated with the targeted estimate. -- `ONESTEP_ESTIMATE`: The one step estimate. -- `ONESTEP_STD`: The standard deviation associated with the one step estimate. -- `ONESTEP_PVALUE`: The p-value associated with the one step estimate. -- `ONESTEP_LWB`: The 95% confidence interval lower bound associated with the one step estimate. -- `ONESTEP_UPB`: The 95% confidence interval upper bound associated with the one step estimate. -- `LOG`: A log message if estimation failed. 
- -## Estimator File - -TMLE is an adaptive procedure that depends on the specification of learning algorithms for the estimation of the nuisance parameters (see [TMLE.jl](https://targene.github.io/TMLE.jl/stable/) for a description of the assumed setting). In our case, there are two nuisance parameters for which we need to specify learning algorithms: - -- `E[Y|T, W, C]`: The mean outcome given the treatment, confounders and extra covariates. It is commonly denoted by `Q` in the Targeted Learning litterature. -- `p(T|W)`: The propensity score. It is commonly denoted by `G` in the Targeted Learning litterature. - -### Description of the file - -In order to provide maximum flexibility as to the choice of learning algorithms, the estimator file is a plain [Julia](https://julialang.org/) file. This file is optional and omitting it defaults to using generalized linear models. If provided, it must define a [NamedTuple](https://docs.julialang.org/en/v1/base/base/#Core.NamedTuple) called `tmle_spec` containing any of the following fields as follows (default configuration): - -```julia - -tmle_spec = ( - Q_continuous = LinearRegressor(), - Q_binary = LogisticClassifier(lambda=0.), - G = LogisticClassifier(lambda=0.), - threshold = 1e-8, - cache = false, - weighted_fluctuation = false -) +```bash +tmle tmle --help ``` -where: - -- `Q_continuous`: is a MLJ model used for the estimation of `E[Y|T, W, C]` when the outcome `Y` is continuous. -- `Q_binary`: is a MLJ model used for the estimation of `E[Y|T, W, C]` when the outcome `Y` is binary. -- `G`: is a MLJ model used for the estimation of `p(T|W)`. -- `threshold`: is the minimum value the propensity score `G` is allowed to take. -- `cache`: controls caching of data by [MLJ machines](https://alan-turing-institute.github.io/MLJ.jl/dev/machines/). Setting it to `true` may result in faster runtime but higher memory usage. -- `weighted_fluctuation`: controls whether the fluctuation for `Q` is a weighted glm or not. 
If some of the treatment values are rare it may lead to more robust estimation. - -Typically, `Q_continuous`, `Q_binary` and `G` will be adjusted and other fields can be left unspecified. +```@docs +tmle +``` -### Ready to use estimator files +## Note on TMLE Outputs -We recognize not everyone will be familiar with [Julia](https://julialang.org/). We thus provide a set of ready to use estimator files that can be simplified or extended as needed: +We can output results in three different formats: HDF5, JSON and JLS. By default no output is written, so you need to specify at least one. An output can be generated by specifying an output filename for it. For instance `--outputs.json.filename=output.json` will output a JSON file. Note that you can generate multiple formats at once, e.g. `--outputs.json.filename=output.json --outputs.hdf5.filename=output.hdf5` will output both JSON and HDF5 result files. Another important output option is the `pval_threshold`. Each estimation result is accompanied by an influence curve vector and by default these vectors are erased before saving the results because they typically take up too much space and are not usually needed. On some occasions you might want to keep them and this can be achieved by specifying the output's `pval_threshold`. For instance `--outputs.hdf5.pval_threshold=1.` will keep all such vectors because all p-values lie in between 0 and 1. -- Super Learning: [with](./estimators/superlearning-with-interactions-for-Q.jl) and [without](./estimators/superlearning.jl) interaction terms in the GLM models for Q. -- Super Learning for G and GLMNet for Q: [here](./estimators/G-superlearning-Q-glmnet.jl). -- Super Learning for G and GLM for Q: [here](./estimators/G-superlearning-Q-glm.jl). -- GLMNet: [with](./estimators/glmnet-with-interactions-for-Q.jl) and [without](./estimators/glmnet.jl) interaction terms in the GLM models for Q. 
-- GLM: [with](./estimators/glm-with-interactions-for-Q.jl) and [without](./estimators/glm.jl) interaction terms in the GLM models for Q. -- XGBoost: [with tuning](./estimators/tuned-xgboost.jl). +In order to run sieve variance plateau correction after a TMLE run you need to save the results in HDF5 format with influence curve vectors. Furthermore, you will need to save the sample-ids associated with each result. A complete option set for this could be: `--outputs.hdf5.filename=output.hdf5 --outputs.hdf5.pval_threshold=0.05 --sample_ids=true`. In this case, only those results with an individual p-value of less than ``0.05`` will keep track of their influence curves and be considered for sieve variance correction. ## Runtime @@ -111,8 +36,8 @@ In what follows, `Y` is an outcome of interest, `W` a set of confounding variabl For all the following experiments: -- The Julia script can be found at [experiments/runtime.jl](../../experiments/runtime.jl). -- The various estimators used below are further described in [Ready to use estimator files](@ref). +- The Julia script can be found at [experiments/runtime.jl](https://github.com/TARGENE/TargetedEstimation.jl/tree/main/experiments/runtime.jl). +- The various estimators used below are further described in the [estimators-configs](https://github.com/TARGENE/TargetedEstimation.jl/tree/main/estimators-configs) folder. ### Multiple treatment contrasts @@ -136,12 +61,12 @@ In a PheWAS, one is interested in the effect of a genetic variation across many With this setup in mind, the computational complexity is mostly driven by the specification of the learning algorithms for `Q`, which will have to be fitted for each outcome. For 10 outcomes, we estimate the 3 Average Treatment Effects corresponding to the 3 possible treatment contrasts defined in the previous section. There are thus two levels of reuse of `G` and `Q` in this study design. 
In the table below are presented some runtimes for various specifications of `G` and `Q` using a single cpu. The "Unit runtime" is the average runtime across all estimands and can roughly be extrapolated to bigger studies. -| Estimator file | Unit runtime (s) | Extrapolated runtime to 1000 outcomes | +| Estimator | Unit runtime (s) | Extrapolated runtime to 1000 outcomes | | --- | :---: | :---: | -| `docs/src/estimators/glm.jl` | 4.65 | ≈ 1h20 | -| `docs/src/estimators/glmnet.jl` | 7.19 | ≈ 2h | -| `docs/src/estimators/G-superlearning-Q-glmnet.jl` | 50.05| ≈ 13h45 | -| `docs/src/estimators/superlearning.jl` | 168.98 | ≈ 46h | +| `glm` | 4.65 | ≈ 1h20 | +| `glmnet` | 7.19 | ≈ 2h | +| `G-superlearning-Q-glmnet` | 50.05| ≈ 13h45 | +| `superlearning` | 168.98 | ≈ 46h | Depending on the exact setup, this means one can probably afford to use Super Learning for at least the estimation of `G` (and potentially also for `Q` for a single PheWAS). This turns out to be a great news because TMLE is a double robust estimator. As a reminder, it means that only one of the estimators for `G` or `Q` needs to converge sufficiently fast to the ground truth to guarantee that our estimates will be asymptotically unbiased. 
@@ -166,9 +91,9 @@ Again, we estimate the 3 Average Treatment Effects corresponding to the 3 possib | Estimator file | Continuous outcome unit runtime (s) | Binary outcome unit runtime (s) | Projected Time on HPC (200 folds //) | | --- | :---: | :---: | :---: | -| `docs/src/estimators/glm.jl` | 5.64 | 6.14 | ≈ 6h30 | -| `docs/src/estimators/glmnet.jl` | 17.46 | 22.24 | ≈ 22h | -| `docs/src/estimators/G-superlearning-Q-glmnet.jl` | 430.54 | 438.67 | ≈ 20 days | -| `docs/src/estimators/superlearning.jl` | 511.26 | 567.72 | ≈ 24 days | +| `glm` | 5.64 | 6.14 | ≈ 6h30 | +| `glmnet` | 17.46 | 22.24 | ≈ 22h | +| `G-superlearning-Q-glmnet` | 430.54 | 438.67 | ≈ 20 days | +| `superlearning` | 511.26 | 567.72 | ≈ 24 days | We can see that modern high performance computing platforms definitely enable this study design when using GLMs or GLMNets. It is unlikely however, that you will be able to use Super Learning for any of `P(V|W)` or `E[Y|V, W]` if you don't have privileged access to such platform. While the double robustness guarantees will generally not be satisfied, our estimate will still be targeted, which means that its bias will be reduced compared to classic inference using a parametric model. 
diff --git a/experiments/runtime.jl b/experiments/runtime.jl index e641673..e3e2b62 100644 --- a/experiments/runtime.jl +++ b/experiments/runtime.jl @@ -2,10 +2,10 @@ using ArgParse using TargetedEstimation const ESTIMATORS = [ - "docs/src/estimators/glm.jl", - "docs/src/estimators/glmnet.jl", - "docs/src/estimators/G-superlearning-Q-glmnet.jl", - "docs/src/estimators/superlearning.jl" + "glm", + "glmnet", + "G-superlearning-Q-glmnet", + "superlearning" ] const PARAMETERS = [ "experiments/parameters.phewas.yaml", From 51659814828d0e4a38f4abfb48c551ab1eecd9a4 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 13 Dec 2023 11:01:38 +0100 Subject: [PATCH 44/71] up TMLE dep to manage failed fluctuations --- Project.toml | 2 +- src/runner.jl | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 48583ae..7cf526d 100644 --- a/Project.toml +++ b/Project.toml @@ -53,7 +53,7 @@ MLJModels = "0.16" MLJXGBoostInterface = "0.3.4" MultipleTesting = "0.6.0" Optim = "1.7" -TMLE = "0.12.1" +TMLE = "0.12.2" Tables = "1.10.1" YAML = "0.4.9" julia = "1.7, 1" diff --git a/src/runner.jl b/src/runner.jl index 3b37e3d..a827bd5 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -57,9 +57,11 @@ function try_estimation(runner, Ψ, estimator) return result catch e # Some nuisance function fits may fail. We do not interrupt on them but log instead. - # This also allows to skip fast the next estimands requiring the same nuisance functions. if e isa TMLE.FitFailedError - push!(runner.failed_nuisance, e.estimand) + # This also allows to skip fast the next estimands requiring the same nuisance functions. 
+ if !(e.model isa TMLE.Fluctuation) + push!(runner.failed_nuisance, e.estimand) + end return FailedEstimate(Ψ, e.msg) # On other errors, rethrow else From 837070312f6318e88e989d5a41aedd0ef3449dc1 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 13 Dec 2023 12:42:07 +0100 Subject: [PATCH 45/71] add typing to cli functions --- src/sieve_variance.jl | 14 +++++++------- src/summary.jl | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl index d113cc7..10d4be0 100644 --- a/src/sieve_variance.jl +++ b/src/sieve_variance.jl @@ -225,13 +225,13 @@ Sieve Variance Plateau CLI. - `-m, --max_tau`: Maximum distance between any two individuals. - `-e, --estimator-key`: Estimator to use to proceed with sieve variance correction. """ -@cast function sieve_variance_plateau(input_prefix; - out="svp.hdf5", - grm_prefix="GRM", - verbosity=0, - n_estimators=10, - max_tau=0.8, - estimator_key="TMLE" +@cast function sieve_variance_plateau(input_prefix::String; + out::String="svp.hdf5", + grm_prefix::String="GRM", + verbosity::Int=0, + n_estimators::Int=10, + max_tau::Float64=0.8, + estimator_key::String="TMLE" ) estimator_key = Symbol(estimator_key) τs = default_τs(n_estimators;max_τ=max_tau) diff --git a/src/summary.jl b/src/summary.jl index 6ebf4c5..0089df9 100644 --- a/src/summary.jl +++ b/src/summary.jl @@ -29,8 +29,8 @@ Combines multiple TMLE .hdf5 output files in a single file. Multiple formats can - `-o, --outputs`: Ouptuts configuration. 
""" @cast function make_summary( - prefix; - outputs=Outputs(json=JSONOutput(filename="summary.json")) + prefix::String; + outputs::Outputs=Outputs() ) # Initialize output files From 539c1d7064e0eda5cdec5250e7957983e6659e55 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 13 Dec 2023 16:28:22 +0100 Subject: [PATCH 46/71] add skipping of FailedEstimates in sieve --- src/sieve_variance.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl index 10d4be0..70c446d 100644 --- a/src/sieve_variance.jl +++ b/src/sieve_variance.jl @@ -79,6 +79,7 @@ function build_work_list(prefix, grm_ids; estimator_key=:TMLE) batch_results = io[key] for nt_result in batch_results result = nt_result[estimator_key] + result isa FailedEstimate && continue sample_ids = nt_result.SAMPLE_IDS update_work_lists_with!( result, From d50fc6f059745de0c97e6ccba9fab41925fbd70e Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 14 Dec 2023 08:26:22 +0100 Subject: [PATCH 47/71] up docker image with procps --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 51da080..547da61 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,7 +3,7 @@ FROM julia:1.9.4-bullseye ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - apt-get -y install gcc mono-mcs vim && \ + apt-get -y install gcc mono-mcs vim procps && \ rm -rf /var/lib/apt/lists/* ENV TZ=Europe/Amsterdam From ef258799cbb1ec6d679b73d2ec63deb4a5a216e9 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 19 Dec 2023 12:40:55 +0100 Subject: [PATCH 48/71] up TMLE dep --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7cf526d..cff4a17 100644 --- a/Project.toml +++ b/Project.toml @@ -53,7 +53,7 @@ MLJModels = "0.16" MLJXGBoostInterface = "0.3.4" MultipleTesting = "0.6.0" Optim = "1.7" -TMLE = "0.12.2" +TMLE = "0.13.1" Tables = "1.10.1" YAML 
= "0.4.9" julia = "1.7, 1" From 98ed9b4f32003604da3bcc822e475beddb158ae8 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Tue, 19 Dec 2023 13:46:37 +0100 Subject: [PATCH 49/71] fix default generateATEs --- src/utils.jl | 2 +- test/utils.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index ce6b854..518de66 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -90,7 +90,7 @@ function TMLE.generateATEs(dataset) confounding_variables = Tuple(name for name in colnames if occursin(r"^W", name)) length(confounding_variables) > 0 || throw(ArgumentError("Could not find any confounding variable (starting with 'W') in the dataset.")) - return generateATEs(dataset, (:T, ), :Y; confounders=confounding_variables) + return [generateATEs(dataset, (:T, ), :Y; confounders=confounding_variables)] end function build_estimands_list(estimands_pattern, dataset) diff --git a/test/utils.jl b/test/utils.jl index 9bedbb0..072db30 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -83,12 +83,12 @@ end dataset.Y = [0, 1, 2, 2] dataset.W1 = [1, 1, 1, 1] dataset.W_2 = [1, 1, 1, 1] - ATEs = TargetedEstimation.build_estimands_list("generateATEs", dataset) - @test ATEs == [ + composedATE = TargetedEstimation.build_estimands_list("generateATEs", dataset)[1] + @test composedATE.args == ( TMLE.StatisticalATE(:Y, (T = (case = 1, control = 0),), (T = (:W1, :W_2),), ()), TMLE.StatisticalATE(:Y, (T = (case = 2, control = 0),), (T = (:W1, :W_2),), ()), TMLE.StatisticalATE(:Y, (T = (case = 2, control = 1),), (T = (:W1, :W_2),), ()) - ] + ) end @testset "Test coerce_types!" 
begin Ψ = IATE( From 695e661e2bf8bac5032f4593da60c75d557e406e Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 21 Dec 2023 14:46:50 +0100 Subject: [PATCH 50/71] move back to argparse but keep semantics --- Project.toml | 4 +- src/TargetedEstimation.jl | 7 +- src/cli.jl | 180 ++++++++++++++++++++++++++++++++++++++ src/runner.jl | 2 +- src/sieve_variance.jl | 2 +- src/summary.jl | 2 +- test/runner.jl | 54 ++++++------ test/sieve_variance.jl | 26 ++++-- test/summary.jl | 27 +++--- 9 files changed, 247 insertions(+), 57 deletions(-) create mode 100644 src/cli.jl diff --git a/Project.toml b/Project.toml index cff4a17..7575ae0 100644 --- a/Project.toml +++ b/Project.toml @@ -4,11 +4,11 @@ authors = ["Olivier Labayle"] version = "0.7.4" [deps] +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" -Comonicon = "863f3e99-da2a-4334-8734-de3dacbe5542" Configurations = "5218b696-f38b-4ac9-8b61-a12ec717816d" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" @@ -33,11 +33,11 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" [compat] +ArgParse = "1.1.4" Arrow = "2.5.2" CSV = "0.10" CategoricalArrays = "0.10" Combinatorics = "1.0.2" -Comonicon = "1.0.6" Configurations = "0.17.6" DataFrames = "1.3.4" EvoTrees = "0.16.5" diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl index 74856d8..a3ee31c 100644 --- a/src/TargetedEstimation.jl +++ b/src/TargetedEstimation.jl @@ -4,6 +4,7 @@ if occursin("Intel", Sys.cpu_info()[1].model) using MKL end +using ArgParse using DataFrames using MLJBase using MLJ @@ -25,7 +26,6 @@ using Tables using Random using YAML using JSON -using Comonicon using Configurations import MLJModelInterface @@ -42,10 +42,9 @@ 
include("resampling.jl") include(joinpath("models", "glmnet.jl")) include(joinpath("models", "adaptive_interaction_transformer.jl")) include(joinpath("models", "biallelic_snp_encoder.jl")) +include("cli.jl") -@main - -export Runner, tmle, sieve_variance_plateau, make_summary +export Runner, tmle, sieve_variance_plateau, make_summary, main export GLMNetRegressor, GLMNetClassifier export RestrictedInteractionTransformer, BiAllelicSNPEncoder export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV diff --git a/src/cli.jl b/src/cli.jl new file mode 100644 index 0000000..985b088 --- /dev/null +++ b/src/cli.jl @@ -0,0 +1,180 @@ +function cli_settings() + s = ArgParseSettings(description="TMLE CLI.") + + @add_arg_table s begin + "tmle" + action = :command + help = "Run TMLE." + + "svp" + action = :command + help = "Run Sieve Variance Plateau." + + "merge" + action = :command + help = "Merges TMLE outputs together." + end + + @add_arg_table s["tmle"] begin + "dataset" + arg_type = String + required = true + help = "Path to the dataset (either .csv or .arrow)" + + "--estimands" + arg_type = String + help = "A string (`generateATEs`) or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)" + default = "generateATEs" + + "--estimators" + arg_type = String + help = "A julia file containing the estimators to use." + default = "glmnet" + + "--verbosity" + arg_type = Int + default = 0 + help = "Verbosity level" + + "--hdf5-output" + arg_type = String + help = "HDF5 file output." + + "--json-output" + arg_type = String + help = "JSON file output." + + "--jls-output" + arg_type = String + help = "JLS file output." + + "--chunksize" + arg_type = Int + help = "Results are written in batches of size chunksize." + default = 100 + + "--rng" + arg_type = Int + help = "Random seed (Only used for estimands ordering at the moment)." 
+ default = 123 + + "--cache-strategy" + arg_type = String + help = "Caching Strategy for the nuisance functions, any of (`release-unusable`, `no-cache`, `max-size`)." + default = "release-unusable" + + "--sort-estimands" + help = "Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time)." + action = :store_true + end + + @add_arg_table s["svp"] begin + "input-prefix" + arg_type = String + help = "Input prefix to HDF5 files generated by the tmle CLI." + + "--out" + arg_type = String + help = "Output filename." + default = "svp.hdf5" + + "--grm-prefix" + arg_type = String + help = "Prefix to the aggregated GRM." + default = "GRM" + + "--verbosity" + arg_type = Int + default = 0 + help = "Verbosity level" + + "--n-estimators" + arg_type = Int + default = 10 + help = "Number of variance estimators to build for each estimate." + + "--max-tau" + arg_type = Float64 + default = 0.8 + help = "Maximum distance between any two individuals." + + "--estimator-key" + arg_type = String + help = "Estimator to use to proceed with sieve variance correction." + default = "TMLE" + end + + @add_arg_table s["merge"] begin + "prefix" + arg_type = String + help = "Prefix to .hdf5 files to be used to create the summary file." + + "--hdf5-output" + arg_type = String + help = "HDF5 file output." + + "--json-output" + arg_type = String + help = "JSON file output." + + "--jls-output" + arg_type = String + help = "JLS file output." + end + + return s +end + + +makeOutput(T::Type, ::Nothing) = T() + +function makeOutput(T::Type, str) + args = split(str, ",") + kwargs = Dict(fn => tryparse(ft, val) for (val, fn, ft) ∈ zip(args, fieldnames(T), fieldtypes(T))) + return T(;kwargs...) 
+end + +make_outputs(hdf5_string, json_string, jls_tring) = Outputs( + hdf5=makeOutput(HDF5Output, hdf5_string), + json=makeOutput(JSONOutput, json_string), + jls=makeOutput(JLSOutput, jls_tring) +) + +function main(args=ARGS) + settings = parse_args(args, cli_settings()) + cmd = settings["%COMMAND%"] + cmd_settings = settings[cmd] + if cmd ∈ ("tmle", "merge") + outputs = make_outputs(cmd_settings["hdf5-output"], cmd_settings["json-output"], cmd_settings["jls-output"]) + if cmd == "tmle" + tmle(cmd_settings["dataset"]; + estimands=cmd_settings["estimands"], + estimators=cmd_settings["estimators"], + verbosity=cmd_settings["verbosity"], + outputs=outputs, + chunksize=cmd_settings["chunksize"], + rng=cmd_settings["rng"], + cache_strategy=cmd_settings["cache-strategy"], + sort_estimands=cmd_settings["sort-estimands"] + ) + else + make_summary(cmd_settings["prefix"]; + outputs=outputs + ) + end + else + sieve_variance_plateau(cmd_settings["input-prefix"]; + out=cmd_settings["out"], + grm_prefix=cmd_settings["grm-prefix"], + verbosity=cmd_settings["verbosity"], + n_estimators=cmd_settings["n-estimators"], + max_tau=cmd_settings["max-tau"], + estimator_key=cmd_settings["estimator-key"] + ) + end +end + +function julia_main()::Cint + main() + return 0 +end \ No newline at end of file diff --git a/src/runner.jl b/src/runner.jl index a827bd5..1079db7 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -150,7 +150,7 @@ TMLE CLI. - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time). """ -@cast function tmle(dataset::String; +function tmle(dataset::String; estimands::String="generateATEs", estimators::String="glmnet", verbosity::Int=0, diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl index 70c446d..ea41eb9 100644 --- a/src/sieve_variance.jl +++ b/src/sieve_variance.jl @@ -226,7 +226,7 @@ Sieve Variance Plateau CLI. 
- `-m, --max_tau`: Maximum distance between any two individuals. - `-e, --estimator-key`: Estimator to use to proceed with sieve variance correction. """ -@cast function sieve_variance_plateau(input_prefix::String; +function sieve_variance_plateau(input_prefix::String; out::String="svp.hdf5", grm_prefix::String="GRM", verbosity::Int=0, diff --git a/src/summary.jl b/src/summary.jl index 0089df9..a83a383 100644 --- a/src/summary.jl +++ b/src/summary.jl @@ -28,7 +28,7 @@ Combines multiple TMLE .hdf5 output files in a single file. Multiple formats can - `-o, --outputs`: Ouptuts configuration. """ -@cast function make_summary( +function make_summary( prefix::String; outputs::Outputs=Outputs() ) diff --git a/test/runner.jl b/test/runner.jl index f52ead4..a8ce1f2 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -154,22 +154,24 @@ end @testset "Test tmle: lower p-value threshold only JSON output" begin build_dataset(;n=1000, format="csv") - outputs = TargetedEstimation.Outputs( - json=TargetedEstimation.JSONOutput(filename="output.json", pval_threshold=1e-15) - ) tmpdir = mktempdir(cleanup=true) estimandsfile = joinpath(tmpdir, "configuration.json") configuration = statistical_estimands_only_config() TMLE.write_json(estimandsfile, configuration) estimatorfile = joinpath(CONFIGDIR, "ose_config.jl") datafile = "data.csv" - tmle(datafile; - estimands=estimandsfile, - estimators=estimatorfile, - outputs=outputs) + + # Using the main entry point + main([ + "tmle", + datafile, + "--estimands", estimandsfile, + "--estimators", estimatorfile, + "--json-output", "output.json,1e-15"] + ) # Essential results - results_from_json = TMLE.read_json(outputs.json.filename) + results_from_json = TMLE.read_json("output.json") n_IC_empties = 0 for result in results_from_json if result[:OSE].IC != [] @@ -179,7 +181,7 @@ end @test n_IC_empties > 0 rm(datafile) - rm(outputs.json.filename) + rm("output.json") end @testset "Test tmle: Failing estimands" begin @@ -238,11 +240,6 @@ end @testset 
"Test tmle: Causal and Composed Estimands" begin build_dataset(;n=1000, format="csv") - outputs = TargetedEstimation.Outputs( - json = TargetedEstimation.JSONOutput(filename="output.json"), - jls = TargetedEstimation.JLSOutput(filename="output.jls"), - hdf5 = TargetedEstimation.HDF5Output(filename="output.hdf5") - ) tmpdir = mktempdir(cleanup=true) estimandsfile = joinpath(tmpdir, "configuration.jls") @@ -251,16 +248,21 @@ end estimatorfile = joinpath(CONFIGDIR, "ose_config.jl") datafile = "data.csv" - tmle(datafile; - estimands=estimandsfile, - estimators=estimatorfile, - outputs=outputs, - chunksize=2 - ) + # Using the main entry point + main([ + "tmle", + datafile, + "--estimands", estimandsfile, + "--estimators", estimatorfile, + "--chunksize", "2", + "--json-output", "output.json", + "--hdf5-output", "output.hdf5", + "--jls-output", "output.jls" + ]) # JLS Output results = [] - open(outputs.jls.filename) do io + open("output.jls") do io while !eof(io) push!(results, deserialize(io)) end @@ -279,19 +281,19 @@ end @test results[3].OSE isa TMLE.ComposedEstimate # JSON Output - results_from_json = TMLE.read_json(outputs.json.filename) + results_from_json = TMLE.read_json("output.json") @test length(results_from_json) == 3 # HDF5 - results_from_json = jldopen(outputs.hdf5.filename) + results_from_json = jldopen("output.hdf5") @test length(results_from_json["Batch_1"]) == 2 composed_result = only(results_from_json["Batch_2"]) @test composed_result.OSE.cov == results[3].OSE.cov rm(datafile) - rm(outputs.jls.filename) - rm(outputs.json.filename) - rm(outputs.hdf5.filename) + rm("output.jls") + rm("output.json") + rm("output.hdf5") end diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index 62fb7a9..de1465c 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -285,10 +285,13 @@ end TMLE.write_json(estimandsfile_2, config_2) build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2"; pval=pval) - 
sieve_variance_plateau("tmle_output"; - grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"), - max_tau=0.75 - ) + # Using the main command + main([ + "svp", + "tmle_output", + "--grm-prefix", joinpath(TESTDIR, "data", "grm", "test.grm"), + "--max-tau", "0.75" + ]) io = jldopen("svp.hdf5") # Check τs @@ -332,11 +335,16 @@ end "tmle_output"; estimatorfile=joinpath(TESTDIR, "config", "ose_config.jl") ) - sieve_variance_plateau("tmle_output"; - grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"), - max_tau=0.75, - estimator_key="OSE" - ) + + # Using the main command + main([ + "svp", + "tmle_output", + "--grm-prefix", joinpath(TESTDIR, "data", "grm", "test.grm"), + "--max-tau", "0.75", + "--estimator-key", "OSE" + ]) + # The ComposedEstimate std is not updated but each component is. src_results = jldopen("tmle_output.hdf5")["Batch_1"] io = jldopen("svp.hdf5") diff --git a/test/summary.jl b/test/summary.jl index 1b8f5d8..92c454b 100644 --- a/test/summary.jl +++ b/test/summary.jl @@ -41,27 +41,28 @@ include(joinpath(TESTDIR, "testutils.jl")) outputs=tmle_output_2 ) - # Make summary files - outputs = TargetedEstimation.Outputs( - json=TargetedEstimation.JSONOutput(filename="summary.json"), - hdf5=TargetedEstimation.HDF5Output(filename="summary.hdf5"), - jls=TargetedEstimation.JLSOutput(filename="summary.jls") - ) - make_summary("tmle_output", outputs=outputs) + # Using the main entry point + main([ + "merge", + "tmle_output", + "--json-output", "summary.json", + "--jls-output", "summary.jls", + "--hdf5-output", "summary.hdf5" + ]) # Test correctness hdf5file_1 = jldopen("tmle_output_1.hdf5") hdf5file_2 = jldopen("tmle_output_2.hdf5") inputs = vcat(hdf5file_1["Batch_1"], hdf5file_1["Batch_2"], hdf5file_2["Batch_1"]) - json_outputs = TMLE.read_json(outputs.json.filename) + json_outputs = TMLE.read_json("summary.json") jls_outputs = [] - open(outputs.jls.filename) do io + open("summary.jls") do io while !eof(io) push!(jls_outputs, deserialize(io)) end end - 
hdf5_output = jldopen(outputs.hdf5.filename) + hdf5_output = jldopen("summary.hdf5") hdf5_outputs = vcat((hdf5_output[key] for key in keys(hdf5_output))...) @test length(inputs) == 9 @@ -72,9 +73,9 @@ include(joinpath(TESTDIR, "testutils.jl")) # cleanup rm("tmle_output_1.hdf5") rm("tmle_output_2.hdf5") - rm(outputs.json.filename) - rm(outputs.jls.filename) - rm(outputs.hdf5.filename) + rm("summary.hdf5") + rm("summary.jls") + rm("summary.json") rm(datafile) end From 12ba044d605f7f8c6863d65c25f9aace6996a548 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 21 Dec 2023 14:59:42 +0100 Subject: [PATCH 51/71] try build in CI --- .github/workflows/CI.yml | 2 +- .gitignore | 1 + Comonicon.toml | 20 -------------------- Project.toml | 2 ++ deps/build_app.jl | 3 ++- deps/execute.jl | 7 ------- docker/Dockerfile | 5 +---- 7 files changed, 7 insertions(+), 33 deletions(-) delete mode 100644 Comonicon.toml diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 19683a3..29551c4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -30,7 +30,7 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - name: Build App - run: julia --project -t auto deps/build_app.jl app tarball + run: julia --project --startup-file=no deps/build_app.jl - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v2 with: diff --git a/.gitignore b/.gitignore index 63898a4..eb170c5 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ deps/build.log deps/downloads/ deps/usr/ deps/src/ +tmle/ # Build artifacts for creating documentation generated by the Documenter package docs/build/ diff --git a/Comonicon.toml b/Comonicon.toml deleted file mode 100644 index 4acd652..0000000 --- a/Comonicon.toml +++ /dev/null @@ -1,20 +0,0 @@ -name = "tmle" - -[install] -completion = true -quiet = false -optimize = 2 - -# [sysimg] -# incremental=true -# filter_stdlibs=false - -# [sysimg.precompile] -# execution_file = 
["deps/execute.jl"] - -# [application] -# incremental=true -# filter_stdlibs=false - -# [application.precompile] -# execution_file = ["deps/execute.jl"] \ No newline at end of file diff --git a/Project.toml b/Project.toml index 7575ae0..f5df5c6 100644 --- a/Project.toml +++ b/Project.toml @@ -26,6 +26,7 @@ MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" MultipleTesting = "f8716d33-7c4a-5097-896f-ce0ecbd3ef6b" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" TMLE = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf" @@ -33,6 +34,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" [compat] +PackageCompiler = "2.1.16" ArgParse = "1.1.4" Arrow = "2.5.2" CSV = "0.10" diff --git a/deps/build_app.jl b/deps/build_app.jl index e0a9f11..cb0b99c 100644 --- a/deps/build_app.jl +++ b/deps/build_app.jl @@ -1 +1,2 @@ -using TargetedEstimation; TargetedEstimation.comonicon_install() \ No newline at end of file +using PackageCompiler +PackageCompiler.create_app(".", "tmle", precompile_execution_file="execute.jl") diff --git a/deps/execute.jl b/deps/execute.jl index bde1b1b..70f58fe 100644 --- a/deps/execute.jl +++ b/deps/execute.jl @@ -1,13 +1,6 @@ using TargetedEstimation @info "Running precompilation script." 
- -# Run help messages -TargetedEstimation.command_main(["-h"]) -TargetedEstimation.command_main(["tmle", "-h"]) -TargetedEstimation.command_main(["make-summary", "-h"]) -TargetedEstimation.command_main(["sieve-variance-plateau", "-h"]) - # Run workload TEST_DIR = joinpath(pkgdir(TargetedEstimation), "test") push!(LOAD_PATH, TEST_DIR) diff --git a/docker/Dockerfile b/docker/Dockerfile index 547da61..741a9ba 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -22,8 +22,5 @@ RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompi # Build CLI RUN julia --project --startup-file=no deps/build_app.jl -# Add CLI to PATH -ENV PATH="${PATH}:/opt/bin/" - # Test the CLI runs -RUN tmle tmle data/sample_dataset.csv \ No newline at end of file +RUN tmle/bin/tmle tmle data/sample_dataset.csv \ No newline at end of file From 478ba5d5b87d7f0bb2576ec0a323eaa176185c62 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 21 Dec 2023 15:02:09 +0100 Subject: [PATCH 52/71] fix execute path --- deps/build_app.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/build_app.jl b/deps/build_app.jl index cb0b99c..8e9a8dd 100644 --- a/deps/build_app.jl +++ b/deps/build_app.jl @@ -1,2 +1,2 @@ using PackageCompiler -PackageCompiler.create_app(".", "tmle", precompile_execution_file="execute.jl") +PackageCompiler.create_app(".", "tmle", precompile_execution_file="deps/execute.jl") From b2d2c929c634b275808629ce94ab0806c0d55e04 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 21 Dec 2023 15:43:59 +0100 Subject: [PATCH 53/71] remove app run at the end of docker build --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 741a9ba..334ab1a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -23,4 +23,4 @@ RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompi RUN julia --project --startup-file=no deps/build_app.jl # Test the CLI 
runs -RUN tmle/bin/tmle tmle data/sample_dataset.csv \ No newline at end of file +# RUN tmle/bin/tmle tmle data/sample_dataset.csv \ No newline at end of file From fab24558911c763c8c40951713616f02f7802851 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 21 Dec 2023 16:40:53 +0100 Subject: [PATCH 54/71] try more platforms and upload artifact --- .github/workflows/CI.yml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 29551c4..65c45df 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -15,11 +15,14 @@ jobs: fail-fast: false matrix: version: - - '1.9' + - '1' os: - ubuntu-latest + - macOS-latest + - windows-latest arch: - - x64 + - 'x64' + - 'x86' steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 @@ -29,12 +32,16 @@ jobs: - uses: julia-actions/cache@v1 - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - - name: Build App - run: julia --project --startup-file=no deps/build_app.jl - - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v2 with: files: lcov.info + - uses: julia-actions/julia-processcoverage@v1 + - name: Build App + run: julia --project --startup-file=no deps/build_app.jl + - uses: actions/upload-artifact@v4 + with: + name: tmle-${{ matrix.os }}-${{ matrix.arch }} + path: tmle docs: name: Documentation runs-on: ubuntu-latest From 26c273e03f6c97d47eb615e7ea40f7060c7f85ba Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 21 Dec 2023 16:50:39 +0100 Subject: [PATCH 55/71] remove x86 --- .github/workflows/CI.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 65c45df..ebf50b7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -22,7 +22,6 @@ jobs: - windows-latest arch: - 'x64' - - 'x86' steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 From 
592c67520c38032055224f9a5e09105998a95278 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 21 Dec 2023 17:05:02 +0100 Subject: [PATCH 56/71] remove the use of mmap in json read --- src/utils.jl | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 518de66..c378749 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -18,17 +18,16 @@ MissingSCMError() = ArgumentError(string("A Structural Causal Model should be pr get_identification_method(method::Nothing) = BackdoorAdjustment() get_identification_method(method) = method -function read_method(extension) - method = if extension == ".json" - TMLE.read_json - elseif extension == ".yaml" - TMLE.read_yaml - elseif extension == ".jls" - deserialize +function read_estimands_config(filename) + if endswith(filename, ".json") + TMLE.read_json(filename, use_mmap=false) + elseif endswith(filename, ".yaml") + TMLE.read_yaml(filename) + elseif endswith(filename, ".jls") + return deserialize(filename) else - throw(ArgumentError(string("Can't read from ", extension, " file"))) + throw(ArgumentError(string("Can't read from ", splitext(filename)[2], " file"))) end - return method end function fix_treatment_values!(treatment_types::AbstractDict, Ψ::ComposedEstimand, dataset) @@ -62,8 +61,7 @@ Reads estimands from file and ensures that the treatment values in the config fi respects the treatment types in the dataset.
""" function proofread_estimands(filename, dataset) - extension = filename[findlast(isequal('.'), filename):end] - config = read_method(extension)(filename) + config = read_estimands_config(filename) adjustment_method = get_identification_method(config.adjustment) estimands = Vector{TMLE.Estimand}(undef, length(config.estimands)) treatment_types = Dict() From 1518216b74160d4a7aa8c2987816e663f162fd3d Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 22 Dec 2023 17:22:02 +0100 Subject: [PATCH 57/71] close files before removing --- test/runner.jl | 11 ++++++----- test/sieve_variance.jl | 3 +++ test/summary.jl | 4 ++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/test/runner.jl b/test/runner.jl index a8ce1f2..97e39b4 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -231,7 +231,7 @@ end @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand end end - + close(results_from_hdf5) # Clean rm(outputs.json.filename) rm(outputs.hdf5.filename) @@ -285,11 +285,12 @@ end @test length(results_from_json) == 3 # HDF5 - results_from_json = jldopen("output.hdf5") - @test length(results_from_json["Batch_1"]) == 2 - composed_result = only(results_from_json["Batch_2"]) + results_from_hdf5 = jldopen("output.hdf5") + @test length(results_from_hdf5["Batch_1"]) == 2 + composed_result = only(results_from_hdf5["Batch_2"]) @test composed_result.OSE.cov == results[3].OSE.cov - + close(results_from_hdf5) + rm(datafile) rm("output.jls") rm("output.json") diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index de1465c..8365d1b 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -313,6 +313,8 @@ end @test src_result.TMLE.n == svp_result.TMLE.n @test svp_result.TMLE.IC == [] end + close(tmleout1) + close(tmleout2) close(io) # clean rm("svp.hdf5") @@ -361,6 +363,7 @@ end @test standalone_estimates[i].OSE.std != src_results[i].OSE.std end + close(src_results) close(io) # clean diff --git a/test/summary.jl b/test/summary.jl index 
92c454b..a75d0e1 100644 --- a/test/summary.jl +++ b/test/summary.jl @@ -70,6 +70,10 @@ include(joinpath(TESTDIR, "testutils.jl")) @test input.OSE.estimand == jls_output.OSE.estimand == hdf5_out.OSE.estimand == json_output[:OSE].estimand end + close(hdf5file_1) + close(hdf5file_2) + close(hdf5_output) + # cleanup rm("tmle_output_1.hdf5") rm("tmle_output_2.hdf5") From bcd5ba5d678cd01286ba7f7d7cd6dd003f7d18a7 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 22 Dec 2023 17:43:12 +0100 Subject: [PATCH 58/71] fix poor closing of jld2 files --- test/runner.jl | 27 ++++++++++++++------------- test/sieve_variance.jl | 10 ++++------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/test/runner.jl b/test/runner.jl index 97e39b4..88b01da 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -222,16 +222,17 @@ end end # Check results from HDF5 - results_from_hdf5 = jldopen(outputs.hdf5.filename)["Batch_1"] - for estimator in (:OSE, :TMLE) - @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimate - @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate - for i in 3:6 - @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimate - @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand + jldopen(outputs.hdf5.filename) do io + results_from_hdf5 = io["Batch_1"] + for estimator in (:OSE, :TMLE) + @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimate + @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate + for i in 3:6 + @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimate + @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand + end end end - close(results_from_hdf5) # Clean rm(outputs.json.filename) rm(outputs.hdf5.filename) @@ -285,11 +286,11 @@ end @test length(results_from_json) == 3 # HDF5 - results_from_hdf5 = jldopen("output.hdf5") - @test length(results_from_hdf5["Batch_1"]) == 2 - composed_result = only(results_from_hdf5["Batch_2"]) - @test 
composed_result.OSE.cov == results[3].OSE.cov - close(results_from_hdf5) + jldopen("output.hdf5") do io + @test length(io["Batch_1"]) == 2 + composed_result = only(io["Batch_2"]) + @test composed_result.OSE.cov == results[3].OSE.cov + end rm(datafile) rm("output.jls") diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index 8365d1b..0095067 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -301,8 +301,8 @@ end # Check results svp_results = io["results"] - tmleout1 = jldopen("tmle_output_1.hdf5")["Batch_1"] - tmleout2 = jldopen("tmle_output_2.hdf5")["Batch_1"] + tmleout1 = jldopen(x -> x["Batch_1"], "tmle_output_1.hdf5") + tmleout2 = jldopen(x -> x["Batch_1"], "tmle_output_2.hdf5") src_results = [tmleout1..., tmleout2...] for svp_result in svp_results @@ -313,8 +313,7 @@ end @test src_result.TMLE.n == svp_result.TMLE.n @test svp_result.TMLE.IC == [] end - close(tmleout1) - close(tmleout2) + close(io) # clean rm("svp.hdf5") @@ -348,7 +347,7 @@ end ]) # The ComposedEstimate std is not updated but each component is. 
- src_results = jldopen("tmle_output.hdf5")["Batch_1"] + src_results = jldopen(x -> x["Batch_1"], "tmle_output.hdf5") io = jldopen("svp.hdf5") svp_results = io["results"] standalone_estimates = svp_results[1:2] @@ -363,7 +362,6 @@ end @test standalone_estimates[i].OSE.std != src_results[i].OSE.std end - close(src_results) close(io) # clean From 375980da6d801a37412ae988422b4b5a9af6bacd Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 22 Dec 2023 18:47:08 +0100 Subject: [PATCH 59/71] add add lazy artifacts and test run in docker image --- deps/build_app.jl | 2 +- docker/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deps/build_app.jl b/deps/build_app.jl index 8e9a8dd..8552971 100644 --- a/deps/build_app.jl +++ b/deps/build_app.jl @@ -1,2 +1,2 @@ using PackageCompiler -PackageCompiler.create_app(".", "tmle", precompile_execution_file="deps/execute.jl") +PackageCompiler.create_app(".", "tmle", precompile_execution_file="deps/execute.jl", include_lazy_artifacts=true) diff --git a/docker/Dockerfile b/docker/Dockerfile index 334ab1a..96dec1d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -23,4 +23,4 @@ RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompi RUN julia --project --startup-file=no deps/build_app.jl # Test the CLI runs -# RUN tmle/bin/tmle tmle data/sample_dataset.csv \ No newline at end of file +RUN /TargetedEstimation.jl/tmle/bin/TargetedEstimation tmle data/sample_dataset.csv \ No newline at end of file From f8358ffe540b548a5cea9e0d52accfa0ca02e37a Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Fri, 22 Dec 2023 18:49:57 +0100 Subject: [PATCH 60/71] fix json memap in tests --- test/outputs.jl | 2 +- test/runner.jl | 10 +++++----- test/summary.jl | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test/outputs.jl b/test/outputs.jl index 9b13bb0..73b9207 100644 --- a/test/outputs.jl +++ b/test/outputs.jl @@ -45,7 +45,7 @@ end 
TargetedEstimation.initialize_json(jsonoutput.filename) TargetedEstimation.update_file(jsonoutput, results[1:3]) TargetedEstimation.update_file(jsonoutput, results[4:end]; finalize=true) - loaded_results = TMLE.read_json(jsonoutput.filename) + loaded_results = TMLE.read_json(jsonoutput.filename, use_mmap=false) @test size(loaded_results) == size(results) for (result, loaded_result) in zip(results, loaded_results) @test result.TMLE.estimate == loaded_result[:TMLE].estimate diff --git a/test/runner.jl b/test/runner.jl index 88b01da..1985f35 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -44,7 +44,7 @@ include(joinpath(TESTDIR, "testutils.jl")) TargetedEstimation.save(runner, results, partition, true) # Test Save to JSON - loaded_results = TMLE.read_json(outputs.json.filename) + loaded_results = TMLE.read_json(outputs.json.filename, use_mmap=false) for (result, loaded_result) in zip(results, loaded_results) @test loaded_result[:TMLE] isa TMLE.TMLEstimate @test result.TMLE.estimate == loaded_result[:TMLE].estimate @@ -134,7 +134,7 @@ end end end results_from_hdf5 = vcat(results_from_hdf5...) 
- results_from_json = TMLE.read_json(outputs.json.filename) + results_from_json = TMLE.read_json(outputs.json.filename, use_mmap=false) for i in 1:6 Ψ = configuration.estimands[i] @@ -171,7 +171,7 @@ end ) # Essential results - results_from_json = TMLE.read_json("output.json") + results_from_json = TMLE.read_json("output.json", use_mmap=false) n_IC_empties = 0 for result in results_from_json if result[:OSE].IC != [] @@ -210,7 +210,7 @@ end ]) # Check results from JSON - results_from_json = TMLE.read_json(outputs.json.filename) + results_from_json = TMLE.read_json(outputs.json.filename, use_mmap=false) for estimator in (:OSE, :TMLE) @test results_from_json[1][estimator][:error] == "Could not fit the following propensity score model: P₀(T2 | W1, W2)" @test results_from_json[1][estimator][:estimand] isa TMLE.Estimand @@ -282,7 +282,7 @@ end @test results[3].OSE isa TMLE.ComposedEstimate # JSON Output - results_from_json = TMLE.read_json("output.json") + results_from_json = TMLE.read_json("output.json", use_mmap=false) @test length(results_from_json) == 3 # HDF5 diff --git a/test/summary.jl b/test/summary.jl index a75d0e1..40f3a73 100644 --- a/test/summary.jl +++ b/test/summary.jl @@ -55,7 +55,7 @@ include(joinpath(TESTDIR, "testutils.jl")) hdf5file_2 = jldopen("tmle_output_2.hdf5") inputs = vcat(hdf5file_1["Batch_1"], hdf5file_1["Batch_2"], hdf5file_2["Batch_1"]) - json_outputs = TMLE.read_json("summary.json") + json_outputs = TMLE.read_json("summary.json", use_mmap=false) jls_outputs = [] open("summary.jls") do io while !eof(io) From ccc2a07a77612a34f8c773132d9df1ae03d157b2 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Sat, 23 Dec 2023 10:28:33 +0100 Subject: [PATCH 61/71] sysimage instead of app in docker container --- deps/build_app.jl | 6 +++++- deps/build_sysimage.jl | 6 ++++++ docker/Dockerfile | 4 ++-- tmle.jl | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 deps/build_sysimage.jl create mode 100644 tmle.jl diff --git 
a/deps/build_app.jl b/deps/build_app.jl index 8552971..9c36c5a 100644 --- a/deps/build_app.jl +++ b/deps/build_app.jl @@ -1,2 +1,6 @@ using PackageCompiler -PackageCompiler.create_app(".", "tmle", precompile_execution_file="deps/execute.jl", include_lazy_artifacts=true) +PackageCompiler.create_app(".", "tmle", + lib_name="tmle", + precompile_execution_file="deps/execute.jl", + include_lazy_artifacts=true +) diff --git a/deps/build_sysimage.jl b/deps/build_sysimage.jl new file mode 100644 index 0000000..b1a32cc --- /dev/null +++ b/deps/build_sysimage.jl @@ -0,0 +1,6 @@ +using PackageCompiler +PackageCompiler.create_sysimage( + ["TargetedEstimation"], + sysimage_path="TMLESysimage.so", + precompile_execution_file="deps/execute.jl", +) diff --git a/docker/Dockerfile b/docker/Dockerfile index 96dec1d..423f096 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,7 +20,7 @@ WORKDIR /TargetedEstimation.jl RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' # Build CLI -RUN julia --project --startup-file=no deps/build_app.jl +RUN julia --project --startup-file=no deps/build_sysimage.jl # Test the CLI runs -RUN /TargetedEstimation.jl/tmle/bin/TargetedEstimation tmle data/sample_dataset.csv \ No newline at end of file +RUN julia --startup-file=no --project -JTMLESysimage.so tmle.jl tmle data/sample_dataset.csv \ No newline at end of file diff --git a/tmle.jl b/tmle.jl new file mode 100644 index 0000000..592b78f --- /dev/null +++ b/tmle.jl @@ -0,0 +1 @@ +using TargetedEstimation; main() \ No newline at end of file From 4b0dcd6e6b34b7bb3d103c0b9c5c58aa9e0278c0 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Sat, 23 Dec 2023 10:44:45 +0100 Subject: [PATCH 62/71] fix executable name --- deps/build_app.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deps/build_app.jl b/deps/build_app.jl index 9c36c5a..c48e8cf 100644 --- a/deps/build_app.jl +++ b/deps/build_app.jl @@ -1,6 +1,6 @@ using PackageCompiler 
-PackageCompiler.create_app(".", "tmle", - lib_name="tmle", +PackageCompiler.create_app(".", "tmle", + executables = ["tmle" => "julia_main"] precompile_execution_file="deps/execute.jl", include_lazy_artifacts=true ) From 4e0bdf57c82f14e61b33aa53a46ea2c83fb76717 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Sat, 23 Dec 2023 10:59:50 +0100 Subject: [PATCH 63/71] fix missing coma --- deps/build_app.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/build_app.jl b/deps/build_app.jl index c48e8cf..8e3ea90 100644 --- a/deps/build_app.jl +++ b/deps/build_app.jl @@ -1,6 +1,6 @@ using PackageCompiler PackageCompiler.create_app(".", "tmle", - executables = ["tmle" => "julia_main"] + executables = ["tmle" => "julia_main"], precompile_execution_file="deps/execute.jl", include_lazy_artifacts=true ) From 6ddceaecad6bc44a905e10df8200ca4da6c8915e Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Sat, 23 Dec 2023 11:15:26 +0100 Subject: [PATCH 64/71] try generic cpu target --- deps/build_sysimage.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/deps/build_sysimage.jl b/deps/build_sysimage.jl index b1a32cc..208628a 100644 --- a/deps/build_sysimage.jl +++ b/deps/build_sysimage.jl @@ -1,6 +1,7 @@ using PackageCompiler PackageCompiler.create_sysimage( ["TargetedEstimation"], + cpu_target="generic", sysimage_path="TMLESysimage.so", precompile_execution_file="deps/execute.jl", ) From eea3bc0afdcfdd1bdb029cbd730ba32516c1c300 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Wed, 27 Dec 2023 12:29:43 +0100 Subject: [PATCH 65/71] update cache management to drop composite factors as well --- src/cache_managers.jl | 26 ++++++++++++++----- src/cli.jl | 8 +++--- test/cache_managers.jl | 59 +++++++++++++++++++++++++++++------------- 3 files changed, 65 insertions(+), 28 deletions(-) diff --git a/src/cache_managers.jl b/src/cache_managers.jl index 56acd3f..50d7fd3 100644 --- a/src/cache_managers.jl +++ b/src/cache_managers.jl @@ -10,12 +10,20 @@ function 
release!(cache_manager::ReleaseUnusableCacheManager, Ψ) # Always drop fluctuations haskey(cache_manager.cache, :last_fluctuation) && pop!(cache_manager.cache, :last_fluctuation) + # Drop Basic nuisance functions for η in TMLE.nuisance_functions_iterator(Ψ) cache_manager.η_counts[η] -= 1 if cache_manager.η_counts[η] == 0 delete!(cache_manager.cache, η) end end + + # Drop aggregate nuisance function + for η in keys(cache_manager.cache) + if η isa TMLE.CMRelevantFactors + delete!(cache_manager.cache, η) + end + end end struct MaxSizeCacheManager <: CacheManager @@ -25,14 +33,20 @@ struct MaxSizeCacheManager <: CacheManager end function release!(cache_manager::MaxSizeCacheManager, Ψ) - while length(cache_manager.cache) > cache_manager.max_size - # Prioritize the release of the last fluctuation - if haskey(cache_manager.cache, :last_fluctuation) - pop!(cache_manager.cache, :last_fluctuation) - else - pop!(cache_manager.cache) + # Prioritize the release of the last fluctuation + if haskey(cache_manager.cache, :last_fluctuation) + pop!(cache_manager.cache, :last_fluctuation) + end + # Drop aggregate nuisance function + for η in keys(cache_manager.cache) + if η isa TMLE.CMRelevantFactors + delete!(cache_manager.cache, η) end end + # Drop the rest randomly until the size is acceptable + while length(cache_manager.cache) > cache_manager.max_size + pop!(cache_manager.cache) + end end struct NoCacheManager <: CacheManager diff --git a/src/cli.jl b/src/cli.jl index 985b088..b91d0cf 100644 --- a/src/cli.jl +++ b/src/cli.jl @@ -1,7 +1,7 @@ function cli_settings() s = ArgParseSettings(description="TMLE CLI.") - @add_arg_table s begin + @add_arg_table! s begin "tmle" action = :command help = "Run TMLE." @@ -15,7 +15,7 @@ function cli_settings() help = "Merges TMLE outputs together." end - @add_arg_table s["tmle"] begin + @add_arg_table! 
s["tmle"] begin "dataset" arg_type = String required = true @@ -68,7 +68,7 @@ function cli_settings() action = :store_true end - @add_arg_table s["svp"] begin + @add_arg_table! s["svp"] begin "input-prefix" arg_type = String help = "Input prefix to HDF5 files generated by the tmle CLI." @@ -104,7 +104,7 @@ function cli_settings() default = "TMLE" end - @add_arg_table s["merge"] begin + @add_arg_table! s["merge"] begin "prefix" arg_type = String help = "Prefix to .hdf5 files to be used to create the summary file." diff --git a/test/cache_managers.jl b/test/cache_managers.jl index 294ccd2..1567cad 100644 --- a/test/cache_managers.jl +++ b/test/cache_managers.jl @@ -16,17 +16,26 @@ end @testset "Test MaxSizeCacheManager" begin cache_manager = TargetedEstimation.MaxSizeCacheManager(3) - cache_manager.cache["Toto"] = 1 - cache_manager.cache["Tata"] = 2 - TargetedEstimation.release!(cache_manager, nothing) - @test cache_manager.cache == Dict("Toto" => 1, "Tata" => 2) - cache_manager.cache["Titi"] = 3 - cache_manager.cache["Tutu"] = 4 - @test length(cache_manager.cache) == 4 - TargetedEstimation.release!(cache_manager, nothing) - @test length(cache_manager.cache) == 3 + Y_T₁T₂ = TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) + cache_manager.cache[Y_T₁T₂] = 1 + T₁_W = TMLE.ConditionalDistribution(:T₁, (:W,)) + cache_manager.cache[T₁_W] = 1 + T₂_W = TMLE.ConditionalDistribution(:T₂, (:W,)) + cache_manager.cache[T₂_W] = 1 + η = TMLE.CMRelevantFactors( + Y_T₁T₂, + (T₁_W, T₂_W) + ) + cache_manager.cache[η] = 1 + cache_manager.cache[:last_fluctuation] = 1 + @test length(cache_manager.cache) == 5 TargetedEstimation.release!(cache_manager, nothing) - @test length(cache_manager.cache) == 3 + # CMRelevantFactors and fluctuation dropped + @test cache_manager.cache == Dict( + TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) => 1, + TMLE.ConditionalDistribution(:T₂, (:W,)) => 1, + TMLE.ConditionalDistribution(:T₁, (:W,)) => 1 + ) end @testset "Test ReleaseUnusableCacheManager" begin @@ 
-61,23 +70,37 @@ end cache_manager.cache[T₁_W] = 1 T₂_W = TMLE.ConditionalDistribution(:T₂, (:W,)) cache_manager.cache[T₂_W] = 1 + η = TMLE.CMRelevantFactors( + Y_T₁T₂, + (T₁_W, T₂_W) + ) + cache_manager.cache[η] = 1 cache_manager.cache[:last_fluctuation] = 1 - @test length(cache_manager.cache) == 4 - # After estimation of the first estimand, only the fluctuation is released + @test length(cache_manager.cache) == 5 + # After estimation of the first estimand, the fluctuation and composite factor are released TargetedEstimation.release!(cache_manager, estimands[1]) - @test length(cache_manager.cache) == 3 + @test cache_manager.cache == Dict( + TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) => 1, + TMLE.ConditionalDistribution(:T₂, (:W,)) => 1, + TMLE.ConditionalDistribution(:T₁, (:W,)) => 1 + ) - # Estimation of the second estimand will not result in further nuisance functions + # Estimation of the second estimand will restore the composite factor + cache_manager.cache[η] = 1 + cache_manager.cache[:last_fluctuation] = 1 # Y_T₁T₂ and T₂_W are no longer needed TargetedEstimation.release!(cache_manager, estimands[2]) - @test length(cache_manager.cache) == 1 - @test !haskey(cache_manager.cache, T₂_W) - @test !haskey(cache_manager.cache, Y_T₁T₂) - @test haskey(cache_manager.cache, T₁_W) + @test cache_manager.cache == Dict(TMLE.ConditionalDistribution(:T₁, (:W,)) => 1) # Estimation of the third estimand will fill the cache with the following Y_T₁ = TMLE.ConditionalDistribution(:Y, (:T₁, :W)) cache_manager.cache[Y_T₁] = 1 + η = TMLE.CMRelevantFactors( + Y_T₁, + (T₁_W, ) + ) + cache_manager.cache[η] = 1 + cache_manager.cache[:last_fluctuation] = 1 # Y_T₁ and T₁_W are no longer needed TargetedEstimation.release!(cache_manager, estimands[3]) @test cache_manager.cache == Dict() From 1555173943bfe0131a47970815b7b928604ca053 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 28 Dec 2023 14:07:32 +0100 Subject: [PATCH 66/71] handle case when glmnetcv fails at first lambda 
--- Project.toml | 2 +- src/models/glmnet.jl | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f5df5c6..82b56c7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TargetedEstimation" uuid = "2573d147-4098-46ba-9db2-8608d210ccac" authors = ["Olivier Labayle"] -version = "0.7.4" +version = "0.8.0" [deps] ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" diff --git a/src/models/glmnet.jl b/src/models/glmnet.jl index 4232ffd..189aca1 100644 --- a/src/models/glmnet.jl +++ b/src/models/glmnet.jl @@ -73,6 +73,10 @@ end function MLJBase.fit(model::GLMNetModel, verbosity::Int, X, y) folds = getfolds(model.resampling, X, y) res = glmnetcv(MLJBase.matrix(X), y; folds=folds, model.params...) + # This is currently not caught by the GLMNet package + if length(res.meanloss) == 0 + throw(error("glmnetcv's mean loss is empty. Probably meaning convergence failed at the first lambda for some fold.")) + end return make_fitresult(model, res, y), nothing, nothing end From 48098c62f28f42ebf818c78971a54d125c66bb83 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 28 Dec 2023 14:08:29 +0100 Subject: [PATCH 67/71] up docker deps to 1.10 --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 423f096..7a0885e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.9.4-bullseye +FROM julia:1.10-bullseye ARG DEBIAN_FRONTEND=noninteractive From 1e25d448dfc7f9dccf851751cb27ecb4dc567ce0 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 28 Dec 2023 15:11:31 +0100 Subject: [PATCH 68/71] add clang compiler --- docker/Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7a0885e..f711fc9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,14 +2,16 @@ FROM julia:1.10-bullseye ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get 
update && \ - apt-get -y install gcc mono-mcs vim procps && \ - rm -rf /var/lib/apt/lists/* - ENV TZ=Europe/Amsterdam ENV JULIA_DEPOT_PATH=/opt +RUN apt-get update && \ + apt-get -y install gcc mono-mcs vim procps wget lsb-release software-properties-common gnupg && \ + rm -rf /var/lib/apt/lists/* + +RUN bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" + # Import the project COPY . /TargetedEstimation.jl From f0da774f034b8de645da2dfe1bc8768dde49dc0f Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 28 Dec 2023 15:23:25 +0100 Subject: [PATCH 69/71] try solve windows problem --- test/runner.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runner.jl b/test/runner.jl index 1985f35..01bd6da 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -148,6 +148,7 @@ end rm(outputs.hdf5.filename) rm(outputs.json.filename) end + GC.gc() # memory freed for deleting arrow file rm(datafile) end end From 289bfa23ed55911784dbf9ce4c2e5328afe3a0f6 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Sat, 27 Jan 2024 12:14:47 +0000 Subject: [PATCH 70/71] up TMLE and get rid of windows --- .github/workflows/CI.yml | 1 - Project.toml | 2 +- src/cli.jl | 4 ++-- src/runner.jl | 8 ++++---- src/utils.jl | 8 ++++---- test/utils.jl | 9 ++++----- 6 files changed, 15 insertions(+), 17 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index ebf50b7..f04e246 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -19,7 +19,6 @@ jobs: os: - ubuntu-latest - macOS-latest - - windows-latest arch: - 'x64' steps: diff --git a/Project.toml b/Project.toml index 82b56c7..e968a6d 100644 --- a/Project.toml +++ b/Project.toml @@ -55,7 +55,7 @@ MLJModels = "0.16" MLJXGBoostInterface = "0.3.4" MultipleTesting = "0.6.0" Optim = "1.7" -TMLE = "0.13.1" +TMLE = "0.14.0" Tables = "1.10.1" YAML = "0.4.9" julia = "1.7, 1" diff --git a/src/cli.jl b/src/cli.jl index b91d0cf..51d84b1 100644 --- a/src/cli.jl +++ b/src/cli.jl @@ -23,8 +23,8 @@ function 
cli_settings() "--estimands" arg_type = String - help = "A string (`generateATEs`) or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)" - default = "generateATEs" + help = "A string (`factorialATE`) or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)" + default = "factorialATE" "--estimators" arg_type = String diff --git a/src/runner.jl b/src/runner.jl index 1079db7..e91c09d 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -8,7 +8,7 @@ mutable struct Runner verbosity::Int failed_nuisance::Set function Runner(dataset; - estimands="generateATEs", + estimands="factorialATE", estimators="glmnet", verbosity=0, outputs=Outputs(), @@ -120,7 +120,7 @@ end """ tmle(dataset; - estimands="generateATEs", + estimands="factorialATE", estimators="glmnet"; verbosity=0, outputs=Outputs(), @@ -138,7 +138,7 @@ TMLE CLI. # Options -- `--estimands`: A string ("generateATEs") or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls) +- `--estimands`: A string ("factorialATE") or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls) - `--estimators`: A julia file containing the estimators to use. - `-v, --verbosity`: Verbosity level. - `-o, --outputs`: Ouputs to be generated. @@ -151,7 +151,7 @@ TMLE CLI. - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time). """ function tmle(dataset::String; - estimands::String="generateATEs", + estimands::String="factorialATE", estimators::String="glmnet", verbosity::Int=0, outputs::Outputs=Outputs(), diff --git a/src/utils.jl b/src/utils.jl index c378749..8fa48ca 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -81,19 +81,19 @@ This explicitely requires that the following columns belong to the dataset: All ATE parameters are generated. 
""" -function TMLE.generateATEs(dataset) +function TMLE.factorialATE(dataset) colnames = names(dataset) "T" ∈ colnames || throw(ArgumentError("No column 'T' found in the dataset for the treatment variable.")) "Y" ∈ colnames || throw(ArgumentError("No column 'Y' found in the dataset for the outcome variable.")) confounding_variables = Tuple(name for name in colnames if occursin(r"^W", name)) length(confounding_variables) > 0 || throw(ArgumentError("Could not find any confounding variable (starting with 'W') in the dataset.")) - return [generateATEs(dataset, (:T, ), :Y; confounders=confounding_variables)] + return [factorialATE(dataset, (:T, ), :Y; confounders=confounding_variables)] end function build_estimands_list(estimands_pattern, dataset) - estimands = if estimands_pattern == "generateATEs" - generateATEs(dataset) + estimands = if estimands_pattern == "factorialATE" + factorialATE(dataset) else proofread_estimands(estimands_pattern, dataset) end diff --git a/test/utils.jl b/test/utils.jl index 072db30..9279170 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -75,18 +75,17 @@ end rm(filename) end -@testset "Test generateATEs" begin +@testset "Test factorialATE" begin dataset = DataFrame(C=[1, 2, 3, 4],) - @test_throws ArgumentError TargetedEstimation.build_estimands_list("generateATEs", dataset) + @test_throws ArgumentError TargetedEstimation.build_estimands_list("factorialATE", dataset) dataset.T = [0, 1, missing, 2] - @test_throws ArgumentError TargetedEstimation.build_estimands_list("generateATEs", dataset) + @test_throws ArgumentError TargetedEstimation.build_estimands_list("factorialATE", dataset) dataset.Y = [0, 1, 2, 2] dataset.W1 = [1, 1, 1, 1] dataset.W_2 = [1, 1, 1, 1] - composedATE = TargetedEstimation.build_estimands_list("generateATEs", dataset)[1] + composedATE = TargetedEstimation.build_estimands_list("factorialATE", dataset)[1] @test composedATE.args == ( TMLE.StatisticalATE(:Y, (T = (case = 1, control = 0),), (T = (:W1, :W_2),), ()), - 
TMLE.StatisticalATE(:Y, (T = (case = 2, control = 0),), (T = (:W1, :W_2),), ()), TMLE.StatisticalATE(:Y, (T = (case = 2, control = 1),), (T = (:W1, :W_2),), ()) ) end From 43a215c0b190df99030e8cd4cd9e9e0c44eacede Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Mon, 5 Feb 2024 09:55:17 +0100 Subject: [PATCH 71/71] remove build app for now and postpone to later --- .github/workflows/CI.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f04e246..c651df5 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -34,12 +34,12 @@ jobs: with: files: lcov.info - uses: julia-actions/julia-processcoverage@v1 - - name: Build App - run: julia --project --startup-file=no deps/build_app.jl - - uses: actions/upload-artifact@v4 - with: - name: tmle-${{ matrix.os }}-${{ matrix.arch }} - path: tmle + # - name: Build App + # run: julia --project --startup-file=no deps/build_app.jl + # - uses: actions/upload-artifact@v4 + # with: + # name: tmle-${{ matrix.os }}-${{ matrix.arch }} + # path: tmle docs: name: Documentation runs-on: ubuntu-latest