From b980a36e47eb436d805380fb60b7c7d307f67061 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 17 Nov 2023 10:33:46 +0000
Subject: [PATCH 01/71] WIP

---
 docs/src/tmle_estimation.md                   |   2 +-
 estimands_test.yaml                           | 102 +++++++++++
 scripts/tmle.jl                               |  44 +++--
 src/TargetedEstimation.jl                     |   6 +-
 src/cache_managers.jl                         |  65 +++++++
 src/merge.jl                                  |   4 +-
 src/runner.jl                                 |  98 +++++++++++
 src/sieve_variance.jl                         |   4 +-
 src/tmle.jl                                   |  94 ----------
 src/utils.jl                                  | 151 ++++++++--------
 test/cache_managers.jl                        |  86 ++++++++++
 test/config/failing_parameters.yaml           |  10 +-
 .../{tmle_config_2.jl => ose_config.jl}       |  11 +-
 test/config/parameters.yaml                   |  60 +++----
 test/config/sieve_tests_parameters_1.yaml     |  12 +-
 test/config/sieve_tests_parameters_2.yaml     |   4 +-
 test/config/tmle_config.jl                    |  16 +-
 test/data/merge/empty_sieve.csv               |   2 +-
 test/data/merge/sieve_output_1.csv            |  14 +-
 test/data/merge/sieve_output_2.csv            |   6 +-
 test/data/merge/tmle_output_1.csv             |  14 +-
 test/data/merge/tmle_output_2.csv             |   6 +-
 test/load_tmle_spec.jl                        | 113 ------------
 test/merge.jl                                 |   6 +-
 test/resampling.jl                            |   1 -
 test/{tmle.jl => runner.jl}                   |  89 ++++++----
 test/runtests.jl                              |   5 +-
 test/sieve_variance.jl                        |  14 +-
 test/testutils.jl                             |  57 ++++++
 test/utils.jl                                 | 162 +++++++++++-------
 30 files changed, 759 insertions(+), 499 deletions(-)
 create mode 100644 estimands_test.yaml
 create mode 100644 src/cache_managers.jl
 create mode 100644 src/runner.jl
 delete mode 100644 src/tmle.jl
 create mode 100644 test/cache_managers.jl
 rename test/config/{tmle_config_2.jl => ose_config.jl} (83%)
 delete mode 100644 test/load_tmle_spec.jl
 rename test/{tmle.jl => runner.jl} (70%)
 create mode 100644 test/testutils.jl

diff --git a/docs/src/tmle_estimation.md b/docs/src/tmle_estimation.md
index 4917922..78984cb 100644
--- a/docs/src/tmle_estimation.md
+++ b/docs/src/tmle_estimation.md
@@ -34,7 +34,7 @@ The output file is a plain CSV file containing one line per estimand in the inpu
 - `TREATMENTS`: A "_&_" separated string containing all treatment variables associated with the estimand.
 - `CASE`: A "_&_" separated string containing the treatment variables' case values in the same order as `TREATMENTS`.
 - `CONTROL`: A "_&_" separated string containing the treatment variables' control values in the same order as `TREATMENTS`.
-- `TARGET`: The outcome variable.
+- `OUTCOME`: The outcome variable.
 - `CONFOUNDERS`: A "_&_" separated string containing the confounding variables.
 - `COVARIATES`: A "_&_" separated string containing the extra covariates used to estimate the outcome's mean.
 - `INITIAL_ESTIMATE`: The initial estimate before the targeting step.
diff --git a/estimands_test.yaml b/estimands_test.yaml
new file mode 100644
index 0000000..34ae410
--- /dev/null
+++ b/estimands_test.yaml
@@ -0,0 +1,102 @@
+type: "Configuration"
+estimands:
+  - outcome_extra_covariates:
+      - C1
+    type: "IATE"
+    treatment_values:
+      T2:
+        case: true
+        control: false
+      T1:
+        case: true
+        control: false
+    outcome: CONTINUOUS, outcome
+    treatment_confounders:
+      T2:
+        - W1
+        - W2
+      T1:
+        - W1
+        - W2
+  - outcome_extra_covariates:
+      - C1
+    type: "IATE"
+    treatment_values:
+      T2:
+        case: true
+        control: false
+      T1:
+        case: true
+        control: false
+    outcome: BINARY/outcome
+    treatment_confounders:
+      T2:
+        - W1
+        - W2
+      T1:
+        - W1
+        - W2
+  - outcome_extra_covariates: []
+    type: "ATE"
+    treatment_values:
+      T1:
+        case: true
+        control: false
+    outcome: CONTINUOUS, outcome
+    treatment_confounders:
+      T1:
+        - W1
+        - W2
+  - outcome_extra_covariates: []
+    type: "IATE"
+    treatment_values:
+      T2:
+        case: false
+        control: true
+      T1:
+        case: true
+        control: false
+    outcome: CONTINUOUS, outcome
+    treatment_confounders:
+      T2:
+        - W1
+        - W2
+      T1:
+        - W1
+        - W2
+  - outcome_extra_covariates:
+      - C1
+    type: "IATE"
+    treatment_values:
+      T2:
+        case: false
+        control: true
+      T1:
+        case: true
+        control: false
+    outcome: BINARY/outcome
+    treatment_confounders:
+      T2:
+        - W1
+        - W2
+      T1:
+        - W1
+        - W2
+  - outcome_extra_covariates:
+      - C1
+    type: "ATE"
+    treatment_values:
+      T2:
+        case: true
+        control: false
+      T1:
+        case: true
+        control: false
+    outcome: CONTINUOUS, outcome
+    treatment_confounders:
+      T2:
+        - W1
+        - W2
+      T1:
+        - W1
+        - W2
diff --git a/scripts/tmle.jl b/scripts/tmle.jl
index d16849f..40df722 100644
--- a/scripts/tmle.jl
+++ b/scripts/tmle.jl
@@ -3,37 +3,57 @@ using TargetedEstimation
 
 function parse_commandline()
     s = ArgParseSettings(
-        description = "Targeted Learning estimation",
+        description = "Targeted Learning Estimation",
         commands_are_required = false,
         version = "0.2",
         add_version = true)
 
     @add_arg_table s begin
-        "data"
+        "dataset"
             help = "Path to dataset file (.csv|.arrow)"
             required = true
-        "param-file"
-            help = "A file (.yaml|.bin) listing all parameters to estimate."
+        "estimands-config"
+            help = "A .yaml file listing all parameters to estimate."
             required = true
-        "csv-out"
-            help = "Path to output `.csv` file"
-            required = true
-        "--estimator-file"
-            help = "A file (.jl) describing the tmle estimator to use, README.md"
+        "--estimators-config"
+            help = "A file (.jl) defining the estimators to be used."
             arg_type= String
             required = false
         "--hdf5-out"
-            help = "If the influence curves also need to be stored (see also: --pval-threshold)"
+            help = "Stores the results in a HDF5 file format (see also: --pval-threshold)."
             arg_type = String
             default = nothing
+        "--csv-out"
+            help = "Path to an output `.csv` file."
+            required = true
         "--pval-threshold"
-            help = "Only those parameters passing the threshold will have their influence curve saved."
+            help = """In order to save disk space, only estimation results with a p-value lesser than 
+            the threshold will have their influence curve saved. (default = 1., i.e. all influence curves are saved).
+            """
             default = 1.
             arg_type = Float64
+        "--sort-estimands"
+            help = "If estimands should be sorted to minimize memory usage, see also: cache-strategy."
+            default = false 
+            arg_type = Bool
+        "--cache-strategy"
+            help = string("Nuisance functions are stored in the cache during estimation. The cache can be released from these",
+            " functions to limit memory consumption. There are currently 3 caching management strategies: ",
+            "'release_unusable' (default): Will release the cache from nuisance functions that won't be used in the future. ",
+            "'K': Will keep the cache size under K nuisance functions. ",
+            "'no_cache': Disables caching. ",
+            "Note that caching strategies are better used in conjunction with `--sort-estimands` to minimized memory usage."
+            )
+            default = "release_unusable"
+            arg_type = String
         "--chunksize"
-            help = "Results will be appended to outfiles every chunk"
+            help = "Results are appended to outfiles in chunks."
             default = 100
             arg_type = Int
+        "--rng"
+            help = "Random seed"
+            default = 123
+            arg_type = Int
         "--verbosity", "-v"
             help = "Verbosity level"
             arg_type = Int
diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index 6c40623..88a6d4e 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -24,10 +24,12 @@ using MultipleTesting
 using Combinatorics
 using Tables
 using Random
+using YAML
 
 import MLJModelInterface
 
-include("tmle.jl")
+include("cache_managers.jl")
+include("runner.jl")
 include("utils.jl")
 include("sieve_variance.jl")
 include("merge.jl")
@@ -36,7 +38,7 @@ include(joinpath("models", "glmnet.jl"))
 include(joinpath("models", "adaptive_interaction_transformer.jl"))
 include(joinpath("models", "biallelic_snp_encoder.jl"))
 
-export tmle_estimation, sieve_variance_plateau, merge_csv_files
+export run_estimation, sieve_variance_plateau, merge_csv_files
 export GLMNetRegressor, GLMNetClassifier
 export RestrictedInteractionTransformer, BiAllelicSNPEncoder
 export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV
diff --git a/src/cache_managers.jl b/src/cache_managers.jl
new file mode 100644
index 0000000..4f0a3f6
--- /dev/null
+++ b/src/cache_managers.jl
@@ -0,0 +1,65 @@
+abstract type CacheManager end
+
+struct ReleaseUnusableCacheManager <: CacheManager
+    cache::Dict
+    η_counts::Dict
+    ReleaseUnusableCacheManager(η_counts) = new(Dict(), η_counts)
+end
+
+function release!(cache_manager::ReleaseUnusableCacheManager, Ψ)
+    # Always drop fluctuations
+    delete!(cache_manager.cache, :last_fluctuation)
+    # `delete!` (unlike `pop!`) is a no-op for keys that were never cached, e.g. after a failed estimation
+    η = TMLE.get_relevant_factors(Ψ)
+    # Propensity scores
+    for ps in η.propensity_score
+        cache_manager.η_counts[ps] -= 1
+        if cache_manager.η_counts[ps] == 0
+            delete!(cache_manager.cache, ps)
+        end
+    end
+    # Outcome Mean
+    cache_manager.η_counts[η.outcome_mean] -= 1
+    if cache_manager.η_counts[η.outcome_mean] == 0
+        delete!(cache_manager.cache, η.outcome_mean)
+    end
+end
+
+struct MaxSizeCacheManager <: CacheManager
+    cache::Dict
+    max_size::Int
+    MaxSizeCacheManager(max_size) = new(Dict(), max_size)
+end
+
+function release!(cache_manager::MaxSizeCacheManager, Ψ)
+    while length(cache_manager.cache) > cache_manager.max_size
+        # Prioritize the release of the last fluctuation
+        if haskey(cache_manager.cache, :last_fluctuation)
+            pop!(cache_manager.cache, :last_fluctuation)
+        else
+            pop!(cache_manager.cache)
+        end
+    end
+end
+
+struct NoCacheManager <: CacheManager
+    cache::Dict
+    NoCacheManager() = new(Dict())
+end
+
+function release!(cache_manager::NoCacheManager, Ψ)
+    empty!(cache_manager.cache)
+end
+
+function make_cache_manager(estimands, string)
+    if string == "release_unusable"
+        return ReleaseUnusableCacheManager(TMLE.nuisance_counts(estimands))
+    elseif string == "no_cache"
+        return NoCacheManager()
+    else
+        return MaxSizeCacheManager(parse(Int, string))
+    end
+end
+
+
+
diff --git a/src/merge.jl b/src/merge.jl
index 3c34649..4fd4b1f 100644
--- a/src/merge.jl
+++ b/src/merge.jl
@@ -22,7 +22,7 @@ function load_csv_files(data, files)
     return data
 end
 
-joining_keys() = ["PARAMETER_TYPE", "TREATMENTS", "CASE", "CONTROL", "TARGET", "CONFOUNDERS", "COVARIATES"]
+joining_keys() = ["PARAMETER_TYPE", "TREATMENTS", "CASE", "CONTROL", "OUTCOME", "CONFOUNDERS", "COVARIATES"]
 
 function merge_csv_files(parsed_args)
     tmle_files = files_matching_prefix_and_suffix(
@@ -45,7 +45,7 @@ function merge_csv_files(parsed_args)
     end
 
     # Pvalue Adjustment by Target
-    for gp in groupby(data, :TARGET)
+    for gp in groupby(data, :OUTCOME)
         gp.TRAIT_ADJUSTED_TMLE_PVALUE = gp[:, :TMLE_PVALUE]
         pvalues = collect(skipmissing(gp.TMLE_PVALUE))
         if length(pvalues) > 0
diff --git a/src/runner.jl b/src/runner.jl
new file mode 100644
index 0000000..b9127cc
--- /dev/null
+++ b/src/runner.jl
@@ -0,0 +1,98 @@
+struct FailedEstimation
+    message::String
+end
+
+# Bundles the estimators, estimands, dataset, cache manager and output settings of an estimation run.
+mutable struct Runner
+    estimators::NamedTuple
+    estimands::Vector{TMLE.Estimand}
+    dataset::DataFrame
+    cache_manager::CacheManager
+    chunksize::Int
+    pvalue_threshold::Float64
+    output_ios::NamedTuple
+    verbosity::Int
+    function Runner(parsed_args)
+        datafile = parsed_args["dataset"]
+        paramfile = parsed_args["estimands-config"]
+        estimatorfile = parsed_args["estimators-config"]
+        verbosity = parsed_args["verbosity"]
+        csv_filename = parsed_args["csv-out"]
+        hdf5_filename = parsed_args["hdf5-out"]
+        pvalue_threshold = parsed_args["pval-threshold"]
+        chunksize = parsed_args["chunksize"]
+        rng = parsed_args["rng"]
+        cache_strategy = parsed_args["cache-strategy"]
+        sort_estimands = parsed_args["sort-estimands"]
+        # Output IOs
+        output_ios = (CSV=csv_filename, HDF5=hdf5_filename)
+        # Retrieve TMLE specifications
+        estimators = TargetedEstimation.load_tmle_spec(estimatorfile)
+        # Load dataset
+        dataset = TargetedEstimation.instantiate_dataset(datafile)
+        # Read parameter files
+        estimands = TargetedEstimation.proofread_estimands_from_yaml(paramfile, dataset)
+        if sort_estimands
+            estimands = groups_ordering(estimands;
+                brute_force=true,
+                do_shuffle=true,
+                rng=MersenneTwister(rng),
+                verbosity=verbosity
+            )
+        end
+        cache_manager = make_cache_manager(estimands, cache_strategy)
+        return new(estimators, estimands, dataset, cache_manager, chunksize, pvalue_threshold, output_ios, verbosity)
+    end
+end
+
+# Estimates each estimand indexed by `partition` with every estimator; returns one NamedTuple per estimand.
+function (runner::Runner)(partition)
+    results = Vector{NamedTuple}(undef, size(partition, 1))
+    for (partition_index, param_index) in enumerate(partition)
+        Ψ = runner.estimands[param_index]
+        # Make sure data types are appropriate for the estimand
+        TargetedEstimation.coerce_types!(runner.dataset, Ψ)
+        # Run every estimator; `result = try ...` is needed because `try` opens its own local scope
+        estimators_results = []
+        for estimator in runner.estimators
+            result = try
+                first(estimator(Ψ, runner.dataset,
+                    cache=runner.cache_manager.cache,
+                    verbosity=runner.verbosity,
+                ))
+            catch e
+                # On error, keep the error message in place of the estimate so that
+                # the remaining estimators and estimands are still processed
+                FailedEstimation(string(e))
+            end
+            push!(estimators_results, result)
+        end
+        # Update results
+        results[partition_index] = NamedTuple{keys(runner.estimators)}(estimators_results)
+        # Release cache
+        release!(runner.cache_manager, Ψ)
+        # Try clean C memory
+        GC.gc()
+        if Sys.islinux()
+            ccall(:malloc_trim, Cvoid, (Cint,), 0)
+        end
+    end
+    return results
+end
+
+function (runner::Runner)()
+    # Split worklist in partitions of at most `runner.chunksize` estimands
+    nparams = size(runner.estimands, 1)
+    for partition in Iterators.partition(1:nparams, runner.chunksize)
+        results = runner(partition)
+        # Append CSV result with partition
+        append_csv(runner.output_ios.CSV, results)
+        # Append HDF5 result, filtered by the p-value threshold
+        update_jld2_output(runner.output_ios.HDF5, partition, results, runner.dataset; pval_threshold=runner.pvalue_threshold)
+    end
+
+    runner.verbosity >= 1 && @info "Done."
+    return 0
+end
+
+run_estimation(parsed_args) = Runner(parsed_args)()
\ No newline at end of file
diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index 27c4c04..f6dfeb6 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -28,7 +28,7 @@ sieve_dataframe() = DataFrame(
     TREATMENTS=String[], 
     CASE=String[], 
     CONTROL=Union{String, Missing}[], 
-    TARGET=String[], 
+    OUTCOME=String[], 
     CONFOUNDERS=String[], 
     COVARIATES=Union{String, Missing}[], 
     TMLE_ESTIMATE=Float64[],
@@ -39,7 +39,7 @@ empty_sieve_output() = DataFrame(
     TREATMENTS=String[], 
     CASE=String[], 
     CONTROL=Union{String, Missing}[], 
-    TARGET=String[], 
+    OUTCOME=String[], 
     CONFOUNDERS=String[], 
     COVARIATES=Union{String, Missing}[], 
     SIEVE_STD = Float64[],
diff --git a/src/tmle.jl b/src/tmle.jl
deleted file mode 100644
index 9f0a62e..0000000
--- a/src/tmle.jl
+++ /dev/null
@@ -1,94 +0,0 @@
-struct MissingTMLEResult
-    parameter::TMLE.Parameter
-end
-
-function try_tmle!(cache; verbosity=1, threshold=1e-8, weighted_fluctuation=false)
-    try
-        tmle_result, _ = tmle!(cache; verbosity=verbosity, threshold=threshold, weighted_fluctuation=weighted_fluctuation)
-        return tmle_result, missing
-    catch e
-        @warn string("Failed to run Targeted Estimation for parameter:", cache.Ψ)
-        return MissingTMLEResult(cache.Ψ), string(e)
-    end
-end
-
-
-function partition_tmle!(
-    cache, 
-    tmle_results, 
-    logs, 
-    partition,
-    tmle_spec,
-    parameters,
-    variables; 
-    verbosity=0)
-    for (partition_index, param_index) in enumerate(partition)
-        previous_target_is_binary = isdefined(cache, :Ψ) ? cache.Ψ.target ∈ variables.binarytargets : nothing
-        Ψ = parameters[param_index]
-        # Update cache with new Ψ
-        update!(cache, Ψ)
-        # Maybe update cache with new η_spec
-        target_is_binary = Ψ.target ∈ variables.binarytargets
-        if !isdefined(cache, :η_spec) || !(target_is_binary === previous_target_is_binary)
-            Q_spec = target_is_binary ? tmle_spec.Q_binary : tmle_spec.Q_continuous
-            η_spec = NuisanceSpec(Q_spec, tmle_spec.G, cache=tmle_spec.cache)
-            update!(cache, η_spec)
-        end
-        # Run TMLE
-        tmle_result, log = TargetedEstimation.try_tmle!(
-            cache; 
-            verbosity=verbosity, 
-            threshold=tmle_spec.threshold, 
-            weighted_fluctuation=tmle_spec.weighted_fluctuation
-        )
-        # Update results
-        tmle_results[partition_index] = tmle_result
-        logs[partition_index] = log
-
-        # Try clean C memory
-        GC.gc()
-        if Sys.islinux()
-            ccall(:malloc_trim, Cvoid, (Cint,), 0)
-        end
-    end
-end
-
-function tmle_estimation(parsed_args)
-    datafile = parsed_args["data"]
-    paramfile = parsed_args["param-file"]
-    estimatorfile = parsed_args["estimator-file"]
-    verbosity = parsed_args["verbosity"]
-    csv_file = parsed_args["csv-out"]
-    jld2_file = parsed_args["hdf5-out"]
-    pval_threshold = parsed_args["pval-threshold"]
-    chunksize = parsed_args["chunksize"]
-
-    # Load dataset
-    dataset = TargetedEstimation.instantiate_dataset(datafile)
-    # Read parameter files
-    parameters = TargetedEstimation.read_parameters(paramfile, dataset)
-    optimize_ordering!(parameters)
-
-    # Get covariate, confounder and treatment columns
-    variables = TargetedEstimation.variables(parameters, dataset)
-    TargetedEstimation.coerce_types!(dataset, variables)
-    
-    # Retrieve TMLE specifications
-    tmle_spec = TargetedEstimation.load_tmle_spec(estimatorfile)
-
-    cache = TMLECache(dataset)
-    nparams = size(parameters, 1)
-    for partition in Iterators.partition(1:nparams, chunksize)
-        partition_size = size(partition, 1)
-        tmle_results = Vector{Union{TMLE.TMLEResult, MissingTMLEResult}}(undef, partition_size)
-        logs = Vector{Union{String, Missing}}(undef, partition_size)
-        partition_tmle!(cache, tmle_results, logs, partition, tmle_spec, parameters, variables; verbosity=verbosity)
-        # Append CSV result with partition
-        append_csv(csv_file, tmle_results, logs)
-        # Append HDF5 result if save-ic is true
-        update_jld2_output(jld2_file, partition, tmle_results, dataset; pval_threshold=pval_threshold)
-    end
-
-    verbosity >= 1 && @info "Done."
-    return 0
-end
diff --git a/src/utils.jl b/src/utils.jl
index 9ca8dea..f76b3a2 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -10,7 +10,7 @@ empty_tmle_output(;size=0) = DataFrame(
     TREATMENTS=Vector{String}(undef, size), 
     CASE=Vector{String}(undef, size), 
     CONTROL=Vector{Union{Missing, String}}(undef, size), 
-    TARGET=Vector{String}(undef, size), 
+    OUTCOME=Vector{String}(undef, size), 
     CONFOUNDERS=Vector{String}(undef, size), 
     COVARIATES=Vector{Union{Missing, String}}(undef, size), 
     INITIAL_ESTIMATE=Vector{Union{Missing, Float64}}(undef, size), 
@@ -28,9 +28,9 @@ empty_tmle_output(;size=0) = DataFrame(
 )
 
 covariates_string(Ψ; join_string="_&_") = 
-    length(Ψ.covariates) != 0 ? join(Ψ.covariates, join_string) : missing
+    length(Ψ.outcome_extra_covariates) != 0 ? join(Ψ.outcome_extra_covariates, join_string) : missing
 
-function param_string(param::T) where T <: TMLE.Parameter
+function param_string(param::T) where T <: TMLE.Estimand
     str = string(T)
     return startswith(str, "TMLE.") ? str[6:end] : str
 end
@@ -44,7 +44,7 @@ control_string(t::Tuple{Vararg{NamedTuple}}; join_string="_&_") =
 
 control_string(t; join_string="_&_") = missing
 
-control_string(Ψ::TMLE.Parameter; join_string="_&_") = 
+control_string(Ψ::TMLE.Estimand; join_string="_&_") = 
     control_string(values(Ψ.treatment); join_string=join_string)
 
 treatment_string(Ψ; join_string="_&_") = join(keys(Ψ.treatment), join_string)
@@ -60,7 +60,7 @@ function statistics_from_estimator(estimator)
     return (Ψ̂, std, pval, l, u)
 end
 
-function statistics_from_result(result::TMLE.TMLEResult)
+function statistics_from_result(result::TMLE.Estimate)
     Ψ̂₀ = result.initial
     # TMLE stats
     tmle_stats = statistics_from_estimator(result.tmle) 
@@ -69,7 +69,7 @@ function statistics_from_result(result::TMLE.TMLEResult)
     return Ψ̂₀, tmle_stats, onestep_stats
 end
 
-statistics_from_result(result::MissingTMLEResult) = 
+statistics_from_result(result::FailedEstimation) = 
     missing, 
     (missing, missing, missing, missing, missing), 
     (missing, missing, missing, missing, missing)
@@ -109,7 +109,7 @@ function update_jld2_output(jld2_file::String, partition, tmle_results, dataset;
 
             for (partition_index, param_index) in enumerate(partition)
                 r = tmle_results[partition_index]
-                if (r isa TMLE.TMLEResult) && (pvalue(OneSampleZTest(r.tmle)) <= pval_threshold)
+                if (r isa TMLE.Estimate) && (pvalue(OneSampleZTest(r.tmle)) <= pval_threshold)
                     current_variables = variables(r.parameter)
                     if previous_variables != current_variables
                         sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables)
@@ -127,52 +127,60 @@ function update_jld2_output(jld2_file::String, partition, tmle_results, dataset;
 end
 
 #####################################################################
-#####                    Read Parameters                         ####
+#####                    Read Estimands                         ####
 #####################################################################
 
 
-function treatment_values(Ψ::Union{IATE, ATE}, treatment_names, treatment_types)
+function convert_treatment_values(treatment_levels::NamedTuple{names, <:Tuple{Vararg{NamedTuple}}}, treatment_types) where names
     return [(
-        case = convert(treatment_types[tn], Ψ.treatment[tn].case), 
-        control = convert(treatment_types[tn], Ψ.treatment[tn].control)
+        case = convert(treatment_types[tn], treatment_levels[tn].case), 
+        control = convert(treatment_types[tn], treatment_levels[tn].control)
     ) 
-        for tn in treatment_names]
+        for tn in names]
 end
 
-treatment_values(Ψ::CM, treatment_names, treatment_types) = 
-    [convert(treatment_types[tn], Ψ.treatment[tn]) for tn in treatment_names]
+convert_treatment_values(treatment_levels::NamedTuple{names,}, treatment_types) where names = 
+    [convert(treatment_types[tn], treatment_levels[tn]) for tn in names]
 
-"""
-    parameters_from_yaml(param_file, dataset)
+MissingSCMError() = ArgumentError(string("A Structural Causal Model should be provided in the configuration file in order to identify causal estimands."))
+
+get_identification_method(method::Nothing) = BackdoorAdjustment()
+get_identification_method(method) = method
+
+maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::SCM, method) = 
+    identify(get_identification_method(method), Ψ, scm)
+
+maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::Nothing, method) = throw(MissingSCMError())
+
+maybe_identify(Ψ, scm, method) = Ψ
 
-Reads parameters from file and ensures that the parameters treatment in the config file
-respect the treatment types in the dataset.
 """
-function read_parameters(param_file, dataset)
-    parameters = if any(endswith(param_file, ext) for ext in ("yaml", "yml"))
-        parameters_from_yaml(param_file)
-    else
-        deserialize(param_file)
-    end
+    proofread_estimands_from_yaml(filename, dataset)
 
+Reads estimands from a YAML configuration file and converts the treatment values it
+declares to the element types of the corresponding treatment columns in the dataset.
+"""
+function proofread_estimands_from_yaml(filename, dataset)
+    config = configuration_from_yaml(filename)
+    estimands = Vector{TMLE.Estimand}(undef, length(config.estimands))
     treatment_types = Dict()
-    for index in eachindex(parameters)
-        Ψ = parameters[index]
-        treatment_names = keys(Ψ.treatment)
+    for (index, Ψ) in enumerate(config.estimands)
+        statisticalΨ = TargetedEstimation.maybe_identify(Ψ, config.scm, config.adjustment)
+        treatment_names = keys(statisticalΨ.treatment_values)
         for tn in treatment_names
             haskey(treatment_types, tn) ? nothing : treatment_types[tn] = eltype(dataset[!, tn])
         end
         new_treatment = NamedTuple{treatment_names}(
-            treatment_values(Ψ, treatment_names, treatment_types)
+            TargetedEstimation.convert_treatment_values(statisticalΨ.treatment_values, treatment_types)
         )
-        parameters[index] = typeof(Ψ)(
-            target = Ψ.target,
-            treatment = new_treatment,
-            confounders = Ψ.confounders,
-            covariates = Ψ.covariates
+        estimands[index] = typeof(statisticalΨ)(  # rebuild with the identified type, whose fields are used below
+            outcome = statisticalΨ.outcome,
+            treatment_values = new_treatment,
+            treatment_confounders = statisticalΨ.treatment_confounders,
+            outcome_extra_covariates = statisticalΨ.outcome_extra_covariates
         )
     end
-    return collect(parameters)
+    return estimands
 end
 
 #####################################################################
@@ -194,13 +202,15 @@ instantiate_dataset(path::String) =
 
 isbinary(col, dataset) = Set(unique(skipmissing(dataset[!, col]))) == Set([0, 1])
 
+make_categorical(x::CategoricalVector, ordered) = x
+make_categorical(x, ordered) = categorical(x, ordered=ordered)
 
 function make_categorical!(dataset, colname::Union{String, Symbol}; infer_ordered=false)
     ordered = false
     if infer_ordered
         ordered = eltype(dataset[!, colname]) <: Real
     end
-    dataset[!, colname] = categorical(dataset[!, colname], ordered=ordered)
+    dataset[!, colname] = make_categorical(dataset[!, colname], ordered)
 end
 
 function make_categorical!(dataset, colnames; infer_ordered=false)
@@ -209,8 +219,10 @@ function make_categorical!(dataset, colnames; infer_ordered=false)
     end
 end
 
+make_float(x) = float(x)
+
 make_float!(dataset, colname::Union{String, Symbol}) = 
-    dataset[!, colname] = float(dataset[!, colname])
+    dataset[!, colname] = make_float(dataset[!, colname])
 
 function make_float!(dataset, colnames)
     for colname in colnames
@@ -218,55 +230,36 @@ function make_float!(dataset, colnames)
     end
 end
 
-function coerce_types!(dataset, variables)
-    # Treatment columns are converted to categorical
-    make_categorical!(dataset, variables.treatments, infer_ordered=true)
-    # Confounders and Covariates are converted to Float64
-    make_float!(dataset, vcat(variables.confounders, variables.covariates))
-    # Binary targets are converted to categorical
-    make_categorical!(dataset, variables.binarytargets, infer_ordered=false)
-    # Continuous targets are converted to Float64
-    make_float!(dataset, variables.continuoustargets)
+function coerce_types!(dataset, Ψ)
+    categorical_variables = Set(keys(Ψ.treatment_values))
+    continuous_variables = Set(Iterators.flatten(values(Ψ.treatment_confounders)))
+    union!(continuous_variables, Ψ.outcome_extra_covariates) 
+    TMLE.is_binary(dataset, Ψ.outcome) ? 
+        push!(categorical_variables, Ψ.outcome) : 
+        push!(continuous_variables, Ψ.outcome)
+    make_categorical!(dataset, categorical_variables, infer_ordered=true)
+    make_float!(dataset, continuous_variables)
 end
 
-variables(Ψ::TMLE.Parameter) = (
-    target = Ψ.target, 
-    covariates = Ψ.covariates, 
-    confounders = Ψ.confounders,
-    treatments = keys(Ψ.treatment)
-    )
-
-function variables(parameters::Vector{<:TMLE.Parameter}, dataset)
-    treatments = Set{Symbol}()
-    confounders = Set{Symbol}()
-    covariates = Set{Symbol}()
-    binarytargets = Set{Symbol}()
-    continuoustargets = Set{Symbol}()
-    for Ψ in parameters
-        push!(treatments, keys(Ψ.treatment)...)
-        push!(confounders, Ψ.confounders...)
-        length(Ψ.covariates) > 0 && push!(covariates, Ψ.covariates...)
-        isbinary(Ψ.target, dataset) ? push!(binarytargets, Ψ.target) : push!(continuoustargets, Ψ.target)
-    end
-    return (
-        treatments=treatments, 
-        confounders=confounders, 
-        covariates=covariates, 
-        binarytargets=binarytargets,
-        continuoustargets
+variables(Ψ::TMLE.Estimand) = (
+    outcome = Ψ.outcome, 
+    covariates = Ψ.outcome_extra_covariates, 
+    confounders = Ψ.treatment_confounders,
+    treatments = keys(Ψ.treatment_values)
     )
-end
 
 load_tmle_spec(file::Nothing) = (
-    cache        = false,
-    weighted_fluctuation = false,
-    threshold    = 1e-8,
-    Q_continuous = LinearRegressor(),
-    Q_binary = LogisticClassifier(lambda=0.),
-    G = LogisticClassifier(lambda=0.)
-  )
+    TMLE = TMLEE(
+        models = TMLE.default_models(
+            Q_binary = LogisticClassifier(lambda=0.),
+            Q_continuous = LinearRegressor(),
+            G = LogisticClassifier(lambda=0.)
+        ),
+        weighted = true, 
+        ),
+    )
 
 function load_tmle_spec(file)
     include(abspath(file))
-    return merge(load_tmle_spec(nothing), tmle_spec::NamedTuple)
+    return ESTIMATORS
 end
\ No newline at end of file
diff --git a/test/cache_managers.jl b/test/cache_managers.jl
new file mode 100644
index 0000000..6574680
--- /dev/null
+++ b/test/cache_managers.jl
@@ -0,0 +1,86 @@
+module TestCacheManagers
+# Unit tests for the `release!` behaviour of each cache manager.
+using TargetedEstimation
+using Test
+using TMLE
+
+@testset "Test NoCacheManager" begin
+    cache_manager = TargetedEstimation.NoCacheManager()
+    cache_manager.cache["Toto"] = 1
+    cache_manager.cache["Tata"] = 2
+    TargetedEstimation.release!(cache_manager, nothing)
+    @test cache_manager.cache == Dict()
+end
+
+@testset "Test MaxSizeCacheManager" begin
+    cache_manager = TargetedEstimation.MaxSizeCacheManager(3)
+    cache_manager.cache["Toto"] = 1
+    cache_manager.cache["Tata"] = 2
+    TargetedEstimation.release!(cache_manager, nothing)
+    @test cache_manager.cache == Dict("Toto" => 1, "Tata" => 2)
+    cache_manager.cache["Titi"] = 3
+    cache_manager.cache["Tutu"] = 4
+    @test length(cache_manager.cache) == 4
+    TargetedEstimation.release!(cache_manager, nothing)
+    @test length(cache_manager.cache) == 3
+end
+
+@testset "Test ReleaseUnusableCacheManager" begin
+    estimands = [
+        ATE(
+            outcome=:Y,
+            treatment_values=(T₁=(case=1, control=0), T₂=(case=1, control=0)),
+            treatment_confounders=(T₁=[:W], T₂=[:W])
+        ),
+        ATE(
+            outcome=:Y,
+            treatment_values=(T₁=(case=1, control=0), T₂=(case=2, control=0)),
+            treatment_confounders=(T₁=[:W], T₂=[:W])
+        ),
+        ATE(
+            outcome=:Y,
+            treatment_values=(T₁=(case=1, control=0),),
+            treatment_confounders=(T₁=[:W],)
+        ),
+        ATE(
+            outcome=:Ynew,
+            treatment_values=(T₃=(case=1, control=0),),
+            treatment_confounders=(T₃=[:W],)
+        )
+    ]
+    η_counts = TMLE.nuisance_counts(estimands)
+    cache_manager = TargetedEstimation.ReleaseUnusableCacheManager(η_counts)
+    # Estimation of the first estimand will fill the cache with the following
+    Y_T₁T₂ = TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W))
+    cache_manager.cache[Y_T₁T₂] = 1
+    T₁_W = TMLE.ConditionalDistribution(:T₁, (:W,))
+    cache_manager.cache[T₁_W] = 1
+    T₂_W = TMLE.ConditionalDistribution(:T₂, (:W,))
+    cache_manager.cache[T₂_W] = 1
+    cache_manager.cache[:last_fluctuation] = 1
+    @test length(cache_manager.cache) == 4
+    # After estimation of the first estimand, only the fluctuation is released
+    TargetedEstimation.release!(cache_manager, estimands[1])
+    @test length(cache_manager.cache) == 3
+
+    # Estimation of the second estimand adds no new nuisance functions to the cache;
+    # once it is released, Y_T₁T₂ and T₂_W are no longer needed
+    TargetedEstimation.release!(cache_manager, estimands[2])
+    @test length(cache_manager.cache) == 1
+    @test !haskey(cache_manager.cache, T₂_W)
+    @test !haskey(cache_manager.cache, Y_T₁T₂)
+    @test haskey(cache_manager.cache, T₁_W)
+
+    # Estimation of the third estimand will fill the cache with the following
+    Y_T₁ = TMLE.ConditionalDistribution(:Y, (:T₁, :W))
+    cache_manager.cache[Y_T₁] = 1
+    # Y_T₁ and T₁_W are no longer needed
+    TargetedEstimation.release!(cache_manager, estimands[3])
+    @test cache_manager.cache == Dict()
+
+
+end
+
+end
+
+true
\ No newline at end of file
diff --git a/test/config/failing_parameters.yaml b/test/config/failing_parameters.yaml
index 9991cd1..92fdeff 100644
--- a/test/config/failing_parameters.yaml
+++ b/test/config/failing_parameters.yaml
@@ -1,6 +1,6 @@
-  Parameters:
+  Estimands:
     - type: ATE
-      target: EXTREME_BINARY
-      treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-      confounders: [W1, W2]
-      covariates: [C1]
\ No newline at end of file
+      outcome: EXTREME_BINARY
+      treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
+      treatment_confounders: (T1 = [W1, W2], T2 = [W1, W2])
+      outcome_extra_covariates: [C1]
\ No newline at end of file
diff --git a/test/config/tmle_config_2.jl b/test/config/ose_config.jl
similarity index 83%
rename from test/config/tmle_config_2.jl
rename to test/config/ose_config.jl
index d77aaa0..5462955 100644
--- a/test/config/tmle_config_2.jl
+++ b/test/config/ose_config.jl
@@ -1,13 +1,7 @@
 
 evotree = EvoTreeClassifier(nrounds=10)
 
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true`` may result in faster execution but higher memory usage
-  cache = true,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = true,
-  # Propensity score threshold
-  threshold    = 1e-8,
+default_models = TMLE.default_models(
   # For the estimation of E[Y|W, T]: continuous target
   Q_continuous = Stack(
     metalearner        = LinearRegressor(fit_intercept=false),
@@ -42,3 +36,6 @@ tmle_spec = (
     )
 )
 
+ESTIMATORS = (
+  OSE  = OSE(models=default_models),
+)
\ No newline at end of file
diff --git a/test/config/parameters.yaml b/test/config/parameters.yaml
index 8399487..4dea179 100644
--- a/test/config/parameters.yaml
+++ b/test/config/parameters.yaml
@@ -1,29 +1,31 @@
-Parameters:
-  - type: IATE
-    target: CONTINUOUS, TARGET
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    confounders: [W1, W2]
-    covariates: [C1]
-  - type: IATE
-    target: "BINARY/TARGET"
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    confounders: [W1, W2]
-    covariates: [C1]
-  - type: ATE
-    target: CONTINUOUS, TARGET
-    treatment: (T1 = (control = 0, case = 1),)
-    confounders: [W1, W2]
-  - type: IATE
-    target: CONTINUOUS, TARGET
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
-    confounders: [W1, W2]
-  - type: IATE
-    target: "BINARY/TARGET"
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
-    confounders: [W1, W2]
-    covariates: [C1]
-  - type: ATE
-    target: CONTINUOUS, TARGET
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    confounders: [W1, W2]
-    covariates: [C1]
\ No newline at end of file
+Estimands:
+  - type: TMLE.StatisticalIATE
+    outcome: CONTINUOUS, OUTCOME
+    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
+    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
+    outcome_extra_covariates: (:C1,)
+  - type: TMLE.StatisticalIATE
+    outcome: "BINARY/OUTCOME"
+    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
+    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
+    outcome_extra_covariates: (:C1,)
+  - type: TMLE.StatisticalATE
+    outcome: CONTINUOUS, OUTCOME
+    treatment_values: (T1 = (control = 0, case = 1),)
+    treatment_confounders: (T1 = (:W1, :W2),)
+    outcome_extra_covariates: ()
+  - type: TMLE.StatisticalIATE
+    outcome: CONTINUOUS, OUTCOME
+    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
+    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
+    outcome_extra_covariates: ()
+  - type: TMLE.StatisticalIATE
+    outcome: "BINARY/OUTCOME"
+    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
+    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
+    outcome_extra_covariates: (:C1,)
+  - type: TMLE.StatisticalATE
+    outcome: CONTINUOUS, OUTCOME
+    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
+    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
+    outcome_extra_covariates: (:C1,)
\ No newline at end of file
diff --git a/test/config/sieve_tests_parameters_1.yaml b/test/config/sieve_tests_parameters_1.yaml
index 510b500..9edf5fe 100644
--- a/test/config/sieve_tests_parameters_1.yaml
+++ b/test/config/sieve_tests_parameters_1.yaml
@@ -1,31 +1,31 @@
 Parameters:
   - type: IATE
-    target: CONTINUOUS, TARGET
+    target: CONTINUOUS, OUTCOME
     treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
     confounders: [W1, W2]
     covariates: [C1]
   - type: IATE
-    target: CONTINUOUS, TARGET
+    target: CONTINUOUS, OUTCOME
     treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
     confounders: [W1, W2]
     covariates: [C1]
   - type: ATE
-    target: CONTINUOUS, TARGET
+    target: CONTINUOUS, OUTCOME
     treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
     confounders: [W1, W2]
     covariates: [C1]
   - type: IATE
-    target: "BINARY/TARGET"
+    target: "BINARY/OUTCOME"
     treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
     confounders: [W1, W2]
     covariates: [C1]
   - type: IATE
-    target: "BINARY/TARGET"
+    target: "BINARY/OUTCOME"
     treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
     confounders: [W1, W2]
     covariates: [C1]
   - type: ATE
-    target: "BINARY/TARGET"
+    target: "BINARY/OUTCOME"
     treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
     confounders: [W1, W2]
     covariates: [C1]
diff --git a/test/config/sieve_tests_parameters_2.yaml b/test/config/sieve_tests_parameters_2.yaml
index 371868c..0d147be 100644
--- a/test/config/sieve_tests_parameters_2.yaml
+++ b/test/config/sieve_tests_parameters_2.yaml
@@ -1,9 +1,9 @@
 Parameters:
   - type: ATE
-    target: CONTINUOUS, TARGET
+    target: CONTINUOUS, OUTCOME
     treatment: (T1 = (control = 0, case = 1),)
     confounders: [W1]
   - type: CM
-    target: CONTINUOUS, TARGET
+    target: CONTINUOUS, OUTCOME
     treatment: (T1 = 0,)
     confounders: [W1]
\ No newline at end of file
diff --git a/test/config/tmle_config.jl b/test/config/tmle_config.jl
index 4281e80..8649d9f 100644
--- a/test/config/tmle_config.jl
+++ b/test/config/tmle_config.jl
@@ -1,16 +1,10 @@
 evotree = EvoTreeClassifier(nrounds=10)
 
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache=false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 0.001,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
   Q_continuous = Stack(
     metalearner        = LinearRegressor(fit_intercept=false),
     resampling         = CV(nfolds=2),
+    cache              = false,
     interaction_glmnet = Pipeline(
       interaction_transformer = RestrictedInteractionTransformer(order=3, primary_variables_patterns=[r"^rs[0-9]+"]),
       glmnet                  = GLMNetRegressor(),
@@ -46,6 +40,7 @@ tmle_spec = (
   G = Stack(
     metalearner        = LogisticClassifier(lambda=0., fit_intercept=false),
     resampling         = StratifiedCV(nfolds=2),
+    cache              = false,
     interaction_glmnet = Pipeline(
       interaction_transformer = RestrictedInteractionTransformer(
           order=2,
@@ -58,4 +53,9 @@ tmle_spec = (
     constant           = ConstantClassifier(),
     evo                = EvoTreeClassifier(nrounds=10)
   )
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=0.001),
+  OSE  = OSE(models=default_models)
 )
\ No newline at end of file
diff --git a/test/data/merge/empty_sieve.csv b/test/data/merge/empty_sieve.csv
index 4b160ac..3241e3c 100644
--- a/test/data/merge/empty_sieve.csv
+++ b/test/data/merge/empty_sieve.csv
@@ -1 +1 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES
+PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES
diff --git a/test/data/merge/sieve_output_1.csv b/test/data/merge/sieve_output_1.csv
index 119c5fa..cfe77b9 100644
--- a/test/data/merge/sieve_output_1.csv
+++ b/test/data/merge/sieve_output_1.csv
@@ -1,7 +1,7 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB
-IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,0.10514479130506516,0.0,0.39844897646996624,0.42804034052713935
-IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,0.10514479130506516,0.0,-0.42804034052713935,-0.39844897646996624
-ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,0.14041906595230103,0.0,-0.6661267914170061,-0.6266080320986587
-IATE,T2_&_T1,1_&_1,0_&_0,BINARY/TARGET,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04427033991279833,0.04393992135752185
-IATE,T2_&_T1,0_&_1,1_&_0,BINARY/TARGET,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04393992135752185,0.04427033991279833
-ATE,T2_&_T1,1_&_1,0_&_0,BINARY/TARGET,W1_&_W2,C1,0.3183199465274811,2.0186741955776768e-7,-0.16400271059341004,-0.07418473022532235
\ No newline at end of file
+PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB
+IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.10514479130506516,0.0,0.39844897646996624,0.42804034052713935
+IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.10514479130506516,0.0,-0.42804034052713935,-0.39844897646996624
+ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.14041906595230103,0.0,-0.6661267914170061,-0.6266080320986587
+IATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04427033991279833,0.04393992135752185
+IATE,T2_&_T1,0_&_1,1_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04393992135752185,0.04427033991279833
+ATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3183199465274811,2.0186741955776768e-7,-0.16400271059341004,-0.07418473022532235
\ No newline at end of file
diff --git a/test/data/merge/sieve_output_2.csv b/test/data/merge/sieve_output_2.csv
index 1de809d..ad536a5 100644
--- a/test/data/merge/sieve_output_2.csv
+++ b/test/data/merge/sieve_output_2.csv
@@ -1,3 +1,3 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB
-ATE,T1,1,0,"CONTINUOUS, TARGET",W1,,0.17398861050605774,0.0,-1.1780933630666999,-1.1291269782236455
-CM,T1,0,,"CONTINUOUS, TARGET",W1,,0.09048639982938766,0.0,3.4078416054701566,3.433307593526622
+PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB
+ATE,T1,1,0,"CONTINUOUS, OUTCOME",W1,,0.17398861050605774,0.0,-1.1780933630666999,-1.1291269782236455
+CM,T1,0,,"CONTINUOUS, OUTCOME",W1,,0.09048639982938766,0.0,3.4078416054701566,3.433307593526622
diff --git a/test/data/merge/tmle_output_1.csv b/test/data/merge/tmle_output_1.csv
index 21d2ae0..574764e 100644
--- a/test/data/merge/tmle_output_1.csv
+++ b/test/data/merge/tmle_output_1.csv
@@ -1,7 +1,7 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG
-IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,0.31289224196843934,0.4132446584985528,0.11215083413905078,0.0002973305980956673,0.19204601585918746,0.6344433011379181,0.847922052214297,0.020796900602100377,0.1808979087784819,0.935635289898083,0.26988547749823344,
-IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,-0.31289224196843934,-0.4132446584985528,0.11215083413905078,0.0002973305980956673,-0.6344433011379181,-0.19204601585918746,0.1132683682000456,0.007992877453115943,0.05298134725065751,0.3761329000024115,0.8446783494259822,
-ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, TARGET",W1_&_W2,C1,-0.6913496525247373,-0.6463674117578324,0.14465023358495563,1.340594727468874e-5,-0.9316656493686948,-0.36106917414697,0.5750731876257001,0.6616018441386402,0.6626823260683342,0.9943324985582943,0.379330384132208,
-IATE,T2_&_T1,1_&_1,0_&_0,BINARY/TARGET,W1_&_W2,C1,0.015114902768326591,-0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6129084528900649,0.6125780343347885,0.18524882713929447,0.6791824198934945,0.375539677029601,0.2563919842828919,0.48004747095683487,
-IATE,T2_&_T1,0_&_1,1_&_0,BINARY/TARGET,W1_&_W2,C1,-0.015114902768326591,0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6125780343347885,0.6129084528900649,0.8483223420477747,0.6377742233856645,0.653653091532227,0.6594583118531032,0.3862219522578093,
-ATE,T2_&_T1,1_&_1,0_&_0,BINARY/TARGET,W1_&_W2,C1,-0.07124029524113125,-0.1190937204093662,0.3182495428000389,0.7086573850781657,-0.7468080019909507,0.5086205611722183,0.7833975115669672,0.13752408975674002,0.8906874812178406,0.7407394467826026,0.6391102550858685,
+PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG
+IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.31289224196843934,0.4132446584985528,0.11215083413905078,0.0002973305980956673,0.19204601585918746,0.6344433011379181,0.847922052214297,0.020796900602100377,0.1808979087784819,0.935635289898083,0.26988547749823344,
+IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,-0.31289224196843934,-0.4132446584985528,0.11215083413905078,0.0002973305980956673,-0.6344433011379181,-0.19204601585918746,0.1132683682000456,0.007992877453115943,0.05298134725065751,0.3761329000024115,0.8446783494259822,
+ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,-0.6913496525247373,-0.6463674117578324,0.14465023358495563,1.340594727468874e-5,-0.9316656493686948,-0.36106917414697,0.5750731876257001,0.6616018441386402,0.6626823260683342,0.9943324985582943,0.379330384132208,
+IATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.015114902768326591,-0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6129084528900649,0.6125780343347885,0.18524882713929447,0.6791824198934945,0.375539677029601,0.2563919842828919,0.48004747095683487,
+IATE,T2_&_T1,0_&_1,1_&_0,BINARY/OUTCOME,W1_&_W2,C1,-0.015114902768326591,0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6125780343347885,0.6129084528900649,0.8483223420477747,0.6377742233856645,0.653653091532227,0.6594583118531032,0.3862219522578093,
+ATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,-0.07124029524113125,-0.1190937204093662,0.3182495428000389,0.7086573850781657,-0.7468080019909507,0.5086205611722183,0.7833975115669672,0.13752408975674002,0.8906874812178406,0.7407394467826026,0.6391102550858685,
diff --git a/test/data/merge/tmle_output_2.csv b/test/data/merge/tmle_output_2.csv
index 4e76aa3..a7d02aa 100644
--- a/test/data/merge/tmle_output_2.csv
+++ b/test/data/merge/tmle_output_2.csv
@@ -1,3 +1,3 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,TARGET,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG
-ATE,T1,1,0,"CONTINUOUS, TARGET",W1,,-1.170325854136744,,,,,,,,,,,"Error"
-CM,T1,0,,"CONTINUOUS, TARGET",W1,,3.4304882451014653,3.4205745994983894,0.08649674229047534,1.6698354099787253e-94,3.249974334825743,3.5911748641710357,0.11925931782610122,0.1908267610598129,0.3548787761302413,0.6543239505251285,0.8668053182115685,
+PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG
+ATE,T1,1,0,"CONTINUOUS, OUTCOME",W1,,-1.170325854136744,,,,,,,,,,,"Error"
+CM,T1,0,,"CONTINUOUS, OUTCOME",W1,,3.4304882451014653,3.4205745994983894,0.08649674229047534,1.6698354099787253e-94,3.249974334825743,3.5911748641710357,0.11925931782610122,0.1908267610598129,0.3548787761302413,0.6543239505251285,0.8668053182115685,
diff --git a/test/load_tmle_spec.jl b/test/load_tmle_spec.jl
deleted file mode 100644
index 71988cc..0000000
--- a/test/load_tmle_spec.jl
+++ /dev/null
@@ -1,113 +0,0 @@
-module TestsStackBuilding
-
-using Test
-using TargetedEstimation
-using MLJ
-using MLJGLMInterface
-using MLJLinearModels
-using EvoTrees
-
-@testset "Test tmle_spec_from_yaml: Only Stacks" begin
-    tmle_spec = TargetedEstimation.load_tmle_spec(joinpath("config", "tmle_config.jl"))
-
-    @test tmle_spec.threshold == 0.001
-    @test tmle_spec.weighted_fluctuation == false
-    # Test binary target TMLE's Qstack
-    Q_binary = tmle_spec.Q_binary
-    @test Q_binary.cache == false
-    ## Checking Qstack.metalearner
-    @test Q_binary.metalearner isa LogisticClassifier
-    @test Q_binary.metalearner.fit_intercept == false
-    ## Checking Qstack.resampling
-    @test Q_binary.resampling isa StratifiedCV
-    @test Q_binary.resampling.nfolds == 2
-    ## Checking Qstack EvoTree models
-    @test Q_binary.gridsearch_evo.tuning.goal == 5
-    @test Q_binary.gridsearch_evo.cache == false
-    @test Q_binary.gridsearch_evo.model.nrounds == 10
-    @test Q_binary.gridsearch_evo.resampling isa CV
-    ranges = Q_binary.gridsearch_evo.range
-    @test ranges[2].lower == 1e-5
-    @test ranges[2].upper == 10
-    @test ranges[2].scale == :log
-    @test ranges[1].lower == 3
-    @test ranges[1].upper == 5
-    @test ranges[1].scale == :linear
-    ## Checking Qstack  Interaction Logistic models
-    @test Q_binary.interaction_glmnet isa MLJ.ProbabilisticPipeline
-    @test Q_binary.interaction_glmnet.interaction_transformer.order == 2
-    ## Checking Qstack HAL model
-    @test Q_binary.hal.lambda == 10
-    @test Q_binary.hal.smoothness_orders == 1
-    @test Q_binary.hal.cv_select == false
-    @test Q_binary.hal.num_knots == [10, 5]
-
-    # Test continuous target TMLE's Qstack
-    Q_continuous = tmle_spec.Q_continuous
-    ## Checking Qstack.metalearner
-    @test Q_continuous.metalearner isa MLJLinearModels.LinearRegressor
-    @test Q_continuous.metalearner.fit_intercept == false
-
-    ## Checking Qstack.resampling
-    @test Q_continuous.resampling isa CV
-    @test Q_continuous.resampling.nfolds == 2
-    ## Checking Qstack EvoTree models
-    @test Q_continuous.evo_10.nrounds == 10
-    @test Q_continuous.evo_20.nrounds == 20
-    ## Checking Qstack Interaction Linear model
-    @test Q_continuous.interaction_glmnet isa MLJ.DeterministicPipeline
-    @test Q_continuous.interaction_glmnet.interaction_transformer.order == 3
-    @test Q_continuous.interaction_glmnet.interaction_transformer.primary_variables == []
-    @test Q_continuous.interaction_glmnet.interaction_transformer.primary_variables_patterns == [r"^rs[0-9]+"]
-    ## Checking Qstack HAL model
-    @test Q_continuous.hal.lambda == 10
-    @test Q_continuous.hal.smoothness_orders == 1
-    @test Q_continuous.hal.cv_select == false
-    @test Q_continuous.hal.num_knots == [10, 5]
-    
-    # TMLE G Stack
-    G = tmle_spec.G
-    ## Checking Gstack.metalearner
-    @test G.metalearner isa LogisticClassifier
-    @test G.metalearner.fit_intercept == false
-    ## Checking Gstack.resampling
-    @test G.resampling isa StratifiedCV
-    @test G.resampling.nfolds == 2
-    ## Checking Gstack models
-    @test G.interaction_glmnet.interaction_transformer.order == 2
-    @test G.interaction_glmnet.interaction_transformer.primary_variables == [:T1, :T2]
-    @test G.interaction_glmnet.interaction_transformer.primary_variables_patterns == [r"C"]
-    @test G.evo.nrounds == 10
-
-    @test tmle_spec.cache == false
-end
-
-@testset "Test tmle_spec_from_yaml: Simple models and GridSearch" begin
-    tmle_spec = TargetedEstimation.load_tmle_spec(joinpath("config", "tmle_config_2.jl"))
-    @test tmle_spec.G.cache == true
-    @test tmle_spec.weighted_fluctuation == true
-    @test tmle_spec.G.measure isa LogLoss
-    @test tmle_spec.G.tuning.goal == 5
-    @test tmle_spec.G.model.nrounds == 10
-    lambda_range = tmle_spec.G.range[2]
-    @test lambda_range.lower == 1e-5
-    @test lambda_range.upper == 10
-    @test lambda_range.scale == :log
-    depth_range = tmle_spec.G.range[1]
-    @test depth_range.lower == 3
-    @test depth_range.upper == 5
-    @test depth_range.scale == :linear
-
-    @test tmle_spec.Q_binary isa MLJ.ProbabilisticPipeline
-    @test tmle_spec.threshold == 1e-8
-
-    @test tmle_spec.Q_continuous.cache == true
-    @test tmle_spec.Q_continuous.interaction_glmnet.cache == true
-
-    @test tmle_spec.cache == true
-end
-
-end;
-
-true
-
diff --git a/test/merge.jl b/test/merge.jl
index 038ed03..d8967ca 100644
--- a/test/merge.jl
+++ b/test/merge.jl
@@ -15,7 +15,7 @@ using DataFrames
     output = CSV.read(parsed_args["out"], DataFrame)
     @test names(output) == [
         "PARAMETER_TYPE", "TREATMENTS", "CASE",
-        "CONTROL", "TARGET", "CONFOUNDERS",
+        "CONTROL", "OUTCOME", "CONFOUNDERS",
         "COVARIATES", "INITIAL_ESTIMATE", 
         "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB",
         "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", 
@@ -42,7 +42,7 @@ end
 @testset "Test merge_csv_files, sieve file" begin
     sieve_colnames = [
         "PARAMETER_TYPE", "TREATMENTS", "CASE",
-        "CONTROL", "TARGET", "CONFOUNDERS",
+        "CONTROL", "OUTCOME", "CONFOUNDERS",
         "COVARIATES", "INITIAL_ESTIMATE", 
         "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB", 
         "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB",
@@ -88,7 +88,7 @@ end
     output = CSV.read(parsed_args["out"], DataFrame)
     @test names(output) == [
         "PARAMETER_TYPE", "TREATMENTS", "CASE",
-        "CONTROL", "TARGET", "CONFOUNDERS",
+        "CONTROL", "OUTCOME", "CONFOUNDERS",
         "COVARIATES", "INITIAL_ESTIMATE", 
         "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB",
         "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", 
diff --git a/test/resampling.jl b/test/resampling.jl
index 1e13e8d..9032ad7 100644
--- a/test/resampling.jl
+++ b/test/resampling.jl
@@ -76,7 +76,6 @@ end
     @test stratification_col == ["_0", "_0", "_1", "_0", "_1", "_0", "_missing"]
 end
 
-
 @testset "Test JointStratifiedCV" begin
     X = (
         X1 = [0, 0, 1, 0, 1, 0, missing],
diff --git a/test/tmle.jl b/test/runner.jl
similarity index 70%
rename from test/tmle.jl
rename to test/runner.jl
index 281041b..5f61bca 100644
--- a/test/tmle.jl
+++ b/test/runner.jl
@@ -10,23 +10,31 @@ using LogExpFunctions
 using CategoricalArrays
 using DataFrames
 using CSV
+using Serialization
 using Arrow
+using YAML
+
+PKGDIR = pkgdir(TargetedEstimation)
+
+CONFIGDIR = joinpath(PKGDIR, "test", "config")
+
+include(joinpath(PKGDIR, "test", "testutils.jl"))
 
 function test_tmle_output(param_index, jldio, data, expected_param, sample_ids_idx)
     jld2_res = jldio[string(param_index)]
     csv_row = data[param_index, :]
     Ψ = jld2_res["result"].parameter
-    @test jld2_res["result"] isa TMLE.TMLEResult
+    @test jld2_res["result"] isa TMLE.Estimate
     @test jld2_res["result"].tmle.Ψ̂ isa Float64
     @test Ψ == expected_param
     @test jld2_res["sample_ids_idx"] == sample_ids_idx
     sample_ids = jldio[string(jld2_res["sample_ids_idx"])]["sample_ids"]
-    if expected_param.target == Symbol("BINARY/TARGET")
+    if expected_param.outcome == Symbol("BINARY/OUTCOME")
         @test sample_ids == 2:1000
     else
         @test sample_ids == 1:1000
     end
-    @test jld2_res["result"] isa TMLE.TMLEResult
+    @test jld2_res["result"] isa TMLE.Estimate
 
     if csv_row.COVARIATES === missing
         @test TargetedEstimation.covariates_string(Ψ) === csv_row.COVARIATES
@@ -42,11 +50,11 @@ function test_tmle_output(param_index, jldio, data, expected_param, sample_ids_i
 end
 
 """
-CONTINUOUS_TARGET: 
+CONTINUOUS_OUTCOME: 
 - IATE(0->1, 0->1) = E[W₂] = 0.5
 - ATE(0->1, 0->1)  = -4 E[C₁] + 1 + E[W₂] = -2 + 1 + 0.5 = -0.5
 
-BINARY_TARGET:
+BINARY_OUTCOME:
 - IATE(0->1, 0->1) =
 - ATE(0->1, 0->1)  = 
 
@@ -77,9 +85,9 @@ function build_dataset(;n=1000, format="csv")
         C1 = C₁,
     )
     # Comma in name
-    dataset[!, "CONTINUOUS, TARGET"] = categorical(y₁)
+    dataset[!, "CONTINUOUS, OUTCOME"] = categorical(y₁)
     # Slash in name
-    dataset[!, "BINARY/TARGET"] = categorical(y₂)
+    dataset[!, "BINARY/OUTCOME"] = categorical(y₂)
     dataset[!, "EXTREME_BINARY"] = categorical(vcat(0, ones(n-1)))
 
     format == "csv" ? CSV.write("data.csv", dataset) : Arrow.write("data.arrow", dataset)
@@ -88,13 +96,13 @@ end
 @testset "Test partition_tmle!" begin
     build_dataset(;n=1000, format="csv")
     dataset = TargetedEstimation.instantiate_dataset("data.csv")
-    parameters = TargetedEstimation.read_parameters(joinpath("config", "parameters.yaml"), dataset)
-    variables = TargetedEstimation.variables(parameters, dataset)
+    estimands = TargetedEstimation.read_estimands(joinpath(CONFIGDIR, "parameters.yaml"), dataset)
+    variables = TargetedEstimation.variables(estimands, dataset)
     TargetedEstimation.coerce_types!(dataset, variables)
     tmle_spec = TargetedEstimation.load_tmle_spec(joinpath("config", "tmle_config.jl"))
     cache = TMLECache(dataset)
 
-    tmle_results = Vector{Union{TMLE.TMLEResult, TargetedEstimation.MissingTMLEResult}}(undef, 3)
+    tmle_results = Vector{Union{TMLE.Estimate, TargetedEstimation.FailedEstimation}}(undef, 3)
     logs = Vector{Union{String, Missing}}(undef, 3)
     part = 4:6
     TargetedEstimation.partition_tmle!(cache, tmle_results, logs, part, tmle_spec, parameters, variables; verbosity=0)
@@ -107,31 +115,35 @@ end
 
 @testset "Test tmle_estimation" begin
     expected_parameters = [
-        ATE(Symbol("CONTINUOUS, TARGET"), (T1 = (case = true, control = false),), [:W1, :W2], Symbol[]),
-        IATE(Symbol("BINARY/TARGET"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]),
-        IATE(Symbol("BINARY/TARGET"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], [:C1]),
-        IATE(Symbol("CONTINUOUS, TARGET"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], Symbol[]),
-        IATE(Symbol("CONTINUOUS, TARGET"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]),
-        ATE(Symbol("CONTINUOUS, TARGET"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1])
+        ATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false),), [:W1, :W2], Symbol[]),
+        IATE(Symbol("BINARY/OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]),
+        IATE(Symbol("BINARY/OUTCOME"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], [:C1]),
+        IATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], Symbol[]),
+        IATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]),
+        ATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1])
     ]
+    outfilename = "statistical_estimands.yml"
+    configuration_to_yaml(outfilename, statistical_estimands_only_config())
     expected_param_sample_ids_idx = [1, 2, 2, 4, 5, 5]
     # Run tests over CSV and Arrow data formats
     for format in ("csv", "arrow")
         build_dataset(;n=1000, format=format)
         parsed_args = Dict(
-                    "data" => string("data.", format),
-                    "param-file" => nothing,
-                    "estimator-file" => joinpath("config", "tmle_config.jl"),
+                    "dataset" => string("data.", format),
+                    "estimands-config" => nothing,
+                    "estimators-config" => joinpath(CONFIGDIR, "tmle_config.jl"),
                     "csv-out" => "output.csv",
                     "verbosity" => 0,
                     "hdf5-out" => "output.hdf5",
                     "pval-threshold" => 1.,
                     "chunksize" => nothing
                 )
+        runner = TargetedEstimation.Runner(parsed_args)
         for param_file in ("parameters.yaml", "parameters.bin")
             for chunksize in (4, 10)
                 # Only one continuous phenotype / machines not saved / no adaptive cv
-                parsed_args["param-file"] = joinpath("config", param_file)
+
+                parsed_args["estimands-config"] = outfilename
                 parsed_args["chunksize"] = chunksize
 
                 tmle_estimation(parsed_args)
@@ -151,26 +163,29 @@ end
                 rm(parsed_args["hdf5-out"])
             end
         end
-        rm(parsed_args["data"])
+        rm(parsed_args["dataset"])
     end
 end
 
 @testset "Test tmle_estimation: No hdf5 file" begin
     build_dataset(;n=1000, format="csv")
+    estimands_filename = "estimands_test.yaml"
+    configuration_to_yaml(estimands_filename, statistical_estimands_only_config())
     # Only one continuous phenotype / machines not saved / no adaptive cv
-    param_file = "parameters.yaml"
     parsed_args = Dict(
-        "data" => "data.csv",
-        "param-file" => joinpath("config", param_file),
-        "estimator-file" => joinpath("config", "tmle_config_2.jl"),
+        "dataset" => "data.csv",
+        "estimands-config" => estimands_filename,
+        "estimators-config" => joinpath(CONFIGDIR, "ose_config.jl"),
         "csv-out" => "output.csv",
         "verbosity" => 0,
         "hdf5-out" => nothing,
         "pval-threshold" => 1.,
-        "chunksize" => 10
+        "chunksize" => 10,
+        "rng" => 123,
+        "sort-estimands" => false,
+        "cache-strategy" => "release_unusable"
     )
-
-    tmle_estimation(parsed_args)
+    @enter run_estimation(parsed_args)
 
     ## Check CSV file
     data = CSV.read(parsed_args["csv-out"], DataFrame)
@@ -179,16 +194,16 @@ end
     all(x === missing for x in data.LOG)
     # Clean
     rm(parsed_args["csv-out"])
-    rm(parsed_args["data"])
+    rm(parsed_args["dataset"])
 end
 
 
 @testset "Test tmle_estimation: lower p-value threhsold" begin
     build_dataset(;n=1000, format="csv")
     parsed_args = Dict(
-        "data" => "data.csv",
-        "param-file" => joinpath("config", "parameters.yaml"),
-        "estimator-file" => joinpath("config", "tmle_config.jl"),
+        "dataset" => "data.csv",
+        "estimands-config" => joinpath("config", "parameters.yaml"),
+        "estimators-config" => joinpath("config", "tmle_config.jl"),
         "csv-out" => "output.csv",
         "verbosity" => 0,
         "hdf5-out" => "output.hdf5",
@@ -209,7 +224,7 @@ end
 
     @test jldio["1"]["result"].tmle.Ψ̂ == data[1, :TMLE_ESTIMATE]
 
-    rm(parsed_args["data"])
+    rm(parsed_args["dataset"])
     rm(parsed_args["csv-out"])
     rm(parsed_args["hdf5-out"])
 end
@@ -217,9 +232,9 @@ end
 @testset "Test tmle_estimation: Failing parameters" begin
     build_dataset(;n=1000, format="csv")
     parsed_args = Dict(
-        "data" => "data.csv",
-        "param-file" => joinpath("config", "failing_parameters.yaml"),
-        "estimator-file" => joinpath("config", "tmle_config.jl"),
+        "dataset" => "data.csv",
+        "estimands-config" => joinpath("config", "failing_parameters.yaml"),
+        "estimators-config" => joinpath("config", "tmle_config.jl"),
         "csv-out" => "output.csv",
         "verbosity" => 0,
         "hdf5-out" => nothing,
@@ -234,7 +249,7 @@ end
     @test size(data) == (1, 19)
     @test data[1, :TMLE_ESTIMATE] === missing
 
-    rm(parsed_args["data"])
+    rm(parsed_args["dataset"])
     rm(parsed_args["csv-out"])
 
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 7c461fe..0a34f46 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,7 +1,8 @@
-include("tmle.jl")
-include("load_tmle_spec.jl")
+
+include("cache_managers.jl")
 include("utils.jl")
 include("sieve_variance.jl")
+include("runner.jl")
 include("merge.jl")
 include("resampling.jl")
 include(joinpath("models", "glmnet.jl"))
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index b90153d..901a286 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -38,8 +38,8 @@ function build_dataset(sample_ids)
         C1 = C₁,
     )
 
-    dataset[!, "CONTINUOUS, TARGET"] = y₁
-    dataset[!, "BINARY/TARGET"] = categorical(y₂)
+    dataset[!, "CONTINUOUS, OUTCOME"] = y₁
+    dataset[!, "BINARY/OUTCOME"] = categorical(y₂)
 
     CSV.write("data.csv", dataset)
 end
@@ -91,7 +91,7 @@ end
 
 function test_initial_output(output, expected_output)
     # Metadata columns
-    for col in [:PARAMETER_TYPE, :TREATMENTS, :CASE, :CONTROL, :TARGET, :CONFOUNDERS, :COVARIATES]
+    for col in [:PARAMETER_TYPE, :TREATMENTS, :CASE, :CONTROL, :OUTCOME, :CONFOUNDERS, :COVARIATES]
         for index in eachindex(output[!, col])
             if expected_output[index, col] === missing
                 @test expected_output[index, col] === output[index, col]
@@ -126,7 +126,7 @@ end
         result = io[key]["result"]
         IC = result.tmle.IC
         # missing sample
-        if result.parameter.target == Symbol("BINARY/TARGET")
+        if result.parameter.target == Symbol("BINARY/OUTCOME")
             IC = vcat(0, IC)
         end
         @test convert(Vector{Float32}, IC) == influence_curves[parse(Int, key), :]
@@ -138,7 +138,7 @@ end
         TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2"],
         CASE=["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true"],
         CONTROL=["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false"],
-        TARGET = ["BINARY/TARGET", "BINARY/TARGET", "BINARY/TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET"],
+        OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"],
         CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2"],
         COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1"]
     )
@@ -159,7 +159,7 @@ end
         TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1", "T1"],
         CASE = ["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true", "true", "false"],
         CONTROL = ["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false", "false", missing],
-        TARGET = ["BINARY/TARGET", "BINARY/TARGET", "BINARY/TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET"],
+        OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"],
         CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1", "W1"],
         COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1", missing, missing]
     )
@@ -319,7 +319,7 @@ end
         TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1", "T1"],
         CASE = ["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true", "true", "false"],
         CONTROL = ["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false", "false", missing],
-        TARGET = ["BINARY/TARGET", "BINARY/TARGET", "BINARY/TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET", "CONTINUOUS, TARGET"],
+        OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"],
         CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1", "W1"],
         COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1", missing, missing]
     )
diff --git a/test/testutils.jl b/test/testutils.jl
new file mode 100644
index 0000000..76d6ab4
--- /dev/null
+++ b/test/testutils.jl
@@ -0,0 +1,57 @@
+using TMLE
+
+function statistical_estimands_only_config()
+    configuration = Configuration(
+        estimands=[
+            IATE(
+                outcome = Symbol("CONTINUOUS, outcome"), 
+                treatment_values = (
+                    T1 = (case = true, control = false), 
+                    T2 = (case = true, control = false)), 
+                treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), 
+                outcome_extra_covariates = (:C1,)
+            ),
+            IATE(
+                outcome = Symbol("BINARY/outcome"), 
+                treatment_values = (
+                    T1 = (case = true, control = false), 
+                    T2 = (case = true, control = false)), 
+                treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), 
+                outcome_extra_covariates = (:C1,)
+            ),
+            ATE(
+                outcome = Symbol("CONTINUOUS, outcome"), 
+                treatment_values = (T1 = (case = true, control = false),), 
+                treatment_confounders = (T1 = (:W1, :W2),), 
+                outcome_extra_covariates = ()
+            ),
+            IATE(
+                outcome = Symbol("CONTINUOUS, outcome"), 
+                treatment_values = (
+                    T1 = (case = true, control = false), 
+                    T2 = (case = false, control = true)
+                ), 
+                treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), 
+                outcome_extra_covariates = ()
+            ),
+            IATE(
+                outcome = Symbol("BINARY/outcome"), 
+                treatment_values = (
+                    T1 = (case = true, control = false), 
+                    T2 = (case = false, control = true)
+                ), 
+                treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), 
+                outcome_extra_covariates = (:C1,)
+            ),
+            ATE(
+                outcome = Symbol("CONTINUOUS, outcome"), 
+                treatment_values = (
+                    T1 = (case = true, control = false), 
+                    T2 = (case = true, control = false)), 
+                treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)),
+                outcome_extra_covariates = (:C1,)
+            )
+        ]
+    )
+    return configuration
+end
diff --git a/test/utils.jl b/test/utils.jl
index 480fb24..386f413 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -5,13 +5,69 @@ using TargetedEstimation
 using TMLE
 using DataFrames
 using CSV
+using MLJBase
+using MLJLinearModels
 using CategoricalArrays
 
+PROJECT_DIR = dirname(dirname(pathof(TargetedEstimation)))
+
+include(joinpath(PROJECT_DIR, "test", "testutils.jl"))
+
+@testset "Test load_tmle_spec: with configuration file" begin
+    estimators = TargetedEstimation.load_tmle_spec(joinpath(PROJECT_DIR, "test", "config", "tmle_config.jl"))
+    @test estimators.TMLE isa TMLE.TMLEE
+    @test estimators.OSE isa TMLE.OSE
+    @test estimators.TMLE.weighted === true
+    @test estimators.TMLE.models.G_default === estimators.OSE.models.G_default
+    @test estimators.TMLE.models.G_default isa MLJBase.ProbabilisticStack
+end
+
+@testset "Test load_tmle_spec: no configuration file" begin
+    estimators = TargetedEstimation.load_tmle_spec(nothing)
+    @test !haskey(estimators, :OSE)
+    @test haskey(estimators, :TMLE)
+    @test estimators.TMLE.weighted === true
+    @test estimators.TMLE.models.G_default isa LogisticClassifier
+end
+
+@testset "Test convert_treatment_values" begin
+    treatment_types = Dict(:T₁=> Union{Missing, Bool}, :T₂=> Int)
+    newT = TargetedEstimation.convert_treatment_values((T₁=1,), treatment_types)
+    @test newT isa Vector{Bool}
+    @test newT == [1]
+
+    newT = TargetedEstimation.convert_treatment_values((T₁=(case=1, control=0.),), treatment_types)
+    @test newT isa Vector{NamedTuple{(:case, :control), Tuple{Bool, Bool}}}
+    @test newT == [(case = true, control = false)]
+
+    newT = TargetedEstimation.convert_treatment_values((T₁=(case=1, control=0.), T₂=(case=true, control=0)), treatment_types)
+    @test newT isa Vector{NamedTuple{(:case, :control)}}
+    @test newT == [(case = true, control = false), (case = 1, control = 0)]
+end
+
+@testset "Test proofread_estimands_from_yaml" begin
+    filename = "statistical_estimands.yml"
+    configuration_to_yaml(filename, statistical_estimands_only_config())
+    dataset = DataFrame(T1 = [1., 0.], T2=[true, false])
+    estimands = TargetedEstimation.proofread_estimands_from_yaml(filename, dataset)
+    for estimand in estimands
+        if haskey(estimand.treatment_values, :T1)
+            @test estimand.treatment_values.T1.case isa Float64
+            @test estimand.treatment_values.T1.control isa Float64
+        end
+        if haskey(estimand.treatment_values, :T2)
+            @test estimand.treatment_values.T2.case isa Bool
+            @test estimand.treatment_values.T2.control isa Bool
+        end
+    end
+    rm(filename)
+end
+
 @testset "Test CSV writing" begin
     Ψ = IATE(
-        target=:Y,
-        treatment=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")),
-        confounders=[:W₁, :W₂]
+        outcome=:Y,
+        treatment_values=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")),
+        treatment_confounders=(T₁=[:W₁, :W₂], T₂=[:W₁, :W₂])
     )
     @test TargetedEstimation.covariates_string(Ψ) === missing
     @test TargetedEstimation.param_string(Ψ) == "IATE"
@@ -33,38 +89,43 @@ using CategoricalArrays
     @test TargetedEstimation.control_string(Ψ) === missing
     @test TargetedEstimation.treatment_string(Ψ) == "T₁_&_T₂"
     @test TargetedEstimation.confounders_string(Ψ) == "W₁_&_W₂"
-
 end
 
-@testset "Test variables" begin
-    parameters = [
-        IATE(
-        target=:Y,
-        treatment=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")),
-        confounders=[:W₁, :W₂]),
-        CM(
-        target=:Y₂,
-        treatment=(T₁=1, T₃="AC"),
-        confounders=[:W₃, :W₂],
-        covariates=[:C₁])
-    ]
-    dataset = DataFrame(Y=[1.1, 2.2, missing], Y₂=[1, 0, missing])
-    variables = TargetedEstimation.variables(parameters, dataset)
-    @test variables == (
-        treatments = Set([:T₃, :T₁, :T₂]),
-        confounders = Set([:W₁, :W₃, :W₂]),
-        covariates = Set([:C₁]),
-        binarytargets = Set([:Y₂]),
-        continuoustargets = Set([:Y])
+@testset "Test coerce_types!" begin
+    Ψ = IATE(
+        outcome=:Ycont,
+        treatment_values=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")),
+        treatment_confounders=(T₁=[:W₁, :W₂], T₂=[:W₁, :W₂]),
     )
 
-    variables = TargetedEstimation.variables(parameters[1])
-    @test variables == (
-        target = :Y,
-        covariates = Symbol[],
-        confounders = [:W₁, :W₂],
-        treatments = (:T₁, :T₂)
+    dataset = DataFrame(
+        Ycont  = [1.1, 2.2, missing],
+        Ycat = [1., 0., missing],
+        T₁ = [1, 0, missing],
+        T₂ = [missing, "AC", "CC"],
+        W₁ = [1., 0., 0.],
+        W₂ = [missing, 0., 0.],
+        C = [1, 2, 3]
     )
+    TargetedEstimation.coerce_types!(dataset, Ψ)
+
+    @test dataset.T₁ isa CategoricalArray
+    @test dataset.T₂ isa CategoricalArray
+    for var in [:W₁, :W₂, :Ycont]
+        @test eltype(dataset[!, var]) <: Union{Missing, Float64}
+    end
+
+    Ψ = IATE(
+        outcome=:Ycat,
+        treatment_values=(T₂=(case="AC", control="CC"), ),
+        treatment_confounders=(T₂=[:W₂],),
+        outcome_extra_covariates=[:C]
+    )
+    TargetedEstimation.coerce_types!(dataset, Ψ)
+
+    @test dataset.Ycat isa CategoricalArray
+    @test eltype(dataset.C) <: Union{Missing, Float64}
+
 end
 
 @testset "Test get_sample_ids" begin
@@ -89,41 +150,6 @@ end
     @test sample_ids == [2]
 end
 
-@testset "Test treatment_values" begin
-    treatment_types = Dict(:T₁=> Union{Missing, Bool}, :T₂=> Int)
-    Ψ = CM(target=:Y, treatment=(T₁=1,), confounders=[:W₁])
-    newT = TargetedEstimation.treatment_values(Ψ, (:T₁,), treatment_types)
-    @test newT isa Vector{Bool}
-    @test newT == [1]
-
-    Ψ = ATE(target=:Y, treatment=(T₁=(case=1, control=0.),), confounders=[:W₁])
-    newT = TargetedEstimation.treatment_values(Ψ, (:T₁,), treatment_types)
-    @test newT isa Vector{NamedTuple{(:case, :control), Tuple{Bool, Bool}}}
-    @test newT == [(case = true, control = false)]
-
-    Ψ = ATE(target=:Y, treatment=(T₁=(case=1, control=0.), T₂=(case=true, control=0)), confounders=[:W₁])
-    newT = TargetedEstimation.treatment_values(Ψ, (:T₁, :T₂), treatment_types)
-    @test newT isa Vector{NamedTuple{(:case, :control)}}
-    @test newT == [(case = true, control = false), (case = 1, control = 0)]
-end
-
-@testset "Test read_parameters" for param_file in ("parameters.yaml", "parameters.bin")
-    param_file = joinpath("config", param_file)
-    dataset = DataFrame(T1 = [1., 0.], T2=[true, false])
-    params = TargetedEstimation.read_parameters(param_file, dataset)
-    for param in params
-        if haskey(param.treatment, :T1)
-            @test param.treatment.T1.case isa Float64
-            @test param.treatment.T1.control isa Float64
-        end
-        if haskey(param.treatment, :T2)
-            @test param.treatment.T2.case isa Bool
-            @test param.treatment.T2.control isa Bool
-        end
-    end
-end
-
-
 @testset "Test write_target_results with missing values" begin
     filename = "test.csv"
     parameters = [
@@ -133,7 +159,7 @@ end
         confounders=[:W₁, :W₂],
         covariates=[:C₁]
     )]
-    tmle_results = [TargetedEstimation.MissingTMLEResult(parameters[1])]
+    tmle_results = [TargetedEstimation.FailedEstimation(parameters[1])]
     logs = ["Error X"]
     TargetedEstimation.append_csv(filename, tmle_results, logs)
     out = CSV.read(filename, DataFrame)
@@ -176,6 +202,10 @@ end
     TargetedEstimation.make_float!(dataset, [:C₁])
     @test eltype(dataset.C₁) == Float64
 
+    # If the type is already coerced then no-operation is applied 
+    TargetedEstimation.make_float(dataset.C₁) === dataset.C₁
+    TargetedEstimation.make_categorical(dataset.T₁, true) === dataset.T₁
+
 end
 
 end;

From 667968fbab9dac4b488ca9fba53f5e3050f0869e Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Mon, 20 Nov 2023 18:36:12 +0000
Subject: [PATCH 02/71] add WIP

---
 Project.toml                                  |  16 +-
 deps/build.jl                                 |   1 +
 estimands_test.yaml                           |  12 +-
 src/TargetedEstimation.jl                     |   5 +-
 src/cache_managers.jl                         |  10 +-
 src/runner.jl                                 | 154 +++++++++++++-----
 src/utils.jl                                  | 136 +++++++++++-----
 .../{tmle_config.jl => tmle_ose_config.jl}    |   0
 test/runner.jl                                |  60 +++++--
 test/testutils.jl                             |  12 +-
 test/utils.jl                                 |  44 +++--
 11 files changed, 325 insertions(+), 125 deletions(-)
 create mode 100644 deps/build.jl
 rename test/config/{tmle_config.jl => tmle_ose_config.jl} (100%)

diff --git a/Project.toml b/Project.toml
index 1c787d9..4d74093 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,16 +4,19 @@ authors = ["Olivier Labayle"]
 version = "0.7.4"
 
 [deps]
-ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
+Comonicon = "863f3e99-da2a-4334-8734-de3dacbe5542"
+Configurations = "5218b696-f38b-4ac9-8b61-a12ec717816d"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
 GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
+HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 HighlyAdaptiveLasso = "c5dac772-1445-43c4-b698-9440de7877f6"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
@@ -28,27 +31,30 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 TMLE = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 
 [compat]
-ArgParse = "1.1.4"
 Arrow = "2.5.2"
 CSV = "0.10"
 CategoricalArrays = "0.10"
 Combinatorics = "1.0.2"
+Comonicon = "1.0.6"
+Configurations = "0.17.6"
+JSON = "0.21.4"
 DataFrames = "1.3.4"
 EvoTrees = "0.14.6"
 GLMNet = "0.7"
 HighlyAdaptiveLasso = "0.2.0"
 JLD2 = "0.4.22"
 MKL = "0.6"
-MLJ = "0.19"
-MLJBase = "0.21"
+MLJ = "0.20.0"
+MLJBase = "1.0.1"
 MLJLinearModels = "0.9"
 MLJModelInterface = "1.8.0"
 MLJModels = "0.16"
 MLJXGBoostInterface = "0.3.4"
 MultipleTesting = "0.5.1"
 Optim = "1.7"
-TMLE = "0.11.4"
 Tables = "1.10.1"
+YAML = "0.4.9"
 julia = "1.7, 1"
diff --git a/deps/build.jl b/deps/build.jl
new file mode 100644
index 0000000..e0a9f11
--- /dev/null
+++ b/deps/build.jl
@@ -0,0 +1 @@
+using TargetedEstimation; TargetedEstimation.comonicon_install()
\ No newline at end of file
diff --git a/estimands_test.yaml b/estimands_test.yaml
index 34ae410..8901313 100644
--- a/estimands_test.yaml
+++ b/estimands_test.yaml
@@ -10,7 +10,7 @@ estimands:
       T1:
         case: true
         control: false
-    outcome: CONTINUOUS, outcome
+    outcome: CONTINUOUS, OUTCOME
     treatment_confounders:
       T2:
         - W1
@@ -28,7 +28,7 @@ estimands:
       T1:
         case: true
         control: false
-    outcome: BINARY/outcome
+    outcome: BINARY/OUTCOME
     treatment_confounders:
       T2:
         - W1
@@ -42,7 +42,7 @@ estimands:
       T1:
         case: true
         control: false
-    outcome: CONTINUOUS, outcome
+    outcome: CONTINUOUS, OUTCOME
     treatment_confounders:
       T1:
         - W1
@@ -56,7 +56,7 @@ estimands:
       T1:
         case: true
         control: false
-    outcome: CONTINUOUS, outcome
+    outcome: CONTINUOUS, OUTCOME
     treatment_confounders:
       T2:
         - W1
@@ -74,7 +74,7 @@ estimands:
       T1:
         case: true
         control: false
-    outcome: BINARY/outcome
+    outcome: BINARY/OUTCOME
     treatment_confounders:
       T2:
         - W1
@@ -92,7 +92,7 @@ estimands:
       T1:
         case: true
         control: false
-    outcome: CONTINUOUS, outcome
+    outcome: CONTINUOUS, OUTCOME
     treatment_confounders:
       T2:
         - W1
diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index 88a6d4e..6ade605 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -25,6 +25,9 @@ using Combinatorics
 using Tables
 using Random
 using YAML
+using JSON
+using Comonicon
+using Configurations
 
 import MLJModelInterface
 
@@ -38,7 +41,7 @@ include(joinpath("models", "glmnet.jl"))
 include(joinpath("models", "adaptive_interaction_transformer.jl"))
 include(joinpath("models", "biallelic_snp_encoder.jl"))
 
-export run_estimation, sieve_variance_plateau, merge_csv_files
+export Runner, run_estimation, sieve_variance_plateau, merge_csv_files
 export GLMNetRegressor, GLMNetClassifier
 export RestrictedInteractionTransformer, BiAllelicSNPEncoder
 export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV
diff --git a/src/cache_managers.jl b/src/cache_managers.jl
index 4f0a3f6..a9908ee 100644
--- a/src/cache_managers.jl
+++ b/src/cache_managers.jl
@@ -52,12 +52,16 @@ function release!(cache_manager::NoCacheManager, Ψ)
 end
 
 function make_cache_manager(estimands, string)
-    if string == "release_unusable"
+    if string == "release-unusable"
         return ReleaseUnusableCacheManager(TMLE.nuisance_counts(estimands))
-    elseif string == "no_cache"
+    elseif string == "no-cache"
         return NoCacheManager()
     else
-        return MaxSizeCacheManager(parse(Int, string))
+        maxsize = try parse(Int, string) 
+            catch E
+                throw(ArgumentError(string("Could not convert the provided cache value to an integer: ", string)))
+            end
+        return MaxSizeCacheManager(maxsize)
     end
 end
 
diff --git a/src/runner.jl b/src/runner.jl
index b9127cc..0ed6220 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -2,35 +2,57 @@ struct FailedEstimation
     message::String
 end
 
+
+@option struct JSONOutput
+    filename::Union{Nothing, String} = nothing
+    pval_threshold::Union{Nothing, Float64} = nothing
+end
+
+initialize(output::JSONOutput) = initialize_json(output.filename)
+
+
+@option struct HDF5Output
+    filename::Union{Nothing, String} = nothing
+    pval_threshold::Union{Nothing, Float64} = nothing
+end
+
+initialize_hdf5(x) = nothing
+
+initialize(output::HDF5Output) = initialize_hdf5(output.filename)
+
+@option struct Outputs
+    json::JSONOutput = JSONOutput()
+    hdf5::HDF5Output = HDF5Output()
+    std::Bool = true
+end
+
+function initialize(outputs::Outputs)
+    initialize(outputs.json)
+    initialize(outputs.hdf5)
+end
+
 mutable struct Runner
     estimators::NamedTuple
     estimands::Vector{TMLE.Estimand}
     dataset::DataFrame
     cache_manager::CacheManager
     chunksize::Int
-    pvalue_threshold::Float64
-    output_ios::NamedTuple
-    function Runner(parsed_args)
-        datafile = parsed_args["dataset"]
-        paramfile = parsed_args["estimands-config"]
-        estimatorfile = parsed_args["estimators-config"]
-        verbosity = parsed_args["verbosity"]
-        csv_filename = parsed_args["csv-out"]
-        hdf5_filename = parsed_args["hdf5-out"]
-        pvalue_threshold = parsed_args["pval-threshold"]
-        chunksize = parsed_args["chunksize"]
-        rng = parsed_args["rng"]
-        cache_strategy = parsed_args["cache-strategy"]
-        sort_estimands = parsed_args["sort-estimands"]
-    
-        # Output IOs
-        output_ios = (CSV=csv_filename, HDF5=hdf5_filename)
+    outputs::Outputs
+    verbosity::Int
+    function Runner(dataset, estimands, estimators; 
+        verbosity=0, 
+        outputs=Outputs(), 
+        chunksize=100,
+        rng=123,
+        cache_strategy="release-unusable",
+        sort_estimands=false
+        )    
         # Retrieve TMLE specifications
-        estimators = TargetedEstimation.load_tmle_spec(estimatorfile)
+        estimators = TargetedEstimation.load_tmle_spec(estimators)
         # Load dataset
-        dataset = TargetedEstimation.instantiate_dataset(datafile)
+        dataset = TargetedEstimation.instantiate_dataset(dataset)
         # Read parameter files
-        estimands = TargetedEstimation.proofread_estimands_from_yaml(paramfile, dataset)
+        estimands = TargetedEstimation.proofread_estimands(estimands, dataset)
         if sort_estimands
             estimands = groups_ordering(estimands; 
                 brute_force=true, 
@@ -41,10 +63,33 @@ mutable struct Runner
         end
         cache_manager = make_cache_manager(estimands, cache_strategy)
 
-        return new(estimators, estimands, dataset, cache_manager, chunksize, pvalue_threshold, output_ios)
+        return new(estimators, estimands, dataset, cache_manager, chunksize, outputs, verbosity)
     end
 end
 
+function save(runner::Runner, results, partition, finalize)
+    # Append STD Out
+    update(runner.outputs.std, results)
+    # Append JSON result with partition
+    update(runner.outputs.json, results; finalize=finalize)
+    # Append HDF5 result if save-ic is true
+    # update_jld2_output(runner.output_ios.HDF5, partition, results, runner.dataset)
+end
+
+
+function try_estimation(runner, Ψ, estimator)
+    try
+        result, _ = estimator(Ψ, runner.dataset,
+            cache=runner.cache_manager.cache,
+            verbosity=runner.verbosity, 
+        )
+        return result
+    catch e
+        # On Error, store the nuisance function where the error occured 
+        # to fail fast the next estimands
+        return FailedEstimation(string(e))
+    end
+end
 
 function (runner::Runner)(partition)
     results = Vector{NamedTuple}(undef, size(partition, 1))
@@ -54,17 +99,8 @@ function (runner::Runner)(partition)
         TargetedEstimation.coerce_types!(runner.dataset, Ψ)
         # Maybe update cache with new η_spec
         estimators_results = []
-        for estimator in estimators
-            try
-                result, _ = estimator(Ψ, runner.dataset,
-                    cache=runner.cache,
-                    verbosity=runner.verbosity, 
-                )
-            catch e
-                # On Error, store the nuisance function where the error occured 
-                # to fail fast the next estimands
-                result = FailedEstimation(string(e))
-            end
+        for estimator in runner.estimators
+            result = try_estimation(runner, Ψ, estimator)
             push!(estimators_results, result)
         end
         # Update results
@@ -81,18 +117,58 @@ function (runner::Runner)(partition)
 end
 
 function (runner::Runner)()
+    # Initialize output files
+    initialize_outputs(runner.output_ios)
     # Split worklist in partitions
     nparams = size(runner.estimands, 1)
-    for partition in Iterators.partition(1:nparams, runner.chunksize)
+    partitions = collect(Iterators.partition(1:nparams, runner.chunksize))
+    for partition in partitions
         results = runner(partition)
-        # Append CSV result with partition
-        append_csv(csv_file, results)
-        # Append HDF5 result if save-ic is true
-        update_jld2_output(jld2_file, partition, results, dataset; pval_threshold=pval_threshold)
+        save(runner, results, partition, partition===partitions[end])
     end
-
     verbosity >= 1 && @info "Done."
     return 0
 end
 
-run_estimation(parsed_args) = Runner(parsed_args)()
\ No newline at end of file
+
+"""
+TMLE CLI.
+
+# Args
+
+- `dataset`: Data file (either .csv or .arrow)
+- `estimands`: Estimands file (either .json or .yaml)
+- `estimators`: A julia file containing the estimators to use.
+
+# Options
+
+- `-v, --verbosity`: Verbosity level.
+- `-j, --json_out`: JSON output filename.
+- `--hdf5_out`: HDF5 output filename.
+- `--chunksize`: Results are written in batches of size chunksize.
+- `-r, --rng`: Random seed (Only used for estimands ordering at the moment).
+- `-c, --cache_strategy`: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size").
+
+# Flags
+
+- `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time).
+"""
+@main function tmle(dataset, estimands, estimators; 
+    verbosity=0, 
+    outputs=Outputs(),
+    chunksize=100,
+    rng=123,
+    cache_strategy="release-unusable",
+    sort_estimands=false
+    )
+    runner = Runner(dataset, estimands, estimators; 
+        verbosity=verbosity, 
+        outputs=outputs, 
+        chunksize=chunksize,
+        rng=rng,
+        cache_strategy=cache_strategy,
+        sort_estimands=sort_estimands
+    )
+    runner()
+    return
+end
\ No newline at end of file
diff --git a/src/utils.jl b/src/utils.jl
index f76b3a2..2c31c6e 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -30,14 +30,12 @@ empty_tmle_output(;size=0) = DataFrame(
 covariates_string(Ψ; join_string="_&_") = 
     length(Ψ.outcome_extra_covariates) != 0 ? join(Ψ.outcome_extra_covariates, join_string) : missing
 
-function param_string(param::T) where T <: TMLE.Estimand
-    str = string(T)
-    return startswith(str, "TMLE.") ? str[6:end] : str
-end
+param_string(param::T) where T <: TMLE.Estimand = replace(string(T), "TMLE.Statistical" => "")
+
 
 case(nt::NamedTuple) = nt.case
 case(x) = x
-case_string(Ψ; join_string="_&_") = join((case(x) for x in values(Ψ.treatment)), join_string)
+case_string(Ψ; join_string="_&_") = join((case(x) for x in values(Ψ.treatment_values)), join_string)
 
 control_string(t::Tuple{Vararg{NamedTuple}}; join_string="_&_") = 
     join((val.control for val in t), join_string)
@@ -45,10 +43,10 @@ control_string(t::Tuple{Vararg{NamedTuple}}; join_string="_&_") =
 control_string(t; join_string="_&_") = missing
 
 control_string(Ψ::TMLE.Estimand; join_string="_&_") = 
-    control_string(values(Ψ.treatment); join_string=join_string)
+    control_string(values(Ψ.treatment_values); join_string=join_string)
 
-treatment_string(Ψ; join_string="_&_") = join(keys(Ψ.treatment), join_string)
-confounders_string(Ψ; join_string="_&_") = join(Ψ.confounders, join_string)
+treatment_string(Ψ; join_string="_&_") = join((string(name) for name in keys(Ψ.treatment_values)), join_string) # treatment names, in declaration order
+confounders_string(Ψ; join_string="_&_") = join(unique(Iterators.flatten(values(Ψ.treatment_confounders))), join_string) # confounders of all treatments, each listed once
 
 
 function statistics_from_estimator(estimator)
@@ -74,9 +72,9 @@ statistics_from_result(result::FailedEstimation) =
     (missing, missing, missing, missing, missing), 
     (missing, missing, missing, missing, missing)
 
-function append_csv(filename, tmle_results, logs)
-    data = empty_tmle_output(size=size(tmle_results, 1))
-    for (i, (result, log)) in enumerate(zip(tmle_results, logs))
+function append_csv(filename, results)
+    data = empty_tmle_output(size=size(results, 1))
+    for (i, result) in enumerate(results)
         Ψ = result.parameter
         param_type = param_string(Ψ)
         treatments = treatment_string(Ψ)
@@ -93,41 +91,83 @@ function append_csv(filename, tmle_results, logs)
     CSV.write(filename, data, append=true, header=!isfile(filename))
 end
 
+#####################################################################
+#####                       JSON OUTPUT                          ####
+#####################################################################
+
+initialize_json(::Nothing) = nothing # no filename given: there is no file to create
+
+# (Re)create `filename` so that it holds only the opening bracket of the JSON array.
+initialize_json(filename::String) =
+    open(stream -> print(stream, '['), filename, "w")
+
+function update(output::JSONOutput, results; finalize=false)
+    isnothing(output.filename) && return
+    open(output.filename, "a") do stream
+        for estimator_results in results
+            # `emptyIC` (driven by `pval_threshold`) is applied before the dictionary conversion.
+            serializable = TMLE.to_dict(TMLE.emptyIC(estimator_results, output.pval_threshold))
+            JSON.print(stream, serializable)
+            print(stream, ',')
+        end
+        finalize || return
+        skip(stream, -1) # step back over the trailing comma, which JSON does not allow
+        print(stream, ']')
+    end
+end
+
+#####################################################################
+#####                       STD OUTPUT                          ####
+#####################################################################
+
+function update(doprint, results)
+    if doprint
+        mimetext = MIME"text/plain"()
+        index = 1
+        for result in results
+            for (key, val) ∈ zip(keys(result), result)
+                show(stdout, mimetext, string("⋆⋆⋆ Estimand ", index, " ⋆⋆⋆"))
+                show(stdout, mimetext, val.estimand)
+                show(stdout, mimetext, string("Estimation Result From: ", key, ))
+                show(stdout, mimetext, val)
+                index += 1
+            end
+        end
+    end
+end
 
 #####################################################################
 #####                       JLD2 OUTPUT                          ####
 #####################################################################
 
-update_jld2_output(jld2_file::Nothing, partition, tmle_results, dataset; pval_threshold=0.05) = nothing
-
-function update_jld2_output(jld2_file::String, partition, tmle_results, dataset; pval_threshold=0.05)
-    if jld2_file !== nothing
-        jldopen(jld2_file, "a+", compress=true) do io
-        # Append only with results passing the threshold
-            previous_variables = nothing
-            sample_ids_idx = nothing
-
-            for (partition_index, param_index) in enumerate(partition)
-                r = tmle_results[partition_index]
-                if (r isa TMLE.Estimate) && (pvalue(OneSampleZTest(r.tmle)) <= pval_threshold)
-                    current_variables = variables(r.parameter)
-                    if previous_variables != current_variables
-                        sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables)
-                        io["$param_index/sample_ids"] = sample_ids
-                        sample_ids_idx = param_index
-                    end
-                    io["$param_index/result"] = r
-                    io["$param_index/sample_ids_idx"] = sample_ids_idx
-
-                    previous_variables = current_variables
-                end
+
+function update(output::HDF5Output, partition, results, dataset)
+    output.filename === nothing && return
+
+    jldopen(output.filename, "a+", compress=true) do io
+    # Append only with results passing the threshold
+        previous_variables = nothing
+        sample_ids_idx = nothing
+        for (partition_index, param_index) in enumerate(partition)
+            estimator_results = TMLE.emptyIC(results[partition_index], output.pval_threshold)
+            current_variables = variables(r.parameter)
+            if previous_variables != current_variables
+                sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables)
+                io["$param_index/sample_ids"] = sample_ids
+                sample_ids_idx = param_index
             end
+            io["$param_index/result"] = r
+            io["$param_index/sample_ids_idx"] = sample_ids_idx
+
+            previous_variables = current_variables
         end
+        
     end
+
 end
 
 #####################################################################
-#####                    Read Estimands                         ####
+#####                    Read TMLE Estimands Configuration                         ####
 #####################################################################
 
 
@@ -154,14 +194,17 @@ maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::Nothing, method) = thro
 
 maybe_identify(Ψ, scm, method) = Ψ
 
+read_method(extension) = ifelse(extension == ".json", read_json, read_yaml) # any extension other than ".json" is read as YAML
+
 """
-    read_estimands(param_file, dataset)
+    proofread_estimands(param_file, dataset)
 
 Reads estimands from file and ensures that the treatment values in the config file
 respects the treatment types in the dataset.
 """
-function proofread_estimands_from_yaml(filename, dataset)
-    config = configuration_from_yaml(filename)
+function proofread_estimands(filename, dataset)
+    extension = filename[findlast(isequal('.'), filename):end]
+    config = read_method(extension)(filename)
     estimands = Vector{TMLE.Estimand}(undef, length(config.estimands))
     treatment_types = Dict()
     for (index, Ψ) in enumerate(config.estimands)
@@ -187,6 +230,18 @@ end
 #####                 ADDITIONAL METHODS                         ####
 #####################################################################
 
+TMLE.emptyIC(result::NamedTuple{names}, ::Nothing) where {names} =
+    NamedTuple{names}(map(TMLE.emptyIC, values(result))) # no threshold: `emptyIC` is applied to every entry
+
+function TMLE.emptyIC(result, pval_threshold::Float64)
+    pvalue(OneSampleZTest(result)) < pval_threshold && return result # below the threshold: kept as is
+    return TMLE.emptyIC(result)
+end
+
+TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where {names} =
+    NamedTuple{names}(map(r -> TMLE.emptyIC(r, pval_threshold), values(result))) # entry-wise thresholding
+
+
 function get_sample_ids(data, variables)
     cols = [:SAMPLE_ID, variables.target, variables.treatments..., variables.confounders..., variables.covariates...]
     return dropmissing(data[!, cols]).SAMPLE_ID
@@ -262,4 +317,7 @@ load_tmle_spec(file::Nothing) = (
 function load_tmle_spec(file)
     include(abspath(file))
     return ESTIMATORS
-end
\ No newline at end of file
+end
+
+TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{TMLE.EICEstimate}}}) where {names} =
+    Dict(name => TMLE.to_dict(estimate) for (name, estimate) in pairs(nt)) # one dictionary entry per named estimate
\ No newline at end of file
diff --git a/test/config/tmle_config.jl b/test/config/tmle_ose_config.jl
similarity index 100%
rename from test/config/tmle_config.jl
rename to test/config/tmle_ose_config.jl
diff --git a/test/runner.jl b/test/runner.jl
index 5f61bca..3b7779c 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -93,24 +93,52 @@ function build_dataset(;n=1000, format="csv")
     format == "csv" ? CSV.write("data.csv", dataset) : Arrow.write("data.arrow", dataset)
 end
 
-@testset "Test partition_tmle!" begin
+@testset "Integration Test" begin
     build_dataset(;n=1000, format="csv")
-    dataset = TargetedEstimation.instantiate_dataset("data.csv")
-    estimands = TargetedEstimation.read_estimands(joinpath(config_dir, "parameters.yaml"), dataset)
-    variables = TargetedEstimation.variables(estimands, dataset)
-    TargetedEstimation.coerce_types!(dataset, variables)
-    tmle_spec = TargetedEstimation.load_tmle_spec(joinpath("config", "tmle_config.jl"))
-    cache = TMLECache(dataset)
-
-    tmle_results = Vector{Union{TMLE.Estimate, TargetedEstimation.FailedEstimation}}(undef, 3)
-    logs = Vector{Union{String, Missing}}(undef, 3)
-    part = 4:6
-    TargetedEstimation.partition_tmle!(cache, tmle_results, logs, part, tmle_spec, parameters, variables; verbosity=0)
-    @test [x.tmle.Ψ̂ for x in tmle_results] isa Vector{Float64}
-    @test [x.parameter for x in tmle_results] == parameters[part]
-    @test [x.onestep.Ψ̂ for x in tmle_results] isa Vector{Float64}
-    @test all(x === missing for x in logs)
+    tmpdir = mktempdir(cleanup=true)
+    estimands_filename = joinpath(tmpdir, "configuration.yaml")
+    TMLE.write_json(estimands_filename, statistical_estimands_only_config())
+    outputs = TargetedEstimation.Outputs(
+        json=TargetedEstimation.JSONOutput(filename="output.json"),
+        std=true,
+    )
+    runner = Runner(
+        "data.csv", 
+        estimands_filename, 
+        joinpath(CONFIGDIR, "tmle_ose_config.jl"); 
+        outputs=outputs, 
+        cache_strategy="release-unusable",
+    )
+    partition = 1:3
+    results = runner(partition)
+    for result in results
+        @test result.TMLE isa TMLE.TMLEstimate
+        @test result.OSE isa TMLE.OSEstimate
+    end
+
+    output_txt = "output.txt"
+    TargetedEstimation.initialize(outputs)
+    open(output_txt, "w") do io
+        redirect_stdout(io) do
+            TargetedEstimation.save(runner, results, partition, true)
+        end
+    end
+    # Read STDOUT
+    stdout_content = split(read(output_txt, String), "\n")
+    @test length(stdout_content) > 20
+
+    # Read JSON
+    loaded_results = TMLE.read_json(outputs.json.filename)
+    for (result, loaded_result) in zip(results, loaded_results)
+        @test loaded_result[:TMLE] isa TMLE.TMLEstimate
+        @test result.TMLE.estimate == loaded_result[:TMLE].estimate
+        @test loaded_result[:OSE] isa TMLE.OSEstimate
+        @test result.OSE.estimate == loaded_result[:OSE].estimate
+    end
+
     rm("data.csv")
+    rm(output_txt)
+    rm(outputs.json.filename)
 end
 
 @testset "Test tmle_estimation" begin
diff --git a/test/testutils.jl b/test/testutils.jl
index 76d6ab4..c41ad33 100644
--- a/test/testutils.jl
+++ b/test/testutils.jl
@@ -4,7 +4,7 @@ function statistical_estimands_only_config()
     configuration = Configuration(
         estimands=[
             IATE(
-                outcome = Symbol("CONTINUOUS, outcome"), 
+                outcome = Symbol("CONTINUOUS, OUTCOME"), 
                 treatment_values = (
                     T1 = (case = true, control = false), 
                     T2 = (case = true, control = false)), 
@@ -12,7 +12,7 @@ function statistical_estimands_only_config()
                 outcome_extra_covariates = (:C1,)
             ),
             IATE(
-                outcome = Symbol("BINARY/outcome"), 
+                outcome = Symbol("BINARY/OUTCOME"), 
                 treatment_values = (
                     T1 = (case = true, control = false), 
                     T2 = (case = true, control = false)), 
@@ -20,13 +20,13 @@ function statistical_estimands_only_config()
                 outcome_extra_covariates = (:C1,)
             ),
             ATE(
-                outcome = Symbol("CONTINUOUS, outcome"), 
+                outcome = Symbol("CONTINUOUS, OUTCOME"), 
                 treatment_values = (T1 = (case = true, control = false),), 
                 treatment_confounders = (T1 = (:W1, :W2),), 
                 outcome_extra_covariates = ()
             ),
             IATE(
-                outcome = Symbol("CONTINUOUS, outcome"), 
+                outcome = Symbol("CONTINUOUS, OUTCOME"), 
                 treatment_values = (
                     T1 = (case = true, control = false), 
                     T2 = (case = false, control = true)
@@ -35,7 +35,7 @@ function statistical_estimands_only_config()
                 outcome_extra_covariates = ()
             ),
             IATE(
-                outcome = Symbol("BINARY/outcome"), 
+                outcome = Symbol("BINARY/OUTCOME"), 
                 treatment_values = (
                     T1 = (case = true, control = false), 
                     T2 = (case = false, control = true)
@@ -44,7 +44,7 @@ function statistical_estimands_only_config()
                 outcome_extra_covariates = (:C1,)
             ),
             ATE(
-                outcome = Symbol("CONTINUOUS, outcome"), 
+                outcome = Symbol("CONTINUOUS, OUTCOME"), 
                 treatment_values = (
                     T1 = (case = true, control = false), 
                     T2 = (case = true, control = false)), 
diff --git a/test/utils.jl b/test/utils.jl
index 386f413..8c4bf5e 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -14,7 +14,7 @@ PROJECT_DIR = dirname(dirname(pathof(TargetedEstimation)))
 include(joinpath(PROJECT_DIR, "test", "testutils.jl"))
 
 @testset "Test load_tmle_spec: with configuration file" begin
-    estimators = TargetedEstimation.load_tmle_spec(joinpath(PROJECT_DIR, "test", "config", "tmle_config.jl"))
+    estimators = TargetedEstimation.load_tmle_spec(joinpath(PROJECT_DIR, "test", "config", "tmle_ose_config.jl"))
     @test estimators.TMLE isa TMLE.TMLEE
     @test estimators.OSE isa TMLE.OSE
     @test estimators.TMLE.weighted === true
@@ -45,11 +45,13 @@ end
     @test newT == [(case = true, control = false), (case = 1, control = 0)]
 end
 
-@testset "Test proofread_estimands_from_yaml" begin
-    filename = "statistical_estimands.yml"
-    configuration_to_yaml(filename, statistical_estimands_only_config())
+@testset "Test proofread_estimands" for extension in ("yaml", "json")
+    # Write estimands file
+    filename = "statistical_estimands.$extension"
+    eval(Meta.parse("write_$extension"))(filename, statistical_estimands_only_config())
+
     dataset = DataFrame(T1 = [1., 0.], T2=[true, false])
-    estimands = TargetedEstimation.proofread_estimands_from_yaml(filename, dataset)
+    estimands = TargetedEstimation.proofread_estimands(filename, dataset)
     for estimand in estimands
         if haskey(estimand.treatment_values, :T1)
             @test estimand.treatment_values.T1.case isa Float64
@@ -60,6 +62,7 @@ end
             @test estimand.treatment_values.T2.control isa Bool
         end
     end
+    # Clean estimands file
     rm(filename)
 end
 
@@ -77,10 +80,10 @@ end
     @test TargetedEstimation.confounders_string(Ψ) == "W₁_&_W₂"
 
     Ψ = CM(
-        target=:Y,
-        treatment=(T₁=1, T₂="AC"),
-        confounders=[:W₁, :W₂],
-        covariates=[:C₁]
+        outcome=:Y,
+        treatment_values=(T₁=1, T₂="AC"),
+        treatment_confounders=(T₁=[:W₁, :W₂], T₂ = [:W₁, :W₂]),
+        outcome_extra_covariates=[:C₁]
     )
 
     @test TargetedEstimation.covariates_string(Ψ) === "C₁"
@@ -125,7 +128,6 @@ end
 
     @test dataset.Ycat isa CategoricalArray
     @test eltype(dataset.C) <: Union{Missing, Float64}
-
 end
 
 @testset "Test get_sample_ids" begin
@@ -205,7 +207,29 @@ end
     # If the type is already coerced then no-operation is applied 
     TargetedEstimation.make_float(dataset.C₁) === dataset.C₁
     TargetedEstimation.make_categorical(dataset.T₁, true) === dataset.T₁
+end
 
+@testset "Test JSON writing" begin
+    results = []
+    for Ψ in statistical_estimands_only_config().estimands
+        push!(results, (
+            TMLE=TMLE.TMLEstimate(Ψ, rand(), rand(), 10, Float64[]),
+            OSE=TMLE.OSEstimate(Ψ, rand(), rand(), 10, Float64[])
+            ))
+    end
+    # `update` dispatches on a `JSONOutput`: a bare filename would select the STDOUT method instead.
+    output = TargetedEstimation.JSONOutput(filename=joinpath(mktempdir(cleanup=true), "output_test.json"))
+    TargetedEstimation.initialize_json(output.filename)
+    TargetedEstimation.update(output, results[1:3])
+    TargetedEstimation.update(output, results[4:end]; finalize=true) # last batch closes the JSON array
+    loaded_results = TMLE.read_json(output.filename)
+    @test size(loaded_results) == size(results)
+    for (result, loaded_result) in zip(results, loaded_results)
+        @test result.TMLE.estimate == loaded_result[:TMLE].estimate
+        @test result.TMLE.std == loaded_result[:TMLE].std
+        @test result.OSE.estimate == loaded_result[:OSE].estimate
+        @test result.OSE.std == loaded_result[:OSE].std
+    end
+end
 
 end;

From e61d7c84c1f606901f3c1b953f79a3d6feadfbc9 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 21 Nov 2023 12:07:09 +0000
Subject: [PATCH 03/71] fix more tests

---
 src/TargetedEstimation.jl |   2 +-
 src/runner.jl             |  21 ++--
 src/utils.jl              |  37 +++---
 test/runner.jl            | 229 ++++++++++++++++----------------------
 test/testutils.jl         |  20 ++--
 test/utils.jl             |  31 +++++-
 6 files changed, 157 insertions(+), 183 deletions(-)

diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index 6ade605..2f809ec 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -41,7 +41,7 @@ include(joinpath("models", "glmnet.jl"))
 include(joinpath("models", "adaptive_interaction_transformer.jl"))
 include(joinpath("models", "biallelic_snp_encoder.jl"))
 
-export Runner, run_estimation, sieve_variance_plateau, merge_csv_files
+export Runner, tmle, sieve_variance_plateau, merge_csv_files
 export GLMNetRegressor, GLMNetClassifier
 export RestrictedInteractionTransformer, BiAllelicSNPEncoder
 export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV
diff --git a/src/runner.jl b/src/runner.jl
index 0ed6220..2120cfa 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -2,7 +2,6 @@ struct FailedEstimation
     message::String
 end
 
-
 @option struct JSONOutput
     filename::Union{Nothing, String} = nothing
     pval_threshold::Union{Nothing, Float64} = nothing
@@ -10,25 +9,19 @@ end
 
 initialize(output::JSONOutput) = initialize_json(output.filename)
 
-
 @option struct HDF5Output
     filename::Union{Nothing, String} = nothing
     pval_threshold::Union{Nothing, Float64} = nothing
 end
 
-initialize_hdf5(x) = nothing
-
-initialize(output::HDF5Output) = initialize_hdf5(output.filename)
-
 @option struct Outputs
     json::JSONOutput = JSONOutput()
     hdf5::HDF5Output = HDF5Output()
-    std::Bool = true
+    std::Bool = false
 end
 
 function initialize(outputs::Outputs)
     initialize(outputs.json)
-    initialize(outputs.hdf5)
 end
 
 mutable struct Runner
@@ -69,11 +62,11 @@ end
 
 function save(runner::Runner, results, partition, finalize)
     # Append STD Out
-    update(runner.outputs.std, results)
-    # Append JSON result with partition
+    update(runner.outputs.std, results, partition)
+    # Append JSON Output
     update(runner.outputs.json, results; finalize=finalize)
-    # Append HDF5 result if save-ic is true
-    # update_jld2_output(runner.output_ios.HDF5, partition, results, runner.dataset)
+    # Append HDF5 Output
+    update(runner.outputs.hdf5, partition, results, runner.dataset)
 end
 
 
@@ -118,7 +111,7 @@ end
 
 function (runner::Runner)()
     # Initialize output files
-    initialize_outputs(runner.output_ios)
+    initialize(runner.outputs)
     # Split worklist in partitions
     nparams = size(runner.estimands, 1)
     partitions = collect(Iterators.partition(1:nparams, runner.chunksize))
@@ -126,7 +119,7 @@ function (runner::Runner)()
         results = runner(partition)
         save(runner, results, partition, partition===partitions[end])
     end
-    verbosity >= 1 && @info "Done."
+    runner.verbosity >= 1 && @info "Done."
     return 0
 end
 
diff --git a/src/utils.jl b/src/utils.jl
index 2c31c6e..cecc9ed 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -120,15 +120,17 @@ end
 #####                       STD OUTPUT                          ####
 #####################################################################
 
-function update(doprint, results)
+function update(doprint, results, partition)
     if doprint
         mimetext = MIME"text/plain"()
         index = 1
-        for result in results
+        for (result, estimand_index) in zip(results, partition)
+            show(stdout, mimetext, string("⋆⋆⋆ Estimand ", estimand_index, " ⋆⋆⋆"))
+            println(stdout)
+            show(stdout, mimetext, first(result).estimand)
             for (key, val) ∈ zip(keys(result), result)
-                show(stdout, mimetext, string("⋆⋆⋆ Estimand ", index, " ⋆⋆⋆"))
-                show(stdout, mimetext, val.estimand)
-                show(stdout, mimetext, string("Estimation Result From: ", key, ))
+                show(stdout, mimetext, string("→ Estimation Result From: ", key, ))
+                println(stdout)
                 show(stdout, mimetext, val)
                 index += 1
             end
@@ -145,25 +147,22 @@ function update(output::HDF5Output, partition, results, dataset)
     output.filename === nothing && return
 
     jldopen(output.filename, "a+", compress=true) do io
-    # Append only with results passing the threshold
         previous_variables = nothing
         sample_ids_idx = nothing
         for (partition_index, param_index) in enumerate(partition)
             estimator_results = TMLE.emptyIC(results[partition_index], output.pval_threshold)
-            current_variables = variables(r.parameter)
+            current_variables = variables(first(estimator_results).estimand)
             if previous_variables != current_variables
                 sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables)
                 io["$param_index/sample_ids"] = sample_ids
                 sample_ids_idx = param_index
             end
-            io["$param_index/result"] = r
+            io["$param_index/result"] = estimator_results
             io["$param_index/sample_ids_idx"] = sample_ids_idx
 
             previous_variables = current_variables
         end
-        
     end
-
 end
 
 #####################################################################
@@ -242,10 +241,8 @@ TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names =
     NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result])
 
 
-function get_sample_ids(data, variables)
-    cols = [:SAMPLE_ID, variables.target, variables.treatments..., variables.confounders..., variables.covariates...]
-    return dropmissing(data[!, cols]).SAMPLE_ID
-end
+get_sample_ids(data, variables) = dropmissing(data[!, vcat(:SAMPLE_ID, collect(variables))]).SAMPLE_ID # ids of the rows complete on `variables`
+
 
 """
     instantiate_dataset(path::String)
@@ -296,12 +293,12 @@ function coerce_types!(dataset, Ψ)
     make_float!(dataset, continuous_variables)
 end
 
-variables(Ψ::TMLE.Estimand) = (
-    outcome = Ψ.outcome, 
-    covariates = Ψ.outcome_extra_covariates, 
-    confounders = Ψ.treatment_confounders,
-    treatments = keys(Ψ.treatment_values)
-    )
+function variables(Ψ::TMLE.Estimand)
+    treatments = keys(Ψ.treatment_values)
+    confounders = Iterators.flatten(values(Ψ.treatment_confounders))
+    # A `Set` is returned, so a variable playing several roles appears only once.
+    return Set([Ψ.outcome, treatments..., Ψ.outcome_extra_covariates..., confounders...])
+end
 
 load_tmle_spec(file::Nothing) = (
     TMLE = TMLEE(
diff --git a/test/runner.jl b/test/runner.jl
index 3b7779c..cde4dbb 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -20,33 +20,18 @@ CONFIGDIR = joinpath(PKGDIR, "test", "config")
 
 include(joinpath(PKGDIR, "test", "testutils.jl"))
 
-function test_tmle_output(param_index, jldio, data, expected_param, sample_ids_idx)
-    jld2_res = jldio[string(param_index)]
-    csv_row = data[param_index, :]
-    Ψ = jld2_res["result"].parameter
-    @test jld2_res["result"] isa TMLE.Estimate
-    @test jld2_res["result"].tmle.Ψ̂ isa Float64
-    @test Ψ == expected_param
-    @test jld2_res["sample_ids_idx"] == sample_ids_idx
-    sample_ids = jldio[string(jld2_res["sample_ids_idx"])]["sample_ids"]
-    if expected_param.target == Symbol("BINARY/OUTCOME")
-        @test sample_ids == 2:1000
-    else
-        @test sample_ids == 1:1000
+sort_nt_by_key(nt::NamedTuple{names}) where {names} = NamedTuple{Tuple(sort!(collect(names)))}(nt) # keys are sorted through a Vector: `sort` on a Tuple is not available on every Julia release
+sort_nt_by_key(x) = x # anything that is not a NamedTuple is returned unchanged
+
+function test_estimands_match(Ψ₁::T1, Ψ₂::T2) where {T1, T2}
+    @test T1 == T2
+    @test Ψ₁.outcome == Ψ₂.outcome
+    @test Ψ₁.outcome_extra_covariates == Ψ₂.outcome_extra_covariates
+    @test sort_nt_by_key(Ψ₁.treatment_confounders) == sort_nt_by_key(Ψ₂.treatment_confounders)
+    @test sort(keys(Ψ₁.treatment_values)) == sort(keys(Ψ₂.treatment_values))
+    for key in keys(Ψ₁.treatment_values)
+        @test sort_nt_by_key(Ψ₁.treatment_values[key]) == sort_nt_by_key(Ψ₂.treatment_values[key])
     end
-    @test jld2_res["result"] isa TMLE.Estimate
-
-    if csv_row.COVARIATES === missing
-        @test TargetedEstimation.covariates_string(Ψ) === csv_row.COVARIATES
-    else
-        @test TargetedEstimation.covariates_string(Ψ) == csv_row.COVARIATES
-    end
-    @test TargetedEstimation.param_string(Ψ) == csv_row.PARAMETER_TYPE
-    @test TargetedEstimation.case_string(Ψ) == csv_row.CASE
-    @test TargetedEstimation.control_string(Ψ) == csv_row.CONTROL
-    @test TargetedEstimation.treatment_string(Ψ) == csv_row.TREATMENTS
-    @test TargetedEstimation.confounders_string(Ψ) == csv_row.CONFOUNDERS
-    @test csv_row.TMLE_ESTIMATE == jld2_res["result"].tmle.Ψ̂
 end
 
 """
@@ -100,6 +85,7 @@ end
     TMLE.write_json(estimands_filename, statistical_estimands_only_config())
     outputs = TargetedEstimation.Outputs(
         json=TargetedEstimation.JSONOutput(filename="output.json"),
+        hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1.),
         std=true,
     )
     runner = Runner(
@@ -109,13 +95,14 @@ end
         outputs=outputs, 
         cache_strategy="release-unusable",
     )
-    partition = 1:3
+    partition = 4:6
     results = runner(partition)
     for result in results
         @test result.TMLE isa TMLE.TMLEstimate
         @test result.OSE isa TMLE.OSEstimate
     end
 
+    # Test Save to STDOUT
     output_txt = "output.txt"
     TargetedEstimation.initialize(outputs)
     open(output_txt, "w") do io
@@ -123,138 +110,116 @@ end
             TargetedEstimation.save(runner, results, partition, true)
         end
     end
-    # Read STDOUT
-    stdout_content = split(read(output_txt, String), "\n")
-    @test length(stdout_content) > 20
+    stdout_content = read(output_txt, String)
+    @test all(occursin("Estimand $i", stdout_content) for i in partition)
 
-    # Read JSON
+    # Test Save to JSON
     loaded_results = TMLE.read_json(outputs.json.filename)
     for (result, loaded_result) in zip(results, loaded_results)
         @test loaded_result[:TMLE] isa TMLE.TMLEstimate
         @test result.TMLE.estimate == loaded_result[:TMLE].estimate
+        @test loaded_result[:TMLE].IC == []
+
         @test loaded_result[:OSE] isa TMLE.OSEstimate
         @test result.OSE.estimate == loaded_result[:OSE].estimate
+        @test loaded_result[:OSE].IC == []
     end
 
+    # Test Save to HDF5
+    hdf5file = jldopen(outputs.hdf5.filename, "r")
+    for (result_index, param_index) in enumerate(4:6)
+        result = hdf5file[string(param_index, "/result")]
+        @test result.TMLE isa TMLE.TMLEstimate
+        @test results[result_index].TMLE.estimate == result.TMLE.estimate
+
+        @test result.OSE isa TMLE.OSEstimate
+        @test results[result_index].OSE.estimate == result.OSE.estimate
+    end
+    @test hdf5file["4/sample_ids"] == collect(2:1000)
+    @test hdf5file["4/sample_ids_idx"] == 4
+    @test size(hdf5file["4/result"].TMLE.IC, 1) == 999
+
+    @test !haskey(hdf5file, "5/sample_ids")
+    @test hdf5file["5/sample_ids_idx"] == 4
+    @test size(hdf5file["5/result"].TMLE.IC, 1) == 999
+
+    @test hdf5file["6/sample_ids"] == collect(1:1000)
+    @test hdf5file["6/sample_ids_idx"] == 6
+    @test size(hdf5file["6/result"].TMLE.IC, 1) == 1000
+
+    close(hdf5file)
+
+    # Clean
     rm("data.csv")
     rm(output_txt)
     rm(outputs.json.filename)
+    rm(outputs.hdf5.filename)
 end
 
-@testset "Test tmle_estimation" begin
-    expected_parameters = [
-        ATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false),), [:W1, :W2], Symbol[]),
-        IATE(Symbol("BINARY/OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]),
-        IATE(Symbol("BINARY/OUTCOME"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], [:C1]),
-        IATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = false, control = true)), [:W1, :W2], Symbol[]),
-        IATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1]),
-        ATE(Symbol("CONTINUOUS, OUTCOME"), (T1 = (case = true, control = false), T2 = (case = true, control = false)), [:W1, :W2], [:C1])
-    ]
-    outfilename = "statistical_estimands.yml"
-    configuration_to_yaml(outfilename, statistical_estimands_only_config())
-    expected_param_sample_ids_idx = [1, 2, 2, 4, 5, 5]
+@testset "Test tmle" begin
+    tmpdir = mktempdir(cleanup=true)
+    estimands_filename = joinpath(tmpdir, "configuration.json")
+    configuration = statistical_estimands_only_config()
+    TMLE.write_json(estimands_filename, configuration)
+    outputs = TargetedEstimation.Outputs(
+        json=TargetedEstimation.JSONOutput(filename="output.json"),
+        hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1.),
+    )
+    estimatorfile = joinpath(CONFIGDIR, "tmle_ose_config.jl")
     # Run tests over CSV and Arrow data formats
     for format in ("csv", "arrow")
+        datafile = string("data.", format)
         build_dataset(;n=1000, format=format)
-        parsed_args = Dict(
-                    "dataset" => string("data.", format),
-                    "estimands-config" => nothing,
-                    "estimators-config" => joinpath(config_dir, "tmle_config.jl"),
-                    "csv-out" => "output.csv",
-                    "verbosity" => 0,
-                    "hdf5-out" => "output.hdf5",
-                    "pval-threshold" => 1.,
-                    "chunksize" => nothing
-                )
-        runner = TargetedEstimation.Runner(parsed_args)
-        for param_file in ("parameters.yaml", "parameters.bin")
-            for chunksize in (4, 10)
-                # Only one continuous phenotype / machines not saved / no adaptive cv
-
-                parsed_args["estimands-config"] = outfilename
-                parsed_args["chunksize"] = chunksize
-
-                tmle_estimation(parsed_args)
-
-                # Given the threshold is 1, all
-                # estimation results will make the threshold
-                jldio = jldopen(parsed_args["hdf5-out"])
-                data = CSV.read(parsed_args["csv-out"], DataFrame)
-
-                @test all(data[i, :TMLE_ESTIMATE] != data[j, :TMLE_ESTIMATE] for i in 1:5 for j in i+1:6)
-
-                for (param_index, (Ψ, sample_ids_idx)) in enumerate(zip(expected_parameters, expected_param_sample_ids_idx))
-                    test_tmle_output(param_index, jldio, data, Ψ, sample_ids_idx)
-                end
-                # Clean
-                rm(parsed_args["csv-out"])
-                rm(parsed_args["hdf5-out"])
+        for chunksize in (4, 10)
+            tmle(datafile, estimands_filename, estimatorfile; 
+                outputs=outputs,
+                chunksize=chunksize,
+            )
+
+            hdf5file = jldopen(outputs.hdf5.filename)
+            results_from_json = TMLE.read_json(outputs.json.filename)
+
+            for i in 1:6
+                Ψ = configuration.estimands[i]
+                test_estimands_match(Ψ, results_from_json[i][:TMLE].estimand)
+                hdf5result = hdf5file[string(i, "/result")]
+                @test results_from_json[i][:TMLE].estimate == hdf5result.TMLE.estimate
+                @test results_from_json[i][:OSE].estimate == hdf5result.OSE.estimate
             end
+            # Close the HDF5 handle before removing the outputs (the next iteration rewrites the same path)
+            close(hdf5file)
+            rm(outputs.hdf5.filename)
+            rm(outputs.json.filename)
         end
-        rm(parsed_args["dataset"])
+        rm(datafile)
     end
 end
 
-@testset "Test tmle_estimation: No hdf5 file" begin
+@testset "Test tmle: lower p-value threshold only JSON output" begin
     build_dataset(;n=1000, format="csv")
-    estimands_filename = "estimands_test.yaml"
-    configuration_to_yaml(estimands_filename, statistical_estimands_only_config())
-    # Only one continuous phenotype / machines not saved / no adaptive cv
-    parsed_args = Dict(
-        "dataset" => "data.csv",
-        "estimands-config" => estimands_filename,
-        "estimators-config" => joinpath(CONFIGDIR, "ose_config.jl"),
-        "csv-out" => "output.csv",
-        "verbosity" => 0,
-        "hdf5-out" => nothing,
-        "pval-threshold" => 1.,
-        "chunksize" => 10,
-        "rng" => 123,
-        "sort-estimands" => false,
-        "cache-strategy" => "release_unusable"
-    )
-    @enter run_estimation(parsed_args)
-
-    ## Check CSV file
-    data = CSV.read(parsed_args["csv-out"], DataFrame)
-    @test names(TargetedEstimation.empty_tmle_output()) == names(data)
-    @test size(data) == (6, 19)
-    all(x === missing for x in data.LOG)
-    # Clean
-    rm(parsed_args["csv-out"])
-    rm(parsed_args["dataset"])
-end
-
-
-@testset "Test tmle_estimation: lower p-value threhsold" begin
-    build_dataset(;n=1000, format="csv")
-    parsed_args = Dict(
-        "dataset" => "data.csv",
-        "estimands-config" => joinpath("config", "parameters.yaml"),
-        "estimators-config" => joinpath("config", "tmle_config.jl"),
-        "csv-out" => "output.csv",
-        "verbosity" => 0,
-        "hdf5-out" => "output.hdf5",
-        "pval-threshold" => 1e-15,
-        "chunksize" => 10
+    outputs = TargetedEstimation.Outputs(
+        json=TargetedEstimation.JSONOutput(filename="output.json", pval_threshold=1e-15)
     )
-
-    tmle_estimation(parsed_args)
+    tmpdir = mktempdir(cleanup=true)
+    estimandsfile = joinpath(tmpdir, "configuration.json")
+    configuration = statistical_estimands_only_config()
+    TMLE.write_json(estimandsfile, configuration)
+    estimatorfile = joinpath(CONFIGDIR, "ose_config.jl")
+    datafile = "data.csv"
+    tmle(datafile, estimandsfile, estimatorfile; outputs=outputs)
     
     # Essential results
-    data = CSV.read(parsed_args["csv-out"], DataFrame)
-    jldio = jldopen(parsed_args["hdf5-out"])
-    @test !haskey(jldio, "2")
-    @test !haskey(jldio, "3")
-    @test !haskey(jldio, "4")
-    @test !haskey(jldio, "5")
-    @test !haskey(jldio, "6")
-
-    @test jldio["1"]["result"].tmle.Ψ̂ == data[1, :TMLE_ESTIMATE]
+    results_from_json = TMLE.read_json(outputs.json.filename)
+    # Count the OSE results that kept a non-empty IC in the JSON output
+    n_nonempty_ICs = count(
+        result -> result[:OSE].IC != [],
+        results_from_json
+    )
+    # At least one estimate is expected to keep its IC
+    @test n_nonempty_ICs > 0
 
-    rm(parsed_args["dataset"])
-    rm(parsed_args["csv-out"])
-    rm(parsed_args["hdf5-out"])
+    rm(datafile)
+    rm(outputs.json.filename)
 end
 
 @testset "Test tmle_estimation: Failing parameters" begin
diff --git a/test/testutils.jl b/test/testutils.jl
index c41ad33..c9bc500 100644
--- a/test/testutils.jl
+++ b/test/testutils.jl
@@ -11,14 +11,6 @@ function statistical_estimands_only_config()
                 treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), 
                 outcome_extra_covariates = (:C1,)
             ),
-            IATE(
-                outcome = Symbol("BINARY/OUTCOME"), 
-                treatment_values = (
-                    T1 = (case = true, control = false), 
-                    T2 = (case = true, control = false)), 
-                treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), 
-                outcome_extra_covariates = (:C1,)
-            ),
             ATE(
                 outcome = Symbol("CONTINUOUS, OUTCOME"), 
                 treatment_values = (T1 = (case = true, control = false),), 
@@ -43,11 +35,19 @@ function statistical_estimands_only_config()
                 treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), 
                 outcome_extra_covariates = (:C1,)
             ),
-            ATE(
-                outcome = Symbol("CONTINUOUS, OUTCOME"), 
+            IATE(
+                outcome = Symbol("BINARY/OUTCOME"), 
                 treatment_values = (
                     T1 = (case = true, control = false), 
                     T2 = (case = true, control = false)), 
+                treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)), 
+                outcome_extra_covariates = (:C1,)
+            ),
+            CM(
+                outcome = Symbol("CONTINUOUS, OUTCOME"), 
+                treatment_values = (
+                    T1 = true, 
+                    T2 = false), 
                 treatment_confounders = (T1 = (:W1, :W2), T2 = (:W1, :W2)),
                 outcome_extra_covariates = (:C1,)
             )
diff --git a/test/utils.jl b/test/utils.jl
index 8c4bf5e..1859622 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -130,13 +130,32 @@ end
     @test eltype(dataset.C) <: Union{Missing, Float64}
 end
 
-@testset "Test get_sample_ids" begin
-    variables = (
-        target = :Y,
-        covariates = Symbol[],
-        confounders = [:W₁, :W₂],
-        treatments = (:T₁, :T₂)
+@testset "Test misc" begin
+    Ψ = ATE(
+        outcome = :Y,
+        treatment_values = (
+            T₁ = (case=1, control=0), 
+            T₂ = (case=1, control=0)),
+        treatment_confounders = (
+            T₁=[:W₁, :W₂], 
+            T₂=[:W₂, :W₃]
+        ),
+        outcome_extra_covariates = [:C]
     )
+    variables = TargetedEstimation.variables(Ψ)
+    @test variables == Set([:Y, :C, :T₁, :T₂, :W₁, :W₂, :W₃])
+    Ψ = ATE(
+        outcome = :Y,
+        treatment_values = (
+            T₁ = (case=1, control=0), 
+            T₂ = (case=1, control=0)),
+        treatment_confounders = (
+            T₁=[:W₁, :W₂], 
+            T₂=[:W₁, :W₂]
+        ),
+    )
+    variables = TargetedEstimation.variables(Ψ)
+    @test variables == Set([:Y, :T₁, :T₂, :W₁, :W₂])
     data = DataFrame(
         SAMPLE_ID  = [1, 2, 3, 4, 5],
         Y          = [1, 2, 3, missing, 5],

From 07e0c7f33e47e1207ca9a36bca1aa23688713309 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 21 Nov 2023 17:16:11 +0000
Subject: [PATCH 04/71] add new tmle function

---
 src/cache_managers.jl                      |   4 +-
 src/runner.jl                              |  47 +++++++--
 src/utils.jl                               | 113 +++------------------
 test/cache_managers.jl                     |   8 +-
 test/config/problematic_tmle_ose_config.jl |  14 +++
 test/models/biallelic_snp_encoder.jl       |   1 +
 test/resampling.jl                         |   4 +-
 test/runner.jl                             |  60 +++++++----
 test/runtests.jl                           |  23 +++--
 test/utils.jl                              |  79 +++-----------
 10 files changed, 144 insertions(+), 209 deletions(-)
 create mode 100644 test/config/problematic_tmle_ose_config.jl

diff --git a/src/cache_managers.jl b/src/cache_managers.jl
index a9908ee..64d5004 100644
--- a/src/cache_managers.jl
+++ b/src/cache_managers.jl
@@ -15,13 +15,13 @@ function release!(cache_manager::ReleaseUnusableCacheManager, Ψ)
     for ps in η.propensity_score
         cache_manager.η_counts[ps] -= 1
         if cache_manager.η_counts[ps] == 0
-            pop!(cache_manager.cache, ps)
+            delete!(cache_manager.cache, ps)
         end
     end
     # Outcome Mean
     cache_manager.η_counts[η.outcome_mean] -= 1
     if cache_manager.η_counts[η.outcome_mean] == 0
-        pop!(cache_manager.cache, η.outcome_mean)
+        delete!(cache_manager.cache, η.outcome_mean)
     end
 end
 
diff --git a/src/runner.jl b/src/runner.jl
index 2120cfa..24cff63 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -1,7 +1,14 @@
 struct FailedEstimation
-    message::String
+    estimand::TMLE.Estimand
+    msg::String
 end
 
+TMLE.to_dict(x::FailedEstimation) = Dict(
+        :estimand => TMLE.to_dict(x.estimand),
+        :error => x.msg
+    )
+
+
 @option struct JSONOutput
     filename::Union{Nothing, String} = nothing
     pval_threshold::Union{Nothing, Float64} = nothing
@@ -32,6 +39,7 @@ mutable struct Runner
     chunksize::Int
     outputs::Outputs
     verbosity::Int
+    failed_nuisance::Set
     function Runner(dataset, estimands, estimators; 
         verbosity=0, 
         outputs=Outputs(), 
@@ -55,21 +63,22 @@ mutable struct Runner
             )
         end
         cache_manager = make_cache_manager(estimands, cache_strategy)
+        
+        failed_nuisance = Set([])
 
-        return new(estimators, estimands, dataset, cache_manager, chunksize, outputs, verbosity)
+        return new(estimators, estimands, dataset, cache_manager, chunksize, outputs, verbosity, failed_nuisance)
     end
 end
 
 function save(runner::Runner, results, partition, finalize)
     # Append STD Out
-    update(runner.outputs.std, results, partition)
+    update_file(runner.outputs.std, results, partition)
     # Append JSON Output
-    update(runner.outputs.json, results; finalize=finalize)
+    update_file(runner.outputs.json, results; finalize=finalize)
     # Append HDF5 Output
-    update(runner.outputs.hdf5, partition, results, runner.dataset)
+    update_file(runner.outputs.hdf5, partition, results, runner.dataset)
 end
 
-
 function try_estimation(runner, Ψ, estimator)
     try
         result, _ = estimator(Ψ, runner.dataset,
@@ -78,16 +87,33 @@ function try_estimation(runner, Ψ, estimator)
         )
         return result
     catch e
-        # On Error, store the nuisance function where the error occured 
-        # to fail fast the next estimands
-        return FailedEstimation(string(e))
+        # Some nuisance function fits may fail. We do not interrupt on them but record the failure instead.
+        # This also lets the next estimands requiring the same nuisance functions be skipped quickly.
+        if e isa TMLE.FitFailedError
+            push!(runner.failed_nuisance, e.estimand)
+            return FailedEstimation(Ψ, e.msg)
+        # On other errors, rethrow
+        else 
+            rethrow(e) 
+        end
     end
 end
 
+function skip_fast(runner, Ψ)
+    ηs = TMLE.get_relevant_factors(Ψ)
+    # Skip Ψ when its outcome mean or any of its propensity score factors already failed to fit.
+    nuisance_factors = (ηs.outcome_mean, ηs.propensity_score...)
+    return any(η ∈ runner.failed_nuisance for η in nuisance_factors)
+end
+
 function (runner::Runner)(partition)
     results = Vector{NamedTuple}(undef, size(partition, 1))
     for (partition_index, param_index) in enumerate(partition)
         Ψ = runner.estimands[param_index]
+        if skip_fast(runner, Ψ)
+            results[partition_index] = NamedTuple{keys(runner.estimators)}([FailedEstimation(Ψ, "Skipped due to shared failed nuisance fit.") for _ in 1:length(runner.estimators)])
+            continue
+        end
         # Make sure data types are appropriate for the estimand
         TargetedEstimation.coerce_types!(runner.dataset, Ψ)
         # Maybe update cache with new η_spec
@@ -119,8 +145,6 @@ function (runner::Runner)()
         results = runner(partition)
         save(runner, results, partition, partition===partitions[end])
     end
-    runner.verbosity >= 1 && @info "Done."
-    return 0
 end
 
 
@@ -163,5 +187,6 @@ TMLE CLI.
         sort_estimands=sort_estimands
     )
     runner()
+    verbosity >= 1 && @info "Done."
     return
 end
\ No newline at end of file
diff --git a/src/utils.jl b/src/utils.jl
index cecc9ed..ce62c37 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,96 +1,3 @@
-
-
-#####################################################################
-#####                       CSV OUTPUT                           ####
-#####################################################################
-
-
-empty_tmle_output(;size=0) = DataFrame(
-    PARAMETER_TYPE=Vector{String}(undef, size), 
-    TREATMENTS=Vector{String}(undef, size), 
-    CASE=Vector{String}(undef, size), 
-    CONTROL=Vector{Union{Missing, String}}(undef, size), 
-    OUTCOME=Vector{String}(undef, size), 
-    CONFOUNDERS=Vector{String}(undef, size), 
-    COVARIATES=Vector{Union{Missing, String}}(undef, size), 
-    INITIAL_ESTIMATE=Vector{Union{Missing, Float64}}(undef, size), 
-    TMLE_ESTIMATE=Vector{Union{Missing, Float64}}(undef, size),
-    TMLE_STD=Vector{Union{Missing, Float64}}(undef, size),
-    TMLE_PVALUE=Vector{Union{Missing, Float64}}(undef, size),
-    TMLE_LWB=Vector{Union{Missing, Float64}}(undef, size),
-    TMLE_UPB=Vector{Union{Missing, Float64}}(undef, size),
-    ONESTEP_ESTIMATE=Vector{Union{Missing, Float64}}(undef, size),
-    ONESTEP_STD=Vector{Union{Missing, Float64}}(undef, size),
-    ONESTEP_PVALUE=Vector{Union{Missing, Float64}}(undef, size),
-    ONESTEP_LWB=Vector{Union{Missing, Float64}}(undef, size),
-    ONESTEP_UPB=Vector{Union{Missing, Float64}}(undef, size),
-    LOG=Vector{Union{Missing, String}}(undef, size)
-)
-
-covariates_string(Ψ; join_string="_&_") = 
-    length(Ψ.outcome_extra_covariates) != 0 ? join(Ψ.outcome_extra_covariates, join_string) : missing
-
-param_string(param::T) where T <: TMLE.Estimand = replace(string(T), "TMLE.Statistical" => "")
-
-
-case(nt::NamedTuple) = nt.case
-case(x) = x
-case_string(Ψ; join_string="_&_") = join((case(x) for x in values(Ψ.treatment_values)), join_string)
-
-control_string(t::Tuple{Vararg{NamedTuple}}; join_string="_&_") = 
-    join((val.control for val in t), join_string)
-
-control_string(t; join_string="_&_") = missing
-
-control_string(Ψ::TMLE.Estimand; join_string="_&_") = 
-    control_string(values(Ψ.treatment_values); join_string=join_string)
-
-treatment_string(Ψ; join_string="_&_") = join(keys(Ψ.treatment_values), join_string)
-confounders_string(Ψ; join_string="_&_") = join(Ψ.confounders_values, join_string)
-
-
-function statistics_from_estimator(estimator)
-    Ψ̂ = TMLE.estimate(estimator)
-    std = √(var(estimator))
-    testresult = OneSampleTTest(estimator)
-    pval = pvalue(testresult)
-    l, u = confint(testresult)
-    return (Ψ̂, std, pval, l, u)
-end
-
-function statistics_from_result(result::TMLE.Estimate)
-    Ψ̂₀ = result.initial
-    # TMLE stats
-    tmle_stats = statistics_from_estimator(result.tmle) 
-    # OneStep stats
-    onestep_stats = statistics_from_estimator(result.onestep)
-    return Ψ̂₀, tmle_stats, onestep_stats
-end
-
-statistics_from_result(result::FailedEstimation) = 
-    missing, 
-    (missing, missing, missing, missing, missing), 
-    (missing, missing, missing, missing, missing)
-
-function append_csv(filename, results)
-    data = empty_tmle_output(size=size(results, 1))
-    for (i, result) in enumerate(results)
-        Ψ = result.parameter
-        param_type = param_string(Ψ)
-        treatments = treatment_string(Ψ)
-        case = case_string(Ψ)
-        control = control_string(Ψ)
-        confounders = confounders_string(Ψ)
-        covariates = covariates_string(Ψ)
-        Ψ̂₀, tmle_stats, onestep_stats = statistics_from_result(result)
-        data[i, :] = (
-            param_type, treatments, case, control, string(Ψ.target), confounders, covariates, 
-            Ψ̂₀, tmle_stats..., onestep_stats..., log
-        )
-    end
-    CSV.write(filename, data, append=true, header=!isfile(filename))
-end
-
 #####################################################################
 #####                       JSON OUTPUT                          ####
 #####################################################################
@@ -101,7 +8,7 @@ initialize_json(filename::String) = open(filename, "w") do io
     print(io, '[')
 end
 
-function update(output::JSONOutput, results; finalize=false)
+function update_file(output::JSONOutput, results; finalize=false)
     output.filename === nothing && return
     open(output.filename, "a") do io
         for result in results
@@ -120,7 +27,7 @@ end
 #####                       STD OUTPUT                          ####
 #####################################################################
 
-function update(doprint, results, partition)
+function update_file(doprint, results, partition)
     if doprint
         mimetext = MIME"text/plain"()
         index = 1
@@ -143,7 +50,7 @@ end
 #####################################################################
 
 
-function update(output::HDF5Output, partition, results, dataset)
+function update_file(output::HDF5Output, partition, results, dataset)
     output.filename === nothing && return
 
     jldopen(output.filename, "a+", compress=true) do io
@@ -169,7 +76,6 @@ end
 #####                    Read TMLE Estimands Configuration                         ####
 #####################################################################
 
-
 function convert_treatment_values(treatment_levels::NamedTuple{names, <:Tuple{Vararg{NamedTuple}}}, treatment_types) where names
     return [(
         case = convert(treatment_types[tn], treatment_levels[tn].case), 
@@ -229,17 +135,21 @@ end
 #####                 ADDITIONAL METHODS                         ####
 #####################################################################
 
+TMLE.emptyIC(result::FailedEstimation) = result
+
+TMLE.emptyIC(result::FailedEstimation, pval_threshold::Float64) = result
+
 TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Nothing) where names = 
     NamedTuple{names}([TMLE.emptyIC(r) for r in result])
 
+TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names =
+    NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result])
+
 function TMLE.emptyIC(result, pval_threshold::Float64)
     pval = pvalue(OneSampleZTest(result))
     return pval < pval_threshold ? result : TMLE.emptyIC(result)
 end
 
-TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names =
-    NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result])
-
 
 get_sample_ids(data, variables) = dropmissing(data[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID
 
@@ -317,4 +227,7 @@ function load_tmle_spec(file)
 end
 
 TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{TMLE.EICEstimate}}}) where names = 
+    Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt))
+
+TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{FailedEstimation}}}) where names = 
     Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt))
\ No newline at end of file
diff --git a/test/cache_managers.jl b/test/cache_managers.jl
index 6574680..0252f13 100644
--- a/test/cache_managers.jl
+++ b/test/cache_managers.jl
@@ -10,6 +10,8 @@ using TMLE
     cache_manager.cache["Tata"] = 2
     TargetedEstimation.release!(cache_manager, nothing)
     @test cache_manager.cache == Dict()
+    # Check this does not throw
+    TargetedEstimation.release!(cache_manager, nothing)
 end
 
 @testset "Test MaxSizeCacheManager" begin
@@ -23,6 +25,8 @@ end
     @test length(cache_manager.cache) == 4
     TargetedEstimation.release!(cache_manager, nothing)
     @test length(cache_manager.cache) == 3
+    TargetedEstimation.release!(cache_manager, nothing)
+    @test length(cache_manager.cache) == 3
 end
 
 @testset "Test ReleaseUnusableCacheManager" begin
@@ -77,8 +81,8 @@ end
     # Y_T₁ and T₁_W are no longer needed
     TargetedEstimation.release!(cache_manager, estimands[3])
     @test cache_manager.cache == Dict()
-
-
+    # Check this does not throw
+    TargetedEstimation.release!(cache_manager, estimands[1])
 end
 
 end
diff --git a/test/config/problematic_tmle_ose_config.jl b/test/config/problematic_tmle_ose_config.jl
new file mode 100644
index 0000000..a517cf4
--- /dev/null
+++ b/test/config/problematic_tmle_ose_config.jl
@@ -0,0 +1,14 @@
+default_models = TMLE.default_models(
+  Q_continuous = LinearRegressor(),
+  # For the estimation of E[Y|W, T]: binary target
+  Q_binary = LogisticClassifier(),
+  # Default propensity score model; the T2 override below is the fit expected to fail
+  G = LogisticClassifier()
+)
+
+models = merge(default_models, (T2 = LinearRegressor(),))
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=models, weighted=true, ps_lowerbound=0.001),
+  OSE  = OSE(models=models)
+)
\ No newline at end of file
diff --git a/test/models/biallelic_snp_encoder.jl b/test/models/biallelic_snp_encoder.jl
index f929508..5eb7406 100644
--- a/test/models/biallelic_snp_encoder.jl
+++ b/test/models/biallelic_snp_encoder.jl
@@ -49,3 +49,4 @@ end
 
 end
 
+true
\ No newline at end of file
diff --git a/test/resampling.jl b/test/resampling.jl
index 9032ad7..4976dc3 100644
--- a/test/resampling.jl
+++ b/test/resampling.jl
@@ -104,6 +104,6 @@ end
     @test length(ttp) == 5
 end
 
-true
+end
 
-end
\ No newline at end of file
+true
\ No newline at end of file
diff --git a/test/runner.jl b/test/runner.jl
index cde4dbb..6782d22 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -222,29 +222,55 @@ end
     rm(outputs.json.filename)
 end
 
-@testset "Test tmle_estimation: Failing parameters" begin
+@testset "Test tmle: Failing estimands" begin
     build_dataset(;n=1000, format="csv")
-    parsed_args = Dict(
-        "dataset" => "data.csv",
-        "estimands-config" => joinpath("config", "failing_parameters.yaml"),
-        "estimators-config" => joinpath("config", "tmle_config.jl"),
-        "csv-out" => "output.csv",
-        "verbosity" => 0,
-        "hdf5-out" => nothing,
-        "pval-threshold" => 1e-10,
-        "chunksize" => 10
+    outputs = TargetedEstimation.Outputs(
+        json=TargetedEstimation.JSONOutput(filename="output.json"),
+        hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5")
     )
+    tmpdir = mktempdir(cleanup=true)
+    estimandsfile = joinpath(tmpdir, "configuration.json")
+    configuration = statistical_estimands_only_config()
+    TMLE.write_json(estimandsfile, configuration)
+    estimatorfile = joinpath(CONFIGDIR, "problematic_tmle_ose_config.jl")
+    datafile = "data.csv"
 
-    tmle_estimation(parsed_args)
+    runner = Runner(datafile, estimandsfile, estimatorfile; outputs=outputs);
+    runner()
 
-    # Essential results
-    data = CSV.read(parsed_args["csv-out"], DataFrame)
-    @test size(data) == (1, 19)
-    @test data[1, :TMLE_ESTIMATE] === missing
+    # Test failed nuisance estimates (T2 model)
+    @test runner.failed_nuisance == Set([
+        TMLE.ConditionalDistribution(:T2, (:W1, :W2))
+    ])
 
-    rm(parsed_args["dataset"])
-    rm(parsed_args["csv-out"])
+    # Check results from JSON
+    results_from_json = TMLE.read_json(outputs.json.filename)
+    for estimator in (:OSE, :TMLE)
+        @test results_from_json[1][estimator][:error] == "Could not fit the following propensity score model: P₀(T2 | W1, W2)"
+        @test results_from_json[1][estimator][:estimand] isa TMLE.Estimand
+        @test results_from_json[2][estimator] isa TMLE.EICEstimate
+        for i in 3:6
+            @test results_from_json[i][estimator][:error] == "Skipped due to shared failed nuisance fit."
+            @test results_from_json[i][estimator][:estimand] isa TMLE.Estimand
+        end
+    end
+
+    # Check results from HDF5
+    hdf5file = jldopen(outputs.hdf5.filename)
+    for estimator in (:OSE, :TMLE)
+        @test hdf5file["1/result"][estimator] isa TargetedEstimation.FailedEstimation
+        @test hdf5file["2/result"][estimator] isa TMLE.EICEstimate
+        for i in 3:6
+            @test hdf5file[string(i, "/result")][estimator] isa TargetedEstimation.FailedEstimation
+            @test hdf5file[string(i, "/result")][estimator].estimand isa TMLE.Estimand
+        end
+    end
+    close(hdf5file)
 
+    # Clean
+    rm(outputs.json.filename)
+    rm(outputs.hdf5.filename)
+    rm(datafile)
 end
 
 end;
diff --git a/test/runtests.jl b/test/runtests.jl
index 0a34f46..13cf36f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,10 +1,15 @@
+using TargetedEstimation, Test  # Test provides the @test macro
 
-include("cache_managers.jl")
-include("utils.jl")
-include("sieve_variance.jl")
-include("runner.jl")
-include("merge.jl")
-include("resampling.jl")
-include(joinpath("models", "glmnet.jl"))
-include(joinpath("models", "adaptive_interaction_transformer.jl"))
-include(joinpath("models", "biallelic_snp_encoder.jl"))
+TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
+
+@time begin
+    @test include(joinpath(TESTDIR, "cache_managers.jl"))
+    @test include(joinpath(TESTDIR, "utils.jl"))
+    @test include(joinpath(TESTDIR, "sieve_variance.jl"))
+    @test include(joinpath(TESTDIR, "runner.jl"))
+    @test include(joinpath(TESTDIR, "merge.jl"))
+    @test include(joinpath(TESTDIR, "resampling.jl"))
+    @test include(joinpath(TESTDIR, "models", "glmnet.jl"))
+    @test include(joinpath(TESTDIR, "models", "adaptive_interaction_transformer.jl"))
+    @test include(joinpath(TESTDIR, "models", "biallelic_snp_encoder.jl"))
+end
\ No newline at end of file
diff --git a/test/utils.jl b/test/utils.jl
index 1859622..10d6237 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -9,6 +9,11 @@ using MLJBase
 using MLJLinearModels
 using CategoricalArrays
 
+check_type(treatment_value, ::Type{T}) where T = @test treatment_value isa T
+
+check_type(treatment_values::NamedTuple, ::Type{T}) where T = 
+    @test treatment_values.case isa T && treatment_values.control isa T 
+
 PROJECT_DIR = dirname(dirname(pathof(TargetedEstimation)))
 
 include(joinpath(PROJECT_DIR, "test", "testutils.jl"))
@@ -54,46 +59,15 @@ end
     estimands = TargetedEstimation.proofread_estimands(filename, dataset)
     for estimand in estimands
         if haskey(estimand.treatment_values, :T1)
-            @test estimand.treatment_values.T1.case isa Float64
-            @test estimand.treatment_values.T1.control isa Float64
+            check_type(estimand.treatment_values.T1, Float64)
         end
         if haskey(estimand.treatment_values, :T2)
-            @test estimand.treatment_values.T2.case isa Bool
-            @test estimand.treatment_values.T2.control isa Bool
+            check_type(estimand.treatment_values.T2, Bool)
         end
     end
     # Clean estimands file
     rm(filename)
 end
-
-@testset "Test CSV writing" begin
-    Ψ = IATE(
-        outcome=:Y,
-        treatment_values=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")),
-        treatment_confounders=(T₁=[:W₁, :W₂], T₂=[:W₁, :W₂])
-    )
-    @test TargetedEstimation.covariates_string(Ψ) === missing
-    @test TargetedEstimation.param_string(Ψ) == "IATE"
-    @test TargetedEstimation.case_string(Ψ) == "1_&_AC"
-    @test TargetedEstimation.control_string(Ψ) == "0_&_CC"
-    @test TargetedEstimation.treatment_string(Ψ) == "T₁_&_T₂"
-    @test TargetedEstimation.confounders_string(Ψ) == "W₁_&_W₂"
-
-    Ψ = CM(
-        outcome=:Y,
-        treatment_values=(T₁=1, T₂="AC"),
-        treatment_confounders=(T₁=[:W₁, :W₂], T₂ = [:W₁, :W₂]),
-        outcome_extra_covariates=[:C₁]
-    )
-
-    @test TargetedEstimation.covariates_string(Ψ) === "C₁"
-    @test TargetedEstimation.param_string(Ψ) == "CM"
-    @test TargetedEstimation.case_string(Ψ) == "1_&_AC"
-    @test TargetedEstimation.control_string(Ψ) === missing
-    @test TargetedEstimation.treatment_string(Ψ) == "T₁_&_T₂"
-    @test TargetedEstimation.confounders_string(Ψ) == "W₁_&_W₂"
-end
-
 @testset "Test coerce_types!" begin
     Ψ = IATE(
         outcome=:Ycont,
@@ -171,33 +145,6 @@ end
     @test sample_ids == [2]
 end
 
-@testset "Test write_target_results with missing values" begin
-    filename = "test.csv"
-    parameters = [
-        CM(
-        target=:Y,
-        treatment=(T₁=1, T₂="AC"),
-        confounders=[:W₁, :W₂],
-        covariates=[:C₁]
-    )]
-    tmle_results = [TargetedEstimation.FailedEstimation(parameters[1])]
-    logs = ["Error X"]
-    TargetedEstimation.append_csv(filename, tmle_results, logs)
-    out = CSV.read(filename, DataFrame)
-    expected_out = ["CM", "T₁_&_T₂", "1_&_AC", missing, "Y", "W₁_&_W₂", "C₁", 
-        missing, missing, missing, missing, missing, missing,
-        missing, missing, missing, missing, missing,
-        "Error X"]
-    for (x, y) in zip(first(out), expected_out)
-        if x === missing 
-            @test x === y
-        else
-            @test x == y
-        end
-    end
-    rm(filename)
-end
-
 @testset "Test make_categorical! and make_float!" begin
     dataset = DataFrame(
         T₁ = [1, 1, 0, 0],
@@ -228,7 +175,7 @@ end
     TargetedEstimation.make_categorical(dataset.T₁, true) === dataset.T₁
 end
 
-@tetset "Test JSON writing" begin
+@testset "Test JSON writing" begin
     results = []
     for Ψ in statistical_estimands_only_config().estimands
         push!(results, (
@@ -237,11 +184,11 @@ end
             ))
     end
     tmpdir = mktempdir(cleanup=true)
-    filename = joinpath(tmpdir, "output_test.json")
-    TargetedEstimation.initialize_json(filename)
-    TargetedEstimation.update(filename, results[1:3])
-    TargetedEstimation.update(filename, results[4:end]; finalize=true)
-    loaded_results = TMLE.read_json(filename)
+    jsonoutput = TargetedEstimation.JSONOutput(filename=joinpath(tmpdir, "output_test.json"))
+    TargetedEstimation.initialize_json(jsonoutput.filename)
+    TargetedEstimation.update_file(jsonoutput, results[1:3])
+    TargetedEstimation.update_file(jsonoutput, results[4:end]; finalize=true)
+    loaded_results = TMLE.read_json(jsonoutput.filename)
     @test size(loaded_results) == size(results)
     for (result, loaded_result) in zip(results, loaded_results)
         @test result.TMLE.estimate == loaded_result[:TMLE].estimate

From 77a8f3f53da34057a090cd3f79f90c5279b7bbc1 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 21 Nov 2023 18:11:39 +0000
Subject: [PATCH 05/71] start to fix sieve variance

---
 src/sieve_variance.jl  |  26 ++++-----
 test/sieve_variance.jl | 126 +++++++++++++++++------------------------
 test/utils.jl          |   6 +-
 3 files changed, 69 insertions(+), 89 deletions(-)

diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index f6dfeb6..34154d0 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -88,32 +88,32 @@ function build_work_list(prefix, grm_ids)
             x -> startswith(x, prefix_) && endswith(x, ".hdf5"), 
             readdir(dirname__)
     )
-    hdf5files = [joinpath(dirname_, x) for x in hdf5files]
+    hdf5files = sort([joinpath(dirname_, x) for x in hdf5files])
 
     influence_curves = Vector{Float32}[]
     n_obs = Int[]
-    sieve_df = sieve_dataframe()
+    tmle_results = []
     for hdf5file in hdf5files
         jldopen(hdf5file) do io
             # templateΨs = io["parameters"]
             # results = io["results"]
             for key in keys(io)
                 result_group = io[key]
-                tmleresult = io[key]["result"]
-                Ψ = tmleresult.parameter
-                sample_ids = haskey(result_group, "sample_ids") ? result_group["sample_ids"] :
-                    io[string(result_group["sample_ids_idx"])]["sample_ids"]
-                sample_ids = string.(sample_ids)
-                Ψ̂ = TMLE.estimate(tmleresult.tmle)
-
-                push!(influence_curves, align_ic(tmleresult.tmle.IC, sample_ids, grm_ids))
-                push!(n_obs, size(sample_ids, 1))
-                push_sieveless!(sieve_df, Ψ, Ψ̂)
+                tmleresult = io[key]["result"].TMLE
+                if size(tmleresult.IC, 1) > 0
+                    sample_ids = haskey(result_group, "sample_ids") ? result_group["sample_ids"] :
+                        io[string(result_group["sample_ids_idx"])]["sample_ids"]
+                    sample_ids = string.(sample_ids)
+
+                    push!(influence_curves, align_ic(tmleresult.IC, sample_ids, grm_ids))
+                    push!(n_obs, size(sample_ids, 1))
+                    push!(tmle_results, tmleresult)
+                end
             end
         end
     end
     influence_curves = length(influence_curves) > 0 ? reduce(vcat, transpose(influence_curves)) : Matrix{Float32}(undef, 0, 0)
-    return sieve_df, influence_curves, n_obs
+    return tmle_results, influence_curves, n_obs
 end
 
 
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index 901a286..daf683e 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -11,6 +11,8 @@ using StableRNGs
 using Distributions
 using LogExpFunctions
 
+TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
+
 function build_dataset(sample_ids)
     rng = StableRNG(123)
     n = size(sample_ids, 1)
@@ -44,24 +46,14 @@ function build_dataset(sample_ids)
     CSV.write("data.csv", dataset)
 end
 
-function build_tmle_output_file(sample_ids, param_file, outprefix)
+function build_tmle_output_file(sample_ids, estimandfile, outprefix, pval)
     build_dataset(sample_ids)
-    # Only one continuous phenotype / machines not saved / no adaptive cv
-    parsed_args = Dict(
-        "data" => "data.csv",
-        "param-file" => param_file,
-        "estimator-file" => joinpath("config", "tmle_config.jl"),
-        "csv-out" => string(outprefix, ".csv"),
-        "verbosity" => 0,
-        "hdf5-out" => string(outprefix, ".hdf5"),
-        "pval-threshold" => 1.,
-        "chunksize" => 100
+    outputs = TargetedEstimation.Outputs(
+        hdf5=TargetedEstimation.HDF5Output(filename=string(outprefix, ".hdf5"), pval_threshold=pval),
     )
-
-    TargetedEstimation.tmle_estimation(parsed_args)
+    tmle("data.csv", estimandfile, joinpath(TESTDIR, "config", "tmle_ose_config.jl"), outputs=outputs)
 end
 
-
 function basic_variance_implementation(matrix_distance, influence_curve, n_obs)
     variance = 0.f0
     n_samples = size(influence_curve, 1)
@@ -101,9 +93,8 @@ function test_initial_output(output, expected_output)
         end
     end
 end
-
 @testset "Test readGRM" begin
-    prefix = joinpath("data", "grm", "test.grm")
+    prefix = joinpath(TESTDIR, "data", "grm", "test.grm")
     GRM, ids = TargetedEstimation.readGRM(prefix)
     @test eltype(ids.SAMPLE_ID) == String
     @test size(GRM, 1) == 18915
@@ -111,65 +102,55 @@ end
 end
 
 @testset "Test build_work_list" begin
-    grm_ids = TargetedEstimation.GRMIDs(joinpath("data", "grm", "test.grm.id"))
-    param_file_1 = joinpath("config", "sieve_tests_parameters_1.yaml")
-    outprefix_1 = "tmle_output_1"
-    prefix = "tmle_output"
-    # CASE_1: only one file
-    build_tmle_output_file(grm_ids.SAMPLE_ID, param_file_1, outprefix_1)
-    # Since pval = 1., all parameters are considered for sieve variance
-    sieve_df, influence_curves, n_obs = TargetedEstimation.build_work_list(prefix, grm_ids)
-    @test n_obs == [193, 193, 193, 194, 194, 194]
+    grm_ids = TargetedEstimation.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id"))
+    tmpdir = mktempdir(cleanup=true)
+    configuration = statistical_estimands_only_config()
+
+    # CASE_1: Since pval = 1.
+    # Simulate multiple runs that occured
+    pval = 1.
+    config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3])
+    estimandsfile_1 = joinpath(tmpdir, "configuration_1.json")
+    TMLE.write_json(estimandsfile_1, config_1)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1", pval)
+
+    config_2 = TMLE.Configuration(estimands=configuration.estimands[4:end])
+    estimandsfile_2 = joinpath(tmpdir, "configuration_2.json")
+    TMLE.write_json(estimandsfile_2, config_2)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2", pval)
+
+    results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids)
+    # Check n_obs
+    @test n_obs == [194, 194, 194, 193, 193, 194]
     # Check influence curves
-    io = jldopen(string(outprefix_1, ".hdf5"))
-    for key in keys(io)
-        result = io[key]["result"]
-        IC = result.tmle.IC
-        # missing sample
-        if result.parameter.target == Symbol("BINARY/OUTCOME")
-            IC = vcat(0, IC)
-        end
-        @test convert(Vector{Float32}, IC) == influence_curves[parse(Int, key), :]
+    expected_influence_curves = [size(r.IC, 1) == 194 ? r.IC : vcat(0, r.IC) for r in results]
+    for rowindex in 1:6
+        @test convert(Vector{Float32}, expected_influence_curves[rowindex]) == influence_curves[rowindex, :]
     end
-    close(io)
-    # Check output
-    some_expected_cols = DataFrame(
-        PARAMETER_TYPE = ["IATE", "IATE", "ATE", "IATE", "IATE", "ATE"],
-        TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2"],
-        CASE=["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true"],
-        CONTROL=["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false"],
-        OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"],
-        CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2"],
-        COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1"]
-    )
-    test_initial_output(sieve_df, some_expected_cols)
-
-    # CASE_2: add another file 
-    param_file_2 = joinpath("config", "sieve_tests_parameters_2.yaml")
-    outprefix_2 = "tmle_output_2"
-    build_tmle_output_file(grm_ids.SAMPLE_ID, param_file_2, outprefix_2)
-    # This p-value filters the influence curves for the binary outcome
-    sieve_df, influence_curves, n_obs = TargetedEstimation.build_work_list(prefix, grm_ids)
-    @test size(influence_curves) == (8, 194)
-    @test n_obs == [193, 193, 193, 194, 194, 194, 194, 194]
-
-    # Check output
-    some_expected_cols = DataFrame(
-        PARAMETER_TYPE = ["IATE", "IATE", "ATE", "IATE", "IATE", "ATE", "ATE", "CM"],
-        TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1", "T1"],
-        CASE = ["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true", "true", "false"],
-        CONTROL = ["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false", "false", missing],
-        OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"],
-        CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1", "W1"],
-        COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1", missing, missing]
-    )
-    test_initial_output(sieve_df, some_expected_cols)
-
+    # Check results
+    @test all(x isa TMLE.TMLEstimate for x in results)
+    @test all(size(x.IC, 1) > 0 for x in results)
     # clean
-    rm(string(outprefix_1, ".hdf5"))
-    rm(string(outprefix_1, ".csv"))
-    rm(string(outprefix_2, ".hdf5"))
-    rm(string(outprefix_2, ".csv"))
+    rm("tmle_output_1.hdf5")
+    rm("tmle_output_2.hdf5")
+
+    # CASE_2: Since pval = 0.8
+    pval = 0.8
+    estimandsfile = joinpath(tmpdir, "configuration.json")
+    TMLE.write_json(estimandsfile, configuration)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output", pval)
+    results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids)
+    # Check n_obs
+    @test n_obs == [194, 194, 194]
+    # Check influence curves
+    for rowindex in 1:3
+        @test convert(Vector{Float32}, results[rowindex].IC) == influence_curves[rowindex, :]
+    end
+    # Check results
+    all(x isa TMLE.TMLEstimate for x in results)
+    all(size(x.IC, 1) > 0 for x in results)
+    # Clean
+    rm("tmle_output.hdf5")
     rm("data.csv")
 end
 
@@ -257,7 +238,6 @@ end
 
     # Check by hand for a single τ=0.5
     @test variances[2, :] ≈ Float32[0.03666667, 0.045, 0.045]
-
 end
 
 @testset "Test grm_rows_bounds" begin
diff --git a/test/utils.jl b/test/utils.jl
index 10d6237..f98d1bc 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -14,12 +14,12 @@ check_type(treatment_value, ::Type{T}) where T = @test treatment_value isa T
 check_type(treatment_values::NamedTuple, ::Type{T}) where T = 
     @test treatment_values.case isa T && treatment_values.control isa T 
 
-PROJECT_DIR = dirname(dirname(pathof(TargetedEstimation)))
+TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
 
-include(joinpath(PROJECT_DIR, "test", "testutils.jl"))
+include(joinpath(TESTDIR, "testutils.jl"))
 
 @testset "Test load_tmle_spec: with configuration file" begin
-    estimators = TargetedEstimation.load_tmle_spec(joinpath(PROJECT_DIR, "test", "config", "tmle_ose_config.jl"))
+    estimators = TargetedEstimation.load_tmle_spec(joinpath(TESTDIR, "config", "tmle_ose_config.jl"))
     @test estimators.TMLE isa TMLE.TMLEE
     @test estimators.OSE isa TMLE.OSE
     @test estimators.TMLE.weighted === true

From 4e7c24ec4171aa9bbba5e40fa07136e163a02204 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 21 Nov 2023 18:12:57 +0000
Subject: [PATCH 06/71] fix another test

---
 test/sieve_variance.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index daf683e..6057fa7 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -251,7 +251,7 @@ end
 end
 
 @testset "Test corrected_stderrors" begin
-    io = jldopen(joinpath("data", "sieve_variances.hdf5"))
+    io = jldopen(joinpath(TESTDIR, "data", "sieve_variances.hdf5"))
     variances = io["variances"]
     n_obs = [10, 10, 10, 10, 10, 100, 100, 1000, 1000, 1000]
     stderrors = TargetedEstimation.corrected_stderrors(variances, n_obs)

From f5cd1d40a0e11de4eb2c7af9b40b83fccf74473b Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 22 Nov 2023 14:20:22 +0000
Subject: [PATCH 07/71] fix svp

---
 Comonicon.toml            |   8 +++
 src/TargetedEstimation.jl |   3 +
 src/runner.jl             |  20 +++++--
 src/sieve_variance.jl     |  88 +++++++++++++++++------------
 test/sieve_variance.jl    | 116 ++++++++++++++++++--------------------
 5 files changed, 131 insertions(+), 104 deletions(-)
 create mode 100644 Comonicon.toml

diff --git a/Comonicon.toml b/Comonicon.toml
new file mode 100644
index 0000000..bd97f0f
--- /dev/null
+++ b/Comonicon.toml
@@ -0,0 +1,8 @@
+name = "TargetedEstimation"
+
+[install]
+completion = true
+quiet = false
+optimize = 2
+
+[sysimg]
\ No newline at end of file
diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index 2f809ec..5dd5011 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -41,6 +41,9 @@ include(joinpath("models", "glmnet.jl"))
 include(joinpath("models", "adaptive_interaction_transformer.jl"))
 include(joinpath("models", "biallelic_snp_encoder.jl"))
 
+"""TL CLI."""
+@main
+
 export Runner, tmle, sieve_variance_plateau, merge_csv_files
 export GLMNetRegressor, GLMNetClassifier
 export RestrictedInteractionTransformer, BiAllelicSNPEncoder
diff --git a/src/runner.jl b/src/runner.jl
index 24cff63..edf3812 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -149,6 +149,15 @@ end
 
 
 """
+    tmle(dataset, estimands, estimators; 
+        verbosity=0, 
+        outputs=Outputs(),
+        chunksize=100,
+        rng=123,
+        cache_strategy="release-unusable",
+        sort_estimands=false
+    )
+
 TMLE CLI.
 
 # Args
@@ -160,23 +169,22 @@ TMLE CLI.
 # Options
 
 - `-v, --verbosity`: Verbosity level.
-- `-j, --json_out`: JSON output filename.
-- `--hdf5_out`: HDF5 output filename.
+- `-o, --outputs`: Outputs to be generated.
 - `--chunksize`: Results are written in batches of size chunksize.
 - `-r, --rng`: Random seed (Only used for estimands ordering at the moment).
-- `-c, --cache_strategy`: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size").
+- `-c, --cache-strategy`: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size").
 
 # Flags
 
 - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time).
 """
-@main function tmle(dataset, estimands, estimators; 
+@cast function tmle(dataset, estimands, estimators; 
     verbosity=0, 
     outputs=Outputs(),
     chunksize=100,
     rng=123,
     cache_strategy="release-unusable",
-    sort_estimands=false
+    sort_estimands::Bool=false
     )
     runner = Runner(dataset, estimands, estimators; 
         verbosity=verbosity, 
@@ -189,4 +197,4 @@ TMLE CLI.
     runner()
     verbosity >= 1 && @info "Done."
     return
-end
\ No newline at end of file
+end
diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index 34154d0..cc953e9 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -99,7 +99,7 @@ function build_work_list(prefix, grm_ids)
             # results = io["results"]
             for key in keys(io)
                 result_group = io[key]
-                tmleresult = io[key]["result"].TMLE
+                tmleresult = first(io[key]["result"])
                 if size(tmleresult.IC, 1) > 0
                     sample_ids = haskey(result_group, "sample_ids") ? result_group["sample_ids"] :
                         io[string(result_group["sample_ids_idx"])]["sample_ids"]
@@ -184,7 +184,6 @@ function compute_variances(influence_curves, grm, τs, n_obs)
     return variances
 end
 
-
 function grm_rows_bounds(n_samples)
     bounds = Pair{Int, Int}[]
     start_idx = 1
@@ -198,59 +197,74 @@ function grm_rows_bounds(n_samples)
     return bounds
 end
 
-
-function save_results(outprefix, output, τs, variances)
-    CSV.write(string(outprefix, ".csv"), output)
+function save_results(outprefix, results, τs, variances)
+    TMLE.write_json(string(outprefix, ".json"), results)
     jldopen(string(outprefix, ".hdf5"), "w") do io
         io["taus"] = τs
         io["variances"] = variances
     end
 end
 
-
-corrected_stderrors(variances, n_obs) =
-    sqrt.(view(maximum(variances, dims=1), 1, :) ./ n_obs)
-
-function update_sieve_df!(df, stds)
-    n = size(stds, 1)
-    df.SIEVE_STD = Vector{Float64}(undef, n)
-    df.SIEVE_PVALUE = Vector{Float64}(undef, n)
-    df.SIEVE_LWB = Vector{Float64}(undef, n)
-    df.SIEVE_UPB = Vector{Float64}(undef, n)
-
-    for index in 1:n
-        std = stds[index]
-        estimate = df.TMLE_ESTIMATE[index]
-        testresult = OneSampleZTest(estimate, std, 1)
-        lwb, upb = confint(testresult)
-        df.SIEVE_STD[index] = std
-        df.SIEVE_PVALUE[index] = pvalue(testresult)
-        df.SIEVE_LWB[index] = lwb
-        df.SIEVE_UPB[index] = upb
+corrected_stderrors(variances) =
+    sqrt.(view(maximum(variances, dims=1), 1, :))
+
+function update_with_sieve_estimate!(results, stds)
+    for index in eachindex(results)
+        old = results[index]
+        results[index] = typeof(old)(
+            old.estimand,
+            old.estimate,
+            convert(Float64, stds[index]),
+            old.n,
+            Float64[]
+        )
     end
-
-    select!(df, Not(:TMLE_ESTIMATE))
 end
 
-function sieve_variance_plateau(parsed_args)
-    prefix = parsed_args["prefix"]
-    outprefix = parsed_args["out-prefix"]
-    verbosity = parsed_args["verbosity"]
+"""
+    sieve_variance_plateau(input_prefix;
+        output_prefix="svp",
+        grm_prefix="GRM",
+        verbosity=0, 
+        n_estimators=10, 
+        max_tau=0.8
+    )
+
+Sieve Variance Plateau CLI.
+
+# Args
+
+- `input-prefix`: Input prefix to HDF5 files generated by the tmle CLI.
 
-    τs = default_τs(parsed_args["nb-estimators"];max_τ=parsed_args["max-tau"])
-    grm, grm_ids = readGRM(parsed_args["grm-prefix"])
+# Options
+
+- `-o, --output-prefix`: Output prefix.
+- `-g, --grm-prefix`: Prefix to the aggregated GRM.
+- `-v, --verbosity`: Verbosity level.
+- `-n, --n-estimators`: Number of variance estimators to build for each estimate.
+- `-m, --max-tau`: Maximum distance between any two individuals.
+"""
+@cast function sieve_variance_plateau(input_prefix;
+    output_prefix="svp",
+    grm_prefix="GRM",
+    verbosity=0, 
+    n_estimators=10, 
+    max_tau=0.8
+    )
+    τs = default_τs(n_estimators;max_τ=max_tau)
+    grm, grm_ids = readGRM(grm_prefix)
     verbosity > 0 && @info "Preparing work list."
-    sieve_df, influence_curves, n_obs = build_work_list(prefix, grm_ids)
+    results, influence_curves, n_obs = build_work_list(input_prefix, grm_ids)
 
     if length(influence_curves) > 0
         verbosity > 0 && @info "Computing variance estimates."
         variances = compute_variances(influence_curves, grm, τs, n_obs)
-        std_errors = corrected_stderrors(variances, n_obs)
-        update_sieve_df!(sieve_df, std_errors)
+        std_errors = corrected_stderrors(variances)
+        update_with_sieve_estimate!(results, std_errors)
     else
         variances = Float32[]
     end
-    save_results(outprefix, sieve_df, τs, variances)
+    save_results(output_prefix, results, τs, variances)
 
     verbosity > 0 && @info "Done."
     return 0
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index 6057fa7..3359f2a 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -13,6 +13,8 @@ using LogExpFunctions
 
 TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
 
+include(joinpath(TESTDIR, "testutils.jl"))
+
 function build_dataset(sample_ids)
     rng = StableRNG(123)
     n = size(sample_ids, 1)
@@ -106,7 +108,7 @@ end
     tmpdir = mktempdir(cleanup=true)
     configuration = statistical_estimands_only_config()
 
-    # CASE_1: Since pval = 1.
+    # CASE_1: pval = 1.
     # Simulate multiple runs that occured
     pval = 1.
     config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3])
@@ -134,17 +136,18 @@ end
     rm("tmle_output_1.hdf5")
     rm("tmle_output_2.hdf5")
 
-    # CASE_2: Since pval = 0.8
-    pval = 0.8
+    # CASE_2: pval = 0.1
+    pval = 0.1
     estimandsfile = joinpath(tmpdir, "configuration.json")
     TMLE.write_json(estimandsfile, configuration)
-    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output", pval)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile, "tmle_output", pval)
     results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids)
     # Check n_obs
-    @test n_obs == [194, 194, 194]
+    @test n_obs == [194, 193, 193, 194]
     # Check influence curves
-    for rowindex in 1:3
-        @test convert(Vector{Float32}, results[rowindex].IC) == influence_curves[rowindex, :]
+    expected_influence_curves = [size(r.IC, 1) == 194 ? r.IC : vcat(0, r.IC) for r in results]
+    for rowindex in 1:4
+        @test convert(Vector{Float32}, expected_influence_curves[rowindex]) == influence_curves[rowindex, :]
     end
     # Check results
     all(x isa TMLE.TMLEstimate for x in results)
@@ -253,76 +256,67 @@ end
 @testset "Test corrected_stderrors" begin
     io = jldopen(joinpath(TESTDIR, "data", "sieve_variances.hdf5"))
     variances = io["variances"]
-    n_obs = [10, 10, 10, 10, 10, 100, 100, 1000, 1000, 1000]
-    stderrors = TargetedEstimation.corrected_stderrors(variances, n_obs)
+    stderrors = TargetedEstimation.corrected_stderrors(variances)
     # sanity check
     @test size(stderrors, 1) == 10
 
     # check for the first curve
-    stderrors[1] == sqrt(maximum(variances[:,1])/n_obs[1])
+    @test stderrors[1] == sqrt(maximum(variances[:,1]))
 
     close(io)
 end
 
 @testset "Test sieve_variance_plateau" begin
     # Generate data
-    nb_estimators = 10
-    grm_ids = TargetedEstimation.GRMIDs(joinpath("data", "grm", "test.grm.id"))
-    param_file_1 = joinpath("config", "sieve_tests_parameters_1.yaml")
-    tmle_outprefix_1 = "tmle_output_1"
-    param_file_2 = joinpath("config", "sieve_tests_parameters_2.yaml")
-    tmle_outprefix_2 = "tmle_output_2"
-    build_tmle_output_file(grm_ids.SAMPLE_ID, param_file_1, tmle_outprefix_1)
-    build_tmle_output_file(grm_ids.SAMPLE_ID, param_file_2, tmle_outprefix_2)
-
-    outprefix = "sieve_output"
-    parsed_args = Dict(
-        "prefix" => "tmle_output",
-        "pval" => 1e-10,
-        "grm-prefix" => "data/grm/test.grm",
-        "out-prefix" => outprefix,
-        "nb-estimators" => nb_estimators,
-        "max-tau" => 0.75,
-        "verbosity" => 0
+    grm_ids = TargetedEstimation.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id"))
+    tmpdir = mktempdir(cleanup=true)
+    configuration = statistical_estimands_only_config()
+    pval = 0.1
+    config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3])
+    estimandsfile_1 = joinpath(tmpdir, "configuration_1.json")
+    TMLE.write_json(estimandsfile_1, config_1)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1", pval)
+
+    config_2 = TMLE.Configuration(estimands=configuration.estimands[4:end])
+    estimandsfile_2 = joinpath(tmpdir, "configuration_2.json")
+    TMLE.write_json(estimandsfile_2, config_2)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2", pval)
+
+    sieve_variance_plateau("tmle_output";
+        grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"),
+        max_tau=0.75
     )
 
-    sieve_variance_plateau(parsed_args)
-    # check hdf5 file
-    io = jldopen(string(outprefix, ".hdf5"))
-    @test io["taus"] == TargetedEstimation.default_τs(nb_estimators; max_τ=parsed_args["max-tau"])
-    @test size(io["variances"]) == (10, 8)
+    # Check HDF5 file
+    io = jldopen("svp.hdf5")
+    @test io["taus"] == TargetedEstimation.default_τs(10; max_τ=0.75)
+    @test size(io["variances"]) == (10, 4)
     close(io)
-    # check csv file
-    output = TargetedEstimation.read_output_with_types(string(outprefix, ".csv"))
-    some_expected_cols = DataFrame(
-        PARAMETER_TYPE = ["IATE", "IATE", "ATE", "IATE", "IATE", "ATE", "ATE", "CM"],
-        TREATMENTS = ["T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1_&_T2", "T1", "T1"],
-        CASE = ["true_&_true", "true_&_false", "true_&_true", "true_&_true", "true_&_false", "true_&_true", "true", "false"],
-        CONTROL = ["false_&_false", "false_&_true", "false_&_false", "false_&_false", "false_&_true", "false_&_false", "false", missing],
-        OUTCOME = ["BINARY/OUTCOME", "BINARY/OUTCOME", "BINARY/OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME", "CONTINUOUS, OUTCOME"],
-        CONFOUNDERS = ["W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1_&_W2", "W1", "W1"],
-        COVARIATES = ["C1", "C1", "C1", "C1", "C1", "C1", missing, missing]
-    )
-    test_initial_output(output, some_expected_cols)
-    @test output.SIEVE_PVALUE isa Vector{Float64}
-    @test output.SIEVE_LWB isa Vector{Float64}
-    @test output.SIEVE_UPB isa Vector{Float64}
-    @test output.SIEVE_STD isa Vector{Float64}
-
-    tmle_output = TargetedEstimation.load_csv_files(
-        TargetedEstimation.empty_tmle_output(),
-        ["tmle_output_1.csv", "tmle_output_2.csv"]
+
+    # Check JSON file
+    svp_results = TMLE.read_json("svp.json")
+    tmleout1 = jldopen("tmle_output_1.hdf5")
+    tmleout2 = jldopen("tmle_output_2.hdf5")
+
+    src_results = vcat(
+        [tmleout1[string(i, "/result")].TMLE for i in 1:3],
+        [tmleout2[string(i, "/result")].TMLE for i in 1:3],
     )
 
-    joined = leftjoin(tmle_output, output, on=TargetedEstimation.joining_keys(), matchmissing=:equal)
-    @test all(joined.SIEVE_PVALUE .> 0 )
+    for svp_result in svp_results
+        src_result_index = findall(x.estimand == svp_result.estimand for x in src_results)
+        src_result = src_results[only(src_result_index)]
+        @test src_result.std != svp_result.std
+        @test src_result.estimate == svp_result.estimate
+        @test src_result.n == svp_result.n
+        @test svp_result.IC == []
+    end
+
     # clean
-    rm(string(outprefix, ".csv"))
-    rm(string(outprefix, ".hdf5"))
-    rm(string(tmle_outprefix_1, ".hdf5"))
-    rm(string(tmle_outprefix_1, ".csv"))
-    rm(string(tmle_outprefix_2, ".hdf5"))
-    rm(string(tmle_outprefix_2, ".csv"))
+    rm("svp.json")
+    rm("svp.hdf5")
+    rm("tmle_output_1.hdf5")
+    rm("tmle_output_2.hdf5")
     rm("data.csv")
 end
 

From 78c4ee92708aaae0c7b66e564cfae64494d57a43 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 23 Nov 2023 17:58:54 +0000
Subject: [PATCH 08/71] start to work on composed estimands

---
 src/runner.jl     | 10 ++++++-
 src/utils.jl      | 73 +++++++++++++++++++++++++++++++++++++----------
 test/runner.jl    | 38 +++++++++++++++++++++++-
 test/testutils.jl | 22 ++++++++++++++
 test/utils.jl     | 30 +++++++++++++++++++
 5 files changed, 156 insertions(+), 17 deletions(-)

diff --git a/src/runner.jl b/src/runner.jl
index edf3812..0cf30e5 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -21,10 +21,16 @@ initialize(output::JSONOutput) = initialize_json(output.filename)
     pval_threshold::Union{Nothing, Float64} = nothing
 end
 
+@option struct JLSOutput
+    filename::Union{Nothing, String} = nothing
+    pval_threshold::Union{Nothing, Float64} = nothing
+end
+
 @option struct Outputs
     json::JSONOutput = JSONOutput()
     hdf5::HDF5Output = HDF5Output()
-    std::Bool = false
+    jls::JLSOutput   = JLSOutput()
+    std::Bool        = false
 end
 
 function initialize(outputs::Outputs)
@@ -75,6 +81,8 @@ function save(runner::Runner, results, partition, finalize)
     update_file(runner.outputs.std, results, partition)
     # Append JSON Output
     update_file(runner.outputs.json, results; finalize=finalize)
+    # Append JLS Output
+    update_file(runner.outputs.jls, results)
     # Append HDF5 Output
     update_file(runner.outputs.hdf5, partition, results, runner.dataset)
 end
diff --git a/src/utils.jl b/src/utils.jl
index ce62c37..dfd376b 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -46,7 +46,7 @@ function update_file(doprint, results, partition)
 end
 
 #####################################################################
-#####                       JLD2 OUTPUT                          ####
+#####                       HDF5 OUTPUT                          ####
 #####################################################################
 
 
@@ -72,6 +72,21 @@ function update_file(output::HDF5Output, partition, results, dataset)
     end
 end
 
+#####################################################################
+#####                        JLS OUTPUT                          ####
+#####################################################################
+
+function update_file(output::JLSOutput, results)
+    output.filename === nothing && return
+
+    open(output.filename, "a") do io
+        for result in results
+            result = TMLE.emptyIC(result, output.pval_threshold)
+            serialize(io, result)
+        end
+    end
+end
+
 #####################################################################
 #####                    Read TMLE Estimands Configuration                         ####
 #####################################################################
@@ -97,9 +112,49 @@ maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::SCM, method) =
 
 maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::Nothing, method) = throw(MissingSCMError())
 
+function maybe_identify(Ψ::TMLE.ComposedEstimand, scm, method)
+    method = get_identification_method(method)
+    return TMLE.ComposedEstimand(Ψ.f, Tuple(maybe_identify(arg, scm, method) for arg ∈ Ψ.args))
+end
+
 maybe_identify(Ψ, scm, method) = Ψ
 
-read_method(extension) = extension == ".json" ? read_json : read_yaml
+function read_method(extension)
+    method = if extension == ".json"
+        TMLE.read_json
+    elseif extension == ".yaml"
+        TMLE.read_yaml
+    elseif extension == ".jls"
+        deserialize
+    else
+        throw(ArgumentError(string("Can't read from ", extension, " file")))
+    end
+    return method
+end
+
+function fix_treatment_values!(treatment_types::AbstractDict, Ψ::ComposedEstimand, dataset)
+    new_args = Tuple(fix_treatment_values!(treatment_types, arg, dataset) for arg in Ψ.args)
+    return ComposedEstimand(Ψ.f, new_args)
+end
+
+"""
+Uses the values found in the dataset to create a new estimand with adjusted values.
+"""
+function fix_treatment_values!(treatment_types::AbstractDict, Ψ, dataset)
+    treatment_names = keys(Ψ.treatment_values)
+    for tn in treatment_names
+        haskey(treatment_types, tn) ? nothing : treatment_types[tn] = eltype(dataset[!, tn])
+    end
+    new_treatment = NamedTuple{treatment_names}(
+        convert_treatment_values(Ψ.treatment_values, treatment_types)
+    )
+    return typeof(Ψ)(
+        outcome = Ψ.outcome,
+        treatment_values = new_treatment,
+        treatment_confounders = Ψ.treatment_confounders,
+        outcome_extra_covariates = Ψ.outcome_extra_covariates
+    )
+end
 
 """
     proofread_estimands(param_file, dataset)
@@ -114,19 +169,7 @@ function proofread_estimands(filename, dataset)
     treatment_types = Dict()
     for (index, Ψ) in enumerate(config.estimands)
         statisticalΨ = TargetedEstimation.maybe_identify(Ψ, config.scm, config.adjustment)
-        treatment_names = keys(statisticalΨ.treatment_values)
-        for tn in treatment_names
-            haskey(treatment_types, tn) ? nothing : treatment_types[tn] = eltype(dataset[!, tn])
-        end
-        new_treatment = NamedTuple{treatment_names}(
-            TargetedEstimation.convert_treatment_values(statisticalΨ.treatment_values, treatment_types)
-        )
-        estimands[index] = typeof(Ψ)(
-            outcome = Ψ.outcome,
-            treatment_values = new_treatment,
-            treatment_confounders = statisticalΨ.treatment_confounders,
-            outcome_extra_covariates = statisticalΨ.outcome_extra_covariates
-        )
+        estimands[index] = fix_treatment_values!(treatment_types, statisticalΨ, dataset)
     end
     return estimands
 end
diff --git a/test/runner.jl b/test/runner.jl
index 6782d22..33bacc8 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -86,6 +86,7 @@ end
     outputs = TargetedEstimation.Outputs(
         json=TargetedEstimation.JSONOutput(filename="output.json"),
         hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1.),
+        jls=TargetedEstimation.JLSOutput(filename="output.jls"),
         std=true,
     )
     runner = Runner(
@@ -125,6 +126,23 @@ end
         @test loaded_result[:OSE].IC == []
     end
 
+    # Test Save to JLS
+    loaded_results = []
+    open(outputs.jls.filename) do io
+        while !eof(io)
+            push!(loaded_results, deserialize(io))
+        end
+    end
+    for (result, loaded_result) in zip(results, loaded_results)
+        @test loaded_result[:TMLE] isa TMLE.TMLEstimate
+        @test result.TMLE.estimate == loaded_result[:TMLE].estimate
+        @test loaded_result[:TMLE].IC == []
+
+        @test loaded_result[:OSE] isa TMLE.OSEstimate
+        @test result.OSE.estimate == loaded_result[:OSE].estimate
+        @test loaded_result[:OSE].IC == []
+    end
+
     # Test Save to HDF5
     hdf5file = jldopen(outputs.hdf5.filename, "r")
     for (result_index, param_index) in enumerate(4:6)
@@ -156,7 +174,7 @@ end
     rm(outputs.hdf5.filename)
 end
 
-@testset "Test tmle" begin
+@testset "Test tmle: varying dataset format and chunksize" begin
     tmpdir = mktempdir(cleanup=true)
     estimands_filename = joinpath(tmpdir, "configuration.json")
     configuration = statistical_estimands_only_config()
@@ -273,6 +291,24 @@ end
     rm(datafile)
 end
 
+@testset "Test tmle: Causal and Composed Estimands" begin
+    build_dataset(;n=1000, format="csv")
+    outputs = TargetedEstimation.Outputs(
+        jls=TargetedEstimation.JLSOutput(filename="output.jls")
+    )
+    tmpdir = mktempdir(cleanup=true)
+    estimandsfile = joinpath(tmpdir, "configuration.jls")
+
+    configuration = causal_and_composed_estimands_config()
+    serialize(estimandsfile, configuration)
+    estimatorfile = joinpath(CONFIGDIR, "ose_config.jl")
+    datafile = "data.csv"
+    tmle(datafile, estimandsfile, estimatorfile; outputs=outputs)
+    
+    rm(datafile)
+end
+
+
 end;
 
 true
\ No newline at end of file
diff --git a/test/testutils.jl b/test/testutils.jl
index c9bc500..4f286c0 100644
--- a/test/testutils.jl
+++ b/test/testutils.jl
@@ -55,3 +55,25 @@ function statistical_estimands_only_config()
     )
     return configuration
 end
+
+function causal_and_composed_estimands_config()
+    ATE₁ = ATE(
+        outcome = Symbol("CONTINUOUS, OUTCOME"), 
+        treatment_values = (T1 = (case = true, control = false),), 
+    )
+    ATE₂ = ATE(
+        outcome = Symbol("CONTINUOUS, OUTCOME"), 
+        treatment_values = (T1 = (case = false, control = true),), 
+    )
+    diff = ComposedEstimand(-, (ATE₁, ATE₂))
+    scm = StaticSCM(
+        outcomes = ["CONTINUOUS, OUTCOME"],
+        treatments = ["T1"],
+        confounders = [:W1, :W2]
+    )
+    configuration = Configuration(
+        estimands = [ATE₁, ATE₂, diff],
+        scm       = scm
+    )
+    return configuration
+end
diff --git a/test/utils.jl b/test/utils.jl
index f98d1bc..2a3b5c0 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -198,6 +198,36 @@ end
     end
 end
 
+@testset "Test maybe_identify" begin
+    scm = StaticSCM(
+        outcomes = [:Y],
+        treatments = [:T₁, :T₂],
+        confounders = [:W]
+    )
+    adjustment = BackdoorAdjustment()
+    causalATE = ATE(
+        outcome = :Y, 
+        treatment_values = (T₁ =(case=1, control=0),)
+    )
+    statisticalATE = ATE(
+        outcome = :Y, 
+        treatment_values = (T₁ =(case=1, control=0),),
+        treatment_confounders = (T₁=[:W],)
+    )
+    # Correctly identifies the estimand
+    identifiedATE = TargetedEstimation.maybe_identify(causalATE, scm, nothing)
+    @test statisticalATE == identifiedATE
+    # Just returns the estimand
+    @test TargetedEstimation.maybe_identify(statisticalATE, scm, nothing) === statisticalATE
+    # Throws if can't identify
+    @test_throws TargetedEstimation.MissingSCMError() TargetedEstimation.maybe_identify(causalATE, nothing, nothing)
+    # Composed Estimand with a weird mixture of statistical/causal estimands
+    diff = ComposedEstimand(-, (causalATE, statisticalATE))
+    identified_diff = TargetedEstimation.maybe_identify(diff, scm, nothing)
+    statistical_diff = ComposedEstimand(-, (statisticalATE, statisticalATE))
+    @test identified_diff == statistical_diff
+end
+
 end;
 
 true
\ No newline at end of file

From 57267deac9bcf0526a80c69da633e1db6321bed5 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 28 Nov 2023 11:31:25 +0000
Subject: [PATCH 09/71] add test for causal and composed estimands

---
 src/cache_managers.jl                | 17 +++++------------
 src/runner.jl                        |  5 ++---
 src/utils.jl                         | 22 +++++++++-------------
 test/cache_managers.jl               |  2 +-
 test/models/biallelic_snp_encoder.jl |  2 +-
 test/runner.jl                       | 28 +++++++++++++++++++++++++---
 test/utils.jl                        |  2 +-
 7 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/src/cache_managers.jl b/src/cache_managers.jl
index 64d5004..56acd3f 100644
--- a/src/cache_managers.jl
+++ b/src/cache_managers.jl
@@ -10,19 +10,12 @@ function release!(cache_manager::ReleaseUnusableCacheManager, Ψ)
     # Always drop fluctuations
     haskey(cache_manager.cache, :last_fluctuation) && pop!(cache_manager.cache, :last_fluctuation)
 
-    η = TMLE.get_relevant_factors(Ψ)
-    # Propensity scores
-    for ps in η.propensity_score
-        cache_manager.η_counts[ps] -= 1
-        if cache_manager.η_counts[ps] == 0
-            delete!(cache_manager.cache, ps)
+    for η in TMLE.nuisance_functions_iterator(Ψ)
+        cache_manager.η_counts[η] -= 1
+        if cache_manager.η_counts[η] == 0
+            delete!(cache_manager.cache, η)
         end
     end
-    # Outcome Mean
-    cache_manager.η_counts[η.outcome_mean] -= 1
-    if cache_manager.η_counts[η.outcome_mean] == 0
-        delete!(cache_manager.cache, η.outcome_mean)
-    end
 end
 
 struct MaxSizeCacheManager <: CacheManager
@@ -53,7 +46,7 @@ end
 
 function make_cache_manager(estimands, string)
     if string == "release-unusable"
-        return ReleaseUnusableCacheManager(TMLE.nuisance_counts(estimands))
+        return ReleaseUnusableCacheManager(TMLE.nuisance_function_counts(estimands))
     elseif string == "no-cache"
         return NoCacheManager()
     else
diff --git a/src/runner.jl b/src/runner.jl
index 0cf30e5..0523489 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -108,9 +108,8 @@ function try_estimation(runner, Ψ, estimator)
 end
 
 function skip_fast(runner, Ψ)
-    ηs = TMLE.get_relevant_factors(Ψ)
-    ηs.propensity_score
-    any(η ∈ runner.failed_nuisance for η in (ηs.outcome_mean, ηs.propensity_score...)) && return true
+    ηs = TMLE.nuisance_functions_iterator(Ψ)
+    any(η ∈ runner.failed_nuisance for η in ηs) && return true
     return false
 end
 
diff --git a/src/utils.jl b/src/utils.jl
index dfd376b..2e188c8 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -178,20 +178,10 @@ end
 #####                 ADDITIONAL METHODS                         ####
 #####################################################################
 
-TMLE.emptyIC(result::FailedEstimation) = result
+TMLE.emptyIC(result::FailedEstimation, pval_threshold) = result
 
-TMLE.emptyIC(result::FailedEstimation, pval_threshold::Float64) = result
-
-TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Nothing) where names = 
-    NamedTuple{names}([TMLE.emptyIC(r) for r in result])
-
-TMLE.emptyIC(result::NamedTuple{names}, pval_threshold::Float64) where names =
-    NamedTuple{names}([TMLE.emptyIC(r, pval_threshold) for r in result])
-
-function TMLE.emptyIC(result, pval_threshold::Float64)
-    pval = pvalue(OneSampleZTest(result))
-    return pval < pval_threshold ? result : TMLE.emptyIC(result)
-end
+TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names =
+    NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt])
 
 
 get_sample_ids(data, variables) = dropmissing(data[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID
@@ -235,6 +225,12 @@ function make_float!(dataset, colnames)
     end
 end
 
+function coerce_types!(dataset, Ψ::ComposedEstimand)
+    for arg in Ψ.args
+        coerce_types!(dataset, arg)
+    end
+end
+
 function coerce_types!(dataset, Ψ)
     categorical_variables = Set(keys(Ψ.treatment_values))
     continuous_variables = Set(Iterators.flatten(values(Ψ.treatment_confounders)))
diff --git a/test/cache_managers.jl b/test/cache_managers.jl
index 0252f13..294ccd2 100644
--- a/test/cache_managers.jl
+++ b/test/cache_managers.jl
@@ -52,7 +52,7 @@ end
             treatment_confounders=(T₃=[:W],)
         )
     ]
-    η_counts = TMLE.nuisance_counts(estimands)
+    η_counts = TMLE.nuisance_function_counts(estimands)
     cache_manager = TargetedEstimation.ReleaseUnusableCacheManager(η_counts)
     # Estimation of the first estimand will fill the cache with the following
     Y_T₁T₂ = TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W))
diff --git a/test/models/biallelic_snp_encoder.jl b/test/models/biallelic_snp_encoder.jl
index 5eb7406..27d544b 100644
--- a/test/models/biallelic_snp_encoder.jl
+++ b/test/models/biallelic_snp_encoder.jl
@@ -17,7 +17,7 @@ using MLJBase
     fit!(mach, verbosity=0)
     fitresult = fitted_params(mach).fitresult
     @test fitresult == Dict(:rs1234 => 'A', :rs4567 => 'C')
-    Xt = transform(mach)
+    Xt = MLJBase.transform(mach)
     @test Xt.rs1234[1:3] == [1, 0, 0]
     @test Xt.rs1234[4] === missing
     @test Xt.rs4567 == [0, 1, 2, 2]
diff --git a/test/runner.jl b/test/runner.jl
index 33bacc8..013cd41 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -14,11 +14,11 @@ using Serialization
 using Arrow
 using YAML
 
-PKGDIR = pkgdir(TargetedEstimation)
+TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
 
-CONFIGDIR = joinpath(PKGDIR, "test", "config")
+CONFIGDIR = joinpath(TESTDIR, "config")
 
-include(joinpath(PKGDIR, "test", "testutils.jl"))
+include(joinpath(TESTDIR, "testutils.jl"))
 
 sort_nt_by_key(nt::NamedTuple{names}) where names = NamedTuple{sort(names)}(nt)
 sort_nt_by_key(x) = x
@@ -169,6 +169,7 @@ end
 
     # Clean
     rm("data.csv")
+    rm(outputs.jls.filename)
     rm(output_txt)
     rm(outputs.json.filename)
     rm(outputs.hdf5.filename)
@@ -305,7 +306,28 @@ end
     datafile = "data.csv"
     tmle(datafile, estimandsfile, estimatorfile; outputs=outputs)
     
+    results = []
+    open(outputs.jls.filename) do io
+        while !eof(io)
+            push!(results, deserialize(io))
+        end
+    end
+
+    for (index, Ψ) ∈ enumerate(configuration.estimands)
+        @test results[index].OSE.estimand == identify(Ψ, configuration.scm)
+    end
+    # The components of the diff should match the estimands 1 and 2
+    for index in 1:2
+        ATE_from_diff = results[3].OSE.estimates[index] 
+        ATE_standalone = results[index].OSE
+        @test ATE_from_diff.estimand == ATE_standalone.estimand
+        @test ATE_from_diff.estimate == ATE_standalone.estimate
+        @test ATE_from_diff.std == ATE_standalone.std
+    end
+    @test results[3].OSE isa TMLE.ComposedEstimate
+    
     rm(datafile)
+    rm(outputs.jls.filename)
 end
 
 
diff --git a/test/utils.jl b/test/utils.jl
index 2a3b5c0..569171f 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -53,7 +53,7 @@ end
 @testset "Test proofread_estimands" for extension in ("yaml", "json")
     # Write estimands file
     filename = "statistical_estimands.$extension"
-    eval(Meta.parse("write_$extension"))(filename, statistical_estimands_only_config())
+    eval(Meta.parse("TMLE.write_$extension"))(filename, statistical_estimands_only_config())
 
     dataset = DataFrame(T1 = [1., 0.], T2=[true, false])
     estimands = TargetedEstimation.proofread_estimands(filename, dataset)

From 23484dd94968fed90112fe77cf2c03d6c944ec35 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 28 Nov 2023 16:46:47 +0000
Subject: [PATCH 10/71] add new fields to output managers

---
 src/runner.jl |  7 +++++--
 src/utils.jl  | 55 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/src/runner.jl b/src/runner.jl
index 0523489..3b25053 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -19,11 +19,14 @@ initialize(output::JSONOutput) = initialize_json(output.filename)
 @option struct HDF5Output
     filename::Union{Nothing, String} = nothing
     pval_threshold::Union{Nothing, Float64} = nothing
+    sample_ids::Bool = false
+    compress::Bool = false
 end
 
 @option struct JLSOutput
     filename::Union{Nothing, String} = nothing
     pval_threshold::Union{Nothing, Float64} = nothing
+    sample_ids::Bool = false
 end
 
 @option struct Outputs
@@ -82,9 +85,9 @@ function save(runner::Runner, results, partition, finalize)
     # Append JSON Output
     update_file(runner.outputs.json, results; finalize=finalize)
     # Append JLS Output
-    update_file(runner.outputs.jls, results)
+    update_file(runner.outputs.jls, results, runner.dataset)
     # Append HDF5 Output
-    update_file(runner.outputs.hdf5, partition, results, runner.dataset)
+    update_file(runner.outputs.hdf5, results, runner.dataset)
 end
 
 function try_estimation(runner, Ψ, estimator)
diff --git a/src/utils.jl b/src/utils.jl
index 2e188c8..c15724e 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -49,26 +49,12 @@ end
 #####                       HDF5 OUTPUT                          ####
 #####################################################################
 
-
-function update_file(output::HDF5Output, partition, results, dataset)
+function update_file(output::HDF5Output, results, dataset)
     output.filename === nothing && return
-
-    jldopen(output.filename, "a+", compress=true) do io
-        previous_variables = nothing
-        sample_ids_idx = nothing
-        for (partition_index, param_index) in enumerate(partition)
-            estimator_results = TMLE.emptyIC(results[partition_index], output.pval_threshold)
-            current_variables = variables(first(estimator_results).estimand)
-            if previous_variables != current_variables
-                sample_ids = TargetedEstimation.get_sample_ids(dataset, current_variables)
-                io["$param_index/sample_ids"] = sample_ids
-                sample_ids_idx = param_index
-            end
-            io["$param_index/result"] = estimator_results
-            io["$param_index/sample_ids_idx"] = sample_ids_idx
-
-            previous_variables = current_variables
-        end
+    results = post_process(results, dataset, output.pval_threshold, output.save_sample_ids)
+    jldopen(output.filename, "a+", compress=output.compress) do io
+        latest_index = maximum(parse(Int, split(key, "_")[2]) for key in keys(io))
+        io[string("Batch_", latest_index + 1)] = results
     end
 end
 
@@ -76,12 +62,12 @@ end
 #####                        JLS OUTPUT                          ####
 #####################################################################
 
-function update_file(output::JLSOutput, results)
+function update_file(output::JLSOutput, results, dataset)
     output.filename === nothing && return
+    results = post_process(results, dataset, output.pval_threshold, output.save_sample_ids)
 
     open(output.filename, "a") do io
         for result in results
-            result = TMLE.emptyIC(result, output.pval_threshold)
             serialize(io, result)
         end
     end
@@ -183,9 +169,32 @@ TMLE.emptyIC(result::FailedEstimation, pval_threshold) = result
 TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names =
     NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt])
 
+function post_process(results, dataset, pval_threshold, save_sample_ids)
+    results = [TMLE.emptyIC(result, pval_threshold) for result ∈ results]
+    if save_sample_ids
+        sample_ids = get_sample_ids(dataset, results)
+        results = [(result..., SAMPLE_IDS=s_ids) for (result, s_ids) in zip(results, sample_ids)]
+    end
+    return results
+end
 
-get_sample_ids(data, variables) = dropmissing(data[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID
-
+sample_ids_from_variables(dataset, variables) = dropmissing(dataset[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID
+
+function get_sample_ids(dataset, results)
+    previous_variables = nothing
+    sample_ids = []
+    current_ref_id = 0
+    for (index, result) in enumerate(results)
+        current_variables = variables(first(result).estimand)
+        if previous_variables != current_variables
+            push!(sample_ids, sample_ids_from_variables(dataset, current_variables))
+            current_ref_id = index
+        else
+            push!(sample_ids, current_ref_id)
+        end
+    end
+    return sample_ids
+end
 
 """
     instantiate_dataset(path::String)

From 7da3375a3fc383c4276257cd99dc60ba8a09a892 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 29 Nov 2023 15:47:24 +0000
Subject: [PATCH 11/71] update tests

---
 Project.toml   |   2 +-
 src/runner.jl  |   1 -
 src/utils.jl   |  15 ++++---
 test/runner.jl | 103 +++++++++++++++++++++++++++----------------------
 4 files changed, 64 insertions(+), 57 deletions(-)

diff --git a/Project.toml b/Project.toml
index 4d74093..a4f5a0d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -40,12 +40,12 @@ CategoricalArrays = "0.10"
 Combinatorics = "1.0.2"
 Comonicon = "1.0.6"
 Configurations = "0.17.6"
-JSON = "0.21.4"
 DataFrames = "1.3.4"
 EvoTrees = "0.14.6"
 GLMNet = "0.7"
 HighlyAdaptiveLasso = "0.2.0"
 JLD2 = "0.4.22"
+JSON = "0.21.4"
 MKL = "0.6"
 MLJ = "0.20.0"
 MLJBase = "1.0.1"
diff --git a/src/runner.jl b/src/runner.jl
index 3b25053..eb991e9 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -8,7 +8,6 @@ TMLE.to_dict(x::FailedEstimation) = Dict(
         :error => x.msg
     )
 
-
 @option struct JSONOutput
     filename::Union{Nothing, String} = nothing
     pval_threshold::Union{Nothing, Float64} = nothing
diff --git a/src/utils.jl b/src/utils.jl
index c15724e..e2aeb91 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -51,9 +51,10 @@ end
 
 function update_file(output::HDF5Output, results, dataset)
     output.filename === nothing && return
-    results = post_process(results, dataset, output.pval_threshold, output.save_sample_ids)
+    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
     jldopen(output.filename, "a+", compress=output.compress) do io
-        latest_index = maximum(parse(Int, split(key, "_")[2]) for key in keys(io))
+        batches_keys = keys(io)
+        latest_index = isempty(batches_keys) ? 0 : maximum(parse(Int, split(key, "_")[2]) for key in batches_keys)
         io[string("Batch_", latest_index + 1)] = results
     end
 end
@@ -64,7 +65,7 @@ end
 
 function update_file(output::JLSOutput, results, dataset)
     output.filename === nothing && return
-    results = post_process(results, dataset, output.pval_threshold, output.save_sample_ids)
+    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
 
     open(output.filename, "a") do io
         for result in results
@@ -74,7 +75,7 @@ function update_file(output::JLSOutput, results, dataset)
 end
 
 #####################################################################
-#####                    Read TMLE Estimands Configuration                         ####
+#####           Read TMLE Estimands Configuration                ####
 #####################################################################
 
 function convert_treatment_values(treatment_levels::NamedTuple{names, <:Tuple{Vararg{NamedTuple}}}, treatment_types) where names
@@ -189,6 +190,7 @@ function get_sample_ids(dataset, results)
         if previous_variables != current_variables
             push!(sample_ids, sample_ids_from_variables(dataset, current_variables))
             current_ref_id = index
+            previous_variables = current_variables
         else
             push!(sample_ids, current_ref_id)
         end
@@ -274,8 +276,5 @@ function load_tmle_spec(file)
     return ESTIMATORS
 end
 
-TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{TMLE.EICEstimate}}}) where names = 
-    Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt))
-
-TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{FailedEstimation}}}) where names = 
+TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{Union{TMLE.EICEstimate, FailedEstimation, TMLE.ComposedEstimate}}}}) where names = 
     Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt))
\ No newline at end of file
diff --git a/test/runner.jl b/test/runner.jl
index 013cd41..bb415bf 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -13,6 +13,7 @@ using CSV
 using Serialization
 using Arrow
 using YAML
+using JSON
 
 TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
 
@@ -20,20 +21,6 @@ CONFIGDIR = joinpath(TESTDIR, "config")
 
 include(joinpath(TESTDIR, "testutils.jl"))
 
-sort_nt_by_key(nt::NamedTuple{names}) where names = NamedTuple{sort(names)}(nt)
-sort_nt_by_key(x) = x
-
-function test_estimands_match(Ψ₁::T1, Ψ₂::T2) where {T1, T2}
-    @test T1 == T2
-    @test Ψ₁.outcome == Ψ₂.outcome
-    @test Ψ₁.outcome_extra_covariates == Ψ₂.outcome_extra_covariates
-    @test sort_nt_by_key(Ψ₁.treatment_confounders) == sort_nt_by_key(Ψ₂.treatment_confounders)
-    @test sort(keys(Ψ₁.treatment_values)) == sort(keys(Ψ₂.treatment_values))
-    for key in keys(Ψ₁.treatment_values)
-        @test sort_nt_by_key(Ψ₁.treatment_values[key]) == sort_nt_by_key(Ψ₂.treatment_values[key])
-    end
-end
-
 """
 CONTINUOUS_OUTCOME: 
 - IATE(0->1, 0->1) = E[W₂] = 0.5
@@ -85,8 +72,8 @@ end
     TMLE.write_json(estimands_filename, statistical_estimands_only_config())
     outputs = TargetedEstimation.Outputs(
         json=TargetedEstimation.JSONOutput(filename="output.json"),
-        hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1.),
-        jls=TargetedEstimation.JLSOutput(filename="output.jls"),
+        hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1., sample_ids=true),
+        jls=TargetedEstimation.JLSOutput(filename="output.jls", pval_threshold=1e-5),
         std=true,
     )
     runner = Runner(
@@ -133,37 +120,40 @@ end
             push!(loaded_results, deserialize(io))
         end
     end
-    for (result, loaded_result) in zip(results, loaded_results)
+    for (index, (result, loaded_result)) in enumerate(zip(results, loaded_results))
         @test loaded_result[:TMLE] isa TMLE.TMLEstimate
         @test result.TMLE.estimate == loaded_result[:TMLE].estimate
-        @test loaded_result[:TMLE].IC == []
-
         @test loaded_result[:OSE] isa TMLE.OSEstimate
         @test result.OSE.estimate == loaded_result[:OSE].estimate
-        @test loaded_result[:OSE].IC == []
+        @test !haskey(loaded_result, :SAMPLE_IDS)
+        if index ∈ (1, 2)
+            @test loaded_result[:TMLE].IC == []
+            @test loaded_result[:OSE].IC == []
+        else
+            @test length(loaded_result[:TMLE].IC) > 0
+            @test length(loaded_result[:OSE].IC) > 0
+        end
     end
 
     # Test Save to HDF5
     hdf5file = jldopen(outputs.hdf5.filename, "r")
-    for (result_index, param_index) in enumerate(4:6)
-        result = hdf5file[string(param_index, "/result")]
+    loaded_results = hdf5file[string("Batch_1")]
+    for (param_index, result) in enumerate(loaded_results)
         @test result.TMLE isa TMLE.TMLEstimate
-        @test results[result_index].TMLE.estimate == result.TMLE.estimate
+        @test results[param_index].TMLE.estimate == result.TMLE.estimate
 
         @test result.OSE isa TMLE.OSEstimate
-        @test results[result_index].OSE.estimate == result.OSE.estimate
+        @test results[param_index].OSE.estimate == result.OSE.estimate
     end
-    @test hdf5file["4/sample_ids"] == collect(2:1000)
-    @test hdf5file["4/sample_ids_idx"] == 4
-    @test size(hdf5file["4/result"].TMLE.IC, 1) == 999
 
-    @test !haskey(hdf5file, "5/sample_ids")
-    @test hdf5file["5/sample_ids_idx"] == 4
-    @test size(hdf5file["5/result"].TMLE.IC, 1) == 999
+    @test loaded_results[1].SAMPLE_IDS == collect(2:1000)
+    @test size(loaded_results[1].TMLE.IC, 1) == 999
+
+    @test loaded_results[2].SAMPLE_IDS == 1
+    @test size(loaded_results[2].TMLE.IC, 1) == 999
 
-    @test hdf5file["6/sample_ids"] == collect(1:1000)
-    @test hdf5file["6/sample_ids_idx"] == 6
-    @test size(hdf5file["6/result"].TMLE.IC, 1) == 1000
+    @test loaded_results[3].SAMPLE_IDS == collect(1:1000)
+    @test size(loaded_results[3].TMLE.IC, 1) == 1000
 
     close(hdf5file)
 
@@ -195,15 +185,20 @@ end
                 chunksize=chunksize,
             )
 
-            hdf5file = jldopen(outputs.hdf5.filename)
+            results_from_hdf5 = jldopen(outputs.hdf5.filename) do io
+                map(keys(io)) do key
+                    io[key]
+                end
+            end
+            results_from_hdf5 = vcat(results_from_hdf5...)
             results_from_json = TMLE.read_json(outputs.json.filename)
 
             for i in 1:6
                 Ψ = configuration.estimands[i]
-                test_estimands_match(Ψ, results_from_json[i][:TMLE].estimand)
-                hdf5result = hdf5file[string(i, "/result")]
-                @test results_from_json[i][:TMLE].estimate == hdf5result.TMLE.estimate
-                @test results_from_json[i][:OSE].estimate == hdf5result.OSE.estimate
+                for estimator_name in (:OSE, :TMLE)
+                    @test Ψ == results_from_hdf5[i][estimator_name].estimand == results_from_json[i][estimator_name].estimand
+                    @test results_from_hdf5[i][estimator_name].estimate == results_from_json[i][estimator_name].estimate
+                end
             end
 
             # Clean
@@ -275,16 +270,15 @@ end
     end
 
     # Check results from HDF5
-    hdf5file = jldopen(outputs.hdf5.filename)
+    results_from_hdf5 = jldopen(outputs.hdf5.filename)["Batch_1"]
     for estimator in (:OSE, :TMLE)
-        @test hdf5file["1/result"][estimator] isa TargetedEstimation.FailedEstimation
-        @test hdf5file["2/result"][estimator] isa TMLE.EICEstimate
+        @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimation
+        @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate
         for i in 3:6
-            @test hdf5file[string(i, "/result")][estimator] isa TargetedEstimation.FailedEstimation
-            @test hdf5file[string(i, "/result")][estimator].estimand isa TMLE.Estimand
+            @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimation
+            @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand
         end
     end
-    close(hdf5file)
 
     # Clean
     rm(outputs.json.filename)
@@ -295,7 +289,9 @@ end
 @testset "Test tmle: Causal and Composed Estimands" begin
     build_dataset(;n=1000, format="csv")
     outputs = TargetedEstimation.Outputs(
-        jls=TargetedEstimation.JLSOutput(filename="output.jls")
+        json = TargetedEstimation.JSONOutput(filename="output.json"),
+        jls = TargetedEstimation.JLSOutput(filename="output.jls"),
+        hdf5 = TargetedEstimation.HDF5Output(filename="output.hdf5")
     )
     tmpdir = mktempdir(cleanup=true)
     estimandsfile = joinpath(tmpdir, "configuration.jls")
@@ -304,15 +300,16 @@ end
     serialize(estimandsfile, configuration)
     estimatorfile = joinpath(CONFIGDIR, "ose_config.jl")
     datafile = "data.csv"
-    tmle(datafile, estimandsfile, estimatorfile; outputs=outputs)
+
+    tmle(datafile, estimandsfile, estimatorfile; outputs=outputs, chunksize=2)
     
+    # JLS Output
     results = []
     open(outputs.jls.filename) do io
         while !eof(io)
             push!(results, deserialize(io))
         end
     end
-
     for (index, Ψ) ∈ enumerate(configuration.estimands)
         @test results[index].OSE.estimand == identify(Ψ, configuration.scm)
     end
@@ -326,8 +323,20 @@ end
     end
     @test results[3].OSE isa TMLE.ComposedEstimate
     
+    # JSON Output
+    results_from_json = TMLE.read_json(outputs.json.filename)
+    @test length(results_from_json) == 3
+
+    # HDF5
+    results_from_json = jldopen(outputs.hdf5.filename)
+    @test length(results_from_json["Batch_1"]) == 2
+    composed_result = only(results_from_json["Batch_2"])
+    @test composed_result.OSE.cov == results[3].OSE.cov
+    
     rm(datafile)
     rm(outputs.jls.filename)
+    rm(outputs.json.filename)
+    rm(outputs.hdf5.filename)
 end
 
 
From a3469cbd9bf36e7e78f4536e688c67e9a8463775 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 29 Nov 2023 15:57:15 +0000
Subject: [PATCH 12/71] reorg outputs code

---
 src/TargetedEstimation.jl |   1 +
 src/outputs.jl            | 138 ++++++++++++++++++++++++++++++++++++++
 src/runner.jl             |  31 ---------
 src/utils.jl              | 100 ---------------------------
 4 files changed, 139 insertions(+), 131 deletions(-)
 create mode 100644 src/outputs.jl

diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index 5dd5011..6e11e08 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -32,6 +32,7 @@ using Configurations
 import MLJModelInterface
 
 include("cache_managers.jl")
+include("outputs.jl")
 include("runner.jl")
 include("utils.jl")
 include("sieve_variance.jl")
diff --git a/src/outputs.jl b/src/outputs.jl
new file mode 100644
index 0000000..9563d12
--- /dev/null
+++ b/src/outputs.jl
@@ -0,0 +1,138 @@
+#####################################################################
+#####                       JSON OUTPUT                          ####
+#####################################################################
+
+@option struct JSONOutput
+    filename::Union{Nothing, String} = nothing
+    pval_threshold::Union{Nothing, Float64} = nothing
+end
+
+initialize(output::JSONOutput) = initialize_json(output.filename)
+
+initialize_json(filename::Nothing) = nothing
+
+initialize_json(filename::String) = open(filename, "w") do io
+    print(io, '[')
+end
+
+function update_file(output::JSONOutput, results; finalize=false)
+    output.filename === nothing && return
+    open(output.filename, "a") do io
+        for result in results
+            result = TMLE.emptyIC(result, output.pval_threshold)
+            JSON.print(io, TMLE.to_dict(result))
+            print(io, ',')
+        end
+        if finalize
+            skip(io, -1) # get rid of the last comma which JSON doesn't allow
+            print(io, ']')
+        end
+    end
+end
+
+#####################################################################
+#####                       HDF5 OUTPUT                          ####
+#####################################################################
+
+@option struct HDF5Output
+    filename::Union{Nothing, String} = nothing
+    pval_threshold::Union{Nothing, Float64} = nothing
+    sample_ids::Bool = false
+    compress::Bool = false
+end
+
+function update_file(output::HDF5Output, results, dataset)
+    output.filename === nothing && return
+    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
+    jldopen(output.filename, "a+", compress=output.compress) do io
+        batches_keys = keys(io)
+        latest_index = isempty(batches_keys) ? 0 : maximum(parse(Int, split(key, "_")[2]) for key in batches_keys)
+        io[string("Batch_", latest_index + 1)] = results
+    end
+end
+
+#####################################################################
+#####                        JLS OUTPUT                          ####
+#####################################################################
+
+@option struct JLSOutput
+    filename::Union{Nothing, String} = nothing
+    pval_threshold::Union{Nothing, Float64} = nothing
+    sample_ids::Bool = false
+end
+
+function update_file(output::JLSOutput, results, dataset)
+    output.filename === nothing && return
+    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
+
+    open(output.filename, "a") do io
+        for result in results
+            serialize(io, result)
+        end
+    end
+end
+
+#####################################################################
+#####                       STD OUTPUT                          ####
+#####################################################################
+
+function update_file(doprint, results, partition)
+    if doprint
+        mimetext = MIME"text/plain"()
+        index = 1
+        for (result, estimand_index) in zip(results, partition)
+            show(stdout, mimetext, string("⋆⋆⋆ Estimand ", estimand_index, " ⋆⋆⋆"))
+            println(stdout)
+            show(stdout, mimetext, first(result).estimand)
+            for (key, val) ∈ zip(keys(result), result)
+                show(stdout, mimetext, string("→ Estimation Result From: ", key, ))
+                println(stdout)
+                show(stdout, mimetext, val)
+                index += 1
+            end
+        end
+    end
+end
+
+#####################################################################
+#####                         OUTPUTS                            ####
+#####################################################################
+
+@option struct Outputs
+    json::JSONOutput = JSONOutput()
+    hdf5::HDF5Output = HDF5Output()
+    jls::JLSOutput   = JLSOutput()
+    std::Bool        = false
+end
+
+function initialize(outputs::Outputs)
+    initialize(outputs.json)
+end
+
+function post_process(results, dataset, pval_threshold, save_sample_ids)
+    results = [TMLE.emptyIC(result, pval_threshold) for result ∈ results]
+    if save_sample_ids
+        sample_ids = get_sample_ids(dataset, results)
+        results = [(result..., SAMPLE_IDS=s_ids) for (result, s_ids) in zip(results, sample_ids)]
+    end
+    return results
+end
+
+sample_ids_from_variables(dataset, variables) = dropmissing(dataset[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID
+
+function get_sample_ids(dataset, results)
+    previous_variables = nothing
+    sample_ids = []
+    current_ref_id = 0
+    for (index, result) in enumerate(results)
+        current_variables = variables(first(result).estimand)
+        if previous_variables != current_variables
+            push!(sample_ids, sample_ids_from_variables(dataset, current_variables))
+            current_ref_id = index
+            previous_variables = current_variables
+        else
+            push!(sample_ids, current_ref_id)
+        end
+    end
+    return sample_ids
+end
\ No newline at end of file
diff --git a/src/runner.jl b/src/runner.jl
index eb991e9..873cf82 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -8,37 +8,6 @@ TMLE.to_dict(x::FailedEstimation) = Dict(
         :error => x.msg
     )
 
-@option struct JSONOutput
-    filename::Union{Nothing, String} = nothing
-    pval_threshold::Union{Nothing, Float64} = nothing
-end
-
-initialize(output::JSONOutput) = initialize_json(output.filename)
-
-@option struct HDF5Output
-    filename::Union{Nothing, String} = nothing
-    pval_threshold::Union{Nothing, Float64} = nothing
-    sample_ids::Bool = false
-    compress::Bool = false
-end
-
-@option struct JLSOutput
-    filename::Union{Nothing, String} = nothing
-    pval_threshold::Union{Nothing, Float64} = nothing
-    sample_ids::Bool = false
-end
-
-@option struct Outputs
-    json::JSONOutput = JSONOutput()
-    hdf5::HDF5Output = HDF5Output()
-    jls::JLSOutput   = JLSOutput()
-    std::Bool        = false
-end
-
-function initialize(outputs::Outputs)
-    initialize(outputs.json)
-end
-
 mutable struct Runner
     estimators::NamedTuple
     estimands::Vector{TMLE.Estimand}
diff --git a/src/utils.jl b/src/utils.jl
index e2aeb91..0fc4f9e 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,78 +1,5 @@
-#####################################################################
-#####                       JSON OUTPUT                          ####
-#####################################################################
-
-initialize_json(filename::Nothing) = nothing
-
-initialize_json(filename::String) = open(filename, "w") do io
-    print(io, '[')
-end
-
-function update_file(output::JSONOutput, results; finalize=false)
-    output.filename === nothing && return
-    open(output.filename, "a") do io
-        for result in results
-            result = TMLE.emptyIC(result, output.pval_threshold)
-            JSON.print(io, TMLE.to_dict(result))
-            print(io, ',')
-        end
-        if finalize
-            skip(io, -1) # get rid of the last comma which JSON doesn't allow
-            print(io, ']')
-        end
-    end
-end
-
-#####################################################################
-#####                       STD OUTPUT                          ####
-#####################################################################
-
-function update_file(doprint, results, partition)
-    if doprint
-        mimetext = MIME"text/plain"()
-        index = 1
-        for (result, estimand_index) in zip(results, partition)
-            show(stdout, mimetext, string("⋆⋆⋆ Estimand ", estimand_index, " ⋆⋆⋆"))
-            println(stdout)
-            show(stdout, mimetext, first(result).estimand)
-            for (key, val) ∈ zip(keys(result), result)
-                show(stdout, mimetext, string("→ Estimation Result From: ", key, ))
-                println(stdout)
-                show(stdout, mimetext, val)
-                index += 1
-            end
-        end
-    end
-end
-
-#####################################################################
-#####                       HDF5 OUTPUT                          ####
-#####################################################################
-
-function update_file(output::HDF5Output, results, dataset)
-    output.filename === nothing && return
-    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
-    jldopen(output.filename, "a+", compress=output.compress) do io
-        batches_keys = keys(io)
-        latest_index = isempty(batches_keys) ? 0 : maximum(parse(Int, split(key, "_")[2]) for key in batches_keys)
-        io[string("Batch_", latest_index + 1)] = results
-    end
-end
 
-#####################################################################
-#####                        JLS OUTPUT                          ####
-#####################################################################
 
-function update_file(output::JLSOutput, results, dataset)
-    output.filename === nothing && return
-    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
-
-    open(output.filename, "a") do io
-        for result in results
-            serialize(io, result)
-        end
-    end
-end
 
 #####################################################################
 #####           Read TMLE Estimands Configuration                ####
@@ -170,33 +97,6 @@ TMLE.emptyIC(result::FailedEstimation, pval_threshold) = result
 TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names =
     NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt])
 
-function post_process(results, dataset, pval_threshold, save_sample_ids)
-    results = [TMLE.emptyIC(result, pval_threshold) for result ∈ results]
-    if save_sample_ids
-        sample_ids = get_sample_ids(dataset, results)
-        results = [(result..., SAMPLE_IDS=s_ids) for (result, s_ids) in zip(results, sample_ids)]
-    end
-    return results
-end
-
-sample_ids_from_variables(dataset, variables) = dropmissing(dataset[!, [:SAMPLE_ID, variables...]]).SAMPLE_ID
-
-function get_sample_ids(dataset, results)
-    previous_variables = nothing
-    sample_ids = []
-    current_ref_id = 0
-    for (index, result) in enumerate(results)
-        current_variables = variables(first(result).estimand)
-        if previous_variables != current_variables
-            push!(sample_ids, sample_ids_from_variables(dataset, current_variables))
-            current_ref_id = index
-            previous_variables = current_variables
-        else
-            push!(sample_ids, current_ref_id)
-        end
-    end
-    return sample_ids
-end
 
 """
     instantiate_dataset(path::String)

From 3f3751443a01633d27223c4bc539d778fdb62d1b Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 29 Nov 2023 16:00:32 +0000
Subject: [PATCH 13/71] add failed estimate file

---
 src/TargetedEstimation.jl |  2 ++
 src/failed_estimate.jl    | 11 +++++++++++
 src/runner.jl             | 14 ++------------
 src/utils.jl              |  4 +---
 test/runner.jl            |  4 ++--
 5 files changed, 18 insertions(+), 17 deletions(-)
 create mode 100644 src/failed_estimate.jl

diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index 6e11e08..7d94897 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -31,6 +31,7 @@ using Configurations
 
 import MLJModelInterface
 
+include("failed_estimate.jl")
 include("cache_managers.jl")
 include("outputs.jl")
 include("runner.jl")
@@ -42,6 +43,7 @@ include(joinpath("models", "glmnet.jl"))
 include(joinpath("models", "adaptive_interaction_transformer.jl"))
 include(joinpath("models", "biallelic_snp_encoder.jl"))
 
+
 """TL CLI."""
 @main
 
diff --git a/src/failed_estimate.jl b/src/failed_estimate.jl
new file mode 100644
index 0000000..b512e93
--- /dev/null
+++ b/src/failed_estimate.jl
@@ -0,0 +1,11 @@
+struct FailedEstimate
+    estimand::TMLE.Estimand
+    msg::String
+end
+
+TMLE.to_dict(x::FailedEstimate) = Dict(
+    :estimand => TMLE.to_dict(x.estimand),
+    :error => x.msg
+)
+
+TMLE.emptyIC(result::FailedEstimate, pval_threshold) = result
diff --git a/src/runner.jl b/src/runner.jl
index 873cf82..0817526 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -1,13 +1,3 @@
-struct FailedEstimation
-    estimand::TMLE.Estimand
-    msg::String
-end
-
-TMLE.to_dict(x::FailedEstimation) = Dict(
-        :estimand => TMLE.to_dict(x.estimand),
-        :error => x.msg
-    )
-
 mutable struct Runner
     estimators::NamedTuple
     estimands::Vector{TMLE.Estimand}
@@ -70,7 +60,7 @@ function try_estimation(runner, Ψ, estimator)
         # This also allows to skip fast the next estimands requiring the same nuisance functions.
         if e isa TMLE.FitFailedError
             push!(runner.failed_nuisance, e.estimand)
-            return FailedEstimation(Ψ, e.msg)
+            return FailedEstimate(Ψ, e.msg)
         # On other errors, rethrow
         else 
             rethrow(e) 
@@ -89,7 +79,7 @@ function (runner::Runner)(partition)
     for (partition_index, param_index) in enumerate(partition)
         Ψ = runner.estimands[param_index]
         if skip_fast(runner, Ψ)
-            results[partition_index] = NamedTuple{keys(runner.estimators)}([FailedEstimation(Ψ, "Skipped due to shared failed nuisance fit.") for _ in 1:length(runner.estimators)])
+            results[partition_index] = NamedTuple{keys(runner.estimators)}([FailedEstimate(Ψ, "Skipped due to shared failed nuisance fit.") for _ in 1:length(runner.estimators)])
             continue
         end
         # Make sure data types are appropriate for the estimand
diff --git a/src/utils.jl b/src/utils.jl
index 0fc4f9e..4c534f4 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -92,8 +92,6 @@ end
 #####                 ADDITIONAL METHODS                         ####
 #####################################################################
 
-TMLE.emptyIC(result::FailedEstimation, pval_threshold) = result
-
 TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names =
     NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt])
 
@@ -176,5 +174,5 @@ function load_tmle_spec(file)
     return ESTIMATORS
 end
 
-TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{Union{TMLE.EICEstimate, FailedEstimation, TMLE.ComposedEstimate}}}}) where names = 
+TMLE.to_dict(nt::NamedTuple{names, <:Tuple{Vararg{Union{TMLE.EICEstimate, FailedEstimate, TMLE.ComposedEstimate}}}}) where names = 
     Dict(key => TMLE.to_dict(val) for (key, val) ∈ zip(keys(nt), nt))
\ No newline at end of file
diff --git a/test/runner.jl b/test/runner.jl
index bb415bf..fd52bc3 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -272,10 +272,10 @@ end
     # Check results from HDF5
     results_from_hdf5 = jldopen(outputs.hdf5.filename)["Batch_1"]
     for estimator in (:OSE, :TMLE)
-        @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimation
+        @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimate
         @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate
         for i in 3:6
-            @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimation
+            @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimate
             @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand
         end
     end

From 77f93b6e00c59e8014f2e55a0961c7ab3ae3401d Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 29 Nov 2023 16:12:14 +0000
Subject: [PATCH 14/71] remove some deprecated methods and simplify ose config

---
 src/utils.jl              | 18 ++----------------
 test/config/ose_config.jl | 34 +++-------------------------------
 test/runtests.jl          |  1 +
 test/utils.jl             | 34 ++--------------------------------
 4 files changed, 8 insertions(+), 79 deletions(-)

diff --git a/src/utils.jl b/src/utils.jl
index 4c534f4..a045168 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,6 +1,3 @@
-
-
-
 #####################################################################
 #####           Read TMLE Estimands Configuration                ####
 #####################################################################
@@ -21,18 +18,6 @@ MissingSCMError() = ArgumentError(string("A Structural Causal Model should be pr
 get_identification_method(method::Nothing) = BackdoorAdjustment()
 get_identification_method(method) = method
 
-maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::SCM, method) = 
-    identify(get_identification_method(method), Ψ, scm)
-
-maybe_identify(Ψ::TMLE.CausalCMCompositeEstimands, scm::Nothing, method) = throw(MissingSCMError())
-
-function maybe_identify(Ψ::TMLE.ComposedEstimand, scm, method)
-    method = get_identification_method(method)
-    return TMLE.ComposedEstimand(Ψ.f, Tuple(maybe_identify(arg, scm, method) for arg ∈ Ψ.args))
-end
-
-maybe_identify(Ψ, scm, method) = Ψ
-
 function read_method(extension)
     method = if extension == ".json"
         TMLE.read_json
@@ -79,10 +64,11 @@ respects the treatment types in the dataset.
 function proofread_estimands(filename, dataset)
     extension = filename[findlast(isequal('.'), filename):end]
     config = read_method(extension)(filename)
+    adjustment_method = get_identification_method(config.adjustment)
     estimands = Vector{TMLE.Estimand}(undef, length(config.estimands))
     treatment_types = Dict()
     for (index, Ψ) in enumerate(config.estimands)
-        statisticalΨ = TargetedEstimation.maybe_identify(Ψ, config.scm, config.adjustment)
+        statisticalΨ = identify(Ψ, config.scm, method=adjustment_method)
         estimands[index] = fix_treatment_values!(treatment_types, statisticalΨ, dataset)
     end
     return estimands
diff --git a/test/config/ose_config.jl b/test/config/ose_config.jl
index 5462955..28f2367 100644
--- a/test/config/ose_config.jl
+++ b/test/config/ose_config.jl
@@ -1,39 +1,11 @@
 
-evotree = EvoTreeClassifier(nrounds=10)
-
 default_models = TMLE.default_models(
   # For the estimation of E[Y|W, T]: continuous target
-  Q_continuous = Stack(
-    metalearner        = LinearRegressor(fit_intercept=false),
-    cache              = true,
-    resampling         = AdaptiveCV(),
-    interaction_glmnet = Pipeline(
-      interaction_transformer = InteractionTransformer(order=3),
-      glmnet                  = GLMNetRegressor(),
-      cache                   = true
-    ),
-    evo_1              = EvoTreeRegressor(nrounds=10, lambda=0., gamma=0.3),
-    evo_2              = EvoTreeRegressor(nrounds=10, lambda=1., gamma=0.3),
-    evo_3              = EvoTreeRegressor(nrounds=20, lambda=0., gamma=0.3),
-    evo_4              = EvoTreeRegressor(nrounds=20, lambda=1., gamma=0.3),
-    constant           = ConstantRegressor(),
-    hal                = HALRegressor(max_degree=1, smoothness_orders=1, num_knots=[10, 5], lambda=10, cv_select=false)
-    ),
+  Q_continuous = LinearRegressor(),
   # For the estimation of E[Y|W, T]: binary target
-  Q_binary = Pipeline(
-    interaction_transformer = InteractionTransformer(order=2),
-    glmnet                  = GLMNetClassifier(),
-    cache                   = false
-  ),
+  Q_binary = LogisticClassifier(),
   # For the estimation of p(T| W)
-  G = TunedModel(
-    model = evotree,
-    resampling = CV(),
-    tuning = Grid(goal=5),
-    range = [range(evotree, :max_depth, lower=3, upper=5), range(evotree, :lambda, lower=1e-5, upper=10, scale=:log)],
-    measure = log_loss,
-    cache=true
-    )
+  G = LogisticClassifier()
 )
 
 ESTIMATORS = (
diff --git a/test/runtests.jl b/test/runtests.jl
index 13cf36f..e8f5280 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,4 +1,5 @@
 using TargetedEstimation
+using Test
 
 TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
 
diff --git a/test/utils.jl b/test/utils.jl
index 569171f..1355b29 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -138,10 +138,10 @@ end
         T₁         = [1, 2, 3, 4, 5],
         T₂         = [1, 2, 3, 4, missing],
     )
-    sample_ids = TargetedEstimation.get_sample_ids(data, variables)
+    sample_ids = TargetedEstimation.sample_ids_from_variables(data, variables)
     @test sample_ids == [2, 3]
     data.W₁ = [1, 2, missing, 4, 5]
-    sample_ids = TargetedEstimation.get_sample_ids(data, variables)
+    sample_ids = TargetedEstimation.sample_ids_from_variables(data, variables)
     @test sample_ids == [2]
 end
 
@@ -198,36 +198,6 @@ end
     end
 end
 
-@testset "Test maybe_identify" begin
-    scm = StaticSCM(
-        outcomes = [:Y],
-        treatments = [:T₁, :T₂],
-        confounders = [:W]
-    )
-    adjustment = BackdoorAdjustment()
-    causalATE = ATE(
-        outcome = :Y, 
-        treatment_values = (T₁ =(case=1, control=0),)
-    )
-    statisticalATE = ATE(
-        outcome = :Y, 
-        treatment_values = (T₁ =(case=1, control=0),),
-        treatment_confounders = (T₁=[:W],)
-    )
-    # Correctly identifies the estimand
-    identifiedATE = TargetedEstimation.maybe_identify(causalATE, scm, nothing)
-    @test statisticalATE == identifiedATE
-    # Just returns the estimand
-    @test TargetedEstimation.maybe_identify(statisticalATE, scm, nothing) === statisticalATE
-    # Throws if can't identify
-    @test_throws TargetedEstimation.MissingSCMError() TargetedEstimation.maybe_identify(causalATE, nothing, nothing)
-    # Composed Estimand with a weird mixture of statistical/causal estimands
-    diff = ComposedEstimand(-, (causalATE, statisticalATE))
-    identified_diff = TargetedEstimation.maybe_identify(diff, scm, nothing)
-    statistical_diff = ComposedEstimand(-, (statisticalATE, statisticalATE))
-    @test identified_diff == statistical_diff
-end
-
 end;
 
 true
\ No newline at end of file

From 060b0901f514005e7eecb1a4db0ac831f02c29d1 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 29 Nov 2023 16:29:34 +0000
Subject: [PATCH 15/71] add initialize tests

---
 src/outputs.jl   | 29 ++++++++++++++++++++++-
 test/outputs.jl  | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/runtests.jl |  1 +
 test/utils.jl    | 23 -------------------
 4 files changed, 89 insertions(+), 24 deletions(-)
 create mode 100644 test/outputs.jl

diff --git a/src/outputs.jl b/src/outputs.jl
index 9563d12..1044fe3 100644
--- a/src/outputs.jl
+++ b/src/outputs.jl
@@ -1,3 +1,15 @@
+FileExistsError(filename) = ArgumentError(string("File ", filename, " already exists."))
+
+check_file_exists(filename::Nothing) = nothing
+check_file_exists(filename) = !isfile(filename) || throw(FileExistsError(filename))
+
+"""
+    initialize(output)
+
+Default intialization procedure only checks that file does not exist.
+"""
+initialize(output) = check_file_exists(output.filename)
+
 #####################################################################
 #####                       JSON OUTPUT                          ####
 #####################################################################
@@ -7,7 +19,15 @@
     pval_threshold::Union{Nothing, Float64} = nothing
 end
 
-initialize(output::JSONOutput) = initialize_json(output.filename)
+"""
+    initialize(output::JSONOutput)
+
+Checks that file does not exist and inialize the json file
+"""
+function initialize(output::JSONOutput)
+    check_file_exists(output.filename)
+    initialize_json(output.filename)
+end
 
 initialize_json(filename::Nothing) = nothing
 
@@ -105,8 +125,15 @@ end
     std::Bool        = false
 end
 
+"""
+    initialize(output::Outputs)
+
+Initializes all outputs in output.
+"""
 function initialize(outputs::Outputs)
     initialize(outputs.json)
+    initialize(outputs.jls)
+    initialize(outputs.hdf5)
 end
 
 function post_process(results, dataset, pval_threshold, save_sample_ids)
diff --git a/test/outputs.jl b/test/outputs.jl
new file mode 100644
index 0000000..9b13bb0
--- /dev/null
+++ b/test/outputs.jl
@@ -0,0 +1,60 @@
+module TestOutputs
+
+using TargetedEstimation
+using Test
+using JSON
+
+TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
+
+include(joinpath(TESTDIR, "testutils.jl"))
+
+@testset "Test initialize" begin
+    outputs = TargetedEstimation.Outputs(
+        json = TargetedEstimation.JSONOutput(filename="output.json"),
+        jls = TargetedEstimation.JLSOutput(filename="output.jls"),
+        hdf5 = TargetedEstimation.HDF5Output(filename="output.hdf5"),
+    )
+
+    TargetedEstimation.initialize(outputs)
+
+    @test isfile(outputs.json.filename)
+    @test_throws TargetedEstimation.FileExistsError(outputs.json.filename) TargetedEstimation.initialize(outputs)
+    rm(outputs.json.filename)
+
+    touch(outputs.jls.filename)
+    @test_throws TargetedEstimation.FileExistsError(outputs.jls.filename) TargetedEstimation.initialize(outputs)
+    rm(outputs.jls.filename)
+    rm(outputs.json.filename)
+
+    touch(outputs.hdf5.filename)
+    @test_throws TargetedEstimation.FileExistsError(outputs.hdf5.filename) TargetedEstimation.initialize(outputs)
+    rm(outputs.hdf5.filename)
+    rm(outputs.json.filename)
+end
+
+@testset "Test JSON update_file" begin
+    results = []
+    for Ψ in statistical_estimands_only_config().estimands
+        push!(results, (
+            TMLE=TMLE.TMLEstimate(Ψ, rand(), rand(), 10, Float64[]),
+            OSE=TMLE.OSEstimate(Ψ, rand(), rand(), 10, Float64[])
+            ))
+    end
+    tmpdir = mktempdir(cleanup=true)
+    jsonoutput = TargetedEstimation.JSONOutput(filename=joinpath(tmpdir, "output_test.json"))
+    TargetedEstimation.initialize_json(jsonoutput.filename)
+    TargetedEstimation.update_file(jsonoutput, results[1:3])
+    TargetedEstimation.update_file(jsonoutput, results[4:end]; finalize=true)
+    loaded_results = TMLE.read_json(jsonoutput.filename)
+    @test size(loaded_results) == size(results)
+    for (result, loaded_result) in zip(results, loaded_results)
+        @test result.TMLE.estimate == loaded_result[:TMLE].estimate
+        @test result.TMLE.std == loaded_result[:TMLE].std
+        @test result.OSE.estimate == loaded_result[:OSE].estimate
+        @test result.OSE.std == loaded_result[:OSE].std
+    end
+end
+
+end
+
+true
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index e8f5280..e8b741a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,6 +4,7 @@ using Test
 TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
 
 @time begin
+    @test include(joinpath(TESTDIR, "outputs.jl"))
     @test include(joinpath(TESTDIR, "cache_managers.jl"))
     @test include(joinpath(TESTDIR, "utils.jl"))
     @test include(joinpath(TESTDIR, "sieve_variance.jl"))
diff --git a/test/utils.jl b/test/utils.jl
index 1355b29..7525168 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -175,29 +175,6 @@ end
     TargetedEstimation.make_categorical(dataset.T₁, true) === dataset.T₁
 end
 
-@testset "Test JSON writing" begin
-    results = []
-    for Ψ in statistical_estimands_only_config().estimands
-        push!(results, (
-            TMLE=TMLE.TMLEstimate(Ψ, rand(), rand(), 10, Float64[]),
-            OSE=TMLE.OSEstimate(Ψ, rand(), rand(), 10, Float64[])
-            ))
-    end
-    tmpdir = mktempdir(cleanup=true)
-    jsonoutput = TargetedEstimation.JSONOutput(filename=joinpath(tmpdir, "output_test.json"))
-    TargetedEstimation.initialize_json(jsonoutput.filename)
-    TargetedEstimation.update_file(jsonoutput, results[1:3])
-    TargetedEstimation.update_file(jsonoutput, results[4:end]; finalize=true)
-    loaded_results = TMLE.read_json(jsonoutput.filename)
-    @test size(loaded_results) == size(results)
-    for (result, loaded_result) in zip(results, loaded_results)
-        @test result.TMLE.estimate == loaded_result[:TMLE].estimate
-        @test result.TMLE.std == loaded_result[:TMLE].std
-        @test result.OSE.estimate == loaded_result[:OSE].estimate
-        @test result.OSE.std == loaded_result[:OSE].std
-    end
-end
-
 end;
 
 true
\ No newline at end of file

From c59ba1305990b6031cb5ae79a9e8d0ff67ce05a1 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 29 Nov 2023 17:52:18 +0000
Subject: [PATCH 16/71] update svp test to support composedestimate

---
 src/sieve_variance.jl  | 97 +++++++++++++++++++++++++----------------
 src/utils.jl           |  2 +
 test/sieve_variance.jl | 98 +++++++++++++++++++++++++++++-------------
 3 files changed, 130 insertions(+), 67 deletions(-)

diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index cc953e9..290e672 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -80,8 +80,26 @@ end
 
 default_τs(nτs;max_τ=2) = Float32[max_τ*(i-1)/(nτs-1) for i in 1:nτs]
 
+retrieve_sample_ids(sample_ids::AbstractVector, batch_results) = sample_ids
 
-function build_work_list(prefix, grm_ids)
+retrieve_sample_ids(index::Int, batch_results) = batch_results[index].SAMPLE_IDS
+
+function update_work_lists_with!(result::TMLE.ComposedEstimate, sample_ids, batch_results, grm_ids, results, influence_curves, n_obs)
+    for estimate in result.estimates
+        update_work_lists_with!(estimate, sample_ids, batch_results, grm_ids, results, influence_curves, n_obs)
+    end
+end
+
+function update_work_lists_with!(result, sample_ids, batch_results, grm_ids, results, influence_curves, n_obs)
+    if length(result.IC) > 0
+        sample_ids = string.(retrieve_sample_ids(sample_ids, batch_results))
+        push!(influence_curves, align_ic(result.IC, sample_ids, grm_ids))
+        push!(n_obs, size(sample_ids, 1))
+        push!(results, result)
+    end
+end
+
+function build_work_list(prefix, grm_ids; estimator_key=:TMLE)
     dirname_, prefix_ = splitdir(prefix)
     dirname__ = dirname_ == "" ? "." : dirname_
     hdf5files = filter(
@@ -92,28 +110,28 @@ function build_work_list(prefix, grm_ids)
 
     influence_curves = Vector{Float32}[]
     n_obs = Int[]
-    tmle_results = []
+    results = []
     for hdf5file in hdf5files
         jldopen(hdf5file) do io
-            # templateΨs = io["parameters"]
-            # results = io["results"]
             for key in keys(io)
-                result_group = io[key]
-                tmleresult = first(io[key]["result"])
-                if size(tmleresult.IC, 1) > 0
-                    sample_ids = haskey(result_group, "sample_ids") ? result_group["sample_ids"] :
-                        io[string(result_group["sample_ids_idx"])]["sample_ids"]
-                    sample_ids = string.(sample_ids)
-
-                    push!(influence_curves, align_ic(tmleresult.IC, sample_ids, grm_ids))
-                    push!(n_obs, size(sample_ids, 1))
-                    push!(tmle_results, tmleresult)
+                batch_results = io[key]
+                for nt_result in batch_results
+                    result = nt_result[estimator_key]
+                    sample_ids = nt_result.SAMPLE_IDS
+                    update_work_lists_with!(
+                        result,
+                        sample_ids,
+                        batch_results, 
+                        grm_ids, results, 
+                        influence_curves, 
+                        n_obs
+                    )
                 end
             end
         end
     end
     influence_curves = length(influence_curves) > 0 ? reduce(vcat, transpose(influence_curves)) : Matrix{Float32}(undef, 0, 0)
-    return tmle_results, influence_curves, n_obs
+    return results, influence_curves, n_obs
 end
 
 
@@ -197,37 +215,37 @@ function grm_rows_bounds(n_samples)
     return bounds
 end
 
-function save_results(outprefix, results, τs, variances)
-    TMLE.write_json(string(outprefix, ".json"), results)
-    jldopen(string(outprefix, ".hdf5"), "w") do io
+function save_results(filename, results, τs, variances)
+    jldopen(filename, "w") do io
         io["taus"] = τs
         io["variances"] = variances
+        io["results"] = results
     end
 end
 
 corrected_stderrors(variances) =
     sqrt.(view(maximum(variances, dims=1), 1, :))
 
-function update_with_sieve_estimate!(results, stds)
-    for index in eachindex(results)
-        old = results[index]
-        results[index] = typeof(old)(
-            old.estimand,
-            old.estimate,
-            convert(Float64, stds[index]),
-            old.n,
-            Float64[]
-        )
-    end
-end
+with_updated_std(estimate::T, std) where T = T(
+    estimate.estimand,
+    estimate.estimate,
+    convert(Float64, std),
+    estimate.n,
+    Float64[]
+)
+
+with_updated_std(results, stds, estimator_key) =
+    [NamedTuple{(estimator_key,)}([with_updated_std(result, std)]) for (result, std) in zip(results, stds)]
+
 
 """
     sieve_variance_plateau(input_prefix;
-        output_prefix="svp",
+        out="svp.hdf5",
         grm_prefix="GRM",
         verbosity=0, 
         n_estimators=10, 
-        max_tau=0.8
+        max_tau=0.8,
+        estimator_key="TMLE"
     )
 
 Sieve Variance Plateau CLI.
@@ -238,33 +256,36 @@ Sieve Variance Plateau CLI.
 
 # Options
 
-- `-o, --output-prefix`: Output prefix.
+- `-o, --out`: Output filename.
 - `-g, --grm-prefix`: Prefix to the aggregated GRM.
 - `-v, --verbosity`: Verbosity level.
 - `-n, --n_estimators`: Number of variance estimators to build for each estimate. 
 - `-m, --max_tau`: Maximum distance between any two individuals.
+- `-e, --estimator-key`: Estimator to use to proceed with sieve variance correction.
 """
 @cast function sieve_variance_plateau(input_prefix;
-    output_prefix="svp",
+    out="svp.hdf5",
     grm_prefix="GRM",
     verbosity=0, 
     n_estimators=10, 
-    max_tau=0.8
+    max_tau=0.8,
+    estimator_key="TMLE"
     )
+    estimator_key = Symbol(estimator_key)
     τs = default_τs(n_estimators;max_τ=max_tau)
     grm, grm_ids = readGRM(grm_prefix)
     verbosity > 0 && @info "Preparing work list."
-    results, influence_curves, n_obs = build_work_list(input_prefix, grm_ids)
+    results, influence_curves, n_obs = build_work_list(input_prefix, grm_ids, estimator_key=estimator_key)
 
     if length(influence_curves) > 0
         verbosity > 0 && @info "Computing variance estimates."
         variances = compute_variances(influence_curves, grm, τs, n_obs)
         std_errors = corrected_stderrors(variances)
-        update_with_sieve_estimate!(results, std_errors)
+        results = with_updated_std(results, std_errors, estimator_key)
     else
         variances = Float32[]
     end
-    save_results(output_prefix, results, τs, variances)
+    save_results(out, results, τs, variances)
 
     verbosity > 0 && @info "Done."
     return 0
diff --git a/src/utils.jl b/src/utils.jl
index a045168..a5c4c24 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -137,6 +137,8 @@ function coerce_types!(dataset, Ψ)
     make_float!(dataset, continuous_variables)
 end
 
+variables(Ψ::TMLE.ComposedEstimand) = union((variables(arg) for arg in Ψ.args)...)
+
 variables(Ψ::TMLE.Estimand) = Set([
     Ψ.outcome,
     keys(Ψ.treatment_values)...,
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index 3359f2a..3ffaad7 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -48,12 +48,15 @@ function build_dataset(sample_ids)
     CSV.write("data.csv", dataset)
 end
 
-function build_tmle_output_file(sample_ids, estimandfile, outprefix, pval)
+function build_tmle_output_file(sample_ids, estimandfile, outprefix; 
+    pval=1., 
+    estimatorfile=joinpath(TESTDIR, "config", "tmle_ose_config.jl")
+    )
     build_dataset(sample_ids)
     outputs = TargetedEstimation.Outputs(
-        hdf5=TargetedEstimation.HDF5Output(filename=string(outprefix, ".hdf5"), pval_threshold=pval),
+        hdf5=TargetedEstimation.HDF5Output(filename=string(outprefix, ".hdf5"), pval_threshold=pval, sample_ids=true),
     )
-    tmle("data.csv", estimandfile, joinpath(TESTDIR, "config", "tmle_ose_config.jl"), outputs=outputs)
+    tmle("data.csv", estimandfile, estimatorfile, outputs=outputs)
 end
 
 function basic_variance_implementation(matrix_distance, influence_curve, n_obs)
@@ -110,16 +113,15 @@ end
 
     # CASE_1: pval = 1.
     # Simulate multiple runs that occured
-    pval = 1.
     config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3])
     estimandsfile_1 = joinpath(tmpdir, "configuration_1.json")
     TMLE.write_json(estimandsfile_1, config_1)
-    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1", pval)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1")
 
     config_2 = TMLE.Configuration(estimands=configuration.estimands[4:end])
     estimandsfile_2 = joinpath(tmpdir, "configuration_2.json")
     TMLE.write_json(estimandsfile_2, config_2)
-    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2", pval)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2")
 
     results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids)
     # Check n_obs
@@ -140,7 +142,7 @@ end
     pval = 0.1
     estimandsfile = joinpath(tmpdir, "configuration.json")
     TMLE.write_json(estimandsfile, configuration)
-    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile, "tmle_output", pval)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile, "tmle_output"; pval=pval)
     results, influence_curves, n_obs = TargetedEstimation.build_work_list("tmle_output", grm_ids)
     # Check n_obs
     @test n_obs == [194, 193, 193, 194]
@@ -266,7 +268,7 @@ end
     close(io)
 end
 
-@testset "Test sieve_variance_plateau" begin
+@testset "Test SVP" begin
     # Generate data
     grm_ids = TargetedEstimation.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id"))
     tmpdir = mktempdir(cleanup=true)
@@ -275,49 +277,87 @@ end
     config_1 = TMLE.Configuration(estimands=configuration.estimands[1:3])
     estimandsfile_1 = joinpath(tmpdir, "configuration_1.json")
     TMLE.write_json(estimandsfile_1, config_1)
-    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1", pval)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_1, "tmle_output_1"; pval=pval)
 
     config_2 = TMLE.Configuration(estimands=configuration.estimands[4:end])
     estimandsfile_2 = joinpath(tmpdir, "configuration_2.json")
     TMLE.write_json(estimandsfile_2, config_2)
-    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2", pval)
+    build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2"; pval=pval)
 
     sieve_variance_plateau("tmle_output";
         grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"),
         max_tau=0.75
     )
 
-    # Check HDF5 file
     io = jldopen("svp.hdf5")
+    # Check τs
     @test io["taus"] == TargetedEstimation.default_τs(10; max_τ=0.75)
+    # Check variances
     @test size(io["variances"]) == (10, 4)
-    close(io)
-
-    # Check JSON file
-    svp_results = TMLE.read_json("svp.json")
-    tmleout1 = jldopen("tmle_output_1.hdf5")
-    tmleout2 = jldopen("tmle_output_2.hdf5")
-
-    src_results = vcat(
-        [tmleout1[string(i, "/result")].TMLE for i in 1:3],
-        [tmleout2[string(i, "/result")].TMLE for i in 1:3],
-    )
+    # Check results
+    svp_results = io["results"]
+    
+    tmleout1 = jldopen("tmle_output_1.hdf5")["Batch_1"]
+    tmleout2 = jldopen("tmle_output_2.hdf5")["Batch_1"]
+    src_results = [tmleout1..., tmleout2...]
 
     for svp_result in svp_results
-        src_result_index = findall(x.estimand == svp_result.estimand for x in src_results)
+        src_result_index = findall(x.TMLE.estimand == svp_result.TMLE.estimand for x in src_results)
         src_result = src_results[only(src_result_index)]
-        @test src_result.std != svp_result.std
-        @test src_result.estimate == svp_result.estimate
-        @test src_result.n == svp_result.n
-        @test svp_result.IC == []
+        @test src_result.TMLE.std != svp_result.TMLE.std
+        @test src_result.TMLE.estimate == svp_result.TMLE.estimate
+        @test src_result.TMLE.n == svp_result.TMLE.n
+        @test svp_result.TMLE.IC == []
     end
-
+    close(io)
     # clean
-    rm("svp.json")
     rm("svp.hdf5")
     rm("tmle_output_1.hdf5")
     rm("tmle_output_2.hdf5")
     rm("data.csv")
 end
 
+@testset "Test SVP: causal and composed estimands" begin
+    # Generate data
+    grm_ids = TargetedEstimation.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id"))
+    tmpdir = mktempdir(cleanup=true)
+    configuration = causal_and_composed_estimands_config()
+    pval = 1.
+    configfile = joinpath(tmpdir, "configuration.json")
+    TMLE.write_json(configfile, configuration)
+    build_tmle_output_file(
+        grm_ids.SAMPLE_ID, 
+        configfile, 
+        "tmle_output";
+        estimatorfile=joinpath(TESTDIR, "config", "ose_config.jl")
+    )
+    sieve_variance_plateau("tmle_output";
+        grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"),
+        max_tau=0.75,
+        estimator_key="OSE"
+    )
+    # The ComposedEstimate std is not updated but each component is.
+    src_results = jldopen("tmle_output.hdf5")["Batch_1"]
+    io = jldopen("svp.hdf5")
+    svp_results = io["results"]
+    standalone_estimates = svp_results[1:2]
+    from_composite = svp_results[3:4]
+    @test standalone_estimates[1].OSE.estimand == from_composite[1].OSE.estimand
+    @test standalone_estimates[2].OSE.estimand == from_composite[2].OSE.estimand
+
+    # Check std has been updated
+    for i in 1:2
+        @test standalone_estimates[i].OSE.estimand == src_results[i].OSE.estimand
+        @test standalone_estimates[i].OSE.estimate == src_results[i].OSE.estimate
+        @test standalone_estimates[i].OSE.std != src_results[i].OSE.std
+    end
+
+    close(io)
+    
+    # clean
+    rm("svp.hdf5")
+    rm("tmle_output.hdf5")
+    rm("data.csv")
+end
+
 end

From 25d0496707646a27d5f59790393c341a6bdf340b Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 29 Nov 2023 18:11:51 +0000
Subject: [PATCH 17/71] start working on merge

---
 src/merge.jl  | 77 ++++++++++++++++++----------------------
 test/merge.jl | 97 +--------------------------------------------------
 2 files changed, 35 insertions(+), 139 deletions(-)

diff --git a/src/merge.jl b/src/merge.jl
index 4fd4b1f..3bcce8e 100644
--- a/src/merge.jl
+++ b/src/merge.jl
@@ -12,55 +12,46 @@ end
 read_output_with_types(file) = 
     CSV.read(file, DataFrame, types=Dict(key => String for key in joining_keys()))
 
-function load_csv_files(data, files)
-    for file in files
-        new_data = read_output_with_types(file)
-        if size(new_data, 1) > 0 
-            data = vcat(data, new_data)
-        end
-    end
-    return data
-end
 
-joining_keys() = ["PARAMETER_TYPE", "TREATMENTS", "CASE", "CONTROL", "OUTCOME", "CONFOUNDERS", "COVARIATES"]
 
-function merge_csv_files(parsed_args)
-    tmle_files = files_matching_prefix_and_suffix(
-        parsed_args["tmle-prefix"],
-        ".csv"
+
+"""
+    make_summary(prefix; out="summary.json")
+
+# Args
+
+- `prefix`: Prefix to .hdf5 files to be used to create the summary file
+
+# Options
+
+- `-o, --out`: Ouptut JSON file
+"""
+@task function make_summary(prefix; output=JSONOutput(filename="summary.json"))
+    dirname_, prefix_ = splitdir(prefix)
+    dirname__ = dirname_ == "" ? "." : dirname_
+    files = filter(
+            x -> startswith(x, prefix_), 
+            readdir(dirname__)
     )
-    # Load tmle data
-    data = load_csv_files(empty_tmle_output(), tmle_files)
-    # Load sieve data
-    sieveprefix = parsed_args["sieve-prefix"]
-    if sieveprefix !== nothing
-        sieve_files = files_matching_prefix_and_suffix(
-            parsed_args["sieve-prefix"],
-            ".csv"
-        )
-        sieve_data = load_csv_files(empty_sieve_output(), sieve_files)
-        if size(sieve_data, 1) > 0
-            data = leftjoin(data, sieve_data, on=joining_keys(), matchmissing=:equal)
+    # Initialize JSON output
+    initialize(output)
+    # Write all but last batch
+    for filename in files[1:end-1]        
+        filepath = joinpath(dirname_, filename)
+        jldopen(filepath) do io
+            for batch_key in keys(io)
+                update_file(output, io[batch_key])
+            end
         end
     end
-
-    # Pvalue Adjustment by Target
-    for gp in groupby(data, :OUTCOME)
-        gp.TRAIT_ADJUSTED_TMLE_PVALUE = gp[:, :TMLE_PVALUE]
-        pvalues = collect(skipmissing(gp.TMLE_PVALUE))
-        if length(pvalues) > 0
-            adjusted_pvalues = adjust(pvalues, BenjaminiHochberg())
-            adjusted_pval_index = 1
-            for index in eachindex(gp.TRAIT_ADJUSTED_TMLE_PVALUE)
-                gp.TRAIT_ADJUSTED_TMLE_PVALUE[index] === missing && continue
-                gp.TRAIT_ADJUSTED_TMLE_PVALUE[index] = adjusted_pvalues[adjusted_pval_index]
-                adjusted_pval_index += 1
-            end
+    # Write last batch
+    filepath = joinpath(dirname_, files[end])
+    jldopen(filepath) do io
+        nkeys = length(keys(io))
+        for (batch_index, batch_key) in enumerate(keys(io))
+            finalize = batch_index == nkeys ? true : false
+            update_file(output, io[batch_key], finalize=finalize)
         end
     end
-
-    # Write to output file
-    CSV.write(parsed_args["out"], data)
-
     return 0
 end
\ No newline at end of file
diff --git a/test/merge.jl b/test/merge.jl
index d8967ca..946e84c 100644
--- a/test/merge.jl
+++ b/test/merge.jl
@@ -6,102 +6,7 @@ using CSV
 using DataFrames
 
 @testset "Test merge_csv_files, no sieve file" begin
-    parsed_args = Dict(
-        "tmle-prefix" => joinpath("data", "merge", "tmle"),
-        "sieve-prefix" => nothing,
-        "out" => "output.csv"
-    )
-    merge_csv_files(parsed_args)
-    output = CSV.read(parsed_args["out"], DataFrame)
-    @test names(output) == [
-        "PARAMETER_TYPE", "TREATMENTS", "CASE",
-        "CONTROL", "OUTCOME", "CONFOUNDERS",
-        "COVARIATES", "INITIAL_ESTIMATE", 
-        "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB",
-        "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", 
-        "LOG", "TRAIT_ADJUSTED_TMLE_PVALUE"
-    ]
-    @test size(output, 1) == 8
-
-    for (pval, adjusted_pval) in zip(output.TMLE_PVALUE, output.TRAIT_ADJUSTED_TMLE_PVALUE)
-        if pval === missing
-            @test adjusted_pval === missing
-        else
-            @test pval <= adjusted_pval
-        end
-    end
-
-    @test output.PARAMETER_TYPE == [
-        "IATE", "IATE", "ATE",
-        "IATE", "IATE", "ATE",
-        "ATE", "CM"
-    ]
-    rm(parsed_args["out"])
-end
-
-@testset "Test merge_csv_files, sieve file" begin
-    sieve_colnames = [
-        "PARAMETER_TYPE", "TREATMENTS", "CASE",
-        "CONTROL", "OUTCOME", "CONFOUNDERS",
-        "COVARIATES", "INITIAL_ESTIMATE", 
-        "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB", 
-        "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB",
-        "LOG", "SIEVE_STD", "SIEVE_PVALUE", "SIEVE_LWB", "SIEVE_UPB", "TRAIT_ADJUSTED_TMLE_PVALUE"
-    ]
-    parsed_args = Dict(
-        "tmle-prefix" => joinpath("data", "merge", "tmle"),
-        "sieve-prefix" => joinpath("data", "merge", "sieve"),
-        "out" => "output.csv"
-    )
-    merge_csv_files(parsed_args)
-    output = CSV.read(parsed_args["out"], DataFrame)
-    @test names(output) == sieve_colnames
-    @test size(output, 1) == 8
-    @test output.SIEVE_STD isa Vector{Float64}
-    @test output.PARAMETER_TYPE == [
-        "IATE", "IATE", "ATE",
-        "IATE", "IATE", "ATE",
-        "ATE", "CM"
-    ]
-
-    parsed_args = Dict(
-        "tmle-prefix" => joinpath("data", "merge", "tmle"),
-        "sieve-prefix" => joinpath("data", "merge", "sieve_output_2"),
-        "out" => "output.csv"
-    )
-    merge_csv_files(parsed_args)
-    output = CSV.read(parsed_args["out"], DataFrame)
-    @test names(output) == sieve_colnames
-    @test size(output, 1) == 8
-    @test all(x===missing for x in output.SIEVE_STD[3:end])
-
-    rm(parsed_args["out"])
-end
-
-@testset "Test merge_csv_files, empty sieve file" begin
-    parsed_args = Dict(
-        "tmle-prefix" => joinpath("data", "merge", "tmle"),
-        "sieve-prefix" => joinpath("data", "merge", "empty"),
-        "out" => "output.csv"
-    )
-    merge_csv_files(parsed_args)
-    output = CSV.read(parsed_args["out"], DataFrame)
-    @test names(output) == [
-        "PARAMETER_TYPE", "TREATMENTS", "CASE",
-        "CONTROL", "OUTCOME", "CONFOUNDERS",
-        "COVARIATES", "INITIAL_ESTIMATE", 
-        "TMLE_ESTIMATE", "TMLE_STD", "TMLE_PVALUE", "TMLE_LWB", "TMLE_UPB",
-        "ONESTEP_ESTIMATE", "ONESTEP_STD", "ONESTEP_PVALUE", "ONESTEP_LWB", "ONESTEP_UPB", 
-        "LOG", "TRAIT_ADJUSTED_TMLE_PVALUE"
-    ]
-    @test size(output, 1) == 8
-    @test output.PARAMETER_TYPE == [
-        "IATE", "IATE", "ATE",
-        "IATE", "IATE", "ATE",
-        "ATE", "CM"
-    ]
-
-    rm(parsed_args["out"])
+    make_summary("tmle_out")
 end
 
 
From 6b128fd2bbe76305812257dc3d3956da54f4fca2 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 30 Nov 2023 17:33:32 +0000
Subject: [PATCH 18/71] fix all tests

---
 src/TargetedEstimation.jl |  4 +--
 src/merge.jl              | 57 -----------------------------
 src/outputs.jl            | 41 +++++++--------------
 src/runner.jl             |  2 --
 src/sieve_variance.jl     | 38 --------------------
 src/summary.jl            | 67 ++++++++++++++++++++++++++++++++++
 test/merge.jl             | 15 --------
 test/runner.jl            | 63 ++------------------------------
 test/runtests.jl          |  2 +-
 test/sieve_variance.jl    |  2 ++
 test/summary.jl           | 75 +++++++++++++++++++++++++++++++++++++++
 test/testutils.jl         | 51 ++++++++++++++++++++++++++
 12 files changed, 213 insertions(+), 204 deletions(-)
 delete mode 100644 src/merge.jl
 create mode 100644 src/summary.jl
 delete mode 100644 test/merge.jl
 create mode 100644 test/summary.jl

diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index 7d94897..e38087c 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -37,7 +37,7 @@ include("outputs.jl")
 include("runner.jl")
 include("utils.jl")
 include("sieve_variance.jl")
-include("merge.jl")
+include("summary.jl")
 include("resampling.jl")
 include(joinpath("models", "glmnet.jl"))
 include(joinpath("models", "adaptive_interaction_transformer.jl"))
@@ -47,7 +47,7 @@ include(joinpath("models", "biallelic_snp_encoder.jl"))
 """TL CLI."""
 @main
 
-export Runner, tmle, sieve_variance_plateau, merge_csv_files
+export Runner, tmle, sieve_variance_plateau, make_summary
 export GLMNetRegressor, GLMNetClassifier
 export RestrictedInteractionTransformer, BiAllelicSNPEncoder
 export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV
diff --git a/src/merge.jl b/src/merge.jl
deleted file mode 100644
index 3bcce8e..0000000
--- a/src/merge.jl
+++ /dev/null
@@ -1,57 +0,0 @@
-
-function files_matching_prefix_and_suffix(prefix, suffix)
-    dirname_, prefix_ = splitdir(prefix)
-    dirname__ = dirname_ == "" ? "." : dirname_
-    files = filter(
-            x -> startswith(x, prefix_) && endswith(x, suffix), 
-            readdir(dirname__)
-    )
-    return [joinpath(dirname_, x) for x in files]
-end
-
-read_output_with_types(file) = 
-    CSV.read(file, DataFrame, types=Dict(key => String for key in joining_keys()))
-
-
-
-
-"""
-    make_summary(prefix; out="summary.json")
-
-# Args
-
-- `prefix`: Prefix to .hdf5 files to be used to create the summary file
-
-# Options
-
-- `-o, --out`: Ouptut JSON file
-"""
-@task function make_summary(prefix; output=JSONOutput(filename="summary.json"))
-    dirname_, prefix_ = splitdir(prefix)
-    dirname__ = dirname_ == "" ? "." : dirname_
-    files = filter(
-            x -> startswith(x, prefix_), 
-            readdir(dirname__)
-    )
-    # Initialize JSON output
-    initialize(output)
-    # Write all but last batch
-    for filename in files[1:end-1]        
-        filepath = joinpath(dirname_, filename)
-        jldopen(filepath) do io
-            for batch_key in keys(io)
-                update_file(output, io[batch_key])
-            end
-        end
-    end
-    # Write last batch
-    filepath = joinpath(dirname_, files[end])
-    jldopen(filepath) do io
-        nkeys = length(keys(io))
-        for (batch_index, batch_key) in enumerate(keys(io))
-            finalize = batch_index == nkeys ? true : false
-            update_file(output, io[batch_key], finalize=finalize)
-        end
-    end
-    return 0
-end
\ No newline at end of file
diff --git a/src/outputs.jl b/src/outputs.jl
index 1044fe3..4302919 100644
--- a/src/outputs.jl
+++ b/src/outputs.jl
@@ -61,9 +61,7 @@ end
     compress::Bool = false
 end
 
-function update_file(output::HDF5Output, results, dataset)
-    output.filename === nothing && return
-    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
+function update_file(output::HDF5Output, results; finalize=false)
     jldopen(output.filename, "a+", compress=output.compress) do io
         batches_keys = keys(io)
         latest_index = isempty(batches_keys) ? 0 : maximum(parse(Int, split(key, "_")[2]) for key in batches_keys)
@@ -71,6 +69,12 @@ function update_file(output::HDF5Output, results, dataset)
     end
 end
 
+function update_file(output::HDF5Output, results, dataset)
+    output.filename === nothing && return
+    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
+    update_file(output, results)
+end
+
 #####################################################################
 #####                        JLS OUTPUT                          ####
 #####################################################################
@@ -81,10 +85,7 @@ end
     sample_ids::Bool = false
 end
 
-function update_file(output::JLSOutput, results, dataset)
-    output.filename === nothing && return
-    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
-
+function update_file(output::JLSOutput, results; finalize=false)
     open(output.filename, "a") do io
         for result in results
             serialize(io, result)
@@ -92,28 +93,13 @@ function update_file(output::JLSOutput, results, dataset)
     end
 end
 
-#####################################################################
-#####                       STD OUTPUT                          ####
-#####################################################################
-
-function update_file(doprint, results, partition)
-    if doprint
-        mimetext = MIME"text/plain"()
-        index = 1
-        for (result, estimand_index) in zip(results, partition)
-            show(stdout, mimetext, string("⋆⋆⋆ Estimand ", estimand_index, " ⋆⋆⋆"))
-            println(stdout)
-            show(stdout, mimetext, first(result).estimand)
-            for (key, val) ∈ zip(keys(result), result)
-                show(stdout, mimetext, string("→ Estimation Result From: ", key, ))
-                println(stdout)
-                show(stdout, mimetext, val)
-                index += 1
-            end
-        end
-    end
+function update_file(output::JLSOutput, results, dataset)
+    output.filename === nothing && return
+    results = post_process(results, dataset, output.pval_threshold, output.sample_ids)
+    update_file(output, results)
 end
 
+
 #####################################################################
 #####                         OUTPUTS                            ####
 #####################################################################
@@ -122,7 +108,6 @@ end
     json::JSONOutput = JSONOutput()
     hdf5::HDF5Output = HDF5Output()
     jls::JLSOutput   = JLSOutput()
-    std::Bool        = false
 end
 
 """
diff --git a/src/runner.jl b/src/runner.jl
index 0817526..473e873 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -38,8 +38,6 @@ mutable struct Runner
 end
 
 function save(runner::Runner, results, partition, finalize)
-    # Append STD Out
-    update_file(runner.outputs.std, results, partition)
     # Append JSON Output
     update_file(runner.outputs.json, results; finalize=finalize)
     # Append JLS Output
diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index 290e672..d113cc7 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -23,44 +23,6 @@ function align_ic(ic, sample_ids, grm_ids)
     return coalesce.(aligned_ic, 0)
 end
 
-sieve_dataframe() = DataFrame(
-    PARAMETER_TYPE=String[], 
-    TREATMENTS=String[], 
-    CASE=String[], 
-    CONTROL=Union{String, Missing}[], 
-    OUTCOME=String[], 
-    CONFOUNDERS=String[], 
-    COVARIATES=Union{String, Missing}[], 
-    TMLE_ESTIMATE=Float64[],
-)
-
-empty_sieve_output() = DataFrame(
-    PARAMETER_TYPE=String[], 
-    TREATMENTS=String[], 
-    CASE=String[], 
-    CONTROL=Union{String, Missing}[], 
-    OUTCOME=String[], 
-    CONFOUNDERS=String[], 
-    COVARIATES=Union{String, Missing}[], 
-    SIEVE_STD = Float64[],
-    SIEVE_PVALUE = Float64[],
-    SIEVE_LWB = Float64[],
-    SIEVE_UPB = Float64[],
-)
-
-function push_sieveless!(output, Ψ, Ψ̂)
-    target = string(Ψ.target)
-    param_type = param_string(Ψ)
-    treatments = treatment_string(Ψ)
-    case = case_string(Ψ)
-    control = control_string(Ψ)
-    confounders = confounders_string(Ψ)
-    covariates = covariates_string(Ψ)
-    push!(output, (
-        param_type, treatments, case, control, target, confounders, covariates, Ψ̂
-    ))
-end
-
 """
     bit_distances(sample_grm, nτs)
 
diff --git a/src/summary.jl b/src/summary.jl
new file mode 100644
index 0000000..6ebf4c5
--- /dev/null
+++ b/src/summary.jl
@@ -0,0 +1,67 @@
+
+function files_matching_prefix_and_suffix(prefix, suffix)
+    dirname_, prefix_ = splitdir(prefix)
+    dirname__ = dirname_ == "" ? "." : dirname_
+    files = filter(
+            x -> startswith(x, prefix_) && endswith(x, suffix), 
+            readdir(dirname__)
+    )
+    return [joinpath(dirname_, x) for x in files]
+end
+
+read_output_with_types(file) = 
+    CSV.read(file, DataFrame, types=Dict(key => String for key in joining_keys()))
+
+"""
+    make_summary(
+        prefix; 
+        outputs=Outputs(json=JSONOutput(filename="summary.json"))
+    )
+
+Combines multiple TMLE .hdf5 output files in a single file. Multiple formats can be output at once.
+
+# Args
+
+- `prefix`: Prefix to .hdf5 files to be used to create the summary file
+
+# Options
+
+- `-o, --outputs`: Outputs configuration.
+"""
+@cast function make_summary(
+    prefix; 
+    outputs=Outputs(json=JSONOutput(filename="summary.json"))
+    )
+    
+    # Initialize output files
+    initialize(outputs)
+    actual_outputs = [getfield(outputs, field) for field ∈ fieldnames(Outputs) 
+        if getfield(outputs, field).filename !== nothing]
+
+    # Get all input .hdf5 files
+    dirname_, prefix_ = splitdir(prefix)
+    dirname__ = dirname_ == "" ? "." : dirname_
+    files = sort(filter(
+            x -> startswith(x, prefix_), 
+            readdir(dirname__)
+    ))
+    nfiles = length(files)
+
+    # Write to files
+    for (file_index, filename) in enumerate(files)
+        filepath = joinpath(dirname_, filename)
+        jldopen(filepath) do io
+            batch_keys = collect(keys(io))
+            nbatches = length(batch_keys)
+            for (batch_index, batch_key) in enumerate(batch_keys)
+                results = io[batch_key]
+                finalize = file_index == nfiles && batch_index == nbatches
+                for output in actual_outputs
+                    update_file(output, results; finalize=finalize)
+                end
+            end
+        end
+    end
+
+    return 0
+end
\ No newline at end of file
diff --git a/test/merge.jl b/test/merge.jl
deleted file mode 100644
index 946e84c..0000000
--- a/test/merge.jl
+++ /dev/null
@@ -1,15 +0,0 @@
-module TestMergeCSVFiles
-
-using TargetedEstimation
-using Test
-using CSV
-using DataFrames
-
-@testset "Test merge_csv_files, no sieve file" begin
-    make_summary("tmle_out")
-end
-
-
-end
-
-true
\ No newline at end of file
diff --git a/test/runner.jl b/test/runner.jl
index fd52bc3..3acccfc 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -4,14 +4,8 @@ using Test
 using TargetedEstimation
 using TMLE
 using JLD2
-using StableRNGs
-using Distributions
-using LogExpFunctions
-using CategoricalArrays
-using DataFrames
 using CSV
 using Serialization
-using Arrow
 using YAML
 using JSON
 
@@ -21,50 +15,6 @@ CONFIGDIR = joinpath(TESTDIR, "config")
 
 include(joinpath(TESTDIR, "testutils.jl"))
 
-"""
-CONTINUOUS_OUTCOME: 
-- IATE(0->1, 0->1) = E[W₂] = 0.5
-- ATE(0->1, 0->1)  = -4 E[C₁] + 1 + E[W₂] = -2 + 1 + 0.5 = -0.5
-
-BINARY_OUTCOME:
-- IATE(0->1, 0->1) =
-- ATE(0->1, 0->1)  = 
-
-"""
-function build_dataset(;n=1000, format="csv")
-    rng = StableRNG(123)
-    # Confounders
-    W₁ = rand(rng, Uniform(), n)
-    W₂ = rand(rng, Uniform(), n)
-    # Covariates
-    C₁ = rand(rng, n)
-    # Treatment | Confounders
-    T₁ = rand(rng, Uniform(), n) .< logistic.(0.5sin.(W₁) .- 1.5W₂)
-    T₂ = rand(rng, Uniform(), n) .< logistic.(-3W₁ - 1.5W₂)
-    # target | Confounders, Covariates, Treatments
-    μ = 1 .+ 2W₁ .+ 3W₂ .- 4C₁.*T₁ .+ T₁ + T₂.*W₂.*T₁
-    y₁ = μ .+ rand(rng, Normal(0, 0.01), n)
-    y₂ = rand(rng, Uniform(), n) .< logistic.(μ)
-    # Add some missingness
-    y₂ = vcat(missing, y₂[2:end])
-
-    dataset = DataFrame(
-        SAMPLE_ID = 1:n,
-        T1 = categorical(T₁),
-        T2 = categorical(T₂),
-        W1 = W₁, 
-        W2 = W₂,
-        C1 = C₁,
-    )
-    # Comma in name
-    dataset[!, "CONTINUOUS, OUTCOME"] = categorical(y₁)
-    # Slash in name
-    dataset[!, "BINARY/OUTCOME"] = categorical(y₂)
-    dataset[!, "EXTREME_BINARY"] = categorical(vcat(0, ones(n-1)))
-
-    format == "csv" ? CSV.write("data.csv", dataset) : Arrow.write("data.arrow", dataset)
-end
-
 @testset "Integration Test" begin
     build_dataset(;n=1000, format="csv")
     tmpdir = mktempdir(cleanup=true)
@@ -74,7 +24,6 @@ end
         json=TargetedEstimation.JSONOutput(filename="output.json"),
         hdf5=TargetedEstimation.HDF5Output(filename="output.hdf5", pval_threshold=1., sample_ids=true),
         jls=TargetedEstimation.JLSOutput(filename="output.jls", pval_threshold=1e-5),
-        std=true,
     )
     runner = Runner(
         "data.csv", 
@@ -90,16 +39,9 @@ end
         @test result.OSE isa TMLE.OSEstimate
     end
 
-    # Test Save to STDOUT
-    output_txt = "output.txt"
+    # Save outputs
     TargetedEstimation.initialize(outputs)
-    open(output_txt, "w") do io
-        redirect_stdout(io) do
-            TargetedEstimation.save(runner, results, partition, true)
-        end
-    end
-    stdout_content = read(output_txt, String)
-    @test all(occursin("Estimand $i", stdout_content) for i in partition)
+    TargetedEstimation.save(runner, results, partition, true)
 
     # Test Save to JSON
     loaded_results = TMLE.read_json(outputs.json.filename)
@@ -160,7 +102,6 @@ end
     # Clean
     rm("data.csv")
     rm(outputs.jls.filename)
-    rm(output_txt)
     rm(outputs.json.filename)
     rm(outputs.hdf5.filename)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index e8b741a..50cc785 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -9,7 +9,7 @@ TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
     @test include(joinpath(TESTDIR, "utils.jl"))
     @test include(joinpath(TESTDIR, "sieve_variance.jl"))
     @test include(joinpath(TESTDIR, "runner.jl"))
-    @test include(joinpath(TESTDIR, "merge.jl"))
+    @test include(joinpath(TESTDIR, "summary.jl"))
     @test include(joinpath(TESTDIR, "resampling.jl"))
     @test include(joinpath(TESTDIR, "models", "glmnet.jl"))
     @test include(joinpath(TESTDIR, "models", "adaptive_interaction_transformer.jl"))
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index 3ffaad7..c357950 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -361,3 +361,5 @@ end
 end
 
 end
+
+true
diff --git a/test/summary.jl b/test/summary.jl
new file mode 100644
index 0000000..6903e88
--- /dev/null
+++ b/test/summary.jl
@@ -0,0 +1,75 @@
+module TestMergeCSVFiles
+
+using TargetedEstimation
+using Test
+using CSV
+using DataFrames
+using Serialization
+using JLD2
+
+TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
+
+CONFIGDIR = joinpath(TESTDIR, "config")
+
+include(joinpath(TESTDIR, "testutils.jl"))
+
+@testset "Test make_summary" begin
+    build_dataset()
+    datafile = "data.csv"
+    estimatorfile = joinpath(CONFIGDIR, "ose_config.jl")
+    tmpdir = mktempdir(cleanup=true)
+    # First Run (chunksize=3: its results are read back below from Batch_1 and Batch_2)
+    tmle_output_1 = TargetedEstimation.Outputs(hdf5=TargetedEstimation.HDF5Output(filename="tmle_output_1.hdf5"))
+    config_1 = statistical_estimands_only_config()
+    configfile_1 = joinpath(tmpdir, "configuration_1.json")
+    TMLE.write_json(configfile_1, config_1)
+    tmle(datafile, configfile_1, estimatorfile; outputs=tmle_output_1, chunksize=3)
+    
+    # Second Run (default chunksize: its results are read back below from Batch_1)
+    tmle_output_2 = TargetedEstimation.Outputs(hdf5=TargetedEstimation.HDF5Output(filename="tmle_output_2.hdf5"))
+    config_2 = causal_and_composed_estimands_config()
+    configfile_2 = joinpath(tmpdir, "configuration_2.json")
+    TMLE.write_json(configfile_2, config_2)
+    tmle(datafile, configfile_2, estimatorfile; outputs=tmle_output_2)
+
+    # Make summary files
+    outputs = TargetedEstimation.Outputs(
+        json=TargetedEstimation.JSONOutput(filename="summary.json"),
+        hdf5=TargetedEstimation.HDF5Output(filename="summary.hdf5"),
+        jls=TargetedEstimation.JLSOutput(filename="summary.jls")
+    )
+    make_summary("tmle_output", outputs=outputs)
+
+    # Test correctness: the function form of jldopen closes each input file once it is read
+    inputs_1 = jldopen(io -> vcat(io["Batch_1"], io["Batch_2"]), "tmle_output_1.hdf5")
+    inputs = vcat(inputs_1, jldopen(io -> io["Batch_1"], "tmle_output_2.hdf5"))
+
+    json_outputs = TMLE.read_json(outputs.json.filename)
+    jls_outputs = []
+    open(outputs.jls.filename) do io
+        while !eof(io)
+            push!(jls_outputs, deserialize(io))
+        end
+    end
+    # Read every batch of the summary and close the file so it can be removed during cleanup
+    hdf5_outputs = jldopen(io -> vcat((io[key] for key in keys(io))...), outputs.hdf5.filename)
+
+    @test length(inputs) == 9
+    for (input, jls_output, hdf5_out, json_output) in zip(inputs, jls_outputs, hdf5_outputs, json_outputs)
+        @test input.OSE.estimand == jls_output.OSE.estimand == hdf5_out.OSE.estimand == json_output[:OSE].estimand
+    end
+
+    # cleanup
+    rm("tmle_output_1.hdf5")
+    rm("tmle_output_2.hdf5")
+    rm(outputs.json.filename)
+    rm(outputs.jls.filename)
+    rm(outputs.hdf5.filename)
+    rm(datafile)
+end
+
+
+end
+
+true
\ No newline at end of file
diff --git a/test/testutils.jl b/test/testutils.jl
index 4f286c0..ef5b992 100644
--- a/test/testutils.jl
+++ b/test/testutils.jl
@@ -1,4 +1,11 @@
 using TMLE
+using StableRNGs
+using DataFrames
+using Distributions
+using LogExpFunctions
+using CSV
+using Arrow
+using CategoricalArrays
 
 function statistical_estimands_only_config()
     configuration = Configuration(
@@ -77,3 +84,47 @@ function causal_and_composed_estimands_config()
     )
     return configuration
 end
+
+"""
+    build_dataset(;n=1000, format="csv")
+
+Simulate `n` samples with a fixed seed and write them to "data.csv" (or "data.arrow").
+
+True effects on "CONTINUOUS, OUTCOME" when T1 and T2 both go 0->1:
+- IATE = E[W₂] = 0.5
+- ATE  = -4 E[C₁] + 1 + E[W₂] = -2 + 1 + 0.5 = -0.5
+The true effects on "BINARY/OUTCOME" are not derived here.
+"""
+function build_dataset(;n=1000, format="csv")
+    rng = StableRNG(123)
+    # Confounders
+    W₁ = rand(rng, Uniform(), n)
+    W₂ = rand(rng, Uniform(), n)
+    # Covariates
+    C₁ = rand(rng, n)
+    # Treatment | Confounders
+    T₁ = rand(rng, Uniform(), n) .< logistic.(0.5sin.(W₁) .- 1.5W₂)
+    T₂ = rand(rng, Uniform(), n) .< logistic.(-3W₁ - 1.5W₂)
+    # target | Confounders, Covariates, Treatments
+    μ = 1 .+ 2W₁ .+ 3W₂ .- 4C₁.*T₁ .+ T₁ + T₂.*W₂.*T₁
+    y₁ = μ .+ rand(rng, Normal(0, 0.01), n)
+    y₂ = rand(rng, Uniform(), n) .< logistic.(μ)
+    # Add some missingness
+    y₂ = vcat(missing, y₂[2:end])
+
+    dataset = DataFrame(
+        SAMPLE_ID = 1:n,
+        T1 = categorical(T₁),
+        T2 = categorical(T₂),
+        W1 = W₁, 
+        W2 = W₂,
+        C1 = C₁,
+    )
+    # Comma in name
+    dataset[!, "CONTINUOUS, OUTCOME"] = categorical(y₁)
+    # Slash in name
+    dataset[!, "BINARY/OUTCOME"] = categorical(y₂)
+    dataset[!, "EXTREME_BINARY"] = categorical(vcat(0, ones(n-1)))
+
+    format == "csv" ? CSV.write("data.csv", dataset) : Arrow.write("data.arrow", dataset)
+end
\ No newline at end of file

From 72fd116c294a8e6b5180b5be45ee7da0247af030 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 30 Nov 2023 17:51:18 +0000
Subject: [PATCH 19/71] fix typo

---
 src/runner.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runner.jl b/src/runner.jl
index 473e873..e7961c8 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -135,7 +135,7 @@ TMLE CLI.
 # Options
 
 - `-v, --verbosity`: Verbosity level.
-- `-o, --outputs`: Ouputs to be genrated.
+- `-o, --outputs`: Outputs to be generated.
 - `--chunksize`: Results are written in batches of size chunksize.
 - `-r, --rng`: Random seed (Only used for estimands ordering at the moment).
 - `-c, --cache-strategy`: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size").

From a459a1c8da783e3aad7f99c338d00d4ca447aa20 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 30 Nov 2023 18:24:21 +0000
Subject: [PATCH 20/71] add more Comonicon stuff

---
 Comonicon.toml             | 16 +++++++--
 deps/execute.jl            |  7 ++++
 scripts/merge_summaries.jl | 26 --------------
 scripts/sieve_variance.jl  | 41 ----------------------
 scripts/tmle.jl            | 70 --------------------------------------
 5 files changed, 21 insertions(+), 139 deletions(-)
 create mode 100644 deps/execute.jl
 delete mode 100644 scripts/merge_summaries.jl
 delete mode 100644 scripts/sieve_variance.jl
 delete mode 100644 scripts/tmle.jl

diff --git a/Comonicon.toml b/Comonicon.toml
index bd97f0f..19f0cb6 100644
--- a/Comonicon.toml
+++ b/Comonicon.toml
@@ -1,8 +1,20 @@
-name = "TargetedEstimation"
+name = "fasttmle"
 
 [install]
 completion = true
 quiet = false
 optimize = 2
 
-[sysimg]
\ No newline at end of file
+[sysimg]
+incremental=true
+filter_stdlibs=false
+
+[sysimg.precompile]
+execution_file = ["deps/execute.jl"]
+
+[application]
+incremental=true
+filter_stdlibs=false
+
+[application.precompile]
+execution_file = ["deps/execute.jl"]
\ No newline at end of file
diff --git a/deps/execute.jl b/deps/execute.jl
new file mode 100644
index 0000000..666bc56
--- /dev/null
+++ b/deps/execute.jl
@@ -0,0 +1,7 @@
+using TargetedEstimation
+
+TargetedEstimation.command_main(["-h"])
+TargetedEstimation.command_main(["tmle", "-h"])
+TargetedEstimation.command_main(["make-summary", "-h"])
+TargetedEstimation.command_main(["sieve-variance-plateau", "-h"])
+
diff --git a/scripts/merge_summaries.jl b/scripts/merge_summaries.jl
deleted file mode 100644
index 34fd604..0000000
--- a/scripts/merge_summaries.jl
+++ /dev/null
@@ -1,26 +0,0 @@
-using ArgParse
-using TargetedEstimation
-
-function parse_commandline()
-    s = ArgParseSettings(
-        description = "Merge files outputs by tmle.jl and sieve_variance.jl in a single file.",
-        commands_are_required = false)
-
-    @add_arg_table s begin
-        "tmle-prefix"
-            help = "Prefix to files output by tmle.jl"
-            required = true
-        "out"
-            help = "Output file to be generated"
-            required = true
-        "--sieve-prefix"
-            help = "Prefix to files output by sieve_variance.jl"
-            required = false
-            arg_type = String
-    end
-
-    return parse_args(s)
-end
-
-parsed_args = parse_commandline()
-merge_csv_files(parsed_args)
\ No newline at end of file
diff --git a/scripts/sieve_variance.jl b/scripts/sieve_variance.jl
deleted file mode 100644
index f6a551e..0000000
--- a/scripts/sieve_variance.jl
+++ /dev/null
@@ -1,41 +0,0 @@
-using TargetedEstimation
-using ArgParse
-
-function parse_commandline()
-    s = ArgParseSettings(description="Compute the Sieve Variance Plateau estimate for each phenotype in the result file")
-
-    @add_arg_table s begin
-        "prefix"
-            help = "Prefix to the .hdf5 files generated by the `tmle.jl` script"
-            arg_type = String
-            required = true
-        "grm-prefix"
-            arg_type = String
-            help = "Prefix of the aggregated GRM"
-            required = true
-        "out-prefix"
-            arg_type = String
-            help = "output filename"
-            required = true
-        "--nb-estimators", "-n"
-            arg_type = Int
-            help = "Number of variance estimators to compute"
-            default = 10
-        "--max-tau", "-m"
-            arg_type = Float64
-            help = "Maximum distance of individuals to take into account (maximum=2)"*
-                   "It was witnessed that beyond 0.9, weird limit effects happen"
-            default = 0.8
-        "--verbosity", "-v"
-            arg_type = Int
-            help = "Verbosity level"
-            default = 1  
-    end
-
-    return parse_args(s)
-end
-
-
-parsed_args = parse_commandline()
-
-sieve_variance_plateau(parsed_args)
diff --git a/scripts/tmle.jl b/scripts/tmle.jl
deleted file mode 100644
index 40df722..0000000
--- a/scripts/tmle.jl
+++ /dev/null
@@ -1,70 +0,0 @@
-using ArgParse
-using TargetedEstimation
-
-function parse_commandline()
-    s = ArgParseSettings(
-        description = "Targeted Learning Estimation",
-        commands_are_required = false,
-        version = "0.2",
-        add_version = true)
-
-    @add_arg_table s begin
-        "dataset"
-            help = "Path to dataset file (.csv|.arrow)"
-            required = true
-        "estimands-config"
-            help = "A .yaml file listing all parameters to estimate."
-            required = true
-        "--estimators-config"
-            help = "A file (.jl) defining the estimators to be used."
-            arg_type= String
-            required = false
-        "--hdf5-out"
-            help = "Stores the results in a HDF5 file format (see also: --pval-threshold)."
-            arg_type = String
-            default = nothing
-        "--csv-out"
-            help = "Path to an output `.csv` file."
-            required = true
-        "--pval-threshold"
-            help = """In order to save disk space, only estimation results with a p-value lesser than 
-            the threshold will have their influence curve saved. (default = 1., i.e. all influence curves are saved).
-            """
-            default = 1.
-            arg_type = Float64
-        "--sort-estimands"
-            help = "If estimands should be sorted to minimize memory usage, see also: cache-strategy."
-            default = false 
-            arg_type = Bool
-        "--cache-strategy"
-            help = string("Nuisance functions are stored in the cache during estimation. The cache can be released from these",
-            " functions to limit memory consumption. There are currently 3 caching management strategies: ",
-            "'release_unusable' (default): Will release the cache from nuisance functions that won't be used in the future. ",
-            "'K': Will keep the cache size under K nuisance functions. ",
-            "'no_cache': Disables caching. ",
-            "Note that caching strategies are better used in conjunction with `--sort-estimands` to minimized memory usage."
-            )
-            default = "release_unusable"
-            arg_type = String
-        "--chunksize"
-            help = "Results are appended to outfiles in chunks."
-            default = 100
-            arg_type = Int
-        "--rng"
-            help = "Random seed"
-            default = 123
-            arg_type = Int
-        "--verbosity", "-v"
-            help = "Verbosity level"
-            arg_type = Int
-            default = 1
-    end
-
-    return parse_args(s)
-end
-
-parsed_args = parse_commandline()
-
-tmle_estimation(parsed_args)
-
-

From c45cb82a9e545b6400ed5ca9ecfddc5101d74def Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 30 Nov 2023 18:41:02 +0000
Subject: [PATCH 21/71] update compats

---
 Project.toml      | 6 +++---
 test/Project.toml | 6 ++----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/Project.toml b/Project.toml
index a4f5a0d..2f187ac 100644
--- a/Project.toml
+++ b/Project.toml
@@ -41,7 +41,7 @@ Combinatorics = "1.0.2"
 Comonicon = "1.0.6"
 Configurations = "0.17.6"
 DataFrames = "1.3.4"
-EvoTrees = "0.14.6"
+EvoTrees = "0.16.5"
 GLMNet = "0.7"
 HighlyAdaptiveLasso = "0.2.0"
 JLD2 = "0.4.22"
@@ -49,11 +49,11 @@ JSON = "0.21.4"
 MKL = "0.6"
 MLJ = "0.20.0"
 MLJBase = "1.0.1"
-MLJLinearModels = "0.9"
+MLJLinearModels = "0.10.0"
 MLJModelInterface = "1.8.0"
 MLJModels = "0.16"
 MLJXGBoostInterface = "0.3.4"
-MultipleTesting = "0.5.1"
+MultipleTesting = "0.6.0"
 Optim = "1.7"
 Tables = "1.10.1"
 YAML = "0.4.9"
diff --git a/test/Project.toml b/test/Project.toml
index 465caa7..440b866 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -6,14 +6,14 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
-MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c"
 MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
-RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 TMLE = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
@@ -23,6 +23,4 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 [compat]
 Distributions = "0.25"
 LogExpFunctions = "0.3"
-MLJGLMInterface = "0.3"
-RCall = "0.13"
 StableRNGs = "1.0"

From 2ec3ac57f876af4d02d7089a157f99143f3146f6 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 30 Nov 2023 18:41:24 +0000
Subject: [PATCH 22/71] remove deprecated files

---
 test/config/failing_parameters.yaml        |   6 ----
 test/config/parameters.bin                 | Bin 487 -> 0 bytes
 test/config/parameters.yaml                |  31 --------------------
 test/config/problematic_tmle_ose_config.jl |  14 ---------
 test/config/sieve_tests_parameters_1.yaml  |  32 ---------------------
 test/config/sieve_tests_parameters_2.yaml  |   9 ------
 test/data/merge/empty_sieve.csv            |   1 -
 test/data/merge/sieve_output_1.csv         |   7 -----
 test/data/merge/sieve_output_2.csv         |   3 --
 test/data/merge/tmle_output_1.csv          |   7 -----
 test/data/merge/tmle_output_2.csv          |   3 --
 test/data/sieve_variances.hdf5             | Bin 4790 -> 0 bytes
 12 files changed, 113 deletions(-)
 delete mode 100644 test/config/failing_parameters.yaml
 delete mode 100644 test/config/parameters.bin
 delete mode 100644 test/config/parameters.yaml
 delete mode 100644 test/config/problematic_tmle_ose_config.jl
 delete mode 100644 test/config/sieve_tests_parameters_1.yaml
 delete mode 100644 test/config/sieve_tests_parameters_2.yaml
 delete mode 100644 test/data/merge/empty_sieve.csv
 delete mode 100644 test/data/merge/sieve_output_1.csv
 delete mode 100644 test/data/merge/sieve_output_2.csv
 delete mode 100644 test/data/merge/tmle_output_1.csv
 delete mode 100644 test/data/merge/tmle_output_2.csv
 delete mode 100644 test/data/sieve_variances.hdf5

diff --git a/test/config/failing_parameters.yaml b/test/config/failing_parameters.yaml
deleted file mode 100644
index 92fdeff..0000000
--- a/test/config/failing_parameters.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-  Estimands:
-    - type: ATE
-      outcome: EXTREME_BINARY
-      treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-      treatment_confounders: (T1 = [W1, W2], T2 = [W1, W2])
-      outcome_extra_covariates: [C1]
\ No newline at end of file
diff --git a/test/config/parameters.bin b/test/config/parameters.bin
deleted file mode 100644
index 3fe2558a5906a110f484958b93a56a08c0b32505..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 487
zcmXr_@)2ZVU|<jxVB`!)EK1BxElDks=RSYpCQs_dr0)C9#d*Ik{q1683GwxDb$Mzc
zz{uk17~+ak1*4F&zh8)_U#Nd*u#Q5AW01RR2v8H3AJF8KkkW#jRC&MIE=)ihMVJ_w
zLJWb}NFW#{CZNN{0AmB?S&|crQyJNl^YcoI@^cW%TwQfI7)*fJ*9St1GB7@5WC{nG
z7H$M&KVW2XHZ;*;W?<xX^7L~IiqwZXM2D5IHUUOvd>+(cVK6~)CL_oj5VtV`y#;p}
hnw#!}+yl~#;szgItjc%@DTDfuAJar%A2bueN&y8sYxV#D

diff --git a/test/config/parameters.yaml b/test/config/parameters.yaml
deleted file mode 100644
index 4dea179..0000000
--- a/test/config/parameters.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-Estimands:
-  - type: TMLE.StatisticalIATE
-    outcome: CONTINUOUS, outcome
-    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
-    outcome_extra_covariates: (:C1,)
-  - type: TMLE.StatisticalIATE
-    outcome: "BINARY/outcome"
-    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
-    outcome_extra_covariates: (:C1,)
-  - type: TMLE.StatisticalATE
-    outcome: CONTINUOUS, outcome
-    treatment_values: (T1 = (control = 0, case = 1),)
-    treatment_confounders: (T1 = (:W1, :W2),)
-    outcome_extra_covariates: ()
-  - type: TMLE.StatisticalIATE
-    outcome: CONTINUOUS, outcome
-    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
-    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
-    outcome_extra_covariates: ()
-  - type: TMLE.StatisticalIATE
-    outcome: "BINARY/outcome"
-    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
-    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
-    outcome_extra_covariates: (:C1,)
-  - type: TMLE.StatisticalATE
-    outcome: CONTINUOUS, outcome
-    treatment_values: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    treatment_confounders: (T1 = (:W1, :W2), T2 = (:W1, :W2))
-    outcome_extra_covariates: (:C1,)
\ No newline at end of file
diff --git a/test/config/problematic_tmle_ose_config.jl b/test/config/problematic_tmle_ose_config.jl
deleted file mode 100644
index a517cf4..0000000
--- a/test/config/problematic_tmle_ose_config.jl
+++ /dev/null
@@ -1,14 +0,0 @@
-default_models = TMLE.default_models(
-  Q_continuous = LinearRegressor(),
-  # For the estimation of E[Y|W, T]: binary target
-  Q_binary = LogisticClassifier(),
-  # This will fail
-  G = LogisticClassifier()
-)
-
-models = merge(default_models, (T2 = LinearRegressor(),))
-
-ESTIMATORS = (
-  TMLE = TMLEE(models=models, weighted=true, ps_lowerbound=0.001),
-  OSE  = OSE(models=models)
-)
\ No newline at end of file
diff --git a/test/config/sieve_tests_parameters_1.yaml b/test/config/sieve_tests_parameters_1.yaml
deleted file mode 100644
index 9edf5fe..0000000
--- a/test/config/sieve_tests_parameters_1.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-Parameters:
-  - type: IATE
-    target: CONTINUOUS, OUTCOME
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    confounders: [W1, W2]
-    covariates: [C1]
-  - type: IATE
-    target: CONTINUOUS, OUTCOME
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
-    confounders: [W1, W2]
-    covariates: [C1]
-  - type: ATE
-    target: CONTINUOUS, OUTCOME
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    confounders: [W1, W2]
-    covariates: [C1]
-  - type: IATE
-    target: "BINARY/OUTCOME"
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    confounders: [W1, W2]
-    covariates: [C1]
-  - type: IATE
-    target: "BINARY/OUTCOME"
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 1, case = 0))
-    confounders: [W1, W2]
-    covariates: [C1]
-  - type: ATE
-    target: "BINARY/OUTCOME"
-    treatment: (T1 = (control = 0, case = 1), T2 = (control = 0, case = 1))
-    confounders: [W1, W2]
-    covariates: [C1]
-  
\ No newline at end of file
diff --git a/test/config/sieve_tests_parameters_2.yaml b/test/config/sieve_tests_parameters_2.yaml
deleted file mode 100644
index 0d147be..0000000
--- a/test/config/sieve_tests_parameters_2.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-Parameters:
-  - type: ATE
-    target: CONTINUOUS, OUTCOME
-    treatment: (T1 = (control = 0, case = 1),)
-    confounders: [W1]
-  - type: CM
-    target: CONTINUOUS, OUTCOME
-    treatment: (T1 = 0,)
-    confounders: [W1]
\ No newline at end of file
diff --git a/test/data/merge/empty_sieve.csv b/test/data/merge/empty_sieve.csv
deleted file mode 100644
index 3241e3c..0000000
--- a/test/data/merge/empty_sieve.csv
+++ /dev/null
@@ -1 +0,0 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES
diff --git a/test/data/merge/sieve_output_1.csv b/test/data/merge/sieve_output_1.csv
deleted file mode 100644
index cfe77b9..0000000
--- a/test/data/merge/sieve_output_1.csv
+++ /dev/null
@@ -1,7 +0,0 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB
-IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.10514479130506516,0.0,0.39844897646996624,0.42804034052713935
-IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.10514479130506516,0.0,-0.42804034052713935,-0.39844897646996624
-ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.14041906595230103,0.0,-0.6661267914170061,-0.6266080320986587
-IATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04427033991279833,0.04393992135752185
-IATE,T2_&_T1,0_&_1,1_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3126221001148224,0.9941422670223119,-0.04393992135752185,0.04427033991279833
-ATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.3183199465274811,2.0186741955776768e-7,-0.16400271059341004,-0.07418473022532235
\ No newline at end of file
diff --git a/test/data/merge/sieve_output_2.csv b/test/data/merge/sieve_output_2.csv
deleted file mode 100644
index ad536a5..0000000
--- a/test/data/merge/sieve_output_2.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,SIEVE_STD,SIEVE_PVALUE,SIEVE_LWB,SIEVE_UPB
-ATE,T1,1,0,"CONTINUOUS, OUTCOME",W1,,0.17398861050605774,0.0,-1.1780933630666999,-1.1291269782236455
-CM,T1,0,,"CONTINUOUS, OUTCOME",W1,,0.09048639982938766,0.0,3.4078416054701566,3.433307593526622
diff --git a/test/data/merge/tmle_output_1.csv b/test/data/merge/tmle_output_1.csv
deleted file mode 100644
index 574764e..0000000
--- a/test/data/merge/tmle_output_1.csv
+++ /dev/null
@@ -1,7 +0,0 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG
-IATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,0.31289224196843934,0.4132446584985528,0.11215083413905078,0.0002973305980956673,0.19204601585918746,0.6344433011379181,0.847922052214297,0.020796900602100377,0.1808979087784819,0.935635289898083,0.26988547749823344,
-IATE,T2_&_T1,0_&_1,1_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,-0.31289224196843934,-0.4132446584985528,0.11215083413905078,0.0002973305980956673,-0.6344433011379181,-0.19204601585918746,0.1132683682000456,0.007992877453115943,0.05298134725065751,0.3761329000024115,0.8446783494259822,
-ATE,T2_&_T1,1_&_1,0_&_0,"CONTINUOUS, OUTCOME",W1_&_W2,C1,-0.6913496525247373,-0.6463674117578324,0.14465023358495563,1.340594727468874e-5,-0.9316656493686948,-0.36106917414697,0.5750731876257001,0.6616018441386402,0.6626823260683342,0.9943324985582943,0.379330384132208,
-IATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,0.015114902768326591,-0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6129084528900649,0.6125780343347885,0.18524882713929447,0.6791824198934945,0.375539677029601,0.2563919842828919,0.48004747095683487,
-IATE,T2_&_T1,0_&_1,1_&_0,BINARY/OUTCOME,W1_&_W2,C1,-0.015114902768326591,0.0001652092776382386,0.3106592646611987,0.9995762353961254,-0.6125780343347885,0.6129084528900649,0.8483223420477747,0.6377742233856645,0.653653091532227,0.6594583118531032,0.3862219522578093,
-ATE,T2_&_T1,1_&_1,0_&_0,BINARY/OUTCOME,W1_&_W2,C1,-0.07124029524113125,-0.1190937204093662,0.3182495428000389,0.7086573850781657,-0.7468080019909507,0.5086205611722183,0.7833975115669672,0.13752408975674002,0.8906874812178406,0.7407394467826026,0.6391102550858685,
diff --git a/test/data/merge/tmle_output_2.csv b/test/data/merge/tmle_output_2.csv
deleted file mode 100644
index a7d02aa..0000000
--- a/test/data/merge/tmle_output_2.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-PARAMETER_TYPE,TREATMENTS,CASE,CONTROL,OUTCOME,CONFOUNDERS,COVARIATES,INITIAL_ESTIMATE,TMLE_ESTIMATE,TMLE_STD,TMLE_PVALUE,TMLE_LWB,TMLE_UPB,ONESTEP_ESTIMATE,ONESTEP_STD,ONESTEP_PVALUE,ONESTEP_LWB,ONESTEP_UPB,LOG
-ATE,T1,1,0,"CONTINUOUS, OUTCOME",W1,,-1.170325854136744,,,,,,,,,,,"Error"
-CM,T1,0,,"CONTINUOUS, OUTCOME",W1,,3.4304882451014653,3.4205745994983894,0.08649674229047534,1.6698354099787253e-94,3.249974334825743,3.5911748641710357,0.11925931782610122,0.1908267610598129,0.3548787761302413,0.6543239505251285,0.8668053182115685,
diff --git a/test/data/sieve_variances.hdf5 b/test/data/sieve_variances.hdf5
deleted file mode 100644
index ea0776cc2bb95e30c5891d2fdfc4702fbad254fa..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4790
zcmeI$d2oz(7zgm(<t4TuD~Y7eY__4iWV5R|B>O%dN05dV!BBe4CKjoyMNo0I?2<S$
z?jwzKUm`7K-#S9vXN%I-kq#B6M~XI0ZN0Q{^&4X9zqa(Bo#{LCneX#Ep5M&7f9%Zr
zOG@k-6Paos>lkcI&KQwyHzwN0*^OOCj~Qtn*Ty*BF=lM~=ut+C*=n|Gj4dB-v6|z|
zZH=+fk*Vq9j6J%v()|DU>@Sd&l-RYdzL8$%=cm!=9=^u?>StTv!*L_S!^{~GIIw3@
zVv0^%8X#bAjXK(3H0d;2ef9efuFe|48m-kBrm6X0Y}ADMX+o!=_LBoJw<h4zyq@hZ
z7B=Lh5F1BKk$6V7#6QjXOUhh{|4t0{6r*Q+mc)tZkC`uVT&~1Pvcv_uCEi~u@v(A=
zn`nt00mRY%#A5@AQ&Go&%Rt--rqn0y*noKZGsI<~#46^t)*=pTNIaqi@iF8qB3{ys
zxVDKnuNiTPp4eSv<vwlWb|p2Bj`ek|F6?XXe`WzM-Lr{9%pNX}_3*m*e=i+94){93
z!_(S%xM^n(ukP#N&nI~}qtL@Ec6&JHu7~}?6<%mkxTjI!^I;0RG2RAc7^lK@)V;87
z4^jBtAcZUa6&`9(xL+NGN4HRTJ=PXCS2!V3;kP;|Y`CCsW4)VmPAK$_VOy-P>m0PX
zy`g9(M`V7$(@wehkF#zb_w`>2Q2!I@C2n@WE9cyt_LZ9jXmZKTh8i+oj*>YyLFTg)
zWbU;@X6G`Q>n)PGa<R;57^fj`gMO*Z-{r`h<&rsnl*|*M%`o&iWDc1k^PK@QuaIP3
z)mrA^!8o_2%<W#6+0(z6cMdGvbwalz*4I@`wMxyy1Ug;ZlHyB5`~is6;R$p^Pn90f
z1?IxC(<0@5DN^AVBK`ciNb08|89o!K_EC|h!wi@SF35)dFz<*+M-Pi+MwTN3k!8s2
zgCd3P6{+5Kk%G2}G;6a+ksgsc7m768?MKHm78YN-wY9CUTe~1!iaKyd8eYpt!{SUd
z`YBC7e<pkd`e+l~d%;Ad$i*=x+GsJ+5UYtIqD(XihCmARfxa*ZrkYLk2Bh2!r%kuQ
zsR(|9*sAn1(Ivq|AJsHbvd%;|Lrk;=`F-z(WSKZ4eu#gsExzu2pJ=It%byP2Fw=8C
zT1b3Ct<e_=HfRS)Rdtz#hJI(EL6<Fb{2L3kxoDwAr53Wo%di<d@CodMwNQdR2KXKO
z<EqAR6daI@xek}GhIPkz_|`&~DsaZTW~#`~PIxo#Oj}<!xZ!fCcj!QB)_f+l>F1<q
zh<O54eO00$5n|(<)SaF5{z4~RpXa2DbDeYmd3U~(6exyW)%GIy!+rP}TEFe2pGG<<
z076DNNrH$;PD(CyQf_w_Z4Y+Ql^_>c8oMa#yo>g27*Ca3%Zr6Z`YparJff8{8l09g
z5(N5TlR#JC>QksdudrF529WbHY!qnL27%rx6zJr7fzA~OR1THZZX@r&eW;r61SRVP
zngnZ5ABW3u4U%PnV)6yb$`feNN`V5p2{fXtCS~e$H1c51&ZcKG6km7dSHX62`XO6z
zfY+A0+H1@E55(gP{VK2R!b-0#sJfnvIs(Gsy%k<tD15lwYuoS`1LmSIE`by93%rIo
z4|3Hq?1cpM8*?GwYpdh-+Ugc~ZF35)+IGxP;tzUKtiEn;ZGSr4?+E48SE=HZN`YUf
z6#4|7Mb9qummOEB@)#2A;5njFNs&sre3fReP-)mQmAp$-dI9P$Rw)RYf_ITh=U_5e
z7pinBTctg-RVtc=Jua29ohqfhs*?6aoDroGKd;hDH&lvVdz1POJ4Z<a?#HKQuHEYE
zGVc`Kd+?>N$<aNy1bB2M@F?{Ut_o^3&}g&<ZJqJ<G3oYEX^yd$>hFI|;-lgzv-;-Y
L@l(<}q%Hju(I*3L


From 563c550d0a8bcef30e57b8e78e51ecb297399646 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 30 Nov 2023 18:46:22 +0000
Subject: [PATCH 23/71] fix test

---
 test/sieve_variance.jl | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index c357950..b6c66c3 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -256,16 +256,13 @@ end
 end
 
 @testset "Test corrected_stderrors" begin
-    io = jldopen(joinpath(TESTDIR, "data", "sieve_variances.hdf5"))
-    variances = io["variances"]
+    variances = [
+        1. 2. 6.
+        4. 5. 3.
+    ]
     stderrors = TargetedEstimation.corrected_stderrors(variances)
     # sanity check
-    @test size(stderrors, 1) == 10
-
-    # check for the first curve
-    stderrors[1] == sqrt(maximum(variances[:,1]))
-
-    close(io)
+    @test stderrors ≈ sqrt.([4., 5., 6.])  # sqrt of each column's maximum variance
 end
 
 @testset "Test SVP" begin

From afc7eda8c498296596884ac4f1675e4bc04bcbaa Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 1 Dec 2023 10:24:19 +0000
Subject: [PATCH 24/71] restore config file

---
 test/config/problematic_tmle_ose_config.jl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 test/config/problematic_tmle_ose_config.jl

diff --git a/test/config/problematic_tmle_ose_config.jl b/test/config/problematic_tmle_ose_config.jl
new file mode 100644
index 0000000..a517cf4
--- /dev/null
+++ b/test/config/problematic_tmle_ose_config.jl
@@ -0,0 +1,14 @@
+default_models = TMLE.default_models(
+  Q_continuous = LinearRegressor(),
+  # For the estimation of E[Y|W, T]: binary target
+  Q_binary = LogisticClassifier(),
+  # This will fail
+  G = LogisticClassifier()
+)
+
+models = merge(default_models, (T2 = LinearRegressor(),))
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=models, weighted=true, ps_lowerbound=0.001),
+  OSE  = OSE(models=models)
+)
\ No newline at end of file

From 8c8515c3894baf59e9d9f6d3560cfc45f9b41688 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 1 Dec 2023 16:08:10 +0000
Subject: [PATCH 25/71] add precompile file

---
 deps/execute.jl     |   7 +++
 estimands_test.yaml | 102 --------------------------------------------
 2 files changed, 7 insertions(+), 102 deletions(-)
 delete mode 100644 estimands_test.yaml

diff --git a/deps/execute.jl b/deps/execute.jl
index 666bc56..bde1b1b 100644
--- a/deps/execute.jl
+++ b/deps/execute.jl
@@ -1,7 +1,14 @@
 using TargetedEstimation
 
+@info "Running precompilation script."
+
+# Run help messages
 TargetedEstimation.command_main(["-h"])
 TargetedEstimation.command_main(["tmle", "-h"])
 TargetedEstimation.command_main(["make-summary", "-h"])
 TargetedEstimation.command_main(["sieve-variance-plateau", "-h"])
 
+# Run workload
+TEST_DIR = joinpath(pkgdir(TargetedEstimation), "test")
+push!(LOAD_PATH, TEST_DIR)
+include(joinpath(TEST_DIR, "runtests.jl"))
\ No newline at end of file
diff --git a/estimands_test.yaml b/estimands_test.yaml
deleted file mode 100644
index 8901313..0000000
--- a/estimands_test.yaml
+++ /dev/null
@@ -1,102 +0,0 @@
-type: "Configuration"
-estimands:
-  - outcome_extra_covariates:
-      - C1
-    type: "IATE"
-    treatment_values:
-      T2:
-        case: true
-        control: false
-      T1:
-        case: true
-        control: false
-    outcome: CONTINUOUS, OUTCOME
-    treatment_confounders:
-      T2:
-        - W1
-        - W2
-      T1:
-        - W1
-        - W2
-  - outcome_extra_covariates:
-      - C1
-    type: "IATE"
-    treatment_values:
-      T2:
-        case: true
-        control: false
-      T1:
-        case: true
-        control: false
-    outcome: BINARY/OUTCOME
-    treatment_confounders:
-      T2:
-        - W1
-        - W2
-      T1:
-        - W1
-        - W2
-  - outcome_extra_covariates: []
-    type: "ATE"
-    treatment_values:
-      T1:
-        case: true
-        control: false
-    outcome: CONTINUOUS, OUTCOME
-    treatment_confounders:
-      T1:
-        - W1
-        - W2
-  - outcome_extra_covariates: []
-    type: "IATE"
-    treatment_values:
-      T2:
-        case: false
-        control: true
-      T1:
-        case: true
-        control: false
-    outcome: CONTINUOUS, OUTCOME
-    treatment_confounders:
-      T2:
-        - W1
-        - W2
-      T1:
-        - W1
-        - W2
-  - outcome_extra_covariates:
-      - C1
-    type: "IATE"
-    treatment_values:
-      T2:
-        case: false
-        control: true
-      T1:
-        case: true
-        control: false
-    outcome: BINARY/OUTCOME
-    treatment_confounders:
-      T2:
-        - W1
-        - W2
-      T1:
-        - W1
-        - W2
-  - outcome_extra_covariates:
-      - C1
-    type: "ATE"
-    treatment_values:
-      T2:
-        case: true
-        control: false
-      T1:
-        case: true
-        control: false
-    outcome: CONTINUOUS, OUTCOME
-    treatment_confounders:
-      T2:
-        - W1
-        - W2
-      T1:
-        - W1
-        - W2

From 0f9932911ac86115d9ee47f8c39d3bf942b9137e Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Sun, 3 Dec 2023 13:46:37 +0000
Subject: [PATCH 26/71] fix cli

---
 src/TargetedEstimation.jl |  1 +
 src/outputs.jl            |  4 ++++
 src/runner.jl             | 12 ++++++------
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index e38087c..fce10ec 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -30,6 +30,7 @@ using Comonicon
 using Configurations
 
 import MLJModelInterface
+import Base.tryparse
 
 include("failed_estimate.jl")
 include("cache_managers.jl")
diff --git a/src/outputs.jl b/src/outputs.jl
index 4302919..09e223a 100644
--- a/src/outputs.jl
+++ b/src/outputs.jl
@@ -3,6 +3,10 @@ FileExistsError(filename) = ArgumentError(string("File ", filename, " already ex
 check_file_exists(filename::Nothing) = nothing
 check_file_exists(filename) = !isfile(filename) || throw(FileExistsError(filename))
 
+Base.tryparse(::Type{Union{String, Nothing}}, x::AbstractString) = x
+Base.tryparse(::Type{Union{Float64, Nothing}}, x::AbstractString) = tryparse(Float64, x)
+Base.tryparse(::Type{Union{T, Nothing}}, x::Nothing) where T = nothing
+
 """
     initialize(output)
 
diff --git a/src/runner.jl b/src/runner.jl
index e7961c8..f25e5e8 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -144,12 +144,12 @@ TMLE CLI.
 
 - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time).
 """
-@cast function tmle(dataset, estimands, estimators; 
-    verbosity=0, 
-    outputs=Outputs(),
-    chunksize=100,
-    rng=123,
-    cache_strategy="release-unusable",
+@cast function tmle(dataset::String, estimands::String, estimators::String; 
+    verbosity::Int=0, 
+    outputs::Outputs=Outputs(),
+    chunksize::Int=100,
+    rng::Int=123,
+    cache_strategy::String="release-unusable",
     sort_estimands::Bool=false
     )
     runner = Runner(dataset, estimands, estimators; 

From 65a1a7002400828a024364148ae49afc6ba77e8c Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Mon, 4 Dec 2023 10:23:53 +0000
Subject: [PATCH 27/71] some more addons

---
 .gitignore             | 2 ++
 Comonicon.toml         | 2 +-
 docker/Dockerfile      | 9 +++++----
 test/sieve_variance.jl | 2 +-
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0a77c2f..2d2d75e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,5 @@ test/Manifest.toml
 sysimage/Manifest.toml
 
 Manifest.toml
+
+build/
diff --git a/Comonicon.toml b/Comonicon.toml
index 19f0cb6..48c2a41 100644
--- a/Comonicon.toml
+++ b/Comonicon.toml
@@ -1,4 +1,4 @@
-name = "fasttmle"
+name = "tmle"
 
 [install]
 completion = true
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 8634ee2..7464f91 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -38,10 +38,11 @@ COPY . /TargetedEstimation.jl
 WORKDIR /TargetedEstimation.jl
 
 # Precompile project
-RUN julia -q --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
+RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
 
 # Precompile Sysimage project
-RUN julia -q --project=/TargetedEstimation.jl/sysimage -e'using Pkg;Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
+RUN julia --project -t auto --startup-file=no deps/build.jl app
 
-# Build Sysimage
-RUN julia --project -t auto --startup-file=no sysimage/create_sysimage.jl
+ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
+
+RUN tmle --help
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index b6c66c3..7317854 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -262,7 +262,7 @@ end
     ]
     stderrors = TargetedEstimation.corrected_stderrors(variances)
     # sanity check
-    stderrors == sqrt.([4., 5., 6.])
+    @test stderrors == sqrt.([4., 5., 6.])
 end
 
 @testset "Test SVP" begin

From 0c4c82d34fbb69873c9cb8b484f1fdc009c3aaa5 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 5 Dec 2023 09:33:18 +0000
Subject: [PATCH 28/71] update docker and add Manifest

---
 .gitignore                       |    3 +-
 Manifest.toml                    | 1812 ++++++++++++++++++++++++++++++
 docker/Dockerfile                |    2 +-
 docs/Project.toml                |    4 +
 sysimage/Project.toml            |    2 -
 sysimage/create_sysimage.jl      |   11 -
 sysimage/precompile_exec_file.jl |    6 -
 7 files changed, 1818 insertions(+), 22 deletions(-)
 create mode 100644 Manifest.toml
 delete mode 100644 sysimage/Project.toml
 delete mode 100644 sysimage/create_sysimage.jl
 delete mode 100644 sysimage/precompile_exec_file.jl

diff --git a/.gitignore b/.gitignore
index 2d2d75e..648f644 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,10 +16,9 @@ deps/src/
 # Build artifacts for creating documentation generated by the Documenter package
 docs/build/
 docs/site/
+docs/Manifest.toml
 
 test/Manifest.toml
 sysimage/Manifest.toml
 
-Manifest.toml
-
 build/
diff --git a/Manifest.toml b/Manifest.toml
new file mode 100644
index 0000000..eb59e18
--- /dev/null
+++ b/Manifest.toml
@@ -0,0 +1,1812 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.9.2"
+manifest_format = "2.0"
+project_hash = "79b338af0999710186711c6c1c568ae8891f4dc2"
+
+[[deps.ARFFFiles]]
+deps = ["CategoricalArrays", "Dates", "Parsers", "Tables"]
+git-tree-sha1 = "e8c8e0a2be6eb4f56b1672e46004463033daa409"
+uuid = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
+version = "1.4.1"
+
+[[deps.AbstractDifferentiation]]
+deps = ["ExprTools", "LinearAlgebra", "Requires"]
+git-tree-sha1 = "6a5e61dc899ab116035c18ead4ec890269f3c478"
+uuid = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d"
+version = "0.6.0"
+
+    [deps.AbstractDifferentiation.extensions]
+    AbstractDifferentiationChainRulesCoreExt = "ChainRulesCore"
+    AbstractDifferentiationFiniteDifferencesExt = "FiniteDifferences"
+    AbstractDifferentiationForwardDiffExt = ["DiffResults", "ForwardDiff"]
+    AbstractDifferentiationReverseDiffExt = ["DiffResults", "ReverseDiff"]
+    AbstractDifferentiationTrackerExt = "Tracker"
+    AbstractDifferentiationZygoteExt = "Zygote"
+
+    [deps.AbstractDifferentiation.weakdeps]
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
+    FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+    Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[[deps.AbstractFFTs]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef"
+uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
+version = "1.5.0"
+weakdeps = ["ChainRulesCore", "Test"]
+
+    [deps.AbstractFFTs.extensions]
+    AbstractFFTsChainRulesCoreExt = "ChainRulesCore"
+    AbstractFFTsTestExt = "Test"
+
+[[deps.AbstractTrees]]
+git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c"
+uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
+version = "0.4.4"
+
+[[deps.Adapt]]
+deps = ["LinearAlgebra", "Requires"]
+git-tree-sha1 = "02f731463748db57cc2ebfbd9fbc9ce8280d3433"
+uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
+version = "3.7.1"
+weakdeps = ["StaticArrays"]
+
+    [deps.Adapt.extensions]
+    AdaptStaticArraysExt = "StaticArrays"
+
+[[deps.ArgCheck]]
+git-tree-sha1 = "a3a402a35a2f7e0b87828ccabbd5ebfbebe356b4"
+uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197"
+version = "2.3.0"
+
+[[deps.ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+version = "1.1.1"
+
+[[deps.ArnoldiMethod]]
+deps = ["LinearAlgebra", "Random", "StaticArrays"]
+git-tree-sha1 = "62e51b39331de8911e4a7ff6f5aaf38a5f4cc0ae"
+uuid = "ec485272-7323-5ecc-a04f-4719b315124d"
+version = "0.2.0"
+
+[[deps.ArrayInterface]]
+deps = ["Adapt", "LinearAlgebra", "Requires", "SparseArrays", "SuiteSparse"]
+git-tree-sha1 = "247efbccf92448be332d154d6ca56b9fcdd93c31"
+uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
+version = "7.6.1"
+
+    [deps.ArrayInterface.extensions]
+    ArrayInterfaceBandedMatricesExt = "BandedMatrices"
+    ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices"
+    ArrayInterfaceCUDAExt = "CUDA"
+    ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore"
+    ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore"
+    ArrayInterfaceTrackerExt = "Tracker"
+
+    [deps.ArrayInterface.weakdeps]
+    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
+    BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+    GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
+    StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+
+[[deps.Arrow]]
+deps = ["ArrowTypes", "BitIntegers", "CodecLz4", "CodecZstd", "ConcurrentUtilities", "DataAPI", "Dates", "EnumX", "LoggingExtras", "Mmap", "PooledArrays", "SentinelArrays", "Tables", "TimeZones", "TranscodingStreams", "UUIDs"]
+git-tree-sha1 = "954666e252835c4cf8819ce4ffaf31073c1b7233"
+uuid = "69666777-d1a9-59fb-9406-91d4454c9d45"
+version = "2.6.2"
+
+[[deps.ArrowTypes]]
+deps = ["Sockets", "UUIDs"]
+git-tree-sha1 = "8c37bfdf1b689c6677bbfc8986968fe641f6a299"
+uuid = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
+version = "2.2.2"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[[deps.Atomix]]
+deps = ["UnsafeAtomics"]
+git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be"
+uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
+version = "0.1.0"
+
+[[deps.BSON]]
+git-tree-sha1 = "2208958832d6e1b59e49f53697483a84ca8d664e"
+uuid = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
+version = "0.3.7"
+
+[[deps.BangBang]]
+deps = ["Compat", "ConstructionBase", "InitialValues", "LinearAlgebra", "Requires", "Setfield", "Tables"]
+git-tree-sha1 = "e28912ce94077686443433c2800104b061a827ed"
+uuid = "198e06fe-97b7-11e9-32a5-e1d131e6ad66"
+version = "0.3.39"
+
+    [deps.BangBang.extensions]
+    BangBangChainRulesCoreExt = "ChainRulesCore"
+    BangBangDataFramesExt = "DataFrames"
+    BangBangStaticArraysExt = "StaticArrays"
+    BangBangStructArraysExt = "StructArrays"
+    BangBangTypedTablesExt = "TypedTables"
+
+    [deps.BangBang.weakdeps]
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+    StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
+    TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
+[[deps.Baselet]]
+git-tree-sha1 = "aebf55e6d7795e02ca500a689d326ac979aaf89e"
+uuid = "9718e550-a3fa-408a-8086-8db961cd8217"
+version = "0.1.1"
+
+[[deps.BitFlags]]
+git-tree-sha1 = "2dc09997850d68179b69dafb58ae806167a32b1b"
+uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35"
+version = "0.1.8"
+
+[[deps.BitIntegers]]
+deps = ["Random"]
+git-tree-sha1 = "a55462dfddabc34bc97d3a7403a2ca2802179ae6"
+uuid = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1"
+version = "0.3.1"
+
+[[deps.CEnum]]
+git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90"
+uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
+version = "0.4.2"
+
+[[deps.CSV]]
+deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"]
+git-tree-sha1 = "44dbf560808d49041989b8a96cae4cffbeb7966a"
+uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+version = "0.10.11"
+
+[[deps.CUDA_Driver_jll]]
+deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"]
+git-tree-sha1 = "1e42ef1bdb45487ff28de16182c0df4920181dc3"
+uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc"
+version = "0.7.0+0"
+
+[[deps.CUDA_Runtime_jll]]
+deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
+git-tree-sha1 = "9704e50c9158cf8896c2776b8dbc5edd136caf80"
+uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
+version = "0.10.1+0"
+
+[[deps.Calculus]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad"
+uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9"
+version = "0.5.1"
+
+[[deps.CategoricalArrays]]
+deps = ["DataAPI", "Future", "Missings", "Printf", "Requires", "Statistics", "Unicode"]
+git-tree-sha1 = "1568b28f91293458345dabba6a5ea3f183250a61"
+uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
+version = "0.10.8"
+weakdeps = ["JSON", "RecipesBase", "SentinelArrays", "StructTypes"]
+
+    [deps.CategoricalArrays.extensions]
+    CategoricalArraysJSONExt = "JSON"
+    CategoricalArraysRecipesBaseExt = "RecipesBase"
+    CategoricalArraysSentinelArraysExt = "SentinelArrays"
+    CategoricalArraysStructTypesExt = "StructTypes"
+
+[[deps.CategoricalDistributions]]
+deps = ["CategoricalArrays", "Distributions", "Missings", "OrderedCollections", "Random", "ScientificTypes"]
+git-tree-sha1 = "3124343a1b0c9a2f5fdc1d9bcc633ba11735a4c4"
+uuid = "af321ab8-2d2e-40a6-b165-3d674595d28e"
+version = "0.1.13"
+
+    [deps.CategoricalDistributions.extensions]
+    UnivariateFiniteDisplayExt = "UnicodePlots"
+
+    [deps.CategoricalDistributions.weakdeps]
+    UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
+
+[[deps.ChainRules]]
+deps = ["Adapt", "ChainRulesCore", "Compat", "Distributed", "GPUArraysCore", "IrrationalConstants", "LinearAlgebra", "Random", "RealDot", "SparseArrays", "SparseInverseSubset", "Statistics", "StructArrays", "SuiteSparse"]
+git-tree-sha1 = "006cc7170be3e0fa02ccac6d4164a1eee1fc8c27"
+uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2"
+version = "1.58.0"
+
+[[deps.ChainRulesCore]]
+deps = ["Compat", "LinearAlgebra"]
+git-tree-sha1 = "e0af648f0692ec1691b5d094b8724ba1346281cf"
+uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+version = "1.18.0"
+weakdeps = ["SparseArrays"]
+
+    [deps.ChainRulesCore.extensions]
+    ChainRulesCoreSparseArraysExt = "SparseArrays"
+
+[[deps.CodecLz4]]
+deps = ["Lz4_jll", "TranscodingStreams"]
+git-tree-sha1 = "8bf4f9e2ee52b5e217451a7cd9171fcd4e16ae23"
+uuid = "5ba52731-8f18-5e0d-9241-30f10d1ec561"
+version = "0.4.1"
+
+[[deps.CodecZlib]]
+deps = ["TranscodingStreams", "Zlib_jll"]
+git-tree-sha1 = "cd67fc487743b2f0fd4380d4cbd3a24660d0eec8"
+uuid = "944b1d66-785c-5afd-91f1-9de20f533193"
+version = "0.7.3"
+
+[[deps.CodecZstd]]
+deps = ["CEnum", "TranscodingStreams", "Zstd_jll"]
+git-tree-sha1 = "849470b337d0fa8449c21061de922386f32949d9"
+uuid = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
+version = "0.7.2"
+
+[[deps.ColorTypes]]
+deps = ["FixedPointNumbers", "Random"]
+git-tree-sha1 = "eb7f0f8307f71fac7c606984ea5fb2817275d6e4"
+uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
+version = "0.11.4"
+
+[[deps.Combinatorics]]
+git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860"
+uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
+version = "1.0.2"
+
+[[deps.CommonSolve]]
+git-tree-sha1 = "0eee5eb66b1cf62cd6ad1b460238e60e4b09400c"
+uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
+version = "0.2.4"
+
+[[deps.CommonSubexpressions]]
+deps = ["MacroTools", "Test"]
+git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7"
+uuid = "bbf7d656-a473-5ed7-a52c-81e309532950"
+version = "0.3.0"
+
+[[deps.Comonicon]]
+deps = ["Configurations", "ExproniconLite", "Libdl", "Logging", "Markdown", "OrderedCollections", "PackageCompiler", "Pkg", "Scratch", "TOML", "UUIDs"]
+git-tree-sha1 = "552667002fdd5602ca72e0aeac8bd099daa0e040"
+uuid = "863f3e99-da2a-4334-8734-de3dacbe5542"
+version = "1.0.6"
+
+[[deps.Compat]]
+deps = ["UUIDs"]
+git-tree-sha1 = "886826d76ea9e72b35fcd000e535588f7b60f21d"
+uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
+version = "4.10.1"
+weakdeps = ["Dates", "LinearAlgebra"]
+
+    [deps.Compat.extensions]
+    CompatLinearAlgebraExt = "LinearAlgebra"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "1.0.5+0"
+
+[[deps.CompositionsBase]]
+git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad"
+uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b"
+version = "0.1.2"
+
+    [deps.CompositionsBase.extensions]
+    CompositionsBaseInverseFunctionsExt = "InverseFunctions"
+
+    [deps.CompositionsBase.weakdeps]
+    InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
+
+[[deps.ComputationalResources]]
+git-tree-sha1 = "52cb3ec90e8a8bea0e62e275ba577ad0f74821f7"
+uuid = "ed09eef8-17a6-5b46-8889-db040fac31e3"
+version = "0.3.2"
+
+[[deps.ConcurrentUtilities]]
+deps = ["Serialization", "Sockets"]
+git-tree-sha1 = "8cfa272e8bdedfa88b6aefbbca7c19f1befac519"
+uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb"
+version = "2.3.0"
+
+[[deps.Conda]]
+deps = ["Downloads", "JSON", "VersionParsing"]
+git-tree-sha1 = "51cab8e982c5b598eea9c8ceaced4b58d9dd37c9"
+uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
+version = "1.10.0"
+
+[[deps.Configurations]]
+deps = ["ExproniconLite", "OrderedCollections", "TOML"]
+git-tree-sha1 = "4358750bb58a3caefd5f37a4a0c5bfdbbf075252"
+uuid = "5218b696-f38b-4ac9-8b61-a12ec717816d"
+version = "0.17.6"
+
+[[deps.ConstructionBase]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "c53fc348ca4d40d7b371e71fd52251839080cbc9"
+uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
+version = "1.5.4"
+
+    [deps.ConstructionBase.extensions]
+    ConstructionBaseIntervalSetsExt = "IntervalSets"
+    ConstructionBaseStaticArraysExt = "StaticArrays"
+
+    [deps.ConstructionBase.weakdeps]
+    IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
+    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+
+[[deps.ContextVariablesX]]
+deps = ["Compat", "Logging", "UUIDs"]
+git-tree-sha1 = "25cc3803f1030ab855e383129dcd3dc294e322cc"
+uuid = "6add18c4-b38d-439d-96f6-d6bc489c04c5"
+version = "0.1.3"
+
+[[deps.Crayons]]
+git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15"
+uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
+version = "4.1.1"
+
+[[deps.DataAPI]]
+git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c"
+uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
+version = "1.15.0"
+
+[[deps.DataFrames]]
+deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
+git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8"
+uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+version = "1.6.1"
+
+[[deps.DataStructures]]
+deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "3dbd312d370723b6bb43ba9d02fc36abade4518d"
+uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+version = "0.18.15"
+
+[[deps.DataValueInterfaces]]
+git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
+uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
+version = "1.0.0"
+
+[[deps.Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+
+[[deps.DefineSingletons]]
+git-tree-sha1 = "0fba8b706d0178b4dc7fd44a96a92382c9065c2c"
+uuid = "244e2a9f-e319-4986-a169-4d1fe445cd52"
+version = "0.1.2"
+
+[[deps.DelimitedFiles]]
+deps = ["Mmap"]
+git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae"
+uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+version = "1.9.1"
+
+[[deps.DiffResults]]
+deps = ["StaticArraysCore"]
+git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621"
+uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
+version = "1.1.0"
+
+[[deps.DiffRules]]
+deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"]
+git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272"
+uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
+version = "1.15.1"
+
+[[deps.Distances]]
+deps = ["LinearAlgebra", "Statistics", "StatsAPI"]
+git-tree-sha1 = "66c4c81f259586e8f002eacebc177e1fb06363b0"
+uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+version = "0.10.11"
+weakdeps = ["ChainRulesCore", "SparseArrays"]
+
+    [deps.Distances.extensions]
+    DistancesChainRulesCoreExt = "ChainRulesCore"
+    DistancesSparseArraysExt = "SparseArrays"
+
+[[deps.Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[[deps.Distributions]]
+deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"]
+git-tree-sha1 = "a6c00f894f24460379cb7136633cef54ac9f6f4a"
+uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
+version = "0.25.103"
+
+    [deps.Distributions.extensions]
+    DistributionsChainRulesCoreExt = "ChainRulesCore"
+    DistributionsDensityInterfaceExt = "DensityInterface"
+    DistributionsTestExt = "Test"
+
+    [deps.Distributions.weakdeps]
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
+    Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[[deps.DocStringExtensions]]
+deps = ["LibGit2"]
+git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d"
+uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+version = "0.9.3"
+
+[[deps.Downloads]]
+deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+version = "1.6.0"
+
+[[deps.DualNumbers]]
+deps = ["Calculus", "NaNMath", "SpecialFunctions"]
+git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566"
+uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74"
+version = "0.6.8"
+
+[[deps.EarCut_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "e3290f2d49e661fbd94046d7e3726ffcb2d41053"
+uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5"
+version = "2.2.4+0"
+
+[[deps.EarlyStopping]]
+deps = ["Dates", "Statistics"]
+git-tree-sha1 = "98fdf08b707aaf69f524a6cd0a67858cefe0cfb6"
+uuid = "792122b4-ca99-40de-a6bc-6742525f08b6"
+version = "0.3.0"
+
+[[deps.EnumX]]
+git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237"
+uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
+version = "1.0.4"
+
+[[deps.EvoTrees]]
+deps = ["BSON", "CategoricalArrays", "Distributions", "MLJModelInterface", "NetworkLayout", "Random", "RecipesBase", "Statistics", "StatsBase", "Tables"]
+git-tree-sha1 = "f08d64339d7259b0c69a00a1e321dc6da79672ea"
+uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
+version = "0.16.5"
+
+    [deps.EvoTrees.extensions]
+    EvoTreesCUDAExt = "CUDA"
+
+    [deps.EvoTrees.weakdeps]
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+
+[[deps.ExceptionUnwrapping]]
+deps = ["Test"]
+git-tree-sha1 = "e90caa41f5a86296e014e148ee061bd6c3edec96"
+uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4"
+version = "0.1.9"
+
+[[deps.ExprTools]]
+git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec"
+uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
+version = "0.1.10"
+
+[[deps.ExproniconLite]]
+git-tree-sha1 = "fbc390c2f896031db5484bc152a7e805ecdfb01f"
+uuid = "55351af7-c7e9-48d6-89ff-24e801d99491"
+version = "0.10.5"
+
+[[deps.Extents]]
+git-tree-sha1 = "2140cd04483da90b2da7f99b2add0750504fc39c"
+uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910"
+version = "0.1.2"
+
+[[deps.FLoops]]
+deps = ["BangBang", "Compat", "FLoopsBase", "InitialValues", "JuliaVariables", "MLStyle", "Serialization", "Setfield", "Transducers"]
+git-tree-sha1 = "ffb97765602e3cbe59a0589d237bf07f245a8576"
+uuid = "cc61a311-1640-44b5-9fba-1b764f453329"
+version = "0.2.1"
+
+[[deps.FLoopsBase]]
+deps = ["ContextVariablesX"]
+git-tree-sha1 = "656f7a6859be8673bf1f35da5670246b923964f7"
+uuid = "b9860ae5-e623-471e-878b-f6a53c775ea6"
+version = "0.1.1"
+
+[[deps.FileIO]]
+deps = ["Pkg", "Requires", "UUIDs"]
+git-tree-sha1 = "299dc33549f68299137e51e6d49a13b5b1da9673"
+uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
+version = "1.16.1"
+
+[[deps.FilePathsBase]]
+deps = ["Compat", "Dates", "Mmap", "Printf", "Test", "UUIDs"]
+git-tree-sha1 = "9f00e42f8d99fdde64d40c8ea5d14269a2e2c1aa"
+uuid = "48062228-2e41-5def-b9a4-89aafe57970f"
+version = "0.9.21"
+
+[[deps.FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+
+[[deps.FillArrays]]
+deps = ["LinearAlgebra", "Random"]
+git-tree-sha1 = "28e4e9c4b7b162398ec8004bdabe9a90c78c122d"
+uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
+version = "1.8.0"
+weakdeps = ["PDMats", "SparseArrays", "Statistics"]
+
+    [deps.FillArrays.extensions]
+    FillArraysPDMatsExt = "PDMats"
+    FillArraysSparseArraysExt = "SparseArrays"
+    FillArraysStatisticsExt = "Statistics"
+
+[[deps.FiniteDiff]]
+deps = ["ArrayInterface", "LinearAlgebra", "Requires", "Setfield", "SparseArrays"]
+git-tree-sha1 = "c6e4a1fbe73b31a3dea94b1da449503b8830c306"
+uuid = "6a86dc24-6348-571c-b903-95158fe2bd41"
+version = "2.21.1"
+
+    [deps.FiniteDiff.extensions]
+    FiniteDiffBandedMatricesExt = "BandedMatrices"
+    FiniteDiffBlockBandedMatricesExt = "BlockBandedMatrices"
+    FiniteDiffStaticArraysExt = "StaticArrays"
+
+    [deps.FiniteDiff.weakdeps]
+    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
+    BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
+    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+
+[[deps.FixedPointNumbers]]
+deps = ["Statistics"]
+git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc"
+uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
+version = "0.8.4"
+
+[[deps.ForwardDiff]]
+deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"]
+git-tree-sha1 = "cf0fe81336da9fb90944683b8c41984b08793dad"
+uuid = "f6369f11-7733-5829-9624-2563aa707210"
+version = "0.10.36"
+weakdeps = ["StaticArrays"]
+
+    [deps.ForwardDiff.extensions]
+    ForwardDiffStaticArraysExt = "StaticArrays"
+
+[[deps.Future]]
+deps = ["Random"]
+uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+
+[[deps.GLM]]
+deps = ["Distributions", "LinearAlgebra", "Printf", "Reexport", "SparseArrays", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns", "StatsModels"]
+git-tree-sha1 = "273bd1cd30768a2fddfa3fd63bbc746ed7249e5f"
+uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
+version = "1.9.0"
+
+[[deps.GLMNet]]
+deps = ["DataFrames", "Distributed", "Distributions", "Printf", "Random", "SparseArrays", "StatsBase", "glmnet_jll"]
+git-tree-sha1 = "7ea4e2bbb84183fe52a488d05e16c152b2387b95"
+uuid = "8d5ece8b-de18-5317-b113-243142960cc6"
+version = "0.7.2"
+
+[[deps.GPUArrays]]
+deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"]
+git-tree-sha1 = "85d7fb51afb3def5dcb85ad31c3707795c8bccc1"
+uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
+version = "9.1.0"
+
+[[deps.GPUArraysCore]]
+deps = ["Adapt"]
+git-tree-sha1 = "2d6ca471a6c7b536127afccfa7564b5b39227fe0"
+uuid = "46192b85-c4d5-4398-a991-12ede77f4527"
+version = "0.1.5"
+
+[[deps.GeoInterface]]
+deps = ["Extents"]
+git-tree-sha1 = "d53480c0793b13341c40199190f92c611aa2e93c"
+uuid = "cf35fbd7-0cd7-5166-be24-54bfbe79505f"
+version = "1.3.2"
+
+[[deps.GeometryBasics]]
+deps = ["EarCut_jll", "Extents", "GeoInterface", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"]
+git-tree-sha1 = "424a5a6ce7c5d97cca7bcc4eac551b97294c54af"
+uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326"
+version = "0.4.9"
+
+[[deps.Glob]]
+git-tree-sha1 = "97285bbd5230dd766e9ef6749b80fc617126d496"
+uuid = "c27321d9-0574-5035-807b-f59d2c89b15c"
+version = "1.3.1"
+
+[[deps.Graphs]]
+deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"]
+git-tree-sha1 = "899050ace26649433ef1af25bc17a815b3db52b7"
+uuid = "86223c79-3864-5bf0-83f7-82e725a168b6"
+version = "1.9.0"
+
+[[deps.HDF5]]
+deps = ["Compat", "HDF5_jll", "Libdl", "MPIPreferences", "Mmap", "Preferences", "Printf", "Random", "Requires", "UUIDs"]
+git-tree-sha1 = "26407bd1c60129062cec9da63dc7d08251544d53"
+uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
+version = "0.17.1"
+
+    [deps.HDF5.extensions]
+    MPIExt = "MPI"
+
+    [deps.HDF5.weakdeps]
+    MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
+
+[[deps.HDF5_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"]
+git-tree-sha1 = "38c8874692d48d5440d5752d6c74b0c6b0b60739"
+uuid = "0234f1f7-429e-5d53-9886-15a909be8d59"
+version = "1.14.2+1"
+
+[[deps.HTTP]]
+deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"]
+git-tree-sha1 = "abbbb9ec3afd783a7cbd82ef01dcd088ea051398"
+uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
+version = "1.10.1"
+
+[[deps.HighlyAdaptiveLasso]]
+deps = ["DataFrames", "MLJModelInterface", "RCall"]
+git-tree-sha1 = "40f12ec0130659287a3d1b7e1a8ffc4fcf7249ba"
+uuid = "c5dac772-1445-43c4-b698-9440de7877f6"
+version = "0.2.0"
+
+[[deps.Hwloc_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "8ecb0b34472a3c98f945e3c75fc7d5428d165511"
+uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8"
+version = "2.9.3+0"
+
+[[deps.HypergeometricFunctions]]
+deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"]
+git-tree-sha1 = "f218fe3736ddf977e0e772bc9a586b2383da2685"
+uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a"
+version = "0.3.23"
+
+[[deps.HypothesisTests]]
+deps = ["Combinatorics", "Distributions", "LinearAlgebra", "Printf", "Random", "Rmath", "Roots", "Statistics", "StatsAPI", "StatsBase"]
+git-tree-sha1 = "4b5d5ba51f5f473737ed9de6d8a7aa190ad8c72f"
+uuid = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
+version = "0.11.0"
+
+[[deps.IRTools]]
+deps = ["InteractiveUtils", "MacroTools", "Test"]
+git-tree-sha1 = "8aa91235360659ca7560db43a7d57541120aa31d"
+uuid = "7869d1d1-7146-5819-86e3-90919afe41df"
+version = "0.4.11"
+
+[[deps.Inflate]]
+git-tree-sha1 = "ea8031dea4aff6bd41f1df8f2fdfb25b33626381"
+uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
+version = "0.1.4"
+
+[[deps.InitialValues]]
+git-tree-sha1 = "4da0f88e9a39111c2fa3add390ab15f3a44f3ca3"
+uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c"
+version = "0.3.1"
+
+[[deps.InlineStrings]]
+deps = ["Parsers"]
+git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461"
+uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
+version = "1.4.0"
+
+[[deps.IntelOpenMP_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "ad37c091f7d7daf900963171600d7c1c5c3ede32"
+uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0"
+version = "2023.2.0+0"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[deps.InvertedIndices]]
+git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038"
+uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
+version = "1.3.0"
+
+[[deps.IrrationalConstants]]
+git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2"
+uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
+version = "0.2.2"
+
+[[deps.IterTools]]
+git-tree-sha1 = "4ced6667f9974fc5c5943fa5e2ef1ca43ea9e450"
+uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
+version = "1.8.0"
+
+[[deps.IterationControl]]
+deps = ["EarlyStopping", "InteractiveUtils"]
+git-tree-sha1 = "d7df9a6fdd82a8cfdfe93a94fcce35515be634da"
+uuid = "b3c1a2ee-3fec-4384-bf48-272ea71de57c"
+version = "0.5.3"
+
+[[deps.IterativeSolvers]]
+deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"]
+git-tree-sha1 = "b435d190ef8369cf4d79cc9dd5fba88ba0165307"
+uuid = "42fd0dbc-a981-5370-80f2-aaf504508153"
+version = "0.9.3"
+
+[[deps.IteratorInterfaceExtensions]]
+git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
+uuid = "82899510-4779-5014-852e-03e436cf321d"
+version = "1.0.0"
+
+[[deps.JLD2]]
+deps = ["FileIO", "MacroTools", "Mmap", "OrderedCollections", "Pkg", "PrecompileTools", "Printf", "Reexport", "Requires", "TranscodingStreams", "UUIDs"]
+git-tree-sha1 = "9bbb5130d3b4fa52846546bca4791ecbdfb52730"
+uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+version = "0.4.38"
+
+[[deps.JLLWrappers]]
+deps = ["Artifacts", "Preferences"]
+git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca"
+uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
+version = "1.5.0"
+
+[[deps.JSON]]
+deps = ["Dates", "Mmap", "Parsers", "Unicode"]
+git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a"
+uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+version = "0.21.4"
+
+[[deps.JSON3]]
+deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"]
+git-tree-sha1 = "95220473901735a0f4df9d1ca5b171b568b2daa3"
+uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
+version = "1.13.2"
+
+[[deps.JuliaVariables]]
+deps = ["MLStyle", "NameResolution"]
+git-tree-sha1 = "49fb3cb53362ddadb4415e9b73926d6b40709e70"
+uuid = "b14d175d-62b4-44ba-8fb7-3064adc8c3ec"
+version = "0.2.4"
+
+[[deps.KernelAbstractions]]
+deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"]
+git-tree-sha1 = "b0737cbbe1c8da6f1139d1c23e35e7cea129c0af"
+uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
+version = "0.9.13"
+
+    [deps.KernelAbstractions.extensions]
+    EnzymeExt = "EnzymeCore"
+
+    [deps.KernelAbstractions.weakdeps]
+    EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
+
+[[deps.LLVM]]
+deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"]
+git-tree-sha1 = "c879e47398a7ab671c782e02b51a4456794a7fa3"
+uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
+version = "6.4.0"
+
+    [deps.LLVM.extensions]
+    BFloat16sExt = "BFloat16s"
+
+    [deps.LLVM.weakdeps]
+    BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
+
+[[deps.LLVMExtra_jll]]
+deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
+git-tree-sha1 = "98eaee04d96d973e79c25d49167668c5c8fb50e2"
+uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
+version = "0.0.27+1"
+
+[[deps.LLVMOpenMP_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "f689897ccbe049adb19a065c495e75f372ecd42b"
+uuid = "1d63c593-3942-5779-bab2-d838dc0a180e"
+version = "15.0.4+0"
+
+[[deps.LaTeXStrings]]
+git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec"
+uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
+version = "1.3.1"
+
+[[deps.LatinHypercubeSampling]]
+deps = ["Random", "StableRNGs", "StatsBase", "Test"]
+git-tree-sha1 = "825289d43c753c7f1bf9bed334c253e9913997f8"
+uuid = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d"
+version = "1.9.0"
+
+[[deps.LazyArtifacts]]
+deps = ["Artifacts", "Pkg"]
+uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+
+[[deps.LearnAPI]]
+deps = ["InteractiveUtils", "Statistics"]
+git-tree-sha1 = "ec695822c1faaaa64cee32d0b21505e1977b4809"
+uuid = "92ad9a40-7767-427a-9ee6-6e577f1266cb"
+version = "0.1.0"
+
+[[deps.LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+version = "0.6.3"
+
+[[deps.LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+version = "7.84.0+0"
+
+[[deps.LibGit2]]
+deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.10.2+0"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[[deps.Libiconv_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175"
+uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
+version = "1.17.0+0"
+
+[[deps.LineSearches]]
+deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"]
+git-tree-sha1 = "7bbea35cec17305fc70a0e5b4641477dc0789d9d"
+uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
+version = "7.2.0"
+
+[[deps.LinearAlgebra]]
+deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
+[[deps.LinearMaps]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "9df2ab050ffefe870a09c7b6afdb0cde381703f2"
+uuid = "7a12625a-238d-50fd-b39a-03d52299707e"
+version = "3.11.1"
+weakdeps = ["ChainRulesCore", "SparseArrays", "Statistics"]
+
+    [deps.LinearMaps.extensions]
+    LinearMapsChainRulesCoreExt = "ChainRulesCore"
+    LinearMapsSparseArraysExt = "SparseArrays"
+    LinearMapsStatisticsExt = "Statistics"
+
+[[deps.LogExpFunctions]]
+deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
+git-tree-sha1 = "7d6dd4e9212aebaeed356de34ccf262a3cd415aa"
+uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
+version = "0.3.26"
+
+    [deps.LogExpFunctions.extensions]
+    LogExpFunctionsChainRulesCoreExt = "ChainRulesCore"
+    LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables"
+    LogExpFunctionsInverseFunctionsExt = "InverseFunctions"
+
+    [deps.LogExpFunctions.weakdeps]
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
+    InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[deps.LoggingExtras]]
+deps = ["Dates", "Logging"]
+git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075"
+uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36"
+version = "1.0.3"
+
+[[deps.Lz4_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "6c26c5e8a4203d43b5497be3ec5d4e0c3cde240a"
+uuid = "5ced341a-0733-55b8-9ab6-a4889d929147"
+version = "1.9.4+0"
+
+[[deps.MKL]]
+deps = ["Artifacts", "Libdl", "LinearAlgebra", "MKL_jll"]
+git-tree-sha1 = "100521a1d2181cb39036ee1a6955d6b9686bb363"
+uuid = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
+version = "0.6.1"
+
+[[deps.MKL_jll]]
+deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"]
+git-tree-sha1 = "eb006abbd7041c28e0d16260e50a24f8f9104913"
+uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
+version = "2023.2.0+0"
+
+[[deps.MLFlowClient]]
+deps = ["Dates", "FilePathsBase", "HTTP", "JSON", "ShowCases", "URIs", "UUIDs"]
+git-tree-sha1 = "32cee10a6527476bef0c6484ff4c60c2cead5d3e"
+uuid = "64a0f543-368b-4a9a-827a-e71edb2a0b83"
+version = "0.4.4"
+
+[[deps.MLJ]]
+deps = ["CategoricalArrays", "ComputationalResources", "Distributed", "Distributions", "LinearAlgebra", "MLJBalancing", "MLJBase", "MLJEnsembles", "MLJFlow", "MLJIteration", "MLJModels", "MLJTuning", "OpenML", "Pkg", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "StatisticalMeasures", "Statistics", "StatsBase", "Tables"]
+git-tree-sha1 = "981196c41a23cbc1befbad190558b1f0ebb97910"
+uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
+version = "0.20.2"
+
+[[deps.MLJBalancing]]
+deps = ["MLJBase", "MLJModelInterface", "MLUtils", "OrderedCollections", "Random", "StatsBase"]
+git-tree-sha1 = "e4be85602f010291f49b6a6464ccde1708ce5d62"
+uuid = "45f359ea-796d-4f51-95a5-deb1a414c586"
+version = "0.1.3"
+
+[[deps.MLJBase]]
+deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Dates", "DelimitedFiles", "Distributed", "Distributions", "InteractiveUtils", "InvertedIndices", "LearnAPI", "LinearAlgebra", "MLJModelInterface", "Missings", "OrderedCollections", "Parameters", "PrettyTables", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "Serialization", "StatisticalMeasuresBase", "StatisticalTraits", "Statistics", "StatsBase", "Tables"]
+git-tree-sha1 = "6d433d34a1764324cf37a1ddc47dcc42ec05340f"
+uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
+version = "1.0.1"
+weakdeps = ["StatisticalMeasures"]
+
+    [deps.MLJBase.extensions]
+    DefaultMeasuresExt = "StatisticalMeasures"
+
+[[deps.MLJEnsembles]]
+deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Distributed", "Distributions", "MLJModelInterface", "ProgressMeter", "Random", "ScientificTypesBase", "StatisticalMeasuresBase", "StatsBase"]
+git-tree-sha1 = "94403b2c8f692011df6731913376e0e37f6c0fe9"
+uuid = "50ed68f4-41fd-4504-931a-ed422449fee0"
+version = "0.4.0"
+
+[[deps.MLJFlow]]
+deps = ["MLFlowClient", "MLJBase", "MLJModelInterface"]
+git-tree-sha1 = "89d0e7a7e08359476482f20b2d8ff12080d171ee"
+uuid = "7b7b8358-b45c-48ea-a8ef-7ca328ad328f"
+version = "0.3.0"
+
+[[deps.MLJGLMInterface]]
+deps = ["Distributions", "GLM", "MLJModelInterface", "StatsModels", "Tables"]
+git-tree-sha1 = "06aba1c96b19f31744f7e97d96fcf66b79739e05"
+uuid = "caf8df21-4939-456d-ac9c-5fefbfb04c0c"
+version = "0.3.5"
+
+[[deps.MLJIteration]]
+deps = ["IterationControl", "MLJBase", "Random", "Serialization"]
+git-tree-sha1 = "991e10d4c8da49d534e312e8a4fbe56b7ac6f70c"
+uuid = "614be32b-d00c-4edb-bd02-1eb411ab5e55"
+version = "0.6.0"
+
+[[deps.MLJLinearModels]]
+deps = ["DocStringExtensions", "IterativeSolvers", "LinearAlgebra", "LinearMaps", "MLJModelInterface", "Optim", "Parameters"]
+git-tree-sha1 = "7f517fd840ca433a8fae673edb31678ff55d969c"
+uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
+version = "0.10.0"
+
+[[deps.MLJModelInterface]]
+deps = ["Random", "ScientificTypesBase", "StatisticalTraits"]
+git-tree-sha1 = "381d99f0af76d98f50bd5512dcf96a99c13f8223"
+uuid = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
+version = "1.9.3"
+
+[[deps.MLJModels]]
+deps = ["CategoricalArrays", "CategoricalDistributions", "Combinatorics", "Dates", "Distances", "Distributions", "InteractiveUtils", "LinearAlgebra", "MLJModelInterface", "Markdown", "OrderedCollections", "Parameters", "Pkg", "PrettyPrinting", "REPL", "Random", "RelocatableFolders", "ScientificTypes", "StatisticalTraits", "Statistics", "StatsBase", "Tables"]
+git-tree-sha1 = "10d221910fc3f3eedad567178ddbca3cc0f776a3"
+uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
+version = "0.16.12"
+
+[[deps.MLJTuning]]
+deps = ["ComputationalResources", "Distributed", "Distributions", "LatinHypercubeSampling", "MLJBase", "ProgressMeter", "Random", "RecipesBase", "StatisticalMeasuresBase"]
+git-tree-sha1 = "44dc126646a15018d7829f020d121b85b4def9bc"
+uuid = "03970b2e-30c4-11ea-3135-d1576263f10f"
+version = "0.8.0"
+
+[[deps.MLJXGBoostInterface]]
+deps = ["MLJModelInterface", "SparseArrays", "Tables", "XGBoost"]
+git-tree-sha1 = "988c399a352f0b49bc1345c509d8a4800cb468c5"
+uuid = "54119dfa-1dab-4055-a167-80440f4f7a91"
+version = "0.3.10"
+
+[[deps.MLStyle]]
+git-tree-sha1 = "bc38dff0548128765760c79eb7388a4b37fae2c8"
+uuid = "d8e11817-5142-5d16-987a-aa16d5891078"
+version = "0.4.17"
+
+[[deps.MLUtils]]
+deps = ["ChainRulesCore", "Compat", "DataAPI", "DelimitedFiles", "FLoops", "NNlib", "Random", "ShowCases", "SimpleTraits", "Statistics", "StatsBase", "Tables", "Transducers"]
+git-tree-sha1 = "3504cdb8c2bc05bde4d4b09a81b01df88fcbbba0"
+uuid = "f1d291b0-491e-4a28-83b9-f70985020b54"
+version = "0.4.3"
+
+[[deps.MPICH_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"]
+git-tree-sha1 = "8a5b4d2220377d1ece13f49438d71ad20cf1ba83"
+uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4"
+version = "4.1.2+0"
+
+[[deps.MPIPreferences]]
+deps = ["Libdl", "Preferences"]
+git-tree-sha1 = "8f6af051b9e8ec597fa09d8885ed79fd582f33c9"
+uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
+version = "0.1.10"
+
+[[deps.MPItrampoline_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"]
+git-tree-sha1 = "6979eccb6a9edbbb62681e158443e79ecc0d056a"
+uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748"
+version = "5.3.1+0"
+
+[[deps.MacroTools]]
+deps = ["Markdown", "Random"]
+git-tree-sha1 = "9ee1618cbf5240e6d4e0371d6f24065083f60c48"
+uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
+version = "0.5.11"
+
+[[deps.Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[deps.MbedTLS]]
+deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"]
+git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf"
+uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
+version = "1.1.9"
+
+[[deps.MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+version = "2.28.2+0"
+
+[[deps.MetaGraphsNext]]
+deps = ["Graphs", "JLD2", "SimpleTraits"]
+git-tree-sha1 = "8dd4f3f8a643d53e61ff9115749f522c35a38f3f"
+uuid = "fa8bd995-216d-47f1-8a91-f3b68fbeb377"
+version = "0.6.0"
+
+[[deps.MicroCollections]]
+deps = ["BangBang", "InitialValues", "Setfield"]
+git-tree-sha1 = "629afd7d10dbc6935ec59b32daeb33bc4460a42e"
+uuid = "128add7d-3638-4c79-886c-908ea0c25c34"
+version = "0.1.4"
+
+[[deps.MicrosoftMPI_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "b01beb91d20b0d1312a9471a36017b5b339d26de"
+uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf"
+version = "10.1.4+1"
+
+[[deps.Missings]]
+deps = ["DataAPI"]
+git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272"
+uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
+version = "1.1.0"
+
+[[deps.Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+
+[[deps.Mocking]]
+deps = ["Compat", "ExprTools"]
+git-tree-sha1 = "4cc0c5a83933648b615c36c2b956d94fda70641e"
+uuid = "78c3b35d-d492-501b-9361-3d52fe80e533"
+version = "0.7.7"
+
+[[deps.MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+version = "2022.10.11"
+
+[[deps.MultipleTesting]]
+deps = ["Distributions", "SpecialFunctions", "StatsBase"]
+git-tree-sha1 = "1e98f8f732e7035c4333135b75605b74f3462b9b"
+uuid = "f8716d33-7c4a-5097-896f-ce0ecbd3ef6b"
+version = "0.6.0"
+
+[[deps.NLSolversBase]]
+deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"]
+git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c"
+uuid = "d41bc354-129a-5804-8e4c-c37616107c6c"
+version = "7.8.3"
+
+[[deps.NNlib]]
+deps = ["Adapt", "Atomix", "ChainRulesCore", "GPUArraysCore", "KernelAbstractions", "LinearAlgebra", "Pkg", "Random", "Requires", "Statistics"]
+git-tree-sha1 = "ac86d2944bf7a670ac8bf0f7ec099b5898abcc09"
+uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
+version = "0.9.8"
+
+    [deps.NNlib.extensions]
+    NNlibAMDGPUExt = "AMDGPU"
+    NNlibCUDACUDNNExt = ["CUDA", "cuDNN"]
+    NNlibCUDAExt = "CUDA"
+    NNlibEnzymeCoreExt = "EnzymeCore"
+
+    [deps.NNlib.weakdeps]
+    AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+    EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
+    cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
+
+[[deps.NaNMath]]
+deps = ["OpenLibm_jll"]
+git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4"
+uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
+version = "1.0.2"
+
+[[deps.NameResolution]]
+deps = ["PrettyPrint"]
+git-tree-sha1 = "1a0fa0e9613f46c9b8c11eee38ebb4f590013c5e"
+uuid = "71a1bf82-56d0-4bbc-8a3c-48b961074391"
+version = "0.1.5"
+
+[[deps.NetworkLayout]]
+deps = ["GeometryBasics", "LinearAlgebra", "Random", "Requires", "StaticArrays"]
+git-tree-sha1 = "91bb2fedff8e43793650e7a677ccda6e6e6e166b"
+uuid = "46757867-2c16-5918-afeb-47bfcb05e46a"
+version = "0.4.6"
+weakdeps = ["Graphs"]
+
+    [deps.NetworkLayout.extensions]
+    NetworkLayoutGraphsExt = "Graphs"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
+
+[[deps.OpenBLAS_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+version = "0.3.21+4"
+
+[[deps.OpenLibm_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
+version = "0.8.1+0"
+
+[[deps.OpenML]]
+deps = ["ARFFFiles", "HTTP", "JSON", "Markdown", "Pkg", "Scratch"]
+git-tree-sha1 = "6efb039ae888699d5a74fb593f6f3e10c7193e33"
+uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
+version = "0.3.1"
+
+[[deps.OpenMPI_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "PMIx_jll", "TOML", "Zlib_jll", "libevent_jll", "prrte_jll"]
+git-tree-sha1 = "694458ae803b684f09c07f90459cb79655fb377d"
+uuid = "fe0851c0-eecd-5654-98d4-656369965a5c"
+version = "5.0.0+0"
+
+[[deps.OpenSSL]]
+deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"]
+git-tree-sha1 = "51901a49222b09e3743c65b8847687ae5fc78eb2"
+uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c"
+version = "1.4.1"
+
+[[deps.OpenSSL_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f"
+uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+version = "3.0.12+0"
+
+[[deps.OpenSpecFun_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
+uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
+version = "0.5.5+0"
+
+[[deps.Optim]]
+deps = ["Compat", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"]
+git-tree-sha1 = "01f85d9269b13fedc61e63cc72ee2213565f7a72"
+uuid = "429524aa-4258-5aef-a3af-852621145aeb"
+version = "1.7.8"
+
+[[deps.OrderedCollections]]
+git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.6.3"
+
+[[deps.PDMats]]
+deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
+git-tree-sha1 = "4e5be6bb265d33669f98eb55d2a57addd1eeb72c"
+uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
+version = "0.11.30"
+
+[[deps.PMIx_jll]]
+deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "Zlib_jll", "libevent_jll"]
+git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541"
+uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab"
+version = "4.2.7+0"
+
+[[deps.PackageCompiler]]
+deps = ["Artifacts", "Glob", "LazyArtifacts", "Libdl", "Pkg", "Printf", "RelocatableFolders", "TOML", "UUIDs", "p7zip_jll"]
+git-tree-sha1 = "f9392ab72832f4315220a853747ff3dba758c9d1"
+uuid = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d"
+version = "2.1.15"
+
+[[deps.Parameters]]
+deps = ["OrderedCollections", "UnPack"]
+git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe"
+uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
+version = "0.12.3"
+
+[[deps.Parsers]]
+deps = ["Dates", "PrecompileTools", "UUIDs"]
+git-tree-sha1 = "a935806434c9d4c506ba941871b327b96d41f2bf"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.8.0"
+
+[[deps.Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+version = "1.9.2"
+
+[[deps.PooledArrays]]
+deps = ["DataAPI", "Future"]
+git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3"
+uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
+version = "1.4.3"
+
+[[deps.PositiveFactorizations]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "17275485f373e6673f7e7f97051f703ed5b15b20"
+uuid = "85a6dd25-e78a-55b7-8502-1745935b8125"
+version = "0.2.4"
+
+[[deps.PrecompileTools]]
+deps = ["Preferences"]
+git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f"
+uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+version = "1.2.0"
+
+[[deps.Preferences]]
+deps = ["TOML"]
+git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e"
+uuid = "21216c6a-2e73-6563-6e65-726566657250"
+version = "1.4.1"
+
+[[deps.PrettyPrint]]
+git-tree-sha1 = "632eb4abab3449ab30c5e1afaa874f0b98b586e4"
+uuid = "8162dcfd-2161-5ef2-ae6c-7681170c5f98"
+version = "0.2.0"
+
+[[deps.PrettyPrinting]]
+git-tree-sha1 = "22a601b04a154ca38867b991d5017469dc75f2db"
+uuid = "54e16d92-306c-5ea0-a30b-337be88ac337"
+version = "0.4.1"
+
+[[deps.PrettyTables]]
+deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"]
+git-tree-sha1 = "88b895d13d53b5577fd53379d913b9ab9ac82660"
+uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+version = "2.3.1"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+
+[[deps.ProgressMeter]]
+deps = ["Distributed", "Printf"]
+git-tree-sha1 = "00099623ffee15972c16111bcf84c58a0051257c"
+uuid = "92933f4c-e287-5a05-a399-4b506db050ca"
+version = "1.9.0"
+
+[[deps.QuadGK]]
+deps = ["DataStructures", "LinearAlgebra"]
+git-tree-sha1 = "9ebcd48c498668c7fa0e97a9cae873fbee7bfee1"
+uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
+version = "2.9.1"
+
+[[deps.RCall]]
+deps = ["CategoricalArrays", "Conda", "DataFrames", "DataStructures", "Dates", "Libdl", "Missings", "REPL", "Random", "Requires", "StatsModels", "WinReg"]
+git-tree-sha1 = "3084689b18f9e5e817a6ce9a83a7654d8ad0f2f6"
+uuid = "6f49c342-dc21-5d91-9882-a32aef131414"
+version = "0.13.18"
+
+[[deps.REPL]]
+deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[deps.Random]]
+deps = ["SHA", "Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[deps.RealDot]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "9f0a1b71baaf7650f4fa8a1d168c7fb6ee41f0c9"
+uuid = "c1ae055f-0cd5-4b69-90a6-9a35b1a98df9"
+version = "0.1.0"
+
+[[deps.RecipesBase]]
+deps = ["PrecompileTools"]
+git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff"
+uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
+version = "1.3.4"
+
+[[deps.Reexport]]
+git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
+uuid = "189a3867-3050-52da-a836-e630ba90ab69"
+version = "1.2.2"
+
+[[deps.RelocatableFolders]]
+deps = ["SHA", "Scratch"]
+git-tree-sha1 = "ffdaf70d81cf6ff22c2b6e733c900c3321cab864"
+uuid = "05181044-ff0b-4ac5-8273-598c1e38db00"
+version = "1.0.1"
+
+[[deps.Requires]]
+deps = ["UUIDs"]
+git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
+uuid = "ae029012-a4dd-5104-9daa-d747884805df"
+version = "1.3.0"
+
+[[deps.Rmath]]
+deps = ["Random", "Rmath_jll"]
+git-tree-sha1 = "f65dcb5fa46aee0cf9ed6274ccbd597adc49aa7b"
+uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
+version = "0.7.1"
+
+[[deps.Rmath_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "6ed52fdd3382cf21947b15e8870ac0ddbff736da"
+uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f"
+version = "0.4.0+0"
+
+[[deps.Roots]]
+deps = ["ChainRulesCore", "CommonSolve", "Printf", "Setfield"]
+git-tree-sha1 = "0f1d92463a020321983d04c110f476c274bafe2e"
+uuid = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
+version = "2.0.22"
+
+    [deps.Roots.extensions]
+    RootsForwardDiffExt = "ForwardDiff"
+    RootsIntervalRootFindingExt = "IntervalRootFinding"
+    RootsSymPyExt = "SymPy"
+    RootsSymPyPythonCallExt = "SymPyPythonCall"
+
+    [deps.Roots.weakdeps]
+    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+    IntervalRootFinding = "d2bf35a9-74e0-55ec-b149-d360ff49b807"
+    SymPy = "24249f21-da20-56a4-8eb1-6a02cf4ae2e6"
+    SymPyPythonCall = "bc8888f7-b21e-4b7c-a06a-5d9c9496438c"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.ScientificTypes]]
+deps = ["CategoricalArrays", "ColorTypes", "Dates", "Distributions", "PrettyTables", "Reexport", "ScientificTypesBase", "StatisticalTraits", "Tables"]
+git-tree-sha1 = "75ccd10ca65b939dab03b812994e571bf1e3e1da"
+uuid = "321657f4-b219-11e9-178b-2701a2544e81"
+version = "3.0.2"
+
+[[deps.ScientificTypesBase]]
+git-tree-sha1 = "a8e18eb383b5ecf1b5e6fc237eb39255044fd92b"
+uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
+version = "3.0.0"
+
+[[deps.Scratch]]
+deps = ["Dates"]
+git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386"
+uuid = "6c6a2e73-6563-6170-7368-637461726353"
+version = "1.2.1"
+
+[[deps.SentinelArrays]]
+deps = ["Dates", "Random"]
+git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f"
+uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
+version = "1.4.1"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[[deps.Setfield]]
+deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"]
+git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac"
+uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
+version = "1.1.1"
+
+[[deps.SharedArrays]]
+deps = ["Distributed", "Mmap", "Random", "Serialization"]
+uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+
+[[deps.ShiftedArrays]]
+git-tree-sha1 = "503688b59397b3307443af35cd953a13e8005c16"
+uuid = "1277b4bf-5013-50f5-be3d-901d8477a67a"
+version = "2.0.0"
+
+[[deps.ShowCases]]
+git-tree-sha1 = "7f534ad62ab2bd48591bdeac81994ea8c445e4a5"
+uuid = "605ecd9f-84a6-4c9e-81e2-4798472b76a3"
+version = "0.1.0"
+
+[[deps.SimpleBufferStream]]
+git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1"
+uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7"
+version = "1.1.0"
+
+[[deps.SimpleTraits]]
+deps = ["InteractiveUtils", "MacroTools"]
+git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231"
+uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
+version = "0.9.4"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+
+[[deps.SortingAlgorithms]]
+deps = ["DataStructures"]
+git-tree-sha1 = "5165dfb9fd131cf0c6957a3a7605dede376e7b63"
+uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
+version = "1.2.0"
+
+[[deps.SparseArrays]]
+deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[[deps.SparseInverseSubset]]
+deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
+git-tree-sha1 = "91402087fd5d13b2d97e3ef29bbdf9d7859e678a"
+uuid = "dc90abb0-5640-4711-901d-7e5b23a2fada"
+version = "0.1.1"
+
+[[deps.SparseMatricesCSR]]
+deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
+git-tree-sha1 = "38677ca58e80b5cad2382e5a1848f93b054ad28d"
+uuid = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
+version = "0.6.7"
+
+[[deps.SpecialFunctions]]
+deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
+git-tree-sha1 = "e2cfc4012a19088254b3950b85c3c1d8882d864d"
+uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
+version = "2.3.1"
+weakdeps = ["ChainRulesCore"]
+
+    [deps.SpecialFunctions.extensions]
+    SpecialFunctionsChainRulesCoreExt = "ChainRulesCore"
+
+[[deps.SplittablesBase]]
+deps = ["Setfield", "Test"]
+git-tree-sha1 = "e08a62abc517eb79667d0a29dc08a3b589516bb5"
+uuid = "171d559e-b47b-412a-8079-5efa626c420e"
+version = "0.1.15"
+
+[[deps.StableRNGs]]
+deps = ["Random", "Test"]
+git-tree-sha1 = "3be7d49667040add7ee151fefaf1f8c04c8c8276"
+uuid = "860ef19b-820b-49d6-a774-d7a799459cd3"
+version = "1.0.0"
+
+[[deps.StaticArrays]]
+deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"]
+git-tree-sha1 = "5ef59aea6f18c25168842bded46b16662141ab87"
+uuid = "90137ffa-7385-5640-81b9-e52037218182"
+version = "1.7.0"
+weakdeps = ["Statistics"]
+
+    [deps.StaticArrays.extensions]
+    StaticArraysStatisticsExt = "Statistics"
+
+[[deps.StaticArraysCore]]
+git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d"
+uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
+version = "1.4.2"
+
+[[deps.StatisticalMeasures]]
+deps = ["CategoricalArrays", "CategoricalDistributions", "Distributions", "LearnAPI", "LinearAlgebra", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "StatisticalMeasuresBase", "Statistics", "StatsBase"]
+git-tree-sha1 = "b58c7cc3d7de6c0d75d8437b81481af924970123"
+uuid = "a19d573c-0a75-4610-95b3-7071388c7541"
+version = "0.1.3"
+
+    [deps.StatisticalMeasures.extensions]
+    LossFunctionsExt = "LossFunctions"
+    ScientificTypesExt = "ScientificTypes"
+
+    [deps.StatisticalMeasures.weakdeps]
+    LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
+    ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
+
+[[deps.StatisticalMeasuresBase]]
+deps = ["CategoricalArrays", "InteractiveUtils", "MLUtils", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "Statistics"]
+git-tree-sha1 = "17dfb22e2e4ccc9cd59b487dce52883e0151b4d3"
+uuid = "c062fc1d-0d66-479b-b6ac-8b44719de4cc"
+version = "0.1.1"
+
+[[deps.StatisticalTraits]]
+deps = ["ScientificTypesBase"]
+git-tree-sha1 = "30b9236691858e13f167ce829490a68e1a597782"
+uuid = "64bff920-2084-43da-a3e6-9bb72801c0c9"
+version = "3.2.0"
+
+[[deps.Statistics]]
+deps = ["LinearAlgebra", "SparseArrays"]
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+version = "1.9.0"
+
+[[deps.StatsAPI]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed"
+uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
+version = "1.7.0"
+
+[[deps.StatsBase]]
+deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"]
+git-tree-sha1 = "1d77abd07f617c4868c33d4f5b9e1dbb2643c9cf"
+uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+version = "0.34.2"
+
+[[deps.StatsFuns]]
+deps = ["HypergeometricFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"]
+git-tree-sha1 = "f625d686d5a88bcd2b15cd81f18f98186fdc0c9a"
+uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
+version = "1.3.0"
+
+    [deps.StatsFuns.extensions]
+    StatsFunsChainRulesCoreExt = "ChainRulesCore"
+    StatsFunsInverseFunctionsExt = "InverseFunctions"
+
+    [deps.StatsFuns.weakdeps]
+    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+    InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
+
+[[deps.StatsModels]]
+deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Printf", "REPL", "ShiftedArrays", "SparseArrays", "StatsAPI", "StatsBase", "StatsFuns", "Tables"]
+git-tree-sha1 = "5cf6c4583533ee38639f73b880f35fc85f2941e0"
+uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
+version = "0.7.3"
+
+[[deps.StringEncodings]]
+deps = ["Libiconv_jll"]
+git-tree-sha1 = "b765e46ba27ecf6b44faf70df40c57aa3a547dcb"
+uuid = "69024149-9ee7-55f6-a4c4-859efe599b68"
+version = "0.3.7"
+
+[[deps.StringManipulation]]
+deps = ["PrecompileTools"]
+git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5"
+uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e"
+version = "0.3.4"
+
+[[deps.StructArrays]]
+deps = ["Adapt", "ConstructionBase", "DataAPI", "GPUArraysCore", "StaticArraysCore", "Tables"]
+git-tree-sha1 = "0a3db38e4cce3c54fe7a71f831cd7b6194a54213"
+uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
+version = "0.6.16"
+
+[[deps.StructTypes]]
+deps = ["Dates", "UUIDs"]
+git-tree-sha1 = "ca4bccb03acf9faaf4137a9abc1881ed1841aa70"
+uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
+version = "1.10.0"
+
+[[deps.SuiteSparse]]
+deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
+uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
+
+[[deps.SuiteSparse_jll]]
+deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"]
+uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
+version = "5.10.1+6"
+
+[[deps.TMLE]]
+deps = ["AbstractDifferentiation", "CategoricalArrays", "Combinatorics", "Distributions", "GLM", "Graphs", "HypothesisTests", "LogExpFunctions", "MLJBase", "MLJGLMInterface", "MLJModels", "MetaGraphsNext", "Missings", "PrecompileTools", "PrettyTables", "Random", "Statistics", "TableOperations", "Tables", "Zygote"]
+git-tree-sha1 = "fe31c10325f1e911dae33a5d521cc07c2c7eeecd"
+repo-rev = "cvtmle"
+repo-url = "https://github.com/TARGENE/TMLE.jl.git"
+uuid = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf"
+version = "0.12.0"
+
+    [deps.TMLE.extensions]
+    GraphMakieExt = ["GraphMakie", "CairoMakie"]
+    JSONExt = "JSON"
+    YAMLExt = "YAML"
+
+    [deps.TMLE.weakdeps]
+    CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
+    GraphMakie = "1ecd5474-83a3-4783-bb4f-06765db800d2"
+    JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+    YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
+
+[[deps.TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+version = "1.0.3"
+
+[[deps.TZJData]]
+deps = ["Artifacts"]
+git-tree-sha1 = "d39314cdbaf5b90a047db33858626f8d1cc973e1"
+uuid = "dc5dba14-91b3-4cab-a142-028a31da12f7"
+version = "1.0.0+2023c"
+
+[[deps.TableOperations]]
+deps = ["SentinelArrays", "Tables", "Test"]
+git-tree-sha1 = "e383c87cf2a1dc41fa30c093b2a19877c83e1bc1"
+uuid = "ab02a1b2-a7df-11e8-156e-fb1833f50b87"
+version = "1.2.0"
+
+[[deps.TableTraits]]
+deps = ["IteratorInterfaceExtensions"]
+git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
+uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
+version = "1.0.1"
+
+[[deps.Tables]]
+deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"]
+git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d"
+uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+version = "1.11.1"
+
+[[deps.Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+version = "1.10.0"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[[deps.TimeZones]]
+deps = ["Artifacts", "Dates", "Downloads", "InlineStrings", "LazyArtifacts", "Mocking", "Printf", "Scratch", "TZJData", "Unicode", "p7zip_jll"]
+git-tree-sha1 = "89e64d61ef3cd9e80f7fc12b7d13db2d75a23c03"
+uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53"
+version = "1.13.0"
+weakdeps = ["RecipesBase"]
+
+    [deps.TimeZones.extensions]
+    TimeZonesRecipesBaseExt = "RecipesBase"
+
+[[deps.TranscodingStreams]]
+deps = ["Random", "Test"]
+git-tree-sha1 = "9a6ae7ed916312b41236fcef7e0af564ef934769"
+uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
+version = "0.9.13"
+
+[[deps.Transducers]]
+deps = ["Adapt", "ArgCheck", "BangBang", "Baselet", "CompositionsBase", "ConstructionBase", "DefineSingletons", "Distributed", "InitialValues", "Logging", "Markdown", "MicroCollections", "Requires", "Setfield", "SplittablesBase", "Tables"]
+git-tree-sha1 = "e579d3c991938fecbb225699e8f611fa3fbf2141"
+uuid = "28d57a85-8fef-5791-bfe6-a80928e7c999"
+version = "0.4.79"
+
+    [deps.Transducers.extensions]
+    TransducersBlockArraysExt = "BlockArrays"
+    TransducersDataFramesExt = "DataFrames"
+    TransducersLazyArraysExt = "LazyArrays"
+    TransducersOnlineStatsBaseExt = "OnlineStatsBase"
+    TransducersReferenceablesExt = "Referenceables"
+
+    [deps.Transducers.weakdeps]
+    BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
+    DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+    LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
+    OnlineStatsBase = "925886fa-5bf2-5e8e-b522-a9147a512338"
+    Referenceables = "42d2dcc6-99eb-4e98-b66c-637b7d73030e"
+
+[[deps.URIs]]
+git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b"
+uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
+version = "1.5.1"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[[deps.UnPack]]
+git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
+uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
+version = "1.0.2"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+
+[[deps.UnsafeAtomics]]
+git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278"
+uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f"
+version = "0.2.1"
+
+[[deps.UnsafeAtomicsLLVM]]
+deps = ["LLVM", "UnsafeAtomics"]
+git-tree-sha1 = "323e3d0acf5e78a56dfae7bd8928c989b4f3083e"
+uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249"
+version = "0.1.3"
+
+[[deps.VersionParsing]]
+git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868"
+uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
+version = "1.3.0"
+
+[[deps.WeakRefStrings]]
+deps = ["DataAPI", "InlineStrings", "Parsers"]
+git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23"
+uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
+version = "1.4.2"
+
+[[deps.WinReg]]
+git-tree-sha1 = "cd910906b099402bcc50b3eafa9634244e5ec83b"
+uuid = "1b915085-20d7-51cf-bf83-8f477d6f5128"
+version = "1.0.0"
+
+[[deps.WorkerUtilities]]
+git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7"
+uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60"
+version = "1.6.1"
+
+[[deps.XGBoost]]
+deps = ["AbstractTrees", "CEnum", "JSON3", "LinearAlgebra", "OrderedCollections", "SparseArrays", "SparseMatricesCSR", "Statistics", "Tables", "XGBoost_jll"]
+git-tree-sha1 = "bacb62e07d104630094c8dac2fd070f5d4b9b305"
+uuid = "009559a3-9522-5dbb-924b-0b6ed2b22bb9"
+version = "2.5.1"
+
+    [deps.XGBoost.extensions]
+    XGBoostCUDAExt = "CUDA"
+    XGBoostTermExt = "Term"
+
+    [deps.XGBoost.weakdeps]
+    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+    Term = "22787eb5-b846-44ae-b979-8e399b8463ab"
+
+[[deps.XGBoost_jll]]
+deps = ["Artifacts", "CUDA_Runtime_jll", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "Libdl", "TOML"]
+git-tree-sha1 = "1c0aa2390a7ebb28a3d6c214f64e57a24091fbd7"
+uuid = "a5c6f535-4255-5ca2-a466-0e519f119c46"
+version = "2.0.1+0"
+
+[[deps.YAML]]
+deps = ["Base64", "Dates", "Printf", "StringEncodings"]
+git-tree-sha1 = "e6330e4b731a6af7959673621e91645eb1356884"
+uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
+version = "0.4.9"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.2.13+0"
+
+[[deps.Zstd_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "49ce682769cd5de6c72dcf1b94ed7790cd08974c"
+uuid = "3161d3a3-bdf6-5164-811a-617609db77b4"
+version = "1.5.5+0"
+
+[[deps.Zygote]]
+deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "GPUArrays", "GPUArraysCore", "IRTools", "InteractiveUtils", "LinearAlgebra", "LogExpFunctions", "MacroTools", "NaNMath", "PrecompileTools", "Random", "Requires", "SparseArrays", "SpecialFunctions", "Statistics", "ZygoteRules"]
+git-tree-sha1 = "5ded212acd815612df112bb895ef3910c5a03f57"
+uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
+version = "0.6.67"
+
+    [deps.Zygote.extensions]
+    ZygoteColorsExt = "Colors"
+    ZygoteDistancesExt = "Distances"
+    ZygoteTrackerExt = "Tracker"
+
+    [deps.Zygote.weakdeps]
+    Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
+    Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+
+[[deps.ZygoteRules]]
+deps = ["ChainRulesCore", "MacroTools"]
+git-tree-sha1 = "9d749cd449fb448aeca4feee9a2f4186dbb5d184"
+uuid = "700de1a5-db45-46bc-99cf-38207098b444"
+version = "0.2.4"
+
+[[deps.glmnet_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
+git-tree-sha1 = "31adae3b983b579a1fbd7cfd43a4bc0d224c2f5a"
+uuid = "78c6b45d-5eaf-5d68-bcfb-a5a2cb06c27f"
+version = "2.0.13+0"
+
+[[deps.libaec_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "eddd19a8dea6b139ea97bdc8a0e2667d4b661720"
+uuid = "477f73a3-ac25-53e9-8cc3-50b2fa2566f0"
+version = "1.0.6+1"
+
+[[deps.libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+version = "5.8.0+0"
+
+[[deps.libevent_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "OpenSSL_jll"]
+git-tree-sha1 = "f04ec6d9a186115fb38f858f05c0c4e1b7fc9dcb"
+uuid = "1080aeaf-3a6a-583e-a51c-c537b09f60ec"
+version = "2.1.13+1"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.48.0+0"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.4.0+0"
+
+[[deps.prrte_jll]]
+deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "PMIx_jll", "libevent_jll"]
+git-tree-sha1 = "5adb2d7a18a30280feb66cad6f1a1dfdca2dc7b0"
+uuid = "eb928a42-fffd-568d-ab9c-3f5d54fc65b9"
+version = "3.0.2+0"
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7464f91..4302c27 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM julia:1.9.0-bullseye
+FROM julia:1.9.4-bullseye
 
 ARG DEBIAN_FRONTEND=noninteractive
 
diff --git a/docs/Project.toml b/docs/Project.toml
index dfa65cd..7e440b8 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,2 +1,6 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+
+[compat]
+
+Documenter = "1.2.1"
\ No newline at end of file
diff --git a/sysimage/Project.toml b/sysimage/Project.toml
deleted file mode 100644
index fbb19f0..0000000
--- a/sysimage/Project.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[deps]
-PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d"
diff --git a/sysimage/create_sysimage.jl b/sysimage/create_sysimage.jl
deleted file mode 100644
index 2537aa8..0000000
--- a/sysimage/create_sysimage.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-
-SYSIMAGE_DIR = dirname(@__FILE__)
-push!(LOAD_PATH, SYSIMAGE_DIR)
-
-using PackageCompiler
-
-create_sysimage(
-    ["TargetedEstimation"]; 
-    sysimage_path="TargetedEstimationSysimage.so",
-    precompile_execution_file=joinpath(SYSIMAGE_DIR, "precompile_exec_file.jl")
-)
\ No newline at end of file
diff --git a/sysimage/precompile_exec_file.jl b/sysimage/precompile_exec_file.jl
deleted file mode 100644
index a45912e..0000000
--- a/sysimage/precompile_exec_file.jl
+++ /dev/null
@@ -1,6 +0,0 @@
-import TargetedEstimation
-
-TEST_DIR = joinpath(pkgdir(TargetedEstimation), "test")
-push!(LOAD_PATH, TEST_DIR)
-cd(TEST_DIR)
-include(joinpath(TEST_DIR, "runtests.jl"))
\ No newline at end of file

From 47f9ce794c3a8d022428ade8a77b9fb19979607a Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 5 Dec 2023 10:52:04 +0000
Subject: [PATCH 29/71] rename build file build_app.jl

---
 deps/{build.jl => build_app.jl} | 0
 docker/Dockerfile               | 2 +-
 docs/Project.toml               | 1 -
 3 files changed, 1 insertion(+), 2 deletions(-)
 rename deps/{build.jl => build_app.jl} (100%)

diff --git a/deps/build.jl b/deps/build_app.jl
similarity index 100%
rename from deps/build.jl
rename to deps/build_app.jl
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 4302c27..bab40d7 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -41,7 +41,7 @@ WORKDIR /TargetedEstimation.jl
 RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
 
 # Precompile Sysimage project
-RUN julia --project -t auto --startup-file=no deps/build.jl app
+RUN julia --project -t auto --startup-file=no deps/build_app.jl app
 
 ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
 
diff --git a/docs/Project.toml b/docs/Project.toml
index 7e440b8..5df1ad8 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -2,5 +2,4 @@
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 
 [compat]
-
 Documenter = "1.2.1"
\ No newline at end of file

From 84356680326cfe1a7db14cc30457da1b9b989125 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 6 Dec 2023 15:46:22 +0000
Subject: [PATCH 30/71] try remove precompilation

---
 Comonicon.toml    | 20 ++++++++++----------
 docker/Dockerfile |  3 +--
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/Comonicon.toml b/Comonicon.toml
index 48c2a41..4acd652 100644
--- a/Comonicon.toml
+++ b/Comonicon.toml
@@ -5,16 +5,16 @@ completion = true
 quiet = false
 optimize = 2
 
-[sysimg]
-incremental=true
-filter_stdlibs=false
+# [sysimg]
+# incremental=true
+# filter_stdlibs=false
 
-[sysimg.precompile]
-execution_file = ["deps/execute.jl"]
+# [sysimg.precompile]
+# execution_file = ["deps/execute.jl"]
 
-[application]
-incremental=true
-filter_stdlibs=false
+# [application]
+# incremental=true
+# filter_stdlibs=false
 
-[application.precompile]
-execution_file = ["deps/execute.jl"]
\ No newline at end of file
+# [application.precompile]
+# execution_file = ["deps/execute.jl"]
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
index bab40d7..99aa2c7 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -41,8 +41,7 @@ WORKDIR /TargetedEstimation.jl
 RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
 
 # Precompile Sysimage project
-RUN julia --project -t auto --startup-file=no deps/build_app.jl app
+RUN julia --project -t auto --startup-file=no deps/build_app.jl
 
 ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
 
-RUN tmle --help

From 9899da68d0e7c0f4f45c8e2d5671cf099223a9c7 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 6 Dec 2023 21:51:06 +0000
Subject: [PATCH 31/71] up TMLE and remove Manifest from repo

---
 .gitignore    |    2 +-
 Manifest.toml | 1812 -------------------------------------------------
 Project.toml  |    1 +
 3 files changed, 2 insertions(+), 1813 deletions(-)
 delete mode 100644 Manifest.toml

diff --git a/.gitignore b/.gitignore
index 648f644..63898a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,6 @@ docs/site/
 docs/Manifest.toml
 
 test/Manifest.toml
-sysimage/Manifest.toml
+Manifest.toml
 
 build/
diff --git a/Manifest.toml b/Manifest.toml
deleted file mode 100644
index eb59e18..0000000
--- a/Manifest.toml
+++ /dev/null
@@ -1,1812 +0,0 @@
-# This file is machine-generated - editing it directly is not advised
-
-julia_version = "1.9.2"
-manifest_format = "2.0"
-project_hash = "79b338af0999710186711c6c1c568ae8891f4dc2"
-
-[[deps.ARFFFiles]]
-deps = ["CategoricalArrays", "Dates", "Parsers", "Tables"]
-git-tree-sha1 = "e8c8e0a2be6eb4f56b1672e46004463033daa409"
-uuid = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
-version = "1.4.1"
-
-[[deps.AbstractDifferentiation]]
-deps = ["ExprTools", "LinearAlgebra", "Requires"]
-git-tree-sha1 = "6a5e61dc899ab116035c18ead4ec890269f3c478"
-uuid = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d"
-version = "0.6.0"
-
-    [deps.AbstractDifferentiation.extensions]
-    AbstractDifferentiationChainRulesCoreExt = "ChainRulesCore"
-    AbstractDifferentiationFiniteDifferencesExt = "FiniteDifferences"
-    AbstractDifferentiationForwardDiffExt = ["DiffResults", "ForwardDiff"]
-    AbstractDifferentiationReverseDiffExt = ["DiffResults", "ReverseDiff"]
-    AbstractDifferentiationTrackerExt = "Tracker"
-    AbstractDifferentiationZygoteExt = "Zygote"
-
-    [deps.AbstractDifferentiation.weakdeps]
-    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-    DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
-    FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
-    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-    ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
-    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
-    Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
-
-[[deps.AbstractFFTs]]
-deps = ["LinearAlgebra"]
-git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef"
-uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
-version = "1.5.0"
-weakdeps = ["ChainRulesCore", "Test"]
-
-    [deps.AbstractFFTs.extensions]
-    AbstractFFTsChainRulesCoreExt = "ChainRulesCore"
-    AbstractFFTsTestExt = "Test"
-
-[[deps.AbstractTrees]]
-git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c"
-uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
-version = "0.4.4"
-
-[[deps.Adapt]]
-deps = ["LinearAlgebra", "Requires"]
-git-tree-sha1 = "02f731463748db57cc2ebfbd9fbc9ce8280d3433"
-uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
-version = "3.7.1"
-weakdeps = ["StaticArrays"]
-
-    [deps.Adapt.extensions]
-    AdaptStaticArraysExt = "StaticArrays"
-
-[[deps.ArgCheck]]
-git-tree-sha1 = "a3a402a35a2f7e0b87828ccabbd5ebfbebe356b4"
-uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197"
-version = "2.3.0"
-
-[[deps.ArgTools]]
-uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
-version = "1.1.1"
-
-[[deps.ArnoldiMethod]]
-deps = ["LinearAlgebra", "Random", "StaticArrays"]
-git-tree-sha1 = "62e51b39331de8911e4a7ff6f5aaf38a5f4cc0ae"
-uuid = "ec485272-7323-5ecc-a04f-4719b315124d"
-version = "0.2.0"
-
-[[deps.ArrayInterface]]
-deps = ["Adapt", "LinearAlgebra", "Requires", "SparseArrays", "SuiteSparse"]
-git-tree-sha1 = "247efbccf92448be332d154d6ca56b9fcdd93c31"
-uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
-version = "7.6.1"
-
-    [deps.ArrayInterface.extensions]
-    ArrayInterfaceBandedMatricesExt = "BandedMatrices"
-    ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices"
-    ArrayInterfaceCUDAExt = "CUDA"
-    ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore"
-    ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore"
-    ArrayInterfaceTrackerExt = "Tracker"
-
-    [deps.ArrayInterface.weakdeps]
-    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
-    BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
-    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-    GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
-    StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
-    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
-
-[[deps.Arrow]]
-deps = ["ArrowTypes", "BitIntegers", "CodecLz4", "CodecZstd", "ConcurrentUtilities", "DataAPI", "Dates", "EnumX", "LoggingExtras", "Mmap", "PooledArrays", "SentinelArrays", "Tables", "TimeZones", "TranscodingStreams", "UUIDs"]
-git-tree-sha1 = "954666e252835c4cf8819ce4ffaf31073c1b7233"
-uuid = "69666777-d1a9-59fb-9406-91d4454c9d45"
-version = "2.6.2"
-
-[[deps.ArrowTypes]]
-deps = ["Sockets", "UUIDs"]
-git-tree-sha1 = "8c37bfdf1b689c6677bbfc8986968fe641f6a299"
-uuid = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
-version = "2.2.2"
-
-[[deps.Artifacts]]
-uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
-
-[[deps.Atomix]]
-deps = ["UnsafeAtomics"]
-git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be"
-uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
-version = "0.1.0"
-
-[[deps.BSON]]
-git-tree-sha1 = "2208958832d6e1b59e49f53697483a84ca8d664e"
-uuid = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
-version = "0.3.7"
-
-[[deps.BangBang]]
-deps = ["Compat", "ConstructionBase", "InitialValues", "LinearAlgebra", "Requires", "Setfield", "Tables"]
-git-tree-sha1 = "e28912ce94077686443433c2800104b061a827ed"
-uuid = "198e06fe-97b7-11e9-32a5-e1d131e6ad66"
-version = "0.3.39"
-
-    [deps.BangBang.extensions]
-    BangBangChainRulesCoreExt = "ChainRulesCore"
-    BangBangDataFramesExt = "DataFrames"
-    BangBangStaticArraysExt = "StaticArrays"
-    BangBangStructArraysExt = "StructArrays"
-    BangBangTypedTablesExt = "TypedTables"
-
-    [deps.BangBang.weakdeps]
-    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-    DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
-    StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
-    TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
-
-[[deps.Base64]]
-uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
-
-[[deps.Baselet]]
-git-tree-sha1 = "aebf55e6d7795e02ca500a689d326ac979aaf89e"
-uuid = "9718e550-a3fa-408a-8086-8db961cd8217"
-version = "0.1.1"
-
-[[deps.BitFlags]]
-git-tree-sha1 = "2dc09997850d68179b69dafb58ae806167a32b1b"
-uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35"
-version = "0.1.8"
-
-[[deps.BitIntegers]]
-deps = ["Random"]
-git-tree-sha1 = "a55462dfddabc34bc97d3a7403a2ca2802179ae6"
-uuid = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1"
-version = "0.3.1"
-
-[[deps.CEnum]]
-git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90"
-uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
-version = "0.4.2"
-
-[[deps.CSV]]
-deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"]
-git-tree-sha1 = "44dbf560808d49041989b8a96cae4cffbeb7966a"
-uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
-version = "0.10.11"
-
-[[deps.CUDA_Driver_jll]]
-deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"]
-git-tree-sha1 = "1e42ef1bdb45487ff28de16182c0df4920181dc3"
-uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc"
-version = "0.7.0+0"
-
-[[deps.CUDA_Runtime_jll]]
-deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
-git-tree-sha1 = "9704e50c9158cf8896c2776b8dbc5edd136caf80"
-uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
-version = "0.10.1+0"
-
-[[deps.Calculus]]
-deps = ["LinearAlgebra"]
-git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad"
-uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9"
-version = "0.5.1"
-
-[[deps.CategoricalArrays]]
-deps = ["DataAPI", "Future", "Missings", "Printf", "Requires", "Statistics", "Unicode"]
-git-tree-sha1 = "1568b28f91293458345dabba6a5ea3f183250a61"
-uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
-version = "0.10.8"
-weakdeps = ["JSON", "RecipesBase", "SentinelArrays", "StructTypes"]
-
-    [deps.CategoricalArrays.extensions]
-    CategoricalArraysJSONExt = "JSON"
-    CategoricalArraysRecipesBaseExt = "RecipesBase"
-    CategoricalArraysSentinelArraysExt = "SentinelArrays"
-    CategoricalArraysStructTypesExt = "StructTypes"
-
-[[deps.CategoricalDistributions]]
-deps = ["CategoricalArrays", "Distributions", "Missings", "OrderedCollections", "Random", "ScientificTypes"]
-git-tree-sha1 = "3124343a1b0c9a2f5fdc1d9bcc633ba11735a4c4"
-uuid = "af321ab8-2d2e-40a6-b165-3d674595d28e"
-version = "0.1.13"
-
-    [deps.CategoricalDistributions.extensions]
-    UnivariateFiniteDisplayExt = "UnicodePlots"
-
-    [deps.CategoricalDistributions.weakdeps]
-    UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
-
-[[deps.ChainRules]]
-deps = ["Adapt", "ChainRulesCore", "Compat", "Distributed", "GPUArraysCore", "IrrationalConstants", "LinearAlgebra", "Random", "RealDot", "SparseArrays", "SparseInverseSubset", "Statistics", "StructArrays", "SuiteSparse"]
-git-tree-sha1 = "006cc7170be3e0fa02ccac6d4164a1eee1fc8c27"
-uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2"
-version = "1.58.0"
-
-[[deps.ChainRulesCore]]
-deps = ["Compat", "LinearAlgebra"]
-git-tree-sha1 = "e0af648f0692ec1691b5d094b8724ba1346281cf"
-uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-version = "1.18.0"
-weakdeps = ["SparseArrays"]
-
-    [deps.ChainRulesCore.extensions]
-    ChainRulesCoreSparseArraysExt = "SparseArrays"
-
-[[deps.CodecLz4]]
-deps = ["Lz4_jll", "TranscodingStreams"]
-git-tree-sha1 = "8bf4f9e2ee52b5e217451a7cd9171fcd4e16ae23"
-uuid = "5ba52731-8f18-5e0d-9241-30f10d1ec561"
-version = "0.4.1"
-
-[[deps.CodecZlib]]
-deps = ["TranscodingStreams", "Zlib_jll"]
-git-tree-sha1 = "cd67fc487743b2f0fd4380d4cbd3a24660d0eec8"
-uuid = "944b1d66-785c-5afd-91f1-9de20f533193"
-version = "0.7.3"
-
-[[deps.CodecZstd]]
-deps = ["CEnum", "TranscodingStreams", "Zstd_jll"]
-git-tree-sha1 = "849470b337d0fa8449c21061de922386f32949d9"
-uuid = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
-version = "0.7.2"
-
-[[deps.ColorTypes]]
-deps = ["FixedPointNumbers", "Random"]
-git-tree-sha1 = "eb7f0f8307f71fac7c606984ea5fb2817275d6e4"
-uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
-version = "0.11.4"
-
-[[deps.Combinatorics]]
-git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860"
-uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
-version = "1.0.2"
-
-[[deps.CommonSolve]]
-git-tree-sha1 = "0eee5eb66b1cf62cd6ad1b460238e60e4b09400c"
-uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
-version = "0.2.4"
-
-[[deps.CommonSubexpressions]]
-deps = ["MacroTools", "Test"]
-git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7"
-uuid = "bbf7d656-a473-5ed7-a52c-81e309532950"
-version = "0.3.0"
-
-[[deps.Comonicon]]
-deps = ["Configurations", "ExproniconLite", "Libdl", "Logging", "Markdown", "OrderedCollections", "PackageCompiler", "Pkg", "Scratch", "TOML", "UUIDs"]
-git-tree-sha1 = "552667002fdd5602ca72e0aeac8bd099daa0e040"
-uuid = "863f3e99-da2a-4334-8734-de3dacbe5542"
-version = "1.0.6"
-
-[[deps.Compat]]
-deps = ["UUIDs"]
-git-tree-sha1 = "886826d76ea9e72b35fcd000e535588f7b60f21d"
-uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
-version = "4.10.1"
-weakdeps = ["Dates", "LinearAlgebra"]
-
-    [deps.Compat.extensions]
-    CompatLinearAlgebraExt = "LinearAlgebra"
-
-[[deps.CompilerSupportLibraries_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
-version = "1.0.5+0"
-
-[[deps.CompositionsBase]]
-git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad"
-uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b"
-version = "0.1.2"
-
-    [deps.CompositionsBase.extensions]
-    CompositionsBaseInverseFunctionsExt = "InverseFunctions"
-
-    [deps.CompositionsBase.weakdeps]
-    InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
-
-[[deps.ComputationalResources]]
-git-tree-sha1 = "52cb3ec90e8a8bea0e62e275ba577ad0f74821f7"
-uuid = "ed09eef8-17a6-5b46-8889-db040fac31e3"
-version = "0.3.2"
-
-[[deps.ConcurrentUtilities]]
-deps = ["Serialization", "Sockets"]
-git-tree-sha1 = "8cfa272e8bdedfa88b6aefbbca7c19f1befac519"
-uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb"
-version = "2.3.0"
-
-[[deps.Conda]]
-deps = ["Downloads", "JSON", "VersionParsing"]
-git-tree-sha1 = "51cab8e982c5b598eea9c8ceaced4b58d9dd37c9"
-uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
-version = "1.10.0"
-
-[[deps.Configurations]]
-deps = ["ExproniconLite", "OrderedCollections", "TOML"]
-git-tree-sha1 = "4358750bb58a3caefd5f37a4a0c5bfdbbf075252"
-uuid = "5218b696-f38b-4ac9-8b61-a12ec717816d"
-version = "0.17.6"
-
-[[deps.ConstructionBase]]
-deps = ["LinearAlgebra"]
-git-tree-sha1 = "c53fc348ca4d40d7b371e71fd52251839080cbc9"
-uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
-version = "1.5.4"
-
-    [deps.ConstructionBase.extensions]
-    ConstructionBaseIntervalSetsExt = "IntervalSets"
-    ConstructionBaseStaticArraysExt = "StaticArrays"
-
-    [deps.ConstructionBase.weakdeps]
-    IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
-    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
-
-[[deps.ContextVariablesX]]
-deps = ["Compat", "Logging", "UUIDs"]
-git-tree-sha1 = "25cc3803f1030ab855e383129dcd3dc294e322cc"
-uuid = "6add18c4-b38d-439d-96f6-d6bc489c04c5"
-version = "0.1.3"
-
-[[deps.Crayons]]
-git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15"
-uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
-version = "4.1.1"
-
-[[deps.DataAPI]]
-git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c"
-uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
-version = "1.15.0"
-
-[[deps.DataFrames]]
-deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
-git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8"
-uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-version = "1.6.1"
-
-[[deps.DataStructures]]
-deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
-git-tree-sha1 = "3dbd312d370723b6bb43ba9d02fc36abade4518d"
-uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
-version = "0.18.15"
-
-[[deps.DataValueInterfaces]]
-git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
-uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
-version = "1.0.0"
-
-[[deps.Dates]]
-deps = ["Printf"]
-uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
-
-[[deps.DefineSingletons]]
-git-tree-sha1 = "0fba8b706d0178b4dc7fd44a96a92382c9065c2c"
-uuid = "244e2a9f-e319-4986-a169-4d1fe445cd52"
-version = "0.1.2"
-
-[[deps.DelimitedFiles]]
-deps = ["Mmap"]
-git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae"
-uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
-version = "1.9.1"
-
-[[deps.DiffResults]]
-deps = ["StaticArraysCore"]
-git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621"
-uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
-version = "1.1.0"
-
-[[deps.DiffRules]]
-deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"]
-git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272"
-uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
-version = "1.15.1"
-
-[[deps.Distances]]
-deps = ["LinearAlgebra", "Statistics", "StatsAPI"]
-git-tree-sha1 = "66c4c81f259586e8f002eacebc177e1fb06363b0"
-uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
-version = "0.10.11"
-weakdeps = ["ChainRulesCore", "SparseArrays"]
-
-    [deps.Distances.extensions]
-    DistancesChainRulesCoreExt = "ChainRulesCore"
-    DistancesSparseArraysExt = "SparseArrays"
-
-[[deps.Distributed]]
-deps = ["Random", "Serialization", "Sockets"]
-uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
-
-[[deps.Distributions]]
-deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"]
-git-tree-sha1 = "a6c00f894f24460379cb7136633cef54ac9f6f4a"
-uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.103"
-
-    [deps.Distributions.extensions]
-    DistributionsChainRulesCoreExt = "ChainRulesCore"
-    DistributionsDensityInterfaceExt = "DensityInterface"
-    DistributionsTestExt = "Test"
-
-    [deps.Distributions.weakdeps]
-    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-    DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
-    Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[[deps.DocStringExtensions]]
-deps = ["LibGit2"]
-git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d"
-uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-version = "0.9.3"
-
-[[deps.Downloads]]
-deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
-uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
-version = "1.6.0"
-
-[[deps.DualNumbers]]
-deps = ["Calculus", "NaNMath", "SpecialFunctions"]
-git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566"
-uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74"
-version = "0.6.8"
-
-[[deps.EarCut_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "e3290f2d49e661fbd94046d7e3726ffcb2d41053"
-uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5"
-version = "2.2.4+0"
-
-[[deps.EarlyStopping]]
-deps = ["Dates", "Statistics"]
-git-tree-sha1 = "98fdf08b707aaf69f524a6cd0a67858cefe0cfb6"
-uuid = "792122b4-ca99-40de-a6bc-6742525f08b6"
-version = "0.3.0"
-
-[[deps.EnumX]]
-git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237"
-uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
-version = "1.0.4"
-
-[[deps.EvoTrees]]
-deps = ["BSON", "CategoricalArrays", "Distributions", "MLJModelInterface", "NetworkLayout", "Random", "RecipesBase", "Statistics", "StatsBase", "Tables"]
-git-tree-sha1 = "f08d64339d7259b0c69a00a1e321dc6da79672ea"
-uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
-version = "0.16.5"
-
-    [deps.EvoTrees.extensions]
-    EvoTreesCUDAExt = "CUDA"
-
-    [deps.EvoTrees.weakdeps]
-    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-
-[[deps.ExceptionUnwrapping]]
-deps = ["Test"]
-git-tree-sha1 = "e90caa41f5a86296e014e148ee061bd6c3edec96"
-uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4"
-version = "0.1.9"
-
-[[deps.ExprTools]]
-git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec"
-uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
-version = "0.1.10"
-
-[[deps.ExproniconLite]]
-git-tree-sha1 = "fbc390c2f896031db5484bc152a7e805ecdfb01f"
-uuid = "55351af7-c7e9-48d6-89ff-24e801d99491"
-version = "0.10.5"
-
-[[deps.Extents]]
-git-tree-sha1 = "2140cd04483da90b2da7f99b2add0750504fc39c"
-uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910"
-version = "0.1.2"
-
-[[deps.FLoops]]
-deps = ["BangBang", "Compat", "FLoopsBase", "InitialValues", "JuliaVariables", "MLStyle", "Serialization", "Setfield", "Transducers"]
-git-tree-sha1 = "ffb97765602e3cbe59a0589d237bf07f245a8576"
-uuid = "cc61a311-1640-44b5-9fba-1b764f453329"
-version = "0.2.1"
-
-[[deps.FLoopsBase]]
-deps = ["ContextVariablesX"]
-git-tree-sha1 = "656f7a6859be8673bf1f35da5670246b923964f7"
-uuid = "b9860ae5-e623-471e-878b-f6a53c775ea6"
-version = "0.1.1"
-
-[[deps.FileIO]]
-deps = ["Pkg", "Requires", "UUIDs"]
-git-tree-sha1 = "299dc33549f68299137e51e6d49a13b5b1da9673"
-uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
-version = "1.16.1"
-
-[[deps.FilePathsBase]]
-deps = ["Compat", "Dates", "Mmap", "Printf", "Test", "UUIDs"]
-git-tree-sha1 = "9f00e42f8d99fdde64d40c8ea5d14269a2e2c1aa"
-uuid = "48062228-2e41-5def-b9a4-89aafe57970f"
-version = "0.9.21"
-
-[[deps.FileWatching]]
-uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
-
-[[deps.FillArrays]]
-deps = ["LinearAlgebra", "Random"]
-git-tree-sha1 = "28e4e9c4b7b162398ec8004bdabe9a90c78c122d"
-uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
-version = "1.8.0"
-weakdeps = ["PDMats", "SparseArrays", "Statistics"]
-
-    [deps.FillArrays.extensions]
-    FillArraysPDMatsExt = "PDMats"
-    FillArraysSparseArraysExt = "SparseArrays"
-    FillArraysStatisticsExt = "Statistics"
-
-[[deps.FiniteDiff]]
-deps = ["ArrayInterface", "LinearAlgebra", "Requires", "Setfield", "SparseArrays"]
-git-tree-sha1 = "c6e4a1fbe73b31a3dea94b1da449503b8830c306"
-uuid = "6a86dc24-6348-571c-b903-95158fe2bd41"
-version = "2.21.1"
-
-    [deps.FiniteDiff.extensions]
-    FiniteDiffBandedMatricesExt = "BandedMatrices"
-    FiniteDiffBlockBandedMatricesExt = "BlockBandedMatrices"
-    FiniteDiffStaticArraysExt = "StaticArrays"
-
-    [deps.FiniteDiff.weakdeps]
-    BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
-    BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
-    StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
-
-[[deps.FixedPointNumbers]]
-deps = ["Statistics"]
-git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc"
-uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
-version = "0.8.4"
-
-[[deps.ForwardDiff]]
-deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"]
-git-tree-sha1 = "cf0fe81336da9fb90944683b8c41984b08793dad"
-uuid = "f6369f11-7733-5829-9624-2563aa707210"
-version = "0.10.36"
-weakdeps = ["StaticArrays"]
-
-    [deps.ForwardDiff.extensions]
-    ForwardDiffStaticArraysExt = "StaticArrays"
-
-[[deps.Future]]
-deps = ["Random"]
-uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
-
-[[deps.GLM]]
-deps = ["Distributions", "LinearAlgebra", "Printf", "Reexport", "SparseArrays", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns", "StatsModels"]
-git-tree-sha1 = "273bd1cd30768a2fddfa3fd63bbc746ed7249e5f"
-uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
-version = "1.9.0"
-
-[[deps.GLMNet]]
-deps = ["DataFrames", "Distributed", "Distributions", "Printf", "Random", "SparseArrays", "StatsBase", "glmnet_jll"]
-git-tree-sha1 = "7ea4e2bbb84183fe52a488d05e16c152b2387b95"
-uuid = "8d5ece8b-de18-5317-b113-243142960cc6"
-version = "0.7.2"
-
-[[deps.GPUArrays]]
-deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"]
-git-tree-sha1 = "85d7fb51afb3def5dcb85ad31c3707795c8bccc1"
-uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
-version = "9.1.0"
-
-[[deps.GPUArraysCore]]
-deps = ["Adapt"]
-git-tree-sha1 = "2d6ca471a6c7b536127afccfa7564b5b39227fe0"
-uuid = "46192b85-c4d5-4398-a991-12ede77f4527"
-version = "0.1.5"
-
-[[deps.GeoInterface]]
-deps = ["Extents"]
-git-tree-sha1 = "d53480c0793b13341c40199190f92c611aa2e93c"
-uuid = "cf35fbd7-0cd7-5166-be24-54bfbe79505f"
-version = "1.3.2"
-
-[[deps.GeometryBasics]]
-deps = ["EarCut_jll", "Extents", "GeoInterface", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"]
-git-tree-sha1 = "424a5a6ce7c5d97cca7bcc4eac551b97294c54af"
-uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326"
-version = "0.4.9"
-
-[[deps.Glob]]
-git-tree-sha1 = "97285bbd5230dd766e9ef6749b80fc617126d496"
-uuid = "c27321d9-0574-5035-807b-f59d2c89b15c"
-version = "1.3.1"
-
-[[deps.Graphs]]
-deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"]
-git-tree-sha1 = "899050ace26649433ef1af25bc17a815b3db52b7"
-uuid = "86223c79-3864-5bf0-83f7-82e725a168b6"
-version = "1.9.0"
-
-[[deps.HDF5]]
-deps = ["Compat", "HDF5_jll", "Libdl", "MPIPreferences", "Mmap", "Preferences", "Printf", "Random", "Requires", "UUIDs"]
-git-tree-sha1 = "26407bd1c60129062cec9da63dc7d08251544d53"
-uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
-version = "0.17.1"
-
-    [deps.HDF5.extensions]
-    MPIExt = "MPI"
-
-    [deps.HDF5.weakdeps]
-    MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
-
-[[deps.HDF5_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"]
-git-tree-sha1 = "38c8874692d48d5440d5752d6c74b0c6b0b60739"
-uuid = "0234f1f7-429e-5d53-9886-15a909be8d59"
-version = "1.14.2+1"
-
-[[deps.HTTP]]
-deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"]
-git-tree-sha1 = "abbbb9ec3afd783a7cbd82ef01dcd088ea051398"
-uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
-version = "1.10.1"
-
-[[deps.HighlyAdaptiveLasso]]
-deps = ["DataFrames", "MLJModelInterface", "RCall"]
-git-tree-sha1 = "40f12ec0130659287a3d1b7e1a8ffc4fcf7249ba"
-uuid = "c5dac772-1445-43c4-b698-9440de7877f6"
-version = "0.2.0"
-
-[[deps.Hwloc_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl"]
-git-tree-sha1 = "8ecb0b34472a3c98f945e3c75fc7d5428d165511"
-uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8"
-version = "2.9.3+0"
-
-[[deps.HypergeometricFunctions]]
-deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"]
-git-tree-sha1 = "f218fe3736ddf977e0e772bc9a586b2383da2685"
-uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a"
-version = "0.3.23"
-
-[[deps.HypothesisTests]]
-deps = ["Combinatorics", "Distributions", "LinearAlgebra", "Printf", "Random", "Rmath", "Roots", "Statistics", "StatsAPI", "StatsBase"]
-git-tree-sha1 = "4b5d5ba51f5f473737ed9de6d8a7aa190ad8c72f"
-uuid = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
-version = "0.11.0"
-
-[[deps.IRTools]]
-deps = ["InteractiveUtils", "MacroTools", "Test"]
-git-tree-sha1 = "8aa91235360659ca7560db43a7d57541120aa31d"
-uuid = "7869d1d1-7146-5819-86e3-90919afe41df"
-version = "0.4.11"
-
-[[deps.Inflate]]
-git-tree-sha1 = "ea8031dea4aff6bd41f1df8f2fdfb25b33626381"
-uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
-version = "0.1.4"
-
-[[deps.InitialValues]]
-git-tree-sha1 = "4da0f88e9a39111c2fa3add390ab15f3a44f3ca3"
-uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c"
-version = "0.3.1"
-
-[[deps.InlineStrings]]
-deps = ["Parsers"]
-git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461"
-uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
-version = "1.4.0"
-
-[[deps.IntelOpenMP_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "ad37c091f7d7daf900963171600d7c1c5c3ede32"
-uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0"
-version = "2023.2.0+0"
-
-[[deps.InteractiveUtils]]
-deps = ["Markdown"]
-uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-
-[[deps.InvertedIndices]]
-git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038"
-uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
-version = "1.3.0"
-
-[[deps.IrrationalConstants]]
-git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2"
-uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
-version = "0.2.2"
-
-[[deps.IterTools]]
-git-tree-sha1 = "4ced6667f9974fc5c5943fa5e2ef1ca43ea9e450"
-uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
-version = "1.8.0"
-
-[[deps.IterationControl]]
-deps = ["EarlyStopping", "InteractiveUtils"]
-git-tree-sha1 = "d7df9a6fdd82a8cfdfe93a94fcce35515be634da"
-uuid = "b3c1a2ee-3fec-4384-bf48-272ea71de57c"
-version = "0.5.3"
-
-[[deps.IterativeSolvers]]
-deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"]
-git-tree-sha1 = "b435d190ef8369cf4d79cc9dd5fba88ba0165307"
-uuid = "42fd0dbc-a981-5370-80f2-aaf504508153"
-version = "0.9.3"
-
-[[deps.IteratorInterfaceExtensions]]
-git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
-uuid = "82899510-4779-5014-852e-03e436cf321d"
-version = "1.0.0"
-
-[[deps.JLD2]]
-deps = ["FileIO", "MacroTools", "Mmap", "OrderedCollections", "Pkg", "PrecompileTools", "Printf", "Reexport", "Requires", "TranscodingStreams", "UUIDs"]
-git-tree-sha1 = "9bbb5130d3b4fa52846546bca4791ecbdfb52730"
-uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
-version = "0.4.38"
-
-[[deps.JLLWrappers]]
-deps = ["Artifacts", "Preferences"]
-git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca"
-uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
-version = "1.5.0"
-
-[[deps.JSON]]
-deps = ["Dates", "Mmap", "Parsers", "Unicode"]
-git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a"
-uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-version = "0.21.4"
-
-[[deps.JSON3]]
-deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"]
-git-tree-sha1 = "95220473901735a0f4df9d1ca5b171b568b2daa3"
-uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
-version = "1.13.2"
-
-[[deps.JuliaVariables]]
-deps = ["MLStyle", "NameResolution"]
-git-tree-sha1 = "49fb3cb53362ddadb4415e9b73926d6b40709e70"
-uuid = "b14d175d-62b4-44ba-8fb7-3064adc8c3ec"
-version = "0.2.4"
-
-[[deps.KernelAbstractions]]
-deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"]
-git-tree-sha1 = "b0737cbbe1c8da6f1139d1c23e35e7cea129c0af"
-uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
-version = "0.9.13"
-
-    [deps.KernelAbstractions.extensions]
-    EnzymeExt = "EnzymeCore"
-
-    [deps.KernelAbstractions.weakdeps]
-    EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
-
-[[deps.LLVM]]
-deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"]
-git-tree-sha1 = "c879e47398a7ab671c782e02b51a4456794a7fa3"
-uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
-version = "6.4.0"
-
-    [deps.LLVM.extensions]
-    BFloat16sExt = "BFloat16s"
-
-    [deps.LLVM.weakdeps]
-    BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
-
-[[deps.LLVMExtra_jll]]
-deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
-git-tree-sha1 = "98eaee04d96d973e79c25d49167668c5c8fb50e2"
-uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
-version = "0.0.27+1"
-
-[[deps.LLVMOpenMP_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "f689897ccbe049adb19a065c495e75f372ecd42b"
-uuid = "1d63c593-3942-5779-bab2-d838dc0a180e"
-version = "15.0.4+0"
-
-[[deps.LaTeXStrings]]
-git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec"
-uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
-version = "1.3.1"
-
-[[deps.LatinHypercubeSampling]]
-deps = ["Random", "StableRNGs", "StatsBase", "Test"]
-git-tree-sha1 = "825289d43c753c7f1bf9bed334c253e9913997f8"
-uuid = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d"
-version = "1.9.0"
-
-[[deps.LazyArtifacts]]
-deps = ["Artifacts", "Pkg"]
-uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
-
-[[deps.LearnAPI]]
-deps = ["InteractiveUtils", "Statistics"]
-git-tree-sha1 = "ec695822c1faaaa64cee32d0b21505e1977b4809"
-uuid = "92ad9a40-7767-427a-9ee6-6e577f1266cb"
-version = "0.1.0"
-
-[[deps.LibCURL]]
-deps = ["LibCURL_jll", "MozillaCACerts_jll"]
-uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
-version = "0.6.3"
-
-[[deps.LibCURL_jll]]
-deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
-uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
-version = "7.84.0+0"
-
-[[deps.LibGit2]]
-deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
-uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
-
-[[deps.LibSSH2_jll]]
-deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
-uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
-version = "1.10.2+0"
-
-[[deps.Libdl]]
-uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-
-[[deps.Libiconv_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl"]
-git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175"
-uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
-version = "1.17.0+0"
-
-[[deps.LineSearches]]
-deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"]
-git-tree-sha1 = "7bbea35cec17305fc70a0e5b4641477dc0789d9d"
-uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
-version = "7.2.0"
-
-[[deps.LinearAlgebra]]
-deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
-uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-
-[[deps.LinearMaps]]
-deps = ["LinearAlgebra"]
-git-tree-sha1 = "9df2ab050ffefe870a09c7b6afdb0cde381703f2"
-uuid = "7a12625a-238d-50fd-b39a-03d52299707e"
-version = "3.11.1"
-weakdeps = ["ChainRulesCore", "SparseArrays", "Statistics"]
-
-    [deps.LinearMaps.extensions]
-    LinearMapsChainRulesCoreExt = "ChainRulesCore"
-    LinearMapsSparseArraysExt = "SparseArrays"
-    LinearMapsStatisticsExt = "Statistics"
-
-[[deps.LogExpFunctions]]
-deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
-git-tree-sha1 = "7d6dd4e9212aebaeed356de34ccf262a3cd415aa"
-uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
-version = "0.3.26"
-
-    [deps.LogExpFunctions.extensions]
-    LogExpFunctionsChainRulesCoreExt = "ChainRulesCore"
-    LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables"
-    LogExpFunctionsInverseFunctionsExt = "InverseFunctions"
-
-    [deps.LogExpFunctions.weakdeps]
-    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-    ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
-    InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
-
-[[deps.Logging]]
-uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
-
-[[deps.LoggingExtras]]
-deps = ["Dates", "Logging"]
-git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075"
-uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36"
-version = "1.0.3"
-
-[[deps.Lz4_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl"]
-git-tree-sha1 = "6c26c5e8a4203d43b5497be3ec5d4e0c3cde240a"
-uuid = "5ced341a-0733-55b8-9ab6-a4889d929147"
-version = "1.9.4+0"
-
-[[deps.MKL]]
-deps = ["Artifacts", "Libdl", "LinearAlgebra", "MKL_jll"]
-git-tree-sha1 = "100521a1d2181cb39036ee1a6955d6b9686bb363"
-uuid = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
-version = "0.6.1"
-
-[[deps.MKL_jll]]
-deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"]
-git-tree-sha1 = "eb006abbd7041c28e0d16260e50a24f8f9104913"
-uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
-version = "2023.2.0+0"
-
-[[deps.MLFlowClient]]
-deps = ["Dates", "FilePathsBase", "HTTP", "JSON", "ShowCases", "URIs", "UUIDs"]
-git-tree-sha1 = "32cee10a6527476bef0c6484ff4c60c2cead5d3e"
-uuid = "64a0f543-368b-4a9a-827a-e71edb2a0b83"
-version = "0.4.4"
-
-[[deps.MLJ]]
-deps = ["CategoricalArrays", "ComputationalResources", "Distributed", "Distributions", "LinearAlgebra", "MLJBalancing", "MLJBase", "MLJEnsembles", "MLJFlow", "MLJIteration", "MLJModels", "MLJTuning", "OpenML", "Pkg", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "StatisticalMeasures", "Statistics", "StatsBase", "Tables"]
-git-tree-sha1 = "981196c41a23cbc1befbad190558b1f0ebb97910"
-uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
-version = "0.20.2"
-
-[[deps.MLJBalancing]]
-deps = ["MLJBase", "MLJModelInterface", "MLUtils", "OrderedCollections", "Random", "StatsBase"]
-git-tree-sha1 = "e4be85602f010291f49b6a6464ccde1708ce5d62"
-uuid = "45f359ea-796d-4f51-95a5-deb1a414c586"
-version = "0.1.3"
-
-[[deps.MLJBase]]
-deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Dates", "DelimitedFiles", "Distributed", "Distributions", "InteractiveUtils", "InvertedIndices", "LearnAPI", "LinearAlgebra", "MLJModelInterface", "Missings", "OrderedCollections", "Parameters", "PrettyTables", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "Serialization", "StatisticalMeasuresBase", "StatisticalTraits", "Statistics", "StatsBase", "Tables"]
-git-tree-sha1 = "6d433d34a1764324cf37a1ddc47dcc42ec05340f"
-uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
-version = "1.0.1"
-weakdeps = ["StatisticalMeasures"]
-
-    [deps.MLJBase.extensions]
-    DefaultMeasuresExt = "StatisticalMeasures"
-
-[[deps.MLJEnsembles]]
-deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Distributed", "Distributions", "MLJModelInterface", "ProgressMeter", "Random", "ScientificTypesBase", "StatisticalMeasuresBase", "StatsBase"]
-git-tree-sha1 = "94403b2c8f692011df6731913376e0e37f6c0fe9"
-uuid = "50ed68f4-41fd-4504-931a-ed422449fee0"
-version = "0.4.0"
-
-[[deps.MLJFlow]]
-deps = ["MLFlowClient", "MLJBase", "MLJModelInterface"]
-git-tree-sha1 = "89d0e7a7e08359476482f20b2d8ff12080d171ee"
-uuid = "7b7b8358-b45c-48ea-a8ef-7ca328ad328f"
-version = "0.3.0"
-
-[[deps.MLJGLMInterface]]
-deps = ["Distributions", "GLM", "MLJModelInterface", "StatsModels", "Tables"]
-git-tree-sha1 = "06aba1c96b19f31744f7e97d96fcf66b79739e05"
-uuid = "caf8df21-4939-456d-ac9c-5fefbfb04c0c"
-version = "0.3.5"
-
-[[deps.MLJIteration]]
-deps = ["IterationControl", "MLJBase", "Random", "Serialization"]
-git-tree-sha1 = "991e10d4c8da49d534e312e8a4fbe56b7ac6f70c"
-uuid = "614be32b-d00c-4edb-bd02-1eb411ab5e55"
-version = "0.6.0"
-
-[[deps.MLJLinearModels]]
-deps = ["DocStringExtensions", "IterativeSolvers", "LinearAlgebra", "LinearMaps", "MLJModelInterface", "Optim", "Parameters"]
-git-tree-sha1 = "7f517fd840ca433a8fae673edb31678ff55d969c"
-uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
-version = "0.10.0"
-
-[[deps.MLJModelInterface]]
-deps = ["Random", "ScientificTypesBase", "StatisticalTraits"]
-git-tree-sha1 = "381d99f0af76d98f50bd5512dcf96a99c13f8223"
-uuid = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
-version = "1.9.3"
-
-[[deps.MLJModels]]
-deps = ["CategoricalArrays", "CategoricalDistributions", "Combinatorics", "Dates", "Distances", "Distributions", "InteractiveUtils", "LinearAlgebra", "MLJModelInterface", "Markdown", "OrderedCollections", "Parameters", "Pkg", "PrettyPrinting", "REPL", "Random", "RelocatableFolders", "ScientificTypes", "StatisticalTraits", "Statistics", "StatsBase", "Tables"]
-git-tree-sha1 = "10d221910fc3f3eedad567178ddbca3cc0f776a3"
-uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-version = "0.16.12"
-
-[[deps.MLJTuning]]
-deps = ["ComputationalResources", "Distributed", "Distributions", "LatinHypercubeSampling", "MLJBase", "ProgressMeter", "Random", "RecipesBase", "StatisticalMeasuresBase"]
-git-tree-sha1 = "44dc126646a15018d7829f020d121b85b4def9bc"
-uuid = "03970b2e-30c4-11ea-3135-d1576263f10f"
-version = "0.8.0"
-
-[[deps.MLJXGBoostInterface]]
-deps = ["MLJModelInterface", "SparseArrays", "Tables", "XGBoost"]
-git-tree-sha1 = "988c399a352f0b49bc1345c509d8a4800cb468c5"
-uuid = "54119dfa-1dab-4055-a167-80440f4f7a91"
-version = "0.3.10"
-
-[[deps.MLStyle]]
-git-tree-sha1 = "bc38dff0548128765760c79eb7388a4b37fae2c8"
-uuid = "d8e11817-5142-5d16-987a-aa16d5891078"
-version = "0.4.17"
-
-[[deps.MLUtils]]
-deps = ["ChainRulesCore", "Compat", "DataAPI", "DelimitedFiles", "FLoops", "NNlib", "Random", "ShowCases", "SimpleTraits", "Statistics", "StatsBase", "Tables", "Transducers"]
-git-tree-sha1 = "3504cdb8c2bc05bde4d4b09a81b01df88fcbbba0"
-uuid = "f1d291b0-491e-4a28-83b9-f70985020b54"
-version = "0.4.3"
-
-[[deps.MPICH_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"]
-git-tree-sha1 = "8a5b4d2220377d1ece13f49438d71ad20cf1ba83"
-uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4"
-version = "4.1.2+0"
-
-[[deps.MPIPreferences]]
-deps = ["Libdl", "Preferences"]
-git-tree-sha1 = "8f6af051b9e8ec597fa09d8885ed79fd582f33c9"
-uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
-version = "0.1.10"
-
-[[deps.MPItrampoline_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"]
-git-tree-sha1 = "6979eccb6a9edbbb62681e158443e79ecc0d056a"
-uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748"
-version = "5.3.1+0"
-
-[[deps.MacroTools]]
-deps = ["Markdown", "Random"]
-git-tree-sha1 = "9ee1618cbf5240e6d4e0371d6f24065083f60c48"
-uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
-version = "0.5.11"
-
-[[deps.Markdown]]
-deps = ["Base64"]
-uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
-
-[[deps.MbedTLS]]
-deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"]
-git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf"
-uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
-version = "1.1.9"
-
-[[deps.MbedTLS_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
-version = "2.28.2+0"
-
-[[deps.MetaGraphsNext]]
-deps = ["Graphs", "JLD2", "SimpleTraits"]
-git-tree-sha1 = "8dd4f3f8a643d53e61ff9115749f522c35a38f3f"
-uuid = "fa8bd995-216d-47f1-8a91-f3b68fbeb377"
-version = "0.6.0"
-
-[[deps.MicroCollections]]
-deps = ["BangBang", "InitialValues", "Setfield"]
-git-tree-sha1 = "629afd7d10dbc6935ec59b32daeb33bc4460a42e"
-uuid = "128add7d-3638-4c79-886c-908ea0c25c34"
-version = "0.1.4"
-
-[[deps.MicrosoftMPI_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "b01beb91d20b0d1312a9471a36017b5b339d26de"
-uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf"
-version = "10.1.4+1"
-
-[[deps.Missings]]
-deps = ["DataAPI"]
-git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272"
-uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
-version = "1.1.0"
-
-[[deps.Mmap]]
-uuid = "a63ad114-7e13-5084-954f-fe012c677804"
-
-[[deps.Mocking]]
-deps = ["Compat", "ExprTools"]
-git-tree-sha1 = "4cc0c5a83933648b615c36c2b956d94fda70641e"
-uuid = "78c3b35d-d492-501b-9361-3d52fe80e533"
-version = "0.7.7"
-
-[[deps.MozillaCACerts_jll]]
-uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
-version = "2022.10.11"
-
-[[deps.MultipleTesting]]
-deps = ["Distributions", "SpecialFunctions", "StatsBase"]
-git-tree-sha1 = "1e98f8f732e7035c4333135b75605b74f3462b9b"
-uuid = "f8716d33-7c4a-5097-896f-ce0ecbd3ef6b"
-version = "0.6.0"
-
-[[deps.NLSolversBase]]
-deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"]
-git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c"
-uuid = "d41bc354-129a-5804-8e4c-c37616107c6c"
-version = "7.8.3"
-
-[[deps.NNlib]]
-deps = ["Adapt", "Atomix", "ChainRulesCore", "GPUArraysCore", "KernelAbstractions", "LinearAlgebra", "Pkg", "Random", "Requires", "Statistics"]
-git-tree-sha1 = "ac86d2944bf7a670ac8bf0f7ec099b5898abcc09"
-uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-version = "0.9.8"
-
-    [deps.NNlib.extensions]
-    NNlibAMDGPUExt = "AMDGPU"
-    NNlibCUDACUDNNExt = ["CUDA", "cuDNN"]
-    NNlibCUDAExt = "CUDA"
-    NNlibEnzymeCoreExt = "EnzymeCore"
-
-    [deps.NNlib.weakdeps]
-    AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
-    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-    EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
-    cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
-
-[[deps.NaNMath]]
-deps = ["OpenLibm_jll"]
-git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4"
-uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
-version = "1.0.2"
-
-[[deps.NameResolution]]
-deps = ["PrettyPrint"]
-git-tree-sha1 = "1a0fa0e9613f46c9b8c11eee38ebb4f590013c5e"
-uuid = "71a1bf82-56d0-4bbc-8a3c-48b961074391"
-version = "0.1.5"
-
-[[deps.NetworkLayout]]
-deps = ["GeometryBasics", "LinearAlgebra", "Random", "Requires", "StaticArrays"]
-git-tree-sha1 = "91bb2fedff8e43793650e7a677ccda6e6e6e166b"
-uuid = "46757867-2c16-5918-afeb-47bfcb05e46a"
-version = "0.4.6"
-weakdeps = ["Graphs"]
-
-    [deps.NetworkLayout.extensions]
-    NetworkLayoutGraphsExt = "Graphs"
-
-[[deps.NetworkOptions]]
-uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
-version = "1.2.0"
-
-[[deps.OpenBLAS_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
-uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.21+4"
-
-[[deps.OpenLibm_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
-version = "0.8.1+0"
-
-[[deps.OpenML]]
-deps = ["ARFFFiles", "HTTP", "JSON", "Markdown", "Pkg", "Scratch"]
-git-tree-sha1 = "6efb039ae888699d5a74fb593f6f3e10c7193e33"
-uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
-version = "0.3.1"
-
-[[deps.OpenMPI_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "PMIx_jll", "TOML", "Zlib_jll", "libevent_jll", "prrte_jll"]
-git-tree-sha1 = "694458ae803b684f09c07f90459cb79655fb377d"
-uuid = "fe0851c0-eecd-5654-98d4-656369965a5c"
-version = "5.0.0+0"
-
-[[deps.OpenSSL]]
-deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"]
-git-tree-sha1 = "51901a49222b09e3743c65b8847687ae5fc78eb2"
-uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c"
-version = "1.4.1"
-
-[[deps.OpenSSL_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl"]
-git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f"
-uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
-version = "3.0.12+0"
-
-[[deps.OpenSpecFun_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
-uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
-version = "0.5.5+0"
-
-[[deps.Optim]]
-deps = ["Compat", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"]
-git-tree-sha1 = "01f85d9269b13fedc61e63cc72ee2213565f7a72"
-uuid = "429524aa-4258-5aef-a3af-852621145aeb"
-version = "1.7.8"
-
-[[deps.OrderedCollections]]
-git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5"
-uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-version = "1.6.3"
-
-[[deps.PDMats]]
-deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
-git-tree-sha1 = "4e5be6bb265d33669f98eb55d2a57addd1eeb72c"
-uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
-version = "0.11.30"
-
-[[deps.PMIx_jll]]
-deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "Zlib_jll", "libevent_jll"]
-git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541"
-uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab"
-version = "4.2.7+0"
-
-[[deps.PackageCompiler]]
-deps = ["Artifacts", "Glob", "LazyArtifacts", "Libdl", "Pkg", "Printf", "RelocatableFolders", "TOML", "UUIDs", "p7zip_jll"]
-git-tree-sha1 = "f9392ab72832f4315220a853747ff3dba758c9d1"
-uuid = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d"
-version = "2.1.15"
-
-[[deps.Parameters]]
-deps = ["OrderedCollections", "UnPack"]
-git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe"
-uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
-version = "0.12.3"
-
-[[deps.Parsers]]
-deps = ["Dates", "PrecompileTools", "UUIDs"]
-git-tree-sha1 = "a935806434c9d4c506ba941871b327b96d41f2bf"
-uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "2.8.0"
-
-[[deps.Pkg]]
-deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
-uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-version = "1.9.2"
-
-[[deps.PooledArrays]]
-deps = ["DataAPI", "Future"]
-git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3"
-uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
-version = "1.4.3"
-
-[[deps.PositiveFactorizations]]
-deps = ["LinearAlgebra"]
-git-tree-sha1 = "17275485f373e6673f7e7f97051f703ed5b15b20"
-uuid = "85a6dd25-e78a-55b7-8502-1745935b8125"
-version = "0.2.4"
-
-[[deps.PrecompileTools]]
-deps = ["Preferences"]
-git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f"
-uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
-version = "1.2.0"
-
-[[deps.Preferences]]
-deps = ["TOML"]
-git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e"
-uuid = "21216c6a-2e73-6563-6e65-726566657250"
-version = "1.4.1"
-
-[[deps.PrettyPrint]]
-git-tree-sha1 = "632eb4abab3449ab30c5e1afaa874f0b98b586e4"
-uuid = "8162dcfd-2161-5ef2-ae6c-7681170c5f98"
-version = "0.2.0"
-
-[[deps.PrettyPrinting]]
-git-tree-sha1 = "22a601b04a154ca38867b991d5017469dc75f2db"
-uuid = "54e16d92-306c-5ea0-a30b-337be88ac337"
-version = "0.4.1"
-
-[[deps.PrettyTables]]
-deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"]
-git-tree-sha1 = "88b895d13d53b5577fd53379d913b9ab9ac82660"
-uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
-version = "2.3.1"
-
-[[deps.Printf]]
-deps = ["Unicode"]
-uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-
-[[deps.ProgressMeter]]
-deps = ["Distributed", "Printf"]
-git-tree-sha1 = "00099623ffee15972c16111bcf84c58a0051257c"
-uuid = "92933f4c-e287-5a05-a399-4b506db050ca"
-version = "1.9.0"
-
-[[deps.QuadGK]]
-deps = ["DataStructures", "LinearAlgebra"]
-git-tree-sha1 = "9ebcd48c498668c7fa0e97a9cae873fbee7bfee1"
-uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
-version = "2.9.1"
-
-[[deps.RCall]]
-deps = ["CategoricalArrays", "Conda", "DataFrames", "DataStructures", "Dates", "Libdl", "Missings", "REPL", "Random", "Requires", "StatsModels", "WinReg"]
-git-tree-sha1 = "3084689b18f9e5e817a6ce9a83a7654d8ad0f2f6"
-uuid = "6f49c342-dc21-5d91-9882-a32aef131414"
-version = "0.13.18"
-
-[[deps.REPL]]
-deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
-uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
-
-[[deps.Random]]
-deps = ["SHA", "Serialization"]
-uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-
-[[deps.RealDot]]
-deps = ["LinearAlgebra"]
-git-tree-sha1 = "9f0a1b71baaf7650f4fa8a1d168c7fb6ee41f0c9"
-uuid = "c1ae055f-0cd5-4b69-90a6-9a35b1a98df9"
-version = "0.1.0"
-
-[[deps.RecipesBase]]
-deps = ["PrecompileTools"]
-git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff"
-uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
-version = "1.3.4"
-
-[[deps.Reexport]]
-git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
-uuid = "189a3867-3050-52da-a836-e630ba90ab69"
-version = "1.2.2"
-
-[[deps.RelocatableFolders]]
-deps = ["SHA", "Scratch"]
-git-tree-sha1 = "ffdaf70d81cf6ff22c2b6e733c900c3321cab864"
-uuid = "05181044-ff0b-4ac5-8273-598c1e38db00"
-version = "1.0.1"
-
-[[deps.Requires]]
-deps = ["UUIDs"]
-git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
-uuid = "ae029012-a4dd-5104-9daa-d747884805df"
-version = "1.3.0"
-
-[[deps.Rmath]]
-deps = ["Random", "Rmath_jll"]
-git-tree-sha1 = "f65dcb5fa46aee0cf9ed6274ccbd597adc49aa7b"
-uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
-version = "0.7.1"
-
-[[deps.Rmath_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "6ed52fdd3382cf21947b15e8870ac0ddbff736da"
-uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f"
-version = "0.4.0+0"
-
-[[deps.Roots]]
-deps = ["ChainRulesCore", "CommonSolve", "Printf", "Setfield"]
-git-tree-sha1 = "0f1d92463a020321983d04c110f476c274bafe2e"
-uuid = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
-version = "2.0.22"
-
-    [deps.Roots.extensions]
-    RootsForwardDiffExt = "ForwardDiff"
-    RootsIntervalRootFindingExt = "IntervalRootFinding"
-    RootsSymPyExt = "SymPy"
-    RootsSymPyPythonCallExt = "SymPyPythonCall"
-
-    [deps.Roots.weakdeps]
-    ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-    IntervalRootFinding = "d2bf35a9-74e0-55ec-b149-d360ff49b807"
-    SymPy = "24249f21-da20-56a4-8eb1-6a02cf4ae2e6"
-    SymPyPythonCall = "bc8888f7-b21e-4b7c-a06a-5d9c9496438c"
-
-[[deps.SHA]]
-uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-version = "0.7.0"
-
-[[deps.ScientificTypes]]
-deps = ["CategoricalArrays", "ColorTypes", "Dates", "Distributions", "PrettyTables", "Reexport", "ScientificTypesBase", "StatisticalTraits", "Tables"]
-git-tree-sha1 = "75ccd10ca65b939dab03b812994e571bf1e3e1da"
-uuid = "321657f4-b219-11e9-178b-2701a2544e81"
-version = "3.0.2"
-
-[[deps.ScientificTypesBase]]
-git-tree-sha1 = "a8e18eb383b5ecf1b5e6fc237eb39255044fd92b"
-uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
-version = "3.0.0"
-
-[[deps.Scratch]]
-deps = ["Dates"]
-git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386"
-uuid = "6c6a2e73-6563-6170-7368-637461726353"
-version = "1.2.1"
-
-[[deps.SentinelArrays]]
-deps = ["Dates", "Random"]
-git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f"
-uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
-version = "1.4.1"
-
-[[deps.Serialization]]
-uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-
-[[deps.Setfield]]
-deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"]
-git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac"
-uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
-version = "1.1.1"
-
-[[deps.SharedArrays]]
-deps = ["Distributed", "Mmap", "Random", "Serialization"]
-uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
-
-[[deps.ShiftedArrays]]
-git-tree-sha1 = "503688b59397b3307443af35cd953a13e8005c16"
-uuid = "1277b4bf-5013-50f5-be3d-901d8477a67a"
-version = "2.0.0"
-
-[[deps.ShowCases]]
-git-tree-sha1 = "7f534ad62ab2bd48591bdeac81994ea8c445e4a5"
-uuid = "605ecd9f-84a6-4c9e-81e2-4798472b76a3"
-version = "0.1.0"
-
-[[deps.SimpleBufferStream]]
-git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1"
-uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7"
-version = "1.1.0"
-
-[[deps.SimpleTraits]]
-deps = ["InteractiveUtils", "MacroTools"]
-git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231"
-uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
-version = "0.9.4"
-
-[[deps.Sockets]]
-uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
-
-[[deps.SortingAlgorithms]]
-deps = ["DataStructures"]
-git-tree-sha1 = "5165dfb9fd131cf0c6957a3a7605dede376e7b63"
-uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
-version = "1.2.0"
-
-[[deps.SparseArrays]]
-deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
-uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-
-[[deps.SparseInverseSubset]]
-deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
-git-tree-sha1 = "91402087fd5d13b2d97e3ef29bbdf9d7859e678a"
-uuid = "dc90abb0-5640-4711-901d-7e5b23a2fada"
-version = "0.1.1"
-
-[[deps.SparseMatricesCSR]]
-deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
-git-tree-sha1 = "38677ca58e80b5cad2382e5a1848f93b054ad28d"
-uuid = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
-version = "0.6.7"
-
-[[deps.SpecialFunctions]]
-deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
-git-tree-sha1 = "e2cfc4012a19088254b3950b85c3c1d8882d864d"
-uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
-version = "2.3.1"
-weakdeps = ["ChainRulesCore"]
-
-    [deps.SpecialFunctions.extensions]
-    SpecialFunctionsChainRulesCoreExt = "ChainRulesCore"
-
-[[deps.SplittablesBase]]
-deps = ["Setfield", "Test"]
-git-tree-sha1 = "e08a62abc517eb79667d0a29dc08a3b589516bb5"
-uuid = "171d559e-b47b-412a-8079-5efa626c420e"
-version = "0.1.15"
-
-[[deps.StableRNGs]]
-deps = ["Random", "Test"]
-git-tree-sha1 = "3be7d49667040add7ee151fefaf1f8c04c8c8276"
-uuid = "860ef19b-820b-49d6-a774-d7a799459cd3"
-version = "1.0.0"
-
-[[deps.StaticArrays]]
-deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"]
-git-tree-sha1 = "5ef59aea6f18c25168842bded46b16662141ab87"
-uuid = "90137ffa-7385-5640-81b9-e52037218182"
-version = "1.7.0"
-weakdeps = ["Statistics"]
-
-    [deps.StaticArrays.extensions]
-    StaticArraysStatisticsExt = "Statistics"
-
-[[deps.StaticArraysCore]]
-git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d"
-uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
-version = "1.4.2"
-
-[[deps.StatisticalMeasures]]
-deps = ["CategoricalArrays", "CategoricalDistributions", "Distributions", "LearnAPI", "LinearAlgebra", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "StatisticalMeasuresBase", "Statistics", "StatsBase"]
-git-tree-sha1 = "b58c7cc3d7de6c0d75d8437b81481af924970123"
-uuid = "a19d573c-0a75-4610-95b3-7071388c7541"
-version = "0.1.3"
-
-    [deps.StatisticalMeasures.extensions]
-    LossFunctionsExt = "LossFunctions"
-    ScientificTypesExt = "ScientificTypes"
-
-    [deps.StatisticalMeasures.weakdeps]
-    LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
-    ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
-
-[[deps.StatisticalMeasuresBase]]
-deps = ["CategoricalArrays", "InteractiveUtils", "MLUtils", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "Statistics"]
-git-tree-sha1 = "17dfb22e2e4ccc9cd59b487dce52883e0151b4d3"
-uuid = "c062fc1d-0d66-479b-b6ac-8b44719de4cc"
-version = "0.1.1"
-
-[[deps.StatisticalTraits]]
-deps = ["ScientificTypesBase"]
-git-tree-sha1 = "30b9236691858e13f167ce829490a68e1a597782"
-uuid = "64bff920-2084-43da-a3e6-9bb72801c0c9"
-version = "3.2.0"
-
-[[deps.Statistics]]
-deps = ["LinearAlgebra", "SparseArrays"]
-uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-version = "1.9.0"
-
-[[deps.StatsAPI]]
-deps = ["LinearAlgebra"]
-git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed"
-uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
-version = "1.7.0"
-
-[[deps.StatsBase]]
-deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"]
-git-tree-sha1 = "1d77abd07f617c4868c33d4f5b9e1dbb2643c9cf"
-uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-version = "0.34.2"
-
-[[deps.StatsFuns]]
-deps = ["HypergeometricFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"]
-git-tree-sha1 = "f625d686d5a88bcd2b15cd81f18f98186fdc0c9a"
-uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
-version = "1.3.0"
-
-    [deps.StatsFuns.extensions]
-    StatsFunsChainRulesCoreExt = "ChainRulesCore"
-    StatsFunsInverseFunctionsExt = "InverseFunctions"
-
-    [deps.StatsFuns.weakdeps]
-    ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-    InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
-
-[[deps.StatsModels]]
-deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Printf", "REPL", "ShiftedArrays", "SparseArrays", "StatsAPI", "StatsBase", "StatsFuns", "Tables"]
-git-tree-sha1 = "5cf6c4583533ee38639f73b880f35fc85f2941e0"
-uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
-version = "0.7.3"
-
-[[deps.StringEncodings]]
-deps = ["Libiconv_jll"]
-git-tree-sha1 = "b765e46ba27ecf6b44faf70df40c57aa3a547dcb"
-uuid = "69024149-9ee7-55f6-a4c4-859efe599b68"
-version = "0.3.7"
-
-[[deps.StringManipulation]]
-deps = ["PrecompileTools"]
-git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5"
-uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e"
-version = "0.3.4"
-
-[[deps.StructArrays]]
-deps = ["Adapt", "ConstructionBase", "DataAPI", "GPUArraysCore", "StaticArraysCore", "Tables"]
-git-tree-sha1 = "0a3db38e4cce3c54fe7a71f831cd7b6194a54213"
-uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
-version = "0.6.16"
-
-[[deps.StructTypes]]
-deps = ["Dates", "UUIDs"]
-git-tree-sha1 = "ca4bccb03acf9faaf4137a9abc1881ed1841aa70"
-uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
-version = "1.10.0"
-
-[[deps.SuiteSparse]]
-deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
-uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
-
-[[deps.SuiteSparse_jll]]
-deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"]
-uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
-version = "5.10.1+6"
-
-[[deps.TMLE]]
-deps = ["AbstractDifferentiation", "CategoricalArrays", "Combinatorics", "Distributions", "GLM", "Graphs", "HypothesisTests", "LogExpFunctions", "MLJBase", "MLJGLMInterface", "MLJModels", "MetaGraphsNext", "Missings", "PrecompileTools", "PrettyTables", "Random", "Statistics", "TableOperations", "Tables", "Zygote"]
-git-tree-sha1 = "fe31c10325f1e911dae33a5d521cc07c2c7eeecd"
-repo-rev = "cvtmle"
-repo-url = "https://github.com/TARGENE/TMLE.jl.git"
-uuid = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf"
-version = "0.12.0"
-
-    [deps.TMLE.extensions]
-    GraphMakieExt = ["GraphMakie", "CairoMakie"]
-    JSONExt = "JSON"
-    YAMLExt = "YAML"
-
-    [deps.TMLE.weakdeps]
-    CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
-    GraphMakie = "1ecd5474-83a3-4783-bb4f-06765db800d2"
-    JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-    YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
-
-[[deps.TOML]]
-deps = ["Dates"]
-uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
-version = "1.0.3"
-
-[[deps.TZJData]]
-deps = ["Artifacts"]
-git-tree-sha1 = "d39314cdbaf5b90a047db33858626f8d1cc973e1"
-uuid = "dc5dba14-91b3-4cab-a142-028a31da12f7"
-version = "1.0.0+2023c"
-
-[[deps.TableOperations]]
-deps = ["SentinelArrays", "Tables", "Test"]
-git-tree-sha1 = "e383c87cf2a1dc41fa30c093b2a19877c83e1bc1"
-uuid = "ab02a1b2-a7df-11e8-156e-fb1833f50b87"
-version = "1.2.0"
-
-[[deps.TableTraits]]
-deps = ["IteratorInterfaceExtensions"]
-git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
-uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
-version = "1.0.1"
-
-[[deps.Tables]]
-deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"]
-git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d"
-uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
-version = "1.11.1"
-
-[[deps.Tar]]
-deps = ["ArgTools", "SHA"]
-uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
-version = "1.10.0"
-
-[[deps.Test]]
-deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
-uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[[deps.TimeZones]]
-deps = ["Artifacts", "Dates", "Downloads", "InlineStrings", "LazyArtifacts", "Mocking", "Printf", "Scratch", "TZJData", "Unicode", "p7zip_jll"]
-git-tree-sha1 = "89e64d61ef3cd9e80f7fc12b7d13db2d75a23c03"
-uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53"
-version = "1.13.0"
-weakdeps = ["RecipesBase"]
-
-    [deps.TimeZones.extensions]
-    TimeZonesRecipesBaseExt = "RecipesBase"
-
-[[deps.TranscodingStreams]]
-deps = ["Random", "Test"]
-git-tree-sha1 = "9a6ae7ed916312b41236fcef7e0af564ef934769"
-uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
-version = "0.9.13"
-
-[[deps.Transducers]]
-deps = ["Adapt", "ArgCheck", "BangBang", "Baselet", "CompositionsBase", "ConstructionBase", "DefineSingletons", "Distributed", "InitialValues", "Logging", "Markdown", "MicroCollections", "Requires", "Setfield", "SplittablesBase", "Tables"]
-git-tree-sha1 = "e579d3c991938fecbb225699e8f611fa3fbf2141"
-uuid = "28d57a85-8fef-5791-bfe6-a80928e7c999"
-version = "0.4.79"
-
-    [deps.Transducers.extensions]
-    TransducersBlockArraysExt = "BlockArrays"
-    TransducersDataFramesExt = "DataFrames"
-    TransducersLazyArraysExt = "LazyArrays"
-    TransducersOnlineStatsBaseExt = "OnlineStatsBase"
-    TransducersReferenceablesExt = "Referenceables"
-
-    [deps.Transducers.weakdeps]
-    BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
-    DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-    LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
-    OnlineStatsBase = "925886fa-5bf2-5e8e-b522-a9147a512338"
-    Referenceables = "42d2dcc6-99eb-4e98-b66c-637b7d73030e"
-
-[[deps.URIs]]
-git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b"
-uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
-version = "1.5.1"
-
-[[deps.UUIDs]]
-deps = ["Random", "SHA"]
-uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
-
-[[deps.UnPack]]
-git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
-uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
-version = "1.0.2"
-
-[[deps.Unicode]]
-uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
-
-[[deps.UnsafeAtomics]]
-git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278"
-uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f"
-version = "0.2.1"
-
-[[deps.UnsafeAtomicsLLVM]]
-deps = ["LLVM", "UnsafeAtomics"]
-git-tree-sha1 = "323e3d0acf5e78a56dfae7bd8928c989b4f3083e"
-uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249"
-version = "0.1.3"
-
-[[deps.VersionParsing]]
-git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868"
-uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
-version = "1.3.0"
-
-[[deps.WeakRefStrings]]
-deps = ["DataAPI", "InlineStrings", "Parsers"]
-git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23"
-uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
-version = "1.4.2"
-
-[[deps.WinReg]]
-git-tree-sha1 = "cd910906b099402bcc50b3eafa9634244e5ec83b"
-uuid = "1b915085-20d7-51cf-bf83-8f477d6f5128"
-version = "1.0.0"
-
-[[deps.WorkerUtilities]]
-git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7"
-uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60"
-version = "1.6.1"
-
-[[deps.XGBoost]]
-deps = ["AbstractTrees", "CEnum", "JSON3", "LinearAlgebra", "OrderedCollections", "SparseArrays", "SparseMatricesCSR", "Statistics", "Tables", "XGBoost_jll"]
-git-tree-sha1 = "bacb62e07d104630094c8dac2fd070f5d4b9b305"
-uuid = "009559a3-9522-5dbb-924b-0b6ed2b22bb9"
-version = "2.5.1"
-
-    [deps.XGBoost.extensions]
-    XGBoostCUDAExt = "CUDA"
-    XGBoostTermExt = "Term"
-
-    [deps.XGBoost.weakdeps]
-    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-    Term = "22787eb5-b846-44ae-b979-8e399b8463ab"
-
-[[deps.XGBoost_jll]]
-deps = ["Artifacts", "CUDA_Runtime_jll", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "Libdl", "TOML"]
-git-tree-sha1 = "1c0aa2390a7ebb28a3d6c214f64e57a24091fbd7"
-uuid = "a5c6f535-4255-5ca2-a466-0e519f119c46"
-version = "2.0.1+0"
-
-[[deps.YAML]]
-deps = ["Base64", "Dates", "Printf", "StringEncodings"]
-git-tree-sha1 = "e6330e4b731a6af7959673621e91645eb1356884"
-uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
-version = "0.4.9"
-
-[[deps.Zlib_jll]]
-deps = ["Libdl"]
-uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
-version = "1.2.13+0"
-
-[[deps.Zstd_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl"]
-git-tree-sha1 = "49ce682769cd5de6c72dcf1b94ed7790cd08974c"
-uuid = "3161d3a3-bdf6-5164-811a-617609db77b4"
-version = "1.5.5+0"
-
-[[deps.Zygote]]
-deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "GPUArrays", "GPUArraysCore", "IRTools", "InteractiveUtils", "LinearAlgebra", "LogExpFunctions", "MacroTools", "NaNMath", "PrecompileTools", "Random", "Requires", "SparseArrays", "SpecialFunctions", "Statistics", "ZygoteRules"]
-git-tree-sha1 = "5ded212acd815612df112bb895ef3910c5a03f57"
-uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
-version = "0.6.67"
-
-    [deps.Zygote.extensions]
-    ZygoteColorsExt = "Colors"
-    ZygoteDistancesExt = "Distances"
-    ZygoteTrackerExt = "Tracker"
-
-    [deps.Zygote.weakdeps]
-    Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
-    Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
-    Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
-
-[[deps.ZygoteRules]]
-deps = ["ChainRulesCore", "MacroTools"]
-git-tree-sha1 = "9d749cd449fb448aeca4feee9a2f4186dbb5d184"
-uuid = "700de1a5-db45-46bc-99cf-38207098b444"
-version = "0.2.4"
-
-[[deps.glmnet_jll]]
-deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "31adae3b983b579a1fbd7cfd43a4bc0d224c2f5a"
-uuid = "78c6b45d-5eaf-5d68-bcfb-a5a2cb06c27f"
-version = "2.0.13+0"
-
-[[deps.libaec_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl"]
-git-tree-sha1 = "eddd19a8dea6b139ea97bdc8a0e2667d4b661720"
-uuid = "477f73a3-ac25-53e9-8cc3-50b2fa2566f0"
-version = "1.0.6+1"
-
-[[deps.libblastrampoline_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-version = "5.8.0+0"
-
-[[deps.libevent_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl", "OpenSSL_jll"]
-git-tree-sha1 = "f04ec6d9a186115fb38f858f05c0c4e1b7fc9dcb"
-uuid = "1080aeaf-3a6a-583e-a51c-c537b09f60ec"
-version = "2.1.13+1"
-
-[[deps.nghttp2_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-version = "1.48.0+0"
-
-[[deps.p7zip_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
-version = "17.4.0+0"
-
-[[deps.prrte_jll]]
-deps = ["Artifacts", "Hwloc_jll", "JLLWrappers", "Libdl", "PMIx_jll", "libevent_jll"]
-git-tree-sha1 = "5adb2d7a18a30280feb66cad6f1a1dfdca2dc7b0"
-uuid = "eb928a42-fffd-568d-ab9c-3f5d54fc65b9"
-version = "3.0.2+0"
diff --git a/Project.toml b/Project.toml
index 2f187ac..84ec5f3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -57,4 +57,5 @@ MultipleTesting = "0.6.0"
 Optim = "1.7"
 Tables = "1.10.1"
 YAML = "0.4.9"
+TMLE = "0.12"
 julia = "1.7, 1"

From 9051eb1c7f7399f09bca1d05082d42d693d99a4c Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 6 Dec 2023 23:42:13 +0000
Subject: [PATCH 32/71] try build app again

---
 .github/workflows/CI.yml |  2 ++
 Comonicon.toml           | 20 ++++++++++----------
 docker/Dockerfile        |  2 +-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index fc1b284..a0dde5f 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -45,6 +45,8 @@ jobs:
       - uses: julia-actions/cache@v1
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
+      - name: Build app
+        run: julia --project -t auto deps/build_app.jl app tarball
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v2
         with:
diff --git a/Comonicon.toml b/Comonicon.toml
index 4acd652..48c2a41 100644
--- a/Comonicon.toml
+++ b/Comonicon.toml
@@ -5,16 +5,16 @@ completion = true
 quiet = false
 optimize = 2
 
-# [sysimg]
-# incremental=true
-# filter_stdlibs=false
+[sysimg]
+incremental=true
+filter_stdlibs=false
 
-# [sysimg.precompile]
-# execution_file = ["deps/execute.jl"]
+[sysimg.precompile]
+execution_file = ["deps/execute.jl"]
 
-# [application]
-# incremental=true
-# filter_stdlibs=false
+[application]
+incremental=true
+filter_stdlibs=false
 
-# [application.precompile]
-# execution_file = ["deps/execute.jl"]
\ No newline at end of file
+[application.precompile]
+execution_file = ["deps/execute.jl"]
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 99aa2c7..0523eeb 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -41,7 +41,7 @@ WORKDIR /TargetedEstimation.jl
 RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
 
 # Precompile Sysimage project
-RUN julia --project -t auto --startup-file=no deps/build_app.jl
+RUN julia --project -t auto --startup-file=no deps/build_app.jl app
 
 ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
 

From 873095ad147b217da4581ed39f08fee78cb61261 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 7 Dec 2023 16:45:42 +0000
Subject: [PATCH 33/71] remove HAL

---
 .github/workflows/CI.yml       | 18 +-----------------
 Project.toml                   |  4 +---
 docker/Dockerfile              | 25 -------------------------
 docs/src/models.md             |  1 -
 src/TargetedEstimation.jl      |  1 -
 test/config/tmle_ose_config.jl |  2 --
 6 files changed, 2 insertions(+), 49 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index a0dde5f..19683a3 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -22,22 +22,6 @@ jobs:
           - x64
     steps:
       - uses: actions/checkout@v2
-      - name: Install curl
-        run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev
-      - name: Setup R
-        uses: r-lib/actions/setup-r@v2
-        with:
-          r-version: 4.2.3
-      - name: Install R dependencies
-        run: |
-          install.packages("devtools", repos="http://cran.us.r-project.org", dependecies=TRUE)
-          require(devtools)
-          install_version("hal9001", version = "0.4.1", repos = "http://cran.us.r-project.org")
-        shell: Rscript {0}
-      - name: Set R_HOME
-        run: echo "R_HOME=$(R RHOME)" >> $GITHUB_ENV
-      - name: Set LD_LIBRARY
-        run: echo "LD_LIBRARY_PATH=$R_HOME/lib" >> $GITHUB_ENV
       - uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.version }}
@@ -45,7 +29,7 @@ jobs:
       - uses: julia-actions/cache@v1
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
-      - name: Build app
+      - name: Build App
         run: julia --project -t auto deps/build_app.jl app tarball
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v2
diff --git a/Project.toml b/Project.toml
index 84ec5f3..4518080 100644
--- a/Project.toml
+++ b/Project.toml
@@ -14,7 +14,6 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
 GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
-HighlyAdaptiveLasso = "c5dac772-1445-43c4-b698-9440de7877f6"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
@@ -43,7 +42,6 @@ Configurations = "0.17.6"
 DataFrames = "1.3.4"
 EvoTrees = "0.16.5"
 GLMNet = "0.7"
-HighlyAdaptiveLasso = "0.2.0"
 JLD2 = "0.4.22"
 JSON = "0.21.4"
 MKL = "0.6"
@@ -55,7 +53,7 @@ MLJModels = "0.16"
 MLJXGBoostInterface = "0.3.4"
 MultipleTesting = "0.6.0"
 Optim = "1.7"
+TMLE = "0.12"
 Tables = "1.10.1"
 YAML = "0.4.9"
-TMLE = "0.12"
 julia = "1.7, 1"
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 0523eeb..8016127 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -6,31 +6,6 @@ ENV TZ=Europe/Amsterdam
 
 ENV JULIA_DEPOT_PATH=/opt
 
-RUN apt-get update && apt-get install -y wget unzip procps
-
-# Install R and hal9001
-
-RUN apt-get install -y r-base \
-                    r-base-core \
-                    r-recommended \
-                    r-base-dev
-
-RUN apt-get install -y libssl-dev \
-                libxml2-dev \
-                libcurl4-openssl-dev \
-                libgit2-dev \
-                libharfbuzz-dev \
-                libfribidi-dev \
-                libfontconfig1-dev \
-                libfreetype6-dev \
-                libpng-dev \
-                libtiff5-dev \
-                libjpeg-dev
-
-RUN R -e "install.packages('devtools', repos='http://cran.us.r-project.org', dependecies=TRUE); \
-         require(devtools);\
-         install_version('hal9001', version = '0.4.1', repos = 'http://cran.us.r-project.org')"
-
 # Import project, build and precompile
 
 COPY . /TargetedEstimation.jl 
diff --git a/docs/src/models.md b/docs/src/models.md
index 4d978bd..5a15d88 100644
--- a/docs/src/models.md
+++ b/docs/src/models.md
@@ -11,7 +11,6 @@ Because [TMLE.jl](https://targene.github.io/TMLE.jl/stable/) is based on top of
 - [EvoTrees.jl](https://evovest.github.io/EvoTrees.jl/stable/): A pure Julia implementation of histogram based gradient boosting trees (subset of XGBoost)
 - [GLMNet](https://github.com/JuliaStats/GLMNet.jl): A Julia wrapper of the [glmnet](https://glmnet.stanford.edu/articles/glmnet.html) package. See the [GLMNet](@ref) section.
 - [MLJModels](https://github.com/JuliaAI/MLJModels.jl): General utilities such as the `OneHotEncoder` or `InteractionTransformer`.
-- [HighlyAdaptiveLasso](https://github.com/olivierlabayle/HighlyAdaptiveLasso.jl): A Julia wrapper of the [HAL](https://tlverse.org/hal9001/) algorithm, experimental.
 
 Further support for more packages can be added on request, please fill an [issue](https://github.com/TARGENE/TargetedEstimation.jl/issues).
 
diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index fce10ec..aa01fda 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -10,7 +10,6 @@ using MLJ
 using CSV
 using Arrow
 using TMLE
-using HighlyAdaptiveLasso
 using EvoTrees
 using MLJXGBoostInterface
 using MLJLinearModels
diff --git a/test/config/tmle_ose_config.jl b/test/config/tmle_ose_config.jl
index 8649d9f..1997b70 100644
--- a/test/config/tmle_ose_config.jl
+++ b/test/config/tmle_ose_config.jl
@@ -13,7 +13,6 @@ default_models = TMLE.default_models(
     evo_10             = EvoTreeRegressor(nrounds=10),
     evo_20             = EvoTreeRegressor(nrounds=20),
     constant           = ConstantRegressor(),
-    hal                = HALRegressor(max_degree=1, smoothness_orders=1, num_knots=[10, 5], lambda=10, cv_select=false)
     ),
   # For the estimation of E[Y|W, T]: binary target
   Q_binary = Stack(
@@ -26,7 +25,6 @@ default_models = TMLE.default_models(
       cache                   = false
     ),
     constant           = ConstantClassifier(),
-    hal                = HALClassifier(max_degree=1, smoothness_orders=1, num_knots=[10, 5], lambda=10, cv_select=false),
     gridsearch_evo     = TunedModel(
       model = evotree,
       resampling = CV(),

From b11921f82288aeaf2636bfe7cdd68b00ac0239cf Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 7 Dec 2023 17:02:17 +0000
Subject: [PATCH 34/71] add gcc to dockerfile

---
 docker/Dockerfile | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 8016127..3f24ee7 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,6 +2,10 @@ FROM julia:1.9.4-bullseye
 
 ARG DEBIAN_FRONTEND=noninteractive
 
+RUN apt-get update && \
+    apt-get -y install gcc mono-mcs && \
+    rm -rf /var/lib/apt/lists/*
+
 ENV TZ=Europe/Amsterdam
 
 ENV JULIA_DEPOT_PATH=/opt
@@ -16,7 +20,7 @@ WORKDIR /TargetedEstimation.jl
 RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
 
 # Precompile Sysimage project
-RUN julia --project -t auto --startup-file=no deps/build_app.jl app
+# RUN julia --project -t auto --startup-file=no deps/build_app.jl app
 
-ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
+# ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
 

From 26e822366f56c6817b120752fd8ea2162b9ba3da Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 7 Dec 2023 17:07:58 +0000
Subject: [PATCH 35/71] add back app

---
 docker/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 3f24ee7..5eac723 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -20,7 +20,7 @@ WORKDIR /TargetedEstimation.jl
 RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
 
 # Precompile Sysimage project
-# RUN julia --project -t auto --startup-file=no deps/build_app.jl app
+RUN julia --project -t auto --startup-file=no deps/build_app.jl app
 
-# ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
+ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
 

From df1594ca14f89080780f2d6904b1c490cba176ed Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 8 Dec 2023 10:47:11 +0000
Subject: [PATCH 36/71] add some doc updates

---
 docs/src/cli.md                               | 51 +++++++++++++++++++
 docs/src/environment.md                       | 22 --------
 docs/src/index.md                             |  7 +--
 .../G-superlearning-Q-glm.jl                  |  0
 .../G-superlearning-Q-glmnet.jl               |  0
 .../glm-with-interactions-for-Q.jl            |  0
 .../estimators => estimators-configs}/glm.jl  |  0
 .../glmnet-with-interactions-for-Q.jl         |  0
 .../glmnet.jl                                 |  0
 .../superlearning-with-interactions-for-Q.jl  |  0
 .../superlearning.jl                          |  0
 .../tuned-xgboost.jl                          |  0
 12 files changed, 53 insertions(+), 27 deletions(-)
 create mode 100644 docs/src/cli.md
 delete mode 100644 docs/src/environment.md
 rename {docs/src/estimators => estimators-configs}/G-superlearning-Q-glm.jl (100%)
 rename {docs/src/estimators => estimators-configs}/G-superlearning-Q-glmnet.jl (100%)
 rename {docs/src/estimators => estimators-configs}/glm-with-interactions-for-Q.jl (100%)
 rename {docs/src/estimators => estimators-configs}/glm.jl (100%)
 rename {docs/src/estimators => estimators-configs}/glmnet-with-interactions-for-Q.jl (100%)
 rename {docs/src/estimators => estimators-configs}/glmnet.jl (100%)
 rename {docs/src/estimators => estimators-configs}/superlearning-with-interactions-for-Q.jl (100%)
 rename {docs/src/estimators => estimators-configs}/superlearning.jl (100%)
 rename {docs/src/estimators => estimators-configs}/tuned-xgboost.jl (100%)

diff --git a/docs/src/cli.md b/docs/src/cli.md
new file mode 100644
index 0000000..b6127e2
--- /dev/null
+++ b/docs/src/cli.md
@@ -0,0 +1,51 @@
+# The Command Line Interface
+
+## Installing the CLI
+
+### Via Docker (requires Docker)
+
+While we are getting close to providing a standalone application, the most reliable way to use the app is still via the provided [Docker container](https://hub.docker.com/r/olivierlabayle/targeted-estimation/tags). In this container, the command line interface is accessible and can be used directly. For example via:
+
+```bash
+docker run -it --rm -v HOST_DIR:CONTAINER_DIR olivierlabayle/targeted-estimation:TAG tmle --help
+```
+
+where `HOST_DIR:CONTAINER_DIR` will map the host directory `HOST_DIR` to the container's `CONTAINER_DIR` and `TAG` is the currently released version of the project.
+
+### Build (requires Julia)
+
+Alternatively, provided you have Julia installed, you can build the app via:
+
+```bash
+julia --project deps/build_app.jl app
+```
+
+Below is a description of the functionalities offered by the CLI.
+
+## CLI Description
+
+The CLI contains 3 sub-commands:
+
+- `tmle`: To run TMLE on a dataset (see [tmle command](@ref)).
+- `sieve-variance-plateau`: To correct the variance of an estimator for non i.i.d data via [Sieve Variance Plateau](https://biostats.bepress.com/ucbbiostat/paper322/) (see [sieve-variance-plateau command](@ref)).
+- `make-summary`: Combines multiple outputs from a `tmle` run into one output file (see [make-summary command](@ref)).
+
+### tmle command
+
+Arguments:
+
+- dataset: A dataset either in .csv or .arrow format
+- estimands: A file containing a serialized Configuration object.
+- estimators: A custom julia file containing the estimators to use. Several examples are provided [here](https://github.com/TARGENE/TargetedEstimation.jl/estimators-configs). Alternatively, to point to any of them, the name of the file can be supplied without the ".jl" extension. (e.g. "superlearning").
+
+Options:
+
+- -v, --verbosity: Verbosity level.
+- -o, --outputs: Outputs to be generated.
+- --chunksize <100::Int>: Results are written in batches of size chunksize.
+- -r, --rng <123::Int>: Random seed (Only used for estimands ordering at the moment).
+- -c, --cache-strategy: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size").
+
+Flags:
+
+- -s, --sort-estimands: Sort estimands to minimize cache usage. A brute force approach will be used, resulting in exponentially long sorting time (Only appropriate for small number of estimands).
\ No newline at end of file
diff --git a/docs/src/environment.md b/docs/src/environment.md
deleted file mode 100644
index 24a40dc..0000000
--- a/docs/src/environment.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# The Run Environment
-
-## General usage
-
-At this point in time, the package depends on several R dependencies which makes it difficult to package as a single Julia executable. We thus rely on a docker container for the execution of the various command line interfaces. Some familiarity with [Docker](https://docs.docker.com/get-started/) or [Singularity](https://docs.sylabs.io/guides/3.0/user-guide/quick_start.html) is thus beneficial.
-
-- The container is available for download from the [Docker registry](https://hub.docker.com/r/olivierlabayle/targeted-estimation/tags).
-- In this container, the project is stored in `/TargetedEstimation.jl`, as such, any script can be run using the following template command: `julia --startup-file=no --project=/TargetedEstimation.jl /TargetedEstimation.jl/scripts/SCRIPT_NAME.jl`. Dont forget to mount the output directory in order to retrieve the output data.
-
-Example Docker command:
-
-```bash
-docker run -it --rm -v HOST_DIR:CONTAINER_DIR olivierlabayle/targeted-estimation:0.7 \
-julia --project=/TargetedEstimation.jl /TargetedEstimation.jl/scripts/tmle.jl --help
-```
-
-## Alternatives
-
-Here are a couple alternatives to using the Docker container:
-
-- If you are not using the HAL algorithm, you can simply clone this repository and instantiate the project in order to use the scripts or any other functionality.
-- If you are using the HAL algorithm you can use the `docker/Dockerfile` as a guide for your local installation.
diff --git a/docs/src/index.md b/docs/src/index.md
index 10952d5..4e032b5 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,11 +1,8 @@
 # TargetedEstimation.jl
 
-The goal of this package, eventually, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package.
+The goal of this package, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package.
 
-The various command line interfaces provided here are described in the following sections and can be run in the associated [Docker container](https://hub.docker.com/r/olivierlabayle/targeted-estimation/tags):
-
-- [Targeted Minimum Loss Based Estimation](@ref): The main command line interface provided in this project to run TMLE.
-- [Sieve Variance Plateau Estimation](@ref): Variance correction for non i.i.d. data.
+[The Command Line Interface](@ref)
 
 We also provide extensions to the [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) universe that are particularly useful in statistical genetics (but not restricted to it):
 
diff --git a/docs/src/estimators/G-superlearning-Q-glm.jl b/estimators-configs/G-superlearning-Q-glm.jl
similarity index 100%
rename from docs/src/estimators/G-superlearning-Q-glm.jl
rename to estimators-configs/G-superlearning-Q-glm.jl
diff --git a/docs/src/estimators/G-superlearning-Q-glmnet.jl b/estimators-configs/G-superlearning-Q-glmnet.jl
similarity index 100%
rename from docs/src/estimators/G-superlearning-Q-glmnet.jl
rename to estimators-configs/G-superlearning-Q-glmnet.jl
diff --git a/docs/src/estimators/glm-with-interactions-for-Q.jl b/estimators-configs/glm-with-interactions-for-Q.jl
similarity index 100%
rename from docs/src/estimators/glm-with-interactions-for-Q.jl
rename to estimators-configs/glm-with-interactions-for-Q.jl
diff --git a/docs/src/estimators/glm.jl b/estimators-configs/glm.jl
similarity index 100%
rename from docs/src/estimators/glm.jl
rename to estimators-configs/glm.jl
diff --git a/docs/src/estimators/glmnet-with-interactions-for-Q.jl b/estimators-configs/glmnet-with-interactions-for-Q.jl
similarity index 100%
rename from docs/src/estimators/glmnet-with-interactions-for-Q.jl
rename to estimators-configs/glmnet-with-interactions-for-Q.jl
diff --git a/docs/src/estimators/glmnet.jl b/estimators-configs/glmnet.jl
similarity index 100%
rename from docs/src/estimators/glmnet.jl
rename to estimators-configs/glmnet.jl
diff --git a/docs/src/estimators/superlearning-with-interactions-for-Q.jl b/estimators-configs/superlearning-with-interactions-for-Q.jl
similarity index 100%
rename from docs/src/estimators/superlearning-with-interactions-for-Q.jl
rename to estimators-configs/superlearning-with-interactions-for-Q.jl
diff --git a/docs/src/estimators/superlearning.jl b/estimators-configs/superlearning.jl
similarity index 100%
rename from docs/src/estimators/superlearning.jl
rename to estimators-configs/superlearning.jl
diff --git a/docs/src/estimators/tuned-xgboost.jl b/estimators-configs/tuned-xgboost.jl
similarity index 100%
rename from docs/src/estimators/tuned-xgboost.jl
rename to estimators-configs/tuned-xgboost.jl

From 600439c99c712248d2bf6b4971717b7c6980e3d4 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 8 Dec 2023 15:00:11 +0000
Subject: [PATCH 37/71] update docs

---
 docs/src/cli.md             | 26 +++-----------------------
 docs/src/index.md           |  2 +-
 docs/src/make_summary.md    | 21 +++++++++++++++++++++
 docs/src/merge.md           | 17 -----------------
 docs/src/sieve_variance.md  | 31 ++++++++++++++++++-------------
 docs/src/tmle_estimation.md | 36 +++++++++++++++++-------------------
 6 files changed, 60 insertions(+), 73 deletions(-)
 create mode 100644 docs/src/make_summary.md
 delete mode 100644 docs/src/merge.md

diff --git a/docs/src/cli.md b/docs/src/cli.md
index b6127e2..820f3c2 100644
--- a/docs/src/cli.md
+++ b/docs/src/cli.md
@@ -1,6 +1,6 @@
-# The Command Line Interface
+# The Command Line Interface (CLI)
 
-## Installing the CLI
+## CLI Installation
 
 ### Via Docker (requires Docker)
 
@@ -20,7 +20,7 @@ Alternatively, provided you have Julia installed, you can build the app via:
 julia --project deps/build_app.jl app
 ```
 
-Be low is a description of the functionalities offered by the CLI.
+Below is a description of the functionalities offered by the CLI.
 
 ## CLI Description
 
@@ -29,23 +29,3 @@ The CLI contains 3 sub-commands:
 - `tmle`: To Run TMLE on a dataset (see [tmle command](@ref)).
 - `sieve-variance-plateau`: To correct the variance of an estimator for non i.i.d data via [Sieve Variance Plateau](https://biostats.bepress.com/ucbbiostat/paper322/) (see [sieve-variance-plateau command](@ref)).
 - `make-summary`: Combines multiple outputs from a `tmle` run into one output file (see [make-summary command](@ref))
-
-### tmle command
-
-Arguments:
-
-- dataset: A dataset either in .csv or .arrow format
-- estimands: A file containing a serialized Configuration object.
-- estimators: A custom julia file containing the estimators to use. Several examples are provided [here](https://github.com/TARGENE/TargetedEstimation.jl/estimators-configs). Alternatively, to point to any of them, the name of the file can be supplied without the ".jl" extension. (e.g. "superlearning").
-
-Options:
-
-- -v, --verbosity: Verbosity level.
-- -o, --outputs: Ouputs to be generated.
-- --chunksize <100::Int>: Results are written in batches of size chunksize.
-- -r, --rng <123::Int>: Random seed (Only used for estimands ordering at the moment).
-- -c, --cache-strategy: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size").
-
-Flags:
-
-- -s, --sort-estimands: Sort estimands to minimize cache usage. A brute force approach will be used, resulting in exponentially long sorting time (Only appropriate for small number of estimands).
\ No newline at end of file
diff --git a/docs/src/index.md b/docs/src/index.md
index 4e032b5..2f78304 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -2,7 +2,7 @@
 
 The goal of this package, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package.
 
-[The Command Line Interface](@ref)
+[The Command Line Interface (CLI)](@ref)
 
 We also provide extensions to the [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) universe that are particularly useful in statistical genetics (but not restricted to it):
 
diff --git a/docs/src/make_summary.md b/docs/src/make_summary.md
new file mode 100644
index 0000000..2463498
--- /dev/null
+++ b/docs/src/make_summary.md
@@ -0,0 +1,21 @@
+# Merging TMLE outputs
+
+## Usage
+
+```bash
+tmle make-summary --help
+```
+
+Merges tmle outputs into a single file.
+
+Args:
+
+- prefix: Prefix to .hdf5 files to be used to create the summary file
+
+Options:
+
+- -o, --outputs <Outputs...>: Outputs configuration.
+
+Flags:
+
+- -h, --help: Print this help message.
diff --git a/docs/src/merge.md b/docs/src/merge.md
deleted file mode 100644
index 924bf6b..0000000
--- a/docs/src/merge.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Merging TMLE and SVP outputs
-
-If multiple `scripts/tmle.jl` and potentially `scripts/sieve_variance.jl` have been run, you may want to combine the generated CSV outputs in a single result file. This is the purpose of this command line interface.
-
-## Usage
-
-You can merge summary CSV files by running:
-
-```bash
-julia scripts/merge_summaries.jl TMLE_PREFIX OUT --sieve-prefix=SIEVE_PREFIX
-```
-
-where:
-
-- `TMLE_PREFIX`: is a prefix to all output CSV files generated by the `scripts/tmle.jl` script.
-- `OUT`: is a path to the output file that will be generated.
-- `--sieve-prefix`: is an optional prefix to the CSV output of the `scripts/sieve_variance.jl` script.
diff --git a/docs/src/sieve_variance.md b/docs/src/sieve_variance.md
index 3abd1fa..d7c61a9 100644
--- a/docs/src/sieve_variance.md
+++ b/docs/src/sieve_variance.md
@@ -4,20 +4,25 @@ If the i.i.d. (independent and identically distributed) hypothesis is not satisf
 
 ## Usage
 
-At the moment, this script is restricted to the analysis of population genetics datasets mostly in the context of [TarGene](https://targene.github.io/targene-pipeline/stable/sieve_variance/). It can be run with the following command:
-
 ```bash
-julia scripts/sieve_variance.jl PREFIX GRM_PREFIX OUT_PREFIX
-        --nb-estimators=100
-        --max-tau=1.0
-        --verbosity=1
+tmle sieve-variance-plateau --help
 ```
 
-where:
+Runs Sieve Variance Plateau correction.
+
+Args:
+
+- input_prefix: Prefix to outputs from the tmle command.
+
+Options:
+
+- -o, --out <svp.hdf5>: Output filename in hdf5 format.
+- -g, --grm-prefix <GRM>: Prefix to the aggregated GRM.
+- -v, --verbosity <0>: Verbosity level.
+- -n, --n-estimators <10>: Number of variance estimators to build for each estimate.
+- -m, --max-tau <0.8>: Maximum distance between any two individuals.
+- -e, --estimator-key <TMLE>: Estimator to use to proceed with sieve variance correction.
+
+Flags:
 
-- `PREFIX`: A prefix to HDF5 files generated by `scripts/tmle.jl` (potentially multiple).
-- `GRM_PREFIX`: A prefix to the aggregated Genetic Relationship Matrix.
-- `OUT_PREFIX`: Output prefix to save SVP curves and final variance estimates.
-- `--nb-estimators`: The number of points per SVP curve.
-- `--max-tau`: Maximum distance between individuals to consider.
-- `--verbosity`: Verbosity level.
+- -h, --help: Print this help message.
diff --git a/docs/src/tmle_estimation.md b/docs/src/tmle_estimation.md
index 78984cb..67b6c7a 100644
--- a/docs/src/tmle_estimation.md
+++ b/docs/src/tmle_estimation.md
@@ -4,27 +4,25 @@ This is the main script in this package, it provides a command line interface fo
 
 ## Usage
 
-Provided you have the package and all dependencies installed or in the provided docker container, you can run TMLE via the following command:
-
-```bash
-julia scripts/tmle.jl DATAFILE PARAMFILE OUTFILE
-        --estimator-file=docs/estimators/glmnet.jl
-        --hdf5-out=output.hdf5
-        --pval-threshold=0.05
-        --chunksize=100
-        --verbosity=1
-```
+Runs TMLE estimation.
 
-where:
+Args:
+
+- dataset: A dataset either in .csv or .arrow format
+- estimands: A file containing a serialized Configuration object.
+- estimators: A custom julia file containing the estimators to use. Several examples are provided [here](https://github.com/TARGENE/TargetedEstimation.jl/estimators-configs). Alternatively, to point to any of them, the name of the file can be supplied without the ".jl" extension (e.g. "superlearning").
+
+Options:
+
+- -v, --verbosity: Verbosity level.
+- -o, --outputs: Outputs to be generated.
+- --chunksize <100::Int>: Results are written in batches of size chunksize.
+- -r, --rng <123::Int>: Random seed (Only used for estimands ordering at the moment).
+- -c, --cache-strategy: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size").
+
+Flags:
 
-- `DATAFILE`: A CSV (.csv) or Arrow (.arrow) file containing the tabular data. The format will be infered from the extension.
-- `PARAMFILE`: A serialized [YAML](https://targene.github.io/TMLE.jl/stable/user_guide/#Reading-Parameters-from-YAML-files) or [bin](https://docs.julialang.org/en/v1/stdlib/Serialization/) file containing the estimands to be estimated. The YAML file can be written by hand or programmatically using the [TMLE.parameters_to_yaml](https://targene.github.io/TMLE.jl/stable/api/#TMLE.parameters_to_yaml-Tuple{Any,%20Any}) function.
-- `OUTFILE`: The output .csv file (see [Output file](@ref))
-- `--estimator-file`: A Julia file describing the TMLE specifications (see [Estimator File](@ref)).
-- `--hdf5-out`: if provided, a path to a file to save the influence curves.
-- `--pval-threshold`: Only "significant" (< this threshold) estimates will actually have their influence curves stored in the previous file.
-- `--chunksize`: To manage memory, the results are appended to the output files in batches the size of which can be controlled via this option.
-- `--verbosity`: The verbosity level.
+- -s, --sort-estimands: Sort estimands to minimize cache usage. A brute force approach will be used, resulting in exponentially long sorting time (Only appropriate for a small number of estimands).
 
 ## Output file
 

From 2c8959323abcd289b31b188ad45d1194a6d5b7c7 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Mon, 11 Dec 2023 13:58:04 +0000
Subject: [PATCH 38/71] make some args optional

---
 Project.toml                                  |  2 +-
 docs/make.jl                                  |  2 +-
 estimators-configs/G-superlearning-Q-glm.jl   | 14 +++---
 .../G-superlearning-Q-glmnet.jl               | 14 +++---
 .../glm-with-interactions-for-Q.jl            | 14 +++---
 estimators-configs/glm.jl                     | 14 +++---
 .../glmnet-with-interactions-for-Q.jl         | 14 +++---
 estimators-configs/glmnet.jl                  | 16 +++----
 .../superlearning-with-interactions-for-Q.jl  | 14 +++---
 estimators-configs/superlearning.jl           | 16 +++----
 estimators-configs/tuned-xgboost.jl           | 14 +++---
 src/runner.jl                                 | 20 +++++---
 src/utils.jl                                  | 46 +++++++++++++------
 test/runner.jl                                | 28 ++++++++---
 test/sieve_variance.jl                        |  6 ++-
 test/summary.jl                               | 13 +++++-
 test/utils.jl                                 | 46 ++++++++++++++-----
 17 files changed, 176 insertions(+), 117 deletions(-)

diff --git a/Project.toml b/Project.toml
index 4518080..48583ae 100644
--- a/Project.toml
+++ b/Project.toml
@@ -53,7 +53,7 @@ MLJModels = "0.16"
 MLJXGBoostInterface = "0.3.4"
 MultipleTesting = "0.6.0"
 Optim = "1.7"
-TMLE = "0.12"
+TMLE = "0.12.1"
 Tables = "1.10.1"
 YAML = "0.4.9"
 julia = "1.7, 1"
diff --git a/docs/make.jl b/docs/make.jl
index 0280c0d..e6aee0b 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -15,7 +15,7 @@ makedocs(
     modules = [TargetedEstimation],
     pages=[
         "Home" => "index.md",
-        "Command Line Interfaces" => ["environment.md", "tmle_estimation.md", "sieve_variance.md", "merge.md"],
+        "Command Line Interfaces" => ["environment.md", "tmle_estimation.md", "sieve_variance.md", "make_summary.md"],
         "MLJ Extensions" => ["models.md", "resampling.md"],
     ]
 )
diff --git a/estimators-configs/G-superlearning-Q-glm.jl b/estimators-configs/G-superlearning-Q-glm.jl
index 83d44d0..02da072 100644
--- a/estimators-configs/G-superlearning-Q-glm.jl
+++ b/estimators-configs/G-superlearning-Q-glm.jl
@@ -1,13 +1,7 @@
 xgboost_classifier = XGBoostClassifier(tree_method="hist")
 
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = LinearRegressor(),
   # For the estimation of E[Y|W, T]: binary target
   Q_binary = LogisticClassifier(lambda=0.),
@@ -30,4 +24,8 @@ tmle_spec = (
         cache=false
     )
   )
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/estimators-configs/G-superlearning-Q-glmnet.jl b/estimators-configs/G-superlearning-Q-glmnet.jl
index 47094ef..bb8c495 100644
--- a/estimators-configs/G-superlearning-Q-glmnet.jl
+++ b/estimators-configs/G-superlearning-Q-glmnet.jl
@@ -1,13 +1,7 @@
 xgboost_classifier = XGBoostClassifier(tree_method="hist")
 
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = GLMNetRegressor(resampling=CV(nfolds=3)),
   # For the estimation of E[Y|W, T]: binary target
   Q_binary = GLMNetClassifier(resampling=StratifiedCV(nfolds=3)),
@@ -30,4 +24,8 @@ tmle_spec = (
         cache=false
     )
   )
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/estimators-configs/glm-with-interactions-for-Q.jl b/estimators-configs/glm-with-interactions-for-Q.jl
index edaeca7..8959c76 100644
--- a/estimators-configs/glm-with-interactions-for-Q.jl
+++ b/estimators-configs/glm-with-interactions-for-Q.jl
@@ -1,11 +1,5 @@
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = Pipeline(
     RestrictedInteractionTransformer(order=2, primary_variables_patterns=[r"^rs[0-9]+"]),
     LinearRegressor(),
@@ -19,4 +13,8 @@ tmle_spec = (
   ),
   # For the estimation of p(T| W)
   G           = LogisticClassifier(lambda=0.)
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/estimators-configs/glm.jl b/estimators-configs/glm.jl
index 6aea32a..9a8b166 100644
--- a/estimators-configs/glm.jl
+++ b/estimators-configs/glm.jl
@@ -1,14 +1,12 @@
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = LinearRegressor(),
   # For the estimation of E[Y|W, T]: binary target
   Q_binary = LogisticClassifier(lambda=0.),
   # For the estimation of p(T| W)
   G = LogisticClassifier(lambda=0.)
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/estimators-configs/glmnet-with-interactions-for-Q.jl b/estimators-configs/glmnet-with-interactions-for-Q.jl
index 003cd7e..b255974 100644
--- a/estimators-configs/glmnet-with-interactions-for-Q.jl
+++ b/estimators-configs/glmnet-with-interactions-for-Q.jl
@@ -1,11 +1,5 @@
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = Pipeline(
     RestrictedInteractionTransformer(order=2, primary_variables_patterns=[r"^rs[0-9]+"]),
     GLMNetRegressor(resampling=CV(nfolds=3)),
@@ -19,4 +13,8 @@ tmle_spec = (
   ),
   # For the estimation of p(T| W)
   G           = GLMNetClassifier(resampling=StratifiedCV(nfolds=3))
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/estimators-configs/glmnet.jl b/estimators-configs/glmnet.jl
index 27a89f3..5fd2584 100644
--- a/estimators-configs/glmnet.jl
+++ b/estimators-configs/glmnet.jl
@@ -1,14 +1,12 @@
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = GLMNetRegressor(resampling=CV(nfolds=3)),
-  # For the estimation of E[Y|W, T]: binary target
+  # For the estimation of E[Y|W, T]: binary outcome
   Q_binary = GLMNetClassifier(resampling=StratifiedCV(nfolds=3)),
   # For the estimation of p(T| W)
   G = GLMNetClassifier(resampling=StratifiedCV(nfolds=3))
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/estimators-configs/superlearning-with-interactions-for-Q.jl b/estimators-configs/superlearning-with-interactions-for-Q.jl
index f3d75ce..df2372a 100644
--- a/estimators-configs/superlearning-with-interactions-for-Q.jl
+++ b/estimators-configs/superlearning-with-interactions-for-Q.jl
@@ -1,14 +1,8 @@
 xgboost_regressor = XGBoostRegressor(tree_method="hist")
 xgboost_classifier = XGBoostClassifier(tree_method="hist")
 
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = Stack(
     metalearner        = LinearRegressor(fit_intercept=false),
     resampling         = CV(nfolds=3),
@@ -81,4 +75,8 @@ tmle_spec = (
         cache=false
     )
   )
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/estimators-configs/superlearning.jl b/estimators-configs/superlearning.jl
index 4bb72f6..5f2ce4c 100644
--- a/estimators-configs/superlearning.jl
+++ b/estimators-configs/superlearning.jl
@@ -1,14 +1,8 @@
 xgboost_regressor = XGBoostRegressor(tree_method="hist")
 xgboost_classifier = XGBoostClassifier(tree_method="hist")
 
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = Stack(
     metalearner        = LinearRegressor(fit_intercept=false),
     resampling         = CV(nfolds=3),
@@ -27,7 +21,7 @@ tmle_spec = (
         cache=false
         )
     ),
-  # For the estimation of E[Y|W, T]: binary target
+  # For the estimation of E[Y|W, T]: binary outcome
   Q_binary = Stack(
     metalearner        = LogisticClassifier(lambda=0., fit_intercept=false),
     resampling         = StratifiedCV(nfolds=3),
@@ -65,4 +59,8 @@ tmle_spec = (
         cache=false
     )
   )
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/estimators-configs/tuned-xgboost.jl b/estimators-configs/tuned-xgboost.jl
index 6432206..d7318c5 100644
--- a/estimators-configs/tuned-xgboost.jl
+++ b/estimators-configs/tuned-xgboost.jl
@@ -1,14 +1,8 @@
 xgboost_regressor = XGBoostRegressor(tree_method="hist")
 xgboost_classifier = XGBoostClassifier(tree_method="hist")
 
-tmle_spec = (
-  # Controls caching of data by MLJ machines: turning to `true` may result in faster execution but higher memory usage
-  cache        = false,
-  # Controls whether the fluctuation is weighted or not
-  weighted_fluctuation = false,
-  # Propensity score threshold
-  threshold    = 1e-8,
-  # For the estimation of E[Y|W, T]: continuous target
+default_models = TMLE.default_models(
+  # For the estimation of E[Y|W, T]: continuous outcome
   Q_continuous = TunedModel(
     model = xgboost_regressor,
     resampling = CV(nfolds=3),
@@ -44,4 +38,8 @@ tmle_spec = (
     measure = log_loss,
     cache=false
 )
+)
+
+ESTIMATORS = (
+  TMLE = TMLEE(models=default_models, weighted=true, ps_lowerbound=1e-8),
 )
\ No newline at end of file
diff --git a/src/runner.jl b/src/runner.jl
index f25e5e8..f7a8166 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -7,7 +7,9 @@ mutable struct Runner
     outputs::Outputs
     verbosity::Int
     failed_nuisance::Set
-    function Runner(dataset, estimands, estimators; 
+    function Runner(dataset; 
+        estimands="generateATEs", 
+        estimators="glmnet",
         verbosity=0, 
         outputs=Outputs(), 
         chunksize=100,
@@ -16,11 +18,11 @@ mutable struct Runner
         sort_estimands=false
         )    
         # Retrieve TMLE specifications
-        estimators = TargetedEstimation.load_tmle_spec(estimators)
+        estimators = TargetedEstimation.load_tmle_spec(file=estimators)
         # Load dataset
         dataset = TargetedEstimation.instantiate_dataset(dataset)
         # Read parameter files
-        estimands = TargetedEstimation.proofread_estimands(estimands, dataset)
+        estimands = TargetedEstimation.build_estimands_list(estimands, dataset)
         if sort_estimands
             estimands = groups_ordering(estimands; 
                 brute_force=true, 
@@ -115,7 +117,9 @@ end
 
 
 """
-    tmle(dataset, estimands, estimators; 
+    tmle(dataset; 
+        estimands="generateATEs", 
+        estimators="glmnet",
         verbosity=0, 
         outputs=Outputs(),
         chunksize=100,
@@ -144,7 +148,9 @@ TMLE CLI.
 
 - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time).
 """
-@cast function tmle(dataset::String, estimands::String, estimators::String; 
+@cast function tmle(dataset::String;
+    estimands::String="default_ATE", 
+    estimators::String="glmnet",
     verbosity::Int=0, 
     outputs::Outputs=Outputs(),
     chunksize::Int=100,
@@ -152,7 +158,9 @@ TMLE CLI.
     cache_strategy::String="release-unusable",
     sort_estimands::Bool=false
     )
-    runner = Runner(dataset, estimands, estimators; 
+    runner = Runner(dataset;
+        estimands=estimands, 
+        estimators=estimators, 
         verbosity=verbosity, 
         outputs=outputs, 
         chunksize=chunksize,
diff --git a/src/utils.jl b/src/utils.jl
index a5c4c24..9ab403c 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -74,6 +74,34 @@ function proofread_estimands(filename, dataset)
     return estimands
 end
 
+"""
+This explicitly requires that the following columns belong to the dataset:
+
+- `T`: for the treatment variable
+- `Y`: for the outcome variable
+- `^W`: for the confounding variables
+
+All ATE parameters are generated.
+"""
+function TMLE.generateATEs(dataset)
+    colnames = names(dataset)
+    "T" ∈ colnames || throw(ArgumentError("No column 'T' found in the dataset for the treatment variable."))
+    "Y" ∈ colnames || throw(ArgumentError("No column 'Y' found in the dataset for the outcome variable."))
+    confounding_variables = Tuple(name for name in colnames if occursin(r"^W", name))
+    length(confounding_variables) > 0 || throw(ArgumentError("Could not find any confounding variable (starting with 'W') in the dataset."))
+    
+    return generateATEs(dataset, (:T, ), :Y; confounders=confounding_variables)
+end
+
+function build_estimands_list(estimands_pattern, dataset)
+    estimands = if estimands_pattern == "generateATEs"
+        generateATEs(dataset)
+    else
+        proofread_estimands(estimands_pattern, dataset)
+    end
+    return estimands
+end
+
 #####################################################################
 #####                 ADDITIONAL METHODS                         ####
 #####################################################################
@@ -81,7 +109,6 @@ end
 TMLE.emptyIC(nt::NamedTuple{names}, pval_threshold) where names =
     NamedTuple{names}([TMLE.emptyIC(result, pval_threshold) for result in nt])
 
-
 """
     instantiate_dataset(path::String)
 
@@ -146,18 +173,11 @@ variables(Ψ::TMLE.Estimand) = Set([
     Iterators.flatten(values(Ψ.treatment_confounders))...
     ])
 
-load_tmle_spec(file::Nothing) = (
-    TMLE = TMLEE(
-        models = TMLE.default_models(
-            Q_binary = LogisticClassifier(lambda=0.),
-            Q_continuous = LinearRegressor(),
-            G = LogisticClassifier(lambda=0.)
-        ),
-        weighted = true, 
-        ),
-    )
-
-function load_tmle_spec(file)
+function load_tmle_spec(;file="glmnet")
+    file = endswith(file, ".jl") ? file : joinpath(
+        pkgdir(TargetedEstimation),
+        "estimators-configs",
+        string(file, ".jl"))
     include(abspath(file))
     return ESTIMATORS
 end
diff --git a/test/runner.jl b/test/runner.jl
index 3acccfc..f52ead4 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -26,9 +26,9 @@ include(joinpath(TESTDIR, "testutils.jl"))
         jls=TargetedEstimation.JLSOutput(filename="output.jls", pval_threshold=1e-5),
     )
     runner = Runner(
-        "data.csv", 
-        estimands_filename, 
-        joinpath(CONFIGDIR, "tmle_ose_config.jl"); 
+        "data.csv";
+        estimands=estimands_filename, 
+        estimators=joinpath(CONFIGDIR, "tmle_ose_config.jl"),
         outputs=outputs, 
         cache_strategy="release-unusable",
     )
@@ -121,7 +121,9 @@ end
         datafile = string("data.", format)
         build_dataset(;n=1000, format=format)
         for chunksize in (4, 10)
-            tmle(datafile, estimands_filename, estimatorfile; 
+            tmle(datafile; 
+                estimands=estimands_filename, 
+                estimators=estimatorfile,
                 outputs=outputs,
                 chunksize=chunksize,
             )
@@ -161,7 +163,10 @@ end
     TMLE.write_json(estimandsfile, configuration)
     estimatorfile = joinpath(CONFIGDIR, "ose_config.jl")
     datafile = "data.csv"
-    tmle(datafile, estimandsfile, estimatorfile; outputs=outputs)
+    tmle(datafile; 
+        estimands=estimandsfile, 
+        estimators=estimatorfile,
+        outputs=outputs)
     
     # Essential results
     results_from_json = TMLE.read_json(outputs.json.filename)
@@ -190,7 +195,11 @@ end
     estimatorfile = joinpath(CONFIGDIR, "problematic_tmle_ose_config.jl")
     datafile = "data.csv"
 
-    runner = Runner(datafile, estimandsfile, estimatorfile; outputs=outputs);
+    runner = Runner(datafile; 
+        estimands=estimandsfile, 
+        estimators=estimatorfile,
+        outputs=outputs
+    );
     runner()
 
     # Test failed nuisance estimates (T2 model)
@@ -242,7 +251,12 @@ end
     estimatorfile = joinpath(CONFIGDIR, "ose_config.jl")
     datafile = "data.csv"
 
-    tmle(datafile, estimandsfile, estimatorfile; outputs=outputs, chunksize=2)
+    tmle(datafile;
+        estimands=estimandsfile, 
+        estimators=estimatorfile,
+        outputs=outputs, 
+        chunksize=2
+    )
     
     # JLS Output
     results = []
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index 7317854..62fb7a9 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -56,7 +56,11 @@ function build_tmle_output_file(sample_ids, estimandfile, outprefix;
     outputs = TargetedEstimation.Outputs(
         hdf5=TargetedEstimation.HDF5Output(filename=string(outprefix, ".hdf5"), pval_threshold=pval, sample_ids=true),
     )
-    tmle("data.csv", estimandfile, estimatorfile, outputs=outputs)
+    tmle("data.csv"; 
+        estimands=estimandfile, 
+        estimators=estimatorfile, 
+        outputs=outputs
+    )
 end
 
 function basic_variance_implementation(matrix_distance, influence_curve, n_obs)
diff --git a/test/summary.jl b/test/summary.jl
index 6903e88..1b8f5d8 100644
--- a/test/summary.jl
+++ b/test/summary.jl
@@ -23,14 +23,23 @@ include(joinpath(TESTDIR, "testutils.jl"))
     config_1 = statistical_estimands_only_config()
     configfile_1 = joinpath(tmpdir, "configuration_1.json")
     TMLE.write_json(configfile_1, config_1)
-    tmle(datafile, configfile_1, estimatorfile; outputs=tmle_output_1, chunksize=3)
+    tmle(datafile; 
+        estimands=configfile_1, 
+        estimators=estimatorfile,
+        outputs=tmle_output_1, 
+        chunksize=3
+    )
     
     # Second Run
     tmle_output_2 = TargetedEstimation.Outputs(hdf5=TargetedEstimation.HDF5Output(filename="tmle_output_2.hdf5"))
     config_2 = causal_and_composed_estimands_config()
     configfile_2 = joinpath(tmpdir, "configuration_2.json")
     TMLE.write_json(configfile_2, config_2)
-    tmle(datafile, configfile_2, estimatorfile; outputs=tmle_output_2)
+    tmle(datafile; 
+        estimands=configfile_2, 
+        estimators=estimatorfile, 
+        outputs=tmle_output_2
+    )
 
     # Make summary files
     outputs = TargetedEstimation.Outputs(
diff --git a/test/utils.jl b/test/utils.jl
index 7525168..9bedbb0 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -14,27 +14,33 @@ check_type(treatment_value, ::Type{T}) where T = @test treatment_value isa T
 check_type(treatment_values::NamedTuple, ::Type{T}) where T = 
     @test treatment_values.case isa T && treatment_values.control isa T 
 
-TESTDIR = joinpath(pkgdir(TargetedEstimation), "test")
+PKGDIR = pkgdir(TargetedEstimation)
+TESTDIR = joinpath(PKGDIR, "test")
 
 include(joinpath(TESTDIR, "testutils.jl"))
 
-@testset "Test load_tmle_spec: with configuration file" begin
-    estimators = TargetedEstimation.load_tmle_spec(joinpath(TESTDIR, "config", "tmle_ose_config.jl"))
+@testset "Test load_tmle_spec" begin
+    # Default
+    noarg_estimators = TargetedEstimation.load_tmle_spec()
+    default_models = noarg_estimators.TMLE.models
+    @test noarg_estimators.TMLE isa TMLEE
+    @test default_models.Q_binary_default.glm_net_classifier isa GLMNetClassifier
+    @test default_models.Q_continuous_default.glm_net_regressor isa GLMNetRegressor
+    @test default_models.G_default isa GLMNetClassifier
+    # From template name
+    for file in readdir(joinpath(PKGDIR, "estimators-configs"))
+        configname = replace(file, ".jl" => "")
+        estimators = TargetedEstimation.load_tmle_spec(;file=configname)
+        @test estimators.TMLE isa TMLEE
+    end
+    # From explicit file
+    estimators = TargetedEstimation.load_tmle_spec(file=joinpath(TESTDIR, "config", "tmle_ose_config.jl"))
     @test estimators.TMLE isa TMLE.TMLEE
     @test estimators.OSE isa TMLE.OSE
     @test estimators.TMLE.weighted === true
     @test estimators.TMLE.models.G_default === estimators.OSE.models.G_default
     @test estimators.TMLE.models.G_default isa MLJBase.ProbabilisticStack
 end
-
-@testset "Test load_tmle_spec: no configuration file" begin
-    estimators = TargetedEstimation.load_tmle_spec(nothing)
-    @test !haskey(estimators, :OSE)
-    @test haskey(estimators, :TMLE)
-    @test estimators.TMLE.weighted === true
-    @test estimators.TMLE.models.G_default isa LogisticClassifier
-end
-
 @testset "Test convert_treatment_values" begin
     treatment_types = Dict(:T₁=> Union{Missing, Bool}, :T₂=> Int)
     newT = TargetedEstimation.convert_treatment_values((T₁=1,), treatment_types)
@@ -68,6 +74,22 @@ end
     # Clean estimands file
     rm(filename)
 end
+
+@testset "Test generateATEs" begin
+    dataset = DataFrame(C=[1, 2, 3, 4],)
+    @test_throws ArgumentError TargetedEstimation.build_estimands_list("generateATEs", dataset)
+    dataset.T = [0, 1, missing, 2]
+    @test_throws ArgumentError TargetedEstimation.build_estimands_list("generateATEs", dataset)
+    dataset.Y = [0, 1, 2, 2]
+    dataset.W1 = [1, 1, 1, 1]
+    dataset.W_2 = [1, 1, 1, 1]
+    ATEs = TargetedEstimation.build_estimands_list("generateATEs", dataset)
+    @test ATEs == [
+        TMLE.StatisticalATE(:Y, (T = (case = 1, control = 0),), (T = (:W1, :W_2),), ()),
+        TMLE.StatisticalATE(:Y, (T = (case = 2, control = 0),), (T = (:W1, :W_2),), ()),
+        TMLE.StatisticalATE(:Y, (T = (case = 2, control = 1),), (T = (:W1, :W_2),), ())
+    ]
+end
 @testset "Test coerce_types!" begin
     Ψ = IATE(
         outcome=:Ycont,

From 46c914f110ae5a0f349055121235d45585bfe7f6 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Mon, 11 Dec 2023 14:01:55 +0000
Subject: [PATCH 39/71] update docstrings

---
 src/TargetedEstimation.jl | 2 --
 src/runner.jl             | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index aa01fda..74856d8 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -43,8 +43,6 @@ include(joinpath("models", "glmnet.jl"))
 include(joinpath("models", "adaptive_interaction_transformer.jl"))
 include(joinpath("models", "biallelic_snp_encoder.jl"))
 
-
-"""TL CLI."""
 @main
 
 export Runner, tmle, sieve_variance_plateau, make_summary
diff --git a/src/runner.jl b/src/runner.jl
index f7a8166..b3d0ec9 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -133,11 +133,11 @@ TMLE CLI.
 # Args
 
 - `dataset`: Data file (either .csv or .arrow)
-- `estimands`: Estimands file (either .json or .yaml)
-- `estimators`: A julia file containing the estimators to use.
 
 # Options
 
+- `--estimands`: A string ("generateATEs") or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)
+- `--estimators`: A julia file containing the estimators to use.
 - `-v, --verbosity`: Verbosity level.
 - `-o, --outputs`: Ouputs to be generated.
 - `--chunksize`: Results are written in batches of size chunksize.

From 6c5e3404785aba7cf23677995f7585c484a305c8 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 12 Dec 2023 06:28:34 +0000
Subject: [PATCH 40/71] add sample dataset

---
 data/sample_dataset.csv | 101 ++++++++++++++++++++++++++++++++++++++++
 src/runner.jl           |   2 +-
 2 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 data/sample_dataset.csv

diff --git a/data/sample_dataset.csv b/data/sample_dataset.csv
new file mode 100644
index 0000000..6dbdd98
--- /dev/null
+++ b/data/sample_dataset.csv
@@ -0,0 +1,101 @@
+T,W_1,W_2,Y
+false,0.18102554215580358,0.5450806703063027,2.9940372099784027
+false,0.3674899461902501,0.6384329204193084,3.6556468914971756
+false,0.6690584311411027,0.43792082835867485,3.6566892558262984
+false,0.04273056581197765,0.7757285573916282,3.4160794440612667
+false,0.4379909608992427,0.3047310991183463,2.789125420976082
+true,0.4832901297478609,0.7451730217771686,1.5859557539740305
+false,0.7639737291557767,0.8904601966212045,5.188296343573969
+true,0.9244830959536434,0.13974660123511873,0.4982270316308855
+false,0.5150451217580685,0.6374757520926937,3.935855162247756
+false,0.49504171746863457,0.8565155620522915,4.561765097844799
+false,0.7013399928855184,0.8136974906450547,4.829305702798148
+false,0.6935693200259092,0.7770630474285287,4.708377370910661
+false,0.8762727975157072,0.6985801341473621,4.853277788606795
+true,0.9442193089864095,0.152194194900529,4.037983372295075
+false,0.7392125921999604,0.15113069121626244,2.928937148645259
+false,0.5595446681103335,0.32420235015472953,3.0840500870314176
+true,0.5068756305502102,0.7933086736255439,4.003336818622111
+true,0.6460238520948196,0.09241238685905295,1.8598644023282838
+false,0.26637285700482627,0.32928469720586406,2.5254756391406956
+true,0.25314126112406954,0.005639780239579784,2.0525644282124227
+false,0.3516179276178317,0.562947511861783,3.3891190015292962
+true,0.5273890936851706,0.5791653992166597,2.236487556841665
+true,0.7210925534616786,0.0815702398426259,3.0907289031725105
+false,0.18851150251759496,0.4507062485489093,2.7103706395089318
+false,0.637996251650204,0.2796408144910678,3.1179620782873023
+false,0.42784339776115243,0.05608349255807643,2.021998670506748
+false,0.36344680674044993,0.8176529855849277,4.187058502288485
+false,0.5776470899697952,0.5038402847145775,3.664411058533266
+false,0.4552051132605446,0.80168727438732,4.324422128772599
+true,0.16582015869838074,0.6517102569667845,3.4493844928436257
+true,0.3485177133589714,0.6950429952352082,3.6465473336626584
+false,0.7370786697973803,0.38967172496607816,3.634455876824883
+false,0.8097943410644535,0.30785356657086305,3.5631085002530503
+true,0.17116662327378251,0.38189260775289746,1.4561452308524758
+true,0.2239059463776638,0.37754440958420843,2.9358196480011927
+false,0.8482049921559374,0.7931999164105743,5.073800536916948
+false,0.6129496106778634,0.5582656110841486,3.9006286335176275
+false,0.9525421237374148,0.7075371298070849,5.027958774493786
+false,0.329471340006501,0.8244276908646733,4.133141608706676
+true,0.9183731870724761,0.5155924027190455,2.1766450039509775
+true,0.5406318800132754,0.7964424508760488,4.684487622068989
+false,0.820474440393214,0.3278374872665033,3.6201478925383053
+false,0.22304578643880224,0.15454141257308707,1.9141989300490125
+true,0.5987759444612732,0.8176931599179378,4.883829492724324
+true,0.5391280234619427,0.0800996880924989,-0.15332305595018275
+true,0.6195348270893413,0.04758713076380294,1.5044413418755769
+false,0.4197760589260566,0.6919387484370496,3.913552132910235
+false,0.45325909384306007,0.8196586617380355,4.361479188698554
+false,0.458012070794656,0.16787410906435518,2.3890361585879396
+false,0.8360692316060747,0.12572716005598905,3.0494749469842053
+false,0.68704221750134,0.9336977783694771,5.162792856806991
+false,0.3539590866764071,0.4938068514848526,3.201878898498728
+false,0.15146985093210463,0.9318499781184257,4.0825781392628055
+true,0.7036713552821277,0.3110022402796051,4.247461621524256
+false,0.7858058549340399,0.7913869099880062,4.9487597218140555
+false,0.5516353577049822,0.7651183843708445,4.394357280603648
+false,0.33689370624999193,0.8200595760169511,4.122787054444181
+false,0.7103550345192344,0.5218538906399544,3.9851980589961844
+false,0.3437537135972244,0.7082383555963896,3.8223674411348094
+true,0.40543796744015514,0.07340489667656125,-0.44867162036508734
+false,0.418787685820859,0.9537197956213714,4.671368501348916
+false,0.3461876693258523,0.17116028512837467,2.196479242014222
+false,0.256693308150987,0.7535261803886308,3.7658816939059676
+false,0.15717578481324845,0.9086295629550201,4.041940493212971
+false,0.06397027012871725,0.748570362698747,3.3694111418545805
+true,0.5960710257852946,0.6663504027833114,3.367016019904959
+true,0.3313524247810329,0.6591751071404244,1.591879202485502
+true,0.09653466861970061,0.6363397790684187,2.9197943852934563
+false,0.2444274100956212,0.2098958139673206,2.1388072276125003
+true,0.128071302925437,0.2212452403166849,0.2377448923043096
+true,0.42570257768498054,0.19674477205988938,1.387006507647053
+true,0.9265378351770237,0.2414821377114318,3.7979295737435383
+false,0.49186280724413045,0.12746729440853555,2.384783686379066
+false,0.13454928219280093,0.4781845302027954,2.710629287272798
+true,0.7767793160877585,0.524392439209832,2.5943698653629785
+false,0.5975255336244989,0.390410633670742,3.3711308436058625
+true,0.36037123968128437,0.08296139972284933,-0.759956439158037
+false,0.0346420282305675,0.0617778348993705,1.2471920828108296
+false,0.002639461613289207,0.7009785602029246,3.0993420302705896
+false,0.6043418433725678,0.6486688636856162,4.156769560508067
+false,0.34013328112005636,0.1345850741469954,2.08159579861953
+false,0.08884383382645145,0.3088038486943412,2.1056178972765487
+false,0.27042373335313585,0.006888583580566321,1.556777817484051
+false,0.2906905645217257,0.541835309258762,3.2236471704885887
+false,0.1159966466957052,0.5135297293779133,2.783128796238891
+true,0.8470732321945746,0.42816797170836707,2.0301351712154556
+true,0.8139519778944555,0.24560986612792113,1.7245624951948406
+false,0.060180250784984235,0.49673727324525174,2.622413005826214
+true,0.42137186429269047,0.16330846948665134,0.8216594084825344
+true,0.6798160152993227,0.5456659244000286,2.8674322144135957
+false,0.7229464588051613,0.43577650941532386,3.7550371808310166
+false,0.7375979790215319,0.9110656955098189,5.216039115140225
+false,0.5523948722167735,0.9189451331877909,4.858501050384263
+false,0.970875486702566,0.06627679288609234,3.143553167360774
+true,0.7978957981860126,0.16648221966941223,0.3279875381813927
+false,0.6832983780571866,0.6493963093415174,4.318015004991571
+false,0.523122205661108,0.05689713675107577,2.230304870061633
+false,0.8553411083874956,0.7608458973060162,4.992917779986071
+false,0.2884613639525233,0.40667243126317154,2.79960851563243
+true,0.4312330027658198,0.24848292057152732,2.172648627086597
diff --git a/src/runner.jl b/src/runner.jl
index b3d0ec9..3b37e3d 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -149,7 +149,7 @@ TMLE CLI.
 - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time).
 """
 @cast function tmle(dataset::String;
-    estimands::String="default_ATE", 
+    estimands::String="generateATEs", 
     estimators::String="glmnet",
     verbosity::Int=0, 
     outputs::Outputs=Outputs(),

From 4f1c53788041df85b321e72de2ecedbe8c9836c1 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 12 Dec 2023 07:00:44 +0000
Subject: [PATCH 41/71] do not build sysimage for now

---
 Comonicon.toml    | 20 ++++++++++----------
 docker/Dockerfile | 15 +++++++++------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/Comonicon.toml b/Comonicon.toml
index 48c2a41..4acd652 100644
--- a/Comonicon.toml
+++ b/Comonicon.toml
@@ -5,16 +5,16 @@ completion = true
 quiet = false
 optimize = 2
 
-[sysimg]
-incremental=true
-filter_stdlibs=false
+# [sysimg]
+# incremental=true
+# filter_stdlibs=false
 
-[sysimg.precompile]
-execution_file = ["deps/execute.jl"]
+# [sysimg.precompile]
+# execution_file = ["deps/execute.jl"]
 
-[application]
-incremental=true
-filter_stdlibs=false
+# [application]
+# incremental=true
+# filter_stdlibs=false
 
-[application.precompile]
-execution_file = ["deps/execute.jl"]
\ No newline at end of file
+# [application.precompile]
+# execution_file = ["deps/execute.jl"]
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 5eac723..51da080 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -3,24 +3,27 @@ FROM julia:1.9.4-bullseye
 ARG DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && \
-    apt-get -y install gcc mono-mcs && \
+    apt-get -y install gcc mono-mcs vim && \
     rm -rf /var/lib/apt/lists/*
 
 ENV TZ=Europe/Amsterdam
 
 ENV JULIA_DEPOT_PATH=/opt
 
-# Import project, build and precompile
+# Import the project
 
 COPY . /TargetedEstimation.jl 
 
 WORKDIR /TargetedEstimation.jl
 
-# Precompile project
+# Precompile the project
 RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
 
-# Precompile Sysimage project
-RUN julia --project -t auto --startup-file=no deps/build_app.jl app
+# Build CLI
+RUN julia --project --startup-file=no deps/build_app.jl
 
-ENV PATH="${PATH}:/TargetedEstimation.jl/build/tmle/bin"
+# Add CLI to PATH
+ENV PATH="${PATH}:/opt/bin/"
 
+# Test the CLI runs
+RUN tmle tmle data/sample_dataset.csv
\ No newline at end of file

From 4b1fe4be92d16533bf1438bab2c981c165109a7b Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 12 Dec 2023 18:15:57 +0100
Subject: [PATCH 42/71] make sure outcome is not OrderedFactor for now

---
 src/utils.jl | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/utils.jl b/src/utils.jl
index 9ab403c..ce6b854 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -154,14 +154,19 @@ function coerce_types!(dataset, Ψ::ComposedEstimand)
 end
 
 function coerce_types!(dataset, Ψ)
+    # Make Treatments categorical but preserve order
     categorical_variables = Set(keys(Ψ.treatment_values))
-    continuous_variables = Set(Iterators.flatten(values(Ψ.treatment_confounders)))
-    union!(continuous_variables, Ψ.outcome_extra_covariates) 
-    TMLE.is_binary(dataset, Ψ.outcome) ? 
-        push!(categorical_variables, Ψ.outcome) : 
-        push!(continuous_variables, Ψ.outcome)
     make_categorical!(dataset, categorical_variables, infer_ordered=true)
+    # Make Confounders and extra covariates continuous
+    continuous_variables = Set(Iterators.flatten(values(Ψ.treatment_confounders)))
+    union!(continuous_variables, Ψ.outcome_extra_covariates)
     make_float!(dataset, continuous_variables)
+    # Make outcome categorical if binary but do not infer order 
+    if TMLE.is_binary(dataset, Ψ.outcome)
+        make_categorical!(dataset, Ψ.outcome, infer_ordered=false)
+    else
+        make_float!(dataset, Ψ.outcome)
+    end 
 end
 
 variables(Ψ::TMLE.ComposedEstimand) = union((variables(arg) for arg in Ψ.args)...)

From 18075215662e50ba612c52f5d4609c5d9e78b9d1 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 13 Dec 2023 09:13:50 +0100
Subject: [PATCH 43/71] update docs

---
 docs/make.jl                |   7 ++-
 docs/src/cli.md             |   9 ++-
 docs/src/index.md           |   2 +-
 docs/src/make_summary.md    |  16 +----
 docs/src/models.md          |   2 +-
 docs/src/sieve_variance.md  |  23 ++------
 docs/src/tmle_estimation.md | 113 ++++++------------------------------
 experiments/runtime.jl      |   8 +--
 8 files changed, 41 insertions(+), 139 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index e6aee0b..e278c5d 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -15,9 +15,12 @@ makedocs(
     modules = [TargetedEstimation],
     pages=[
         "Home" => "index.md",
-        "Command Line Interfaces" => ["environment.md", "tmle_estimation.md", "sieve_variance.md", "make_summary.md"],
+        "Command Line Interface" => ["cli.md", "tmle_estimation.md", "sieve_variance.md", "make_summary.md"],
         "MLJ Extensions" => ["models.md", "resampling.md"],
-    ]
+    ],
+    pagesonly=true,
+    clean = true,
+    checkdocs=:exports
 )
 
 @info "Deploying docs..."
diff --git a/docs/src/cli.md b/docs/src/cli.md
index 820f3c2..17dc2d6 100644
--- a/docs/src/cli.md
+++ b/docs/src/cli.md
@@ -24,8 +24,7 @@ Bellow is a description of the functionalities offered by the CLI.
 
 ## CLI Description
 
-The CLI contains 3 sub-commands:
-
-- `tmle`: To Run TMLE on a dataset (see [tmle command](@ref)).
-- `sieve-variance-plateau`: To correct the variance of an estimator for non i.i.d data via [Sieve Variance Plateau](https://biostats.bepress.com/ucbbiostat/paper322/) (see [sieve-variance-plateau command](@ref)).
-- `make-summary`: Combines multiple outputs from a `tmle` run into one output file (see [make-summary command](@ref))
+```@contents
+Pages = ["tmle_estimation.md", "sieve_variance.md", "make_summary.md"]
+Depth = 5
+```
diff --git a/docs/src/index.md b/docs/src/index.md
index 2f78304..84cd4cb 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -2,7 +2,7 @@
 
 The goal of this package, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package.
 
-[The Command Line Interface (CLI)](@ref)
+- Jump to [The Command Line Interface (CLI)](@ref)
 
 We also provide extensions to the [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) universe that are particularly useful in statistical genetics (but not restricted to it):
 
diff --git a/docs/src/make_summary.md b/docs/src/make_summary.md
index 2463498..d93db23 100644
--- a/docs/src/make_summary.md
+++ b/docs/src/make_summary.md
@@ -6,16 +6,6 @@
 tmle make-summary --help
 ```
 
-Merges tmle outputs in a single file.
-
-Args:
-
-- prefix: Prefix to .hdf5 files to be used to create the summary file
-
-Options:
-
-- -o, --outputs <Outputs...>: Ouptuts configuration.
-
-Flags:
-
-- -h, --help: Print this help message.
+```@docs
+make_summary
+```
diff --git a/docs/src/models.md b/docs/src/models.md
index 5a15d88..a660e13 100644
--- a/docs/src/models.md
+++ b/docs/src/models.md
@@ -14,7 +14,7 @@ Because [TMLE.jl](https://targene.github.io/TMLE.jl/stable/) is based on top of
 
 Further support for more packages can be added on request, please fill an [issue](https://github.com/TARGENE/TargetedEstimation.jl/issues).
 
-Also, because the [Estimator File](@ref) is a pure Julia file, it is possible to use it in order to install additional package that can be used to define additional models.
+Also, because the estimator file used by the TMLE CLI is a pure Julia file, it is possible to use it in order to install additional package that can be used to define additional models.
 
 Finally, we also provide some additional models described in [Additional models provided by TargetedEstimation.jl](@ref).
 
diff --git a/docs/src/sieve_variance.md b/docs/src/sieve_variance.md
index d7c61a9..780812e 100644
--- a/docs/src/sieve_variance.md
+++ b/docs/src/sieve_variance.md
@@ -2,27 +2,12 @@
 
 If the i.i.d. (independent and identically distributed) hypothesis is not satisfied, most of the traditional statistical inference theory falls apart. This is typically possible in population genetics where a study may contain related individuals. Here we leverage a non-parametric method called [Sieve Variance Plateau](https://biostats.bepress.com/ucbbiostat/paper322/) (SVP) estimation. The hypothesis is that the dependence between individuals is sufficiently small, so that our targeted estimator will still be asymptotically unbiased, but its variance will be under estimated. In brief, the SVP estimator computes a variance estimate for a range of thresholds 𝜏, by considering individuals to be independent if their distance exceeds 𝜏. As the distance threshold 𝜏 increases, fewer individuals are assumed to be independent. The maximum of this curve is the most conservative estimate of the variance of the target parameter estimator and constitutes our SVP corrected variance estimator.
 
-## Usage
+## [Usage](@id svp_command)
 
 ```bash
 tmle sieve-variance-plateau --help
 ```
 
-Runs Sieve Variance Plateau correction.
-
-Args:
-
-- input_prefix: Prefix to outputs from the tmle command.
-
-Options:
-
-- -o, --out <svp.hdf5> Output filename in hdf5 format.
-- -g, --grm-prefix <GRM>: Prefix to the aggregated GRM.
-- -v, --verbosity <0>: Verbosity level.
-- -n, --n-estimators <10>: Number of variance estimators to build for each estimate.
-- -m, --max-tau <0.8>: Maximum distance between any two individuals.
-- -e, --estimator-key <TMLE>: Estimator to use to proceed with sieve variance correction.
-
-Flags:
-
-- -h, --help: Print this help message.
+```@docs
+sieve_variance_plateau
+```
diff --git a/docs/src/tmle_estimation.md b/docs/src/tmle_estimation.md
index 67b6c7a..e7b6ae3 100644
--- a/docs/src/tmle_estimation.md
+++ b/docs/src/tmle_estimation.md
@@ -4,94 +4,19 @@ This is the main script in this package, it provides a command line interface fo
 
 ## Usage
 
-Runs TMLE estimation.
-
-Args:
-
-- dataset: A dataset either in .csv or .arrow format
-- estimands: A file containing a serialized Configuration object.
-- estimators: A custom julia file containing the estimators to use. Several examples are provided [here](https://github.com/TARGENE/TargetedEstimation.jl/estimators-configs). Alternatively, to point to any of them, the name of the file can be supplied without the ".jl" extension. (e.g. "superlearning").
-
-Options:
-
-- -v, --verbosity: Verbosity level.
-- -o, --outputs: Ouputs to be generated.
-- --chunksize <100::Int>: Results are written in batches of size chunksize.
-- -r, --rng <123::Int>: Random seed (Only used for estimands ordering at the moment).
-- -c, --cache-strategy: Caching Strategy for the nuisance functions, any of ("release-unusable", "no-cache", "max-size").
-
-Flags:
-
-- -s, --sort-estimands: Sort estimands to minimize cache usage. A brute force approach will be used, resulting in exponentially long sorting time (Only appropriate for small number of estimands).
-
-## Output file
-
-The output file is a plain CSV file containing one line per estimand in the input `PARAMFILE`. The file contains the following columns:
-
-- `PARAMETER_TYPE`: The estimand type (e.g. "ATE", "IATE", ...).
-- `TREATMENTS`: A "_&_" separated string containing all treatment variables associated with the estimand.
-- `CASE`: A "_&_" separated string containing the treatment variables' case values in the same order as `TREATMENTS`.
-- `CONTROL`: A "_&_" separated string containing the treatment variables' control values in the same order as `TREATMENTS`.
-- `OUTCOME`: The outcome variable.
-- `CONFOUNDERS`: A "_&_" separated string containing the confounding variables.
-- `COVARIATES`: A "_&_" separated string containing the extra covariates used to estimate the outcome's mean.
-- `INITIAL_ESTIMATE`: The initial estimate before the targeting step.
-- `TMLE_ESTIMATE`: The targeted estimate.
-- `TMLE_STD`: The standard deviation associated with the targeted estimate.
-- `TMLE_PVALUE`: The p-value associated with the targeted estimate.
-- `TMLE_LWB`: The 95% confidence interval lower bound associated with the targeted estimate.
-- `TMLE_UPB`: The 95% confidence interval upper bound associated with the targeted estimate.
-- `ONESTEP_ESTIMATE`: The one step estimate.
-- `ONESTEP_STD`: The standard deviation associated with the one step estimate.
-- `ONESTEP_PVALUE`: The p-value associated with the one step estimate.
-- `ONESTEP_LWB`: The 95% confidence interval lower bound associated with the one step estimate.
-- `ONESTEP_UPB`: The 95% confidence interval upper bound associated with the one step estimate.
-- `LOG`: A log message if estimation failed.
-
-## Estimator File
-
-TMLE is an adaptive procedure that depends on the specification of learning algorithms for the estimation of the nuisance parameters (see [TMLE.jl](https://targene.github.io/TMLE.jl/stable/) for a description of the assumed setting). In our case, there are two nuisance parameters for which we need to specify learning algorithms:
-
-- `E[Y|T, W, C]`: The mean outcome given the treatment, confounders and extra covariates. It is commonly denoted by `Q` in the Targeted Learning litterature.
-- `p(T|W)`: The propensity score. It is commonly denoted by `G` in the Targeted Learning litterature.
-
-### Description of the file
-
-In order to provide maximum flexibility as to the choice of learning algorithms, the estimator file is a plain [Julia](https://julialang.org/) file. This file is optional and omitting it defaults to using generalized linear models. If provided, it must define a [NamedTuple](https://docs.julialang.org/en/v1/base/base/#Core.NamedTuple) called `tmle_spec` containing any of the following fields as follows (default configuration):
-
-```julia
-
-tmle_spec = (
-  Q_continuous = LinearRegressor(),
-  Q_binary     = LogisticClassifier(lambda=0.),
-  G            = LogisticClassifier(lambda=0.),
-  threshold    = 1e-8,
-  cache        = false,
-  weighted_fluctuation = false
-)
+```bash
+tmle tmle --help
 ```
 
-where:
-
-- `Q_continuous`: is a MLJ model used for the estimation of `E[Y|T, W, C]` when the outcome `Y` is continuous.
-- `Q_binary`: is a MLJ model used for the estimation of `E[Y|T, W, C]` when the outcome `Y` is binary.
-- `G`: is a MLJ model used for the estimation of `p(T|W)`.
-- `threshold`: is the minimum value the propensity score `G` is allowed to take.
-- `cache`: controls caching of data by [MLJ machines](https://alan-turing-institute.github.io/MLJ.jl/dev/machines/). Setting it to `true` may result in faster runtime but higher memory usage.
-- `weighted_fluctuation`: controls whether the fluctuation for `Q` is a weighted glm or not. If some of the treatment values are rare it may lead to more robust estimation.
-
-Typically, `Q_continuous`, `Q_binary` and `G` will be adjusted and other fields can be left unspecified.
+```@docs
+tmle
+```
 
-### Ready to use estimator files
+## Note on TMLE Outputs
 
-We recognize not everyone will be familiar with [Julia](https://julialang.org/). We thus provide a set of ready to use estimator files that can be simplified or extended as needed:
+We can output results in three different formats: HDF5, JSON and JLS. By default no output is written, so you need to specify at least one. An output can be generated by specifying an output filename for it. For instance `--outputs.json.filename=output.json` will output a JSON file. Note that you can generate multiple formats at once, e.g. `--outputs.json.filename=output.json --outputs.hdf5.filename=output.hdf5` will output both JSON and HDF5 result files. Another important output option is the `pval_threshold`. Each estimation result is accompanied by an influence curve vector and by default these vectors are erased before saving the results because they typically take up too much space and are not usually needed. On some occasions you might want to keep them and this can be achieved by specifying the output's `pval_threshold`. For instance `--outputs.hdf5.pval_threshold=1.` will keep all such vectors because all p-values lie between 0 and 1.
 
-- Super Learning: [with](./estimators/superlearning-with-interactions-for-Q.jl) and [without](./estimators/superlearning.jl) interaction terms in the GLM models for Q.
-- Super Learning for G and GLMNet for Q: [here](./estimators/G-superlearning-Q-glmnet.jl).
-- Super Learning for G and GLM for Q: [here](./estimators/G-superlearning-Q-glm.jl).
-- GLMNet: [with](./estimators/glmnet-with-interactions-for-Q.jl) and [without](./estimators/glmnet.jl) interaction terms in the GLM models for Q.
-- GLM: [with](./estimators/glm-with-interactions-for-Q.jl) and [without](./estimators/glm.jl) interaction terms in the GLM models for Q.
-- XGBoost: [with tuning](./estimators/tuned-xgboost.jl).
+In order to run sieve variance plateau correction after a TMLE run you need to save the results in HDF5 format with influence curve vectors. Furthermore, you will need to save the sample-ids associated with each result. A complete option set for this could be: `--outputs.hdf5.filename=output.hdf5 --outputs.hdf5.pval_threshold=0.05 --outputs.hdf5.sample_ids=true`. In this case, only those results with an individual p-value of less than ``0.05`` will keep track of their influence curves and be considered for sieve variance correction.
 
 ## Runtime
 
@@ -111,8 +36,8 @@ In what follows, `Y` is an outcome of interest, `W` a set of confounding variabl
 
 For all the following experiments:
 
-- The Julia script can be found at [experiments/runtime.jl](../../experiments/runtime.jl).
-- The various estimators used below are further described in [Ready to use estimator files](@ref).
+- The Julia script can be found at [experiments/runtime.jl](https://github.com/TARGENE/TargetedEstimation.jl/tree/main/experiments/runtime.jl).
+- The various estimators used below are further described in the [estimators-configs](https://github.com/TARGENE/TargetedEstimation.jl/tree/main/estimators-configs) folder.
 
 ### Multiple treatment contrasts
 
@@ -136,12 +61,12 @@ In a PheWAS, one is interested in the effect of a genetic variation across many
 
 With this setup in mind, the computational complexity is mostly driven by the specification of the learning algorithms for `Q`, which will have to be fitted for each outcome. For 10 outcomes, we estimate the 3 Average Treatment Effects corresponding to the 3 possible treatment contrasts defined in the previous section. There are thus two levels of reuse of `G` and `Q` in this study design. In the table below are presented some runtimes for various specifications of `G` and `Q` using a single cpu. The "Unit runtime" is the average runtime across all estimands and can roughly be extrapolated to bigger studies.
 
-| Estimator file | Unit runtime (s) | Extrapolated runtime to 1000 outcomes |
+| Estimator | Unit runtime (s) | Extrapolated runtime to 1000 outcomes |
 | --- | :---: | :---: |
-| `docs/src/estimators/glm.jl` | 4.65 | ≈ 1h20 |
-| `docs/src/estimators/glmnet.jl` | 7.19 | ≈ 2h |
-| `docs/src/estimators/G-superlearning-Q-glmnet.jl` | 50.05| ≈ 13h45 |
-| `docs/src/estimators/superlearning.jl` | 168.98 | ≈ 46h |
+| `glm` | 4.65 | ≈ 1h20 |
+| `glmnet` | 7.19 | ≈ 2h |
+| `G-superlearning-Q-glmnet` | 50.05| ≈ 13h45 |
+| `superlearning` | 168.98 | ≈ 46h |
 
 Depending on the exact setup, this means one can probably afford to use Super Learning for at least the estimation of `G` (and potentially also for `Q` for a single PheWAS). This turns out to be a great news because TMLE is a double robust estimator. As a reminder, it means that only one of the estimators for `G` or `Q` needs to converge sufficiently fast to the ground truth to guarantee that our estimates will be asymptotically unbiased.
 
@@ -166,9 +91,9 @@ Again, we estimate the 3 Average Treatment Effects corresponding to the 3 possib
 
 | Estimator file | Continuous outcome unit runtime (s) | Binary outcome unit runtime (s) | Projected Time on HPC (200 folds //) |
 | --- | :---: | :---: | :---: |
-| `docs/src/estimators/glm.jl` | 5.64 | 6.14 | ≈ 6h30 |
-| `docs/src/estimators/glmnet.jl` | 17.46 | 22.24 | ≈ 22h |
-| `docs/src/estimators/G-superlearning-Q-glmnet.jl` | 430.54 | 438.67 | ≈ 20 days |
-| `docs/src/estimators/superlearning.jl` | 511.26 | 567.72 | ≈ 24 days |
+| `glm` | 5.64 | 6.14 | ≈ 6h30 |
+| `glmnet` | 17.46 | 22.24 | ≈ 22h |
+| `G-superlearning-Q-glmnet` | 430.54 | 438.67 | ≈ 20 days |
+| `superlearning` | 511.26 | 567.72 | ≈ 24 days |
 
 We can see that modern high performance computing platforms definitely enable this study design when using GLMs or GLMNets. It is unlikely however, that you will be able to use Super Learning for any of `P(V|W)` or `E[Y|V, W]` if you don't have privileged access to such platform. While the double robustness guarantees will generally not be satisfied, our estimate will still be targeted, which means that its bias will be reduced compared to classic inference using a parametric model.
diff --git a/experiments/runtime.jl b/experiments/runtime.jl
index e641673..e3e2b62 100644
--- a/experiments/runtime.jl
+++ b/experiments/runtime.jl
@@ -2,10 +2,10 @@ using ArgParse
 using TargetedEstimation
 
 const ESTIMATORS = [
-    "docs/src/estimators/glm.jl",
-    "docs/src/estimators/glmnet.jl",
-    "docs/src/estimators/G-superlearning-Q-glmnet.jl",
-    "docs/src/estimators/superlearning.jl"
+    "glm",
+    "glmnet",
+    "G-superlearning-Q-glmnet",
+    "superlearning"
 ]
 const PARAMETERS = [
     "experiments/parameters.phewas.yaml",

From 51659814828d0e4a38f4abfb48c551ab1eecd9a4 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 13 Dec 2023 11:01:38 +0100
Subject: [PATCH 44/71] up TMLE dep to manage failed fluctuations

---
 Project.toml  | 2 +-
 src/runner.jl | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index 48583ae..7cf526d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -53,7 +53,7 @@ MLJModels = "0.16"
 MLJXGBoostInterface = "0.3.4"
 MultipleTesting = "0.6.0"
 Optim = "1.7"
-TMLE = "0.12.1"
+TMLE = "0.12.2"
 Tables = "1.10.1"
 YAML = "0.4.9"
 julia = "1.7, 1"
diff --git a/src/runner.jl b/src/runner.jl
index 3b37e3d..a827bd5 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -57,9 +57,11 @@ function try_estimation(runner, Ψ, estimator)
         return result
     catch e
         # Some nuisance function fits may fail. We do not interrupt on them but log instead.
-        # This also allows to skip fast the next estimands requiring the same nuisance functions.
         if e isa TMLE.FitFailedError
-            push!(runner.failed_nuisance, e.estimand)
+            # This also allows to skip fast the next estimands requiring the same nuisance functions.
+            if !(e.model isa TMLE.Fluctuation)
+                push!(runner.failed_nuisance, e.estimand)
+            end
             return FailedEstimate(Ψ, e.msg)
         # On other errors, rethrow
         else 

From 837070312f6318e88e989d5a41aedd0ef3449dc1 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 13 Dec 2023 12:42:07 +0100
Subject: [PATCH 45/71] add typing to cli functions

---
 src/sieve_variance.jl | 14 +++++++-------
 src/summary.jl        |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index d113cc7..10d4be0 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -225,13 +225,13 @@ Sieve Variance Plateau CLI.
 - `-m, --max_tau`: Maximum distance between any two individuals.
 - `-e, --estimator-key`: Estimator to use to proceed with sieve variance correction.
 """
-@cast function sieve_variance_plateau(input_prefix;
-    out="svp.hdf5",
-    grm_prefix="GRM",
-    verbosity=0, 
-    n_estimators=10, 
-    max_tau=0.8,
-    estimator_key="TMLE"
+@cast function sieve_variance_plateau(input_prefix::String;
+    out::String="svp.hdf5",
+    grm_prefix::String="GRM",
+    verbosity::Int=0, 
+    n_estimators::Int=10, 
+    max_tau::Float64=0.8,
+    estimator_key::String="TMLE"
     )
     estimator_key = Symbol(estimator_key)
     τs = default_τs(n_estimators;max_τ=max_tau)
diff --git a/src/summary.jl b/src/summary.jl
index 6ebf4c5..0089df9 100644
--- a/src/summary.jl
+++ b/src/summary.jl
@@ -29,8 +29,8 @@ Combines multiple TMLE .hdf5 output files in a single file. Multiple formats can
 - `-o, --outputs`: Ouptuts configuration.
 """
 @cast function make_summary(
-    prefix; 
-    outputs=Outputs(json=JSONOutput(filename="summary.json"))
+    prefix::String;
+    outputs::Outputs=Outputs()
     )
     
     # Initialize output files

From 539c1d7064e0eda5cdec5250e7957983e6659e55 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 13 Dec 2023 16:28:22 +0100
Subject: [PATCH 46/71] add skipping of FailedEstimates in sieve

---
 src/sieve_variance.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index 10d4be0..70c446d 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -79,6 +79,7 @@ function build_work_list(prefix, grm_ids; estimator_key=:TMLE)
                 batch_results = io[key]
                 for nt_result in batch_results
                     result = nt_result[estimator_key]
+                    result isa FailedEstimate && continue
                     sample_ids = nt_result.SAMPLE_IDS
                     update_work_lists_with!(
                         result,

From d50fc6f059745de0c97e6ccba9fab41925fbd70e Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 14 Dec 2023 08:26:22 +0100
Subject: [PATCH 47/71] up docker image with procps

---
 docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 51da080..547da61 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -3,7 +3,7 @@ FROM julia:1.9.4-bullseye
 ARG DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && \
-    apt-get -y install gcc mono-mcs vim && \
+    apt-get -y install gcc mono-mcs vim procps && \
     rm -rf /var/lib/apt/lists/*
 
 ENV TZ=Europe/Amsterdam

From ef258799cbb1ec6d679b73d2ec63deb4a5a216e9 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 19 Dec 2023 12:40:55 +0100
Subject: [PATCH 48/71] up TMLE dep

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 7cf526d..cff4a17 100644
--- a/Project.toml
+++ b/Project.toml
@@ -53,7 +53,7 @@ MLJModels = "0.16"
 MLJXGBoostInterface = "0.3.4"
 MultipleTesting = "0.6.0"
 Optim = "1.7"
-TMLE = "0.12.2"
+TMLE = "0.13.1"
 Tables = "1.10.1"
 YAML = "0.4.9"
 julia = "1.7, 1"

From 98ed9b4f32003604da3bcc822e475beddb158ae8 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 19 Dec 2023 13:46:37 +0100
Subject: [PATCH 49/71] fix default generateATEs

---
 src/utils.jl  | 2 +-
 test/utils.jl | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/utils.jl b/src/utils.jl
index ce6b854..518de66 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -90,7 +90,7 @@ function TMLE.generateATEs(dataset)
     confounding_variables = Tuple(name for name in colnames if occursin(r"^W", name))
     length(confounding_variables) > 0 || throw(ArgumentError("Could not find any confounding variable (starting with 'W') in the dataset."))
     
-    return generateATEs(dataset, (:T, ), :Y; confounders=confounding_variables)
+    return [generateATEs(dataset, (:T, ), :Y; confounders=confounding_variables)]
 end
 
 function build_estimands_list(estimands_pattern, dataset)
diff --git a/test/utils.jl b/test/utils.jl
index 9bedbb0..072db30 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -83,12 +83,12 @@ end
     dataset.Y = [0, 1, 2, 2]
     dataset.W1 = [1, 1, 1, 1]
     dataset.W_2 = [1, 1, 1, 1]
-    ATEs = TargetedEstimation.build_estimands_list("generateATEs", dataset)
-    @test ATEs == [
+    composedATE = TargetedEstimation.build_estimands_list("generateATEs", dataset)[1]
+    @test composedATE.args == (
         TMLE.StatisticalATE(:Y, (T = (case = 1, control = 0),), (T = (:W1, :W_2),), ()),
         TMLE.StatisticalATE(:Y, (T = (case = 2, control = 0),), (T = (:W1, :W_2),), ()),
         TMLE.StatisticalATE(:Y, (T = (case = 2, control = 1),), (T = (:W1, :W_2),), ())
-    ]
+    )
 end
 @testset "Test coerce_types!" begin
     Ψ = IATE(

From 695e661e2bf8bac5032f4593da60c75d557e406e Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 21 Dec 2023 14:46:50 +0100
Subject: [PATCH 50/71] move back to argparse but keep semantics

---
 Project.toml              |   4 +-
 src/TargetedEstimation.jl |   7 +-
 src/cli.jl                | 180 ++++++++++++++++++++++++++++++++++++++
 src/runner.jl             |   2 +-
 src/sieve_variance.jl     |   2 +-
 src/summary.jl            |   2 +-
 test/runner.jl            |  54 ++++++------
 test/sieve_variance.jl    |  26 ++++--
 test/summary.jl           |  27 +++---
 9 files changed, 247 insertions(+), 57 deletions(-)
 create mode 100644 src/cli.jl

diff --git a/Project.toml b/Project.toml
index cff4a17..7575ae0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,11 +4,11 @@ authors = ["Olivier Labayle"]
 version = "0.7.4"
 
 [deps]
+ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
-Comonicon = "863f3e99-da2a-4334-8734-de3dacbe5542"
 Configurations = "5218b696-f38b-4ac9-8b61-a12ec717816d"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
@@ -33,11 +33,11 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 
 [compat]
+ArgParse = "1.1.4"
 Arrow = "2.5.2"
 CSV = "0.10"
 CategoricalArrays = "0.10"
 Combinatorics = "1.0.2"
-Comonicon = "1.0.6"
 Configurations = "0.17.6"
 DataFrames = "1.3.4"
 EvoTrees = "0.16.5"
diff --git a/src/TargetedEstimation.jl b/src/TargetedEstimation.jl
index 74856d8..a3ee31c 100644
--- a/src/TargetedEstimation.jl
+++ b/src/TargetedEstimation.jl
@@ -4,6 +4,7 @@ if occursin("Intel", Sys.cpu_info()[1].model)
     using MKL
 end
 
+using ArgParse
 using DataFrames
 using MLJBase
 using MLJ
@@ -25,7 +26,6 @@ using Tables
 using Random
 using YAML
 using JSON
-using Comonicon
 using Configurations
 
 import MLJModelInterface
@@ -42,10 +42,9 @@ include("resampling.jl")
 include(joinpath("models", "glmnet.jl"))
 include(joinpath("models", "adaptive_interaction_transformer.jl"))
 include(joinpath("models", "biallelic_snp_encoder.jl"))
+include("cli.jl")
 
-@main
-
-export Runner, tmle, sieve_variance_plateau, make_summary
+export Runner, tmle, sieve_variance_plateau, make_summary, main
 export GLMNetRegressor, GLMNetClassifier
 export RestrictedInteractionTransformer, BiAllelicSNPEncoder
 export AdaptiveCV, AdaptiveStratifiedCV, JointStratifiedCV
diff --git a/src/cli.jl b/src/cli.jl
new file mode 100644
index 0000000..985b088
--- /dev/null
+++ b/src/cli.jl
@@ -0,0 +1,180 @@
+function cli_settings()
+    s = ArgParseSettings(description="TMLE CLI.")
+
+    @add_arg_table s begin
+        "tmle"
+            action = :command
+            help = "Run TMLE."
+        
+        "svp"
+            action = :command
+            help = "Run Sieve Variance Plateau."
+
+        "merge"
+            action = :command
+            help = "Merges TMLE outputs together."
+    end
+
+    @add_arg_table s["tmle"] begin
+        "dataset"
+            arg_type = String
+            required = true
+            help = "Path to the dataset (either .csv or .arrow)"
+
+        "--estimands"
+            arg_type = String
+            help = "A string (`generateATEs`) or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)"
+            default = "generateATEs"
+
+        "--estimators"
+            arg_type = String
+            help = "A julia file containing the estimators to use."
+            default = "glmnet"
+
+        "--verbosity"
+            arg_type = Int
+            default = 0
+            help = "Verbosity level"
+
+        "--hdf5-output"
+            arg_type = String
+            help = "HDF5 file output."
+        
+        "--json-output"
+            arg_type = String
+            help = "JSON file output."
+
+        "--jls-output"
+            arg_type = String
+            help = "JLS file output."
+        
+        "--chunksize"
+            arg_type = Int
+            help = "Results are written in batches of size chunksize."
+            default = 100
+
+        "--rng"
+            arg_type = Int
+            help = "Random seed (Only used for estimands ordering at the moment)."
+            default = 123
+
+        "--cache-strategy"
+            arg_type = String
+            help = "Caching Strategy for the nuisance functions, any of (`release-unusable`, `no-cache`, `max-size`)."
+            default = "release-unusable"
+        
+        "--sort-estimands"
+            help = "Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time)."
+            action = :store_true
+    end
+
+    @add_arg_table s["svp"] begin
+        "input-prefix"
+            arg_type = String
+            help = "Input prefix to HDF5 files generated by the tmle CLI."
+
+        "--out"
+            arg_type = String
+            help = "Output filename."
+            default = "svp.hdf5"
+
+        "--grm-prefix"
+            arg_type = String
+            help = "Prefix to the aggregated GRM."
+            default = "GRM"
+        
+        "--verbosity"
+            arg_type = Int
+            default = 0
+            help = "Verbosity level"
+        
+        "--n-estimators"
+            arg_type = Int
+            default = 10
+            help = "Number of variance estimators to build for each estimate."
+
+        "--max-tau"
+            arg_type = Float64
+            default = 0.8
+            help = "Maximum distance between any two individuals."
+
+        "--estimator-key"
+            arg_type = String
+            help = "Estimator to use to proceed with sieve variance correction."
+            default = "TMLE"
+    end
+
+    @add_arg_table s["merge"] begin
+        "prefix"
+            arg_type = String
+            help = "Prefix to .hdf5 files to be used to create the summary file."
+
+        "--hdf5-output"
+            arg_type = String
+            help = "HDF5 file output."
+        
+        "--json-output"
+            arg_type = String
+            help = "JSON file output."
+    
+        "--jls-output"
+            arg_type = String
+            help = "JLS file output."
+    end
+
+    return s
+end
+
+
+makeOutput(T::Type, ::Nothing) = T()
+
+function makeOutput(T::Type, str)
+    args = split(str, ",")
+    kwargs = Dict(fn => tryparse(ft, val) for (val, fn, ft) ∈ zip(args, fieldnames(T), fieldtypes(T)))
+    return T(;kwargs...)
+end
+
+make_outputs(hdf5_string, json_string, jls_tring) = Outputs(
+    hdf5=makeOutput(HDF5Output, hdf5_string),
+    json=makeOutput(JSONOutput, json_string),
+    jls=makeOutput(JLSOutput, jls_tring)
+)
+
+function main(args=ARGS)
+    settings = parse_args(args, cli_settings())
+    cmd = settings["%COMMAND%"]
+    cmd_settings = settings[cmd]
+    if cmd ∈ ("tmle", "merge")
+        outputs = make_outputs(cmd_settings["hdf5-output"], cmd_settings["json-output"], cmd_settings["jls-output"])
+        if cmd == "tmle"
+            tmle(cmd_settings["dataset"];
+                estimands=cmd_settings["estimands"], 
+                estimators=cmd_settings["estimators"],
+                verbosity=cmd_settings["verbosity"], 
+                outputs=outputs,
+                chunksize=cmd_settings["chunksize"],
+                rng=cmd_settings["rng"],
+                cache_strategy=cmd_settings["cache-strategy"],
+                sort_estimands=cmd_settings["sort-estimands"]
+            )
+        else
+            make_summary(cmd_settings["prefix"];
+                outputs=outputs
+            )
+        end
+    else
+        sieve_variance_plateau(cmd_settings["input-prefix"];
+            out=cmd_settings["out"],
+            grm_prefix=cmd_settings["grm-prefix"],
+            verbosity=cmd_settings["verbosity"], 
+            n_estimators=cmd_settings["n-estimators"], 
+            max_tau=cmd_settings["max-tau"],
+            estimator_key=cmd_settings["estimator-key"]
+        )
+    end
+end
+
+function julia_main()::Cint
+    main()
+    return 0
+end
\ No newline at end of file
diff --git a/src/runner.jl b/src/runner.jl
index a827bd5..1079db7 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -150,7 +150,7 @@ TMLE CLI.
 
 - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time).
 """
-@cast function tmle(dataset::String;
+function tmle(dataset::String;
     estimands::String="generateATEs", 
     estimators::String="glmnet",
     verbosity::Int=0, 
diff --git a/src/sieve_variance.jl b/src/sieve_variance.jl
index 70c446d..ea41eb9 100644
--- a/src/sieve_variance.jl
+++ b/src/sieve_variance.jl
@@ -226,7 +226,7 @@ Sieve Variance Plateau CLI.
 - `-m, --max_tau`: Maximum distance between any two individuals.
 - `-e, --estimator-key`: Estimator to use to proceed with sieve variance correction.
 """
-@cast function sieve_variance_plateau(input_prefix::String;
+function sieve_variance_plateau(input_prefix::String;
     out::String="svp.hdf5",
     grm_prefix::String="GRM",
     verbosity::Int=0, 
diff --git a/src/summary.jl b/src/summary.jl
index 0089df9..a83a383 100644
--- a/src/summary.jl
+++ b/src/summary.jl
@@ -28,7 +28,7 @@ Combines multiple TMLE .hdf5 output files in a single file. Multiple formats can
 
 - `-o, --outputs`: Ouptuts configuration.
 """
-@cast function make_summary(
+function make_summary(
     prefix::String;
     outputs::Outputs=Outputs()
     )
diff --git a/test/runner.jl b/test/runner.jl
index f52ead4..a8ce1f2 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -154,22 +154,24 @@ end
 
 @testset "Test tmle: lower p-value threshold only JSON output" begin
     build_dataset(;n=1000, format="csv")
-    outputs = TargetedEstimation.Outputs(
-        json=TargetedEstimation.JSONOutput(filename="output.json", pval_threshold=1e-15)
-    )
     tmpdir = mktempdir(cleanup=true)
     estimandsfile = joinpath(tmpdir, "configuration.json")
     configuration = statistical_estimands_only_config()
     TMLE.write_json(estimandsfile, configuration)
     estimatorfile = joinpath(CONFIGDIR, "ose_config.jl")
     datafile = "data.csv"
-    tmle(datafile; 
-        estimands=estimandsfile, 
-        estimators=estimatorfile,
-        outputs=outputs)
+
+    # Using the main entry point
+    main([
+        "tmle", 
+        datafile, 
+        "--estimands", estimandsfile, 
+        "--estimators", estimatorfile,
+        "--json-output", "output.json,1e-15"]
+    )
     
     # Essential results
-    results_from_json = TMLE.read_json(outputs.json.filename)
+    results_from_json = TMLE.read_json("output.json")
     n_IC_empties = 0
     for result in results_from_json
         if result[:OSE].IC != []
@@ -179,7 +181,7 @@ end
     @test n_IC_empties > 0
 
     rm(datafile)
-    rm(outputs.json.filename)
+    rm("output.json")
 end
 
 @testset "Test tmle: Failing estimands" begin
@@ -238,11 +240,6 @@ end
 
 @testset "Test tmle: Causal and Composed Estimands" begin
     build_dataset(;n=1000, format="csv")
-    outputs = TargetedEstimation.Outputs(
-        json = TargetedEstimation.JSONOutput(filename="output.json"),
-        jls = TargetedEstimation.JLSOutput(filename="output.jls"),
-        hdf5 = TargetedEstimation.HDF5Output(filename="output.hdf5")
-    )
     tmpdir = mktempdir(cleanup=true)
     estimandsfile = joinpath(tmpdir, "configuration.jls")
 
@@ -251,16 +248,21 @@ end
     estimatorfile = joinpath(CONFIGDIR, "ose_config.jl")
     datafile = "data.csv"
 
-    tmle(datafile;
-        estimands=estimandsfile, 
-        estimators=estimatorfile,
-        outputs=outputs, 
-        chunksize=2
-    )
+    # Using the main entry point
+    main([
+        "tmle", 
+        datafile, 
+        "--estimands", estimandsfile, 
+        "--estimators", estimatorfile,
+        "--chunksize", "2",
+        "--json-output", "output.json",
+        "--hdf5-output", "output.hdf5",
+        "--jls-output", "output.jls"
+    ])
     
     # JLS Output
     results = []
-    open(outputs.jls.filename) do io
+    open("output.jls") do io
         while !eof(io)
             push!(results, deserialize(io))
         end
@@ -279,19 +281,19 @@ end
     @test results[3].OSE isa TMLE.ComposedEstimate
     
     # JSON Output
-    results_from_json = TMLE.read_json(outputs.json.filename)
+    results_from_json = TMLE.read_json("output.json")
     @test length(results_from_json) == 3
 
     # HDF5
-    results_from_json = jldopen(outputs.hdf5.filename)
+    results_from_json = jldopen("output.hdf5")
     @test length(results_from_json["Batch_1"]) == 2
     composed_result = only(results_from_json["Batch_2"])
     @test composed_result.OSE.cov == results[3].OSE.cov
     
     rm(datafile)
-    rm(outputs.jls.filename)
-    rm(outputs.json.filename)
-    rm(outputs.hdf5.filename)
+    rm("output.jls")
+    rm("output.json")
+    rm("output.hdf5")
 end
 
 
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index 62fb7a9..de1465c 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -285,10 +285,13 @@ end
     TMLE.write_json(estimandsfile_2, config_2)
     build_tmle_output_file(grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2"; pval=pval)
 
-    sieve_variance_plateau("tmle_output";
-        grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"),
-        max_tau=0.75
-    )
+    # Using the main command
+    main([
+        "svp", 
+        "tmle_output", 
+        "--grm-prefix", joinpath(TESTDIR, "data", "grm", "test.grm"), 
+        "--max-tau", "0.75"
+    ])
 
     io = jldopen("svp.hdf5")
     # Check τs
@@ -332,11 +335,16 @@ end
         "tmle_output";
         estimatorfile=joinpath(TESTDIR, "config", "ose_config.jl")
     )
-    sieve_variance_plateau("tmle_output";
-        grm_prefix=joinpath(TESTDIR, "data", "grm", "test.grm"),
-        max_tau=0.75,
-        estimator_key="OSE"
-    )
+
+    # Using the main command
+    main([
+        "svp", 
+        "tmle_output", 
+        "--grm-prefix", joinpath(TESTDIR, "data", "grm", "test.grm"), 
+        "--max-tau", "0.75",
+        "--estimator-key", "OSE"
+    ])
+
     # The ComposedEstimate std is not updated but each component is.
     src_results = jldopen("tmle_output.hdf5")["Batch_1"]
     io = jldopen("svp.hdf5")
diff --git a/test/summary.jl b/test/summary.jl
index 1b8f5d8..92c454b 100644
--- a/test/summary.jl
+++ b/test/summary.jl
@@ -41,27 +41,28 @@ include(joinpath(TESTDIR, "testutils.jl"))
         outputs=tmle_output_2
     )
 
-    # Make summary files
-    outputs = TargetedEstimation.Outputs(
-        json=TargetedEstimation.JSONOutput(filename="summary.json"),
-        hdf5=TargetedEstimation.HDF5Output(filename="summary.hdf5"),
-        jls=TargetedEstimation.JLSOutput(filename="summary.jls")
-    )
-    make_summary("tmle_output", outputs=outputs)
+    # Using the main entry point
+    main([
+        "merge", 
+        "tmle_output", 
+        "--json-output", "summary.json", 
+        "--jls-output", "summary.jls",
+        "--hdf5-output", "summary.hdf5"
+    ])
 
     # Test correctness
     hdf5file_1 = jldopen("tmle_output_1.hdf5")
     hdf5file_2 = jldopen("tmle_output_2.hdf5")
     inputs = vcat(hdf5file_1["Batch_1"], hdf5file_1["Batch_2"], hdf5file_2["Batch_1"])
 
-    json_outputs = TMLE.read_json(outputs.json.filename)
+    json_outputs = TMLE.read_json("summary.json")
     jls_outputs = []
-    open(outputs.jls.filename) do io
+    open("summary.jls") do io
         while !eof(io)
             push!(jls_outputs, deserialize(io))
         end
     end
-    hdf5_output = jldopen(outputs.hdf5.filename)
+    hdf5_output = jldopen("summary.hdf5")
     hdf5_outputs = vcat((hdf5_output[key] for key in keys(hdf5_output))...)
 
     @test length(inputs) == 9
@@ -72,9 +73,9 @@ include(joinpath(TESTDIR, "testutils.jl"))
     # cleanup
     rm("tmle_output_1.hdf5")
     rm("tmle_output_2.hdf5")
-    rm(outputs.json.filename)
-    rm(outputs.jls.filename)
-    rm(outputs.hdf5.filename)
+    rm("summary.hdf5")
+    rm("summary.jls")
+    rm("summary.json")
     rm(datafile)
 end
 

From 12ba044d605f7f8c6863d65c25f9aace6996a548 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 21 Dec 2023 14:59:42 +0100
Subject: [PATCH 51/71] try build in CI

---
 .github/workflows/CI.yml |  2 +-
 .gitignore               |  1 +
 Comonicon.toml           | 20 --------------------
 Project.toml             |  2 ++
 deps/build_app.jl        |  3 ++-
 deps/execute.jl          |  7 -------
 docker/Dockerfile        |  5 +----
 7 files changed, 7 insertions(+), 33 deletions(-)
 delete mode 100644 Comonicon.toml

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 19683a3..29551c4 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -30,7 +30,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
       - name: Build App
-        run: julia --project -t auto deps/build_app.jl app tarball
+        run: julia --project --startup-file=no deps/build_app.jl
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v2
         with:
diff --git a/.gitignore b/.gitignore
index 63898a4..eb170c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ deps/build.log
 deps/downloads/
 deps/usr/
 deps/src/
+tmle/
 
 # Build artifacts for creating documentation generated by the Documenter package
 docs/build/
diff --git a/Comonicon.toml b/Comonicon.toml
deleted file mode 100644
index 4acd652..0000000
--- a/Comonicon.toml
+++ /dev/null
@@ -1,20 +0,0 @@
-name = "tmle"
-
-[install]
-completion = true
-quiet = false
-optimize = 2
-
-# [sysimg]
-# incremental=true
-# filter_stdlibs=false
-
-# [sysimg.precompile]
-# execution_file = ["deps/execute.jl"]
-
-# [application]
-# incremental=true
-# filter_stdlibs=false
-
-# [application.precompile]
-# execution_file = ["deps/execute.jl"]
\ No newline at end of file
diff --git a/Project.toml b/Project.toml
index 7575ae0..f5df5c6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -26,6 +26,7 @@ MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
 Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
 MultipleTesting = "f8716d33-7c4a-5097-896f-ce0ecbd3ef6b"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 TMLE = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf"
@@ -33,6 +34,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 
 [compat]
+PackageCompiler = "2.1.16"
 ArgParse = "1.1.4"
 Arrow = "2.5.2"
 CSV = "0.10"
diff --git a/deps/build_app.jl b/deps/build_app.jl
index e0a9f11..cb0b99c 100644
--- a/deps/build_app.jl
+++ b/deps/build_app.jl
@@ -1 +1,2 @@
-using TargetedEstimation; TargetedEstimation.comonicon_install()
\ No newline at end of file
+using PackageCompiler
+PackageCompiler.create_app(".", "tmle", precompile_execution_file="execute.jl")
diff --git a/deps/execute.jl b/deps/execute.jl
index bde1b1b..70f58fe 100644
--- a/deps/execute.jl
+++ b/deps/execute.jl
@@ -1,13 +1,6 @@
 using TargetedEstimation
 
 @info "Running precompilation script."
-
-# Run help messages
-TargetedEstimation.command_main(["-h"])
-TargetedEstimation.command_main(["tmle", "-h"])
-TargetedEstimation.command_main(["make-summary", "-h"])
-TargetedEstimation.command_main(["sieve-variance-plateau", "-h"])
-
 # Run workload
 TEST_DIR = joinpath(pkgdir(TargetedEstimation), "test")
 push!(LOAD_PATH, TEST_DIR)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 547da61..741a9ba 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -22,8 +22,5 @@ RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompi
 # Build CLI
 RUN julia --project --startup-file=no deps/build_app.jl
 
-# Add CLI to PATH
-ENV PATH="${PATH}:/opt/bin/"
-
 # Test the CLI runs
-RUN tmle tmle data/sample_dataset.csv
\ No newline at end of file
+RUN tmle/bin/tmle tmle data/sample_dataset.csv
\ No newline at end of file

From 478ba5d5b87d7f0bb2576ec0a323eaa176185c62 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 21 Dec 2023 15:02:09 +0100
Subject: [PATCH 52/71] fix execute path

---
 deps/build_app.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deps/build_app.jl b/deps/build_app.jl
index cb0b99c..8e9a8dd 100644
--- a/deps/build_app.jl
+++ b/deps/build_app.jl
@@ -1,2 +1,2 @@
 using PackageCompiler
-PackageCompiler.create_app(".", "tmle", precompile_execution_file="execute.jl")
+PackageCompiler.create_app(".", "tmle", precompile_execution_file="deps/execute.jl")

From b2d2c929c634b275808629ce94ab0806c0d55e04 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 21 Dec 2023 15:43:59 +0100
Subject: [PATCH 53/71] remove app run at the end of docker build

---
 docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 741a9ba..334ab1a 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -23,4 +23,4 @@ RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompi
 RUN julia --project --startup-file=no deps/build_app.jl
 
 # Test the CLI runs
-RUN tmle/bin/tmle tmle data/sample_dataset.csv
\ No newline at end of file
+# RUN tmle/bin/tmle tmle data/sample_dataset.csv
\ No newline at end of file

From fab24558911c763c8c40951713616f02f7802851 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 21 Dec 2023 16:40:53 +0100
Subject: [PATCH 54/71] try more platforms and upload artifact

---
 .github/workflows/CI.yml | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 29551c4..65c45df 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -15,11 +15,14 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.9'
+          - '1'
         os:
           - ubuntu-latest
+          - macOS-latest
+          - windows-latest
         arch:
-          - x64
+          - 'x64'
+          - 'x86'
     steps:
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@v1
@@ -29,12 +32,16 @@ jobs:
       - uses: julia-actions/cache@v1
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
-      - name: Build App
-        run: julia --project --startup-file=no deps/build_app.jl
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v2
         with:
           files: lcov.info
+      - name: Build App
+        run: julia --project --startup-file=no deps/build_app.jl
+      - uses: actions/upload-artifact@v4
+        with:
+          name: tmle-${{ matrix.os }}-${{ matrix.arch }}
+          path: tmle
   docs:
     name: Documentation
     runs-on: ubuntu-latest

From 26c273e03f6c97d47eb615e7ea40f7060c7f85ba Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 21 Dec 2023 16:50:39 +0100
Subject: [PATCH 55/71] remove x86

---
 .github/workflows/CI.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 65c45df..ebf50b7 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -22,7 +22,6 @@ jobs:
           - windows-latest
         arch:
           - 'x64'
-          - 'x86'
     steps:
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@v1

From 592c67520c38032055224f9a5e09105998a95278 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 21 Dec 2023 17:05:02 +0100
Subject: [PATCH 56/71] remove the use of mmap in json read

---
 src/utils.jl | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/utils.jl b/src/utils.jl
index 518de66..c378749 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -18,17 +18,16 @@ MissingSCMError() = ArgumentError(string("A Structural Causal Model should be pr
 get_identification_method(method::Nothing) = BackdoorAdjustment()
 get_identification_method(method) = method
 
-function read_method(extension)
-    method = if extension == ".json"
-        TMLE.read_json
-    elseif extension == ".yaml"
-        TMLE.read_yaml
-    elseif extension == ".jls"
-        deserialize
+function read_estimands_config(filename)
+    if endswith(filename, ".json")
+        TMLE.read_json(filename, use_mmap=false)
+    elseif endswith(filename, ".yaml")
+        TMLE.read_yaml(filename)
+    elseif endswith(filename, ".jls")
+        return deserialize(filename)
     else
-        throw(ArgumentError(string("Can't read from ", extension, " file")))
+        throw(ArgumentError(string("Can't read from ", splitext(filename)[2], " file")))
     end
-    return method
 end
 
 function fix_treatment_values!(treatment_types::AbstractDict, Ψ::ComposedEstimand, dataset)
@@ -62,8 +61,7 @@ Reads estimands from file and ensures that the treatment values in the config fi
 respects the treatment types in the dataset.
 """
 function proofread_estimands(filename, dataset)
-    extension = filename[findlast(isequal('.'), filename):end]
-    config = read_method(extension)(filename)
+    config = read_estimands_config(filename)
     adjustment_method = get_identification_method(config.adjustment)
     estimands = Vector{TMLE.Estimand}(undef, length(config.estimands))
     treatment_types = Dict()

From 1518216b74160d4a7aa8c2987816e663f162fd3d Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 22 Dec 2023 17:22:02 +0100
Subject: [PATCH 57/71] close files before removing

---
 test/runner.jl         | 11 ++++++-----
 test/sieve_variance.jl |  3 +++
 test/summary.jl        |  4 ++++
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/test/runner.jl b/test/runner.jl
index a8ce1f2..97e39b4 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -231,7 +231,7 @@ end
             @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand
         end
     end
-
+    close(results_from_hdf5)
     # Clean
     rm(outputs.json.filename)
     rm(outputs.hdf5.filename)
@@ -285,11 +285,12 @@ end
     @test length(results_from_json) == 3
 
     # HDF5
-    results_from_json = jldopen("output.hdf5")
-    @test length(results_from_json["Batch_1"]) == 2
-    composed_result = only(results_from_json["Batch_2"])
+    results_from_hdf5 = jldopen("output.hdf5")
+    @test length(results_from_hdf5["Batch_1"]) == 2
+    composed_result = only(results_from_hdf5["Batch_2"])
     @test composed_result.OSE.cov == results[3].OSE.cov
-    
+    close(results_from_hdf5)
+
     rm(datafile)
     rm("output.jls")
     rm("output.json")
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index de1465c..8365d1b 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -313,6 +313,8 @@ end
         @test src_result.TMLE.n == svp_result.TMLE.n
         @test svp_result.TMLE.IC == []
     end
+    close(tmleout1)
+    close(tmleout2)
     close(io)
     # clean
     rm("svp.hdf5")
@@ -361,6 +363,7 @@ end
         @test standalone_estimates[i].OSE.std != src_results[i].OSE.std
     end
 
+    close(src_results)
     close(io)
     
     # clean
diff --git a/test/summary.jl b/test/summary.jl
index 92c454b..a75d0e1 100644
--- a/test/summary.jl
+++ b/test/summary.jl
@@ -70,6 +70,10 @@ include(joinpath(TESTDIR, "testutils.jl"))
         @test input.OSE.estimand == jls_output.OSE.estimand == hdf5_out.OSE.estimand == json_output[:OSE].estimand
     end
 
+    close(hdf5file_1)
+    close(hdf5file_2)
+    close(hdf5_output)
+
     # cleanup
     rm("tmle_output_1.hdf5")
     rm("tmle_output_2.hdf5")

From bcd5ba5d678cd01286ba7f7d7cd6dd003f7d18a7 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 22 Dec 2023 17:43:12 +0100
Subject: [PATCH 58/71] fix poor closing of jld2 files

---
 test/runner.jl         | 27 ++++++++++++++-------------
 test/sieve_variance.jl | 10 ++++------
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/test/runner.jl b/test/runner.jl
index 97e39b4..88b01da 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -222,16 +222,17 @@ end
     end
 
     # Check results from HDF5
-    results_from_hdf5 = jldopen(outputs.hdf5.filename)["Batch_1"]
-    for estimator in (:OSE, :TMLE)
-        @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimate
-        @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate
-        for i in 3:6
-            @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimate
-            @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand
+    jldopen(outputs.hdf5.filename) do io 
+        results_from_hdf5 = io["Batch_1"]
+        for estimator in (:OSE, :TMLE)
+            @test results_from_hdf5[1][estimator] isa TargetedEstimation.FailedEstimate
+            @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate
+            for i in 3:6
+                @test results_from_hdf5[i][estimator] isa TargetedEstimation.FailedEstimate
+                @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand
+            end
         end
     end
-    close(results_from_hdf5)
     # Clean
     rm(outputs.json.filename)
     rm(outputs.hdf5.filename)
@@ -285,11 +286,11 @@ end
     @test length(results_from_json) == 3
 
     # HDF5
-    results_from_hdf5 = jldopen("output.hdf5")
-    @test length(results_from_hdf5["Batch_1"]) == 2
-    composed_result = only(results_from_hdf5["Batch_2"])
-    @test composed_result.OSE.cov == results[3].OSE.cov
-    close(results_from_hdf5)
+    jldopen("output.hdf5") do io
+        @test length(io["Batch_1"]) == 2
+        composed_result = only(io["Batch_2"])
+        @test composed_result.OSE.cov == results[3].OSE.cov
+    end
 
     rm(datafile)
     rm("output.jls")
diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl
index 8365d1b..0095067 100644
--- a/test/sieve_variance.jl
+++ b/test/sieve_variance.jl
@@ -301,8 +301,8 @@ end
     # Check results
     svp_results = io["results"]
     
-    tmleout1 = jldopen("tmle_output_1.hdf5")["Batch_1"]
-    tmleout2 = jldopen("tmle_output_2.hdf5")["Batch_1"]
+    tmleout1 = jldopen(x -> x["Batch_1"], "tmle_output_1.hdf5")
+    tmleout2 = jldopen(x -> x["Batch_1"], "tmle_output_2.hdf5")
     src_results = [tmleout1..., tmleout2...]
 
     for svp_result in svp_results
@@ -313,8 +313,7 @@ end
         @test src_result.TMLE.n == svp_result.TMLE.n
         @test svp_result.TMLE.IC == []
     end
-    close(tmleout1)
-    close(tmleout2)
+
     close(io)
     # clean
     rm("svp.hdf5")
@@ -348,7 +347,7 @@ end
     ])
 
     # The ComposedEstimate std is not updated but each component is.
-    src_results = jldopen("tmle_output.hdf5")["Batch_1"]
+    src_results = jldopen(x -> x["Batch_1"], "tmle_output.hdf5")
     io = jldopen("svp.hdf5")
     svp_results = io["results"]
     standalone_estimates = svp_results[1:2]
@@ -363,7 +362,6 @@ end
         @test standalone_estimates[i].OSE.std != src_results[i].OSE.std
     end
 
-    close(src_results)
     close(io)
     
     # clean

From 375980da6d801a37412ae988422b4b5a9af6bacd Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 22 Dec 2023 18:47:08 +0100
Subject: [PATCH 59/71] add lazy artifacts and test run in docker image

---
 deps/build_app.jl | 2 +-
 docker/Dockerfile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/deps/build_app.jl b/deps/build_app.jl
index 8e9a8dd..8552971 100644
--- a/deps/build_app.jl
+++ b/deps/build_app.jl
@@ -1,2 +1,2 @@
 using PackageCompiler
-PackageCompiler.create_app(".", "tmle", precompile_execution_file="deps/execute.jl")
+PackageCompiler.create_app(".", "tmle", precompile_execution_file="deps/execute.jl", include_lazy_artifacts=true)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 334ab1a..96dec1d 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -23,4 +23,4 @@ RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompi
 RUN julia --project --startup-file=no deps/build_app.jl
 
 # Test the CLI runs
-# RUN tmle/bin/tmle tmle data/sample_dataset.csv
\ No newline at end of file
+RUN /TargetedEstimation.jl/tmle/bin/TargetedEstimation tmle data/sample_dataset.csv
\ No newline at end of file

From f8358ffe540b548a5cea9e0d52accfa0ca02e37a Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 22 Dec 2023 18:49:57 +0100
Subject: [PATCH 60/71] fix json mmap in tests

---
 test/outputs.jl |  2 +-
 test/runner.jl  | 10 +++++-----
 test/summary.jl |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/outputs.jl b/test/outputs.jl
index 9b13bb0..73b9207 100644
--- a/test/outputs.jl
+++ b/test/outputs.jl
@@ -45,7 +45,7 @@ end
     TargetedEstimation.initialize_json(jsonoutput.filename)
     TargetedEstimation.update_file(jsonoutput, results[1:3])
     TargetedEstimation.update_file(jsonoutput, results[4:end]; finalize=true)
-    loaded_results = TMLE.read_json(jsonoutput.filename)
+    loaded_results = TMLE.read_json(jsonoutput.filename, use_mmap=false)
     @test size(loaded_results) == size(results)
     for (result, loaded_result) in zip(results, loaded_results)
         @test result.TMLE.estimate == loaded_result[:TMLE].estimate
diff --git a/test/runner.jl b/test/runner.jl
index 88b01da..1985f35 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -44,7 +44,7 @@ include(joinpath(TESTDIR, "testutils.jl"))
     TargetedEstimation.save(runner, results, partition, true)
 
     # Test Save to JSON
-    loaded_results = TMLE.read_json(outputs.json.filename)
+    loaded_results = TMLE.read_json(outputs.json.filename, use_mmap=false)
     for (result, loaded_result) in zip(results, loaded_results)
         @test loaded_result[:TMLE] isa TMLE.TMLEstimate
         @test result.TMLE.estimate == loaded_result[:TMLE].estimate
@@ -134,7 +134,7 @@ end
                 end
             end
             results_from_hdf5 = vcat(results_from_hdf5...)
-            results_from_json = TMLE.read_json(outputs.json.filename)
+            results_from_json = TMLE.read_json(outputs.json.filename, use_mmap=false)
 
             for i in 1:6
                 Ψ = configuration.estimands[i]
@@ -171,7 +171,7 @@ end
     )
     
     # Essential results
-    results_from_json = TMLE.read_json("output.json")
+    results_from_json = TMLE.read_json("output.json", use_mmap=false)
     n_IC_empties = 0
     for result in results_from_json
         if result[:OSE].IC != []
@@ -210,7 +210,7 @@ end
     ])
 
     # Check results from JSON
-    results_from_json = TMLE.read_json(outputs.json.filename)
+    results_from_json = TMLE.read_json(outputs.json.filename, use_mmap=false)
     for estimator in (:OSE, :TMLE)
         @test results_from_json[1][estimator][:error] == "Could not fit the following propensity score model: P₀(T2 | W1, W2)"
         @test results_from_json[1][estimator][:estimand] isa TMLE.Estimand
@@ -282,7 +282,7 @@ end
     @test results[3].OSE isa TMLE.ComposedEstimate
     
     # JSON Output
-    results_from_json = TMLE.read_json("output.json")
+    results_from_json = TMLE.read_json("output.json", use_mmap=false)
     @test length(results_from_json) == 3
 
     # HDF5
diff --git a/test/summary.jl b/test/summary.jl
index a75d0e1..40f3a73 100644
--- a/test/summary.jl
+++ b/test/summary.jl
@@ -55,7 +55,7 @@ include(joinpath(TESTDIR, "testutils.jl"))
     hdf5file_2 = jldopen("tmle_output_2.hdf5")
     inputs = vcat(hdf5file_1["Batch_1"], hdf5file_1["Batch_2"], hdf5file_2["Batch_1"])
 
-    json_outputs = TMLE.read_json("summary.json")
+    json_outputs = TMLE.read_json("summary.json", use_mmap=false)
     jls_outputs = []
     open("summary.jls") do io
         while !eof(io)

From ccc2a07a77612a34f8c773132d9df1ae03d157b2 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Sat, 23 Dec 2023 10:28:33 +0100
Subject: [PATCH 61/71] sysimage instead of app in docker container

---
 deps/build_app.jl      | 6 +++++-
 deps/build_sysimage.jl | 6 ++++++
 docker/Dockerfile      | 4 ++--
 tmle.jl                | 1 +
 4 files changed, 14 insertions(+), 3 deletions(-)
 create mode 100644 deps/build_sysimage.jl
 create mode 100644 tmle.jl

diff --git a/deps/build_app.jl b/deps/build_app.jl
index 8552971..9c36c5a 100644
--- a/deps/build_app.jl
+++ b/deps/build_app.jl
@@ -1,2 +1,6 @@
 using PackageCompiler
-PackageCompiler.create_app(".", "tmle", precompile_execution_file="deps/execute.jl", include_lazy_artifacts=true)
+PackageCompiler.create_app(".", "tmle", 
+    lib_name="tmle",
+    precompile_execution_file="deps/execute.jl", 
+    include_lazy_artifacts=true
+)
diff --git a/deps/build_sysimage.jl b/deps/build_sysimage.jl
new file mode 100644
index 0000000..b1a32cc
--- /dev/null
+++ b/deps/build_sysimage.jl
@@ -0,0 +1,6 @@
+using PackageCompiler
+PackageCompiler.create_sysimage(
+    ["TargetedEstimation"], 
+    sysimage_path="TMLESysimage.so", 
+    precompile_execution_file="deps/execute.jl", 
+)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 96dec1d..423f096 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -20,7 +20,7 @@ WORKDIR /TargetedEstimation.jl
 RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()'
 
 # Build CLI
-RUN julia --project --startup-file=no deps/build_app.jl
+RUN julia --project --startup-file=no deps/build_sysimage.jl
 
 # Test the CLI runs
-RUN /TargetedEstimation.jl/tmle/bin/TargetedEstimation tmle data/sample_dataset.csv
\ No newline at end of file
+RUN julia --startup-file=no --project -JTMLESysimage.so tmle.jl tmle data/sample_dataset.csv
\ No newline at end of file
diff --git a/tmle.jl b/tmle.jl
new file mode 100644
index 0000000..592b78f
--- /dev/null
+++ b/tmle.jl
@@ -0,0 +1 @@
+using TargetedEstimation; main()
\ No newline at end of file

From 4b0dcd6e6b34b7bb3d103c0b9c5c58aa9e0278c0 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Sat, 23 Dec 2023 10:44:45 +0100
Subject: [PATCH 62/71] fix executable name

---
 deps/build_app.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deps/build_app.jl b/deps/build_app.jl
index 9c36c5a..c48e8cf 100644
--- a/deps/build_app.jl
+++ b/deps/build_app.jl
@@ -1,6 +1,6 @@
 using PackageCompiler
-PackageCompiler.create_app(".", "tmle", 
-    lib_name="tmle",
+PackageCompiler.create_app(".", "tmle",
+    executables = ["tmle" => "julia_main"]
     precompile_execution_file="deps/execute.jl", 
     include_lazy_artifacts=true
 )

From 4e0bdf57c82f14e61b33aa53a46ea2c83fb76717 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Sat, 23 Dec 2023 10:59:50 +0100
Subject: [PATCH 63/71] fix missing comma

---
 deps/build_app.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deps/build_app.jl b/deps/build_app.jl
index c48e8cf..8e3ea90 100644
--- a/deps/build_app.jl
+++ b/deps/build_app.jl
@@ -1,6 +1,6 @@
 using PackageCompiler
 PackageCompiler.create_app(".", "tmle",
-    executables = ["tmle" => "julia_main"]
+    executables = ["tmle" => "julia_main"],
     precompile_execution_file="deps/execute.jl", 
     include_lazy_artifacts=true
 )

From 6ddceaecad6bc44a905e10df8200ca4da6c8915e Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Sat, 23 Dec 2023 11:15:26 +0100
Subject: [PATCH 64/71] try generic cpu target

---
 deps/build_sysimage.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deps/build_sysimage.jl b/deps/build_sysimage.jl
index b1a32cc..208628a 100644
--- a/deps/build_sysimage.jl
+++ b/deps/build_sysimage.jl
@@ -1,6 +1,7 @@
 using PackageCompiler
 PackageCompiler.create_sysimage(
     ["TargetedEstimation"], 
+    cpu_target="generic",
     sysimage_path="TMLESysimage.so", 
     precompile_execution_file="deps/execute.jl", 
 )

From eea3bc0afdcfdd1bdb029cbd730ba32516c1c300 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 27 Dec 2023 12:29:43 +0100
Subject: [PATCH 65/71] update cache management to drop composite factors as
 well

---
 src/cache_managers.jl  | 26 ++++++++++++++-----
 src/cli.jl             |  8 +++---
 test/cache_managers.jl | 59 +++++++++++++++++++++++++++++-------------
 3 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/src/cache_managers.jl b/src/cache_managers.jl
index 56acd3f..50d7fd3 100644
--- a/src/cache_managers.jl
+++ b/src/cache_managers.jl
@@ -10,12 +10,20 @@ function release!(cache_manager::ReleaseUnusableCacheManager, Ψ)
     # Always drop fluctuations
     haskey(cache_manager.cache, :last_fluctuation) && pop!(cache_manager.cache, :last_fluctuation)
 
+    # Drop Basic nuisance functions
     for η in TMLE.nuisance_functions_iterator(Ψ)
         cache_manager.η_counts[η] -= 1
         if cache_manager.η_counts[η] == 0
             delete!(cache_manager.cache, η)
         end
     end
+
+    # Drop aggregate nuisance function
+    for η in keys(cache_manager.cache)
+        if η isa TMLE.CMRelevantFactors
+            delete!(cache_manager.cache, η)
+        end
+    end
 end
 
 struct MaxSizeCacheManager <: CacheManager
@@ -25,14 +33,20 @@ struct MaxSizeCacheManager <: CacheManager
 end
 
 function release!(cache_manager::MaxSizeCacheManager, Ψ)
-    while length(cache_manager.cache) > cache_manager.max_size
-        # Prioritize the release of the last fluctuation
-        if haskey(cache_manager.cache, :last_fluctuation)
-            pop!(cache_manager.cache, :last_fluctuation)
-        else
-            pop!(cache_manager.cache)
+    # Prioritize the release of the last fluctuation
+    if haskey(cache_manager.cache, :last_fluctuation)
+        pop!(cache_manager.cache, :last_fluctuation)
+    end
+    # Drop aggregate nuisance function
+    for η in keys(cache_manager.cache)
+        if η isa TMLE.CMRelevantFactors
+            delete!(cache_manager.cache, η)
         end
     end
+    # Drop the rest randomly until the size is acceptable
+    while length(cache_manager.cache) > cache_manager.max_size
+        pop!(cache_manager.cache)
+    end
 end
 
 struct NoCacheManager <: CacheManager
diff --git a/src/cli.jl b/src/cli.jl
index 985b088..b91d0cf 100644
--- a/src/cli.jl
+++ b/src/cli.jl
@@ -1,7 +1,7 @@
 function cli_settings()
     s = ArgParseSettings(description="TMLE CLI.")
 
-    @add_arg_table s begin
+    @add_arg_table! s begin
         "tmle"
             action = :command
             help = "Run TMLE."
@@ -15,7 +15,7 @@ function cli_settings()
             help = "Merges TMLE outputs together."
     end
 
-    @add_arg_table s["tmle"] begin
+    @add_arg_table! s["tmle"] begin
         "dataset"
             arg_type = String
             required = true
@@ -68,7 +68,7 @@ function cli_settings()
             action = :store_true
     end
 
-    @add_arg_table s["svp"] begin
+    @add_arg_table! s["svp"] begin
         "input-prefix"
             arg_type = String
             help = "Input prefix to HDF5 files generated by the tmle CLI."
@@ -104,7 +104,7 @@ function cli_settings()
             default = "TMLE"
     end
 
-    @add_arg_table s["merge"] begin
+    @add_arg_table! s["merge"] begin
         "prefix"
             arg_type = String
             help = "Prefix to .hdf5 files to be used to create the summary file."
diff --git a/test/cache_managers.jl b/test/cache_managers.jl
index 294ccd2..1567cad 100644
--- a/test/cache_managers.jl
+++ b/test/cache_managers.jl
@@ -16,17 +16,26 @@ end
 
 @testset "Test MaxSizeCacheManager" begin
     cache_manager = TargetedEstimation.MaxSizeCacheManager(3)
-    cache_manager.cache["Toto"] = 1
-    cache_manager.cache["Tata"] = 2
-    TargetedEstimation.release!(cache_manager, nothing)
-    @test cache_manager.cache == Dict("Toto" => 1, "Tata" => 2)
-    cache_manager.cache["Titi"] = 3
-    cache_manager.cache["Tutu"] = 4
-    @test length(cache_manager.cache) == 4
-    TargetedEstimation.release!(cache_manager, nothing)
-    @test length(cache_manager.cache) == 3
+    Y_T₁T₂ = TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W))
+    cache_manager.cache[Y_T₁T₂] = 1
+    T₁_W = TMLE.ConditionalDistribution(:T₁, (:W,))
+    cache_manager.cache[T₁_W] = 1
+    T₂_W = TMLE.ConditionalDistribution(:T₂, (:W,))
+    cache_manager.cache[T₂_W] = 1
+    η = TMLE.CMRelevantFactors(
+        Y_T₁T₂,
+        (T₁_W, T₂_W)
+    )
+    cache_manager.cache[η] = 1
+    cache_manager.cache[:last_fluctuation] = 1
+    @test length(cache_manager.cache) == 5
     TargetedEstimation.release!(cache_manager, nothing)
-    @test length(cache_manager.cache) == 3
+    # CMRelevantFactors and fluctuation dropped
+    @test cache_manager.cache == Dict(
+        TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) => 1,
+        TMLE.ConditionalDistribution(:T₂, (:W,))         => 1,
+        TMLE.ConditionalDistribution(:T₁, (:W,))         => 1
+    )
 end
 
 @testset "Test ReleaseUnusableCacheManager" begin
@@ -61,23 +70,37 @@ end
     cache_manager.cache[T₁_W] = 1
     T₂_W = TMLE.ConditionalDistribution(:T₂, (:W,))
     cache_manager.cache[T₂_W] = 1
+    η = TMLE.CMRelevantFactors(
+        Y_T₁T₂,
+        (T₁_W, T₂_W)
+    )
+    cache_manager.cache[η] = 1
     cache_manager.cache[:last_fluctuation] = 1
-    @test length(cache_manager.cache) == 4
-    # After estimation of the first estimand, only the fluctuation is released
+    @test length(cache_manager.cache) == 5
+    # After estimation of the first estimand, the fluctuation and composite factor are released
     TargetedEstimation.release!(cache_manager, estimands[1])
-    @test length(cache_manager.cache) == 3
+    @test cache_manager.cache == Dict(
+        TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) => 1,
+        TMLE.ConditionalDistribution(:T₂, (:W,))         => 1,
+        TMLE.ConditionalDistribution(:T₁, (:W,))         => 1
+    )
 
-    # Estimation of the second estimand will not result in further nuisance functions
+    # Estimation of the second estimand will restore the composite factor
+    cache_manager.cache[η] = 1
+    cache_manager.cache[:last_fluctuation] = 1
     # Y_T₁T₂ and T₂_W are no longer needed
     TargetedEstimation.release!(cache_manager, estimands[2])
-    @test length(cache_manager.cache) == 1
-    @test !haskey(cache_manager.cache, T₂_W)
-    @test !haskey(cache_manager.cache, Y_T₁T₂)
-    @test haskey(cache_manager.cache, T₁_W)
+    @test cache_manager.cache == Dict(TMLE.ConditionalDistribution(:T₁, (:W,)) => 1)
 
     # Estimation of the third estimand will fill the cache with the following
     Y_T₁ = TMLE.ConditionalDistribution(:Y, (:T₁, :W))
     cache_manager.cache[Y_T₁] = 1
+    η = TMLE.CMRelevantFactors(
+        Y_T₁,
+        (T₁_W, )
+    )
+    cache_manager.cache[η] = 1
+    cache_manager.cache[:last_fluctuation] = 1
     # Y_T₁ and T₁_W are no longer needed
     TargetedEstimation.release!(cache_manager, estimands[3])
     @test cache_manager.cache == Dict()

From 1555173943bfe0131a47970815b7b928604ca053 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 28 Dec 2023 14:07:32 +0100
Subject: [PATCH 66/71] handle case when glmnetcv fails at first lambda

---
 Project.toml         | 2 +-
 src/models/glmnet.jl | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index f5df5c6..82b56c7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "TargetedEstimation"
 uuid = "2573d147-4098-46ba-9db2-8608d210ccac"
 authors = ["Olivier Labayle"]
-version = "0.7.4"
+version = "0.8.0"
 
 [deps]
 ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
diff --git a/src/models/glmnet.jl b/src/models/glmnet.jl
index 4232ffd..189aca1 100644
--- a/src/models/glmnet.jl
+++ b/src/models/glmnet.jl
@@ -73,6 +73,10 @@ end
 function MLJBase.fit(model::GLMNetModel, verbosity::Int, X, y)
     folds = getfolds(model.resampling, X, y)
     res = glmnetcv(MLJBase.matrix(X), y; folds=folds, model.params...)
+    # An empty mean-loss path is currently not caught by the GLMNet package, so fail explicitly here
+    if isempty(res.meanloss)
+        error("glmnetcv's mean loss is empty. Probably meaning convergence failed at the first lambda for some fold.")
+    end
     return make_fitresult(model, res, y), nothing, nothing
 end
 

From 48098c62f28f42ebf818c78971a54d125c66bb83 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 28 Dec 2023 14:08:29 +0100
Subject: [PATCH 67/71] up docker deps to 1.10

---
 docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 423f096..7a0885e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM julia:1.9.4-bullseye
+FROM julia:1.10-bullseye
 
 ARG DEBIAN_FRONTEND=noninteractive
 

From 1e25d448dfc7f9dccf851751cb27ecb4dc567ce0 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 28 Dec 2023 15:11:31 +0100
Subject: [PATCH 68/71] add clang compiler

---
 docker/Dockerfile | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7a0885e..f711fc9 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,14 +2,16 @@ FROM julia:1.10-bullseye
 
 ARG DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && \
-    apt-get -y install gcc mono-mcs vim procps && \
-    rm -rf /var/lib/apt/lists/*
-
 ENV TZ=Europe/Amsterdam
 
 ENV JULIA_DEPOT_PATH=/opt
 
+RUN apt-get update && \
+    apt-get -y install gcc mono-mcs vim procps wget lsb-release software-properties-common gnupg && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
+
 # Import the project
 
 COPY . /TargetedEstimation.jl 

From f0da774f034b8de645da2dfe1bc8768dde49dc0f Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 28 Dec 2023 15:23:25 +0100
Subject: [PATCH 69/71] try solve windows problem

---
 test/runner.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/runner.jl b/test/runner.jl
index 1985f35..01bd6da 100644
--- a/test/runner.jl
+++ b/test/runner.jl
@@ -148,6 +148,7 @@ end
             rm(outputs.hdf5.filename)
             rm(outputs.json.filename)
         end
+        GC.gc() # memory freed for deleting arrow file
         rm(datafile)
     end
 end

From 289bfa23ed55911784dbf9ce4c2e5328afe3a0f6 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Sat, 27 Jan 2024 12:14:47 +0000
Subject: [PATCH 70/71] up TMLE and get rid of windows

---
 .github/workflows/CI.yml | 1 -
 Project.toml             | 2 +-
 src/cli.jl               | 4 ++--
 src/runner.jl            | 8 ++++----
 src/utils.jl             | 8 ++++----
 test/utils.jl            | 9 ++++-----
 6 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index ebf50b7..f04e246 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -19,7 +19,6 @@ jobs:
         os:
           - ubuntu-latest
           - macOS-latest
-          - windows-latest
         arch:
           - 'x64'
     steps:
diff --git a/Project.toml b/Project.toml
index 82b56c7..e968a6d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -55,7 +55,7 @@ MLJModels = "0.16"
 MLJXGBoostInterface = "0.3.4"
 MultipleTesting = "0.6.0"
 Optim = "1.7"
-TMLE = "0.13.1"
+TMLE = "0.14.0"
 Tables = "1.10.1"
 YAML = "0.4.9"
 julia = "1.7, 1"
diff --git a/src/cli.jl b/src/cli.jl
index b91d0cf..51d84b1 100644
--- a/src/cli.jl
+++ b/src/cli.jl
@@ -23,8 +23,8 @@ function cli_settings()
 
         "--estimands"
             arg_type = String
-            help = "A string (`generateATEs`) or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)"
-            default = "generateATEs"
+            help = "A string (`factorialATE`) or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)"
+            default = "factorialATE"
 
         "--estimators"
             arg_type = String
diff --git a/src/runner.jl b/src/runner.jl
index 1079db7..e91c09d 100644
--- a/src/runner.jl
+++ b/src/runner.jl
@@ -8,7 +8,7 @@ mutable struct Runner
     verbosity::Int
     failed_nuisance::Set
     function Runner(dataset; 
-        estimands="generateATEs", 
+        estimands="factorialATE", 
         estimators="glmnet",
         verbosity=0, 
         outputs=Outputs(), 
@@ -120,7 +120,7 @@ end
 
 """
     tmle(dataset; 
-        estimands="generateATEs", 
+        estimands="factorialATE", 
         estimators="glmnet"; 
         verbosity=0, 
         outputs=Outputs(),
@@ -138,7 +138,7 @@ TMLE CLI.
 
 # Options
 
-- `--estimands`: A string ("generateATEs") or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)
+- `--estimands`: A string ("factorialATE") or a serialized TMLE.Configuration (accepted formats: .json | .yaml | .jls)
 - `--estimators`: A julia file containing the estimators to use.
 - `-v, --verbosity`: Verbosity level.
 - `-o, --outputs`: Ouputs to be generated.
@@ -151,7 +151,7 @@ TMLE CLI.
 - `-s, --sort_estimands`: Sort estimands to minimize cache usage (A brute force approach will be used, resulting in exponentially long sorting time).
 """
 function tmle(dataset::String;
-    estimands::String="generateATEs", 
+    estimands::String="factorialATE", 
     estimators::String="glmnet",
     verbosity::Int=0, 
     outputs::Outputs=Outputs(),
diff --git a/src/utils.jl b/src/utils.jl
index c378749..8fa48ca 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -81,19 +81,19 @@ This explicitely requires that the following columns belong to the dataset:
 
 All ATE parameters are generated.
 """
-function TMLE.generateATEs(dataset)
+function TMLE.factorialATE(dataset)
     colnames = names(dataset)
     "T" ∈ colnames || throw(ArgumentError("No column 'T' found in the dataset for the treatment variable."))
     "Y" ∈ colnames || throw(ArgumentError("No column 'Y' found in the dataset for the outcome variable."))
     confounding_variables = Tuple(name for name in colnames if occursin(r"^W", name))
     length(confounding_variables) > 0 || throw(ArgumentError("Could not find any confounding variable (starting with 'W') in the dataset."))
     
-    return [generateATEs(dataset, (:T, ), :Y; confounders=confounding_variables)]
+    return [factorialATE(dataset, (:T, ), :Y; confounders=confounding_variables)]
 end
 
 function build_estimands_list(estimands_pattern, dataset)
-    estimands = if estimands_pattern == "generateATEs"
-        generateATEs(dataset)
+    estimands = if estimands_pattern == "factorialATE"
+        factorialATE(dataset)
     else
         proofread_estimands(estimands_pattern, dataset)
     end
diff --git a/test/utils.jl b/test/utils.jl
index 072db30..9279170 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -75,18 +75,17 @@ end
     rm(filename)
 end
 
-@testset "Test generateATEs" begin
+@testset "Test factorialATE" begin
     dataset = DataFrame(C=[1, 2, 3, 4],)
-    @test_throws ArgumentError TargetedEstimation.build_estimands_list("generateATEs", dataset)
+    @test_throws ArgumentError TargetedEstimation.build_estimands_list("factorialATE", dataset)
     dataset.T = [0, 1, missing, 2]
-    @test_throws ArgumentError TargetedEstimation.build_estimands_list("generateATEs", dataset)
+    @test_throws ArgumentError TargetedEstimation.build_estimands_list("factorialATE", dataset)
     dataset.Y = [0, 1, 2, 2]
     dataset.W1 = [1, 1, 1, 1]
     dataset.W_2 = [1, 1, 1, 1]
-    composedATE = TargetedEstimation.build_estimands_list("generateATEs", dataset)[1]
+    composedATE = TargetedEstimation.build_estimands_list("factorialATE", dataset)[1]
     @test composedATE.args == (
         TMLE.StatisticalATE(:Y, (T = (case = 1, control = 0),), (T = (:W1, :W_2),), ()),
-        TMLE.StatisticalATE(:Y, (T = (case = 2, control = 0),), (T = (:W1, :W_2),), ()),
         TMLE.StatisticalATE(:Y, (T = (case = 2, control = 1),), (T = (:W1, :W_2),), ())
     )
 end

From 43a215c0b190df99030e8cd4cd9e9e0c44eacede Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Mon, 5 Feb 2024 09:55:17 +0100
Subject: [PATCH 71/71] Disable the Build App CI steps for now; restore later

---
 .github/workflows/CI.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index f04e246..c651df5 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -34,12 +34,12 @@ jobs:
         with:
           files: lcov.info
       - uses: julia-actions/julia-processcoverage@v1
-      - name: Build App
-        run: julia --project --startup-file=no deps/build_app.jl
-      - uses: actions/upload-artifact@v4
-        with:
-          name: tmle-${{ matrix.os }}-${{ matrix.arch }}
-          path: tmle
+      # - name: Build App
+      #   run: julia --project --startup-file=no deps/build_app.jl
+      # - uses: actions/upload-artifact@v4
+      #   with:
+      #     name: tmle-${{ matrix.os }}-${{ matrix.arch }}
+      #     path: tmle
   docs:
     name: Documentation
     runs-on: ubuntu-latest