diff --git a/.github/workflows/pr-cmd-format.yml b/.github/workflows/pr-cmd-format.yml new file mode 100644 index 000000000..3ef15390e --- /dev/null +++ b/.github/workflows/pr-cmd-format.yml @@ -0,0 +1,53 @@ + +on: + issue_comment: + types: [created] + +name: PR auto-formatting command + +jobs: + format: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER' || github.event.issue.user.id == github.event.comment.user.id) && startsWith(github.event.comment.body, '/format') }} + name: auto-format + runs-on: ubuntu-latest + steps: + - name: Clone the repository + uses: actions/checkout@v2 + - name: Checkout the pull request code + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh pr checkout ${{ github.event.issue.number }} + - name: Install JuliaFormatter and format + run: | + julia -e 'import Pkg; Pkg.add("JuliaFormatter")' + julia -e 'using JuliaFormatter; format(".")' + - name: Remove trailing whitespace + run: | + find -name '*.jl' -or -name '*.md' -or -name '*.toml' -or -name '*.yml' | while read filename ; do + # remove any trailing spaces + sed --in-place -e 's/\s*$//' "$filename" + # add a final newline if missing + if [[ -s "$filename" && $(tail -c 1 "$filename" |wc -l) -eq 0 ]] ; then + echo >> "$filename" + fi + # squash superfluous final newlines + sed -i -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "$filename" + done + - name: Commit and push the changes + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if [ `git status -s | wc -l` -ne 0 ] ; then + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git commit -a -m "automatic formatting" -m "triggered by @$GITHUB_ACTOR on PR #${{ github.event.issue.number }}" + if git push + then gh pr comment ${{ github.event.issue.number }} --body \ + ":heavy_check_mark: auto-formatting triggered by [this comment](${{ github.event.comment.html_url }}) succeeded, commited as `git rev-parse HEAD`" + else gh pr comment ${{ github.event.issue.number }} --body \ + ":x: auto-formatting triggered by [this comment](${{ github.event.comment.html_url }}) failed, perhaps someone pushed to the PR in the meantime?" + fi + else + echo "No changes, all good!" + fi diff --git a/.github/workflows/pr-formatcheck.yml b/.github/workflows/pr-formatcheck.yml new file mode 100644 index 000000000..622c269ab --- /dev/null +++ b/.github/workflows/pr-formatcheck.yml @@ -0,0 +1,41 @@ + +on: + pull_request: + +name: Formatting check + +jobs: + formatcheck: + name: check-code-format + runs-on: ubuntu-latest + steps: + - name: Check out the code + uses: actions/checkout@v2 + - name: Install JuliaFormatter and format + run: | + julia -e 'import Pkg; Pkg.add("JuliaFormatter")' + julia -e 'using JuliaFormatter; format(".")' + - name: Remove trailing whitespace + run: | + find -name '*.jl' -or -name '*.md' -or -name '*.toml' -or -name '*.yml' | while read filename ; do + # remove any trailing spaces + sed --in-place -e 's/\s*$//' "$filename" + # add a final newline if missing + if [[ -s "$filename" && $(tail -c 1 "$filename" |wc -l) -eq 0 ]] ; then + echo >> "$filename" + fi + # squash superfluous final newlines + sed -i -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "$filename" + done + - name: Report any problems + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if [ `git status -s | wc -l` -ne 0 ] + then gh pr comment ${{ github.event.number }} --body \ + ":red_square:  Commit ${{ github.event.pull_request.head.sha }} requires formatting!\ + "$'\n\n'"Required formatting changes summary:\ + "$'\n```\n'"`git diff --stat`"$'\n```' + else gh pr comment ${{ github.event.number }} --body \ + ":green_circle:  Commit ${{ github.event.pull_request.head.sha }} is formatted properly." + fi diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cddf54577..04e7eae05 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -189,44 +189,6 @@ mac:julia1.6: <<: *global_julia16 <<: *global_env_mac -# -# CODE FORMAT CHECKER -# - -format: - stage: test - script: - - | - docker run -v "$PWD":/project $CI_REGISTRY/r3/docker/julia-custom julia -e 'using JuliaFormatter; format("/project", verbose=true);' - - | - if [ `docker run -i -v "$PWD":/git alpine/git status -s | wc -l` -ne 0 ] ; then - GITHUB_COMMENT=":red_square:  Commit ${CI_COMMIT_SHORT_SHA} requires formatting!\ - "$'\n\n'"Required formatting changes summary:\ - "$'\n```\n'"`docker run -i -v \"$PWD\":/git alpine/git diff --stat`"$'\n```' - else - GITHUB_COMMENT=":green_circle:  Commit ${CI_COMMIT_SHORT_SHA} is formatted properly." - fi - - | - export GITHUB_TOKEN="${GITHUB_ACCESS_TOKEN_FORMATTER}" - export GITHUB_OWNER="lcsb-biocore" - export GITHUB_REPO="cobrexa.jl" - export GITHUB_COMMENT_TYPE=pr - export GITHUB_PR_ISSUE_NUMBER="${CI_EXTERNAL_PULL_REQUEST_IID}" - export GITHUB_COMMENT_FORMAT="" - export GITHUB_COMMENT - - | - docker run -i --rm \ - -e GITHUB_TOKEN \ - -e GITHUB_OWNER \ - -e GITHUB_REPO \ - -e GITHUB_COMMENT_TYPE \ - -e GITHUB_PR_ISSUE_NUMBER \ - -e GITHUB_COMMENT_FORMAT \ - -e GITHUB_COMMENT \ - cloudposse/github-commenter - <<: *global_dind - <<: *global_trigger_pull_request - # # ASSETS # diff --git a/Project.toml b/Project.toml index 957cbb127..042ec4e94 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "COBREXA" uuid = "babc4406-5200-4a30-9033-bf5ae714c842" authors = ["The developers of COBREXA.jl"] -version = "1.3.0" +version = "1.3.1" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" diff --git a/docs/src/functions/types.md b/docs/src/functions/types.md index ad20573e3..c952444c3 100644 --- a/docs/src/functions/types.md +++ b/docs/src/functions/types.md @@ -12,3 +12,9 @@ Pages = map(file -> joinpath("base", "types", "abstract", file), readdir("../src Modules = [COBREXA] Pages = map(file -> joinpath("base", "types", file), readdir("../src/base/types")) ``` + +## Model type wrappers +```@autodocs +Modules = [COBREXA] +Pages = map(file -> joinpath("base", "types", "wrappers", file), readdir("../src/base/types/wrappers")) +``` diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index bf79ef980..eedef6208 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -147,24 +147,15 @@ function make_gecko_model( push!(coupling_row_mass_group, _gecko_capacity(grp, idxs, mms, gmgb_(grp))) end - # create model with dummy objective - gm = GeckoModel( - spzeros(length(columns) + length(coupling_row_gene_product)), + GeckoModel( + [ + _gecko_reaction_column_reactions(columns, model)' * objective(model) + spzeros(length(coupling_row_gene_product)) + ], columns, coupling_row_reaction, collect(zip(coupling_row_gene_product, gpb_.(gids[coupling_row_gene_product]))), coupling_row_mass_group, model, ) - - #= - Set objective. This is a separate field because gene products can also be objectives. - This way they can be set as objectives by the user. - =# - gm.objective .= [ - _gecko_reaction_column_reactions(gm)' * objective(gm.inner) - spzeros(length(coupling_row_gene_product)) - ] - - return gm end diff --git a/src/analysis/sampling/warmup_variability.jl b/src/analysis/sampling/warmup_variability.jl index cf66bb522..917662968 100644 --- a/src/analysis/sampling/warmup_variability.jl +++ b/src/analysis/sampling/warmup_variability.jl @@ -71,7 +71,7 @@ function warmup_from_variability( end ) - map(fetch, save_at.(workers, :cobrexa_sampling_warmup_optmodel, Ref(save_model))) + asyncmap(fetch, save_at.(workers, :cobrexa_sampling_warmup_optmodel, Ref(save_model))) fluxes = hcat( dpmap( @@ -92,7 +92,7 @@ function warmup_from_variability( ) # free the data on workers - map(fetch, remove_from.(workers, :cobrexa_sampling_warmup_optmodel)) + asyncmap(fetch, remove_from.(workers, :cobrexa_sampling_warmup_optmodel)) return fluxes, lbs, ubs end diff --git a/src/analysis/screening.jl b/src/analysis/screening.jl index 55ba83020..1876fcef6 100644 --- a/src/analysis/screening.jl +++ b/src/analysis/screening.jl @@ -129,9 +129,9 @@ function _screen_impl( workers = [myid()], )::Array where {V<:AbstractVector,A,N} - map(fetch, save_at.(workers, :cobrexa_screen_variants_model, Ref(model))) - map(fetch, save_at.(workers, :cobrexa_screen_variants_analysis_fn, Ref(analysis))) - map(fetch, get_from.(workers, Ref(:(precache!(cobrexa_screen_variants_model))))) + asyncmap(fetch, save_at.(workers, :cobrexa_screen_variants_model, Ref(model))) + asyncmap(fetch, save_at.(workers, :cobrexa_screen_variants_analysis_fn, Ref(analysis))) + asyncmap(fetch, get_from.(workers, Ref(:(precache!(cobrexa_screen_variants_model))))) res = pmap( (vars, args)::Tuple -> screen_variant( @@ -144,8 +144,8 @@ function _screen_impl( zip(variants, args), ) - map(fetch, remove_from.(workers, :cobrexa_screen_variants_model)) - map(fetch, remove_from.(workers, :cobrexa_screen_variants_analysis_fn)) + asyncmap(fetch, remove_from.(workers, :cobrexa_screen_variants_model)) + asyncmap(fetch, remove_from.(workers, :cobrexa_screen_variants_analysis_fn)) return res end @@ -276,7 +276,7 @@ function _screen_optmodel_modifications_impl( workers = [myid()], )::Array where {V<:AbstractVector,VF<:AbstractVector,A,N} - map( + asyncmap( fetch, save_at.( workers, @@ -290,12 +290,15 @@ function _screen_optmodel_modifications_impl( ), ), ) - map(fetch, save_at.(workers, :cobrexa_screen_optmodel_modifications_fn, Ref(analysis))) + asyncmap( + fetch, + save_at.(workers, :cobrexa_screen_optmodel_modifications_fn, Ref(analysis)), + ) res = pmap(_screen_optmodel_item, CachingPool(workers), zip(modifications, args)) - map(fetch, remove_from.(workers, :cobrexa_screen_optmodel_modifications_data)) - map(fetch, remove_from.(workers, :cobrexa_screen_optmodel_modifications_fn)) + asyncmap(fetch, remove_from.(workers, :cobrexa_screen_optmodel_modifications_data)) + asyncmap(fetch, remove_from.(workers, :cobrexa_screen_optmodel_modifications_fn)) return res end diff --git a/src/base/solver.jl b/src/base/solver.jl index 058ab5b40..02735a2bc 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -19,16 +19,18 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_S xl, xu = bounds(model) optimization_model = Model(optimizer) - @variable(optimization_model, x[i = 1:n]) + @variable(optimization_model, x[1:n]) @objective(optimization_model, sense, objective(model)' * x) @constraint(optimization_model, mb, stoichiometry(model) * x .== balance(model)) # mass balance @constraint(optimization_model, lbs, xl .<= x) # lower bounds @constraint(optimization_model, ubs, x .<= xu) # upper bounds C = coupling(model) # empty if no coupling - cl, cu = coupling_bounds(model) - isempty(C) || @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds - isempty(C) || @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds + isempty(C) || begin + cl, cu = coupling_bounds(model) + @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds + @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds + end return optimization_model end diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl index 1ba000648..ee764cffe 100644 --- a/src/base/types/MetabolicModel.jl +++ b/src/base/types/MetabolicModel.jl @@ -12,7 +12,14 @@ _missing_impl_error(m, a) = throw(MethodError(m, a)) """ reactions(a::MetabolicModel)::Vector{String} -Return a vector of reaction identifiers in a model. +Return a vector of reaction identifiers in a model. The vector precisely +corresponds to the columns in [`stoichiometry`](@ref) matrix. + +For technical reasons, the "reactions" may sometimes not be true reactions but +various virtual and helper pseudo-reactions that are used in the metabolic +modeling, such as metabolite exchanges, separate forward and reverse reactions, +supplies of enzymatic and genetic material and virtual cell volume, etc. To +simplify the view of the model contents use [`reaction_flux`](@ref). """ function reactions(a::MetabolicModel)::Vector{String} _missing_impl_error(reactions, (a,)) @@ -21,7 +28,11 @@ end """ metabolites(a::MetabolicModel)::Vector{String} -Return a vector of metabolite identifiers in a model. +Return a vector of metabolite identifiers in a model. The vector precisely +corresponds to the rows in [`stoichiometry`](@ref) matrix. + +As with [`reactions`](@ref)s, some metabolites in models may be virtual, +representing purely technical equality constraints. """ function metabolites(a::MetabolicModel)::Vector{String} _missing_impl_error(metabolites, (a,)) diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 42cc47aa8..4b12951d2 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -16,7 +16,7 @@ end """ struct _gecko_capacity -A helper struct that contains the gene product capacity terms organized by +A helper struct that contains the gene product capacity terms organized by the grouping type, e.g. metabolic or membrane groups etc. """ struct _gecko_capacity @@ -52,19 +52,18 @@ The model wraps another "internal" model, and adds following modifications: proteins). The structure contains fields `columns` that describe the contents of the -coupling columns, `coupling_row_reaction`, `coupling_row_gene_product` and -`coupling_row_mass_group` that describe correspondence of the coupling rows to -original model and determine the coupling bounds, and `inner`, which is the -original wrapped model. Note, `objective` is the objective vector of the model. -Special care needs to be taken to ensure that its length is the sum of -`n_reactions(model)` and `n_genes(model)` when the user modifies it, where -`model` is the GeckoModel in question. +stoichiometry matrix columns, `coupling_row_reaction`, +`coupling_row_gene_product` and `coupling_row_mass_group` that describe +correspondence of the coupling rows to original model and determine the +coupling bounds (note: the coupling for gene product is actually added to +stoichiometry, not in [`coupling`](@ref)), and `inner`, which is the original +wrapped model. The `objective` of the model includes also the extra columns for +individual genes, as held by `coupling_row_gene_product`. Implementation exposes the split reactions (available as `reactions(model)`), -but retains the original "simple" reactions accessible by [`fluxes`](@ref). All -constraints are implemented using [`coupling`](@ref) and -[`coupling_bounds`](@ref), i.e., all virtual metabolites described by GECKO are -purely virtual and do not occur in [`metabolites`](@ref). +but retains the original "simple" reactions accessible by [`fluxes`](@ref). +The related constraints are implemented using [`coupling`](@ref) and +[`coupling_bounds`](@ref). """ struct GeckoModel <: ModelWrapper objective::SparseVec @@ -98,10 +97,9 @@ end objective(model::GeckoModel) Return the objective of the [`GeckoModel`](@ref). Note, the objective is with -respect to the internal variables, i.e. [`reactions(model)`](@ref) and -[`genes(model)`](@ref). To manually set the objective, index into -`model.objective` appropriately, and remember to set the previous coefficients -to zero. +respect to the internal variables, i.e. [`reactions(model)`](@ref), which are +the unidirectional reactions and the genes involved in enzymatic reactions that +have kinetic data. """ objective(model::GeckoModel) = model.objective @@ -112,16 +110,17 @@ Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split to forward- and reverse-only parts with different isozyme indexes; reactions IDs are mangled accordingly with suffixes). """ -reactions(model::GeckoModel) = - let inner_reactions = reactions(model.inner) - [ - _gecko_reaction_name( - inner_reactions[col.reaction_idx], - col.direction, - col.isozyme_idx, - ) for col in model.columns - ] - end +function reactions(model::GeckoModel) + inner_reactions = reactions(model.inner) + mangled_reactions = [ + _gecko_reaction_name( + inner_reactions[col.reaction_idx], + col.direction, + col.isozyme_idx, + ) for col in model.columns + ] + [mangled_reactions; genes(model)] +end """ n_reactions(model::GeckoModel) @@ -129,7 +128,7 @@ reactions(model::GeckoModel) = Returns the number of all irreversible reactions in `model` as well as the number of gene products that take part in enzymatic reactions. """ -n_reactions(model::GeckoModel) = length(model.columns) +n_reactions(model::GeckoModel) = length(model.columns) + n_genes(model) """ bounds(model::GeckoModel) @@ -217,7 +216,7 @@ end """ balance(model::GeckoModel) -Return the balance of the reactions in the inner model, concatenated with a vector of +Return the balance of the reactions in the inner model, concatenated with a vector of zeros representing the enzyme balance of a [`GeckoModel`](@ref). """ balance(model::GeckoModel) = @@ -233,7 +232,7 @@ n_genes(model::GeckoModel) = length(model.coupling_row_gene_product) """ genes(model::GeckoModel) -Return the gene ids of genes that have enzymatic constraints associated with them. +Return the gene ids of genes that have enzymatic constraints associated with them. """ genes(model::GeckoModel) = genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]] @@ -243,7 +242,7 @@ genes(model::GeckoModel) = Return the ids of all metabolites, both real and pseudo, for a [`GeckoModel`](@ref). """ -metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model) .* "#supply"] +metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model) .* "#gecko"] """ n_metabolites(model::GeckoModel) diff --git a/src/base/utils/enzymes.jl b/src/base/utils/enzymes.jl index 6ee4091b0..152437fb1 100644 --- a/src/base/utils/enzymes.jl +++ b/src/base/utils/enzymes.jl @@ -8,7 +8,7 @@ function that returns a dictionary of solved fluxes. """ gene_product_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? - Dict(genes(model) .=> value.(opt_model[:x])[(n_reactions(model)+1):end]) : nothing + Dict(genes(model) .=> value.(opt_model[:x])[(length(model.columns)+1):end]) : nothing """ gene_product_dict(model::GeckoModel) @@ -26,7 +26,7 @@ gene_product_mass_group_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? Dict( grp.group_id => dot( - value.(opt_model[:x])[n_reactions(model).+grp.gene_product_idxs], + value.(opt_model[:x])[length(model.columns).+grp.gene_product_idxs], grp.gene_product_molar_masses, ) for grp in model.coupling_row_mass_group ) : nothing @@ -34,9 +34,10 @@ gene_product_mass_group_dict(model::GeckoModel, opt_model) = """ gene_product_mass_group_dict(model::GeckoModel) -A pipe-able variant of [`mass_group_dict`](@ref). +A pipe-able variant of [`gene_product_mass_group_dict`](@ref). """ -gene_product_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) +gene_product_mass_group_dict(model::GeckoModel) = + x -> gene_product_mass_group_dict(model, x) """ gene_product_mass(model::SMomentModel) diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index 86500314b..e25ec2863 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -15,12 +15,20 @@ _gecko_reaction_name(original_name::String, direction::Int, isozyme_idx::Int) = Retrieve a utility mapping between reactions and split reactions; rows correspond to "original" reactions, columns correspond to "split" reactions. """ -_gecko_reaction_column_reactions(model::GeckoModel) = sparse( - [col.reaction_idx for col in model.columns], - 1:length(model.columns), - [col.direction >= 0 ? 1 : -1 for col in model.columns], - n_reactions(model.inner), - length(model.columns), +_gecko_reaction_column_reactions(model::GeckoModel) = + _gecko_reaction_column_reactions(model.columns, model.inner) + +""" + _gecko_reaction_column_reactions(columns, inner) + +Helper method that doesn't require the whole [`GeckoModel`](@ref). +""" +_gecko_reaction_column_reactions(columns, inner) = sparse( + [col.reaction_idx for col in columns], + 1:length(columns), + [col.direction >= 0 ? 1 : -1 for col in columns], + n_reactions(inner), + length(columns), ) """ diff --git a/src/reconstruction/gapfill_minimum_reactions.jl b/src/reconstruction/gapfill_minimum_reactions.jl new file mode 100644 index 000000000..f6bced117 --- /dev/null +++ b/src/reconstruction/gapfill_minimum_reactions.jl @@ -0,0 +1,189 @@ +""" + function gapfill_minimum_reactions( + model::MetabolicModel, + universal_reactions::Vector{Reaction}, + optimizer; + objective_bounds = (_constants.tolerance, _constants.default_reaction_bound), + maximum_new_reactions = 5, + weights = fill(1.0, length(universal_reactions)), + modifications = [], + ) + +Find a minimal set of reactions from `universal_reactions` that should be added +to `model` so that the model has a feasible solution with bounds on its +objective function given in `objective_bounds`. Weights of the added reactions +may be specified in `weights` to prefer adding reactions with lower weights. + +Internally, this builds and solves a mixed integer program, following +the method of Reed et al. (Reed, Jennifer L., et al. "Systems approach to +refining genome annotation." *Proceedings of the National Academy of Sciences* +(2006)). + +The function returns a solved JuMP optimization model, with the boolean +reaction inclusion indicators in variable vector `y`. Use +[`gapfilled_mask`](@ref) or [`gapfilled_rids`](@ref) to collect the reaction +information in Julia datatypes. + +To reduce the uncertainty in the MILP solver (and likely reduce the +complexity), you may put a limit on the size of the added reaction set in +`maximum_new_reactions`. +""" +function gapfill_minimum_reactions( + model::MetabolicModel, + universal_reactions::Vector{Reaction}, + optimizer; + objective_bounds = (_constants.tolerance, _constants.default_reaction_bound), + maximum_new_reactions = length(universal_reactions), + weights = fill(1.0, length(universal_reactions)), + modifications = [], +) + precache!(model) + + # constraints from universal reactions that can fill gaps + univs = _universal_stoichiometry(universal_reactions, metabolites(model)) + + # add space for additional metabolites and glue with the universal reaction + # stoichiometry + extended_stoichiometry = [[ + stoichiometry(model) + spzeros(length(univs.new_mids), n_reactions(model)) + ] univs.stoichiometry] + + # make the model anew (we can't really use make_optimization_model because + # we need the balances and several other things completely removed. Could + # be solved either by parametrizing make_optimization_model or by making a + # tiny temporary wrapper for this. + # keep this in sync with src/base/solver.jl, except for adding balances. + opt_model = Model(optimizer) + @variable(opt_model, x[1:n_reactions(model)]) + xl, xu = bounds(model) + @constraint(opt_model, lbs, xl .<= x) + @constraint(opt_model, ubs, x .<= xu) + + C = coupling(model) + isempty(C) || begin + cl, cu = coupling_bounds(model) + @constraint(opt_model, c_lbs, cl .<= C * x) + @constraint(opt_model, c_ubs, C * x .<= cu) + end + + # add the variables for new stuff + @variable(opt_model, ux[1:length(universal_reactions)]) # fluxes from universal reactions + @variable(opt_model, y[1:length(universal_reactions)], Bin) # indicators + + # combined metabolite balances + @constraint( + opt_model, + extended_stoichiometry * [x; ux] .== + [balance(model); zeros(length(univs.new_mids))] + ) + + # objective bounds + @constraint(opt_model, objective_bounds[1] <= objective(model)' * x) + @constraint(opt_model, objective_bounds[2] >= objective(model)' * x) + + # flux bounds of universal reactions with indicators + @constraint(opt_model, ulb, univs.lbs .* y .<= ux) + @constraint(opt_model, uub, univs.ubs .* y .>= ux) + + # minimize the total number of indicated reactions + @objective(opt_model, Min, weights' * y) + + # limit the number of indicated reactions + # (prevents the solver from exploring too far) + @constraint(opt_model, sum(y) <= maximum_new_reactions) + + # apply all modifications + for mod in modifications + mod(opt_model, model) + end + + optimize!(opt_model) + + return opt_model +end + +""" + gapfilled_mask(opt_model::BitVector) + +Get a `BitVector` of added reactions from the model solved by +[`gapfill_minimum_reactions`](@ref). The bit indexes correspond to the indexes +of `universal_reactions` given to the gapfilling function. In case the model is +not solved, this returns `nothing`. + +# Example + + gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_mask +""" +gapfilled_mask(opt_model)::BitVector = + is_solved(opt_model) ? value.(opt_model[:y]) .> 0 : nothing + +""" + gapfilled_rids(opt_model, universal_reactions::Vector{Reaction})::Vector{String} + +Utility to extract a short vector of IDs of the reactions added by the +gapfilling algorithm. Use with `opt_model` returned from +[`gapfill_minimum_reactions`](@ref). +""" +gapfilled_rids(opt_model, universal_reactions::Vector{Reaction}) = + let v = gapfilled_mask(opt_model) + isnothing(v) ? nothing : [rxn.id for rxn in universal_reactions[v]] + end + +""" + gapfilled_rids(universal_reactions::Vector{Reaction}) + +Overload of [`gapfilled_rids`](@ref) that can be piped easily. + +# Example + + gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_rids(myReactions) +""" +gapfilled_rids(universal_reactions::Vector{Reaction}) = + opt_model -> gapfilled_rids(opt_model, universal_reactions) + +""" + _universal_stoichiometry( + universal_reactions::Vector{Reaction}, + mids, + ) + +A helper function that constructs the stoichiometric matrix of a set of +`universal_reactions`. The order of the metabolites is determined with +`mids`, so that this stoichiometric matrix can be combined with +another one. +""" +function _universal_stoichiometry(urxns::Vector{Reaction}, mids::Vector{String}) + + # traversal over all elements in stoichiometry of universal_reactions + stoiMap(f) = [ + f(ridx, mid, stoi) for (ridx, rxn) in enumerate(urxns) for + (mid, stoi) in rxn.metabolites + ] + + # make an index and find new metabolites + met_id_lookup = Dict(mids .=> eachindex(mids)) + + new_mids = + collect(Set(filter(x -> !haskey(met_id_lookup, x), stoiMap((_, mid, _) -> mid)))) + all_mids = vcat(mids, new_mids) + + # remake the index with all metabolites + met_id_lookup = Dict(all_mids .=> eachindex(all_mids)) + + # build the result + return ( + stoichiometry = float.( + sparse( + stoiMap((_, mid, _) -> met_id_lookup[mid]), + stoiMap((ridx, _, _) -> ridx), + stoiMap((_, _, stoi) -> stoi), + length(all_mids), + length(urxns), + ), + ), + lbs = [rxn.lb for rxn in urxns], + ubs = [rxn.ub for rxn in urxns], + new_mids = new_mids, + ) +end diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 58318405f..eab3a7083 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -50,11 +50,25 @@ @test isapprox(prot_mass, total_gene_product_mass, atol = TEST_TOLERANCE) @test isapprox(prot_mass, mass_groups["uncategorized"], atol = TEST_TOLERANCE) + + # test enzyme objective + growth_lb = rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"] * 0.9 + opt_model = flux_balance_analysis( + gm, + Tulip.Optimizer; + modifications = [ + change_objective(genes(gm); weights = [], sense = COBREXA.MIN_SENSE), + change_constraint("BIOMASS_Ecoli_core_w_GAM", lb = growth_lb), + change_optimizer_attribute("IPM_IterationsLimit", 1000), + ], + ) + mass_groups_min = gene_product_mass_group_dict(gm, opt_model) + @test mass_groups_min["uncategorized"] < mass_groups["uncategorized"] end @testset "GECKO small model" begin #= - Implement the small model found in the supplment of the + Implement the small model found in the supplment of the original GECKO paper. This model is nice to troubleshoot with, because the stoich matrix is small. =# @@ -73,8 +87,10 @@ end "r6", m4 → nothing, 0, 100 end - gs = [Gene("g$i") for i = 1:4] + gs = [Gene("g$i") for i = 1:5] + m.reactions["r2"].grr = [["g5"]] + m.reactions["r3"].grr = [["g1"]] m.reactions["r4"].grr = [["g1"], ["g2"]] m.reactions["r5"].grr = [["g3", "g4"]] m.reactions["r6"].objective_coefficient = 1.0 @@ -120,4 +136,6 @@ end @test isapprox(rxn_fluxes["r6"], 3.181818181753438, atol = TEST_TOLERANCE) @test isapprox(gene_products["g4"], 0.09090909090607537, atol = TEST_TOLERANCE) @test isapprox(mass_groups["uncategorized"], 0.5, atol = TEST_TOLERANCE) + @test length(genes(gm)) == 4 + @test length(genes(gm.inner)) == 5 end diff --git a/test/reconstruction/gapfill_minimum_reactions.jl b/test/reconstruction/gapfill_minimum_reactions.jl new file mode 100644 index 000000000..884b31063 --- /dev/null +++ b/test/reconstruction/gapfill_minimum_reactions.jl @@ -0,0 +1,47 @@ +@testset "Gap fill with minimum reactions" begin + #= + Implement the small model that should be gapfilled. + =# + model = StandardModel("partial model") + + (m1, m2, m3, m4, m5, m6, m7, m8) = Metabolite.("m$i" for i = 1:8) + + @add_reactions! model begin + "r1", nothing → m1, 0, 1 + "r2", m1 ↔ m2, -10, 100 + "r3", m1 → m3, 0, 100 + "r4", m2 ↔ m4, 0, 100 + # "r5", m3 → m4, 0, 100 + "r6", m4 → nothing, 0, 100 + # "r7", m2 → m7 + m6, 0, 100 + "r8", m7 → m8, 0, 100 + "r9", m8 → nothing, 0, 100 + # "r10", m6 → nothing, 0, 100 + "r11", m2 + m3 + m7 → nothing, 0, 100 + "r12", m3 → m5, -10, 10 + end + + model.reactions["r11"].objective_coefficient = 1.0 + + add_metabolites!(model, [m1, m2, m3, m4, m5, m7, m8]) + + r5 = Reaction("r5", Dict("m3" => -1, "m4" => 1), :forward) + r7 = Reaction("r7", Dict("m2" => -1, "m7" => 1, "m6" => 1), :forward) + r10 = Reaction("r10", Dict("m6" => -1), :forward) + rA = Reaction("rA", Dict("m1" => -1, "m2" => 1, "m3" => 1), :forward) + rB = Reaction("rB", Dict("m2" => -1, "m9" => 1), :forward) + rC = Reaction("rC", Dict("m9" => -1, "m10" => 1), :bidirectional) + rD = Reaction("rC", Dict("m10" => -1), :reverse) + + universal_reactions = [r5, r7, r10, rA, rB, rC, rD] + + rxns = + gapfill_minimum_reactions( + model, + universal_reactions, + GLPK.Optimizer; + objective_bounds = (0.1, 1000.0), + ) |> gapfilled_rids(universal_reactions) + + @test issetequal(["r7", "r10"], rxns) +end