From d1eb3e4a70bdd0111d2f99405498c2dcf3eb4a8c Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Fri, 18 Feb 2022 13:51:34 +0100 Subject: [PATCH 001/109] Revert "Revert "display banner consistently at each import"" This reverts commit 10d72b4a6743d8e804dc6b2777c8759231901d26, which reverted the reverted commit 71cc7c8f7a9d823abf9790c3100103b5d55285bc. --- src/COBREXA.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/COBREXA.jl b/src/COBREXA.jl index 2f26a9e43..996c89fde 100644 --- a/src/COBREXA.jl +++ b/src/COBREXA.jl @@ -18,9 +18,11 @@ import Base: findfirst, getindex, show import Pkg import SBML # conflict with Reaction struct name - include("banner.jl") -_print_banner() + +function __init__() + _print_banner() +end # autoloading const _inc(path...) = include(joinpath(path...)) From b0913113381e44e42bf1e6f87805bbcb2ba01574 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Fri, 18 Feb 2022 13:54:40 +0100 Subject: [PATCH 002/109] add sensible conditions to banner display --- src/COBREXA.jl | 4 ---- src/banner.jl | 6 ++++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/COBREXA.jl b/src/COBREXA.jl index 996c89fde..c27cc8f64 100644 --- a/src/COBREXA.jl +++ b/src/COBREXA.jl @@ -20,10 +20,6 @@ import SBML # conflict with Reaction struct name include("banner.jl") -function __init__() - _print_banner() -end - # autoloading const _inc(path...) 
= include(joinpath(path...)) const _inc_all(dir) = _inc.(joinpath.(dir, filter(fn -> endswith(fn, ".jl"), readdir(dir)))) diff --git a/src/banner.jl b/src/banner.jl index 68c32620f..6ffeafe21 100644 --- a/src/banner.jl +++ b/src/banner.jl @@ -4,6 +4,12 @@ include_dependency(joinpath(_PKG_ROOT_DIR, "Project.toml")) const COBREXA_VERSION = VersionNumber(Pkg.TOML.parsefile(joinpath(_PKG_ROOT_DIR, "Project.toml"))["version"]) +function __init__() + if myid() == 1 && Base.JLOptions().banner != 0 + _print_banner() + end +end + function _print_banner() c = Base.text_colors n = c[:normal] From 86ac6db9e02cd610c10470aa73582a7e70c306fb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 26 Feb 2022 00:18:34 +0000 Subject: [PATCH 003/109] CompatHelper: bump compat for "JuMP" to "0.23" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e5aac5090..5e5613bc1 100644 --- a/Project.toml +++ b/Project.toml @@ -23,7 +23,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] DistributedData = "0.1.4" JSON = "0.21" -JuMP = "0.21.0, 0.22.0" +JuMP = "0.21.0, 0.22.0, 0.23" MAT = "0.10" MacroTools = "0.5.6" OSQP = "0.6" From 45fe43687ce43f397d3180cafcbd33cc32ee3cf1 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 8 Mar 2022 15:34:05 +0100 Subject: [PATCH 004/109] prepare a macro for inheriting the accessors --- src/base/macros/model_wrapper.jl | 30 +++++++++++ src/base/macros/serialized.jl | 4 +- src/base/types/CoreModelCoupled.jl | 82 ++---------------------------- 3 files changed, 35 insertions(+), 81 deletions(-) create mode 100644 src/base/macros/model_wrapper.jl diff --git a/src/base/macros/model_wrapper.jl b/src/base/macros/model_wrapper.jl new file mode 100644 index 000000000..d7086cfa6 --- /dev/null +++ b/src/base/macros/model_wrapper.jl @@ -0,0 +1,30 @@ +""" + @_inherit_model_methods + +Generates trivial accessor functions 
listed in `fns` for a model that is +wrapped in type `mtype` as field `member`. +""" +macro _inherit_model_methods(mtype::Symbol, arglist, member::Symbol, fwdlist, fns...) + Expr( + :block, + ( + begin + header = Expr(:call, fn, :(model::$mtype), arglist.args...) + call = Expr(:call, fn, :(model.$member), fwdlist.args...) + esc( + Expr( + :macrocall, + Symbol("@doc"), + __source__, + """ + $header + + Evaluates [`$fn`](@ref) on the model contained in $mtype. + """, + Expr(:(=), header, Expr(:block, __source__, call)), + ), + ) + end for fn in fns + )..., + ) +end diff --git a/src/base/macros/serialized.jl b/src/base/macros/serialized.jl index 65ca54159..0883e5a91 100644 --- a/src/base/macros/serialized.jl +++ b/src/base/macros/serialized.jl @@ -2,8 +2,8 @@ @_serialized_change_unwrap function Creates a simple wrapper structure that calls the `function` transparently on -the internal precached model. Internal type is returned (because this would -break the consistency of serialization). +the internal precached model. The internal type is returned (otherwise this +would break the consistency of serialization). """ macro _serialized_change_unwrap(fn::Symbol) docstring = """ diff --git a/src/base/types/CoreModelCoupled.jl b/src/base/types/CoreModelCoupled.jl index 69b4d0222..de14bc4ea 100644 --- a/src/base/types/CoreModelCoupled.jl +++ b/src/base/types/CoreModelCoupled.jl @@ -23,53 +23,7 @@ mutable struct CoreModelCoupled <: MetabolicModel end end -""" - reactions(a::CoreModelCoupled) - -Extract reactions from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). -""" -reactions(a::CoreModelCoupled) = reactions(a.lm) - -""" - metabolites(a::CoreModelCoupled) - -Extract metabolites from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). -""" -metabolites(a::CoreModelCoupled) = metabolites(a.lm) - -""" - stoichiometry(a::CoreModelCoupled) - -Extract stoichiometry from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). 
-""" -stoichiometry(a::CoreModelCoupled) = stoichiometry(a.lm) - -""" - bounds(a::CoreModelCoupled) - -Extract bounds from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). -""" -bounds(a::CoreModelCoupled) = bounds(a.lm) - -""" - balance(a::CoreModelCoupled) - -Extract balance from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). -""" -balance(a::CoreModelCoupled) = balance(a.lm) - -""" - objective(a::CoreModelCoupled) - -Extract objective from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). -""" -objective(a::CoreModelCoupled) = objective(a.lm) +@_inherit_model_methods CoreModelCoupled () lm () reactions metabolites stoichiometry bounds balance objective """ coupling(a::CoreModelCoupled)::SparseMat @@ -92,21 +46,8 @@ Coupling bounds for a `CoreModelCoupled`. """ coupling_bounds(a::CoreModelCoupled)::Tuple{Vector{Float64},Vector{Float64}} = (a.cl, a.cu) -""" - reaction_stoichiometry(model::CoreModelCoupled, rid::String)::Dict{String, Float64} - -Return the stoichiometry of reaction with ID `rid`. -""" -reaction_stoichiometry(m::CoreModelCoupled, rid::String) = reaction_stoichiometry(m.lm, rid) - -""" - reaction_stoichiometry(model::CoreModelCoupled, ridx)::Dict{String, Float64} - -Return the stoichiometry of reaction at index `ridx`. -""" -function reaction_stoichiometry(m::CoreModelCoupled, ridx)::Dict{String,Float64} - reaction_stoichiometry(m.lm, ridx) -end +@_inherit_model_methods CoreModelCoupled (rid::String,) lm (rid,) reaction_stoichiometry reaction_gene_association +@_inherit_model_methods CoreModelCoupled (ridx::Int,) lm (ridx,) reaction_stoichiometry """ reaction_gene_association_vec(model::CoreModelCoupled)::Vector{Maybe{GeneAssociation}} @@ -117,23 +58,6 @@ same order as `reactions(model)`. 
reaction_gene_association_vec(model::CoreModelCoupled)::Vector{Maybe{GeneAssociation}} = reaction_gene_association_vec(model.lm) -""" - reaction_gene_association(model::CoreModelCoupled, ridx::Int)::Maybe{GeneAssociation} - -Retrieve the [`GeneAssociation`](@ref) from [`CoreModelCoupled`](@ref) by reaction -index. -""" -reaction_gene_association(model::CoreModelCoupled, ridx::Int)::Maybe{GeneAssociation} = - reaction_gene_association(model.lm, ridx) - -""" - reaction_gene_association(model::CoreModelCoupled, rid::String)::Maybe{GeneAssociation} - -Retrieve the [`GeneAssociation`](@ref) from [`CoreModelCoupled`](@ref) by reaction ID. -""" -reaction_gene_association(model::CoreModelCoupled, rid::String)::Maybe{GeneAssociation} = - reaction_gene_association(model.lm, rid) - """ Base.convert(::Type{CoreModelCoupled}, mm::MetabolicModel) From e9e7dd123f98da3e77cab59150a6fa44ae3a7102 Mon Sep 17 00:00:00 2001 From: htpusa Date: Fri, 11 Mar 2022 18:27:14 +0200 Subject: [PATCH 005/109] quickstart example should work now, word missing in documentation --- README.md | 12 ++++++------ src/analysis/screening.jl | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e4bbf9e38..6f2945414 100644 --- a/README.md +++ b/README.md @@ -128,20 +128,20 @@ some reactions were disabled independently: ```julia # convert to a model type that is efficient to modify -m = convert(StandardModel, m) +m = convert(StandardModel, model) # find the model objective value if oxygen or carbon dioxide transports are disabled screen(m, # the base model variants=[ # this specifies how to generate the desired model variants [], # one with no modifications, i.e. 
the base case - [with_changed_bound("O2t", lower=0.0, upper=0.0)], # disable oxygen - [with_changed_bound("CO2t", lower=0.0, upper=0.0)], # disable CO2 - [with_changed_bound("O2t", lower=0.0, upper=0.0), - with_changed_bound("CO2t", lower=0.0, upper=0.0)], # disable both + [with_changed_bound("R_O2t", lower=0.0, upper=0.0)], # disable oxygen + [with_changed_bound("R_CO2t", lower=0.0, upper=0.0)], # disable CO2 + [with_changed_bound("R_O2t", lower=0.0, upper=0.0), + with_changed_bound("R_CO2t", lower=0.0, upper=0.0)], # disable both ], # this specifies what to do with the model variants (received as the argument `x`) analysis = x -> - flux_balance_analysis_dict(x, Tulip.Optimizer)["BIOMASS_Ecoli_core_w_GAM"], + flux_balance_analysis_dict(x, Tulip.Optimizer)["R_BIOMASS_Ecoli_core_w_GAM"], ) ``` You should receive a result showing that missing oxygen transport makes the diff --git a/src/analysis/screening.jl b/src/analysis/screening.jl index acfd1daa8..55ba83020 100644 --- a/src/analysis/screening.jl +++ b/src/analysis/screening.jl @@ -75,7 +75,7 @@ from pure Julia structures, because they may be transferred over the network between the computation nodes. For that reason, functions that return whole JuMP models that contain pointers to allocated C structures (such as [`flux_balance_analysis`](@ref) used with `GLPK` or `Gurobi` otimizers) will -generally not in this context. +generally not work in this context. Note: this function is a thin argument-handling wrapper around [`_screen_impl`](@ref). 
From d03f4e58c4f75d44f85f5b1a4cdf3292b0a4d60c Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 17 Mar 2022 14:22:15 +0100 Subject: [PATCH 006/109] fix argument forwarding in find_biomass_reaction_ids --- src/base/utils/looks_like.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/utils/looks_like.jl b/src/base/utils/looks_like.jl index b3f05577b..524a38daa 100644 --- a/src/base/utils/looks_like.jl +++ b/src/base/utils/looks_like.jl @@ -96,7 +96,7 @@ Shortcut for finding biomass reaction identifiers in a model; arguments are forwarded to [`looks_like_biomass_reaction`](@ref). """ find_biomass_reaction_ids(m::MetabolicModel; kwargs...) = - filter(id -> looks_like_biomass_reaction(id, kwargs...), reactions(m)) + filter(id -> looks_like_biomass_reaction(id; kwargs...), reactions(m)) """ looks_like_extracellular_metabolite(rxn_id::String; From 5ee72be799a4f7161d97b1cef62b519147f108b3 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 17 Mar 2022 14:30:15 +0100 Subject: [PATCH 007/109] realign one comment --- src/base/utils/looks_like.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/utils/looks_like.jl b/src/base/utils/looks_like.jl index 524a38daa..1b29f23f8 100644 --- a/src/base/utils/looks_like.jl +++ b/src/base/utils/looks_like.jl @@ -101,7 +101,7 @@ find_biomass_reaction_ids(m::MetabolicModel; kwargs...) = """ looks_like_extracellular_metabolite(rxn_id::String; extracellular_suffixes = _constants.extracellular_suffixes, - )::Bool + )::Bool A predicate that matches metabolite identifiers that look like they are extracellular metabolites. 
Extracellular metabolites are identified by `extracellular_suffixes` at the end of the From 9b73f24267f4530c7c905adb4e2740b9851cd737 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 23 Mar 2022 00:22:46 +0000 Subject: [PATCH 008/109] CompatHelper: bump compat for "DistributedData" to "0.2" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5e5613bc1..57c6b667b 100644 --- a/Project.toml +++ b/Project.toml @@ -21,7 +21,7 @@ StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] -DistributedData = "0.1.4" +DistributedData = "0.1.4, 0.2" JSON = "0.21" JuMP = "0.21.0, 0.22.0, 0.23" MAT = "0.10" From 71e506581de2c1169a8cf93102dc845c3ce37094 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 25 Mar 2022 00:19:55 +0000 Subject: [PATCH 009/109] CompatHelper: bump compat for "JuMP" to "1" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 57c6b667b..c422896eb 100644 --- a/Project.toml +++ b/Project.toml @@ -23,7 +23,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] DistributedData = "0.1.4, 0.2" JSON = "0.21" -JuMP = "0.21.0, 0.22.0, 0.23" +JuMP = "0.21.0, 0.22.0, 0.23, 1" MAT = "0.10" MacroTools = "0.5.6" OSQP = "0.6" From 17852a1b9307aa36b1acf797e54a21d12bdbb431 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Fri, 1 Apr 2022 13:55:15 +0200 Subject: [PATCH 010/109] add deprecation warning --- src/analysis/modifications/moment.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/analysis/modifications/moment.jl b/src/analysis/modifications/moment.jl index bcde28ca5..5a405a5dd 100644 --- a/src/analysis/modifications/moment.jl +++ b/src/analysis/modifications/moment.jl @@ -36,6 +36,7 @@ flux_balance_analysis( """ add_moment_constraints(kcats::Dict{String,Float64}, protein_mass_fraction::Float64) = (model, opt_model) -> begin + @warn("DEPRECATION WARNING: this function will be removed in future versions of COBREXA.jl in favor of a GECKO based formulation.") lbs, ubs = get_optmodel_bounds(opt_model) # to assign directions # get grrs and ignore empty blocks: TODO: fix importing to avoid this ugly conditional see #462 From 3e8e86f61382c9c74b0b3dda3ac30036ca56e64d Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Fri, 1 Apr 2022 14:27:09 +0200 Subject: [PATCH 011/109] format --- src/analysis/modifications/moment.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/analysis/modifications/moment.jl b/src/analysis/modifications/moment.jl index 5a405a5dd..89b8fdf9c 100644 --- a/src/analysis/modifications/moment.jl +++ b/src/analysis/modifications/moment.jl @@ -36,7 +36,9 @@ flux_balance_analysis( """ add_moment_constraints(kcats::Dict{String,Float64}, protein_mass_fraction::Float64) = (model, opt_model) -> begin - @warn("DEPRECATION WARNING: this function will be removed in future versions of COBREXA.jl in favor of a GECKO based formulation.") + @warn( + "DEPRECATION WARNING: this function will be removed in future versions of COBREXA.jl in favor of a GECKO based formulation." 
+ ) lbs, ubs = get_optmodel_bounds(opt_model) # to assign directions # get grrs and ignore empty blocks: TODO: fix importing to avoid this ugly conditional see #462 From 78f0d6bfeeb00d843a5b2e3fbf6de873eef1306d Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sun, 3 Apr 2022 12:19:52 +0200 Subject: [PATCH 012/109] make the model wrapping a bit more systematic --- src/base/macros/model_wrapper.jl | 56 +++- src/base/types/MetabolicModel.jl | 308 +++++++++++++++++++++ src/base/types/ModelWrapper.jl | 23 ++ src/base/types/Serialized.jl | 37 +-- src/base/types/abstract/MetabolicModel.jl | 312 +--------------------- 5 files changed, 398 insertions(+), 338 deletions(-) create mode 100644 src/base/types/MetabolicModel.jl create mode 100644 src/base/types/ModelWrapper.jl diff --git a/src/base/macros/model_wrapper.jl b/src/base/macros/model_wrapper.jl index d7086cfa6..ccbdb0e29 100644 --- a/src/base/macros/model_wrapper.jl +++ b/src/base/macros/model_wrapper.jl @@ -1,30 +1,72 @@ + """ - @_inherit_model_methods + _inherit_model_methods_impl(mtype::Symbol, arglist, access, fwdlist, fns...) -Generates trivial accessor functions listed in `fns` for a model that is -wrapped in type `mtype` as field `member`. +A helper backend for [`@_inherit_model_methods`](@ref) and +[`@_inherit_model_methods_fn`](@ref). """ -macro _inherit_model_methods(mtype::Symbol, arglist, member::Symbol, fwdlist, fns...) +function _inherit_model_methods_impl( + source, + mtype::Symbol, + arglist, + access, + fwdlist, + fns..., +) Expr( :block, ( begin header = Expr(:call, fn, :(model::$mtype), arglist.args...) - call = Expr(:call, fn, :(model.$member), fwdlist.args...) + call = Expr(:call, fn, access(:model), fwdlist.args...) esc( Expr( :macrocall, Symbol("@doc"), - __source__, + source, """ $header Evaluates [`$fn`](@ref) on the model contained in $mtype. 
""", - Expr(:(=), header, Expr(:block, __source__, call)), + Expr(:(=), header, Expr(:block, source, call)), ), ) end for fn in fns )..., ) end + +""" + @_inherit_model_methods + +Generates trivial accessor functions listed in `fns` for a model that is +wrapped in type `mtype` as field `member`. +""" +macro _inherit_model_methods(mtype::Symbol, arglist, member::Symbol, fwdlist, fns...) + _inherit_model_methods_impl( + __source__, + mtype, + arglist, + sym -> :($sym.$member), + fwdlist, + fns..., + ) +end + +""" + @_inherit_model_methods_fn + +A more generic version of [`@_inherit_model_methods`](@ref) that accesses the +"inner" model using an accessor function name. +""" +macro _inherit_model_methods_fn(mtype::Symbol, arglist, accessor, fwdlist, fns...) + _inherit_model_methods_impl( + __source__, + mtype, + arglist, + sym -> :($accessor($sym)), + fwdlist, + fns..., + ) +end diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl new file mode 100644 index 000000000..131be6d07 --- /dev/null +++ b/src/base/types/MetabolicModel.jl @@ -0,0 +1,308 @@ + +# +# IMPORTANT +# +# This file provides a list of "officially supported" accessors that should +# work with all subtypes of [`MetabolicModel`](@ref). Keep this synced with the +# automatically derived methods for [`ModelWrapper`](@ref). +# + +""" + reactions(a::MetabolicModel)::Vector{String} + +Return a vector of reaction identifiers in a model. +""" +function reactions(a::MetabolicModel)::Vector{String} + _missing_impl_error(reactions, (a,)) +end + +""" + metabolites(a::MetabolicModel)::Vector{String} + +Return a vector of metabolite identifiers in a model. +""" +function metabolites(a::MetabolicModel)::Vector{String} + _missing_impl_error(metabolites, (a,)) +end + +""" + n_reactions(a::MetabolicModel)::Int + +Get the number of reactions in a model. 
+""" +function n_reactions(a::MetabolicModel)::Int + length(reactions(a)) +end + +""" + n_metabolites(a::MetabolicModel)::Int + +Get the number of metabolites in a model. +""" +function n_metabolites(a::MetabolicModel)::Int + length(metabolites(a)) +end + +""" + stoichiometry(a::MetabolicModel)::SparseMat + +Get the sparse stoichiometry matrix of a model. +""" +function stoichiometry(a::MetabolicModel)::SparseMat + _missing_impl_error(stoichiometry, (a,)) +end + +""" + bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} + +Get the lower and upper flux bounds of a model. +""" +function bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} + _missing_impl_error(bounds, (a,)) +end + +""" + balance(a::MetabolicModel)::SparseVec + +Get the sparse balance vector of a model (ie. the `b` from `S x = b`). +""" +function balance(a::MetabolicModel)::SparseVec + return spzeros(n_metabolites(a)) +end + +""" + objective(a::MetabolicModel)::SparseVec + +Get the objective vector of a model. +""" +function objective(a::MetabolicModel)::SparseVec + _missing_impl_error(objective, (a,)) +end + +""" + coupling(a::MetabolicModel)::SparseMat + +Get a matrix of coupling constraint definitions of a model. By default, there +is no coupling in the models. +""" +function coupling(a::MetabolicModel)::SparseMat + return spzeros(0, n_reactions(a)) +end + +""" + n_coupling_constraints(a::MetabolicModel)::Int + +Get the number of coupling constraints in a model. +""" +function n_coupling_constraints(a::MetabolicModel)::Int + size(coupling(a), 1) +end + +""" + coupling_bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} + +Get the lower and upper bounds for each coupling bound in a model, as specified +by `coupling`. By default, the model does not have any coupling bounds. 
+""" +function coupling_bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} + return (spzeros(0), spzeros(0)) +end + +""" + genes(a::MetabolicModel)::Vector{String} + +Return identifiers of all genes contained in the model. By default, there are +no genes. + +In SBML, these are usually called "gene products" but we write `genes` for +simplicity. +""" +function genes(a::MetabolicModel)::Vector{String} + return [] +end + +""" + n_genes(a::MetabolicModel)::Int + +Return the number of genes in the model (as returned by [`genes`](@ref)). If +you just need the number of the genes, this may be much more efficient than +calling [`genes`](@ref) and measuring the array. +""" +function n_genes(a::MetabolicModel)::Int + return length(genes(a)) +end + +""" + reaction_gene_association(a::MetabolicModel, reaction_id::String)::Maybe{GeneAssociation} + +Returns the sets of genes that need to be present so that the reaction can work +(technically, a DNF on gene availability, with positive atoms only). + +For simplicity, `nothing` may be returned, meaning that the reaction always +takes place. (in DNF, that would be equivalent to returning `[[]]`.) +""" +function reaction_gene_association( + a::MetabolicModel, + reaction_id::String, +)::Maybe{GeneAssociation} + return nothing +end + +""" + reaction_subsystem(model::MetabolicModel, reaction_id::String)::Maybe{String} + +Return the subsystem of reaction `reaction_id` in `model` if it is assigned. If not, +return `nothing`. +""" +function reaction_subsystem(model::MetabolicModel, reaction_id::String)::Maybe{String} + return nothing +end + +""" + reaction_stoichiometry(model::MetabolicModel, rid::String)::Dict{String, Float64} + +Return the stoichiometry of reaction with ID `rid` in the model. The dictionary +maps the metabolite IDs to their stoichiometric coefficients.
+""" +function reaction_stoichiometry(m::MetabolicModel, rid::String)::Dict{String,Float64} + mets = metabolites(m) + Dict( + mets[k] => v for + (k, v) in zip(findnz(stoichiometry(m)[:, first(indexin([rid], reactions(m)))])...) + ) +end + +""" + metabolite_formula( + a::MetabolicModel, + metabolite_id::String, + )::Maybe{MetaboliteFormula} + +Return the formula of metabolite `metabolite_id` in `model`. +Return `nothing` in case the formula is not known or irrelevant. +""" +function metabolite_formula( + model::MetabolicModel, + metabolite_id::String, +)::Maybe{MetaboliteFormula} + return nothing +end + +""" + metabolite_charge(model::MetabolicModel, metabolite_id::String)::Maybe{Int} + +Return the charge associated with metabolite `metabolite_id` in `model`. +Returns `nothing` if charge not present. +""" +function metabolite_charge(model::MetabolicModel, metabolite_id::String)::Maybe{Int} + return nothing +end + +""" + metabolite_compartment(model::MetabolicModel, metabolite_id::String)::Maybe{String} + +Return the compartment of metabolite `metabolite_id` in `model` if it is assigned. If not, +return `nothing`. +""" +function metabolite_compartment(model::MetabolicModel, metabolite_id::String)::Maybe{String} + return nothing +end + +""" + reaction_annotations(a::MetabolicModel, reaction_id::String)::Annotations + +Return standardized names that may help identifying the reaction. The +dictionary assigns vectors of possible identifiers to identifier system names, +e.g. `"Reactome" => ["reactomeID123"]`. +""" +function reaction_annotations(a::MetabolicModel, reaction_id::String)::Annotations + return Dict() +end + +""" + metabolite_annotations(a::MetabolicModel, metabolite_id::String)::Annotations + +Return standardized names that may help to reliably identify the metabolite. The +dictionary assigns vectors of possible identifiers to identifier system names, +e.g. `"ChEMBL" => ["123"]` or `"PubChem" => ["CID123", "CID654645645"]`. 
+""" +function metabolite_annotations(a::MetabolicModel, metabolite_id::String)::Annotations + return Dict() +end + +""" + gene_annotations(a::MetabolicModel, gene_id::String)::Annotations + +Return standardized names that identify the corresponding gene or product. The +dictionary assigns vectors of possible identifiers to identifier system names, +e.g. `"PDB" => ["PROT01"]`. +""" +function gene_annotations(a::MetabolicModel, gene_id::String)::Annotations + return Dict() +end + +""" + reaction_notes(model::MetabolicModel, reaction_id::String)::Notes + +Return the notes associated with reaction `reaction_id` in `model`. +""" +function reaction_notes(model::MetabolicModel, reaction_id::String)::Notes + return Dict() +end + +""" + metabolite_notes(model::MetabolicModel, metabolite_id::String)::Notes + +Return the notes associated with metabolite `metabolite_id` in `model`. +""" +function metabolite_notes(model::MetabolicModel, metabolite_id::String)::Notes + return Dict() +end + +""" + gene_notes(model::MetabolicModel, gene_id::String)::Notes + +Return the notes associated with the gene `gene_id` in `model`. +""" +function gene_notes(model::MetabolicModel, gene_id::String)::Notes + return Dict() +end + +""" + reaction_name(model::MetabolicModel, rid::String) + +Return the name of reaction with ID `rid`. +""" +reaction_name(model::MetabolicModel, rid::String) = nothing + +""" + metabolite_name(model::MetabolicModel, mid::String) + +Return the name of metabolite with ID `mid`. +""" +metabolite_name(model::MetabolicModel, mid::String) = nothing + +""" + gene_name(model::MetabolicModel, gid::String) + +Return the name of gene with ID `gid`. +""" +gene_name(model::MetabolicModel, gid::String) = nothing + +""" + precache!(a::MetabolicModel)::Nothing + +Do whatever is feasible to get the model into a state that can be read from +as-quickly-as-possible. This may include e.g. generating helper index +structures and loading delayed parts of the model from disk.
The model should +be modified "transparently" in-place. Analysis functions call this right before +applying modifications or converting the model to the optimization model using +[`make_optimization_model`](@ref); usually on the same machine where the +optimizers (and, generally, the core analysis algorithms) will run. The calls +are done in a good hope that the performance will be improved. + +By default, it should be safe to do nothing. +""" +function precache!(a::MetabolicModel)::Nothing + nothing +end diff --git a/src/base/types/ModelWrapper.jl b/src/base/types/ModelWrapper.jl new file mode 100644 index 000000000..0b62714a2 --- /dev/null +++ b/src/base/types/ModelWrapper.jl @@ -0,0 +1,23 @@ + +""" + unwrap_model(a::ModelWrapper) + +A simple helper to pick the single wrapped model. +""" +function unwrap_model(a::ModelWrapper) + _missing_impl_error(unwrap_model, (a,)) +end + +# +# IMPORTANT +# +# The list of inherited functions must be synced with the methods available for [`MetabolicModel`](@ref). +# + +@_inherit_model_methods_fn ModelWrapper () unwrap_model () reactions metabolites stoichiometry bounds balance objective coupling n_coupling_constraints coupling_bounds genes n_genes precache! + +@_inherit_model_methods_fn ModelWrapper (rid::String,) unwrap_model (rid,) reaction_gene_association reaction_subsystem reaction_stoichiometry reaction_annotations reaction_notes + +@_inherit_model_methods_fn ModelWrapper (mid::String,) unwrap_model (mid,) metabolite_formula metabolite_charge metabolite_compartment metabolite_annotations metabolite_notes + +@_inherit_model_methods_fn ModelWrapper (gid::String,) unwrap_model (gid,) gene_annotations gene_notes diff --git a/src/base/types/Serialized.jl b/src/base/types/Serialized.jl index 722cbd7f9..1d1741816 100644 --- a/src/base/types/Serialized.jl +++ b/src/base/types/Serialized.jl @@ -9,7 +9,7 @@ A meta-model that represents a model that is serialized on the disk.
The internal model will be loaded on-demand by using any accessor, or by calling [`precache!`](@ref) directly. """ -mutable struct Serialized{M} <: MetabolicModel where {M<:MetabolicModel} +mutable struct Serialized{M} <: ModelWrapper where {M<:MetabolicModel} m::Maybe{M} filename::String @@ -18,37 +18,16 @@ mutable struct Serialized{M} <: MetabolicModel where {M<:MetabolicModel} new{T}(model, filename) end -function _on_precached(m::Serialized, f, args...) +""" + unwrap_model(m::Serialized) + +Unwrap the serialized model (precaching it transparently). +""" +function unwrap_model(m::Serialized) precache!(m) - f(m.m, args...) + m.m end -reactions(m::Serialized) = _on_precached(m, reactions) -n_reactions(m::Serialized) = _on_precached(m, n_reactions) -metabolites(m::Serialized) = _on_precached(m, metabolites) -n_metabolites(m::Serialized) = _on_precached(m, n_metabolites) -stoichiometry(m::Serialized) = _on_precached(m, stoichiometry) -bounds(m::Serialized) = _on_precached(m, bounds) -balance(m::Serialized) = _on_precached(m, balance) -objective(m::Serialized) = _on_precached(m, objective) -coupling(m::Serialized) = _on_precached(m, coupling) -n_coupling_constraints(m::Serialized) = _on_precached(m, n_coupling_constraints) -coupling_bounds(m::Serialized) = _on_precached(m, coupling_bounds) -genes(m::Serialized) = _on_precached(m, genes) -n_genes(m::Serialized) = _on_precached(m, n_genes) -metabolite_formula(m::Serialized, id::String) = _on_precached(m, metabolite_formula, id) -metabolite_charge(m::Serialized, id::String) = _on_precached(m, metabolite_charge, id) -reaction_annotations(m::Serialized, id::String) = _on_precached(m, reaction_annotations, id) -metabolite_annotations(m::Serialized, id::String) = - _on_precached(m, metabolite_annotations, id) -gene_annotations(m::Serialized, id::String) = _on_precached(m, gene_annotations, id) -reaction_notes(m::Serialized, id::String) = _on_precached(m, reaction_notes, id) -metabolite_notes(m::Serialized, id::String) = 
_on_precached(m, metabolite_notes, id) -gene_notes(m::Serialized, id::String) = _on_precached(m, gene_notes, id) -metabolite_compartment(m::Serialized, id::String) = - _on_precached(m, metabolite_compartment, id) -reaction_subsystem(m::Serialized, id::String) = _on_precached(m, reaction_subsystem, id) - """ precache!(model::Serialized{MetabolicModel})::Nothing diff --git a/src/base/types/abstract/MetabolicModel.jl b/src/base/types/abstract/MetabolicModel.jl index a598fa423..27af601df 100644 --- a/src/base/types/abstract/MetabolicModel.jl +++ b/src/base/types/abstract/MetabolicModel.jl @@ -2,8 +2,8 @@ """ abstract type MetabolicModel end -A helper supertype that wraps everything usable as a linear-like model for -COBREXA functions. +A helper supertype of everything usable as a linear-like model for COBREXA +functions. If you want your model type to work with COBREXA, add the `MetabolicModel` as its supertype, and implement the accessor functions. Accessors @@ -13,6 +13,14 @@ mandatory and default to safe "empty" values. """ abstract type MetabolicModel end +""" + abstract type ModelWrapper <: MetabolicModel end + +A helper supertype of all "wrapper" types that contain precisely one other +[`MetabolicModel`](@ref). +""" +abstract type ModelWrapper <: MetabolicModel end + const SparseMat = SparseMatrixCSC{Float64,Int} const SparseVec = SparseVector{Float64,Int} const MatType = AbstractMatrix{Float64} @@ -58,303 +66,3 @@ Free-form notes about something (e.g. a [`Gene`](@ref)), categorized by const Notes = Dict{String,Vector{String}} _missing_impl_error(m, a) = throw(MethodError(m, a)) - -""" - reactions(a::MetabolicModel)::Vector{String} - -Return a vector of reaction identifiers in a model. -""" -function reactions(a::MetabolicModel)::Vector{String} - _missing_impl_error(reactions, (a,)) -end - -""" - metabolites(a::MetabolicModel)::Vector{String} - -Return a vector of metabolite identifiers in a model. 
-""" -function metabolites(a::MetabolicModel)::Vector{String} - _missing_impl_error(metabolites, (a,)) -end - -""" - n_reactions(a::MetabolicModel)::Int - -Get the number of reactions in a model. -""" -function n_reactions(a::MetabolicModel)::Int - length(reactions(a)) -end - -""" - n_metabolites(a::MetabolicModel)::Int - -Get the number of metabolites in a model. -""" -function n_metabolites(a::MetabolicModel)::Int - length(metabolites(a)) -end - -""" - stoichiometry(a::MetabolicModel)::SparseMat - -Get the sparse stoichiometry matrix of a model. -""" -function stoichiometry(a::MetabolicModel)::SparseMat - _missing_impl_error(stoichiometry, (a,)) -end - -""" - bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} - -Get the lower and upper flux bounds of a model. -""" -function bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} - _missing_impl_error(bounds, (a,)) -end - -""" - balance(a::MetabolicModel)::SparseVec - -Get the sparse balance vector of a model (ie. the `b` from `S x = b`). -""" -function balance(a::MetabolicModel)::SparseVec - return spzeros(n_metabolites(a)) -end - -""" - objective(a::MetabolicModel)::SparseVec - -Get the objective vector of a model. -""" -function objective(a::MetabolicModel)::SparseVec - _missing_impl_error(objective, (a,)) -end - -""" - coupling(a::MetabolicModel)::SparseMat - -Get a matrix of coupling constraint definitions of a model. By default, there -is no coupling in the models. -""" -function coupling(a::MetabolicModel)::SparseMat - return spzeros(0, n_reactions(a)) -end - -""" - n_coupling_constraints(a::MetabolicModel)::Int - -Get the number of coupling constraints in a model. -""" -function n_coupling_constraints(a::MetabolicModel)::Int - size(coupling(a), 1) -end - -""" - coupling_bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} - -Get the lower and upper bounds for each coupling bound in a model, as specified -by `coupling`. 
By default, the model does not have any coupling bounds. -""" -function coupling_bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} - return (spzeros(0), spzeros(0)) -end - -""" - genes(a::MetabolicModel)::Vector{String} - -Return identifiers of all genes contained in the model. By default, there are -no genes. - -In SBML, these are usually called "gene products" but we write `genes` for -simplicity. -""" -function genes(a::MetabolicModel)::Vector{String} - return [] -end - -""" - n_genes(a::MetabolicModel)::Int - -Return the number of genes in the model (as returned by [`genes`](@ref)). If -you just need the number of the genes, this may be much more efficient than -calling [`genes`](@ref) and measuring the array. -""" -function n_genes(a::MetabolicModel)::Int - return length(genes(a)) -end - -""" - reaction_gene_association(a::MetabolicModel, gene_id::String)::Maybe{GeneAssociation} - -Returns the sets of genes that need to be present so that the reaction can work -(technically, a DNF on gene availability, with positive atoms only). - -For simplicity, `nothing` may be returned, meaning that the reaction always -takes place. (in DNF, that would be equivalent to returning `[[]]`.) -""" -function reaction_gene_association( - a::MetabolicModel, - reaction_id::String, -)::Maybe{GeneAssociation} - return nothing -end - -""" - metabolite_formula( - a::MetabolicModel, - metabolite_id::String, - )::Maybe{MetaboliteFormula} - -Return the formula of metabolite `metabolite_id` in `model`. -Return `nothing` in case the formula is not known or irrelevant. -""" -function metabolite_formula( - model::MetabolicModel, - metabolite_id::String, -)::Maybe{MetaboliteFormula} - return nothing -end - -""" - metabolite_charge(model::MetabolicModel, metabolite_id::String)::Maybe{Int} - -Return the charge associated with metabolite `metabolite_id` in `model`. -Returns `nothing` if charge not present. 
-""" -function metabolite_charge(model::MetabolicModel, metabolite_id::String)::Maybe{Int} - return nothing -end - -""" - reaction_annotations(a::MetabolicModel, reaction_id::String)::Annotations - -Return standardized names that may help identifying the reaction. The -dictionary assigns vectors of possible identifiers to identifier system names, -e.g. `"Reactome" => ["reactomeID123"]`. -""" -function reaction_annotations(a::MetabolicModel, reaction_id::String)::Annotations - return Dict() -end - -""" - metabolite_annotations(a::MetabolicModel, metabolite_id::String)::Annotations - -Return standardized names that may help to reliably identify the metabolite. The -dictionary assigns vectors of possible identifiers to identifier system names, -e.g. `"ChEMBL" => ["123"]` or `"PubChem" => ["CID123", "CID654645645"]`. -""" -function metabolite_annotations(a::MetabolicModel, metabolite_id::String)::Annotations - return Dict() -end - -""" - gene_annotations(a::MetabolicModel, gene_id::String)::Annotations - -Return standardized names that identify the corresponding gene or product. The -dictionary assigns vectors of possible identifiers to identifier system names, -e.g. `"PDB" => ["PROT01"]`. -""" -function gene_annotations(a::MetabolicModel, gene_id::String)::Annotations - return Dict() -end - -""" - reaction_notes(model::MetabolicModel, reaction_id::String)::Notes - -Return the notes associated with reaction `reaction_id` in `model`. -""" -function reaction_notes(model::MetabolicModel, reaction_id::String)::Notes - return Dict() -end - -""" - metabolite_notes(model::MetabolicModel, metabolite_id::String)::Notes - -Return the notes associated with metabolite `reaction_id` in `model`. -""" -function metabolite_notes(model::MetabolicModel, metabolite_id::String)::Notes - return Dict() -end - -""" - gene_notes(model::MetabolicModel, gene_id::String)::Notes - -Return the notes associated with the gene `gene_id` in `model`. 
-""" -function gene_notes(model::MetabolicModel, gene_id::String)::Notes - return Dict() -end - -""" - metabolite_compartment(model::MetabolicModel, metabolite_id::String)::Maybe{String} - -Return the compartment of metabolite `metabolite_id` in `model` if it is assigned. If not, -return `nothing`. -""" -function metabolite_compartment(model::MetabolicModel, metabolite_id::String)::Maybe{String} - return nothing -end - -""" - reaction_subsystem(model::MetabolicModel, reaction_id::String)::Maybe{String} - -Return the subsystem of reaction `reaction_id` in `model` if it is assigned. If not, -return `nothing`. -""" -function reaction_subsystem(model::MetabolicModel, reaction_id::String)::Maybe{String} - return nothing -end - -""" - reaction_stoichiometry(model::MetaboliteModel, rid::String)::Dict{String, Float64} - -Return the stoichiometry of reaction with ID `rid` in the model. The dictionary -maps the metabolite IDs to their stoichiometric coefficients. -""" -function reaction_stoichiometry(m::MetabolicModel, rid::String)::Dict{String,Float64} - mets = metabolites(m) - Dict( - mets[k] => v for - (k, v) in zip(findnz(stoichiometry(m)[:, first(indexin([rid], reactions(m)))])...) - ) -end - -""" - reaction_name(model::MetabolicModel, rid::String) - -Return the name of reaction with ID `rid`. -""" -reaction_name(model::MetabolicModel, rid::String) = nothing - -""" - metabolite_name(model::MetabolicModel, mid::String) - -Return the name of metabolite with ID `mid`. -""" -metabolite_name(model::MetabolicModel, mid::String) = nothing - -""" - gene_name(model::MetabolicModel, gid::String) - -Return the name of gene with ID `gid`. -""" -gene_name(model::MetabolicModel, gid::String) = nothing - -""" - precache!(a::MetabolicModel)::Nothing - -Do whatever is feasible to get the model into a state that can be read from -as-quickly-as-possible. This may include e.g. generating helper index -structures and loading delayed parts of the model from disk. 
The model should -be modified "transparently" in-place. Analysis functions call this right before -applying modifications or converting the model to the optimization model using -[`make_optimization_model`](@ref); usually on the same machine where the -optimizers (and, generally, the core analysis algorithms) will run. The calls -are done in a good hope that the performance will be improved. - -By default, it should be safe to do nothing. -""" -function precache!(a::MetabolicModel)::Nothing - nothing -end From 8c48d8fd2d2ee26b79e3f39ebc7b6be8fdb07559 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sun, 3 Apr 2022 12:20:38 +0200 Subject: [PATCH 013/109] add some forgotten functions to CoreModel and CoreModelCoupled --- src/base/types/CoreModel.jl | 18 ++++++++++++++++++ src/base/types/CoreModelCoupled.jl | 11 +---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/base/types/CoreModel.jl b/src/base/types/CoreModel.jl index dc8b2b19b..4e3d65cb7 100644 --- a/src/base/types/CoreModel.jl +++ b/src/base/types/CoreModel.jl @@ -86,6 +86,24 @@ balance(a::CoreModel)::SparseVec = a.b """ objective(a::CoreModel)::SparseVec = a.c +""" + genes(a::CoreModel)::Vector{String} + +Collect all genes contained in the [`CoreModel`](@ref). The call is expensive +for large models, because the vector is not stored and instead gets rebuilt +each time this function is called. 
+""" +function genes(a::MetabolicModel)::Vector{String} + res = Set{String}() + for grr in a.grrs + isnothing(grr) && continue + for gs in grr + push!(res, gs) + end + end + sort(collect(res)) +end + """ reaction_stoichiometry(model::CoreModel, rid::String)::Dict{String, Float64} diff --git a/src/base/types/CoreModelCoupled.jl b/src/base/types/CoreModelCoupled.jl index de14bc4ea..152843452 100644 --- a/src/base/types/CoreModelCoupled.jl +++ b/src/base/types/CoreModelCoupled.jl @@ -23,7 +23,7 @@ mutable struct CoreModelCoupled <: MetabolicModel end end -@_inherit_model_methods CoreModelCoupled () lm () reactions metabolites stoichiometry bounds balance objective +@_inherit_model_methods CoreModelCoupled () lm () reactions n_reactions metabolites n_metabolites stoichiometry bounds balance objective genes n_genes reaction_gene_association_vec """ coupling(a::CoreModelCoupled)::SparseMat @@ -49,15 +49,6 @@ coupling_bounds(a::CoreModelCoupled)::Tuple{Vector{Float64},Vector{Float64}} = ( @_inherit_model_methods CoreModelCoupled (rid::String,) lm (rid,) reaction_stoichiometry reaction_gene_association @_inherit_model_methods CoreModelCoupled (ridx::Int,) lm (ridx,) reaction_stoichiometry -""" - reaction_gene_association_vec(model::CoreModelCoupled)::Vector{Maybe{GeneAssociation}} - -Retrieve a vector of gene associations in a [`CoreModelCoupled`](@ref), in the -same order as `reactions(model)`. 
-""" -reaction_gene_association_vec(model::CoreModelCoupled)::Vector{Maybe{GeneAssociation}} = - reaction_gene_association_vec(model.lm) - """ Base.convert(::Type{CoreModelCoupled}, mm::MetabolicModel) From fb9c5a8a2e81b6d28a94435b643ee8aae53e0e32 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sun, 3 Apr 2022 12:55:05 +0200 Subject: [PATCH 014/109] reimagine CoreModelCoupled as a wrapper --- src/base/types/CoreModelCoupled.jl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/base/types/CoreModelCoupled.jl b/src/base/types/CoreModelCoupled.jl index 152843452..d40a8143d 100644 --- a/src/base/types/CoreModelCoupled.jl +++ b/src/base/types/CoreModelCoupled.jl @@ -7,7 +7,7 @@ The linear model with additional coupling constraints in the form cₗ ≤ C x ≤ cᵤ ``` """ -mutable struct CoreModelCoupled <: MetabolicModel +mutable struct CoreModelCoupled <: ModelWrapper lm::CoreModel C::SparseMat cl::Vector{Float64} @@ -23,7 +23,12 @@ mutable struct CoreModelCoupled <: MetabolicModel end end -@_inherit_model_methods CoreModelCoupled () lm () reactions n_reactions metabolites n_metabolites stoichiometry bounds balance objective genes n_genes reaction_gene_association_vec +""" + unwrap_model(a::CoreModelCoupled) + +Get the internal [`CoreModel`](@ref) out of [`CoreModelCoupled`](@ref). +""" +unwrap_model(a::CoreModelCoupled) = a.lm """ coupling(a::CoreModelCoupled)::SparseMat @@ -46,7 +51,8 @@ Coupling bounds for a `CoreModelCoupled`. 
""" coupling_bounds(a::CoreModelCoupled)::Tuple{Vector{Float64},Vector{Float64}} = (a.cl, a.cu) -@_inherit_model_methods CoreModelCoupled (rid::String,) lm (rid,) reaction_stoichiometry reaction_gene_association +# these are special for CoreModel-ish models +@_inherit_model_methods CoreModelCoupled () lm () reaction_gene_association_vec @_inherit_model_methods CoreModelCoupled (ridx::Int,) lm (ridx,) reaction_stoichiometry """ @@ -55,6 +61,9 @@ coupling_bounds(a::CoreModelCoupled)::Tuple{Vector{Float64},Vector{Float64}} = ( Make a `CoreModelCoupled` out of any compatible model type. """ function Base.convert(::Type{CoreModelCoupled}, mm::MetabolicModel) + # TODO this might need a bit of rethinking and might be deprecated soon. + # Eventually it seems to me that the coupling should be added as a + # completely generic wrapper. if typeof(mm) == CoreModelCoupled return mm end From 73fff04e760c392052943c8693a260cbe7663e2b Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sun, 3 Apr 2022 14:08:07 +0200 Subject: [PATCH 015/109] make the deprecation warning more specific --- src/analysis/modifications/moment.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analysis/modifications/moment.jl b/src/analysis/modifications/moment.jl index 89b8fdf9c..5c46da8d5 100644 --- a/src/analysis/modifications/moment.jl +++ b/src/analysis/modifications/moment.jl @@ -37,7 +37,7 @@ flux_balance_analysis( add_moment_constraints(kcats::Dict{String,Float64}, protein_mass_fraction::Float64) = (model, opt_model) -> begin @warn( - "DEPRECATION WARNING: this function will be removed in future versions of COBREXA.jl in favor of a GECKO based formulation." 
+ "DEPRECATION WARNING: 'add_moment_constraints' will be removed in future versions of COBREXA.jl in favor of a GECKO-based formulation" ) lbs, ubs = get_optmodel_bounds(opt_model) # to assign directions From 536ae24457e987ebb31039414a35e34cd8244b77 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sun, 3 Apr 2022 17:03:49 +0200 Subject: [PATCH 016/109] abstract out a function for converting optimizer solutions to fluxes --- src/base/types/MetabolicModel.jl | 27 +++++++++++++++++++++++---- src/base/types/ModelWrapper.jl | 2 ++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl index 131be6d07..a3c638fc3 100644 --- a/src/base/types/MetabolicModel.jl +++ b/src/base/types/MetabolicModel.jl @@ -46,7 +46,13 @@ end """ stoichiometry(a::MetabolicModel)::SparseMat -Get the sparse stoichiometry matrix of a model. +Get the sparse stoichiometry matrix of a model. A feasible solution `x` of a +model `m` is defined as satisfying the equations: + +- `stoichiometry(m) * x .== balance(m)` +- `x .>= lbs` +- `x .<= ubs` +- `(lbs, ubs) == bounds(m)` """ function stoichiometry(a::MetabolicModel)::SparseMat _missing_impl_error(stoichiometry, (a,)) @@ -55,7 +61,7 @@ end """ bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} -Get the lower and upper flux bounds of a model. +Get the lower and upper solution bounds of a model. """ function bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} _missing_impl_error(bounds, (a,)) @@ -64,7 +70,7 @@ end """ balance(a::MetabolicModel)::SparseVec -Get the sparse balance vector of a model (ie. the `b` from `S x = b`). +Get the sparse balance vector of a model. """ function balance(a::MetabolicModel)::SparseVec return spzeros(n_metabolites(a)) @@ -73,12 +79,24 @@ end """ objective(a::MetabolicModel)::SparseVec -Get the objective vector of a model. +Get the objective vector of the model.
Analysis functions, such as +[`flux_balance_analysis`](@ref), are supposed to maximize `dot(objective, x)` +where `x` is a feasible solution of the model. """ function objective(a::MetabolicModel)::SparseVec _missing_impl_error(objective, (a,)) end +""" + solution_flux(a::MetabolicModel, solution::Vector{Float64})::Vector{Float64} + +Retrieve a vector of reaction fluxes (corresponding to `reactions(a)`) from a +feasible solution of the optimization problem. +""" +function solution_flux(a::MetabolicModel, solution::Vector{Float64})::Vector{Float64} + solution +end + """ coupling(a::MetabolicModel)::SparseMat @@ -86,6 +104,7 @@ Get a matrix of coupling constraint definitions of a model. By default, there is no coupling in the models. """ function coupling(a::MetabolicModel)::SparseMat + # TODO make this an extension of stoichiometry return spzeros(0, n_reactions(a)) end diff --git a/src/base/types/ModelWrapper.jl b/src/base/types/ModelWrapper.jl index 0b62714a2..b851addbe 100644 --- a/src/base/types/ModelWrapper.jl +++ b/src/base/types/ModelWrapper.jl @@ -16,6 +16,8 @@ end @_inherit_model_methods_fn ModelWrapper () unwrap_model () reactions metabolites stoichiometry bounds balance objective coupling n_coupling_constraints coupling_bounds genes n_genes precache! 
+@_inherit_model_methods_fn ModelWrapper (solution::Vector{Float64},) unwrap_model (solution,) solution_flux + @_inherit_model_methods_fn ModelWrapper (rid::String,) unwrap_model (rid,) reaction_gene_association reaction_subsystem reaction_stoichiometry reaction_annotations reaction_notes @_inherit_model_methods_fn ModelWrapper (mid::String,) unwrap_model (mid,) metabolite_formula metabolite_charge metabolite_compartment metabolite_annotations metabolite_notes From 93978d3b6baedb5bf51cf5529cc3609aa793544e Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sun, 3 Apr 2022 17:20:02 +0200 Subject: [PATCH 017/109] fixup: flux_vec and flux_dict use solution_flux --- src/analysis/flux_balance_analysis.jl | 10 +++++++--- src/analysis/flux_variability_analysis.jl | 8 +++++++- src/analysis/minimize_metabolic_adjustment.jl | 6 +++--- src/analysis/parsimonious_flux_balance_analysis.jl | 6 +++--- src/base/solver.jl | 7 ++++--- src/base/types/ModelWrapper.jl | 4 +++- 6 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/analysis/flux_balance_analysis.jl b/src/analysis/flux_balance_analysis.jl index ae096ea3e..ffd9375b1 100644 --- a/src/analysis/flux_balance_analysis.jl +++ b/src/analysis/flux_balance_analysis.jl @@ -1,5 +1,5 @@ """ - flux_balance_analysis_vec(args...)::Maybe{Vector{Float64}} + flux_balance_analysis_vec(model::MetabolicModel, args...)::Maybe{Vector{Float64}} A variant of FBA that returns a vector of fluxes in the same order as reactions of the model, if the solution is found. @@ -9,8 +9,12 @@ Arguments are passed to [`flux_balance_analysis`](@ref). This function is kept for backwards compatibility, use [`flux_vector`](@ref) instead. 
""" -flux_balance_analysis_vec(args...; kwargs...)::Maybe{Vector{Float64}} = - flux_vector(flux_balance_analysis(args...; kwargs...)) +flux_balance_analysis_vec( + model::MetabolicModel, + args...; + kwargs..., +)::Maybe{Vector{Float64}} = + flux_vector(model, flux_balance_analysis(model, args...; kwargs...)) """ flux_balance_analysis_dict(model::MetabolicModel, args...)::Maybe{Dict{String, Float64}} diff --git a/src/analysis/flux_variability_analysis.jl b/src/analysis/flux_variability_analysis.jl index 8228b16a7..c3f49be38 100644 --- a/src/analysis/flux_variability_analysis.jl +++ b/src/analysis/flux_variability_analysis.jl @@ -64,6 +64,7 @@ function flux_variability_analysis( bounds = z -> (z, Inf), ret = objective_value, ) + #TODO this breaks if the flux doesn't correspond to the solution if any(reactions .< 1) || any(reactions .> n_reactions(model)) throw(DomainError(reactions, "Index exceeds number of reactions.")) end @@ -140,7 +141,12 @@ mins, maxs = flux_variability_analysis_dict( ``` """ function flux_variability_analysis_dict(model::MetabolicModel, optimizer; kwargs...) - fluxes = flux_variability_analysis(model, optimizer; kwargs..., ret = flux_vector) + fluxes = flux_variability_analysis( + model, + optimizer; + kwargs..., + ret = sol -> flux_vector(model, sol), + ) rxns = reactions(model) dicts = zip.(Ref(rxns), fluxes) diff --git a/src/analysis/minimize_metabolic_adjustment.jl b/src/analysis/minimize_metabolic_adjustment.jl index 73fd605e8..d15501f37 100644 --- a/src/analysis/minimize_metabolic_adjustment.jl +++ b/src/analysis/minimize_metabolic_adjustment.jl @@ -82,7 +82,7 @@ minimize_metabolic_adjustment(flux_ref_dict::Dict{String,Float64}) = ) """ - minimize_metabolic_adjustment_analysis_vec(args...; kwargs...) + minimize_metabolic_adjustment_analysis_vec(model::MetabolicModel, args...; kwargs...) Perform minimization of metabolic adjustment (MOMA) and return a vector of fluxes in the same order as the reactions in `model`. 
Arguments are forwarded to @@ -91,8 +91,8 @@ same order as the reactions in `model`. Arguments are forwarded to This function is kept for backwards compatibility, use [`flux_vector`](@ref) instead. """ -minimize_metabolic_adjustment_analysis_vec(args...; kwargs...) = - flux_vector(minimize_metabolic_adjustment_analysis(args...; kwargs...)) +minimize_metabolic_adjustment_analysis_vec(model::MetabolicModel, args...; kwargs...) = + flux_vector(model, minimize_metabolic_adjustment_analysis(model, args...; kwargs...)) """ minimize_metabolic_adjustment_analysis_dict(model::MetabolicModel, args...; kwargs...) diff --git a/src/analysis/parsimonious_flux_balance_analysis.jl b/src/analysis/parsimonious_flux_balance_analysis.jl index 844b3b009..12475f15c 100644 --- a/src/analysis/parsimonious_flux_balance_analysis.jl +++ b/src/analysis/parsimonious_flux_balance_analysis.jl @@ -94,7 +94,7 @@ function parsimonious_flux_balance_analysis( end """ - parsimonious_flux_balance_analysis_vec(args...; kwargs...) + parsimonious_flux_balance_analysis_vec(model::MetabolicModel, args...; kwargs...) Perform parsimonious flux balance analysis on `model` using `optimizer`. Returns a vector of fluxes in the same order as the reactions in `model`. @@ -104,8 +104,8 @@ internally. This function is kept for backwards compatibility, use [`flux_vector`](@ref) instead. """ -parsimonious_flux_balance_analysis_vec(args...; kwargs...) = - flux_vector(parsimonious_flux_balance_analysis(args...; kwargs...)) +parsimonious_flux_balance_analysis_vec(model::MetabolicModel, args...; kwargs...) = + flux_vector(model, parsimonious_flux_balance_analysis(model, args...; kwargs...)) """ parsimonious_flux_balance_analysis_dict(model::MetabolicModel, args...; kwargs...) diff --git a/src/base/solver.jl b/src/base/solver.jl index 641e2bab2..f5d216cfd 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -107,8 +107,8 @@ Returns a vector of fluxes of the model, if solved. 
flux_vector(flux_balance_analysis(model, ...)) ``` """ -flux_vector(opt_model)::Maybe{Vector{Float64}} = - is_solved(opt_model) ? value.(opt_model[:x]) : nothing +flux_vector(model::MetabolicModel, opt_model)::Maybe{Vector{Float64}} = + is_solved(opt_model) ? solution_flux(model, value.(opt_model[:x])) : nothing """ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String, Float64}, Nothing} @@ -121,4 +121,5 @@ flux_dict(model, flux_balance_analysis(model, ...)) ``` """ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = - is_solved(opt_model) ? Dict(reactions(model) .=> value.(opt_model[:x])) : nothing + is_solved(opt_model) ? + Dict(reactions(model) .=> solution_flux(model, value.(opt_model[:x]))) : nothing diff --git a/src/base/types/ModelWrapper.jl b/src/base/types/ModelWrapper.jl index b851addbe..8b314c2a3 100644 --- a/src/base/types/ModelWrapper.jl +++ b/src/base/types/ModelWrapper.jl @@ -16,7 +16,9 @@ end @_inherit_model_methods_fn ModelWrapper () unwrap_model () reactions metabolites stoichiometry bounds balance objective coupling n_coupling_constraints coupling_bounds genes n_genes precache! 
-@_inherit_model_methods_fn ModelWrapper (solution::Vector{Float64},) unwrap_model (solution,) solution_flux +@_inherit_model_methods_fn ModelWrapper (solution::Vector{Float64},) unwrap_model ( + solution, +) solution_flux @_inherit_model_methods_fn ModelWrapper (rid::String,) unwrap_model (rid,) reaction_gene_association reaction_subsystem reaction_stoichiometry reaction_annotations reaction_notes From 0e61a7149a697605db00134c111498db909cd427 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Mon, 4 Apr 2022 16:50:59 +0200 Subject: [PATCH 018/109] side-commit: remove a bit of a freshly discovered trailing whitespace --- docs/src/advanced.md | 1 - docs/src/tutorials.md | 1 - 2 files changed, 2 deletions(-) diff --git a/docs/src/advanced.md b/docs/src/advanced.md index 957256b1a..870069e64 100644 --- a/docs/src/advanced.md +++ b/docs/src/advanced.md @@ -5,4 +5,3 @@ Pages = joinpath.("advanced", filter(x -> endswith(x, ".md"), readdir("advanced"))) Depth = 2 ``` - diff --git a/docs/src/tutorials.md b/docs/src/tutorials.md index 20ef6c374..75bc9ea37 100644 --- a/docs/src/tutorials.md +++ b/docs/src/tutorials.md @@ -5,4 +5,3 @@ Pages = joinpath.("tutorials", filter(x -> endswith(x, ".md"), readdir("tutorials"))) Depth = 2 ``` - From e1059ac29b940205a851bf43cb38ba0c0ce386ad Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Mon, 4 Apr 2022 17:27:27 +0200 Subject: [PATCH 019/109] redo CoreModelCoupled as a more generic CoreCoupling This is in fact applicable to any model to just add "some extra conditions". 
--- src/base/types/CoreModelCoupled.jl | 93 +++++++++++++++++++----------- 1 file changed, 60 insertions(+), 33 deletions(-) diff --git a/src/base/types/CoreModelCoupled.jl b/src/base/types/CoreModelCoupled.jl index d40a8143d..fd1fb4735 100644 --- a/src/base/types/CoreModelCoupled.jl +++ b/src/base/types/CoreModelCoupled.jl @@ -1,73 +1,100 @@ """ - struct CoreModelCoupled <: MetabolicModel + mutable struct CoreCoupling{M} <: ModelWrapper where {M<:MetabolicModel} -The linear model with additional coupling constraints in the form +A matrix-based wrapper that adds a reaction coupling matrix to the inner model. A +flux `x` feasible in this model must satisfy: ``` cₗ ≤ C x ≤ cᵤ ``` """ -mutable struct CoreModelCoupled <: ModelWrapper - lm::CoreModel +mutable struct CoreCoupling{M} <: ModelWrapper where {M<:MetabolicModel} + lm::M C::SparseMat cl::Vector{Float64} cu::Vector{Float64} - function CoreModelCoupled(lm::MetabolicModel, C::MatType, cl::VecType, cu::VecType) + function CoreCoupling( + lm::M, + C::MatType, + cl::VecType, + cu::VecType, + ) where {M<:MetabolicModel} length(cu) == length(cl) || throw(DimensionMismatch("`cl` and `cu` need to have the same size")) size(C) == (length(cu), n_reactions(lm)) || throw(DimensionMismatch("wrong dimensions of `C`")) - new(convert(CoreModel, lm), sparse(C), collect(cl), collect(cu)) + new{M}(lm, sparse(C), collect(cl), collect(cu)) end end """ - unwrap_model(a::CoreModelCoupled) + unwrap_model(a::CoreCoupling) -Get the internal [`CoreModel`](@ref) out of [`CoreModelCoupled`](@ref). +Get the wrapped inner model out of a [`CoreCoupling`](@ref). """ -unwrap_model(a::CoreModelCoupled) = a.lm +unwrap_model(a::CoreCoupling) = a.lm """ - coupling(a::CoreModelCoupled)::SparseMat + coupling(a::CoreCoupling)::SparseMat -Coupling constraint matrix for a `CoreModelCoupled`. +Coupling constraint matrix for a `CoreCoupling`.
""" -coupling(a::CoreModelCoupled)::SparseMat = a.C +coupling(a::CoreCoupling)::SparseMat = vcat(coupling(a.lm), a.C) """ - n_coupling_constraints(a::CoreModelCoupled)::Int + n_coupling_constraints(a::CoreCoupling)::Int -The number of coupling constraints in a `CoreModelCoupled`. +The number of coupling constraints in a `CoreCoupling`. """ -n_coupling_constraints(a::CoreModelCoupled)::Int = size(a.C, 1) +n_coupling_constraints(a::CoreCoupling)::Int = n_coupling_constraints(a.lm) + size(a.C, 1) """ - coupling_bounds(a::CoreModelCoupled)::Tuple{Vector{Float64},Vector{Float64}} + coupling_bounds(a::CoreCoupling)::Tuple{Vector{Float64},Vector{Float64}} -Coupling bounds for a `CoreModelCoupled`. +Coupling bounds for a `CoreCoupling`. """ -coupling_bounds(a::CoreModelCoupled)::Tuple{Vector{Float64},Vector{Float64}} = (a.cl, a.cu) - -# these are special for CoreModel-ish models -@_inherit_model_methods CoreModelCoupled () lm () reaction_gene_association_vec -@_inherit_model_methods CoreModelCoupled (ridx::Int,) lm (ridx,) reaction_stoichiometry +coupling_bounds(a::CoreCoupling)::Tuple{Vector{Float64},Vector{Float64}} = + vcat.(coupling_bounds(a.lm), (a.cl, a.cu)) """ - Base.convert(::Type{CoreModelCoupled}, mm::MetabolicModel) + Base.convert(::Type{CoreCoupling{M}}, mm::MetabolicModel; clone_coupling = true) where {M} -Make a `CoreModelCoupled` out of any compatible model type. +Make a `CoreCoupling` out of any compatible model type. """ -function Base.convert(::Type{CoreModelCoupled}, mm::MetabolicModel) - # TODO this might need a bit of rethinking and might be deprecated soon. - # Eventually it seems to me that the coupling should be added as a - # completely generic wrapper. 
- if typeof(mm) == CoreModelCoupled - return mm +function Base.convert( + ::Type{CoreCoupling{M}}, + mm::MetabolicModel; + clone_coupling = true, +) where {M} + if mm isa CoreCoupling{M} + mm + elseif mm isa CoreCoupling + CoreCoupling(convert(M, mm.lm), mm.C, mm.cl, mm.cu) + elseif clone_coupling + (cl, cu) = coupling_bounds(mm) + CoreCoupling(convert(M, mm), coupling(mm), cl, cu) + else + CoreCoupling(convert(M, mm), spzeros(0, n_reactions(mm)), spzeros(0), spzeros(0)) end - - (cl, cu) = coupling_bounds(mm) - CoreModelCoupled(convert(CoreModel, mm), coupling(mm), cl, cu) end + +""" + const CoreModelCoupled = CoreCoupling{CoreModel} + +A matrix-based linear model with additional coupling constraints in the form: +``` + cₗ ≤ C x ≤ cᵤ +``` + +Internally, the model is implemented using [`CoreCoupling`](@ref) that contains a single [`CoreModel`](@ref). +""" +const CoreModelCoupled = CoreCoupling{CoreModel} + +CoreModelCoupled(lm::CoreModel, C::MatType, cl::VecType, cu::VecType) = + CoreCoupling(lm, sparse(C), collect(cl), collect(cu)) + +# these are special for CoreModel-ish models +@_inherit_model_methods CoreModelCoupled () lm () reaction_gene_association_vec +@_inherit_model_methods CoreModelCoupled (ridx::Int,) lm (ridx,) reaction_stoichiometry From cf7e80cf352a7fbb9b692d831f389b1e57d79b88 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Mon, 4 Apr 2022 18:00:24 +0200 Subject: [PATCH 020/109] generalize some of the CMC modifications to CoreCoupling --- src/reconstruction/CoreModelCoupled.jl | 99 +++++++++++++------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/src/reconstruction/CoreModelCoupled.jl b/src/reconstruction/CoreModelCoupled.jl index 711cd849d..ad2b4a81e 100644 --- a/src/reconstruction/CoreModelCoupled.jl +++ b/src/reconstruction/CoreModelCoupled.jl @@ -165,13 +165,14 @@ function add_reactions( end """ -Add constraints of the following form to a CoreModelCoupled and return a modified one. 
+ add_coupling_constraints(m::CoreCoupling, args...) -Add constraints to a [`CoreModelCoupled`](@ref) and return a modified one. +Add coupling constraints to a copy of a [`CoreCoupling`](@ref) and return the +modified model. The arguments are same as for in-place [`add_coupling_constraints!`](@ref). """ -function add_coupling_constraints(m::CoreModelCoupled, args...) +function add_coupling_constraints(m::CoreCoupling, args...) new_lp = deepcopy(m) add_coupling_constraints!(new_lp, args...) return new_lp @@ -183,12 +184,11 @@ end Add coupling constraints to a plain [`CoreModel`](@ref) (returns a [`CoreModelCoupled`](@ref)). """ -add_coupling_constraints(m::CoreModel, args...) = - add_coupling_constraints(convert(CoreModelCoupled, m), args...) +add_coupling_constraints(m::CoreModel, args...) = CoreModelCoupled(m, args...) """ add_coupling_constraints!( - m::CoreModelCoupled, + m::CoreCoupling, c::VecType, cl::AbstractFloat, cu::AbstractFloat, @@ -197,7 +197,7 @@ add_coupling_constraints(m::CoreModel, args...) = Overload for adding a single coupling constraint. """ function add_coupling_constraints!( - m::CoreModelCoupled, + m::CoreCoupling, c::VecType, cl::AbstractFloat, cu::AbstractFloat, @@ -207,7 +207,7 @@ end """ add_coupling_constraints!( - m::CoreModelCoupled, + m::CoreCoupling, C::MatType, cl::V, cu::V, @@ -219,7 +219,7 @@ In-place add a single coupling constraint in form ``` """ function add_coupling_constraints!( - m::CoreModelCoupled, + m::CoreCoupling, C::MatType, cl::V, cu::V, @@ -237,35 +237,34 @@ function add_coupling_constraints!( end """ - remove_coupling_constraints(m::CoreModelCoupled, args...) + remove_coupling_constraints(m::CoreCoupling, args...) Remove coupling constraints from the linear model, and return the modified model. Arguments are the same as for in-place version [`remove_coupling_constraints!`](@ref). """ -function remove_coupling_constraints(m::CoreModelCoupled, args...) +function remove_coupling_constraints(m::CoreCoupling, args...)
new_model = deepcopy(m) remove_coupling_constraints!(new_model, args...) return new_model end """ - remove_coupling_constraints!(m::CoreModelCoupled, constraint::Int) + remove_coupling_constraints!(m::CoreCoupling, constraint::Int) -Removes a single coupling constraints from a [`CoreModelCoupled`](@ref) -in-place. +Removes a single coupling constraint from a [`CoreCoupling`](@ref) in-place. """ -remove_coupling_constraints!(m::CoreModelCoupled, constraint::Int) = +remove_coupling_constraints!(m::CoreCoupling, constraint::Int) = remove_coupling_constraints!(m, [constraint]) """ - remove_coupling_constraints!(m::CoreModelCoupled, constraints::Vector{Int}) + remove_coupling_constraints!(m::CoreCoupling, constraints::Vector{Int}) -Removes a set of coupling constraints from a [`CoreModelCoupled`](@ref) +Removes a set of coupling constraints from a [`CoreCoupling`](@ref) in-place. """ -function remove_coupling_constraints!(m::CoreModelCoupled, constraints::Vector{Int}) +function remove_coupling_constraints!(m::CoreCoupling, constraints::Vector{Int}) to_be_kept = filter(!in(constraints), 1:n_coupling_constraints(m)) m.C = m.C[to_be_kept, :] m.cl = m.cl[to_be_kept] @@ -275,7 +274,7 @@ end """ change_coupling_bounds!( - model::CoreModelCoupled, + model::CoreCoupling, constraints::Vector{Int}; cl::V = Float64[], cu::V = Float64[], @@ -285,7 +284,7 @@ Change the lower and/or upper bounds (`cl` and `cu`) for the given list of coupling constraints.
""" function change_coupling_bounds!( - model::CoreModelCoupled, + model::CoreCoupling, constraints::Vector{Int}; cl::V = Float64[], cu::V = Float64[], @@ -309,131 +308,131 @@ function change_coupling_bounds!( nothing end -@_change_bounds_fn CoreModelCoupled Int inplace begin +# TODO see if some of these can be derived from ModelWrapper +@_change_bounds_fn CoreCoupling Int inplace begin change_bound!(model.lm, rxn_idx, lower = lower, upper = upper) end -@_change_bounds_fn CoreModelCoupled Int inplace plural begin +@_change_bounds_fn CoreCoupling Int inplace plural begin change_bounds!(model.lm, rxn_idxs, lower = lower, upper = upper) end -@_change_bounds_fn CoreModelCoupled String inplace begin +@_change_bounds_fn CoreCoupling String inplace begin change_bound!(model.lm, rxn_id, lower = lower, upper = upper) end -@_change_bounds_fn CoreModelCoupled String inplace plural begin +@_change_bounds_fn CoreCoupling String inplace plural begin change_bounds!(model.lm, rxn_ids, lower = lower, upper = upper) end -@_change_bounds_fn CoreModelCoupled Int begin +@_change_bounds_fn CoreCoupling Int begin n = copy(model) n.lm = change_bound(model.lm, rxn_idx, lower = lower, upper = upper) n end -@_change_bounds_fn CoreModelCoupled Int plural begin +@_change_bounds_fn CoreCoupling Int plural begin n = copy(model) n.lm = change_bounds(model.lm, rxn_idxs, lower = lower, upper = upper) n end -@_change_bounds_fn CoreModelCoupled String begin +@_change_bounds_fn CoreCoupling String begin n = copy(model) n.lm = change_bound(model.lm, rxn_id, lower = lower, upper = upper) n end -@_change_bounds_fn CoreModelCoupled String plural begin +@_change_bounds_fn CoreCoupling String plural begin n = copy(model) n.lm = change_bounds(model.lm, rxn_ids, lower = lower, upper = upper) n end -@_remove_fn reaction CoreModelCoupled Int inplace begin +@_remove_fn reaction CoreCoupling Int inplace begin remove_reactions!(model, [reaction_idx]) end -@_remove_fn reaction CoreModelCoupled Int inplace plural 
begin +@_remove_fn reaction CoreCoupling Int inplace plural begin orig_rxns = reactions(model.lm) remove_reactions!(model.lm, reaction_idxs) model.C = model.C[:, in.(orig_rxns, Ref(Set(reactions(model.lm))))] nothing end -@_remove_fn reaction CoreModelCoupled Int begin +@_remove_fn reaction CoreCoupling Int begin remove_reactions(model, [reaction_idx]) end -@_remove_fn reaction CoreModelCoupled Int plural begin +@_remove_fn reaction CoreCoupling Int plural begin n = copy(model) n.lm = remove_reactions(n.lm, reaction_idxs) n.C = n.C[:, in.(reactions(model.lm), Ref(Set(reactions(n.lm))))] return n end -@_remove_fn reaction CoreModelCoupled String inplace begin +@_remove_fn reaction CoreCoupling String inplace begin remove_reactions!(model, [reaction_id]) end -@_remove_fn reaction CoreModelCoupled String inplace plural begin +@_remove_fn reaction CoreCoupling String inplace plural begin remove_reactions!(model, Int.(indexin(reaction_ids, reactions(model)))) end -@_remove_fn reaction CoreModelCoupled String begin +@_remove_fn reaction CoreCoupling String begin remove_reactions(model, [reaction_id]) end -@_remove_fn reaction CoreModelCoupled String plural begin +@_remove_fn reaction CoreCoupling String plural begin remove_reactions(model, Int.(indexin(reaction_ids, reactions(model)))) end -@_remove_fn metabolite CoreModelCoupled Int inplace begin +@_remove_fn metabolite CoreCoupling Int inplace begin remove_metabolites!(model, [metabolite_idx]) end -@_remove_fn metabolite CoreModelCoupled Int plural inplace begin +@_remove_fn metabolite CoreCoupling Int plural inplace begin orig_rxns = reactions(model.lm) model.lm = remove_metabolites(model.lm, metabolite_idxs) model.C = model.C[:, in.(orig_rxns, Ref(Set(reactions(model.lm))))] nothing end -@_remove_fn metabolite CoreModelCoupled Int begin +@_remove_fn metabolite CoreCoupling Int begin remove_metabolites(model, [metabolite_idx]) end -@_remove_fn metabolite CoreModelCoupled Int plural begin - n = deepcopy(model) #almost 
everything gets changed anyway - remove_metabolites!(n, metabolite_idxs) +@_remove_fn metabolite CoreCoupling Int plural begin + n = copy(model) + n.lm = remove_metabolites(n.lm, metabolite_idxs) return n end -@_remove_fn metabolite CoreModelCoupled String inplace begin +@_remove_fn metabolite CoreCoupling String inplace begin remove_metabolites!(model, [metabolite_id]) end -@_remove_fn metabolite CoreModelCoupled String inplace plural begin +@_remove_fn metabolite CoreCoupling String inplace plural begin remove_metabolites!(model, Int.(indexin(metabolite_ids, metabolites(model)))) end -@_remove_fn metabolite CoreModelCoupled String begin +@_remove_fn metabolite CoreCoupling String begin remove_metabolites(model, [metabolite_id]) end -@_remove_fn metabolite CoreModelCoupled String plural begin +@_remove_fn metabolite CoreCoupling String plural begin remove_metabolites(model, Int.(indexin(metabolite_ids, metabolites(model)))) end """ change_objective!( - model::CoreModelCoupled, + model::CoreCoupling, args...; kwargs..., ) -Forwards arguments to [`change_objective!`](@ref) of the internal -[`CoreModel`](@ref). +Forwards arguments to [`change_objective!`](@ref) of the internal model. """ -function change_objective!(model::CoreModelCoupled, args...; kwargs...) +function change_objective!(model::CoreCoupling, args...; kwargs...) change_objective!(model.lm, args...; kwargs...) end From aa82aa544c6e9b91b320cdc5b24d15deb10eb17c Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Mon, 4 Apr 2022 18:47:27 +0200 Subject: [PATCH 021/109] fix a blatant inconsistency in logo coloring Preposterous! 
--- src/banner.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/banner.jl b/src/banner.jl index 6ffeafe21..00f62a550 100644 --- a/src/banner.jl +++ b/src/banner.jl @@ -17,16 +17,16 @@ function _print_banner() println( " - $(y)// $(n) | - \\\\\\\\\\ // $(y)// $(n) | $(c[:bold])COBREXA.jl $(c[:normal]) v$(COBREXA_VERSION) - \\\\ \\\\// $(y)// $(n) | - \\\\ \\/ $(y)// $(n) | $(c[:bold])CO$(c[:normal])nstraint-$(c[:bold])B$(c[:normal])ased $(c[:bold])R$(c[:normal])econstruction - \\\\ $(y)// $(n) | and $(c[:bold])EX$(c[:normal])ascale $(c[:bold])A$(c[:normal])nalysis in Julia - // $(y)\\\\ $(n) | - // $(y)/\\ \\\\ $(n) | See documentation and examples at: - // $(y)//\\\\ \\\\ $(n)| https://lcsb-biocore.github.io/COBREXA.jl - // $(y)// \\\\\\\\\\ $(n)| - // $(n)| + $(y)//$(n) | + \\\\\\\\\\ // $(y)//$(n) | $(c[:bold])COBREXA.jl $(c[:normal]) v$(COBREXA_VERSION) + \\\\ \\\\// $(y)//$(n) | + \\\\ \\/ $(y)//$(n) | $(c[:bold])CO$(c[:normal])nstraint-$(c[:bold])B$(c[:normal])ased $(c[:bold])R$(c[:normal])econstruction + \\\\ $(y)//$(n) | and $(c[:bold])EX$(c[:normal])ascale $(c[:bold])A$(c[:normal])nalysis in Julia + // $(y)\\\\$(n) | + // $(y)/\\ \\\\$(n) | See documentation and examples at: + // $(y)//\\\\ \\\\$(n) | https://lcsb-biocore.github.io/COBREXA.jl + // $(y)// \\\\\\\\\\$(n) | + // | ", ) end From 52a9c527150f2db5d2a4643c8dbc7068ca31a6f9 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Mon, 4 Apr 2022 18:50:45 +0200 Subject: [PATCH 022/109] materialize the reactions-to-flux mapping in accessors --- src/base/solver.jl | 4 ++-- src/base/types/MetabolicModel.jl | 36 +++++++++++++++++++++++++++----- src/base/types/ModelWrapper.jl | 6 +----- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/base/solver.jl b/src/base/solver.jl index f5d216cfd..07564c4cd 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -108,7 +108,7 @@ flux_vector(flux_balance_analysis(model, ...)) ``` """ 
flux_vector(model::MetabolicModel, opt_model)::Maybe{Vector{Float64}} = - is_solved(opt_model) ? solution_flux(model, value.(opt_model[:x])) : nothing + is_solved(opt_model) ? reaction_flux(model)' * value.(opt_model[:x]) : nothing """ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String, Float64}, Nothing} @@ -122,4 +122,4 @@ flux_dict(model, flux_balance_analysis(model, ...)) """ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = is_solved(opt_model) ? - Dict(reactions(model) .=> solution_flux(model, value.(opt_model[:x]))) : nothing + Dict(reactions(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl index a3c638fc3..5718c70aa 100644 --- a/src/base/types/MetabolicModel.jl +++ b/src/base/types/MetabolicModel.jl @@ -87,14 +87,40 @@ function objective(a::MetabolicModel)::SparseVec _missing_impl_error(objective, (a,)) end + +""" + fluxes(a::MetabolicModel)::Vector{String} + +In some models, the [`reactions`](@ref) that correspond to the columns of +[`stoichiometry`](@ref) matrix do not fully represent the semantic contents of +the model; for example, fluxes may be split into forward and reverse reactions, +reactions catalyzed by distinct enzymes, etc. Together with +[`reaction_flux`](@ref) (and [`n_fluxes`](@ref)) this specifies how the +flux is decomposed into individual reactions. + +By default (and in most models), fluxes and reactions perfectly correspond. +""" +function fluxes(a::MetabolicModel)::Vector{String} + reactions(a) +end + +function n_fluxes(a::MetabolicModel)::Int + n_reactions(a) +end + """ - solution_flux(a::MetabolicModel, solution::Vector{Float64})::Vector{Float64} + reaction_flux(a::MetabolicModel)::SparseMat -Retrieve a vector of reaction fluxes (corresponding to `reactions(a)`) from a -feasible solution of the optimization problem. 
+Retrieve a sparse matrix that describes the correspondence of a solution of the +linear system to the fluxes (see [`fluxes`](@ref) for rationale). Returns a +sparse matrix of size `(n_reactions(a), n_fluxes(a))`. For most models, this is +an identity matrix. """ -function solution_flux(a::MetabolicModel, solution::Vector{Float64})::Vector{Float64} - solution +function reaction_flux(a::MetabolicModel)::SparseMat + nr = n_reactions(a) + nf = n_fluxes(a) + nr == nf || _missing_impl_error(reaction_flux, (a,)) + spdiagm(fill(1, nr)) end """ diff --git a/src/base/types/ModelWrapper.jl b/src/base/types/ModelWrapper.jl index 8b314c2a3..eee1f4c4e 100644 --- a/src/base/types/ModelWrapper.jl +++ b/src/base/types/ModelWrapper.jl @@ -14,11 +14,7 @@ end # The list of inherited functions must be synced with the methods available for [`MetabolicModel`](@ref). # -@_inherit_model_methods_fn ModelWrapper () unwrap_model () reactions metabolites stoichiometry bounds balance objective coupling n_coupling_constraints coupling_bounds genes n_genes precache! - -@_inherit_model_methods_fn ModelWrapper (solution::Vector{Float64},) unwrap_model ( - solution, -) solution_flux +@_inherit_model_methods_fn ModelWrapper () unwrap_model () reactions metabolites stoichiometry bounds balance objective fluxes n_fluxes reaction_flux coupling n_coupling_constraints coupling_bounds genes n_genes precache! 
@_inherit_model_methods_fn ModelWrapper (rid::String,) unwrap_model (rid,) reaction_gene_association reaction_subsystem reaction_stoichiometry reaction_annotations reaction_notes From 0d940de253d491dbfc559eb97ecd5bffefd62dcf Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 5 Apr 2022 20:12:26 +0200 Subject: [PATCH 023/109] clean up MetabolicModel --- src/base/types/MetabolicModel.jl | 3 ++- src/base/types/abstract/MetabolicModel.jl | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl index 5718c70aa..d5d2576e9 100644 --- a/src/base/types/MetabolicModel.jl +++ b/src/base/types/MetabolicModel.jl @@ -7,6 +7,8 @@ # automatically derived methods for [`ModelWrapper`](@ref). # +_missing_impl_error(m, a) = throw(MethodError(m, a)) + """ reactions(a::MetabolicModel)::Vector{String} @@ -130,7 +132,6 @@ Get a matrix of coupling constraint definitions of a model. By default, there is no coupling in the models. """ function coupling(a::MetabolicModel)::SparseMat - # TODO make this an extension of stoichiometry return spzeros(0, n_reactions(a)) end diff --git a/src/base/types/abstract/MetabolicModel.jl b/src/base/types/abstract/MetabolicModel.jl index 27af601df..8b1fc4b77 100644 --- a/src/base/types/abstract/MetabolicModel.jl +++ b/src/base/types/abstract/MetabolicModel.jl @@ -64,5 +64,3 @@ Free-form notes about something (e.g. a [`Gene`](@ref)), categorized by "topic". 
""" const Notes = Dict{String,Vector{String}} - -_missing_impl_error(m, a) = throw(MethodError(m, a)) From a14df5beed497c9c08fbd088f28a8e9e5ff06cbc Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 5 Apr 2022 20:12:51 +0200 Subject: [PATCH 024/109] fix type of `genes(::CoreModel)` --- src/base/types/CoreModel.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/types/CoreModel.jl b/src/base/types/CoreModel.jl index 4e3d65cb7..8f2d74c52 100644 --- a/src/base/types/CoreModel.jl +++ b/src/base/types/CoreModel.jl @@ -93,7 +93,7 @@ Collect all genes contained in the [`CoreModel`](@ref). The call is expensive for large models, because the vector is not stored and instead gets rebuilt each time this function is called. """ -function genes(a::MetabolicModel)::Vector{String} +function genes(a::CoreModel)::Vector{String} res = Set{String}() for grr in a.grrs isnothing(grr) && continue From 1fc46d249b8e9a601ed288a8f53b6207aa8a055f Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 5 Apr 2022 20:13:22 +0200 Subject: [PATCH 025/109] fix ModelWrapper gene accessors --- src/base/types/ModelWrapper.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/types/ModelWrapper.jl b/src/base/types/ModelWrapper.jl index eee1f4c4e..ccf289d3e 100644 --- a/src/base/types/ModelWrapper.jl +++ b/src/base/types/ModelWrapper.jl @@ -20,4 +20,4 @@ end @_inherit_model_methods_fn ModelWrapper (mid::String,) unwrap_model (mid,) metabolite_formula metabolite_charge metabolite_compartment metabolite_annotations metabolite_notes -@_inherit_model_methods_fn ModelWrapper (gid::String,) unwrap_model (mid,) gene_annotations gene_notes +@_inherit_model_methods_fn ModelWrapper (gid::String,) unwrap_model (gid,) gene_annotations gene_notes From ff4d4715d55df751ff845ca678a215110cb1d167 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 5 Apr 2022 20:13:45 +0200 Subject: [PATCH 026/109] fix gene enumeration in CoreModel --- 
src/base/types/CoreModel.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/base/types/CoreModel.jl b/src/base/types/CoreModel.jl index 8f2d74c52..db696c42a 100644 --- a/src/base/types/CoreModel.jl +++ b/src/base/types/CoreModel.jl @@ -98,7 +98,9 @@ function genes(a::CoreModel)::Vector{String} for grr in a.grrs isnothing(grr) && continue for gs in grr - push!(res, gs) + for g in gs + push!(res, g) + end end end sort(collect(res)) From ed460e45e556c13303347f17e44309eb9aba3454 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 5 Apr 2022 20:15:41 +0200 Subject: [PATCH 027/109] adjust flux sampling to new fluxes() vs reactions() --- src/analysis/sampling/affine_hit_and_run.jl | 15 +++++++++------ src/analysis/sampling/warmup_variability.jl | 17 ++++++++++++++++- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/analysis/sampling/affine_hit_and_run.jl b/src/analysis/sampling/affine_hit_and_run.jl index ca524cef4..07644654b 100644 --- a/src/analysis/sampling/affine_hit_and_run.jl +++ b/src/analysis/sampling/affine_hit_and_run.jl @@ -11,9 +11,9 @@ Run a hit-and-run style sampling that starts from `warmup_points` and uses their affine combinations for generating the run directions to sample the space -delimited by `lbs` and `ubs`. The points that represent fluxes in -`warmup_points` should be organized in columns, i.e. `warmup_points[:,1]` is -the first warmup flux. +delimited by `lbs` and `ubs`. The reaction rate vectors in `warmup_points` +should be organized in columns, i.e. `warmup_points[:,1]` is the first set of +reaction rates. There are total `chains` of hit-and-run runs, each on a batch of `size(warmup_points, 2)` points. The runs are scheduled on `workers`, for good @@ -25,9 +25,9 @@ points is collected for output. For example, `sample_iters=[1,4,5]` causes the process run for 5 iterations, returning the sample batch that was produced by 1st, 4th and last (5th) iteration. 
-Returns a matrix of sampled fluxes (in columns), with all collected samples -horizontally concatenated. The total number of samples (columns) will be -`size(warmup_points,2) * chains * length(sample_iters)`. +Returns a matrix of sampled reaction rates (in columns), with all collected +samples horizontally concatenated. The total number of samples (columns) will +be `size(warmup_points,2) * chains * length(sample_iters)`. # Example ``` @@ -38,6 +38,9 @@ model = load_model(StandardModel, model_path) warmup, lbs, ubs = warmup_from_variability(model, Tulip.Optimizer, 100) samples = affine_hit_and_run(warmup, lbs, ubs, sample_iters = 1:3) + +# convert the result to flux (for models where the distinction matters): +fluxes = reaction_flux(model)' * samples ``` """ function affine_hit_and_run( diff --git a/src/analysis/sampling/warmup_variability.jl b/src/analysis/sampling/warmup_variability.jl index c34d7246b..cf66bb522 100644 --- a/src/analysis/sampling/warmup_variability.jl +++ b/src/analysis/sampling/warmup_variability.jl @@ -75,7 +75,7 @@ function warmup_from_variability( fluxes = hcat( dpmap( - rid -> :($COBREXA._max_variability_flux( + rid -> :($COBREXA._maximize_warmup_reaction( cobrexa_sampling_warmup_optmodel, $rid, om -> $COBREXA.JuMP.value.(om[:x]), @@ -96,3 +96,18 @@ function warmup_from_variability( return fluxes, lbs, ubs end + +""" + _maximize_warmup_reaction(opt_model, rid, ret) + +A helper function for finding warmup points from reaction variability. +""" +function _maximize_warmup_reaction(opt_model, rid, ret) + sense = rid > 0 ? MAX_SENSE : MIN_SENSE + var = all_variables(opt_model)[abs(rid)] + + @objective(opt_model, sense, var) + optimize!(opt_model) + + is_solved(opt_model) ? 
ret(opt_model) : nothing +end From d336a7d678582402e551b769a28e612add41905f Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 5 Apr 2022 20:17:05 +0200 Subject: [PATCH 028/109] flux_variability_analysis now works on fluxes instead of bare reactions ...the way is backwards compatible, and there's completely backwards-compatible reaction_variability_analysis. --- src/analysis/flux_variability_analysis.jl | 126 ++++++++++++++++----- test/analysis/flux_variability_analysis.jl | 3 + 2 files changed, 101 insertions(+), 28 deletions(-) diff --git a/src/analysis/flux_variability_analysis.jl b/src/analysis/flux_variability_analysis.jl index c3f49be38..7935da1bd 100644 --- a/src/analysis/flux_variability_analysis.jl +++ b/src/analysis/flux_variability_analysis.jl @@ -1,7 +1,7 @@ """ flux_variability_analysis( model::MetabolicModel, - reactions::Vector{Int}, + fluxes::SparseMat, optimizer; modifications = [], workers = [myid()], @@ -11,9 +11,9 @@ )::Matrix{Float64} Flux variability analysis solves a pair of optimization problems in `model` for -each flux listed in `reactions`: +each flux `f` described in `fluxes`: ``` - min,max xᵢ +min,max fᵀx s.t. S x = b xₗ ≤ x ≤ xᵤ cᵀx ≥ bounds(Z₀)[1] @@ -38,13 +38,13 @@ multithreaded variability computation can be used to improve resource allocation efficiency in many common use-cases. `ret` is a function used to extract results from optimized JuMP models of the -individual reactions. By default, it calls and returns the value of +individual fluxes. By default, it calls and returns the value of `JuMP.objective_value`. More information can be extracted e.g. by setting it to a function that returns a more elaborate data structure; such as `m -> (JuMP.objective_value(m), JuMP.value.(m[:x]))`. Returns a matrix of extracted `ret` values for minima and maxima, of total size -(`length(reactions)`,2). The optimizer result status is checked with +(`size(fluxes,2)`,2).
The optimizer result status is checked with [`is_solved`](@ref); `nothing` is returned if the optimization failed for any reason. @@ -56,7 +56,7 @@ flux_variability_analysis(model, [1, 2, 3, 42], GLPK.optimizer) """ function flux_variability_analysis( model::MetabolicModel, - reactions::Vector{Int}, + fluxes::SparseMat, optimizer; modifications = [], workers = [myid()], @@ -64,9 +64,13 @@ function flux_variability_analysis( bounds = z -> (z, Inf), ret = objective_value, ) - #TODO this breaks if the flux doesn't correspond to the solution - if any(reactions .< 1) || any(reactions .> n_reactions(model)) - throw(DomainError(reactions, "Index exceeds number of reactions.")) + if size(fluxes, 1) != n_reactions(model) + throw( + DomainError( + size(fluxes, 1), + "Flux matrix size is not compatible with model reaction count.", + ), + ) end Z = bounds( @@ -76,6 +80,8 @@ function flux_variability_analysis( ) : optimal_objective_value, ) + flux_vector = [fluxes[:, i] for i = 1:size(fluxes, 2)] + return screen_optmodel_modifications( model, optimizer; @@ -94,12 +100,36 @@ function flux_variability_analysis( end, ], ), - args = Tuple.([-reactions reactions]), - analysis = (_, opt_model, ridx) -> _max_variability_flux(opt_model, ridx, ret), + args = tuple.([flux_vector flux_vector], [MIN_SENSE MAX_SENSE]), + analysis = (_, opt_model, flux, sense) -> + _max_variability_flux(opt_model, flux, sense, ret), workers = workers, ) end +""" + flux_variability_analysis(model::MetabolicModel, flux_indexes::Vector{Int}, optimizer; kwargs...) 
+ +An overload of [`flux_variability_analysis`](@ref) that explores the fluxes specified by integer indexes +""" +function flux_variability_analysis( + model::MetabolicModel, + flux_indexes::Vector{Int}, + optimizer; + kwargs..., +) + if any((flux_indexes .< 1) .| (flux_indexes .> n_fluxes(model))) + throw(DomainError(flux_indexes, "Flux index out of range")) + end + + flux_variability_analysis( + model, + reaction_flux(model)[:, flux_indexes], + optimizer; + kwargs..., + ) +end + """ flux_variability_analysis( model::MetabolicModel, @@ -108,12 +138,10 @@ end ) A simpler version of [`flux_variability_analysis`](@ref) that maximizes and -minimizes all reactions in the model. Arguments are forwarded. +minimizes all declared fluxes in the model. Arguments are forwarded. """ -function flux_variability_analysis(model::MetabolicModel, optimizer; kwargs...) - n = n_reactions(model) - return flux_variability_analysis(model, collect(1:n), optimizer; kwargs...) -end +flux_variability_analysis(model::MetabolicModel, optimizer; kwargs...) = + flux_variability_analysis(model, reaction_flux(model), optimizer; kwargs...) """ flux_variability_analysis_dict( @@ -123,8 +151,8 @@ end ) A variant of [`flux_variability_analysis`](@ref) that returns the individual -maximized and minimized fluxes of all reactions as two dictionaries (of -dictionaries). All keyword arguments except `ret` are passed through. +maximized and minimized fluxes as two dictionaries (of dictionaries). All +keyword arguments except `ret` are passed through. # Example ``` @@ -141,29 +169,71 @@ mins, maxs = flux_variability_analysis_dict( ``` """ function flux_variability_analysis_dict(model::MetabolicModel, optimizer; kwargs...) 
- fluxes = flux_variability_analysis( + vs = flux_variability_analysis( model, optimizer; kwargs..., ret = sol -> flux_vector(model, sol), ) - rxns = reactions(model) - dicts = zip.(Ref(rxns), fluxes) + flxs = fluxes(model) + dicts = zip.(Ref(flxs), vs) - return (Dict(rxns .=> Dict.(dicts[:, 1])), Dict(rxns .=> Dict.(dicts[:, 2]))) + return (Dict(flxs .=> Dict.(dicts[:, 1])), Dict(flxs .=> Dict.(dicts[:, 2]))) end """ - _max_variability_flux(opt_model, rid, ret) + _max_variability_flux(opt_model, flux, sense, ret) Internal helper for maximizing reactions in optimization model. """ -function _max_variability_flux(opt_model, rid, ret) - sense = rid > 0 ? MAX_SENSE : MIN_SENSE - var = all_variables(opt_model)[abs(rid)] - - @objective(opt_model, sense, var) +function _max_variability_flux(opt_model, flux, sense, ret) + @objective(opt_model, sense, sum(flux .* opt_model[:x])) optimize!(opt_model) is_solved(opt_model) ? ret(opt_model) : nothing end + +""" + reaction_variability_analysis(model::MetabolicModel, reaction_indexes::Vector{Int}, optimizer; kwargs...) + +A variant for [`flux_variability_analysis`](@ref) that examines actual +reactions (selected by their indexes in `reactions` argument) instead of whole +fluxes. This may be useful for models where the sets of reactions and fluxes +differ. +""" +function reaction_variability_analysis( + model::MetabolicModel, + reaction_indexes::Vector{Int}, + optimizer; + kwargs..., +) + if any((reaction_indexes .< 1) .| (reaction_indexes .> n_reactions(model))) + throw(DomainError(reaction_indexes, "Flux index out of range")) + end + + flux_variability_analysis( + model, + sparse( + reaction_indexes, + 1:length(reaction_indexes), + 1.0, + n_reactions(model), + length(reaction_indexes), + ), + optimizer; + kwargs..., + ) +end + +""" + reaction_variability_analysis( model::MetabolicModel, optimizer; kwargs...) + +Shortcut for [`reaction_variability_analysis`](@ref) that examines all reactions. 
+""" +reaction_variability_analysis(model::MetabolicModel, optimizer; kwargs...) = + reaction_variability_analysis( + model, + collect(1:n_reactions(model)), + optimizer; + kwargs..., + ) diff --git a/test/analysis/flux_variability_analysis.jl b/test/analysis/flux_variability_analysis.jl index e624ba221..8d384ce5b 100644 --- a/test/analysis/flux_variability_analysis.jl +++ b/test/analysis/flux_variability_analysis.jl @@ -9,6 +9,9 @@ 2.0 2.0 ] + rates = reaction_variability_analysis(cp, optimizer) + @test fluxes == rates + fluxes = flux_variability_analysis(cp, [2], optimizer) @test size(fluxes) == (1, 2) From 31a4490b68f023e40682fbb4954cbab99c2c7831 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Fri, 8 Apr 2022 11:15:04 +0200 Subject: [PATCH 029/109] update accessors --- src/base/types/MATModel.jl | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/base/types/MATModel.jl b/src/base/types/MATModel.jl index d55f09433..be4c49b63 100644 --- a/src/base/types/MATModel.jl +++ b/src/base/types/MATModel.jl @@ -152,10 +152,13 @@ metabolite_formula(m::MATModel, mid::String) = _maybemap( Extract metabolite charge from `metCharge` or `metCharges`. """ -metabolite_charge(m::MATModel, mid::String) = _maybemap( - x -> x[findfirst(==(mid), metabolites(m))], - get(m.mat, "metCharge", get(m.mat, "metCharges", nothing)), -) +function metabolite_charge(m::MATModel, mid::String) + met_charge = _maybemap( + x -> x[findfirst(==(mid), metabolites(m))], + get(m.mat, "metCharge", get(m.mat, "metCharges", nothing)), + ) + isnan(met_charge) ? 0 : met_charge +end """ metabolite_compartment(m::MATModel, mid::String) @@ -244,3 +247,19 @@ function Base.convert(::Type{MATModel}, m::MetabolicModel) ), ) end + +""" + reaction_name(m::MATModel, mid::String) + +Extract metabolite compartment from `rxnNames`. 
+""" +reaction_name(m::MATModel, rid::String) = + _maybemap(x -> x[findfirst(==(rid), reactions(m))], get(m.mat, "rxnNames", nothing)) + +""" + metabolite_name(m::MATModel, mid::String) + +Extract metabolite compartment from `metNames`. +""" +metabolite_name(m::MATModel, mid::String) = + _maybemap(x -> x[findfirst(==(mid), metabolites(m))], get(m.mat, "metNames", nothing)) From 536e67969964d3d06af174216fcfc4d1fb9b82e1 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Fri, 8 Apr 2022 11:52:56 +0200 Subject: [PATCH 030/109] add gets and update accessors --- src/base/constants.jl | 6 ++++++ src/base/types/MATModel.jl | 15 +++++++-------- src/base/utils/guesskey.jl | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/base/constants.jl b/src/base/constants.jl index 864a68847..078a6a7cd 100644 --- a/src/base/constants.jl +++ b/src/base/constants.jl @@ -24,6 +24,12 @@ const _constants = ( objective = ["c"], grrs = ["gene_reaction_rules", "grRules", "rules"], ids = ["id", "description"], + metformulas = ["metFormula", "metFormulas"], + metcharges = ["metCharge", "metCharges"], + metcompartments = ["metCompartment", "metCompartments"], + rxnnames = ["rxnNames",], + metnames = ["metNames",], + ), gene_annotation_checks = ( "ncbigene", diff --git a/src/base/types/MATModel.jl b/src/base/types/MATModel.jl index be4c49b63..1df7e7431 100644 --- a/src/base/types/MATModel.jl +++ b/src/base/types/MATModel.jl @@ -144,7 +144,7 @@ Extract metabolite formula from key `metFormula` or `metFormulas`. """ metabolite_formula(m::MATModel, mid::String) = _maybemap( x -> _parse_formula(x[findfirst(==(mid), metabolites(m))]), - get(m.mat, "metFormula", get(m.mat, "metFormulas", nothing)), + gets(m.mat, nothing, _constants.keynames.metformulas), ) """ @@ -155,7 +155,7 @@ Extract metabolite charge from `metCharge` or `metCharges`. 
function metabolite_charge(m::MATModel, mid::String) met_charge = _maybemap( x -> x[findfirst(==(mid), metabolites(m))], - get(m.mat, "metCharge", get(m.mat, "metCharges", nothing)), + gets(m.mat, nothing, _constants.keynames.metcharges), ) isnan(met_charge) ? 0 : met_charge end @@ -167,10 +167,9 @@ Extract metabolite compartment from `metCompartment` or `metCompartments`. """ metabolite_compartment(m::MATModel, mid::String) = _maybemap( x -> x[findfirst(==(mid), metabolites(m))], - get(m.mat, "metCompartment", get(m.mat, "metCompartments", nothing)), + gets(m.mat, nothing, _constants.keynames.metcompartments), ) - """ reaction_stoichiometry(model::MATModel, rid::String)::Dict{String, Float64} @@ -251,15 +250,15 @@ end """ reaction_name(m::MATModel, mid::String) -Extract metabolite compartment from `rxnNames`. +Extract reaction name from `rxnNames`. """ reaction_name(m::MATModel, rid::String) = - _maybemap(x -> x[findfirst(==(rid), reactions(m))], get(m.mat, "rxnNames", nothing)) + _maybemap(x -> x[findfirst(==(rid), reactions(m))], gets(m.mat, nothing, _constants.keynames.rxnnames)) """ metabolite_name(m::MATModel, mid::String) -Extract metabolite compartment from `metNames`. +Extract metabolite name from `metNames`. """ metabolite_name(m::MATModel, mid::String) = - _maybemap(x -> x[findfirst(==(mid), metabolites(m))], get(m.mat, "metNames", nothing)) + _maybemap(x -> x[findfirst(==(mid), metabolites(m))], gets(m.mat, nothing, _constants.keynames.metnames)) diff --git a/src/base/utils/guesskey.jl b/src/base/utils/guesskey.jl index 94089daeb..01f85d252 100644 --- a/src/base/utils/guesskey.jl +++ b/src/base/utils/guesskey.jl @@ -20,3 +20,17 @@ function _guesskey(avail, possibilities) end return x[1] end + +""" + gets(collection, fail, keys) + +Return `fail` if key in `keys` is not in `collection`, otherwise +return `collection[key]`. Useful if may different keys need to be +tried due to non-standardized model formats. 
+""" +function gets(collection, fail, keys) + for key in keys + haskey(collection, key) && return collection[key] + end + return fail +end From c5ccab9d2da9a749bee011bf4dd1965f6849b931 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Fri, 8 Apr 2022 11:56:24 +0200 Subject: [PATCH 031/109] format --- src/base/constants.jl | 5 ++--- src/base/types/MATModel.jl | 12 ++++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/base/constants.jl b/src/base/constants.jl index 078a6a7cd..56c8b36f2 100644 --- a/src/base/constants.jl +++ b/src/base/constants.jl @@ -27,9 +27,8 @@ const _constants = ( metformulas = ["metFormula", "metFormulas"], metcharges = ["metCharge", "metCharges"], metcompartments = ["metCompartment", "metCompartments"], - rxnnames = ["rxnNames",], - metnames = ["metNames",], - + rxnnames = ["rxnNames"], + metnames = ["metNames"], ), gene_annotation_checks = ( "ncbigene", diff --git a/src/base/types/MATModel.jl b/src/base/types/MATModel.jl index 1df7e7431..09200476d 100644 --- a/src/base/types/MATModel.jl +++ b/src/base/types/MATModel.jl @@ -252,13 +252,17 @@ end Extract reaction name from `rxnNames`. """ -reaction_name(m::MATModel, rid::String) = - _maybemap(x -> x[findfirst(==(rid), reactions(m))], gets(m.mat, nothing, _constants.keynames.rxnnames)) +reaction_name(m::MATModel, rid::String) = _maybemap( + x -> x[findfirst(==(rid), reactions(m))], + gets(m.mat, nothing, _constants.keynames.rxnnames), +) """ metabolite_name(m::MATModel, mid::String) Extract metabolite name from `metNames`. """ -metabolite_name(m::MATModel, mid::String) = - _maybemap(x -> x[findfirst(==(mid), metabolites(m))], gets(m.mat, nothing, _constants.keynames.metnames)) +metabolite_name(m::MATModel, mid::String) = _maybemap( + x -> x[findfirst(==(mid), metabolites(m))], + gets(m.mat, nothing, _constants.keynames.metnames), +) From f32716bf898e161a531e955ac6bf7fd34bd8392d Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Thu, 24 Feb 2022 23:31:02 +0100 Subject: [PATCH 032/109] added gecko and test --- src/analysis/enzyme_utils.jl | 342 ++++++++++++++++++++++++++++++ src/analysis/gecko.jl | 331 +++++++++++++++++++++++++++++ src/analysis/smoment.jl | 0 test/analysis/gecko.jl | 30 +++ test/analysis/smoment.jl | 38 ++++ test/data_static.jl | 394 +++++++++++++++++++++++++++++++++++ 6 files changed, 1135 insertions(+) create mode 100644 src/analysis/enzyme_utils.jl create mode 100644 src/analysis/gecko.jl create mode 100644 src/analysis/smoment.jl create mode 100644 test/analysis/gecko.jl create mode 100644 test/analysis/smoment.jl diff --git a/src/analysis/enzyme_utils.jl b/src/analysis/enzyme_utils.jl new file mode 100644 index 000000000..31ed42056 --- /dev/null +++ b/src/analysis/enzyme_utils.jl @@ -0,0 +1,342 @@ +""" + _bounds(model::StandardModel, rid::String) + +Return lower and upper bounds for `rid` in `model`. +""" +function _bounds(model::StandardModel, rid::String) + #TODO generalize this to other model types + model.reactions[rid].lb, model.reactions[rid].ub +end + +""" + _is_reversible(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is reversible. +""" +function _is_reversible(model::StandardModel, rid::String) + lb, ub = _bounds(model, rid) + lb < 0 && ub > 0 +end + +""" + _is_forward_only(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is forward only. +""" +function _is_forward_only(model::StandardModel, rid::String) + lb, ub = _bounds(model, rid) + lb >= 0 && ub > 0 +end + +""" + _is_backward_only(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is backward only. +""" +function _is_backward_only(model::StandardModel, rid::String) + lb, ub = _bounds(model, rid) + lb < 0 && ub <= 0 +end + +""" + _is_unidirectional(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is unidirectional. 
+""" +function _is_unidirectional(model::StandardModel, rid::String) + _is_forward_only(model, rid) || _is_backward_only(model, rid) +end + +""" + _is_blocked(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is blocked. +""" +function _is_blocked(model::StandardModel, rid::String) + lb, ub = _bounds(model, rid) + lb == ub == 0 +end + +""" + _has_isozymes(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is catalyzed by multiple enzymes, +i.e. it has isozymes according to the gene reaction rules. +""" +function _has_isozymes(model::StandardModel, rid::String) + length(reaction_gene_association(model, rid)) > 1 +end + +""" + _has_grr(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` has a gene reaction rule entry. +""" +function _has_grr(model::StandardModel, rid::String) + #TODO simplify this once COBREXA enforces universal rules for GRR representation + !isnothing(reaction_gene_association(model, rid)) && + reaction_gene_association(model, rid) != [[]] && + !isempty(first(reaction_gene_association(model, rid))) +end + +""" + _get_proteins_with_kcats(model::StandardModel, reaction_kcats) + +Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats`, +which is a dictionary mapping reaction ids to the kcats of each isozyme. Assume that if +a reaction has a kcat then each isozyme has a kcat. +""" +function _get_proteins_with_kcats(model::StandardModel, reaction_kcats) + unique( + vcat( + vcat( + [ + reaction_gene_association(model, rid) for + rid in reactions(model) if haskey(reaction_kcats, rid) + ]..., + )..., + ), + ) +end + +""" + _build_irreversible_stoichiometric_matrix(model::StandardModel) + +Return the stoichiometric matrix. All reactions are forward only i.e. only +positive fluxes are allowed. Include arm reactions. 
+""" +function _build_irreversible_stoichiometric_matrix(model::StandardModel) + # components used to build stoichiometric matrix + S_components = ( #TODO add size hints if possible + row_idxs = Vector{Int}(), + col_idxs = Vector{Int}(), + coeffs = Vector{Float64}(), + lbs = Vector{Float64}(), + ubs = Vector{Float64}(), + ) + + # establish the ordering in a named tuple + idxs = ( #: pseudo metabolites and reactions are added to model + met_idxs = Dict{String,Int}(), + rxn_idxs = Dict{String,Int}(), + max_rxn_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + max_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + pseudo_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + ) + #TODO for the counter thing, basically I wanted e.g. max_rxn_idx = 1 and then update it, + #TODO but named tuples are immutable... :( + + # fill the matrix entries + #: blocked treated as reversible because unclear what direction the reaction would go + for rid in reactions(model) + if _has_grr(model, rid) && _has_isozymes(model, rid) + if _is_unidirectional(model, rid) + dir = _is_forward_only(model, rid) ? "§FOR" : "§REV" + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + elseif _is_reversible(model, rid) || _is_blocked(model, rid) + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") + else + @warn "Unhandled bound type for $rid" + end + else # no grr or single enzyme only + if _is_unidirectional(model, rid) + dir = _is_forward_only(model, rid) ? 
"§FOR" : "§REV" + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + elseif _is_reversible(model, rid) || _is_blocked(model, rid) + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") + else + @warn "Unhandled bound type for $rid" + end + end + end + + S = sparse( + S_components.row_idxs, + S_components.col_idxs, + S_components.coeffs, + length(idxs.met_idxs), + length(idxs.rxn_idxs), + ) + + return S, S_components.lbs, S_components.ubs, idxs.rxn_idxs, idxs.met_idxs +end + + +""" + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + +Add entries to the components that will be used to build the stoichiometric matrix. +Simple variant that does not deal with isozymes and arm reactions. +""" +function _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + idxs.rxn_idxs[rid*dir] = idxs.max_rxn_idx[1] + idxs.max_rxn_idx[1] += 1 + fix_sign = dir == "§FOR" ? 1 : -1 # change direction of reaction + for (mid, coeff) in reaction_stoichiometry(model, rid) + if !haskey(idxs.met_idxs, mid) + idxs.met_idxs[mid] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + end + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[rid*dir]) + push!(S_components.coeffs, fix_sign * coeff) + end + lb, ub = abs.(_bounds(model, rid)) # assumes lb < ub + if dir == "§FOR" + _is_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, lb) + push!(S_components.ubs, ub) + else + _is_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, ub) + push!(S_components.ubs, lb) + end +end + +""" + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + +Add entries to the components that will be used to build the stoichiometric matrix. +Complex variant that deals with isozymes and arm reactions. 
+""" +function _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + # add pseudo metabolite + pm = "§PM$(idxs.pseudo_met_idx[1])" + idxs.pseudo_met_idx[1] += 1 + idxs.met_idxs[pm] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + # find half reactions to get arm reaction + lhs = [] + rhs = [] + for (mid, coeff) in reaction_stoichiometry(model, rid) + if !haskey(idxs.met_idxs, mid) + idxs.met_idxs[mid] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + end + if coeff <= 0 + push!(lhs, (mid, coeff)) + else + push!(rhs, (mid, coeff)) + end + end + product_half_reaction = dir == "§FOR" ? rhs : lhs + reagent_half_reaction = dir == "§FOR" ? lhs : rhs + # add arm reaction + fix_sign = dir == "§FOR" ? 1 : -1 # change direction of reaction + pr = rid * "§ARM" * dir + idxs.rxn_idxs[pr] = idxs.max_rxn_idx[1] #! this needs to get added first because of blocked possibility + idxs.max_rxn_idx[1] += 1 + push!(S_components.row_idxs, idxs.met_idxs[pm]) + push!(S_components.col_idxs, idxs.rxn_idxs[pr]) + push!(S_components.coeffs, 1) + for (mid, coeff) in reagent_half_reaction + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[pr]) + push!(S_components.coeffs, fix_sign * coeff) + end + # add bounds for ARM reaction that corresponds to original model's bounds + lb, ub = abs.(_bounds(model, rid)) # assumes lb < ub + if dir == "§FOR" + _is_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, lb) + push!(S_components.ubs, ub) + else + _is_reversible(model, rid) ? 
push!(S_components.lbs, 0) : + push!(S_components.lbs, ub) + push!(S_components.ubs, lb) + end + # add isozyme reactions + for (i, _) in enumerate(reaction_gene_association(model, rid)) + iso_rid = rid * "§ISO$i" * dir + idxs.rxn_idxs[iso_rid] = idxs.max_rxn_idx[1] + idxs.max_rxn_idx[1] += 1 + push!(S_components.row_idxs, idxs.met_idxs[pm]) + push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) + push!(S_components.coeffs, -1) + for (mid, coeff) in product_half_reaction + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) + push!(S_components.coeffs, fix_sign * coeff) + end + # add bounds + push!(S_components.lbs, 0) + if _is_blocked(model, rid) + push!(S_components.ubs, 0) + else + push!(S_components.ubs, 10_000) # arbitrary upper bound + end + end +end + +""" + _add_enzyme_variable(model, iso_num, rid, original_rid, protein_stoichiometry, reaction_kcats, E_components, col_idx, protein_ids) + +Helper function to add an column into the enzyme stoichiometric matrix. +""" +function _add_enzyme_variable( + model, + iso_num, + rid, + original_rid, + protein_stoichiometry, + reaction_kcats, + E_components, + col_idx, + protein_ids, +) + grr = reaction_gene_association(model, original_rid)[iso_num] + pstoich = protein_stoichiometry[original_rid][iso_num] + kcat = + contains(rid, "§FOR") ? reaction_kcats[original_rid][iso_num][1] : + reaction_kcats[original_rid][iso_num][2] + for (idx, pid) in enumerate(grr) + push!(E_components.row_idxs, first(indexin([pid], protein_ids))) + push!(E_components.col_idxs, col_idx) + push!(E_components.coeffs, -pstoich[idx] / kcat) + end +end + +""" + _order_id_to_idx_dict(id_to_idx_dict) + +Return the keys of `id_to_idx_dict` sorted by the values, which +are taken to be the indices. 
+""" +function _order_id_to_idx_dict(dmap) + ks = collect(keys(dmap)) + vs = collect(values(dmap)) + return ks[sortperm(vs)] +end + +""" + _map_irrev_to_rev_ids(reaction_map, solution; protein_ids = []) + +Return a dictionary of reaction ids mapped to fluxes and, if `protein_ids` is +not empty, also a dictionary of protein ids mapped to concentrations. `reaction_map` +determines the ids of the fluxes and `protein_ids` the gene ids. +The solution in `solution` is used to fill the dictionaries. +""" +function _map_irrev_to_rev_ids(reaction_map, solution; protein_ids = []) + reaction_flux = Dict{String,Float64}() + for (k, i) in reaction_map + contains(k, "§ISO") && continue # §ISO§FOR and §ISO§REV need to be ignored + rid = split(k, "§")[1] + v = contains(k, "§FOR") ? solution[i] : -solution[i] + reaction_flux[rid] = get(reaction_flux, rid, 0) + v + end + + if isempty(protein_ids) + return reaction_flux + else + n_reactions = length(reaction_map) + protein_flux = Dict{String,Float64}() + for (i, pid) in enumerate(protein_ids) + protein_flux[pid] = solution[n_reactions+i] + end + return reaction_flux, protein_flux + end +end + + diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl new file mode 100644 index 000000000..643dbe230 --- /dev/null +++ b/src/analysis/gecko.jl @@ -0,0 +1,331 @@ +""" + gecko( + model::StandardModel, + optimizer; + objective_id = "", + protein_stoichiometry = Dict(), + protein_masses = Dict(), + reaction_kcats = Dict(), + lb_protein_measurements = Dict(), + ub_protein_measurements = Dict(), + lb_flux_measurements = Dict(), + ub_flux_measurements = Dict(), + total_protein_mass = 0.0, + ) + +Perform flux balance analysis on `model` with `optimizer`, using GECKO to +incorporate enzyme capacity and kinetic constraints. See `Sánchez, Benjamín J., +et al. "Improving the phenotype predictions of a yeast genome‐scale metabolic +model by incorporating enzymatic constraints." Molecular systems biology, 2017.` +for implementation details.
+ +Total enzyme capacity (sum of all enzyme concentrations multiplied by their +molar mass) is constrained by `total_protein_mass`, a unitless mass fraction of +enzyme mass to cell dry mass. The reaction fluxes and protein concentrations can +be bounded by `lb_flux_measurements`, `ub_flux_measurements`, +`lb_protein_measurements`, and `ub_protein_measurements` respectively. The +reaction to be optimized is specified by `objective_id`. Both lower and upper +bounds need to be supplied if a reaction flux is to be bounded, likewise with +protein concentration bounds. Note, since the model uses irreversible reactions +internally, you should append `"§FOR"` for the forward direction and `"§REV"` +for the reverse direction in whichever reaction you want to optimize; this is +not necessary for the bound constraints. To optimize anything else, use the +lower level [`gecko_opt_problem`](@ref). Furthermore, `"§"` is reserved for +internal use as a delimiter, no reaction id should contain that character. + +The protein masses (in molar mass units) for each gene in the model should also +be supplied through `protein_masses`. The format is a dictionary of gene ids +mapped to molar masses. Additionally, the reaction turnover numbers (catalytic +constants, kcats) are supplied through `reaction_kcats`, which is also a +dictionary mapping reaction ids to kcats of each isozyme encoded by the +reaction's gene reaction rule. Each isozyme should have both a forward and +reverse kcat, so `reaction_kcats = Dict(rid => [[k1f, k1r], [k2f, k2r]], ...)` +for `rid` with two isozymes. Finally, the stoichiometry of each isozyme needs to +be supplied by `protein_stoichiometry`. The format is also a dictionary mapping +reaction ids to the stoichiometry of the gene ids returned by [`reaction_gene_association`](@ref), +e.g.
`protein_stoichiometry = Dict(rid => [[1,1],[1,2]],...)` implies that the +first isozyme of `rid` is composed of two subunits, each present once in the +protein, while the second isozyme is composed of two subunits, but the second +subunit is present twice in the isozyme. + +The function returns a dictionary mapping reaction ids to their fluxes, as well +as a dictionary mapping gene ids to their concentrations. Note, the units depend +on those used in `reaction_kcats` and `protein_masses`. Only the protein and +reaction flux bounds are optional kwargs, all other kwargs must be supplied. +Only reactions with kcats will have enzyme bounds associated with them, but all +isozymes are assumed to have data if data is supplied. + +Currently only `modifications` that change attributes of the `optimizer` are +supported. +""" +function gecko( + model::StandardModel, + optimizer; + objective_id = "", + protein_stoichiometry = Dict(), + protein_masses = Dict(), + reaction_kcats = Dict(), + lb_protein_measurements = Dict(), + ub_protein_measurements = Dict(), + lb_flux_measurements = Dict(), + ub_flux_measurements = Dict(), + total_protein_mass = 0.0, + sense = MOI.MAX_SENSE, + modifications = [], +) + + _, E, d, M, h, reaction_map, _, protein_ids = gecko_opt_problem( + model; + protein_stoichiometry, + protein_masses, + reaction_kcats, + lb_protein_measurements, + ub_protein_measurements, + lb_flux_measurements, + ub_flux_measurements, + total_protein_mass, + ) + + opt_model = Model(optimizer) + x = @variable(opt_model, x[1:size(E, 2)]) + bid = reaction_map[objective_id] + @objective(opt_model, sense, x[bid]) + @constraint(opt_model, E * x .== d) + @constraint(opt_model, M * x .<= h) + + # apply the modifications, if any + for mod in modifications + mod(nothing, opt_model) + end + + optimize!(opt_model) + + _map_irrev_to_rev_ids(reaction_map, value.(x); protein_ids) +end + +""" + gecko_opt_problem( + model::StandardModel; + protein_stoichiometry = Dict(), + protein_masses = 
Dict(), + reaction_kcats = Dict(), + lb_protein_measurements = Dict(), + ub_protein_measurements = Dict(), + lb_flux_measurements = Dict(), + ub_flux_measurements = Dict(), + total_protein_mass = 0.0, + ) + +Lower level function that returns the matrix form of a model with enzyme capacity +constraints, in GECKO format, see [`gecko`](@ref) for the higher level function. +``` +max/min cᵀ * x +s.t. E * x = d + M * x ≤ h +``` +Returns `c, E, d, M, h, reaction_map, metabolite_map, protein_ids`, where +`reaction_map` and `metabolite_map` map reaction and metabolite ids to their column and row indices in `E`. + +Format of arguments is always in order of grr for each reaction `rxn_id`: +1) protein_stoichiometry: `Dict[rxn_id] = [[1,2,1,1],...]` +2) protein_masses: `Dict[p_id] = mm` in units of kDa +3) reaction_kcats: `Dict[rxn_id] = [[kcat_for, kcat_rev],...]` for each complex + +Assumptions: +1) Each isozyme has a kcat (forward and reverse) for each reaction it catalyzes +2) Only reactions with kcats have enzyme constraints +3) Both `lb_flux_measurements` and `ub_flux_measurements` have the same keys + +Notes: +1) The objective vector, `c` is not set +2) The parameters are the kcats and the total protein measurement +3) The symbol `§` is a reserved delimiter, do not use it in reaction or metabolite ids +""" +function gecko_opt_problem( + model::StandardModel; + protein_stoichiometry = Dict(), + protein_masses = Dict(), + reaction_kcats = Dict(), + lb_protein_measurements = Dict(), + ub_protein_measurements = Dict(), + lb_flux_measurements = Dict(), + ub_flux_measurements = Dict(), + total_protein_mass = 0.0, +) + S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = + _build_irreversible_stoichiometric_matrix(model) + + #: find all gene products that have kcats associated with them + protein_ids = _get_proteins_with_kcats(model, reaction_kcats) + + #: size of resultant model + n_reactions = size(S, 2) + n_proteins = length(protein_ids) + n_metabolites = size(S, 1) + n_vars = n_reactions + n_proteins + + #: equality lhs + E_components = ( #TODO add size
hints if possible + row_idxs = Vector{Int}(), + col_idxs = Vector{Int}(), + coeffs = Vector{Float64}(), + ) + + for (rid, col_idx) in reaction_map + original_rid = string(split(rid, "§")[1]) + + # skip these entries + contains(rid, "§ARM") && continue + !haskey(reaction_kcats, original_rid) && continue + + # these entries have kcats + if contains(rid, "§ISO") + iso_num = parse( + Int, + replace( + first(filter(startswith("ISO"), split(rid, "§")[2:end])), + "ISO" => "", + ), + ) + else # only one enzyme + iso_num = 1 + end + + # add all entries to column of matrix + _add_enzyme_variable( + model, + iso_num, # only one enzyme + rid, + original_rid, + protein_stoichiometry, + reaction_kcats, + E_components, + col_idx, + protein_ids, + ) + end + + Se = sparse( + E_components.row_idxs, + E_components.col_idxs, + E_components.coeffs, + n_proteins, + n_reactions, + ) + + E = [ + S zeros(n_metabolites, n_proteins) + Se I(n_proteins) + ] + + #: equality rhs + d = zeros(n_metabolites + n_proteins) + + #: need to set objective reaction outside + c = spzeros(n_vars) + + #: inequality constraints + M, h = _gecko_build_inequality_constraints( + lb_protein_measurements, + ub_protein_measurements, + protein_ids, + protein_masses, + n_reactions, + n_proteins, + lb_flux_measurements, + ub_flux_measurements, + lb_fluxes, + ub_fluxes, + reaction_map, + total_protein_mass, + ) + + return c, E, d, M, h, reaction_map, metabolite_map, protein_ids +end + +""" + _gecko_build_inequality_constraints( + lb_protein_measurements, + ub_protein_measurements, + protein_ids, + protein_masses, + n_reactions, + n_proteins, + lb_flux_measurements, + ub_flux_measurements, + lb_fluxes, + ub_fluxes, + reaction_map, + total_protein_mass, + ) + +Helper function to build inequality constraints. Returns the inequality constraint in matrix format. 
+""" +function _gecko_build_inequality_constraints( + lb_protein_measurements, + ub_protein_measurements, + protein_ids, + protein_masses, + n_reactions, + n_proteins, + lb_flux_measurements, + ub_flux_measurements, + lb_fluxes, + ub_fluxes, + reaction_map, + total_protein_mass, +) + #: inequality lhs + mw_proteins = [protein_masses[pid] for pid in protein_ids] + M = Array( + [ + -I(n_reactions) zeros(n_reactions, n_proteins) + I(n_reactions) zeros(n_reactions, n_proteins) + zeros(n_proteins, n_reactions) -I(n_proteins) + zeros(n_proteins, n_reactions) I(n_proteins) + zeros(1, n_reactions) mw_proteins' + ], + ) + + #: inequality rhs + for original_rid in keys(lb_flux_measurements) # only constrain if measurement available + lb = lb_flux_measurements[original_rid] + ub = ub_flux_measurements[original_rid] + rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] + filter!(x -> !contains(x, "§ISO"), rids) # remove isozyme partial reactions (ARM reactions take care of these) + + if lb > 0 # forward only + for rid in rids + contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) + contains(rid, "§FOR") && + (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) + end + elseif ub < 0 # reverse only + for rid in rids + contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) + contains(rid, "§REV") && + (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) + end + else # measurement does not rule our reversibility + for rid in rids + contains(rid, "§FOR") && + (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) + contains(rid, "§REV") && + (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) + end + end + end + + lb_proteins = [ + haskey(lb_protein_measurements, pid) ? lb_protein_measurements[pid] : 0.0 for + pid in protein_ids + ] + ub_proteins = [ + haskey(ub_protein_measurements, pid) ? 
ub_protein_measurements[pid] : 10_000.0 + for pid in protein_ids + ] + + h = Array([-lb_fluxes; ub_fluxes; -lb_proteins; ub_proteins; total_protein_mass]) + + return M, h +end diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl new file mode 100644 index 000000000..e69de29bb diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl new file mode 100644 index 000000000..97f09d9dc --- /dev/null +++ b/test/analysis/gecko.jl @@ -0,0 +1,30 @@ +@testset "GECKO" begin + model = load_model(StandardModel, model_paths["e_coli_core.json"]) + model.reactions["EX_glc__D_e"].lb = -1000.0 # unconstraint because enzyme constraints take over + total_protein_mass = 100 # mg/gdW + + rxn_fluxes, prot_concens = gecko( + model, + Tulip.Optimizer; + objective_id = "BIOMASS_Ecoli_core_w_GAM§FOR", + protein_stoichiometry = ecoli_core_protein_stoichiometry, + protein_masses = ecoli_core_protein_masses, + reaction_kcats = ecoli_core_reaction_kcats, + lb_protein_measurements = Dict("b2779" => 0.01), + ub_protein_measurements = Dict("b2779" => 0.06), + lb_flux_measurements = Dict("GLCpts" => -1.0), + ub_flux_measurements = Dict("GLCpts" => 12.0), + total_protein_mass, + sense = COBREXA.MOI.MAX_SENSE, + modifications = [ + change_optimizer_attribute("IPM_IterationsLimit", 1000), + ] + ) + + prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens) + @test isapprox(rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], 0.812827846796761, atol = TEST_TOLERANCE) + @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) +end + + + diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl new file mode 100644 index 000000000..d3c0ab81b --- /dev/null +++ b/test/analysis/smoment.jl @@ -0,0 +1,38 @@ +using COBREXA, + Tulip, + JSON, + JuMP, + Statistics, + TerminalPager, + LinearAlgebra, + ForwardDiff, + CairoMakie, + SparseArrays + + +#! 
remove isozymes with lower effectivity (kcat/total_mass), only one enzyme per reaction +remove_slow_isozymes!(model, reaction_kcats, protein_stoichiometry, protein_masses) + +#: SMOMENT +total_protein_mass = 200.0 # mg/gDW +model.reactions["EX_glc__D_e"].lb = -1000.0 #! unconstrain otherwise bound will be hit +obj_id = "BIOMASS_Ecoli_core_w_GAM§FOR" + +c, E, d, M, h, reaction_map, metabolite_map = smoment( + model, + protein_stoichiometry, + protein_masses, + reaction_kcats; + total_protein_mass, +); + +m = Model(Gurobi.Optimizer); +@variable(m, x[1:size(E, 2)]); +bid = reaction_map[obj_id] +@objective(m, Max, x[bid]); +@constraint(m, E * x .== d); +@constraint(m, M * x .<= h); +optimize!(m) + +reaction_fluxes_pre = map_ids_to_sols(reaction_map, value.(x)); +plot_flux_summary(reaction_fluxes_pre) \ No newline at end of file diff --git a/test/data_static.jl b/test/data_static.jl index 97699cadc..dc972f182 100644 --- a/test/data_static.jl +++ b/test/data_static.jl @@ -154,3 +154,397 @@ const reaction_standard_gibbs_free_energies = Dict( "PPS" => -6.0551989457468665, "FUM" => -3.424133018702122, ) + +const ecoli_core_protein_masses = Dict( + #= + Data downloaded from Uniprot for E. coli K12, + gene mass in kDa. 
+ =# + "b4301" => 23.214, + "b1602" => 48.723, + "b4154" => 65.972, + "b3236" => 32.337, + "b1621" => 56.627, + "b1779" => 35.532, + "b3951" => 85.96, + "b1676" => 50.729, + "b3114" => 85.936, + "b1241" => 96.127, + "b2276" => 52.044, + "b1761" => 48.581, + "b3925" => 35.852, + "b3493" => 53.389, + "b3733" => 31.577, + "b2926" => 41.118, + "b0979" => 42.424, + "b4015" => 47.522, + "b2296" => 43.29, + "b4232" => 36.834, + "b3732" => 50.325, + "b2282" => 36.219, + "b2283" => 100.299, + "b0451" => 44.515, + "b2463" => 82.417, + "b0734" => 42.453, + "b3738" => 30.303, + "b3386" => 24.554, + "b3603" => 59.168, + "b2416" => 63.562, + "b0729" => 29.777, + "b0767" => 36.308, + "b3734" => 55.222, + "b4122" => 60.105, + "b2987" => 53.809, + "b2579" => 14.284, + "b0809" => 26.731, + "b1524" => 33.516, + "b3612" => 56.194, + "b3735" => 19.332, + "b3731" => 15.068, + "b1817" => 35.048, + "b1603" => 54.623, + "b1773" => 30.81, + "b4090" => 16.073, + "b0114" => 99.668, + "b3962" => 51.56, + "b2464" => 35.659, + "b2976" => 80.489, + "b1818" => 27.636, + "b2285" => 18.59, + "b1702" => 87.435, + "b1849" => 42.434, + "b1812" => 50.97, + "b0902" => 28.204, + "b3403" => 59.643, + "b1612" => 60.299, + "b1854" => 51.357, + "b0811" => 27.19, + "b0721" => 14.299, + "b2914" => 22.86, + "b1297" => 53.177, + "b0723" => 64.422, + "b3919" => 26.972, + "b3115" => 43.384, + "b4077" => 47.159, + "b3528" => 45.436, + "b0351" => 33.442, + "b2029" => 51.481, + "b1819" => 30.955, + "b0728" => 41.393, + "b2935" => 72.212, + "b2415" => 9.119, + "b0727" => 44.011, + "b0116" => 50.688, + "b0485" => 32.903, + "b3736" => 17.264, + "b0008" => 35.219, + "b3212" => 163.297, + "b3870" => 51.904, + "b4014" => 60.274, + "b2280" => 19.875, + "b2133" => 64.612, + "b2278" => 66.438, + "b0118" => 93.498, + "b2288" => 16.457, + "b3739" => 13.632, + "b3916" => 34.842, + "b3952" => 32.43, + "b2925" => 39.147, + "b2465" => 73.043, + "b2297" => 77.172, + "b2417" => 18.251, + "b4395" => 24.065, + "b3956" => 99.063, + 
"b0722" => 12.868, + "b2779" => 45.655, + "b0115" => 66.096, + "b0733" => 58.205, + "b1478" => 35.38, + "b2492" => 30.565, + "b0724" => 26.77, + "b0755" => 28.556, + "b1136" => 45.757, + "b2286" => 68.236, + "b0978" => 57.92, + "b1852" => 55.704, + "b2281" => 20.538, + "b2587" => 47.052, + "b2458" => 36.067, + "b0904" => 30.991, + "b1101" => 50.677, + "b0875" => 23.703, + "b3213" => 52.015, + "b2975" => 58.92, + "b0720" => 48.015, + "b0903" => 85.357, + "b1723" => 32.456, + "b2097" => 38.109, + "b3737" => 8.256, + "b0810" => 24.364, + "b4025" => 61.53, + "b1380" => 36.535, + "b0356" => 39.359, + "b2277" => 56.525, + "b1276" => 97.677, + "b4152" => 15.015, + "b1479" => 63.197, + "b4153" => 27.123, + "b4151" => 13.107, + "b2287" => 25.056, + "b0474" => 23.586, + "b2284" => 49.292, + "b1611" => 50.489, + "b0726" => 105.062, + "b2279" => 10.845, +) + +const ecoli_core_protein_stoichiometry = Dict( + #= + Data made up, each isozyme is assumed to be composed of + only one subunit each. + =# + "ACALD" => [[1.0], [1.0]], + "PTAr" => [[1.0], [1.0]], + "ALCD2x" => [[1.0], [1.0], [1.0]], + "PDH" => [[1.0, 1.0, 1.0]], + "PYK" => [[1.0], [1.0]], + "CO2t" => [[1.0]], + "MALt2_2" => [[1.0]], + "CS" => [[1.0]], + "PGM" => [[1.0], [1.0], [1.0]], + "TKT1" => [[1.0], [1.0]], + "ACONTa" => [[1.0], [1.0]], + "GLNS" => [[1.0], [1.0]], + "ICL" => [[1.0]], + "FBA" => [[1.0], [1.0], [1.0]], + "FORt2" => [[1.0], [1.0]], + "G6PDH2r" => [[1.0]], + "AKGDH" => [[1.0, 1.0, 1.0]], + "TKT2" => [[1.0], [1.0]], + "FRD7" => [[1.0, 1.0, 1.0, 1.0]], + "SUCOAS" => [[1.0, 1.0]], + "FBP" => [[1.0], [1.0]], + "ICDHyr" => [[1.0]], + "AKGt2r" => [[1.0]], + "GLUSy" => [[1.0, 1.0]], + "TPI" => [[1.0]], + "FORt" => [[1.0], [1.0]], + "ACONTb" => [[1.0], [1.0]], + "GLNabc" => [[1.0, 1.0, 1.0]], + "RPE" => [[1.0], [1.0]], + "ACKr" => [[1.0], [1.0], [1.0]], + "THD2" => [[1.0, 1.0]], + "PFL" => [[1.0, 1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]], + "RPI" => [[1.0], [1.0]], + "D_LACt2" => [[1.0], [1.0]], + "TALA" 
=> [[1.0], [1.0]], + "PPCK" => [[1.0]], + "ACt2r" => [[1.0]], + "NH4t" => [[1.0], [1.0]], + "PGL" => [[1.0]], + "NADTRHD" => [[1.0], [1.0, 1.0]], + "PGK" => [[1.0]], + "LDH_D" => [[1.0], [1.0]], + "ME1" => [[1.0]], + "PIt2r" => [[1.0], [1.0]], + "ATPS4r" => [ + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + ], + "GLCpts" => [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], + "GLUDy" => [[1.0]], + "CYTBD" => [[1.0, 1.0], [1.0, 1.0]], + "FUMt2_2" => [[1.0]], + "FRUpts2" => [[1.0, 1.0, 1.0, 1.0, 1.0]], + "GAPD" => [[1.0]], + "H2Ot" => [[1.0], [1.0]], + "PPC" => [[1.0]], + "NADH16" => [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]], + "PFK" => [[1.0], [1.0]], + "MDH" => [[1.0]], + "PGI" => [[1.0]], + "O2t" => [[1.0]], + "ME2" => [[1.0]], + "GND" => [[1.0]], + "SUCCt2_2" => [[1.0]], + "GLUN" => [[1.0], [1.0], [1.0]], + "ETOHt2r" => [[1.0]], + "ADK1" => [[1.0]], + "ACALDt" => [[1.0]], + "SUCDi" => [[1.0, 1.0, 1.0, 1.0]], + "ENO" => [[1.0]], + "MALS" => [[1.0], [1.0]], + "GLUt2r" => [[1.0]], + "PPS" => [[1.0]], + "FUM" => [[1.0], [1.0], [1.0]], +) + +const ecoli_core_reaction_kcats = Dict( + #= + Data taken from Heckmann, David, et al. "Machine learning applied to enzyme + turnover numbers reveals protein structural correlates and improves metabolic + models." Nature communications 9.1 (2018): 1-10. Assume forward and reverse + kcats are the same, and each isozyme has the same kcat. 
+ =# + "ACALD" => + [[568.1130792316333, 568.1130792316333], [568.856126503717, 568.856126503717]], + "PTAr" => [ + [1171.9703624351055, 1171.9703624351055], + [1173.7231032615289, 1173.7231032615289], + ], + "ALCD2x" => [ + [75.9547881894345, 75.9547881894345], + [75.96334310351442, 75.96334310351442], + [76.1472359297987, 76.1472359297987], + ], + "PDH" => [[529.7610874857239, 529.7610874857239]], + "PYK" => [ + [422.0226052080562, 422.0226052080562], + [422.1332899347833, 422.1332899347833], + ], + "MALt2_2" => [[234.03664660088714, 234.03664660088714]], + "CS" => [[113.29607453875758, 113.29607453875758]], + "PGM" => [ + [681.4234715886669, 681.4234715886669], + [681.6540601244343, 681.6540601244343], + [680.5234799168278, 680.5234799168278], + ], + "TKT1" => [ + [311.16139580671637, 311.16139580671637], + [311.20967965149947, 311.20967965149947], + ], + "ACONTa" => [ + [191.02308213992006, 191.02308213992006], + [191.03458045697235, 191.03458045697235], + ], + "GLNS" => [ + [89.83860937287024, 89.83860937287024], + [89.82177852142014, 89.82177852142014], + ], + "ICL" => [[17.45922330097792, 17.45922330097792]], + "FBA" => [ + [373.425646787578, 373.425646787578], + [372.74936053215833, 372.74936053215833], + [372.88627228768166, 372.88627228768166], + ], + "FORt2" => [ + [233.93045260179326, 233.93045260179326], + [233.84804009142908, 233.84804009142908], + ], + "G6PDH2r" => [[589.3761070080022, 589.3761070080022]], + "AKGDH" => [[264.48071159327156, 264.48071159327156]], + "TKT2" => [ + [467.4226876901618, 467.4226876901618], + [468.1440593542596, 468.1440593542596], + ], + "FRD7" => [[90.20637824912605, 90.20637824912605]], + "SUCOAS" => [[18.494387648707622, 18.494387648707622]], + "FBP" => [ + [568.5346256470805, 568.5346256470805], + [567.6367759041788, 567.6367759041788], + ], + "ICDHyr" => [[39.62446791678959, 39.62446791678959]], + "AKGt2r" => [[234.99097804446805, 234.99097804446805]], + "GLUSy" => [[33.262997317319055, 33.262997317319055]], + "TPI" 
=> [[698.301904211076, 698.301904211076]], + "FORt" => [ + [234.38391855848187, 234.38391855848187], + [234.34725576182922, 234.34725576182922], + ], + "ACONTb" => [ + [159.74612206327865, 159.74612206327865], + [159.81975755249232, 159.81975755249232], + ], + "GLNabc" => [[233.80358131677775, 233.80358131677775]], + "RPE" => [ + [1772.4850826683305, 1772.4850826683305], + [1768.8536177485582, 1768.8536177485582], + ], + "ACKr" => [ + [554.611547307207, 554.611547307207], + [555.112707891257, 555.112707891257], + [555.2464368932744, 555.2464368932744], + ], + "THD2" => [[24.739139801185537, 24.739139801185537]], + "PFL" => [ + [96.56316095411077, 96.56316095411077], + [96.65024313036014, 96.65024313036014], + [96.60761818004025, 96.60761818004025], + [96.49541118899961, 96.49541118899961], + ], + "RPI" => [ + [51.771578021074234, 51.771578021074234], + [51.81603467243345, 51.81603467243345], + ], + "D_LACt2" => [ + [233.51709131524734, 233.51709131524734], + [233.83187606098016, 233.83187606098016], + ], + "TALA" => [ + [109.05210545422884, 109.05210545422884], + [109.04246437049026, 109.04246437049026], + ], + "PPCK" => [[218.4287805666016, 218.4287805666016]], + "PGL" => [[2120.4297518987964, 2120.4297518987964]], + "NADTRHD" => [ + [186.99387360624777, 186.99387360624777], + [187.16629305266423, 187.16629305266423], + ], + "PGK" => [[57.641966636896335, 57.641966636896335]], + "LDH_D" => [ + [31.11118891764946, 31.11118891764946], + [31.12493425054357, 31.12493425054357], + ], + "ME1" => [[487.0161203971232, 487.0161203971232]], + "PIt2r" => [ + [233.8651331835765, 233.8651331835765], + [234.27374798581067, 234.27374798581067], + ], + "ATPS4r" => [ + [7120.878030435999, 7120.878030435999], + [7116.751386037507, 7116.751386037507], + ], + "GLCpts" => [ + [233.9009878400008, 233.9009878400008], + [233.66656882114864, 233.66656882114864], + [233.66893882934883, 233.66893882934883], + ], + "GLUDy" => [[105.32811069172409, 105.32811069172409]], + "CYTBD" => [ + 
[153.18512795009505, 153.18512795009505], + [153.2429537682265, 153.2429537682265], + ], + "FUMt2_2" => [[234.37495609395967, 234.37495609395967]], + "FRUpts2" => [[234.1933863380989, 234.1933863380989]], + "GAPD" => [[128.76795529111456, 128.76795529111456]], + "PPC" => [[165.52424516841342, 165.52424516841342]], + "NADH16" => [[971.7487306963936, 971.7487306963936]], + "PFK" => [ + [1000.4626204522712, 1000.4626204522712], + [1000.5875517343595, 1000.5875517343595], + ], + "MDH" => [[25.931655783969283, 25.931655783969283]], + "PGI" => [[468.11833198138834, 468.11833198138834]], + "ME2" => [[443.0973626307168, 443.0973626307168]], + "GND" => [[240.1252264230952, 240.1252264230952]], + "SUCCt2_2" => [[234.18109388303225, 234.18109388303225]], + "GLUN" => [ + [44.76358496525738, 44.76358496525738], + [44.84850207360875, 44.84850207360875], + [44.76185250415503, 44.76185250415503], + ], + "ADK1" => [[111.64869652600649, 111.64869652600649]], + "SUCDi" => [[680.3193833053011, 680.3193833053011]], + "ENO" => [[209.35855069219886, 209.35855069219886]], + "MALS" => [ + [252.7540503869977, 252.7540503869977], + [252.2359738678874, 252.2359738678874], + ], + "GLUt2r" => [[234.22890837451837, 234.22890837451837]], + "PPS" => [[706.1455885214322, 706.1455885214322]], + "FUM" => [ + [1576.8372583425075, 1576.8372583425075], + [1576.233088455828, 1576.233088455828], + [1575.9638204848736, 1575.9638204848736], + ], +) From 0068237fbc9cff33475318031eff67b4ed7801d0 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Fri, 25 Feb 2022 00:11:10 +0100 Subject: [PATCH 033/109] added smoment and small fixes to gecko --- src/analysis/enzyme_utils.jl | 51 +++++++ src/analysis/gecko.jl | 20 ++- src/analysis/smoment.jl | 277 +++++++++++++++++++++++++++++++++++ test/analysis/smoment.jl | 57 +++---- 4 files changed, 361 insertions(+), 44 deletions(-) diff --git a/src/analysis/enzyme_utils.jl b/src/analysis/enzyme_utils.jl index 31ed42056..7d708bfba 100644 --- a/src/analysis/enzyme_utils.jl +++ b/src/analysis/enzyme_utils.jl @@ -339,4 +339,55 @@ function _map_irrev_to_rev_ids(reaction_map, solution; protein_ids = []) end end +""" + remove_slow_isozymes!( + model::StandardModel; + reaction_kcats = Dict(), + protein_stoichiometry = Dict(), + protein_masses = Dict(), + ) + +Remove all but the fastest isozyme from each reaction in `model`. +Use the largest kcat (for, rev) for these calculations. Modifies all +the arguments in place. +""" +function remove_slow_isozymes!( + model::StandardModel; + reaction_kcats = Dict(), + protein_stoichiometry = Dict(), + protein_masses = Dict(), +) + for rid in reactions(model) + if _has_grr(model, rid) && haskey(reaction_kcats, rid) + kcat_effs = Float64[] + grrs = reaction_gene_association(model, rid) + for (i, grr) in enumerate(grrs) + push!( + kcat_effs, + dot( + protein_stoichiometry[rid][i], + [protein_masses[gid] for gid in grr], + ) / maximum(reaction_kcats[rid][i]), + ) + end + idx = argmin(kcat_effs) + model.reactions[rid].grr = [grrs[idx]] + reaction_kcats[rid] = [reaction_kcats[rid][idx]] + protein_stoichiometry[rid] = [protein_stoichiometry[rid][idx]] + end + end + + curated_gids = String[] + for rid in reactions(model) + if _has_grr(model, rid) + for grr in reaction_gene_association(model, rid) + append!(curated_gids, grr) + end + end + end + rm_gids = setdiff(genes(model), curated_gids) + delete!(model.genes, rm_gids) # remove genes that were deleted + + return nothing +end diff --git a/src/analysis/gecko.jl 
b/src/analysis/gecko.jl index 643dbe230..cca249bb1 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -23,10 +23,10 @@ Total enzyme capacity (sum of all enzyme concentrations multiplied by their molar mass) is constrained by `total_protein_mass`, a unitless mass fraction of enzyme mass to cell dry mass. The reaction fluxes and protein concentrations can be bounded by `lb_flux_measurements`, `ub_flux_measurements`, -`lb_protein_measurements`, and `ub_protein_measurements` respectively. The -reaction to be optimized is specified by `objective_id`. Both lower and upper -bounds need to be supplied if a reaction flux is to be bounded, likewise with -protein concentration bounds. Note, since the model uses irreversible reactions +`lb_protein_measurements`, and `ub_protein_measurements` respectively. Both +lower and upper bounds need to be supplied if a reaction flux is to be bounded, +likewise with protein concentration bounds. The reaction to be optimized is +specified by `objective_id`. Note, since the model uses irreversible reactions internally, you should append `"§FOR"` for the forward direction and `"§REV"` for the reverse direction in which ever reaction you want to optimize; this is not necesarry for the bound constraints. To optimize anything else, use the @@ -124,12 +124,16 @@ s.t. E * x = d M * x ≤ h ``` Returns `c, E, d, M, h, reaction_map, metabolite_map, protein_ids`, where -`reaction_map` +`reaction_map` shows the order of the columns (reactions) in `E`. Proteins +are ordered according to `protein_ids`, and follow after reactions. Use +[`_map_irrev_to_rev_ids`](@ref) to map the solution of an optimization +problem back to the original model's name space. 
Format of arguments are always in order of grr for each reaction `rxn_id`: -1) protein_stoichiometry: `Dict[rxn_id] = [[1,2,1,1],...]` -2) protein_masses: `Dict[p_id] = [mm, ...]` in units of kDa -3) reaction_kcat: `Dict[rxn_id] = [[kcat_for, kcat_rev],...]` for each complex +1) protein_stoichiometry: `Dict(rxn_id => [[1,2,1,1]])` +2) protein_masses: `Dict(p_id => [mm, ...])` in units of kDa +3) reaction_kcat: `Dict(rxn_id => [[kcat_for, kcat_rev]])` for each complex + Assumptions: 1) Each isozyme has a kcat (forward and reverse) for each reaction it catalyzes diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index e69de29bb..d3217a691 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -0,0 +1,277 @@ +""" + smoment( + model::StandardModel, + optimizer; + objective_id = "", + protein_stoichiometry = Dict(), + protein_masses = Dict(), + reaction_kcats = Dict(), + lb_flux_measurements = Dict(), + ub_flux_measurements = Dict(), + total_protein_mass = 0.0, + sense = MOI.MAX_SENSE, + modifications = [], + ) + +Perform enzyme capacity constrained flux balance analysis on `model` with +`optimizer` using the SMOMENT algorithm, see `Bekiaris, Pavlos Stephanos, and +Steffen Klamt. "Automatic construction of metabolic models with enzyme +constraints." BMC bioinformatics, 2020.` for implementation details. + +SMOMENT is a direct simplification of GECKO (despite it being named after the +MOMENT algorithm). Total enzyme capacity (sum of all enzyme concentrations +multiplied by their molar mass) is constrained by `total_protein_mass`, a +unitless mass fraction of enzyme mass to cell dry mass. The reaction fluxes can +be bounded by `lb_flux_measurements`, `ub_flux_measurements`. Both lower and +upper bounds need to be supplied if a reaction flux is to be bounded. The +reaction to be optimized is specified by `objective_id`. 
Note, since the +model uses irreversible reactions internally, you should append `"§FOR"` for the +forward direction and `"§REV"` for the reverse direction in which ever reaction +you want to optimize; this is not necesarry for the bound constraints. To +optimize anything else, use the lower level [`smoment_opt_problem`](@ref). +Futhermore, `"§"` is reserved for internal use as a delimiter, no reaction id +should contain that character. Also note, internally only the fastest isozyme per GRR +is used to enforce enzyme constraints, i.e. [`remove_slow_isozymes!`](@ref) is +called on `model`. + +The protein masses (in molar mass units) for each gene in the model should also +be supplied through `protein_masses`. The format is a dictionary of gene ids +mapped to molar masses. Additionally, the reaction turnover numbers (catalytic +constants, kcats) are supplied through `reaction_kcats`, which is also a +dictionary mapping reaction ids to kcats of each isozyme encoded by the +reaction's gene reaction rule. Each isozyme should have both a forward and +reverse kcat, so `reaction_kcats = Dict(rid => [[k1f, k1r], [k2f, k2r]], ...)` +for `rid` with two isozymes. Finally, the stoichiometry of each isozyme needs to +be supplied by `protein_stoichiometry`. The format is also a dictionary mapping +gene ids returned by [`reaction_gene_association`](@ref) to their stoichiometry, +e.g. `protein_stoichiometry = Dict(rid => [[1,1],[1,2]],...)` implies that the +first isozyme of `rid` is composed of two subunits, each present once in the +protein, while the second isozyme is composed of two subunits, but the second +subunit is present twice in the isozyme. + +The function returns a dictionary mapping reaction ids to their fluxes. Note, +the units depend on those used in `reaction_kcats` and `protein_masses`. Only +the protein and reaction flux bounds are optional kwargs, all other kwargs must +be supplied. 
Only reactions with kcats will have enzyme bounds associated with +them, but all isozymes are assumed to have data if data is supplied. + +Currently only `modifications` that change attributes of the `optimizer` are +supported. +""" +function smoment( + model::StandardModel, + optimizer; + objective_id = "", + protein_stoichiometry = Dict(), + protein_masses = Dict(), + reaction_kcats = Dict(), + lb_flux_measurements = Dict(), + ub_flux_measurements = Dict(), + total_protein_mass = 0.0, + sense = MOI.MAX_SENSE, + modifications = [], +) + pruned_model = deepcopy(model) # copy model so that original model is not modified + + remove_slow_isozymes!( + pruned_model; + reaction_kcats, + protein_stoichiometry, + protein_masses, + ) + + _, E, d, M, h, reaction_map, _ = smoment_opt_problem( + pruned_model; + protein_stoichiometry, + protein_masses, + reaction_kcats, + lb_flux_measurements, + ub_flux_measurements, + total_protein_mass, + ) + + opt_model = Model(optimizer) + x = @variable(opt_model, x[1:size(E, 2)]) + bid = reaction_map[objective_id] + @objective(opt_model, sense, x[bid]) + @constraint(opt_model, E * x .== d) + @constraint(opt_model, M * x .<= h) + + # apply the modifications, if any + for mod in modifications + mod(nothing, opt_model) + end + + optimize!(opt_model) + + _map_irrev_to_rev_ids(reaction_map, value.(x)) + +end + +""" + smoment_opt_problem( + model::StandardModel; + protein_stoichiometry, + protein_masses, + reaction_kcats, + lb_flux_measurements = Dict(), + ub_flux_measurements = Dict(), + total_protein_mass = 0.0, + ) + +Lower level function that returns the matrix form of a model with enzyme capacity +constraints, in SMOMENT format, see [`smoment`](@ref) for the higher level function. +``` +max/min cᵀ * x +s.t. E * x = d + M * x ≤ h +``` +Returns `c, E, d, M, h, reaction_map, metabolite_map`, where `reaction_map` +shows the order of the columns (reactions) in `E`. 
Use +[`_map_irrev_to_rev_ids`](@ref) to map the solution of an optimization problem +back to the original model's name space. Note, this function implements the most +basic version of SMOMENT, i.e. you cannot limit the concentration of any protein +(use [`gecko`](@ref) for that). Importantly, this function assumes that a +preprocessing step has been performed that changes the model so that each +reaction only has one GRR corresponding to the fastest isozyme. For this +preprocessing step, use [`remove_slow_isozymes!`](@ref). + +Format of arguments are always in order of grr for each reaction `rxn_id`: +1) protein_stoichiometry: `Dict(rxn_id => [[1,2,1,1]])` +2) protein_masses: `Dict(p_id => [mm, ...])` in units of kDa +3) reaction_kcat: `Dict(rxn_id => [[kcat_for, kcat_rev]])` NOTE: no isozymes. + +Assumptions: +1) No isozymes. +2) Both `lb_flux_measurements` and `ub_flux_measurements` have the same keys + +Notes: +1) The objective vector, `c` is not set +2) The parameters are the kcats and the total protein measurement +3) The symbol `§` is a reserved delimiter, do not use it in reaction or metabolite ids +""" +function smoment_opt_problem( + model::StandardModel; + protein_stoichiometry, + protein_masses, + reaction_kcats, + lb_flux_measurements = Dict(), + ub_flux_measurements = Dict(), + total_protein_mass = 0.0, +) + + S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = + _build_irreversible_stoichiometric_matrix(model) + + #: size of resultant model + n_reactions = size(S, 2) + n_metabolites = size(S, 1) + n_vars = n_reactions + 1 + + #: equality lhs + Se = zeros(1, n_reactions) + + for (rid, col_idx) in reaction_map + original_rid = string(split(rid, "§")[1]) + + # skip these entries + !haskey(reaction_kcats, original_rid) && continue + # these entries have kcats, only one GRR by assumption + grr = first(reaction_gene_association(model, original_rid)) + pstoich = first(protein_stoichiometry[original_rid]) + mw = dot(pstoich, [protein_masses[gid] for gid 
in grr]) + kcat = + contains(rid, "§FOR") ? first(reaction_kcats[original_rid])[1] : + first(reaction_kcats[original_rid])[2] + Se[1, col_idx] = -mw / kcat + end + + E = [ + S zeros(n_metabolites, 1) + Se 1.0 + ] + + # #: equality rhs + d = zeros(n_metabolites + 1) + + # #: need to set objective reaction outside + c = spzeros(n_vars) + + #: inequality constraints + M, h = _smoment_build_inequality_constraints( + n_reactions, + lb_flux_measurements, + ub_flux_measurements, + lb_fluxes, + ub_fluxes, + reaction_map, + total_protein_mass, + ) + + return c, E, d, M, h, reaction_map, metabolite_map +end + + +""" + _smoment_build_inequality_constraints( + n_reactions, + lb_flux_measurements, + ub_flux_measurements, + lb_fluxes, + ub_fluxes, + reaction_map, + ) + +Helper function to return functions describing the inequality +constraints for smoment. +""" +function _smoment_build_inequality_constraints( + n_reactions, + lb_flux_measurements, + ub_flux_measurements, + lb_fluxes, + ub_fluxes, + reaction_map, + total_protein_mass, +) + #: inequality lhs + M = Array( + [ + -I(n_reactions) zeros(n_reactions, 1) + I(n_reactions) zeros(n_reactions, 1) + zeros(1, n_reactions) 1 + ], + ) + + #: inequality rhs + for original_rid in keys(lb_flux_measurements) # only constrain if measurement available + lb = lb_flux_measurements[original_rid] + ub = ub_flux_measurements[original_rid] + rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] + + if lb > 0 # forward only + for rid in rids + contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) + contains(rid, "§FOR") && + (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) + end + elseif ub < 0 # reverse only + for rid in rids + contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) + contains(rid, "§REV") && + (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) + end + else # measurement does not rule our reversibility + for rid in rids + contains(rid, "§FOR") 
&& + (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) + contains(rid, "§REV") && + (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) + end + end + end + + h = Array([-lb_fluxes; ub_fluxes; total_protein_mass]) + + return M, h +end diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index d3c0ab81b..1c2567f3d 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -1,38 +1,23 @@ -using COBREXA, - Tulip, - JSON, - JuMP, - Statistics, - TerminalPager, - LinearAlgebra, - ForwardDiff, - CairoMakie, - SparseArrays +@testset "SMOMENT" begin + model = load_model(StandardModel, model_paths["e_coli_core.json"]) + model.reactions["EX_glc__D_e"].lb = -1000.0 # unconstraint because enzyme constraints take over + total_protein_mass = 100 # mg/gdW + rxn_fluxes = smoment( + model, + Tulip.Optimizer; + objective_id = "BIOMASS_Ecoli_core_w_GAM§FOR", + protein_stoichiometry = ecoli_core_protein_stoichiometry, + protein_masses = ecoli_core_protein_masses, + reaction_kcats = ecoli_core_reaction_kcats, + lb_flux_measurements = Dict("GLCpts" => -1.0), + ub_flux_measurements = Dict("GLCpts" => 12.0), + total_protein_mass, + sense = COBREXA.MOI.MAX_SENSE, + modifications = [ + change_optimizer_attribute("IPM_IterationsLimit", 1000), + ] + ) -#! remove isozymes with lower effectivity (kcat/total_mass), only one enzyme per reaction -remove_slow_isozymes!(model, reaction_kcats, protein_stoichiometry, protein_masses) - -#: SMOMENT -total_protein_mass = 200.0 # mg/gDW -model.reactions["EX_glc__D_e"].lb = -1000.0 #! 
unconstrain otherwise bound will be hit -obj_id = "BIOMASS_Ecoli_core_w_GAM§FOR" - -c, E, d, M, h, reaction_map, metabolite_map = smoment( - model, - protein_stoichiometry, - protein_masses, - reaction_kcats; - total_protein_mass, -); - -m = Model(Gurobi.Optimizer); -@variable(m, x[1:size(E, 2)]); -bid = reaction_map[obj_id] -@objective(m, Max, x[bid]); -@constraint(m, E * x .== d); -@constraint(m, M * x .<= h); -optimize!(m) - -reaction_fluxes_pre = map_ids_to_sols(reaction_map, value.(x)); -plot_flux_summary(reaction_fluxes_pre) \ No newline at end of file + @test isapprox(rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], 0.8907273630431708, atol = TEST_TOLERANCE) +end From a876a1a835788c01bcddbfbb3b623a6761e89d89 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Fri, 25 Feb 2022 00:14:53 +0100 Subject: [PATCH 034/109] format --- src/analysis/enzyme_utils.jl | 2 +- src/analysis/gecko.jl | 2 +- test/analysis/gecko.jl | 10 ++++++---- test/analysis/smoment.jl | 10 ++++++---- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/analysis/enzyme_utils.jl b/src/analysis/enzyme_utils.jl index 7d708bfba..a4419e31b 100644 --- a/src/analysis/enzyme_utils.jl +++ b/src/analysis/enzyme_utils.jl @@ -388,6 +388,6 @@ function remove_slow_isozymes!( end rm_gids = setdiff(genes(model), curated_gids) delete!(model.genes, rm_gids) # remove genes that were deleted - + return nothing end diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index cca249bb1..202cd5281 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -97,7 +97,7 @@ function gecko( for mod in modifications mod(nothing, opt_model) end - + optimize!(opt_model) _map_irrev_to_rev_ids(reaction_map, value.(x); protein_ids) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 97f09d9dc..07624991a 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -16,13 +16,15 @@ ub_flux_measurements = Dict("GLCpts" => 12.0), total_protein_mass, sense = COBREXA.MOI.MAX_SENSE, - 
modifications = [ - change_optimizer_attribute("IPM_IterationsLimit", 1000), - ] + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], ) prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens) - @test isapprox(rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], 0.812827846796761, atol = TEST_TOLERANCE) + @test isapprox( + rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], + 0.812827846796761, + atol = TEST_TOLERANCE, + ) @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) end diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 1c2567f3d..d469e7811 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -14,10 +14,12 @@ ub_flux_measurements = Dict("GLCpts" => 12.0), total_protein_mass, sense = COBREXA.MOI.MAX_SENSE, - modifications = [ - change_optimizer_attribute("IPM_IterationsLimit", 1000), - ] + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], ) - @test isapprox(rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], 0.8907273630431708, atol = TEST_TOLERANCE) + @test isapprox( + rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], + 0.8907273630431708, + atol = TEST_TOLERANCE, + ) end From 5a07fb50563b26e9e0c11eedf891600b0e043084 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Fri, 25 Feb 2022 09:48:53 +0100 Subject: [PATCH 035/109] update method signatures --- src/analysis/smoment.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index d3217a691..a5098baff 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -111,9 +111,9 @@ end """ smoment_opt_problem( model::StandardModel; - protein_stoichiometry, - protein_masses, - reaction_kcats, + protein_stoichiometry = Dict(), + protein_masses = Dict(), + reaction_kcats = Dict(), lb_flux_measurements = Dict(), ub_flux_measurements = Dict(), total_protein_mass = 0.0, @@ -152,9 +152,9 @@ Notes: """ function smoment_opt_problem( model::StandardModel; - protein_stoichiometry, - protein_masses, - reaction_kcats, + protein_stoichiometry = Dict(), + protein_masses = Dict(), + reaction_kcats = Dict(), lb_flux_measurements = Dict(), ub_flux_measurements = Dict(), total_protein_mass = 0.0, From 8ced80f6ee794c4d1ec1a6fe86b1bc6733bd5bec Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Fri, 25 Feb 2022 11:43:56 +0100 Subject: [PATCH 036/109] make explicit model change --- src/analysis/smoment.jl | 17 +++++------------ test/analysis/smoment.jl | 7 +++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index a5098baff..429a029b8 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -30,9 +30,10 @@ forward direction and `"§REV"` for the reverse direction in which ever reaction you want to optimize; this is not necesarry for the bound constraints. To optimize anything else, use the lower level [`smoment_opt_problem`](@ref). Futhermore, `"§"` is reserved for internal use as a delimiter, no reaction id -should contain that character. Also note, internally only the fastest isozyme per GRR -is used to enforce enzyme constraints, i.e. [`remove_slow_isozymes!`](@ref) is -called on `model`. 
+should contain that character. Also note, SMOMENT assumes that each reaction only has +a single enzyme (one GRR) associated with it. It is required that a model be modified to +ensure that this condition is met. For ease-of-use, [`remove_slow_isozymes!`](@ref) is +supplied to effect this. The protein masses (in molar mass units) for each gene in the model should also be supplied through `protein_masses`. The format is a dictionary of gene ids @@ -71,17 +72,9 @@ function smoment( sense = MOI.MAX_SENSE, modifications = [], ) - pruned_model = deepcopy(model) # copy model so that original model is not modified - - remove_slow_isozymes!( - pruned_model; - reaction_kcats, - protein_stoichiometry, - protein_masses, - ) _, E, d, M, h, reaction_map, _ = smoment_opt_problem( - pruned_model; + model; protein_stoichiometry, protein_masses, reaction_kcats, diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index d469e7811..bedd3198a 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -3,6 +3,13 @@ model.reactions["EX_glc__D_e"].lb = -1000.0 # unconstraint because enzyme constraints take over total_protein_mass = 100 # mg/gdW + remove_slow_isozymes!( + model; + reaction_kcats, + protein_stoichiometry, + protein_masses, + ) + rxn_fluxes = smoment( model, Tulip.Optimizer; From bacdf0651c8e84726f09a40030741f00f176cd98 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Fri, 25 Feb 2022 13:16:05 +0100 Subject: [PATCH 037/109] fix test --- test/analysis/smoment.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index bedd3198a..96243329a 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -5,9 +5,9 @@ remove_slow_isozymes!( model; - reaction_kcats, - protein_stoichiometry, - protein_masses, + protein_stoichiometry = ecoli_core_protein_stoichiometry, + protein_masses = ecoli_core_protein_masses, + reaction_kcats = ecoli_core_reaction_kcats, ) rxn_fluxes = smoment( From 784ecf588695060570ca339a765e8231de193b47 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Fri, 25 Feb 2022 20:08:24 +0100 Subject: [PATCH 038/109] reduce bound, unnecessarily high --- src/analysis/gecko.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 202cd5281..eb522f230 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -325,7 +325,7 @@ function _gecko_build_inequality_constraints( pid in protein_ids ] ub_proteins = [ - haskey(ub_protein_measurements, pid) ? ub_protein_measurements[pid] : 10_000.0 + haskey(ub_protein_measurements, pid) ? ub_protein_measurements[pid] : 1000.0 for pid in protein_ids ] From 9f8aa92100bf2278ff1a1d44ff9b118ba20acd72 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Sat, 2 Apr 2022 20:25:16 +0200 Subject: [PATCH 039/109] gecko streamlined --- src/analysis/enzyme_utils.jl | 351 +------------- src/analysis/flux_balance_analysis.jl | 4 +- src/analysis/gecko.jl | 335 ------------- src/base/solver.jl | 44 ++ src/base/types/MetabolicModel.jl | 11 + src/base/types/StandardModel.jl | 82 ++++ src/base/types/zGeckoModel.jl | 653 ++++++++++++++++++++++++++ src/base/types/zSMomentModel.jl | 0 test/analysis/gecko.jl | 43 +- 9 files changed, 827 insertions(+), 696 deletions(-) delete mode 100644 src/analysis/gecko.jl create mode 100644 src/base/types/zGeckoModel.jl create mode 100644 src/base/types/zSMomentModel.jl diff --git a/src/analysis/enzyme_utils.jl b/src/analysis/enzyme_utils.jl index a4419e31b..367bd8070 100644 --- a/src/analysis/enzyme_utils.jl +++ b/src/analysis/enzyme_utils.jl @@ -1,344 +1,3 @@ -""" - _bounds(model::StandardModel, rid::String) - -Return lower and upper bounds for `rid` in `model`. -""" -function _bounds(model::StandardModel, rid::String) - #TODO generalize this to other model types - model.reactions[rid].lb, model.reactions[rid].ub -end - -""" - _is_reversible(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is reversible. -""" -function _is_reversible(model::StandardModel, rid::String) - lb, ub = _bounds(model, rid) - lb < 0 && ub > 0 -end - -""" - _is_forward_only(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is forward only. -""" -function _is_forward_only(model::StandardModel, rid::String) - lb, ub = _bounds(model, rid) - lb >= 0 && ub > 0 -end - -""" - _is_backward_only(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is backward only. -""" -function _is_backward_only(model::StandardModel, rid::String) - lb, ub = _bounds(model, rid) - lb < 0 && ub <= 0 -end - -""" - _is_unidirectional(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is unidirectional. 
-""" -function _is_unidirectional(model::StandardModel, rid::String) - _is_forward_only(model, rid) || _is_backward_only(model, rid) -end - -""" - _is_blocked(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is blocked. -""" -function _is_blocked(model::StandardModel, rid::String) - lb, ub = _bounds(model, rid) - lb == ub == 0 -end - -""" - _has_isozymes(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is catalyzed by multiple enzymes, -i.e. it has isozymes according to the gene reaction rules. -""" -function _has_isozymes(model::StandardModel, rid::String) - length(reaction_gene_association(model, rid)) > 1 -end - -""" - _has_grr(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` has a gene reaction rule entry. -""" -function _has_grr(model::StandardModel, rid::String) - #TODO simplify this once COBREXA enforces universal rules for GRR representation - !isnothing(reaction_gene_association(model, rid)) && - reaction_gene_association(model, rid) != [[]] && - !isempty(first(reaction_gene_association(model, rid))) -end - -""" - _get_proteins_with_kcats(model::StandardModel, reaction_kcats) - -Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats`, -which is a dictionary mapping reaction ids to the kcats of each isozyme. Assume that if -a reaction has a kcat then each isozyme has a kcat. -""" -function _get_proteins_with_kcats(model::StandardModel, reaction_kcats) - unique( - vcat( - vcat( - [ - reaction_gene_association(model, rid) for - rid in reactions(model) if haskey(reaction_kcats, rid) - ]..., - )..., - ), - ) -end - -""" - _build_irreversible_stoichiometric_matrix(model::StandardModel) - -Return the stoichiometric matrix. All reactions are forward only i.e. only -positive fluxes are allowed. Include arm reactions. 
-""" -function _build_irreversible_stoichiometric_matrix(model::StandardModel) - # components used to build stoichiometric matrix - S_components = ( #TODO add size hints if possible - row_idxs = Vector{Int}(), - col_idxs = Vector{Int}(), - coeffs = Vector{Float64}(), - lbs = Vector{Float64}(), - ubs = Vector{Float64}(), - ) - - # establish the ordering in a named tuple - idxs = ( #: pseudo metabolites and reactions are added to model - met_idxs = Dict{String,Int}(), - rxn_idxs = Dict{String,Int}(), - max_rxn_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - max_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - pseudo_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - ) - #TODO for the counter thing, basically I wanted e.g. max_rxn_idx = 1 and then update it, - #TODO but named tuples are immutable... :( - - # fill the matrix entries - #: blocked treated as reversible because unclear what direction the reaction would go - for rid in reactions(model) - if _has_grr(model, rid) && _has_isozymes(model, rid) - if _is_unidirectional(model, rid) - dir = _is_forward_only(model, rid) ? "§FOR" : "§REV" - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - elseif _is_reversible(model, rid) || _is_blocked(model, rid) - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") - else - @warn "Unhandled bound type for $rid" - end - else # no grr or single enzyme only - if _is_unidirectional(model, rid) - dir = _is_forward_only(model, rid) ? 
"§FOR" : "§REV" - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - elseif _is_reversible(model, rid) || _is_blocked(model, rid) - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") - else - @warn "Unhandled bound type for $rid" - end - end - end - - S = sparse( - S_components.row_idxs, - S_components.col_idxs, - S_components.coeffs, - length(idxs.met_idxs), - length(idxs.rxn_idxs), - ) - - return S, S_components.lbs, S_components.ubs, idxs.rxn_idxs, idxs.met_idxs -end - - -""" - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - -Add entries to the components that will be used to build the stoichiometric matrix. -Simple variant that does not deal with isozymes and arm reactions. -""" -function _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - idxs.rxn_idxs[rid*dir] = idxs.max_rxn_idx[1] - idxs.max_rxn_idx[1] += 1 - fix_sign = dir == "§FOR" ? 1 : -1 # change direction of reaction - for (mid, coeff) in reaction_stoichiometry(model, rid) - if !haskey(idxs.met_idxs, mid) - idxs.met_idxs[mid] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - end - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[rid*dir]) - push!(S_components.coeffs, fix_sign * coeff) - end - lb, ub = abs.(_bounds(model, rid)) # assumes lb < ub - if dir == "§FOR" - _is_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, lb) - push!(S_components.ubs, ub) - else - _is_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, ub) - push!(S_components.ubs, lb) - end -end - -""" - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - -Add entries to the components that will be used to build the stoichiometric matrix. -Complex variant that deals with isozymes and arm reactions. 
-""" -function _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - # add pseudo metabolite - pm = "§PM$(idxs.pseudo_met_idx[1])" - idxs.pseudo_met_idx[1] += 1 - idxs.met_idxs[pm] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - # find half reactions to get arm reaction - lhs = [] - rhs = [] - for (mid, coeff) in reaction_stoichiometry(model, rid) - if !haskey(idxs.met_idxs, mid) - idxs.met_idxs[mid] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - end - if coeff <= 0 - push!(lhs, (mid, coeff)) - else - push!(rhs, (mid, coeff)) - end - end - product_half_reaction = dir == "§FOR" ? rhs : lhs - reagent_half_reaction = dir == "§FOR" ? lhs : rhs - # add arm reaction - fix_sign = dir == "§FOR" ? 1 : -1 # change direction of reaction - pr = rid * "§ARM" * dir - idxs.rxn_idxs[pr] = idxs.max_rxn_idx[1] #! this needs to get added first because of blocked possibility - idxs.max_rxn_idx[1] += 1 - push!(S_components.row_idxs, idxs.met_idxs[pm]) - push!(S_components.col_idxs, idxs.rxn_idxs[pr]) - push!(S_components.coeffs, 1) - for (mid, coeff) in reagent_half_reaction - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[pr]) - push!(S_components.coeffs, fix_sign * coeff) - end - # add bounds for ARM reaction that corresponds to original model's bounds - lb, ub = abs.(_bounds(model, rid)) # assumes lb < ub - if dir == "§FOR" - _is_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, lb) - push!(S_components.ubs, ub) - else - _is_reversible(model, rid) ? 
push!(S_components.lbs, 0) : - push!(S_components.lbs, ub) - push!(S_components.ubs, lb) - end - # add isozyme reactions - for (i, _) in enumerate(reaction_gene_association(model, rid)) - iso_rid = rid * "§ISO$i" * dir - idxs.rxn_idxs[iso_rid] = idxs.max_rxn_idx[1] - idxs.max_rxn_idx[1] += 1 - push!(S_components.row_idxs, idxs.met_idxs[pm]) - push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) - push!(S_components.coeffs, -1) - for (mid, coeff) in product_half_reaction - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) - push!(S_components.coeffs, fix_sign * coeff) - end - # add bounds - push!(S_components.lbs, 0) - if _is_blocked(model, rid) - push!(S_components.ubs, 0) - else - push!(S_components.ubs, 10_000) # arbitrary upper bound - end - end -end - -""" - _add_enzyme_variable(model, iso_num, rid, original_rid, protein_stoichiometry, reaction_kcats, E_components, col_idx, protein_ids) - -Helper function to add an column into the enzyme stoichiometric matrix. -""" -function _add_enzyme_variable( - model, - iso_num, - rid, - original_rid, - protein_stoichiometry, - reaction_kcats, - E_components, - col_idx, - protein_ids, -) - grr = reaction_gene_association(model, original_rid)[iso_num] - pstoich = protein_stoichiometry[original_rid][iso_num] - kcat = - contains(rid, "§FOR") ? reaction_kcats[original_rid][iso_num][1] : - reaction_kcats[original_rid][iso_num][2] - for (idx, pid) in enumerate(grr) - push!(E_components.row_idxs, first(indexin([pid], protein_ids))) - push!(E_components.col_idxs, col_idx) - push!(E_components.coeffs, -pstoich[idx] / kcat) - end -end - -""" - _order_id_to_idx_dict(id_to_idx_dict) - -Return the keys of `id_to_idx_dict` sorted by the values, which -are taken to be the indices. 
-""" -function _order_id_to_idx_dict(dmap) - ks = collect(keys(dmap)) - vs = collect(values(dmap)) - return ks[sortperm(vs)] -end - -""" - _map_irrev_to_rev_ids(reaction_map, protein_ids, solution) - -Return dictionaries of reaction ids mapped to fluxes, -and protein ids mapped to concentrations using `reaction_map` to -determine the ids of fluxes and `protein_ids` for the gene ids. -The solution in `solution` is used to fill the dictionaries. -""" -function _map_irrev_to_rev_ids(reaction_map, solution; protein_ids = []) - reaction_flux = Dict{String,Float64}() - for (k, i) in reaction_map - contains(k, "§ISO") && continue # §ISO§FOR and §ISO§REV need to be ignored - rid = split(k, "§")[1] - v = contains(k, "§FOR") ? solution[i] : -solution[i] - reaction_flux[rid] = get(reaction_flux, rid, 0) + v - end - - if isempty(protein_ids) - return reaction_flux - else - n_reactions = length(reaction_map) - protein_flux = Dict{String,Float64}() - for (i, pid) in enumerate(protein_ids) - protein_flux[pid] = solution[n_reactions+i] - end - return reaction_flux, protein_flux - end -end - """ remove_slow_isozymes!( model::StandardModel; @@ -391,3 +50,13 @@ function remove_slow_isozymes!( return nothing end + +""" + protein_dict(model::GeckoModel, opt_model) + +Return a dictionary mapping protein ids to their concentrations. +""" +protein_dict(model::GeckoModel, opt_model) = + is_solved(opt_model) ? 
+ last(_map_irrev_to_rev_ids(model.data.reaction_map, value.(opt_model[:x]); protein_ids=model.data.protein_ids)) : nothing + diff --git a/src/analysis/flux_balance_analysis.jl b/src/analysis/flux_balance_analysis.jl index ffd9375b1..dfbcbbffb 100644 --- a/src/analysis/flux_balance_analysis.jl +++ b/src/analysis/flux_balance_analysis.jl @@ -70,15 +70,15 @@ biomass_reaction_id = findfirst(model.reactions, "BIOMASS_Ecoli_core_w_GAM") modified_solution = flux_balance_analysis(model, GLPK.optimizer; modifications=[change_objective(biomass_reaction_id)]) ``` - """ function flux_balance_analysis( model::M, optimizer; modifications = [], + kwargs..., ) where {M<:MetabolicModel} - opt_model = make_optimization_model(model, optimizer) + opt_model = make_optimization_model(model, optimizer; kwargs...) for mod in modifications mod(model, opt_model) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl deleted file mode 100644 index eb522f230..000000000 --- a/src/analysis/gecko.jl +++ /dev/null @@ -1,335 +0,0 @@ -""" - gecko( - model::StandardModel, - optimizer; - objective_id = "", - protein_stoichiometry = Dict(), - protein_masses = Dict(), - reaction_kcats = Dict(), - lb_protein_measurements = Dict(), - ub_protein_measurements = Dict(), - lb_flux_measurements = Dict(), - ub_flux_measurements = Dict(), - total_protein_mass = 0.0, - ) - -Perform flux balance analysis on `model` with `optimizer`, using GECKO to -incorporate enzyme capacity and kinetic constraints. See `Sánchez, Benjamín J., -et al. "Improving the phenotype predictions of a yeast genome‐scale metabolic -model by incorporating enzymatic constraints." Molecular systems biology, 2017.` -for implementation details. - -Total enzyme capacity (sum of all enzyme concentrations multiplied by their -molar mass) is constrained by `total_protein_mass`, a unitless mass fraction of -enzyme mass to cell dry mass. 
The reaction fluxes and protein concentrations can -be bounded by `lb_flux_measurements`, `ub_flux_measurements`, -`lb_protein_measurements`, and `ub_protein_measurements` respectively. Both -lower and upper bounds need to be supplied if a reaction flux is to be bounded, -likewise with protein concentration bounds. The reaction to be optimized is -specified by `objective_id`. Note, since the model uses irreversible reactions -internally, you should append `"§FOR"` for the forward direction and `"§REV"` -for the reverse direction in which ever reaction you want to optimize; this is - not necesarry for the bound constraints. To optimize anything else, use the -lower level [`gecko_opt_problem`](@ref). Futhermore, `"§"` is reserved for -internal use as a delimiter, no reaction id should contain that character. - -The protein masses (in molar mass units) for each gene in the model should also -be supplied through `protein_masses`. The format is a dictionary of gene ids -mapped to molar masses. Additionally, the reaction turnover numbers (catalytic -constants, kcats) are supplied through `reaction_kcats`, which is also a -dictionary mapping reaction ids to kcats of each isozyme encoded by the -reaction's gene reaction rule. Each isozyme should have both a forward and -reverse kcat, so `reaction_kcats = Dict(rid => [[k1f, k1r], [k2f, k2r]], ...)` -for `rid` with two isozymes. Finally, the stoichiometry of each isozyme needs to -be supplied by `protein_stoichiometry`. The format is also a dictionary mapping -gene ids returned by [`reaction_gene_association`](@ref) to their stoichiometry, -e.g. `protein_stoichiometry = Dict(rid => [[1,1],[1,2]],...)` implies that the -first isozyme of `rid` is composed of two subunits, each present once in the -protein, while the second isozyme is composed of two subunits, but the second -subunit is present twice in the isozyme. 
- -The function returns a dictionary mapping reaction ids to their fluxes, as well -as a dictionary mapping gene ids to their concentrations. Note, the units depend -on those used in `reaction_kcats` and `protein_masses`. Only the protein and -reaction flux bounds are optional kwargs, all other kwargs must be supplied. -Only reactions with kcats will have enzyme bounds associated with them, but all -isozymes are assumed to have data if data is supplied. - -Currently only `modifications` that change attributes of the `optimizer` are -supported. -""" -function gecko( - model::StandardModel, - optimizer; - objective_id = "", - protein_stoichiometry = Dict(), - protein_masses = Dict(), - reaction_kcats = Dict(), - lb_protein_measurements = Dict(), - ub_protein_measurements = Dict(), - lb_flux_measurements = Dict(), - ub_flux_measurements = Dict(), - total_protein_mass = 0.0, - sense = MOI.MAX_SENSE, - modifications = [], -) - - _, E, d, M, h, reaction_map, _, protein_ids = gecko_opt_problem( - model; - protein_stoichiometry, - protein_masses, - reaction_kcats, - lb_protein_measurements, - ub_protein_measurements, - lb_flux_measurements, - ub_flux_measurements, - total_protein_mass, - ) - - opt_model = Model(optimizer) - x = @variable(opt_model, x[1:size(E, 2)]) - bid = reaction_map[objective_id] - @objective(opt_model, sense, x[bid]) - @constraint(opt_model, E * x .== d) - @constraint(opt_model, M * x .<= h) - - # apply the modifications, if any - for mod in modifications - mod(nothing, opt_model) - end - - optimize!(opt_model) - - _map_irrev_to_rev_ids(reaction_map, value.(x); protein_ids) -end - -""" - gecko_opt_problem( - model::StandardModel; - protein_stoichiometry = Dict(), - protein_masses = Dict(), - reaction_kcats = Dict(), - lb_protein_measurements = Dict(), - ub_protein_measurements = Dict(), - lb_flux_measurements = Dict(), - ub_flux_measurements = Dict(), - total_protein_mass = 0.0, - ) - -Lower level function that returns the matrix form of a model with 
enzyme capacity -constraints, in GECKO format, see [`gecko`](@ref) for the higher level function. -``` -max/min cᵀ * x -s.t. E * x = d - M * x ≤ h -``` -Returns `c, E, d, M, h, reaction_map, metabolite_map, protein_ids`, where -`reaction_map` shows the order of the columns (reactions) in `E`. Proteins -are ordered according to `protein_ids`, and follow after reactions. Use -[`_map_irrev_to_rev_ids`](@ref) to map the solution of an optimization -problem back to the original model's name space. - -Format of arguments are always in order of grr for each reaction `rxn_id`: -1) protein_stoichiometry: `Dict(rxn_id => [[1,2,1,1]])` -2) protein_masses: `Dict(p_id => [mm, ...])` in units of kDa -3) reaction_kcat: `Dict(rxn_id => [[kcat_for, kcat_rev]])` for each complex - - -Assumptions: -1) Each isozyme has a kcat (forward and reverse) for each reaction it catalyzes -2) Only reactions with kcats have enzyme constraints -3) Both `lb_flux_measurements` and `ub_flux_measurements` have the same keys - -Notes: -1) The objective vector, `c` is not set -2) The parameters are the kcats and the total protein measurement -3) The symbol `§` is a reserved delimiter, do not use it in reaction or metabolite ids -""" -function gecko_opt_problem( - model::StandardModel; - protein_stoichiometry = Dict(), - protein_masses = Dict(), - reaction_kcats = Dict(), - lb_protein_measurements = Dict(), - ub_protein_measurements = Dict(), - lb_flux_measurements = Dict(), - ub_flux_measurements = Dict(), - total_protein_mass = 0.0, -) - S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model) - - #: find all gene products that have kcats associated with them - protein_ids = _get_proteins_with_kcats(model, reaction_kcats) - - #: size of resultant model - n_reactions = size(S, 2) - n_proteins = length(protein_ids) - n_metabolites = size(S, 1) - n_vars = n_reactions + n_proteins - - #: equality lhs - E_components = ( #TODO add size hints if possible - 
row_idxs = Vector{Int}(), - col_idxs = Vector{Int}(), - coeffs = Vector{Float64}(), - ) - - for (rid, col_idx) in reaction_map - original_rid = string(split(rid, "§")[1]) - - # skip these entries - contains(rid, "§ARM") && continue - !haskey(reaction_kcats, original_rid) && continue - - # these entries have kcats - if contains(rid, "§ISO") - iso_num = parse( - Int, - replace( - first(filter(startswith("ISO"), split(rid, "§")[2:end])), - "ISO" => "", - ), - ) - else # only one enzyme - iso_num = 1 - end - - # add all entries to column of matrix - _add_enzyme_variable( - model, - iso_num, # only one enzyme - rid, - original_rid, - protein_stoichiometry, - reaction_kcats, - E_components, - col_idx, - protein_ids, - ) - end - - Se = sparse( - E_components.row_idxs, - E_components.col_idxs, - E_components.coeffs, - n_proteins, - n_reactions, - ) - - E = [ - S zeros(n_metabolites, n_proteins) - Se I(n_proteins) - ] - - #: equality rhs - d = zeros(n_metabolites + n_proteins) - - #: need to set objective reaction outside - c = spzeros(n_vars) - - #: inequality constraints - M, h = _gecko_build_inequality_constraints( - lb_protein_measurements, - ub_protein_measurements, - protein_ids, - protein_masses, - n_reactions, - n_proteins, - lb_flux_measurements, - ub_flux_measurements, - lb_fluxes, - ub_fluxes, - reaction_map, - total_protein_mass, - ) - - return c, E, d, M, h, reaction_map, metabolite_map, protein_ids -end - -""" - _gecko_build_inequality_constraints( - lb_protein_measurements, - ub_protein_measurements, - protein_ids, - protein_masses, - n_reactions, - n_proteins, - lb_flux_measurements, - ub_flux_measurements, - lb_fluxes, - ub_fluxes, - reaction_map, - total_protein_mass, - ) - -Helper function to build inequality constraints. Returns the inequality constraint in matrix format. 
-""" -function _gecko_build_inequality_constraints( - lb_protein_measurements, - ub_protein_measurements, - protein_ids, - protein_masses, - n_reactions, - n_proteins, - lb_flux_measurements, - ub_flux_measurements, - lb_fluxes, - ub_fluxes, - reaction_map, - total_protein_mass, -) - #: inequality lhs - mw_proteins = [protein_masses[pid] for pid in protein_ids] - M = Array( - [ - -I(n_reactions) zeros(n_reactions, n_proteins) - I(n_reactions) zeros(n_reactions, n_proteins) - zeros(n_proteins, n_reactions) -I(n_proteins) - zeros(n_proteins, n_reactions) I(n_proteins) - zeros(1, n_reactions) mw_proteins' - ], - ) - - #: inequality rhs - for original_rid in keys(lb_flux_measurements) # only constrain if measurement available - lb = lb_flux_measurements[original_rid] - ub = ub_flux_measurements[original_rid] - rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] - filter!(x -> !contains(x, "§ISO"), rids) # remove isozyme partial reactions (ARM reactions take care of these) - - if lb > 0 # forward only - for rid in rids - contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) - contains(rid, "§FOR") && - (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) - end - elseif ub < 0 # reverse only - for rid in rids - contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) - contains(rid, "§REV") && - (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) - end - else # measurement does not rule our reversibility - for rid in rids - contains(rid, "§FOR") && - (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) - contains(rid, "§REV") && - (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) - end - end - end - - lb_proteins = [ - haskey(lb_protein_measurements, pid) ? lb_protein_measurements[pid] : 0.0 for - pid in protein_ids - ] - ub_proteins = [ - haskey(ub_protein_measurements, pid) ? 
ub_protein_measurements[pid] : 1000.0 - for pid in protein_ids - ] - - h = Array([-lb_fluxes; ub_fluxes; -lb_proteins; ub_proteins; total_protein_mass]) - - return M, h -end diff --git a/src/base/solver.jl b/src/base/solver.jl index 07564c4cd..425f8b2cd 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -28,6 +28,10 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.M isempty(C) || @constraint(optimization_model, c_lbs, cl .<= coupling(model) * x) # coupling lower bounds isempty(C) || @constraint(optimization_model, c_ubs, coupling(model) * x .<= cu) # coupling upper bounds + enzyme_vec, enzyme_mass = enzyme_capacity(model) # both are nothing if not present + !isnothing(enzyme_vec) && + @constraint(optimization_model, enz_cap, dot(enzyme_vec, x) <= enzyme_mass) + return optimization_model end @@ -123,3 +127,43 @@ flux_dict(model, flux_balance_analysis(model, ...)) flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = is_solved(opt_model) ? Dict(reactions(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing + +""" + flux_dict(model::GeckoModel, opt_model) + +Specialization to format solved data for `GeckoModel`s but maps +the solution back into the namespace of the underlying model (the +original ids). +""" +flux_dict(model::GeckoModel, opt_model) = + is_solved(opt_model) ? + _map_irrev_to_rev_ids(model.data.reaction_map, value.(opt_model[:x])) : nothing + +""" + _map_irrev_to_rev_ids(reaction_map, solution; protein_ids = []) + +Return dictionaries of reaction ids mapped to fluxes, +and protein ids mapped to concentrations using `reaction_map` to +determine the ids of fluxes and `protein_ids` for the gene ids. +The solution in `solution` is used to fill the dictionaries. 
+""" +function _map_irrev_to_rev_ids(reaction_map, solution; protein_ids = []) + reaction_flux = Dict{String,Float64}() + for (k, i) in reaction_map + contains(k, "§ISO") && continue # §ISO§FOR and §ISO§REV need to be ignored + rid = split(k, "§")[1] + v = contains(k, "§FOR") ? solution[i] : -solution[i] + reaction_flux[rid] = get(reaction_flux, rid, 0) + v + end + + if isempty(protein_ids) + return reaction_flux + else + n_reactions = length(reaction_map) + protein_flux = Dict{String,Float64}() + for (i, pid) in enumerate(protein_ids) + protein_flux[pid] = solution[n_reactions+i] + end + return reaction_flux, protein_flux + end +end diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl index d5d2576e9..bbaa72dab 100644 --- a/src/base/types/MetabolicModel.jl +++ b/src/base/types/MetabolicModel.jl @@ -314,6 +314,17 @@ function gene_notes(model::MetabolicModel, gene_id::String)::Notes return Dict() end +""" + enzyme_capacity(model::MetabolicModel) + +Return enzyme capacity inequality constraint vector and bound, or nothing +if it doesn't exist in the model. +""" +function enzyme_capacity(model::MetabolicModel) + #TODO this needs a type + nothing, nothing +end + """ reaction_name(model::MetabolicModel, rid::String) diff --git a/src/base/types/StandardModel.jl b/src/base/types/StandardModel.jl index e842525e9..56248aa10 100644 --- a/src/base/types/StandardModel.jl +++ b/src/base/types/StandardModel.jl @@ -382,3 +382,85 @@ function Base.convert(::Type{StandardModel}, model::MetabolicModel) genes = modelgenes, ) end + +#TODO generalize these to other model types + +""" + reaction_bounds(model::StandardModel, rid::String) + +Return lower and upper bounds for `rid` in `model`. +""" +function reaction_bounds(model::StandardModel, rid::String) + model.reactions[rid].lb, model.reactions[rid].ub +end + +""" + is_reaction_reversible(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is reversible. 
+""" +function is_reaction_reversible(model::StandardModel, rid::String) + lb, ub = reaction_bounds(model, rid) + lb < 0 && ub > 0 +end + +""" + is_reaction_forward_only(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is forward only. +""" +function is_reaction_forward_only(model::StandardModel, rid::String) + lb, ub = reaction_bounds(model, rid) + lb >= 0 && ub > 0 +end + +""" + is_reaction_backward_only(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is backward only. +""" +function is_reaction_backward_only(model::StandardModel, rid::String) + lb, ub = reaction_bounds(model, rid) + lb < 0 && ub <= 0 +end + +""" + is_reaction_unidirectional(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is unidirectional. +""" +function is_reaction_unidirectional(model::StandardModel, rid::String) + is_reaction_forward_only(model, rid) || is_reaction_backward_only(model, rid) +end + +""" + is_reaction_blocked(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is blocked. +""" +function is_reaction_blocked(model::StandardModel, rid::String) + lb, ub = reaction_bounds(model, rid) + lb == ub == 0 +end + +""" + has_reaction_isozymes(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` is catalyzed by multiple enzymes, +i.e. it has isozymes according to the gene reaction rules. +""" +function has_reaction_isozymes(model::StandardModel, rid::String) + length(reaction_gene_association(model, rid)) > 1 +end + +""" + has_reaction_grr(model::StandardModel, rid::String) + +Check if reaction `rid` in `model` has a gene reaction rule entry. 
+""" +function has_reaction_grr(model::StandardModel, rid::String) + #TODO simplify this once COBREXA enforces universal rules for GRR representation + !isnothing(reaction_gene_association(model, rid)) && + reaction_gene_association(model, rid) != [[]] && + !isempty(first(reaction_gene_association(model, rid))) +end \ No newline at end of file diff --git a/src/base/types/zGeckoModel.jl b/src/base/types/zGeckoModel.jl new file mode 100644 index 000000000..bb0338990 --- /dev/null +++ b/src/base/types/zGeckoModel.jl @@ -0,0 +1,653 @@ +""" + mutable struct GeckoData + +Holds the already constructed GECKO problem. This is more efficient +than constructing the matrices from scratch each time the model is run. + +# Fields +``` +c::SparseVector{Float64, Int64} +E::SparseMatrixCSC{Float64, Int64} +d::SparseVector{Float64, Int64} +M::SparseMatrixCSC{Float64, Int64} +h::SparseVector{Float64, Int64} +reaction_map::Dict{String,Int} +metabolite_map::Dict{String,Int} +protein_ids::Vector{String} +``` +""" +mutable struct GeckoData + c::SparseVector{Float64,Int64} + E::SparseMatrixCSC{Float64,Int64} + d::SparseVector{Float64,Int64} + M::SparseMatrixCSC{Float64,Int64} + h::SparseVector{Float64,Int64} + reaction_map::Dict{String,Int} + metabolite_map::Dict{String,Int} + protein_ids::Vector{String} +end + +""" + GeckoData() + +Empty constructor. +""" +GeckoData() = GeckoData( + spzeros(0), + spzeros(0, 0), + spzeros(0), + spzeros(0, 0), + spzeros(0), + Dict{String,Int}(), + Dict{String,Int}(), + Vector{String}(), +) + +""" + mutable struct GeckoModel <: MetabolicModel + +A model that incorporates enzyme capacity and kinetic constraints via the GECKO +formulation. See `Sánchez, Benjamín J., et al. "Improving the phenotype +predictions of a yeast genome‐scale metabolic model by incorporating enzymatic +constraints." Molecular systems biology, 2017.` for implementation details. 
+ +Total enzyme capacity (sum of all enzyme concentrations multiplied by their +molar mass) is constrained by `total_protein_mass`, a unitless mass fraction of +enzyme mass to cell dry mass. The reaction fluxes and protein concentrations can +be bounded by `flux_measurements` and `protein_measurements` respectively. Both +lower and upper bounds need to be supplied (as a tuple) if a reaction flux is to +be bounded, likewise with protein concentration bounds. + +Note, since the model uses irreversible reactions internally, `"§FOR"` (for the +forward direction) and `"§REV"` (for the reverse direction) are appended to each +reaction internally. Furthermore, `"§"` is reserved for internal use as a +delimiter, no reaction id should contain that character. + +The protein masses (in molar mass units) for each gene in the model should also +be supplied through `protein_masses`. The format is a dictionary of gene ids +mapped to molar masses. Additionally, the reaction turnover numbers (catalytic +constants, kcats) are supplied through `reaction_kcats`, which is also a +dictionary mapping reaction ids to kcats of each isozyme encoded by the +reaction's gene reaction rule. Each isozyme should have both a forward and +reverse kcat, so `reaction_kcats = Dict(rid => [[k1f, k1r], [k2f, k2r]], ...)` +for `rid` with two isozymes. Finally, the stoichiometry of each isozyme needs to +be supplied by `reaction_protein_stoichiometry`. The format is also a dictionary mapping +gene ids returned by [`reaction_gene_association`](@ref) to their stoichiometry, +e.g. `reaction_protein_stoichiometry = Dict(rid => [[1,1],[1,2]],...)` implies that the +first isozyme of `rid` is composed of two subunits, each present once in the +protein, while the second isozyme is composed of two subunits, but the second +subunit is present twice in the isozyme. + +Note, the units depend on those used in `reaction_kcats` and `protein_masses`. 
+Only the protein and reaction flux bounds are optional parameters, all other +parameters must be supplied. Only reactions with kcats will have enzyme bounds +associated with them, but all isozymes are assumed to have data if data is +supplied. + +Currently only `modifications` that change attributes of the `optimizer` are +supported. + +To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel` +to run an analysis on it. + +See also: [`StandardModel`](@ref) + +# Fields +``` +smodel::StandardModel +data::GeckoData +reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] +reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] +protein_masses::Dict{String,Float64} +total_protein_mass::Float64 +flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) +protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) +``` +""" +mutable struct GeckoModel <: MetabolicModel + smodel::StandardModel + data::GeckoData + reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] + reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] + protein_masses::Dict{String,Float64} + total_protein_mass::Float64 + flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) + protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) +end + +""" + GeckoModel( + model::MetabolicModel; + reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), + reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), + protein_masses = Dict{String,Float64}(), + total_protein = 0.0, + flux_measurements = Dict{String,Tuple{Float64,Float64}}(), + protein_measurements = Dict{String,Tuple{Float64,Float64}}(), + ) + +Constructor for `GeckoModel`. 
+""" +function GeckoModel( + model::MetabolicModel; + reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), + reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), + protein_masses = Dict{String,Float64}(), + total_protein = 0.0, + flux_measurements = Dict{String,Tuple{Float64,Float64}}(), + protein_measurements = Dict{String,Tuple{Float64,Float64}}(), +) + gm = GeckoModel( + convert(StandardModel, model), + GeckoData(), # empty + reaction_kcats, + reaction_protein_stoichiometry, + protein_masses, + total_protein, + flux_measurements, + protein_measurements, + ) + + # build data in GeckoModel + build_geckomodel_internals!(gm) + + return gm +end + +""" + stoichiometry(model::GeckoModel) + +Return stoichiometry matrix that includes enzymes as metabolites. +""" +stoichiometry(model::GeckoModel) = model.data.E + +""" + balance(model::GeckoModel) + +Return stoichiometric balance. +""" +balance(model::GeckoModel) = model.data.d + +""" + objective(model::GeckoModel) + +Return objective of `model`. +""" +objective(model::GeckoModel) = model.data.c + +@_inherit_model_methods GeckoModel (rid::String,) smodel (rid,) reaction_gene_association reaction_stoichiometry reaction_bounds is_reaction_reversible is_reaction_forward_only is_reaction_backward_only is_reaction_unidirectional is_reaction_blocked has_reaction_isozymes has_reaction_grr + +""" + reactions(model::GeckoModel) + +Returns the reactions ordered according to the stoichiometric matrix. Note, call [`genes`](@ref) +to get the order of the remaining variables. +""" +reactions(model::GeckoModel) = _order_id_to_idx_dict(model.data.reaction_map) + +""" + metabolites(model::GeckoModel) + +Returns the metabolites ordered according to the stoichiometric matrix. +""" +metabolites(model::GeckoModel) = _order_id_to_idx_dict(model.data.metabolite_map) + +""" + genes(model::GeckoModel) + +Returns the genes (proteins) in the order in which they appear as variables in the model. 
+""" +genes(model::GeckoModel) = model.data.protein_ids + +""" + _order_id_to_idx_dict(id_to_idx_dict) + +Return the keys of `id_to_idx_dict` sorted by the values, which +are taken to be the indices. This is a helper function for +[`reactions`](@ref) and [`metabolites`](@ref). +""" +function _order_id_to_idx_dict(dmap) + ks = collect(keys(dmap)) + vs = collect(values(dmap)) + return ks[sortperm(vs)] +end + +""" + bounds(model::GeckoModel) + +Return variable bounds for `GeckoModel`. +""" +function bounds(model::GeckoModel) + n_rxns = length(model.data.reaction_map) + n_prots = length(model.data.protein_ids) + lbs = [-model.data.h[1:n_rxns]; -model.data.h[2*n_rxns.+(1:n_prots)]] + ubs = [model.data.h[n_rxns.+(1:n_rxns)]; model.data.h[2*n_rxns+n_prots.+(1:n_prots)]] + return lbs, ubs +end + +""" + enzyme_capacity(model::GeckoModel) + +Return enzyme capacity inequality constraint vector and bound, or nothing +if it doesn't exist in the model. +""" +enzyme_capacity(model::GeckoModel) = (model.data.M[end, :], model.data.h[end]) + +""" + build_geckomodel_internals!(model::GeckoModel) + +Lower level function that updates the matrix form of a model with enzyme +capacity constraints, in GECKO format. + +Specifically, updates `model.data` with the vector and matrix coefficients `c, +E, d, M, h` satisfying +``` +opt cᵀ * x +s.t. E * x = d + M * x ≤ h +``` +as well as `reaction_map, metabolite_map, protein_ids`, where +`reaction_map` shows the order of the columns (reactions) in `E`. Proteins +are ordered according to `protein_ids`, and follow after reactions. 
+""" +function build_geckomodel_internals!(model::GeckoModel) + S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = + _build_irreversible_stoichiometric_matrix(model.smodel) + + #: find all gene products that have kcats associated with them + protein_ids = _get_proteins_with_kcats(model) + + #: size of resultant model + n_reactions = size(S, 2) + n_proteins = length(protein_ids) + n_metabolites = size(S, 1) + n_vars = n_reactions + n_proteins + + #: equality lhs + E_components = ( #TODO add size hints if possible + row_idxs = Vector{Int}(), + col_idxs = Vector{Int}(), + coeffs = Vector{Float64}(), + ) + + for (rid, col_idx) in reaction_map + original_rid = string(split(rid, "§")[1]) + + # skip these entries + contains(rid, "§ARM") && continue + !haskey(model.reaction_kcats, original_rid) && continue + + # these entries have kcats + if contains(rid, "§ISO") + iso_num = parse( + Int, + replace( + first(filter(startswith("ISO"), split(rid, "§")[2:end])), + "ISO" => "", + ), + ) + else # only one enzyme + iso_num = 1 + end + + # add all entries to column of matrix + _add_enzyme_variable( + model, + iso_num, # only one enzyme + rid, + original_rid, + E_components, + col_idx, + protein_ids, + ) + end + + Se = sparse( + E_components.row_idxs, + E_components.col_idxs, + E_components.coeffs, + n_proteins, + n_reactions, + ) + + E = [ + S zeros(n_metabolites, n_proteins) + Se I(n_proteins) + ] + + #: equality rhs + d = zeros(n_metabolites + n_proteins) + + #: find objective + obj_idx_orig = first(findnz(objective(model.smodel))[1]) + obj_id_orig = reactions(model.smodel)[obj_idx_orig] + obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective + c = zeros(n_vars) + obj_idx = reaction_map[obj_id] + c[obj_idx] = 1.0 + + #: inequality constraints + M, h = _gecko_build_inequality_constraints( + model, + protein_ids, + n_reactions, + n_proteins, + lb_fluxes, + ub_fluxes, + reaction_map, + ) + + #: overwrite geckomodel data + model.data = GeckoData( + sparse(c), + 
sparse(E), + sparse(d), + sparse(M), + sparse(h), + reaction_map, + metabolite_map, + protein_ids, + ) + + return nothing +end + +""" + _gecko_build_inequality_constraints( + model::GeckoModel, + protein_ids, + n_reactions, + n_proteins, + lb_fluxes, + ub_fluxes, + reaction_map, + ) + +Helper function to build inequality constraints. Returns the inequality constraint in matrix format. +""" +function _gecko_build_inequality_constraints( + model::GeckoModel, + protein_ids, + n_reactions, + n_proteins, + lb_fluxes, + ub_fluxes, + reaction_map, +) + #: inequality lhs + mw_proteins = [model.protein_masses[pid] for pid in protein_ids] + M = Array( + [ + -I(n_reactions) zeros(n_reactions, n_proteins) + I(n_reactions) zeros(n_reactions, n_proteins) + zeros(n_proteins, n_reactions) -I(n_proteins) + zeros(n_proteins, n_reactions) I(n_proteins) + zeros(1, n_reactions) mw_proteins' + ], + ) + + #: inequality rhs + for original_rid in keys(model.flux_measurements) # only constrain if measurement available + lb = model.flux_measurements[original_rid][1] + ub = model.flux_measurements[original_rid][2] + rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] + filter!(x -> !contains(x, "§ISO"), rids) # remove isozyme partial reactions (ARM reactions take care of these) + + if lb > 0 # forward only + for rid in rids + contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) + contains(rid, "§FOR") && + (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) + end + elseif ub < 0 # reverse only + for rid in rids + contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) + contains(rid, "§REV") && + (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) + end + else # measurement does not rule our reversibility + for rid in rids + contains(rid, "§FOR") && + (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) + contains(rid, "§REV") && + (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 
0) + end + end + end + + lb_proteins = [ + haskey(model.protein_measurements, pid) ? model.protein_measurements[pid][1] : 0.0 for pid in protein_ids + ] + ub_proteins = [ + haskey(model.protein_measurements, pid) ? model.protein_measurements[pid][2] : + 1000.0 for pid in protein_ids + ] + + h = Array([-lb_fluxes; ub_fluxes; -lb_proteins; ub_proteins; model.total_protein_mass]) + + return M, h +end + +""" + _build_irreversible_stoichiometric_matrix(model::StandardModel) + +Return the stoichiometric matrix. All reactions are forward only i.e. only +positive fluxes are allowed. Include arm reactions. +""" +function _build_irreversible_stoichiometric_matrix(model::StandardModel) + # components used to build stoichiometric matrix + S_components = ( #TODO add size hints if possible + row_idxs = Vector{Int}(), + col_idxs = Vector{Int}(), + coeffs = Vector{Float64}(), + lbs = Vector{Float64}(), + ubs = Vector{Float64}(), + ) + + # establish the ordering in a named tuple + idxs = ( #: pseudo metabolites and reactions are added to model + met_idxs = Dict{String,Int}(), + rxn_idxs = Dict{String,Int}(), + max_rxn_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + max_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + pseudo_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + ) + #TODO for the counter thing, basically I wanted e.g. max_rxn_idx = 1 and then update it, + #TODO but named tuples are immutable... :( + + # fill the matrix entries + #: blocked treated as reversible because unclear what direction the reaction would go + for rid in reactions(model) + if has_reaction_grr(model, rid) && has_reaction_isozymes(model, rid) + if is_reaction_unidirectional(model, rid) + dir = is_reaction_forward_only(model, rid) ? 
"§FOR" : "§REV" + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") + else + @warn "Unhandled bound type for $rid" + end + else # no grr or single enzyme only + if is_reaction_unidirectional(model, rid) + dir = is_reaction_forward_only(model, rid) ? "§FOR" : "§REV" + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") + else + @warn "Unhandled bound type for $rid" + end + end + end + + S = sparse( + S_components.row_idxs, + S_components.col_idxs, + S_components.coeffs, + length(idxs.met_idxs), + length(idxs.rxn_idxs), + ) + + return S, S_components.lbs, S_components.ubs, idxs.rxn_idxs, idxs.met_idxs +end + + +""" + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + +Add entries to the components that will be used to build the stoichiometric +matrix. Simple variant that does not deal with isozymes and arm reactions. +""" +function _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + idxs.rxn_idxs[rid*dir] = idxs.max_rxn_idx[1] + idxs.max_rxn_idx[1] += 1 + fix_sign = dir == "§FOR" ? 
1 : -1 # change direction of reaction + for (mid, coeff) in reaction_stoichiometry(model, rid) + if !haskey(idxs.met_idxs, mid) + idxs.met_idxs[mid] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + end + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[rid*dir]) + push!(S_components.coeffs, fix_sign * coeff) + end + lb, ub = abs.(reaction_bounds(model, rid)) # assumes lb < ub + if dir == "§FOR" + is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, lb) + push!(S_components.ubs, ub) + else + is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, ub) + push!(S_components.ubs, lb) + end +end + +""" + _add_isozyme_to_irrev_stoich_mat(model::GeckoModel, rid, idxs, S_components, dir) + +Add entries to the components that will be used to build the stoichiometric matrix. +Complex variant that deals with isozymes and arm reactions. +""" +function _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + # add pseudo metabolite + pm = "§PM$(idxs.pseudo_met_idx[1])" + idxs.pseudo_met_idx[1] += 1 + idxs.met_idxs[pm] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + # find half reactions to get arm reaction + lhs = [] + rhs = [] + for (mid, coeff) in reaction_stoichiometry(model, rid) + if !haskey(idxs.met_idxs, mid) + idxs.met_idxs[mid] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + end + if coeff <= 0 + push!(lhs, (mid, coeff)) + else + push!(rhs, (mid, coeff)) + end + end + product_half_reaction = dir == "§FOR" ? rhs : lhs + reagent_half_reaction = dir == "§FOR" ? lhs : rhs + # add arm reaction + fix_sign = dir == "§FOR" ? 1 : -1 # change direction of reaction + pr = rid * "§ARM" * dir + idxs.rxn_idxs[pr] = idxs.max_rxn_idx[1] #! 
this needs to get added first because of blocked possibility + idxs.max_rxn_idx[1] += 1 + push!(S_components.row_idxs, idxs.met_idxs[pm]) + push!(S_components.col_idxs, idxs.rxn_idxs[pr]) + push!(S_components.coeffs, 1) + for (mid, coeff) in reagent_half_reaction + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[pr]) + push!(S_components.coeffs, fix_sign * coeff) + end + # add bounds for ARM reaction that corresponds to original model's bounds + lb, ub = abs.(reaction_bounds(model, rid)) # assumes lb < ub + if dir == "§FOR" + is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, lb) + push!(S_components.ubs, ub) + else + is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, ub) + push!(S_components.ubs, lb) + end + # add isozyme reactions + for (i, _) in enumerate(reaction_gene_association(model, rid)) + iso_rid = rid * "§ISO$i" * dir + idxs.rxn_idxs[iso_rid] = idxs.max_rxn_idx[1] + idxs.max_rxn_idx[1] += 1 + push!(S_components.row_idxs, idxs.met_idxs[pm]) + push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) + push!(S_components.coeffs, -1) + for (mid, coeff) in product_half_reaction + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) + push!(S_components.coeffs, fix_sign * coeff) + end + # add bounds + push!(S_components.lbs, 0) + if is_reaction_blocked(model, rid) + push!(S_components.ubs, 0) + else + push!(S_components.ubs, 1000) # arbitrary upper bound + end + end +end + +""" + _add_enzyme_variable( + model::GeckoModel, + iso_num, + rid, + original_rid, + E_components, + col_idx, + protein_ids, + ) + +Helper function to add an column into the enzyme stoichiometric matrix. 
+""" +function _add_enzyme_variable( + model::GeckoModel, + iso_num, + rid, + original_rid, + E_components, + col_idx, + protein_ids, +) + grr = reaction_gene_association(model, original_rid)[iso_num] + pstoich = model.reaction_protein_stoichiometry[original_rid][iso_num] + kcat = + contains(rid, "§FOR") ? model.reaction_kcats[original_rid][iso_num][1] : + model.reaction_kcats[original_rid][iso_num][2] + for (idx, pid) in enumerate(grr) + push!(E_components.row_idxs, first(indexin([pid], protein_ids))) + push!(E_components.col_idxs, col_idx) + push!(E_components.coeffs, -pstoich[idx] / kcat) + end +end + +""" + _get_proteins_with_kcats(model::GeckoModel) + +Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats` field. +Assume that if a reaction has a kcat then each isozyme has a kcat. +""" +function _get_proteins_with_kcats(model::GeckoModel) + unique( + vcat( + vcat( + [ + reaction_gene_association(model.smodel, rid) for + rid in reactions(model.smodel) if haskey(model.reaction_kcats, rid) + ]..., + )..., + ), + ) +end diff --git a/src/base/types/zSMomentModel.jl b/src/base/types/zSMomentModel.jl new file mode 100644 index 000000000..e69de29bb diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 07624991a..6034225ae 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -1,32 +1,39 @@ @testset "GECKO" begin - model = load_model(StandardModel, model_paths["e_coli_core.json"]) - model.reactions["EX_glc__D_e"].lb = -1000.0 # unconstraint because enzyme constraints take over + smodel = load_model(StandardModel, model_paths["e_coli_core.json"]) + smodel.reactions["EX_glc__D_e"].lb = -1000.0 # unconstrain because enzyme constraints take over + protein_measurements = Dict("b2779" => (0.01, 0.06)) + flux_measurements = Dict("GLCpts" => (-1.0, 12.0)) total_protein_mass = 100 # mg/gdW - rxn_fluxes, prot_concens = gecko( + model = GeckoModel( + smodel; + reaction_kcats = ecoli_core_reaction_kcats, + 
reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, + protein_masses = ecoli_core_protein_masses, + total_protein = total_protein_mass, # mg/gdW + flux_measurements, + protein_measurements, + ) + + opt_model = flux_balance_analysis( model, Tulip.Optimizer; - objective_id = "BIOMASS_Ecoli_core_w_GAM§FOR", - protein_stoichiometry = ecoli_core_protein_stoichiometry, - protein_masses = ecoli_core_protein_masses, - reaction_kcats = ecoli_core_reaction_kcats, - lb_protein_measurements = Dict("b2779" => 0.01), - ub_protein_measurements = Dict("b2779" => 0.06), - lb_flux_measurements = Dict("GLCpts" => -1.0), - ub_flux_measurements = Dict("GLCpts" => 12.0), - total_protein_mass, + modifications = [ + change_optimizer_attribute("IPM_IterationsLimit", 1000), + ], sense = COBREXA.MOI.MAX_SENSE, - modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], ) - prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens) + rxn_fluxes = flux_dict(model, opt_model) + prot_concens = protein_dict(model, opt_model) + @test isapprox( rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], 0.812827846796761, atol = TEST_TOLERANCE, ) - @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) -end - - + prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens) + + @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) +end \ No newline at end of file From cf829449cf454021b39c68c6eb0fa593f0bcc301 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Sun, 3 Apr 2022 15:40:06 +0200 Subject: [PATCH 040/109] fixed enzyme constrained problems --- src/analysis/smoment.jl | 270 ---------------- src/base/solver.jl | 16 +- src/base/types/zGeckoModel.jl | 203 +++++++----- src/base/types/zSMomentModel.jl | 295 ++++++++++++++++++ .../enzyme_utils.jl => base/utils/enzyme.jl} | 32 +- test/analysis/smoment.jl | 33 +- 6 files changed, 468 insertions(+), 381 deletions(-) delete mode 100644 src/analysis/smoment.jl rename src/{analysis/enzyme_utils.jl => base/utils/enzyme.jl} (77%) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl deleted file mode 100644 index 429a029b8..000000000 --- a/src/analysis/smoment.jl +++ /dev/null @@ -1,270 +0,0 @@ -""" - smoment( - model::StandardModel, - optimizer; - objective_id = "", - protein_stoichiometry = Dict(), - protein_masses = Dict(), - reaction_kcats = Dict(), - lb_flux_measurements = Dict(), - ub_flux_measurements = Dict(), - total_protein_mass = 0.0, - sense = MOI.MAX_SENSE, - modifications = [], - ) - -Perform enzyme capacity constrained flux balance analysis on `model` with -`optimizer` using the SMOMENT algorithm, see `Bekiaris, Pavlos Stephanos, and -Steffen Klamt. "Automatic construction of metabolic models with enzyme -constraints." BMC bioinformatics, 2020.` for implementation details. - -SMOMENT is a direct simplification of GECKO (despite it being named after the -MOMENT algorithm). Total enzyme capacity (sum of all enzyme concentrations -multiplied by their molar mass) is constrained by `total_protein_mass`, a -unitless mass fraction of enzyme mass to cell dry mass. The reaction fluxes can -be bounded by `lb_flux_measurements`, `ub_flux_measurements`. Both lower and -upper bounds need to be supplied if a reaction flux is to be bounded. The -reaction to be optimized is specified by `objective_id`. 
Note, since the -model uses irreversible reactions internally, you should append `"§FOR"` for the -forward direction and `"§REV"` for the reverse direction in which ever reaction -you want to optimize; this is not necesarry for the bound constraints. To -optimize anything else, use the lower level [`smoment_opt_problem`](@ref). -Futhermore, `"§"` is reserved for internal use as a delimiter, no reaction id -should contain that character. Also note, SMOMENT assumes that each reaction only has -a single enzyme (one GRR) associated with it. It is required that a model be modified to -ensure that this condition is met. For ease-of-use, [`remove_slow_isozymes!`](@ref) is -supplied to effect this. - -The protein masses (in molar mass units) for each gene in the model should also -be supplied through `protein_masses`. The format is a dictionary of gene ids -mapped to molar masses. Additionally, the reaction turnover numbers (catalytic -constants, kcats) are supplied through `reaction_kcats`, which is also a -dictionary mapping reaction ids to kcats of each isozyme encoded by the -reaction's gene reaction rule. Each isozyme should have both a forward and -reverse kcat, so `reaction_kcats = Dict(rid => [[k1f, k1r], [k2f, k2r]], ...)` -for `rid` with two isozymes. Finally, the stoichiometry of each isozyme needs to -be supplied by `protein_stoichiometry`. The format is also a dictionary mapping -gene ids returned by [`reaction_gene_association`](@ref) to their stoichiometry, -e.g. `protein_stoichiometry = Dict(rid => [[1,1],[1,2]],...)` implies that the -first isozyme of `rid` is composed of two subunits, each present once in the -protein, while the second isozyme is composed of two subunits, but the second -subunit is present twice in the isozyme. - -The function returns a dictionary mapping reaction ids to their fluxes. Note, -the units depend on those used in `reaction_kcats` and `protein_masses`. 
Only -the protein and reaction flux bounds are optional kwargs, all other kwargs must -be supplied. Only reactions with kcats will have enzyme bounds associated with -them, but all isozymes are assumed to have data if data is supplied. - -Currently only `modifications` that change attributes of the `optimizer` are -supported. -""" -function smoment( - model::StandardModel, - optimizer; - objective_id = "", - protein_stoichiometry = Dict(), - protein_masses = Dict(), - reaction_kcats = Dict(), - lb_flux_measurements = Dict(), - ub_flux_measurements = Dict(), - total_protein_mass = 0.0, - sense = MOI.MAX_SENSE, - modifications = [], -) - - _, E, d, M, h, reaction_map, _ = smoment_opt_problem( - model; - protein_stoichiometry, - protein_masses, - reaction_kcats, - lb_flux_measurements, - ub_flux_measurements, - total_protein_mass, - ) - - opt_model = Model(optimizer) - x = @variable(opt_model, x[1:size(E, 2)]) - bid = reaction_map[objective_id] - @objective(opt_model, sense, x[bid]) - @constraint(opt_model, E * x .== d) - @constraint(opt_model, M * x .<= h) - - # apply the modifications, if any - for mod in modifications - mod(nothing, opt_model) - end - - optimize!(opt_model) - - _map_irrev_to_rev_ids(reaction_map, value.(x)) - -end - -""" - smoment_opt_problem( - model::StandardModel; - protein_stoichiometry = Dict(), - protein_masses = Dict(), - reaction_kcats = Dict(), - lb_flux_measurements = Dict(), - ub_flux_measurements = Dict(), - total_protein_mass = 0.0, - ) - -Lower level function that returns the matrix form of a model with enzyme capacity -constraints, in SMOMENT format, see [`smoment`](@ref) for the higher level function. -``` -max/min cᵀ * x -s.t. E * x = d - M * x ≤ h -``` -Returns `c, E, d, M, h, reaction_map, metabolite_map`, where `reaction_map` -shows the order of the columns (reactions) in `E`. Use -[`_map_irrev_to_rev_ids`](@ref) to map the solution of an optimization problem -back to the original model's name space. 
Note, this function implements the most -basic version of SMOMENT, i.e. you cannot limit the concentration of any protein -(use [`gecko`](@ref) for that). Importantly, this function assumes that a -preprocessing step has been performed that changes the model so that each -reaction only has one GRR corresponding to the fastest isozyme. For this -preprocessing step, use [`remove_slow_isozymes!`](@ref). - -Format of arguments are always in order of grr for each reaction `rxn_id`: -1) protein_stoichiometry: `Dict(rxn_id => [[1,2,1,1]])` -2) protein_masses: `Dict(p_id => [mm, ...])` in units of kDa -3) reaction_kcat: `Dict(rxn_id => [[kcat_for, kcat_rev]])` NOTE: no isozymes. - -Assumptions: -1) No isozymes. -2) Both `lb_flux_measurements` and `ub_flux_measurements` have the same keys - -Notes: -1) The objective vector, `c` is not set -2) The parameters are the kcats and the total protein measurement -3) The symbol `§` is a reserved delimiter, do not use it in reaction or metabolite ids -""" -function smoment_opt_problem( - model::StandardModel; - protein_stoichiometry = Dict(), - protein_masses = Dict(), - reaction_kcats = Dict(), - lb_flux_measurements = Dict(), - ub_flux_measurements = Dict(), - total_protein_mass = 0.0, -) - - S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model) - - #: size of resultant model - n_reactions = size(S, 2) - n_metabolites = size(S, 1) - n_vars = n_reactions + 1 - - #: equality lhs - Se = zeros(1, n_reactions) - - for (rid, col_idx) in reaction_map - original_rid = string(split(rid, "§")[1]) - - # skip these entries - !haskey(reaction_kcats, original_rid) && continue - # these entries have kcats, only one GRR by assumption - grr = first(reaction_gene_association(model, original_rid)) - pstoich = first(protein_stoichiometry[original_rid]) - mw = dot(pstoich, [protein_masses[gid] for gid in grr]) - kcat = - contains(rid, "§FOR") ? 
first(reaction_kcats[original_rid])[1] : - first(reaction_kcats[original_rid])[2] - Se[1, col_idx] = -mw / kcat - end - - E = [ - S zeros(n_metabolites, 1) - Se 1.0 - ] - - # #: equality rhs - d = zeros(n_metabolites + 1) - - # #: need to set objective reaction outside - c = spzeros(n_vars) - - #: inequality constraints - M, h = _smoment_build_inequality_constraints( - n_reactions, - lb_flux_measurements, - ub_flux_measurements, - lb_fluxes, - ub_fluxes, - reaction_map, - total_protein_mass, - ) - - return c, E, d, M, h, reaction_map, metabolite_map -end - - -""" - _smoment_build_inequality_constraints( - n_reactions, - lb_flux_measurements, - ub_flux_measurements, - lb_fluxes, - ub_fluxes, - reaction_map, - ) - -Helper function to return functions describing the inequality -constraints for smoment. -""" -function _smoment_build_inequality_constraints( - n_reactions, - lb_flux_measurements, - ub_flux_measurements, - lb_fluxes, - ub_fluxes, - reaction_map, - total_protein_mass, -) - #: inequality lhs - M = Array( - [ - -I(n_reactions) zeros(n_reactions, 1) - I(n_reactions) zeros(n_reactions, 1) - zeros(1, n_reactions) 1 - ], - ) - - #: inequality rhs - for original_rid in keys(lb_flux_measurements) # only constrain if measurement available - lb = lb_flux_measurements[original_rid] - ub = ub_flux_measurements[original_rid] - rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] - - if lb > 0 # forward only - for rid in rids - contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) - contains(rid, "§FOR") && - (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) - end - elseif ub < 0 # reverse only - for rid in rids - contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) - contains(rid, "§REV") && - (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) - end - else # measurement does not rule our reversibility - for rid in rids - contains(rid, "§FOR") && - (ub_fluxes[reaction_map[rid]] = ub; 
lb_fluxes[reaction_map[rid]] = 0) - contains(rid, "§REV") && - (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) - end - end - end - - h = Array([-lb_fluxes; ub_fluxes; total_protein_mass]) - - return M, h -end diff --git a/src/base/solver.jl b/src/base/solver.jl index 425f8b2cd..3a14025d8 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -29,7 +29,7 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.M isempty(C) || @constraint(optimization_model, c_ubs, coupling(model) * x .<= cu) # coupling upper bounds enzyme_vec, enzyme_mass = enzyme_capacity(model) # nothing if not present - !isnothing(enzyme_capacity) && + !isnothing(enzyme_vec) && @constraint(optimization_model, enz_cap, dot(enzyme_vec, x) <= enzyme_mass) return optimization_model @@ -137,7 +137,19 @@ original ids). """ flux_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? - _map_irrev_to_rev_ids(model.data.reaction_map, value.(opt_model[:x])) : nothing + _map_irrev_to_rev_ids(model.geckodata.reaction_map, value.(opt_model[:x])) : nothing + + +""" + flux_dict(model::SMomentModel, opt_model) + +Specialization to format solved data for `SMomentModel`s but maps +the solution back into the namespace of the underlying model (the +original ids). +""" +flux_dict(model::SMomentModel, opt_model) = + is_solved(opt_model) ? + _map_irrev_to_rev_ids(model.smomentdata.reaction_map, value.(opt_model[:x])) : nothing """ _map_irrev_to_rev_ids(reaction_map, protein_ids, solution) diff --git a/src/base/types/zGeckoModel.jl b/src/base/types/zGeckoModel.jl index bb0338990..138b8dc71 100644 --- a/src/base/types/zGeckoModel.jl +++ b/src/base/types/zGeckoModel.jl @@ -1,8 +1,82 @@ +""" + mutable struct EnzymeData + +Holds data relevant for enzyme constrained metabolic models. + +Reaction turnover numbers (catalytic constants, kcats) are supplied through +`reaction_kcats`, which is a dictionary mapping reaction ids to kcats of each +isozyme. 
Each isozyme should have both a forward and reverse kcat, so +`reaction_kcats = Dict(rid => [[k1f, k1r], [k2f, k2r]], ...)` for `rid` with two +isozymes. The stoichiometry of each isozyme needs to be supplied by +`protein_stoichiometry`. The format is also a dictionary mapping gene ids to +their stoichiometry, e.g. `protein_stoichiometry = Dict(rid => +[[1,1],[1,2]],...)` implies that the first isozyme of `rid` is composed of two +subunits, each present once in the protein, while the second isozyme is composed +of two subunits, but the second subunit is present twice in the isozyme. The +order of each entry in `reaction_kcats` and `reaction_protein_stoichiometry` is +taken to be the same as the order returned when calling +[`reaction_gene_association`](@ref) on the model. The protein masses (in molar +mass units) for each gene in the model should be supplied through +`protein_masses`. The format is a dictionary of gene ids mapped to molar masses. + +Total enzyme capacity (sum of all enzyme concentrations multiplied by their +molar mass) is constrained by `total_protein_mass`, a unitless mass fraction of +enzyme mass to cell dry mass. The reaction fluxes and protein concentrations can +be bounded by `flux_measurements` and `protein_measurements` respectively. Both +lower and upper bounds need to be supplied (as a tuple) if a reaction flux is to +be bounded, likewise with protein concentration bounds. + +# Fields +``` +reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] +reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] +protein_masses::Dict{String,Float64} +total_protein_mass::Float64 +flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) +protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) +``` +""" +mutable struct EnzymeData + reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] 
+ reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] + protein_masses::Dict{String,Float64} + total_protein_mass::Float64 + flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) + protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) +end + +""" + EnzymeData( + reaction_kcats, + reaction_protein_stoichiometry, + protein_masses, + total_protein; + flux_measurements = Dict{String,Tuple{Float64,Float64}}(), + protein_measurements = Dict{String,Tuple{Float64,Float64}}(), + ) + +Constructor for `EnzymeData`. +""" +EnzymeData( + reaction_kcats, + reaction_protein_stoichiometry, + protein_masses, + total_protein; + flux_measurements = Dict{String,Tuple{Float64,Float64}}(), + protein_measurements = Dict{String,Tuple{Float64,Float64}}(), +) = EnzymeData( + reaction_kcats, + reaction_protein_stoichiometry, + protein_masses, + total_protein, + flux_measurements, + protein_measurements, +) + """ mutable struct GeckoData -Holds the already constructed GECKO problem. This is more efficient -than construct the matrices from scratch each time the model is run. +Holds the already constructed GECKO problem. # Fields ``` @@ -51,41 +125,16 @@ formulation. See `Sánchez, Benjamín J., et al. "Improving the phenotype predictions of a yeast genome‐scale metabolic model by incorporating enzymatic constraints." Molecular systems biology, 2017.` for implementation details. -Total enzyme capacity (sum of all enzyme concentrations multiplied by their -molar mass) is constrained by `total_protein_mass`, a unitless mass fraction of -enzyme mass to cell dry mass. The reaction fluxes and protein concentrations can -be bounded by `flux_measurements` and `protein_measurements` respectively. Both -lower and upper bounds need to be supplied (as a tuple) if a reaction flux is to -be bounded, likewise with protein concentration bounds. 
- Note, since the model uses irreversible reactions internally, `"§FOR"` (for the forward direction) and `"§REV"` (for the reverse direction) is appended to each reaction internally. Futhermore, `"§"` is reserved for internal use as a -delimiter, no reaction id should contain that character. - -The protein masses (in molar mass units) for each gene in the model should also -be supplied through `protein_masses`. The format is a dictionary of gene ids -mapped to molar masses. Additionally, the reaction turnover numbers (catalytic -constants, kcats) are supplied through `reaction_kcats`, which is also a -dictionary mapping reaction ids to kcats of each isozyme encoded by the -reaction's gene reaction rule. Each isozyme should have both a forward and -reverse kcat, so `reaction_kcats = Dict(rid => [[k1f, k1r], [k2f, k2r]], ...)` -for `rid` with two isozymes. Finally, the stoichiometry of each isozyme needs to -be supplied by `protein_stoichiometry`. The format is also a dictionary mapping -gene ids returned by [`reaction_gene_association`](@ref) to their stoichiometry, -e.g. `protein_stoichiometry = Dict(rid => [[1,1],[1,2]],...)` implies that the -first isozyme of `rid` is composed of two subunits, each present once in the -protein, while the second isozyme is composed of two subunits, but the second -subunit is present twice in the isozyme. - -Note, the units depend on those used in `reaction_kcats` and `protein_masses`. -Only the protein and reaction flux bounds are optional parameters, all other -parameters must be supplied. Only reactions with kcats will have enzyme bounds -associated with them, but all isozymes are assumed to have data if data is -supplied. - -Currently only `modifications` that change attributes of the `optimizer` are -supported. +delimiter, no reaction id should contain that character. The units depend on +those used in `enzymedata.reaction_kcats` and `enzymedata.protein_masses`. 
Only +the protein and reaction flux bounds are optional parameters, all other +parameters must be supplied to the `enzymedata` field. Only reactions with kcats +will have enzyme bounds associated with them, but all isozymes are assumed to +have data if data is supplied. Currently only `modifications` that change +attributes of the `optimizer` are supported. To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel` to run an analysis on it. @@ -95,24 +144,14 @@ See also: [`StandardModel`](@ref) # Fields ``` smodel::StandardModel -data::GeckoData -reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] -reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] -protein_masses::Dict{String,Float64} -total_protein_mass::Float64 -flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) -protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) +geckodata::GeckoData +enzymedata::EnzymeData ``` """ mutable struct GeckoModel <: MetabolicModel smodel::StandardModel - data::GeckoData - reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] - reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] 
- protein_masses::Dict{String,Float64} - total_protein_mass::Float64 - flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) - protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) + geckodata::GeckoData + enzymedata::EnzymeData end """ @@ -140,12 +179,14 @@ function GeckoModel( gm = GeckoModel( convert(StandardModel, model), GeckoData(), # empty - reaction_kcats, - reaction_protein_stoichiometry, - protein_masses, - total_protein, - flux_measurements, - protein_measurements, + EnzymeData( + reaction_kcats, + reaction_protein_stoichiometry, + protein_masses, + total_protein; + flux_measurements, + protein_measurements, + ), ) # build data in GeckoModel @@ -159,21 +200,24 @@ end Return stoichiometry matrix that includes enzymes as metabolites. """ -stoichiometry(model::GeckoModel) = model.data.E +function stoichiometry(model::GeckoModel) + build_geckomodel_internals!(model) + return model.geckodata.E +end """ balance(model::GeckoModel) Return stoichiometric balance. """ -balance(model::GeckoModel) = model.data.d +balance(model::GeckoModel) = model.geckodata.d """ objective(model::GeckoModel) Return objective of `model`. """ -objective(model::GeckoModel) = model.data.c +objective(model::GeckoModel) = model.geckodata.c @_inherit_model_methods GeckoModel (rid::String,) smodel (rid,) reaction_gene_association reaction_stoichiometry reaction_bounds is_reaction_reversible is_reaction_forward_only is_reaction_backward_only is_reaction_unidirectional is_reaction_blocked has_reaction_isozymes has_reaction_grr @@ -183,21 +227,21 @@ objective(model::GeckoModel) = model.data.c Returns reactions order according to stoichiometric matrix. Note, call [`genes`](@ref) to get the order of the remaining variables. 
""" -reactions(model::GeckoModel) = _order_id_to_idx_dict(model.data.reaction_map) +reactions(model::GeckoModel) = _order_id_to_idx_dict(model.geckodata.reaction_map) """ metabolites(model::GeckoModel) Returns the metabolites ordered according to the stoichiometric matrix. """ -metabolites(model::GeckoModel) = _order_id_to_idx_dict(model.data.metabolite_map) +metabolites(model::GeckoModel) = _order_id_to_idx_dict(model.geckodata.metabolite_map) """ genes(model::GeckoModel) Returns the genes (proteins) in the order as they appear as variables in the model. """ -genes(model::GeckoModel) = model.data.protein_ids +genes(model::GeckoModel) = model.geckodata.protein_ids """ _order_id_to_idx_dict(id_to_idx_dict) @@ -218,10 +262,10 @@ end Return variable bounds for `GeckoModel`. """ function bounds(model::GeckoModel) - n_rxns = length(model.data.reaction_map) - n_prots = length(model.data.protein_ids) - lbs = [-model.data.h[1:n_rxns]; -model.data.h[2*n_rxns.+(1:n_prots)]] - ubs = [model.data.h[n_rxns.+(1:n_rxns)]; model.data.h[2*n_rxns+n_prots.+(1:n_prots)]] + n_rxns = length(model.geckodata.reaction_map) + n_prots = length(model.geckodata.protein_ids) + lbs = [-model.geckodata.h[1:n_rxns]; -model.geckodata.h[2*n_rxns.+(1:n_prots)]] + ubs = [model.geckodata.h[n_rxns.+(1:n_rxns)]; model.geckodata.h[2*n_rxns+n_prots.+(1:n_prots)]] return lbs, ubs end @@ -231,7 +275,7 @@ end Return enzyme capacity inequality constraint vector and bound, or nothing if it doesn't exist in the model. """ -enzyme_capacity(model::GeckoModel) = (model.data.M[end, :], model.data.h[end]) +enzyme_capacity(model::GeckoModel) = (model.geckodata.M[end, :], model.geckodata.h[end]) """ build_geckomodel_internals!(model::GeckoModel) @@ -239,7 +283,7 @@ enzyme_capacity(model::GeckoModel) = (model.data.M[end, :], model.data.h[end]) Lower level function that updates the matrix form of a model with enzyme capacity constraints, in GECKO format. 
-Specifically, updates `model.data` with the vector and matrix coefficients `c, +Specifically, updates `model.geckodata` with the vector and matrix coefficients `c, E, d, M, h` satisfying ``` opt cᵀ * x @@ -275,7 +319,7 @@ function build_geckomodel_internals!(model::GeckoModel) # skip these entries contains(rid, "§ARM") && continue - !haskey(model.reaction_kcats, original_rid) && continue + !haskey(model.enzymedata.reaction_kcats, original_rid) && continue # these entries have kcats if contains(rid, "§ISO") @@ -338,7 +382,7 @@ function build_geckomodel_internals!(model::GeckoModel) ) #: overwrite geckomodel data - model.data = GeckoData( + model.geckodata = GeckoData( sparse(c), sparse(E), sparse(d), @@ -375,7 +419,7 @@ function _gecko_build_inequality_constraints( reaction_map, ) #: inequality lhs - mw_proteins = [model.protein_masses[pid] for pid in protein_ids] + mw_proteins = [model.enzymedata.protein_masses[pid] for pid in protein_ids] M = Array( [ -I(n_reactions) zeros(n_reactions, n_proteins) @@ -387,9 +431,9 @@ function _gecko_build_inequality_constraints( ) #: inequality rhs - for original_rid in keys(model.flux_measurements) # only constrain if measurement available - lb = model.flux_measurements[original_rid][1] - ub = model.flux_measurements[original_rid][2] + for original_rid in keys(model.enzymedata.flux_measurements) # only constrain if measurement available + lb = model.enzymedata.flux_measurements[original_rid][1] + ub = model.enzymedata.flux_measurements[original_rid][2] rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] filter!(x -> !contains(x, "§ISO"), rids) # remove isozyme partial reactions (ARM reactions take care of these) @@ -416,14 +460,14 @@ function _gecko_build_inequality_constraints( end lb_proteins = [ - haskey(model.protein_measurements, pid) ? model.protein_measurements[pid][1] : 0.0 for pid in protein_ids + haskey(model.enzymedata.protein_measurements, pid) ? 
model.enzymedata.protein_measurements[pid][1] : 0.0 for pid in protein_ids ] ub_proteins = [ - haskey(model.protein_measurements, pid) ? model.protein_measurements[pid][2] : + haskey(model.enzymedata.protein_measurements, pid) ? model.enzymedata.protein_measurements[pid][2] : 1000.0 for pid in protein_ids ] - h = Array([-lb_fluxes; ub_fluxes; -lb_proteins; ub_proteins; model.total_protein_mass]) + h = Array([-lb_fluxes; ub_fluxes; -lb_proteins; ub_proteins; model.enzymedata.total_protein_mass]) return M, h end @@ -492,7 +536,6 @@ function _build_irreversible_stoichiometric_matrix(model::StandardModel) return S, S_components.lbs, S_components.ubs, idxs.rxn_idxs, idxs.met_idxs end - """ _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) @@ -622,10 +665,10 @@ function _add_enzyme_variable( protein_ids, ) grr = reaction_gene_association(model, original_rid)[iso_num] - pstoich = model.reaction_protein_stoichiometry[original_rid][iso_num] + pstoich = model.enzymedata.reaction_protein_stoichiometry[original_rid][iso_num] kcat = - contains(rid, "§FOR") ? model.reaction_kcats[original_rid][iso_num][1] : - model.reaction_kcats[original_rid][iso_num][2] + contains(rid, "§FOR") ? 
model.enzymedata.reaction_kcats[original_rid][iso_num][1] : + model.enzymedata.reaction_kcats[original_rid][iso_num][2] for (idx, pid) in enumerate(grr) push!(E_components.row_idxs, first(indexin([pid], protein_ids))) push!(E_components.col_idxs, col_idx) @@ -645,7 +688,7 @@ function _get_proteins_with_kcats(model::GeckoModel) vcat( [ reaction_gene_association(model.smodel, rid) for - rid in reactions(model.smodel) if haskey(model.reaction_kcats, rid) + rid in reactions(model.smodel) if haskey(model.enzymedata.reaction_kcats, rid) ]..., )..., ), diff --git a/src/base/types/zSMomentModel.jl b/src/base/types/zSMomentModel.jl index e69de29bb..434af77e5 100644 --- a/src/base/types/zSMomentModel.jl +++ b/src/base/types/zSMomentModel.jl @@ -0,0 +1,295 @@ +""" + mutable struct SMomentData + +Holds the already constructed SMOMENT problem. + +# Fields +``` +c::SparseVector{Float64, Int64} +E::SparseMatrixCSC{Float64, Int64} +d::SparseVector{Float64, Int64} +M::SparseMatrixCSC{Float64, Int64} +h::SparseVector{Float64, Int64} +reaction_map::Dict{String,Int} +metabolite_map::Dict{String,Int} +``` +""" +mutable struct SMomentData + c::SparseVector{Float64,Int64} + E::SparseMatrixCSC{Float64,Int64} + d::SparseVector{Float64,Int64} + M::SparseMatrixCSC{Float64,Int64} + h::SparseVector{Float64,Int64} + reaction_map::Dict{String,Int} + metabolite_map::Dict{String,Int} +end + +""" + SMomentData() + +Empty constructor. +""" +SMomentData() = SMomentData( + spzeros(0), + spzeros(0, 0), + spzeros(0), + spzeros(0, 0), + spzeros(0), + Dict{String,Int}(), + Dict{String,Int}(), +) + +""" + mutable struct SMomentModel <: MetabolicModel + +Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, +and Steffen Klamt. "Automatic construction of metabolic models with enzyme +constraints." BMC bioinformatics, 2020.` for implementation details. + +Note, `"§"` is reserved for internal use as a delimiter, no reaction id should +contain that character. 
Also note, SMOMENT assumes that each reaction only has a +single enzyme (one GRR) associated with it. It is required that a model be +modified to ensure that this condition is met. For ease-of-use, +[`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only +`modifications` that change attributes of the `optimizer` are supported. +""" +mutable struct SMomentModel <: MetabolicModel + smodel::StandardModel + smomentdata::SMomentData + enzymedata::EnzymeData +end + +""" + SMomentModel( + model::MetabolicModel; + reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), + reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), + protein_masses = Dict{String,Float64}(), + total_protein = 0.0, + flux_measurements = Dict{String,Tuple{Float64,Float64}}(), + ) + +Construct an `SMomentModel`. + +""" +function SMomentModel( + model::MetabolicModel; + reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), + reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), + protein_masses = Dict{String,Float64}(), + total_protein = 0.0, + flux_measurements = Dict{String,Tuple{Float64,Float64}}(), +) + sm = convert(StandardModel, model) + # check that input data is in correct format for smoment + if any(length(v) > 1 for (rid, v) in reaction_kcats if has_reaction_grr(sm, rid)) || + any(length(v) > 1 for (rid, v) in reaction_protein_stoichiometry if haskey(reaction_kcats, rid) && has_reaction_grr(sm, rid)) + @warn("For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data.") + end + + smm = SMomentModel( + sm, + SMomentData(), # empty + EnzymeData( + reaction_kcats, + reaction_protein_stoichiometry, + protein_masses, + total_protein; + flux_measurements, + ), + ) + + # build data in SMomentModel + build_smomentmodel_internals!(smm) + + return smm +end + +""" + stoichiometry(model::SMomentModel) + +Return stoichiometry matrix that includes enzymes as metabolites. 
+""" +function stoichiometry(model::SMomentModel) + build_smomentmodel_internals!(model) + return model.smomentdata.E +end + +""" + balance(model::SMomentModel) + +Return stoichiometric balance. +""" +balance(model::SMomentModel) = model.smomentdata.d + +""" + objective(model::SMomentModel) + +Return objective of `model`. +""" +objective(model::SMomentModel) = model.smomentdata.c + +@_inherit_model_methods SMomentModel () smodel () genes +@_inherit_model_methods SMomentModel (rid::String,) smodel (rid,) reaction_gene_association reaction_stoichiometry reaction_bounds is_reaction_reversible is_reaction_forward_only is_reaction_backward_only is_reaction_unidirectional is_reaction_blocked has_reaction_isozymes has_reaction_grr + +""" + reactions(model::SMomentModel) + +Returns reactions order according to stoichiometric matrix. Note, call [`genes`](@ref) +to get the order of the remaining variables. +""" +reactions(model::SMomentModel) = _order_id_to_idx_dict(model.smomentdata.reaction_map) + +""" + metabolites(model::SMomentModel) + +Returns the metabolites ordered according to the stoichiometric matrix. +""" +metabolites(model::SMomentModel) = _order_id_to_idx_dict(model.smomentdata.metabolite_map) + +""" + bounds(model::SMomentModel) + +Return variable bounds for `SMomentModel`. +""" +function bounds(model::SMomentModel) + n_rxns = length(model.smomentdata.reaction_map) + lbs = [-model.smomentdata.h[1:n_rxns]; 0] + ubs = [model.smomentdata.h[n_rxns.+(1:n_rxns)]; model.smomentdata.h[end]] + return lbs, ubs +end + +""" + build_smomentmodel_internals!(model::SMomentModel) + +Build internal data structures used to solve SMOMENT type flux +balance analysis problems. 
+""" +function build_smomentmodel_internals!(model::SMomentModel) + + S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = + _build_irreversible_stoichiometric_matrix(model.smodel) + + #: size of resultant model + n_reactions = size(S, 2) + n_metabolites = size(S, 1) + n_vars = n_reactions + 1 + + #: equality lhs + Se = zeros(1, n_reactions) + + for (rid, col_idx) in reaction_map + original_rid = string(split(rid, "§")[1]) + + # skip these entries + !haskey(model.enzymedata.reaction_kcats, original_rid) && continue + # these entries have kcats, only one GRR by assumption + grr = first(reaction_gene_association(model, original_rid)) + pstoich = first(model.enzymedata.reaction_protein_stoichiometry[original_rid]) + mw = dot(pstoich, [model.enzymedata.protein_masses[gid] for gid in grr]) + kcat = + contains(rid, "§FOR") ? first(model.enzymedata.reaction_kcats[original_rid])[1] : + first(model.enzymedata.reaction_kcats[original_rid])[2] + Se[1, col_idx] = -mw / kcat + end + + E = [ + S zeros(n_metabolites, 1) + Se 1.0 + ] + + #: equality rhs + d = zeros(n_metabolites + 1) + + #: find objective + obj_idx_orig = first(findnz(objective(model.smodel))[1]) + obj_id_orig = reactions(model.smodel)[obj_idx_orig] + obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective + c = zeros(n_vars) + obj_idx = reaction_map[obj_id] + c[obj_idx] = 1.0 + + #: inequality constraints + M, h = _smoment_build_inequality_constraints( + model, + n_reactions, + lb_fluxes, + ub_fluxes, + reaction_map, + ) + + #: overwrite geckomodel data + model.smomentdata = SMomentData( + sparse(c), + sparse(E), + sparse(d), + sparse(M), + sparse(h), + reaction_map, + metabolite_map, + ) + + return nothing +end + +""" + _smoment_build_inequality_constraints( + model::SMomentModel, + n_reactions, + lb_fluxes, + ub_fluxes, + reaction_map, + ) + +Helper function to return functions describing the inequality +constraints for smoment. 
+""" +function _smoment_build_inequality_constraints( + model::SMomentModel, + n_reactions, + lb_fluxes, + ub_fluxes, + reaction_map, +) + + #: inequality lhs + M = Array( + [ + -I(n_reactions) zeros(n_reactions, 1) + I(n_reactions) zeros(n_reactions, 1) + zeros(1, n_reactions) 1 + ], + ) + + #: inequality rhs + for original_rid in keys(model.enzymedata.flux_measurements) # only constrain if measurement available + lb = model.enzymedata.flux_measurements[original_rid][1] + ub = model.enzymedata.flux_measurements[original_rid][2] + rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] + + if lb > 0 # forward only + for rid in rids + contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) + contains(rid, "§FOR") && + (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) + end + elseif ub < 0 # reverse only + for rid in rids + contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) + contains(rid, "§REV") && + (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) + end + else # measurement does not rule our reversibility + for rid in rids + contains(rid, "§FOR") && + (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) + contains(rid, "§REV") && + (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) + end + end + end + + h = Array([-lb_fluxes; ub_fluxes; model.enzymedata.total_protein_mass]) + + return M, h +end \ No newline at end of file diff --git a/src/analysis/enzyme_utils.jl b/src/base/utils/enzyme.jl similarity index 77% rename from src/analysis/enzyme_utils.jl rename to src/base/utils/enzyme.jl index 367bd8070..a5272c879 100644 --- a/src/analysis/enzyme_utils.jl +++ b/src/base/utils/enzyme.jl @@ -1,3 +1,13 @@ +""" + protein_dict(model::GeckoModel, opt_model) + +Return a dictionary mapping protein concentrations to their ids. +""" +protein_dict(model::GeckoModel, opt_model) = + is_solved(opt_model) ? 
+ last(_map_irrev_to_rev_ids(model.geckodata.reaction_map, value.(opt_model[:x]); protein_ids=model.geckodata.protein_ids)) : nothing + + """ remove_slow_isozymes!( model::StandardModel; @@ -13,18 +23,18 @@ the arguments in place. function remove_slow_isozymes!( model::StandardModel; reaction_kcats = Dict(), - protein_stoichiometry = Dict(), + reaction_protein_stoichiometry = Dict(), protein_masses = Dict(), ) for rid in reactions(model) - if _has_grr(model, rid) && haskey(reaction_kcats, rid) + if has_reaction_grr(model, rid) && haskey(reaction_kcats, rid) kcat_effs = Float64[] grrs = reaction_gene_association(model, rid) for (i, grr) in enumerate(grrs) push!( kcat_effs, dot( - protein_stoichiometry[rid][i], + reaction_protein_stoichiometry[rid][i], [protein_masses[gid] for gid in grr], ) / maximum(reaction_kcats[rid][i]), ) @@ -33,13 +43,13 @@ function remove_slow_isozymes!( model.reactions[rid].grr = [grrs[idx]] reaction_kcats[rid] = [reaction_kcats[rid][idx]] - protein_stoichiometry[rid] = [protein_stoichiometry[rid][idx]] + reaction_protein_stoichiometry[rid] = [reaction_protein_stoichiometry[rid][idx]] end end curated_gids = String[] for rid in reactions(model) - if _has_grr(model, rid) + if has_reaction_grr(model, rid) for grr in reaction_gene_association(model, rid) append!(curated_gids, grr) end @@ -49,14 +59,4 @@ function remove_slow_isozymes!( delete!(model.genes, rm_gids) # remove genes that were deleted return nothing -end - -""" - protein_dict(model::GeckoModel, opt_model) - -Return a dictionary mapping protein concentrations to their ids. -""" -protein_dict(model::GeckoModel, opt_model) = - is_solved(opt_model) ? 
- last(_map_irrev_to_rev_ids(model.data.reaction_map, value.(opt_model[:x]); protein_ids=model.data.protein_ids)) : nothing - +end \ No newline at end of file diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 96243329a..42f0d56f5 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -1,29 +1,36 @@ @testset "SMOMENT" begin - model = load_model(StandardModel, model_paths["e_coli_core.json"]) - model.reactions["EX_glc__D_e"].lb = -1000.0 # unconstraint because enzyme constraints take over + smodel = load_model(StandardModel, model_paths["e_coli_core.json"]) + smodel.reactions["EX_glc__D_e"].lb = -1000.0 # unconstrain because enzyme constraints take over + flux_measurements = Dict("GLCpts" => (-1.0, 12.0)) total_protein_mass = 100 # mg/gdW remove_slow_isozymes!( - model; - protein_stoichiometry = ecoli_core_protein_stoichiometry, + smodel; + reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, protein_masses = ecoli_core_protein_masses, reaction_kcats = ecoli_core_reaction_kcats, ) - rxn_fluxes = smoment( + model = SMomentModel( + smodel; + reaction_kcats = ecoli_core_reaction_kcats, + reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, + protein_masses = ecoli_core_protein_masses, + total_protein = total_protein_mass, # mg/gdW + flux_measurements, + ) + + opt_model = flux_balance_analysis( model, Tulip.Optimizer; - objective_id = "BIOMASS_Ecoli_core_w_GAM§FOR", - protein_stoichiometry = ecoli_core_protein_stoichiometry, - protein_masses = ecoli_core_protein_masses, - reaction_kcats = ecoli_core_reaction_kcats, - lb_flux_measurements = Dict("GLCpts" => -1.0), - ub_flux_measurements = Dict("GLCpts" => 12.0), - total_protein_mass, + modifications = [ + change_optimizer_attribute("IPM_IterationsLimit", 1000), + ], sense = COBREXA.MOI.MAX_SENSE, - modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], ) + rxn_fluxes = flux_dict(model, opt_model) + @test isapprox( 
rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], 0.8907273630431708, From 885d96b4f850cbe156198a66e72355effca05a57 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Sun, 3 Apr 2022 15:44:53 +0200 Subject: [PATCH 041/109] format --- src/base/types/StandardModel.jl | 2 +- src/base/types/zGeckoModel.jl | 27 ++++++++++++++++++++------- src/base/types/zSMomentModel.jl | 22 ++++++++++++++-------- src/base/utils/enzyme.jl | 10 ++++++++-- test/analysis/gecko.jl | 8 +++----- test/analysis/smoment.jl | 4 +--- 6 files changed, 47 insertions(+), 26 deletions(-) diff --git a/src/base/types/StandardModel.jl b/src/base/types/StandardModel.jl index 56248aa10..965ee484a 100644 --- a/src/base/types/StandardModel.jl +++ b/src/base/types/StandardModel.jl @@ -463,4 +463,4 @@ function has_reaction_grr(model::StandardModel, rid::String) !isnothing(reaction_gene_association(model, rid)) && reaction_gene_association(model, rid) != [[]] && !isempty(first(reaction_gene_association(model, rid))) -end \ No newline at end of file +end diff --git a/src/base/types/zGeckoModel.jl b/src/base/types/zGeckoModel.jl index 138b8dc71..ff0ad38c1 100644 --- a/src/base/types/zGeckoModel.jl +++ b/src/base/types/zGeckoModel.jl @@ -36,7 +36,7 @@ flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) ``` """ -mutable struct EnzymeData +mutable struct EnzymeData reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] 
protein_masses::Dict{String,Float64} @@ -265,7 +265,10 @@ function bounds(model::GeckoModel) n_rxns = length(model.geckodata.reaction_map) n_prots = length(model.geckodata.protein_ids) lbs = [-model.geckodata.h[1:n_rxns]; -model.geckodata.h[2*n_rxns.+(1:n_prots)]] - ubs = [model.geckodata.h[n_rxns.+(1:n_rxns)]; model.geckodata.h[2*n_rxns+n_prots.+(1:n_prots)]] + ubs = [ + model.geckodata.h[n_rxns.+(1:n_rxns)] + model.geckodata.h[2*n_rxns+n_prots.+(1:n_prots)] + ] return lbs, ubs end @@ -460,14 +463,23 @@ function _gecko_build_inequality_constraints( end lb_proteins = [ - haskey(model.enzymedata.protein_measurements, pid) ? model.enzymedata.protein_measurements[pid][1] : 0.0 for pid in protein_ids + haskey(model.enzymedata.protein_measurements, pid) ? + model.enzymedata.protein_measurements[pid][1] : 0.0 for pid in protein_ids ] ub_proteins = [ - haskey(model.enzymedata.protein_measurements, pid) ? model.enzymedata.protein_measurements[pid][2] : - 1000.0 for pid in protein_ids + haskey(model.enzymedata.protein_measurements, pid) ? 
+ model.enzymedata.protein_measurements[pid][2] : 1000.0 for pid in protein_ids ] - h = Array([-lb_fluxes; ub_fluxes; -lb_proteins; ub_proteins; model.enzymedata.total_protein_mass]) + h = Array( + [ + -lb_fluxes + ub_fluxes + -lb_proteins + ub_proteins + model.enzymedata.total_protein_mass + ], + ) return M, h end @@ -688,7 +700,8 @@ function _get_proteins_with_kcats(model::GeckoModel) vcat( [ reaction_gene_association(model.smodel, rid) for - rid in reactions(model.smodel) if haskey(model.enzymedata.reaction_kcats, rid) + rid in reactions(model.smodel) if + haskey(model.enzymedata.reaction_kcats, rid) ]..., )..., ), diff --git a/src/base/types/zSMomentModel.jl b/src/base/types/zSMomentModel.jl index 434af77e5..9b21d4538 100644 --- a/src/base/types/zSMomentModel.jl +++ b/src/base/types/zSMomentModel.jl @@ -14,7 +14,7 @@ reaction_map::Dict{String,Int} metabolite_map::Dict{String,Int} ``` """ -mutable struct SMomentData +mutable struct SMomentData c::SparseVector{Float64,Int64} E::SparseMatrixCSC{Float64,Int64} d::SparseVector{Float64,Int64} @@ -82,9 +82,14 @@ function SMomentModel( ) sm = convert(StandardModel, model) # check that input data is in correct format for smoment - if any(length(v) > 1 for (rid, v) in reaction_kcats if has_reaction_grr(sm, rid)) || - any(length(v) > 1 for (rid, v) in reaction_protein_stoichiometry if haskey(reaction_kcats, rid) && has_reaction_grr(sm, rid)) - @warn("For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data.") + if any(length(v) > 1 for (rid, v) in reaction_kcats if has_reaction_grr(sm, rid)) || + any( + length(v) > 1 for (rid, v) in reaction_protein_stoichiometry if + haskey(reaction_kcats, rid) && has_reaction_grr(sm, rid) + ) + @warn( + "For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data." 
+ ) end smm = SMomentModel( @@ -188,7 +193,8 @@ function build_smomentmodel_internals!(model::SMomentModel) pstoich = first(model.enzymedata.reaction_protein_stoichiometry[original_rid]) mw = dot(pstoich, [model.enzymedata.protein_masses[gid] for gid in grr]) kcat = - contains(rid, "§FOR") ? first(model.enzymedata.reaction_kcats[original_rid])[1] : + contains(rid, "§FOR") ? + first(model.enzymedata.reaction_kcats[original_rid])[1] : first(model.enzymedata.reaction_kcats[original_rid])[2] Se[1, col_idx] = -mw / kcat end @@ -211,7 +217,7 @@ function build_smomentmodel_internals!(model::SMomentModel) #: inequality constraints M, h = _smoment_build_inequality_constraints( - model, + model, n_reactions, lb_fluxes, ub_fluxes, @@ -251,7 +257,7 @@ function _smoment_build_inequality_constraints( ub_fluxes, reaction_map, ) - + #: inequality lhs M = Array( [ @@ -292,4 +298,4 @@ function _smoment_build_inequality_constraints( h = Array([-lb_fluxes; ub_fluxes; model.enzymedata.total_protein_mass]) return M, h -end \ No newline at end of file +end diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index a5272c879..f99934358 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -5,7 +5,13 @@ Return a dictionary mapping protein concentrations to their ids. """ protein_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? 
- last(_map_irrev_to_rev_ids(model.geckodata.reaction_map, value.(opt_model[:x]); protein_ids=model.geckodata.protein_ids)) : nothing + last( + _map_irrev_to_rev_ids( + model.geckodata.reaction_map, + value.(opt_model[:x]); + protein_ids = model.geckodata.protein_ids, + ), + ) : nothing """ @@ -59,4 +65,4 @@ function remove_slow_isozymes!( delete!(model.genes, rm_gids) # remove genes that were deleted return nothing -end \ No newline at end of file +end diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 6034225ae..8db92dcbd 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -18,9 +18,7 @@ opt_model = flux_balance_analysis( model, Tulip.Optimizer; - modifications = [ - change_optimizer_attribute("IPM_IterationsLimit", 1000), - ], + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], sense = COBREXA.MOI.MAX_SENSE, ) @@ -34,6 +32,6 @@ ) prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens) - + @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) -end \ No newline at end of file +end diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 42f0d56f5..efd6a8b0d 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -23,9 +23,7 @@ opt_model = flux_balance_analysis( model, Tulip.Optimizer; - modifications = [ - change_optimizer_attribute("IPM_IterationsLimit", 1000), - ], + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], sense = COBREXA.MOI.MAX_SENSE, ) From 8dd3d1bc21370759412718b47106130ab6f85348 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Sun, 3 Apr 2022 15:49:02 +0200 Subject: [PATCH 042/109] added data source --- test/data_static.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/data_static.jl b/test/data_static.jl index dc972f182..c03e679e5 100644 --- a/test/data_static.jl +++ b/test/data_static.jl @@ -158,7 +158,9 @@ const reaction_standard_gibbs_free_energies = Dict( const ecoli_core_protein_masses = Dict( #= Data downloaded from Uniprot for E. coli K12, - gene mass in kDa. + gene mass in kDa. To obtain these data yourself, go to + Uniprot: https://www.uniprot.org/ + and search using these terms: =# "b4301" => 23.214, "b1602" => 48.723, From 3998673ed31374de12a449bbfeb7ea1d26847b6c Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sun, 3 Apr 2022 16:01:32 +0200 Subject: [PATCH 043/109] obliterate traling whitespace --- src/base/solver.jl | 12 +++++------ src/base/types/MetabolicModel.jl | 2 +- src/base/types/StandardModel.jl | 2 +- src/base/types/zGeckoModel.jl | 34 ++++++++++++++++---------------- src/base/types/zSMomentModel.jl | 10 +++++----- src/base/utils/enzyme.jl | 2 +- test/data_static.jl | 12 +++++------ 7 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/base/solver.jl b/src/base/solver.jl index 3a14025d8..bd012d7fd 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -131,8 +131,8 @@ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = """ flux_dict(model::GeckoModel, opt_model) -Specialization to format solved data for `GeckoModel`s but maps -the solution back into the namespace of the underlying model (the +Specialization to format solved data for `GeckoModel`s but maps +the solution back into the namespace of the underlying model (the original ids). 
""" flux_dict(model::GeckoModel, opt_model) = @@ -143,8 +143,8 @@ flux_dict(model::GeckoModel, opt_model) = """ flux_dict(model::SMomentModel, opt_model) -Specialization to format solved data for `SMomentModel`s but maps -the solution back into the namespace of the underlying model (the +Specialization to format solved data for `SMomentModel`s but maps +the solution back into the namespace of the underlying model (the original ids). """ flux_dict(model::SMomentModel, opt_model) = @@ -154,8 +154,8 @@ flux_dict(model::SMomentModel, opt_model) = """ _map_irrev_to_rev_ids(reaction_map, protein_ids, solution) -Return dictionaries of reaction ids mapped to fluxes, -and protein ids mapped to concentrations using `reaction_map` to +Return dictionaries of reaction ids mapped to fluxes, +and protein ids mapped to concentrations using `reaction_map` to determine the ids of fluxes and `protein_ids` for the gene ids. The solution in `solution` is used to fill the dictionaries. """ diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl index bbaa72dab..202f4ab0c 100644 --- a/src/base/types/MetabolicModel.jl +++ b/src/base/types/MetabolicModel.jl @@ -317,7 +317,7 @@ end """ enzyme_capacity(model::MetabolicModel) -Return enzyme capacity inequality constraint vector and bound, or nothing +Return enzyme capacity inequality constraint vector and bound, or nothing if it doesn't exist in the model. """ function enzyme_capacity(model::MetabolicModel) diff --git a/src/base/types/StandardModel.jl b/src/base/types/StandardModel.jl index 965ee484a..4e2562637 100644 --- a/src/base/types/StandardModel.jl +++ b/src/base/types/StandardModel.jl @@ -446,7 +446,7 @@ end """ has_reaction_isozymes(model::StandardModel, rid::String) -Check if reaction `rid` in `model` is catalyzed by multiple enzymes, +Check if reaction `rid` in `model` is catalyzed by multiple enzymes, i.e. it has isozymes according to the gene reaction rules. 
""" function has_reaction_isozymes(model::StandardModel, rid::String) diff --git a/src/base/types/zGeckoModel.jl b/src/base/types/zGeckoModel.jl index ff0ad38c1..4dd5aa783 100644 --- a/src/base/types/zGeckoModel.jl +++ b/src/base/types/zGeckoModel.jl @@ -1,8 +1,8 @@ """ - mutable struct EnzymeData + mutable struct EnzymeData + +Holds data relevant for enzyme constrained metabolic models. -Holds data relevant for enzyme constrained metabolic models. - Reaction turnover numbers (catalytic constants, kcats) are supplied through `reaction_kcats`, which is a dictionary mapping reaction ids to kcats of each isozyme. Each isozyme should have both a forward and reverse kcat, so @@ -17,17 +17,17 @@ order of each entry in `reaction_kcats` and `reaction_protein_stoichiometry` is taken to be the same as the order returned when calling [`reaction_gene_association`](@ref) on the model. The protein masses (in molar mass units) for each gene in the model should be supplied through -`protein_masses`. The format is a dictionary of gene ids mapped to molar masses. +`protein_masses`. The format is a dictionary of gene ids mapped to molar masses. Total enzyme capacity (sum of all enzyme concentrations multiplied by their molar mass) is constrained by `total_protein_mass`, a unitless mass fraction of enzyme mass to cell dry mass. The reaction fluxes and protein concentrations can be bounded by `flux_measurements` and `protein_measurements` respectively. Both lower and upper bounds need to be supplied (as a tuple) if a reaction flux is to -be bounded, likewise with protein concentration bounds. +be bounded, likewise with protein concentration bounds. # Fields -``` +``` reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] protein_masses::Dict{String,Float64} @@ -79,7 +79,7 @@ EnzymeData( Holds the already constructed GECKO problem. 
# Fields -``` +``` c::SparseVector{Float64, Int64} E::SparseMatrixCSC{Float64, Int64} d::SparseVector{Float64, Int64} @@ -136,7 +136,7 @@ will have enzyme bounds associated with them, but all isozymes are assumed to have data if data is supplied. Currently only `modifications` that change attributes of the `optimizer` are supported. -To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel` +To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel` to run an analysis on it. See also: [`StandardModel`](@ref) @@ -165,7 +165,7 @@ end protein_measurements = Dict{String,Tuple{Float64,Float64}}(), ) -Constructor for `GeckoModel`. +Constructor for `GeckoModel`. """ function GeckoModel( model::MetabolicModel; @@ -247,7 +247,7 @@ genes(model::GeckoModel) = model.geckodata.protein_ids _order_id_to_idx_dict(id_to_idx_dict) Return the keys of `id_to_idx_dict` sorted by the values, which -are taken to be the indices. This is a helper function for +are taken to be the indices. This is a helper function for [`reactions`](@ref) and [`metabolites`](@ref). """ function _order_id_to_idx_dict(dmap) @@ -275,7 +275,7 @@ end """ enzyme_capacity(model::GeckoModel) -Return enzyme capacity inequality constraint vector and bound, or nothing +Return enzyme capacity inequality constraint vector and bound, or nothing if it doesn't exist in the model. """ enzyme_capacity(model::GeckoModel) = (model.geckodata.M[end, :], model.geckodata.h[end]) @@ -287,14 +287,14 @@ Lower level function that updates the matrix form of a model with enzyme capacity constraints, in GECKO format. Specifically, updates `model.geckodata` with the vector and matrix coefficients `c, -E, d, M, h` satisfying +E, d, M, h` satisfying ``` opt cᵀ * x -s.t. E * x = d +s.t. E * x = d M * x ≤ h ``` -as well as `reaction_map, metabolite_map, protein_ids`, where -`reaction_map` shows the order of the columns (reactions) in `E`. 
Proteins +as well as `reaction_map, metabolite_map, protein_ids`, where +`reaction_map` shows the order of the columns (reactions) in `E`. Proteins are ordered according to `protein_ids`, and follow after reactions. """ function build_geckomodel_internals!(model::GeckoModel) @@ -487,7 +487,7 @@ end """ _build_irreversible_stoichiometric_matrix(model::StandardModel) -Return the stoichiometric matrix. All reactions are forward only i.e. only +Return the stoichiometric matrix. All reactions are forward only i.e. only positive fluxes are allowed. Include arm reactions. """ function _build_irreversible_stoichiometric_matrix(model::StandardModel) @@ -508,7 +508,7 @@ function _build_irreversible_stoichiometric_matrix(model::StandardModel) max_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple pseudo_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple ) - #TODO for the counter thing, basically I wanted e.g. max_rxn_idx = 1 and then update it, + #TODO for the counter thing, basically I wanted e.g. max_rxn_idx = 1 and then update it, #TODO but named tuples are immutable... :( # fill the matrix entries diff --git a/src/base/types/zSMomentModel.jl b/src/base/types/zSMomentModel.jl index 9b21d4538..81f8a92e9 100644 --- a/src/base/types/zSMomentModel.jl +++ b/src/base/types/zSMomentModel.jl @@ -4,7 +4,7 @@ Holds the already constructed SMOMENT problem. # Fields -``` +``` c::SparseVector{Float64, Int64} E::SparseMatrixCSC{Float64, Int64} d::SparseVector{Float64, Int64} @@ -44,14 +44,14 @@ SMomentData() = SMomentData( Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, and Steffen Klamt. "Automatic construction of metabolic models with enzyme -constraints." BMC bioinformatics, 2020.` for implementation details. +constraints." BMC bioinformatics, 2020.` for implementation details. 
Note, `"§"` is reserved for internal use as a delimiter, no reaction id should contain that character. Also note, SMOMENT assumes that each reaction only has a single enzyme (one GRR) associated with it. It is required that a model be modified to ensure that this condition is met. For ease-of-use, [`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only -`modifications` that change attributes of the `optimizer` are supported. +`modifications` that change attributes of the `optimizer` are supported. """ mutable struct SMomentModel <: MetabolicModel smodel::StandardModel @@ -167,7 +167,7 @@ end """ build_smomentmodel_internals!(model::SMomentModel) -Build internal data structures used to solve SMOMENT type flux +Build internal data structures used to solve SMOMENT type flux balance analysis problems. """ function build_smomentmodel_internals!(model::SMomentModel) @@ -247,7 +247,7 @@ end reaction_map, ) -Helper function to return functions describing the inequality +Helper function to return functions describing the inequality constraints for smoment. """ function _smoment_build_inequality_constraints( diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index f99934358..366166d41 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -23,7 +23,7 @@ protein_dict(model::GeckoModel, opt_model) = ) Remove all but the fastest isozyme from each reaction in `model`. -Use the largest kcat (for, rev) for these calculations. Modifies all +Use the largest kcat (for, rev) for these calculations. Modifies all the arguments in place. """ function remove_slow_isozymes!( diff --git a/test/data_static.jl b/test/data_static.jl index c03e679e5..ebe01c3c5 100644 --- a/test/data_static.jl +++ b/test/data_static.jl @@ -157,8 +157,8 @@ const reaction_standard_gibbs_free_energies = Dict( const ecoli_core_protein_masses = Dict( #= - Data downloaded from Uniprot for E. coli K12, - gene mass in kDa. 
To obtain these data yourself, go to + Data downloaded from Uniprot for E. coli K12, + gene mass in kDa. To obtain these data yourself, go to Uniprot: https://www.uniprot.org/ and search using these terms: =# @@ -302,7 +302,7 @@ const ecoli_core_protein_masses = Dict( const ecoli_core_protein_stoichiometry = Dict( #= - Data made up, each isozyme is assumed to be composed of + Data made up, each isozyme is assumed to be composed of only one subunit each. =# "ACALD" => [[1.0], [1.0]], @@ -383,9 +383,9 @@ const ecoli_core_protein_stoichiometry = Dict( const ecoli_core_reaction_kcats = Dict( #= - Data taken from Heckmann, David, et al. "Machine learning applied to enzyme - turnover numbers reveals protein structural correlates and improves metabolic - models." Nature communications 9.1 (2018): 1-10. Assume forward and reverse + Data taken from Heckmann, David, et al. "Machine learning applied to enzyme + turnover numbers reveals protein structural correlates and improves metabolic + models." Nature communications 9.1 (2018): 1-10. Assume forward and reverse kcats are the same, and each isozyme has the same kcat. =# "ACALD" => From c1379a0f9bfdd13716feee989e54c886f3b157c8 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Mon, 4 Apr 2022 11:02:09 +0200 Subject: [PATCH 044/109] add isozyme funct --- src/base/utils/enzyme.jl | 60 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index 366166d41..cdd990118 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -66,3 +66,63 @@ function remove_slow_isozymes!( return nothing end + +""" + remove_low_expressed_isozymes!( + model::StandardModel; + reaction_kcats = Dict(), + protein_stoichiometry = Dict(), + protein_masses = Dict(), + gid_measurements = Dict(), + ) + +Remove isozymes that are not expressed. If multiple isozymes are expressed, pick +one that has the highest expression. 
+""" +function remove_low_expressed_isozymes!( + model::StandardModel; + reaction_kcats = Dict(), + protein_stoichiometry = Dict(), + protein_masses = Dict(), + gid_measurements = Dict(), +) + + for rid in reactions(model) + if COBREXA._has_grr(model, rid) + measured_proteins = Float64[] + grrs = reaction_gene_association(model, rid) + for (i, grr) in enumerate(grrs) + + push!( + measured_proteins, + sum( + map( + *, + protein_stoichiometry[rid][i], + [get(gid_measurements, gid, 0.0) for gid in grr], + [protein_masses[gid] for gid in grr], + ), + ), + ) + end + idx = argmax(measured_proteins) + + model.reactions[rid].grr = [grrs[idx]] + reaction_kcats[rid] = [reaction_kcats[rid][idx]] + protein_stoichiometry[rid] = [protein_stoichiometry[rid][idx]] + end + end + + curated_gids = String[] + for rid in reactions(model) + if COBREXA._has_grr(model, rid) + for grr in reaction_gene_association(model, rid) + append!(curated_gids, grr) + end + end + end + rm_gids = setdiff(genes(model), curated_gids) + delete!(model.genes, rm_gids) # remove genes that were deleted + + return nothing +end \ No newline at end of file From bb31baf0e72ba3185b3b63cbca5a794959abd4b1 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Mon, 4 Apr 2022 11:03:42 +0200 Subject: [PATCH 045/109] format and fix --- src/base/utils/enzyme.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index cdd990118..34ee468c7 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -88,7 +88,7 @@ function remove_low_expressed_isozymes!( ) for rid in reactions(model) - if COBREXA._has_grr(model, rid) + if has_reaction_grr(model, rid) measured_proteins = Float64[] grrs = reaction_gene_association(model, rid) for (i, grr) in enumerate(grrs) @@ -115,7 +115,7 @@ function remove_low_expressed_isozymes!( curated_gids = String[] for rid in reactions(model) - if COBREXA._has_grr(model, rid) + if has_reaction_grr(model, rid) for grr in reaction_gene_association(model, rid) append!(curated_gids, grr) end @@ -123,6 +123,6 @@ function remove_low_expressed_isozymes!( end rm_gids = setdiff(genes(model), curated_gids) delete!(model.genes, rm_gids) # remove genes that were deleted - + return nothing -end \ No newline at end of file +end From 4bfa2bcc73200c2259d881d1d5bb44778018613d Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Mon, 4 Apr 2022 13:32:22 +0200 Subject: [PATCH 046/109] update kwarg --- src/base/types/zGeckoModel.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base/types/zGeckoModel.jl b/src/base/types/zGeckoModel.jl index 4dd5aa783..77ef2c6df 100644 --- a/src/base/types/zGeckoModel.jl +++ b/src/base/types/zGeckoModel.jl @@ -61,14 +61,14 @@ EnzymeData( reaction_kcats, reaction_protein_stoichiometry, protein_masses, - total_protein; + total_protein_mass; flux_measurements = Dict{String,Tuple{Float64,Float64}}(), protein_measurements = Dict{String,Tuple{Float64,Float64}}(), ) = EnzymeData( reaction_kcats, reaction_protein_stoichiometry, protein_masses, - total_protein, + total_protein_mass, flux_measurements, protein_measurements, ) From 123d6d0af860a728fcd5c75919c787630cc1df56 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Mon, 4 Apr 2022 13:40:59 +0200 Subject: [PATCH 047/109] fix another kwarg --- src/base/types/zGeckoModel.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base/types/zGeckoModel.jl b/src/base/types/zGeckoModel.jl index 77ef2c6df..3be128553 100644 --- a/src/base/types/zGeckoModel.jl +++ b/src/base/types/zGeckoModel.jl @@ -172,7 +172,7 @@ function GeckoModel( reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), protein_masses = Dict{String,Float64}(), - total_protein = 0.0, + total_protein_mass = 0.0, flux_measurements = Dict{String,Tuple{Float64,Float64}}(), protein_measurements = Dict{String,Tuple{Float64,Float64}}(), ) @@ -183,7 +183,7 @@ function GeckoModel( reaction_kcats, reaction_protein_stoichiometry, protein_masses, - total_protein; + total_protein_mass; flux_measurements, protein_measurements, ), From 7e405eb6821624ba583a87de880698eed928d9af Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Mon, 4 Apr 2022 14:14:02 +0200 Subject: [PATCH 048/109] fix kwargs in tests --- test/analysis/gecko.jl | 2 +- test/analysis/smoment.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 8db92dcbd..963d0dff2 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -10,7 +10,7 @@ reaction_kcats = ecoli_core_reaction_kcats, reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, protein_masses = ecoli_core_protein_masses, - total_protein = total_protein_mass, # mg/gdW + total_protein_mass = total_protein_mass, # mg/gdW flux_measurements, protein_measurements, ) diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index efd6a8b0d..80ab74e08 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -16,7 +16,7 @@ reaction_kcats = ecoli_core_reaction_kcats, reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, protein_masses = ecoli_core_protein_masses, - total_protein = total_protein_mass, # mg/gdW + total_protein_mass = total_protein_mass, # mg/gdW flux_measurements, ) From d516391107daf58ede85dab2bd8a6bc09338ed7d Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Mon, 4 Apr 2022 14:15:40 +0200 Subject: [PATCH 049/109] more kwargs --- src/base/types/zSMomentModel.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base/types/zSMomentModel.jl b/src/base/types/zSMomentModel.jl index 81f8a92e9..fe569501e 100644 --- a/src/base/types/zSMomentModel.jl +++ b/src/base/types/zSMomentModel.jl @@ -77,7 +77,7 @@ function SMomentModel( reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), protein_masses = Dict{String,Float64}(), - total_protein = 0.0, + total_protein_mass = 0.0, flux_measurements = Dict{String,Tuple{Float64,Float64}}(), ) sm = convert(StandardModel, model) @@ -99,7 +99,7 @@ function SMomentModel( reaction_kcats, reaction_protein_stoichiometry, protein_masses, - total_protein; + total_protein_mass; flux_measurements, ), ) From dd8f9901af06f0825dd617ec46d673593068e4d9 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 5 Apr 2022 10:17:02 +0200 Subject: [PATCH 050/109] fix has reaction edge case --- src/base/types/StandardModel.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/base/types/StandardModel.jl b/src/base/types/StandardModel.jl index 4e2562637..31d981901 100644 --- a/src/base/types/StandardModel.jl +++ b/src/base/types/StandardModel.jl @@ -460,7 +460,8 @@ Check if reaction `rid` in `model` has a gene reaction rule entry. """ function has_reaction_grr(model::StandardModel, rid::String) #TODO simplify this once COBREXA enforces universal rules for GRR representation - !isnothing(reaction_gene_association(model, rid)) && + haskey(model.reactions, rid) && + !isnothing(reaction_gene_association(model, rid)) && reaction_gene_association(model, rid) != [[]] && !isempty(first(reaction_gene_association(model, rid))) end From 577b1af4639d5a4b65e4b17315042efbfe8fc2dd Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Fri, 8 Apr 2022 21:12:05 +0200 Subject: [PATCH 051/109] fixup gecko --- src/COBREXA.jl | 1 + src/base/solver.jl | 63 +- src/base/types/Gene.jl | 6 +- src/base/types/Isozyme.jl | 17 + src/base/types/MetabolicModel.jl | 1 - src/base/types/derivedmodels/GeckoModel.jl | 430 +++++++++++ src/base/types/derivedmodels/SMomentModel.jl | 260 +++++++ src/base/types/zGeckoModel.jl | 709 ------------------- src/base/types/zSMomentModel.jl | 301 -------- src/base/utils/enzyme.jl | 212 +++--- src/base/utils/irreversible_stoichiometry.jl | 182 +++++ test/analysis/gecko.jl | 47 +- test/analysis/smoment.jl | 64 +- 13 files changed, 1064 insertions(+), 1229 deletions(-) create mode 100644 src/base/types/Isozyme.jl create mode 100644 src/base/types/derivedmodels/GeckoModel.jl create mode 100644 src/base/types/derivedmodels/SMomentModel.jl delete mode 100644 src/base/types/zGeckoModel.jl delete mode 100644 src/base/types/zSMomentModel.jl create mode 100644 src/base/utils/irreversible_stoichiometry.jl diff --git a/src/COBREXA.jl b/src/COBREXA.jl index c27cc8f64..76c912cd8 100644 --- a/src/COBREXA.jl +++ b/src/COBREXA.jl @@ -32,6 +32,7 @@ _inc_all.( joinpath("base", "logging"), joinpath("base", "macros"), joinpath("base", "types"), + joinpath("base", "types", "derivedmodels"), "base", "io", joinpath("io", "show"), diff --git a/src/base/solver.jl b/src/base/solver.jl index bd012d7fd..e79ef39a1 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -8,6 +8,8 @@ Convert `MetabolicModel`s to a JuMP model, place objectives and the equality constraint. + +Here coupling means inequality constraints coupling multiple variables together. 
""" function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.MAX_SENSE) @@ -25,12 +27,8 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.M C = coupling(model) # empty if no coupling cl, cu = coupling_bounds(model) - isempty(C) || @constraint(optimization_model, c_lbs, cl .<= coupling(model) * x) # coupling lower bounds - isempty(C) || @constraint(optimization_model, c_ubs, coupling(model) * x .<= cu) # coupling upper bounds - - enzyme_vec, enzyme_mass = enzyme_capacity(model) # nothing if not present - !isnothing(enzyme_vec) && - @constraint(optimization_model, enz_cap, dot(enzyme_vec, x) <= enzyme_mass) + isempty(C) || @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds + isempty(C) || @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds return optimization_model end @@ -87,7 +85,6 @@ function set_optmodel_bound!( isnothing(ub) || set_normalized_rhs(opt_model[:ubs][vidx], ub) end - """ solved_objective_value(opt_model)::Maybe{Float64} @@ -127,55 +124,3 @@ flux_dict(model, flux_balance_analysis(model, ...)) flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = is_solved(opt_model) ? Dict(reactions(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing - -""" - flux_dict(model::GeckoModel, opt_model) - -Specialization to format solved data for `GeckoModel`s but maps -the solution back into the namespace of the underlying model (the -original ids). -""" -flux_dict(model::GeckoModel, opt_model) = - is_solved(opt_model) ? - _map_irrev_to_rev_ids(model.geckodata.reaction_map, value.(opt_model[:x])) : nothing - - -""" - flux_dict(model::SMomentModel, opt_model) - -Specialization to format solved data for `SMomentModel`s but maps -the solution back into the namespace of the underlying model (the -original ids). -""" -flux_dict(model::SMomentModel, opt_model) = - is_solved(opt_model) ? 
- _map_irrev_to_rev_ids(model.smomentdata.reaction_map, value.(opt_model[:x])) : nothing - -""" - _map_irrev_to_rev_ids(reaction_map, protein_ids, solution) - -Return dictionaries of reaction ids mapped to fluxes, -and protein ids mapped to concentrations using `reaction_map` to -determine the ids of fluxes and `protein_ids` for the gene ids. -The solution in `solution` is used to fill the dictionaries. -""" -function _map_irrev_to_rev_ids(reaction_map, solution; protein_ids = []) - reaction_flux = Dict{String,Float64}() - for (k, i) in reaction_map - contains(k, "§ISO") && continue # §ISO§FOR and §ISO§REV need to be ignored - rid = split(k, "§")[1] - v = contains(k, "§FOR") ? solution[i] : -solution[i] - reaction_flux[rid] = get(reaction_flux, rid, 0) + v - end - - if isempty(protein_ids) - return reaction_flux - else - n_reactions = length(reaction_map) - protein_flux = Dict{String,Float64}() - for (i, pid) in enumerate(protein_ids) - protein_flux[pid] = solution[n_reactions+i] - end - return reaction_flux, protein_flux - end -end diff --git a/src/base/types/Gene.jl b/src/base/types/Gene.jl index b27dec9fa..a8f3a5da1 100644 --- a/src/base/types/Gene.jl +++ b/src/base/types/Gene.jl @@ -7,6 +7,7 @@ id :: String name :: Maybe{String} notes :: Dict{String, Vector{String}} annotation :: Dict{String, Union{Vector{String}, String}} +molar_mass :: Maybe{Float64} ```` """ mutable struct Gene @@ -14,7 +15,8 @@ mutable struct Gene name::Maybe{String} notes::Notes annotations::Annotations + molar_mass::Maybe{Float64} - Gene(id::String = ""; name = nothing, notes = Notes(), annotations = Annotations()) = - new(id, name, notes, annotations) + Gene(id::String = ""; name = nothing, notes = Notes(), annotations = Annotations(), molar_mass=nothing) = + new(id, name, notes, annotations, molar_mass) end diff --git a/src/base/types/Isozyme.jl b/src/base/types/Isozyme.jl new file mode 100644 index 000000000..b7f253191 --- /dev/null +++ b/src/base/types/Isozyme.jl @@ -0,0 +1,17 @@ +""" 
+ mutable struct Isozyme + +Struct containing isozyme information. Here, `stoichiometry` is a +dictionary of gene ids to their stoichiometry in the isozyme complex, +and `kcats` is a tuple of the forward and reverse kcats of the isozyme. + +# Fields +```` +stoichiometry :: Dict{String, Int} +kcats :: Tuple{Float64, Float64} +```` +""" +mutable struct Isozyme + stoichiometry :: Dict{String, Int} + kcats :: Tuple{Float64, Float64} +end \ No newline at end of file diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl index 202f4ab0c..304fdadda 100644 --- a/src/base/types/MetabolicModel.jl +++ b/src/base/types/MetabolicModel.jl @@ -89,7 +89,6 @@ function objective(a::MetabolicModel)::SparseVec _missing_impl_error(objective, (a,)) end - """ fluxes(a::MetabolicModel)::Vector{String} diff --git a/src/base/types/derivedmodels/GeckoModel.jl b/src/base/types/derivedmodels/GeckoModel.jl new file mode 100644 index 000000000..ca2e3eae1 --- /dev/null +++ b/src/base/types/derivedmodels/GeckoModel.jl @@ -0,0 +1,430 @@ +""" + mutable struct GeckoModel <: MetabolicModel + +A model that incorporates enzyme capacity and kinetic constraints via the GECKO +formulation. See `Sánchez, Benjamín J., et al. "Improving the phenotype +predictions of a yeast genome‐scale metabolic model by incorporating enzymatic +constraints." Molecular systems biology, 2017.` for implementation details. + +Note, since the model uses irreversible reactions internally, `"§FOR"` (for the +forward direction) and `"§REV"` (for the reverse direction) is appended to each +reaction internally. Hence, `"§"` is reserved for internal use as a delimiter, +no reaction id should contain this character. + +To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel` +to run an analysis on it. 
+ +# Fields +``` +reaction_ids::Vector{String} +irrev_reaction_ids::Vector{String} +metabolites::Vector{String} +gene_ids::Vector{String} +c::SparseVec +S::SparseMat +b::SparseVec +xl::SparseVec +xu::SparseVec +C::SparseMat +cl::Vector{Float64} +cu::Vector{Float64} +``` +""" +mutable struct GeckoModel <: MetabolicModel + reaction_ids::Vector{String} + irrev_reaction_ids::Vector{String} + metabolites::Vector{String} + gene_ids::Vector{String} + + # gecko + c::SparseVec + S::SparseMat + b::SparseVec + xl::SparseVec + xu::SparseVec + + # enzyme capacity constraints + C::SparseMat + cl::Vector{Float64} + cu::Vector{Float64} +end + +""" + stoichiometry(model::GeckoModel) + +Return stoichiometry matrix that includes enzymes as metabolites. +""" +stoichiometry(model::GeckoModel) = model.S + +""" + balance(model::GeckoModel) + +Return stoichiometric balance. +""" +balance(model::GeckoModel) = model.b + +""" + objective(model::GeckoModel) + +Return objective of `model`. +""" +objective(model::GeckoModel) = model.c + +""" + reactions(model::GeckoModel) + +Returns reactions order according to stoichiometric matrix. +""" +reactions(model::GeckoModel) = model.reaction_ids + +""" + n_reactions(model::GeckoModel) + +Returns the number of reactions in the model. +""" +n_reactions(model::GeckoModel) = length(model.reaction_ids) + +""" + genes(model::GeckoModel) + +Returns the genes (proteins) in the order as they appear as variables in the +model. +""" +genes(model::GeckoModel) = model.gene_ids + +""" + n_genes(model::GeckoModel) + +Returns the number of genes in the model. +""" +n_genes(model::GeckoModel) = length(model.gene_ids) + +""" + metabolites(model::GeckoModel) + +Return the metabolites in `model`. +""" +metabolites(model::GeckoModel) = model.metabolites + +""" + n_metabolites(model::GeckoModel) = + +Return the number of metabolites in `model`. 
+""" +n_metabolites(model::GeckoModel) = length(metabolites(model)) + +""" + bounds(model::GeckoModel) + +Return variable bounds for `GeckoModel`. +""" +bounds(model::GeckoModel) = (model.xl, model.xu) + +""" + coupling(model::GeckoModel) + +Coupling constraint matrix for a `GeckoModel`. +""" +coupling(model::GeckoModel) = model.C + +""" + coupling_bounds(model::GeckoModel) + +Coupling bounds for a `GeckoModel`. +""" +coupling_bounds(model::GeckoModel) = (model.cl, model.cu) + +""" + reaction_flux(model::GeckoModel) + +Return the transposed matrix mapping the irreversible reaction and gene variables to net fluxes of the original reactions (+1 for `§FOR`, -1 for `§REV`). +""" +function reaction_flux(model::GeckoModel) + R = spzeros(n_reactions(model), n_genes(model) + length(model.irrev_reaction_ids)) + for (i, rid) in enumerate(reactions(model)) + for_idx = findfirst(x -> x == rid*"§ARM§FOR" || x == rid*"§FOR", model.irrev_reaction_ids) + rev_idx = findfirst(x -> x == rid*"§ARM§REV" || x == rid*"§REV", model.irrev_reaction_ids) + !isnothing(for_idx) && (R[i, for_idx] = 1.0) + !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) + end + return R' +end + +""" + GeckoModel( + model::StandardModel; + rid_isozymes = Dict{String, Vector{Isozyme}}(), + enzyme_capacities = [(),], + ) + +Construct a `GeckoModel` based on `model` using the kinetic data encoded by +`rid_isozymes`. Enzyme capacity constraints can be added through `enzyme_capacities`, +which is a vector of tuples. In the first position of the tuple is a list of gene ids, +and the second position is the upper bound on the summed mass of these gene products. + +The units of the fluxes and protein concentration depend on those used in +`rid_isozymes` for the kcats and the molar masses encoded in the genes of +`model`. Currently only `modifications` that change attributes of the +`optimizer` are supported.
+ +# Example +``` +gm = GeckoModel( + model; + rid_isozymes, + enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)], +) + +opt_model = flux_balance_analysis( + gm, + Tulip.Optimizer +) + +rxn_fluxes = flux_dict(gm, opt_model) +prot_concens = protein_dict(gm, opt_model) +``` +""" +function GeckoModel( + model::StandardModel; + rid_isozymes = Dict{String, Vector{Isozyme}}(), + enzyme_capacities = [(),], +) + S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = + _build_irreversible_stoichiometric_matrix(model) + + #: find all gene products that have kcats associated with them + gene_ids = get_genes_with_kcats(rid_isozymes) + + #: size of resultant model + num_reactions = size(S, 2) + num_genes = length(gene_ids) + num_metabolites = size(S, 1) + num_vars = num_reactions + num_genes + + #: equality lhs + E_components = ( #TODO add size hints if possible + row_idxs = Vector{Int}(), + col_idxs = Vector{Int}(), + coeffs = Vector{Float64}(), + ) + + for (rid, col_idx) in reaction_map + original_rid = string(split(rid, "§")[1]) + + # skip these entries + contains(rid, "§ARM") && continue + !haskey(rid_isozymes, original_rid) && continue + + # these entries have kcats + if contains(rid, "§ISO") + iso_num = parse( + Int, + replace( + first(filter(startswith("ISO"), split(rid, "§")[2:end])), + "ISO" => "", + ), + ) + else # only one enzyme + iso_num = 1 + end + + # add all entries to column of matrix + COBREXA._add_enzyme_variable( + rid_isozymes, + iso_num, # only one enzyme + rid, + original_rid, + E_components, + col_idx, + gene_ids, + ) + end + + Se = sparse( + E_components.row_idxs, + E_components.col_idxs, + E_components.coeffs, + num_genes, + num_reactions, + ) + + stoich_mat = sparse( + [ + S zeros(num_metabolites, num_genes) + Se I(num_genes) + ] + ) + + #: equality rhs + b = spzeros(num_metabolites + num_genes) + + #: find objective (assume objective is forward) + obj_idx_orig = first(findnz(objective(model))[1]) + obj_id_orig = 
reactions(model)[obj_idx_orig] + obj_id = obj_id_orig * "§FOR" + c = spzeros(num_vars) + obj_idx = reaction_map[obj_id] + c[obj_idx] = 1.0 + + #: inequality constraints + xl = sparse([lb_fluxes; fill(0.0, num_genes)]) + xu = sparse([ub_fluxes; fill(1000.0, num_genes)]) + + #: enzyme capacity constraints + mw_proteins = [model.genes[pid].molar_mass for pid in gene_ids] + C = spzeros(length(enzyme_capacities), num_vars) + cl = spzeros(length(enzyme_capacities)) + cu = spzeros(length(enzyme_capacities)) + + for (i, enz_cap) in enumerate(enzyme_capacities) + enz_idxs = indexin(first(enz_cap), gene_ids) + C[i, num_reactions .+ enz_idxs] .= mw_proteins[enz_idxs] + cu[i] = last(enz_cap) + end + + return GeckoModel( + reactions(model), + _order_id_to_idx_dict(reaction_map), + _order_id_to_idx_dict(metabolite_map), + gene_ids, + c, + stoich_mat, + b, + xl, + xu, + C, + cl, + cu, + ) +end + +""" + _add_enzyme_variable( + rid_isozymes, + iso_num, + rid, + original_rid, + E_components, + col_idx, + gene_ids, + ) + +Helper function to add an column into the enzyme stoichiometric matrix. +""" +function _add_enzyme_variable( + rid_isozymes, + iso_num, + rid, + original_rid, + E_components, + col_idx, + gene_ids, +) + pstoich = rid_isozymes[original_rid][iso_num].stoichiometry + kcat = + contains(rid, "§FOR") ? rid_isozymes[original_rid][iso_num].kcats[1] : + rid_isozymes[original_rid][iso_num].kcats[2] + for (pid, pst) in pstoich + push!(E_components.row_idxs, first(indexin([pid], gene_ids))) + push!(E_components.col_idxs, col_idx) + push!(E_components.coeffs, -pst / kcat) + end +end + +""" + get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) + +Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats` field. +Assume that if a reaction has a kcat then each isozyme has a kcat. 
+"""
+function get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}})
+    gids = String[]
+    for isozymes in values(rid_isozymes)
+        for isozyme in isozymes
+            append!(gids, collect(keys(isozyme.stoichiometry)))
+        end
+    end
+    return unique(gids)
+end
+
+"""
+    _order_id_to_idx_dict(id_to_idx_dict)
+
+Return the keys of `id_to_idx_dict` sorted by the values, which
+are taken to be the indices. This is a helper function for
+[`reactions`](@ref) and [`metabolites`](@ref).
+"""
+function _order_id_to_idx_dict(dmap)
+    ks = collect(keys(dmap))
+    vs = collect(values(dmap))
+    return ks[sortperm(vs)]
+end
+
+"""
+    change_bound(model::GeckoModel, id; lb=nothing, ub=nothing)
+
+Change the bounds of the variable `id` (a gene id or a reaction id) in `model`. A
+bound is left unchanged if the respective argument is `nothing`. Note, for
+`GeckoModel`s, if the model used to construct the `GeckoModel` has irreversible
+reactions, then these reactions will be permanently irreversible in the model,
+i.e. changing their bounds to make them reversible will have no effect.
+"""
+function change_bound(model::GeckoModel, id; lb=nothing, ub=nothing)
+    gene_idx = first(indexin([id], model.gene_ids)) # `nothing` if `id` is not a gene id
+
+    if isnothing(gene_idx) # not a gene: look `id` up among the irreversible reactions
+        flux_for_idx = findfirst(x -> x == id*"§ARM§FOR" || x == id*"§FOR", model.irrev_reaction_ids)
+        if !isnothing(flux_for_idx) # forward part: bounds are clamped to be non-negative
+            if !isnothing(lb)
+                if lb <= 0
+                    model.xl[flux_for_idx] = 0
+                else
+                    model.xl[flux_for_idx] = lb
+                end
+            end
+            if !isnothing(ub)
+                if ub <= 0
+                    model.xu[flux_for_idx] = 0
+                else
+                    model.xu[flux_for_idx] = ub
+                end
+            end
+        end
+
+        flux_rev_idx = findfirst(x -> x == id*"§ARM§REV" || x == id*"§REV", model.irrev_reaction_ids)
+        if !isnothing(flux_rev_idx) # reverse part: bounds are negated and swapped
+            if !isnothing(lb) # `lb` of the reaction limits the upper bound of the reverse part
+                if lb >= 0
+                    model.xu[flux_rev_idx] = 0
+                else
+                    model.xu[flux_rev_idx] = -lb
+                end
+            end
+            if !isnothing(ub) # handled independently of `lb`, as in the forward part
+                if ub >= 0
+                    model.xl[flux_rev_idx] = 0
+                else
+                    model.xl[flux_rev_idx] = -ub
+                end
+            end
+        end
+    else
+        n = length(model.irrev_reaction_ids) # gene variables are indexed after the n reaction variables
+        !isnothing(lb) && (model.xl[n + gene_idx] = lb)
+        !isnothing(ub) && (model.xu[n + gene_idx] = ub)
+    end
+
+    return nothing
+end
+
+"""
+    change_bounds(model::GeckoModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids)))
+
+Change the bounds of multiple variables in `model` simultaneously. See
+[`change_bound`](@ref) for details.
+"""
+function change_bounds(model::GeckoModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids)))
+    for (id, lb, ub) in zip(ids, lbs, ubs)
+        change_bound(model, id; lb=lb, ub=ub)
+    end
+end
diff --git a/src/base/types/derivedmodels/SMomentModel.jl b/src/base/types/derivedmodels/SMomentModel.jl
new file mode 100644
index 000000000..e39088d8e
--- /dev/null
+++ b/src/base/types/derivedmodels/SMomentModel.jl
@@ -0,0 +1,260 @@
+# """
+# mutable struct SMomentModel <: MetabolicModel
+
+# Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos,
+# and Steffen Klamt. "Automatic construction of metabolic models with enzyme
+# constraints." BMC bioinformatics, 2020.` for implementation details.
+ +# Note, `"§"` is reserved for internal use as a delimiter, no reaction id should +# contain that character. Also note, SMOMENT assumes that each reaction only has a +# single enzyme (one GRR) associated with it. It is required that a model be +# modified to ensure that this condition is met. For ease-of-use, +# [`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only +# `modifications` that change attributes of the `optimizer` are supported. +# """ +# mutable struct SMomentModel <: MetabolicModel +# smodel::StandardModel +# smomentdata::SMomentData +# enzymedata::EnzymeData +# end + +# """ +# SMomentModel( +# model::MetabolicModel; +# reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), +# reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), +# protein_masses = Dict{String,Float64}(), +# total_protein = 0.0, +# flux_measurements = Dict{String,Tuple{Float64,Float64}}(), +# ) + +# Construct an `SMomentModel`. + +# """ +# function SMomentModel( +# model::MetabolicModel; +# reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), +# reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), +# protein_masses = Dict{String,Float64}(), +# total_protein_mass = 0.0, +# flux_measurements = Dict{String,Tuple{Float64,Float64}}(), +# ) +# sm = convert(StandardModel, model) +# # check that input data is in correct format for smoment +# if any(length(v) > 1 for (rid, v) in reaction_kcats if has_reaction_grr(sm, rid)) || +# any( +# length(v) > 1 for (rid, v) in reaction_protein_stoichiometry if +# haskey(reaction_kcats, rid) && has_reaction_grr(sm, rid) +# ) +# @warn( +# "For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data." 
+# ) +# end + +# smm = SMomentModel( +# sm, +# SMomentData(), # empty +# EnzymeData( +# reaction_kcats, +# reaction_protein_stoichiometry, +# protein_masses, +# total_protein_mass; +# flux_measurements, +# ), +# ) + +# # build data in SMomentModel +# build_smomentmodel_internals!(smm) + +# return smm +# end + +# """ +# stoichiometry(model::SMomentModel) + +# Return stoichiometry matrix that includes enzymes as metabolites. +# """ +# function stoichiometry(model::SMomentModel) +# build_smomentmodel_internals!(model) +# return model.smomentdata.E +# end + +# """ +# balance(model::SMomentModel) + +# Return stoichiometric balance. +# """ +# balance(model::SMomentModel) = model.smomentdata.d + +# """ +# objective(model::SMomentModel) + +# Return objective of `model`. +# """ +# objective(model::SMomentModel) = model.smomentdata.c + +# @_inherit_model_methods SMomentModel () smodel () genes +# @_inherit_model_methods SMomentModel (rid::String,) smodel (rid,) reaction_gene_association reaction_stoichiometry reaction_bounds is_reaction_reversible is_reaction_forward_only is_reaction_backward_only is_reaction_unidirectional is_reaction_blocked has_reaction_isozymes has_reaction_grr + +# """ +# reactions(model::SMomentModel) + +# Returns reactions order according to stoichiometric matrix. Note, call [`genes`](@ref) +# to get the order of the remaining variables. +# """ +# reactions(model::SMomentModel) = _order_id_to_idx_dict(model.smomentdata.reaction_map) + +# """ +# metabolites(model::SMomentModel) + +# Returns the metabolites ordered according to the stoichiometric matrix. +# """ +# metabolites(model::SMomentModel) = _order_id_to_idx_dict(model.smomentdata.metabolite_map) + +# """ +# bounds(model::SMomentModel) + +# Return variable bounds for `SMomentModel`. 
+# """ +# function bounds(model::SMomentModel) +# n_rxns = length(model.smomentdata.reaction_map) +# lbs = [-model.smomentdata.h[1:n_rxns]; 0] +# ubs = [model.smomentdata.h[n_rxns.+(1:n_rxns)]; model.smomentdata.h[end]] +# return lbs, ubs +# end + +# """ +# build_smomentmodel_internals!(model::SMomentModel) + +# Build internal data structures used to solve SMOMENT type flux +# balance analysis problems. +# """ +# function build_smomentmodel_internals!(model::SMomentModel) + +# S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = +# _build_irreversible_stoichiometric_matrix(model.smodel) + +# #: size of resultant model +# n_reactions = size(S, 2) +# n_metabolites = size(S, 1) +# n_vars = n_reactions + 1 + +# #: equality lhs +# Se = zeros(1, n_reactions) + +# for (rid, col_idx) in reaction_map +# original_rid = string(split(rid, "§")[1]) + +# # skip these entries +# !haskey(model.enzymedata.reaction_kcats, original_rid) && continue +# # these entries have kcats, only one GRR by assumption +# grr = first(reaction_gene_association(model, original_rid)) +# pstoich = first(model.enzymedata.reaction_protein_stoichiometry[original_rid]) +# mw = dot(pstoich, [model.enzymedata.protein_masses[gid] for gid in grr]) +# kcat = +# contains(rid, "§FOR") ? 
+# first(model.enzymedata.reaction_kcats[original_rid])[1] : +# first(model.enzymedata.reaction_kcats[original_rid])[2] +# Se[1, col_idx] = -mw / kcat +# end + +# E = [ +# S zeros(n_metabolites, 1) +# Se 1.0 +# ] + +# #: equality rhs +# d = zeros(n_metabolites + 1) + +# #: find objective +# obj_idx_orig = first(findnz(objective(model.smodel))[1]) +# obj_id_orig = reactions(model.smodel)[obj_idx_orig] +# obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective +# c = zeros(n_vars) +# obj_idx = reaction_map[obj_id] +# c[obj_idx] = 1.0 + +# #: inequality constraints +# M, h = _smoment_build_inequality_constraints( +# model, +# n_reactions, +# lb_fluxes, +# ub_fluxes, +# reaction_map, +# ) + +# #: overwrite geckomodel data +# model.smomentdata = SMomentData( +# sparse(c), +# sparse(E), +# sparse(d), +# sparse(M), +# sparse(h), +# reaction_map, +# metabolite_map, +# ) + +# return nothing +# end + +# """ +# _smoment_build_inequality_constraints( +# model::SMomentModel, +# n_reactions, +# lb_fluxes, +# ub_fluxes, +# reaction_map, +# ) + +# Helper function to return functions describing the inequality +# constraints for smoment. 
+# """ +# function _smoment_build_inequality_constraints( +# model::SMomentModel, +# n_reactions, +# lb_fluxes, +# ub_fluxes, +# reaction_map, +# ) + +# #: inequality lhs +# M = Array( +# [ +# -I(n_reactions) zeros(n_reactions, 1) +# I(n_reactions) zeros(n_reactions, 1) +# zeros(1, n_reactions) 1 +# ], +# ) + +# #: inequality rhs +# for original_rid in keys(model.enzymedata.flux_measurements) # only constrain if measurement available +# lb = model.enzymedata.flux_measurements[original_rid][1] +# ub = model.enzymedata.flux_measurements[original_rid][2] +# rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] + +# if lb > 0 # forward only +# for rid in rids +# contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) +# contains(rid, "§FOR") && +# (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) +# end +# elseif ub < 0 # reverse only +# for rid in rids +# contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) +# contains(rid, "§REV") && +# (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) +# end +# else # measurement does not rule our reversibility +# for rid in rids +# contains(rid, "§FOR") && +# (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) +# contains(rid, "§REV") && +# (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) +# end +# end +# end + +# h = Array([-lb_fluxes; ub_fluxes; model.enzymedata.total_protein_mass]) + +# return M, h +# end diff --git a/src/base/types/zGeckoModel.jl b/src/base/types/zGeckoModel.jl deleted file mode 100644 index 3be128553..000000000 --- a/src/base/types/zGeckoModel.jl +++ /dev/null @@ -1,709 +0,0 @@ -""" - mutable struct EnzymeData - -Holds data relevant for enzyme constrained metabolic models. - -Reaction turnover numbers (catalytic constants, kcats) are supplied through -`reaction_kcats`, which is a dictionary mapping reaction ids to kcats of each -isozyme. 
Each isozyme should have both a forward and reverse kcat, so -`reaction_kcats = Dict(rid => [[k1f, k1r], [k2f, k2r]], ...)` for `rid` with two -isozymes. The stoichiometry of each isozyme needs to be supplied by -`protein_stoichiometry`. The format is also a dictionary mapping gene ids to -their stoichiometry, e.g. `protein_stoichiometry = Dict(rid => -[[1,1],[1,2]],...)` implies that the first isozyme of `rid` is composed of two -subunits, each present once in the protein, while the second isozyme is composed -of two subunits, but the second subunit is present twice in the isozyme. The -order of each entry in `reaction_kcats` and `reaction_protein_stoichiometry` is -taken to be the same as the order returned when calling -[`reaction_gene_association`](@ref) on the model. The protein masses (in molar -mass units) for each gene in the model should be supplied through -`protein_masses`. The format is a dictionary of gene ids mapped to molar masses. - -Total enzyme capacity (sum of all enzyme concentrations multiplied by their -molar mass) is constrained by `total_protein_mass`, a unitless mass fraction of -enzyme mass to cell dry mass. The reaction fluxes and protein concentrations can -be bounded by `flux_measurements` and `protein_measurements` respectively. Both -lower and upper bounds need to be supplied (as a tuple) if a reaction flux is to -be bounded, likewise with protein concentration bounds. - -# Fields -``` -reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] -reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] -protein_masses::Dict{String,Float64} -total_protein_mass::Float64 -flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) -protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) -``` -""" -mutable struct EnzymeData - reaction_kcats::Dict{String,Vector{Vector{Float64}}} # rid => [[for, rev], ...] 
- reaction_protein_stoichiometry::Dict{String,Vector{Vector{Float64}}} # rid => [[stoich, stoich,...], ...] - protein_masses::Dict{String,Float64} - total_protein_mass::Float64 - flux_measurements::Dict{String,Tuple{Float64,Float64}} # rid => (lb, ub) - protein_measurements::Dict{String,Tuple{Float64,Float64}} # pid => (lb, ub) -end - -""" - EnzymeData( - reaction_kcats, - reaction_protein_stoichiometry, - protein_masses, - total_protein; - flux_measurements = Dict{String,Tuple{Float64,Float64}}(), - protein_measurements = Dict{String,Tuple{Float64,Float64}}(), - ) - -Constructor for `EnzymeData`. -""" -EnzymeData( - reaction_kcats, - reaction_protein_stoichiometry, - protein_masses, - total_protein_mass; - flux_measurements = Dict{String,Tuple{Float64,Float64}}(), - protein_measurements = Dict{String,Tuple{Float64,Float64}}(), -) = EnzymeData( - reaction_kcats, - reaction_protein_stoichiometry, - protein_masses, - total_protein_mass, - flux_measurements, - protein_measurements, -) - -""" - mutable struct GeckoData - -Holds the already constructed GECKO problem. - -# Fields -``` -c::SparseVector{Float64, Int64} -E::SparseMatrixCSC{Float64, Int64} -d::SparseVector{Float64, Int64} -M::SparseMatrixCSC{Float64, Int64} -h::SparseVector{Float64, Int64} -reaction_map::Dict{String,Int} -metabolite_map::Dict{String,Int} -protein_ids::Vector{String} -``` -""" -mutable struct GeckoData - c::SparseVector{Float64,Int64} - E::SparseMatrixCSC{Float64,Int64} - d::SparseVector{Float64,Int64} - M::SparseMatrixCSC{Float64,Int64} - h::SparseVector{Float64,Int64} - reaction_map::Dict{String,Int} - metabolite_map::Dict{String,Int} - protein_ids::Vector{String} -end - -""" - GeckoData() - -Empty constructor. 
-""" -GeckoData() = GeckoData( - spzeros(0), - spzeros(0, 0), - spzeros(0), - spzeros(0, 0), - spzeros(0), - Dict{String,Int}(), - Dict{String,Int}(), - Vector{String}(), -) - -""" - mutable struct GeckoModel <: MetabolicModel - -A model that incorporates enzyme capacity and kinetic constraints via the GECKO -formulation. See `Sánchez, Benjamín J., et al. "Improving the phenotype -predictions of a yeast genome‐scale metabolic model by incorporating enzymatic -constraints." Molecular systems biology, 2017.` for implementation details. - -Note, since the model uses irreversible reactions internally, `"§FOR"` (for the -forward direction) and `"§REV"` (for the reverse direction) is appended to each -reaction internally. Futhermore, `"§"` is reserved for internal use as a -delimiter, no reaction id should contain that character. The units depend on -those used in `enzymedata.reaction_kcats` and `enzymedata.protein_masses`. Only -the protein and reaction flux bounds are optional parameters, all other -parameters must be supplied to the `enzymedata` field. Only reactions with kcats -will have enzyme bounds associated with them, but all isozymes are assumed to -have data if data is supplied. Currently only `modifications` that change -attributes of the `optimizer` are supported. - -To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel` -to run an analysis on it. 
- -See also: [`StandardModel`](@ref) - -# Fields -``` -smodel::StandardModel -geckodata::GeckoData -enzymedata::EnzymeData -``` -""" -mutable struct GeckoModel <: MetabolicModel - smodel::StandardModel - geckodata::GeckoData - enzymedata::EnzymeData -end - -""" - GeckoModel( - model::MetabolicModel; - reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), - reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), - protein_masses = Dict{String,Float64}(), - total_protein = 0.0, - flux_measurements = Dict{String,Tuple{Float64,Float64}}(), - protein_measurements = Dict{String,Tuple{Float64,Float64}}(), - ) - -Constructor for `GeckoModel`. -""" -function GeckoModel( - model::MetabolicModel; - reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), - reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), - protein_masses = Dict{String,Float64}(), - total_protein_mass = 0.0, - flux_measurements = Dict{String,Tuple{Float64,Float64}}(), - protein_measurements = Dict{String,Tuple{Float64,Float64}}(), -) - gm = GeckoModel( - convert(StandardModel, model), - GeckoData(), # empty - EnzymeData( - reaction_kcats, - reaction_protein_stoichiometry, - protein_masses, - total_protein_mass; - flux_measurements, - protein_measurements, - ), - ) - - # build data in GeckoModel - build_geckomodel_internals!(gm) - - return gm -end - -""" - stoichiometry(model::GeckoModel) - -Return stoichiometry matrix that includes enzymes as metabolites. -""" -function stoichiometry(model::GeckoModel) - build_geckomodel_internals!(model) - return model.geckodata.E -end - -""" - balance(model::GeckoModel) - -Return stoichiometric balance. -""" -balance(model::GeckoModel) = model.geckodata.d - -""" - objective(model::GeckoModel) - -Return objective of `model`. 
-""" -objective(model::GeckoModel) = model.geckodata.c - -@_inherit_model_methods GeckoModel (rid::String,) smodel (rid,) reaction_gene_association reaction_stoichiometry reaction_bounds is_reaction_reversible is_reaction_forward_only is_reaction_backward_only is_reaction_unidirectional is_reaction_blocked has_reaction_isozymes has_reaction_grr - -""" - reactions(model::GeckoModel) - -Returns reactions order according to stoichiometric matrix. Note, call [`genes`](@ref) -to get the order of the remaining variables. -""" -reactions(model::GeckoModel) = _order_id_to_idx_dict(model.geckodata.reaction_map) - -""" - metabolites(model::GeckoModel) - -Returns the metabolites ordered according to the stoichiometric matrix. -""" -metabolites(model::GeckoModel) = _order_id_to_idx_dict(model.geckodata.metabolite_map) - -""" - genes(model::GeckoModel) - -Returns the genes (proteins) in the order as they appear as variables in the model. -""" -genes(model::GeckoModel) = model.geckodata.protein_ids - -""" - _order_id_to_idx_dict(id_to_idx_dict) - -Return the keys of `id_to_idx_dict` sorted by the values, which -are taken to be the indices. This is a helper function for -[`reactions`](@ref) and [`metabolites`](@ref). -""" -function _order_id_to_idx_dict(dmap) - ks = collect(keys(dmap)) - vs = collect(values(dmap)) - return ks[sortperm(vs)] -end - -""" - bounds(model::GeckoModel) - -Return variable bounds for `GeckoModel`. -""" -function bounds(model::GeckoModel) - n_rxns = length(model.geckodata.reaction_map) - n_prots = length(model.geckodata.protein_ids) - lbs = [-model.geckodata.h[1:n_rxns]; -model.geckodata.h[2*n_rxns.+(1:n_prots)]] - ubs = [ - model.geckodata.h[n_rxns.+(1:n_rxns)] - model.geckodata.h[2*n_rxns+n_prots.+(1:n_prots)] - ] - return lbs, ubs -end - -""" - enzyme_capacity(model::GeckoModel) - -Return enzyme capacity inequality constraint vector and bound, or nothing -if it doesn't exist in the model. 
-""" -enzyme_capacity(model::GeckoModel) = (model.geckodata.M[end, :], model.geckodata.h[end]) - -""" - build_geckomodel_internals!(model::GeckoModel) - -Lower level function that updates the matrix form of a model with enzyme -capacity constraints, in GECKO format. - -Specifically, updates `model.geckodata` with the vector and matrix coefficients `c, -E, d, M, h` satisfying -``` -opt cᵀ * x -s.t. E * x = d - M * x ≤ h -``` -as well as `reaction_map, metabolite_map, protein_ids`, where -`reaction_map` shows the order of the columns (reactions) in `E`. Proteins -are ordered according to `protein_ids`, and follow after reactions. -""" -function build_geckomodel_internals!(model::GeckoModel) - S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model.smodel) - - #: find all gene products that have kcats associated with them - protein_ids = _get_proteins_with_kcats(model) - - #: size of resultant model - n_reactions = size(S, 2) - n_proteins = length(protein_ids) - n_metabolites = size(S, 1) - n_vars = n_reactions + n_proteins - - #: equality lhs - E_components = ( #TODO add size hints if possible - row_idxs = Vector{Int}(), - col_idxs = Vector{Int}(), - coeffs = Vector{Float64}(), - ) - - for (rid, col_idx) in reaction_map - original_rid = string(split(rid, "§")[1]) - - # skip these entries - contains(rid, "§ARM") && continue - !haskey(model.enzymedata.reaction_kcats, original_rid) && continue - - # these entries have kcats - if contains(rid, "§ISO") - iso_num = parse( - Int, - replace( - first(filter(startswith("ISO"), split(rid, "§")[2:end])), - "ISO" => "", - ), - ) - else # only one enzyme - iso_num = 1 - end - - # add all entries to column of matrix - _add_enzyme_variable( - model, - iso_num, # only one enzyme - rid, - original_rid, - E_components, - col_idx, - protein_ids, - ) - end - - Se = sparse( - E_components.row_idxs, - E_components.col_idxs, - E_components.coeffs, - n_proteins, - n_reactions, - ) - - E = [ - 
S zeros(n_metabolites, n_proteins) - Se I(n_proteins) - ] - - #: equality rhs - d = zeros(n_metabolites + n_proteins) - - #: find objective - obj_idx_orig = first(findnz(objective(model.smodel))[1]) - obj_id_orig = reactions(model.smodel)[obj_idx_orig] - obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective - c = zeros(n_vars) - obj_idx = reaction_map[obj_id] - c[obj_idx] = 1.0 - - #: inequality constraints - M, h = _gecko_build_inequality_constraints( - model, - protein_ids, - n_reactions, - n_proteins, - lb_fluxes, - ub_fluxes, - reaction_map, - ) - - #: overwrite geckomodel data - model.geckodata = GeckoData( - sparse(c), - sparse(E), - sparse(d), - sparse(M), - sparse(h), - reaction_map, - metabolite_map, - protein_ids, - ) - - return nothing -end - -""" - _gecko_build_inequality_constraints( - model::GeckoModel, - protein_ids, - n_reactions, - n_proteins, - lb_fluxes, - ub_fluxes, - reaction_map, - ) - -Helper function to build inequality constraints. Returns the inequality constraint in matrix format. 
-""" -function _gecko_build_inequality_constraints( - model::GeckoModel, - protein_ids, - n_reactions, - n_proteins, - lb_fluxes, - ub_fluxes, - reaction_map, -) - #: inequality lhs - mw_proteins = [model.enzymedata.protein_masses[pid] for pid in protein_ids] - M = Array( - [ - -I(n_reactions) zeros(n_reactions, n_proteins) - I(n_reactions) zeros(n_reactions, n_proteins) - zeros(n_proteins, n_reactions) -I(n_proteins) - zeros(n_proteins, n_reactions) I(n_proteins) - zeros(1, n_reactions) mw_proteins' - ], - ) - - #: inequality rhs - for original_rid in keys(model.enzymedata.flux_measurements) # only constrain if measurement available - lb = model.enzymedata.flux_measurements[original_rid][1] - ub = model.enzymedata.flux_measurements[original_rid][2] - rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] - filter!(x -> !contains(x, "§ISO"), rids) # remove isozyme partial reactions (ARM reactions take care of these) - - if lb > 0 # forward only - for rid in rids - contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) - contains(rid, "§FOR") && - (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) - end - elseif ub < 0 # reverse only - for rid in rids - contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) - contains(rid, "§REV") && - (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) - end - else # measurement does not rule our reversibility - for rid in rids - contains(rid, "§FOR") && - (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) - contains(rid, "§REV") && - (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) - end - end - end - - lb_proteins = [ - haskey(model.enzymedata.protein_measurements, pid) ? - model.enzymedata.protein_measurements[pid][1] : 0.0 for pid in protein_ids - ] - ub_proteins = [ - haskey(model.enzymedata.protein_measurements, pid) ? 
- model.enzymedata.protein_measurements[pid][2] : 1000.0 for pid in protein_ids - ] - - h = Array( - [ - -lb_fluxes - ub_fluxes - -lb_proteins - ub_proteins - model.enzymedata.total_protein_mass - ], - ) - - return M, h -end - -""" - _build_irreversible_stoichiometric_matrix(model::StandardModel) - -Return the stoichiometric matrix. All reactions are forward only i.e. only -positive fluxes are allowed. Include arm reactions. -""" -function _build_irreversible_stoichiometric_matrix(model::StandardModel) - # components used to build stoichiometric matrix - S_components = ( #TODO add size hints if possible - row_idxs = Vector{Int}(), - col_idxs = Vector{Int}(), - coeffs = Vector{Float64}(), - lbs = Vector{Float64}(), - ubs = Vector{Float64}(), - ) - - # establish the ordering in a named tuple - idxs = ( #: pseudo metabolites and reactions are added to model - met_idxs = Dict{String,Int}(), - rxn_idxs = Dict{String,Int}(), - max_rxn_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - max_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - pseudo_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - ) - #TODO for the counter thing, basically I wanted e.g. max_rxn_idx = 1 and then update it, - #TODO but named tuples are immutable... :( - - # fill the matrix entries - #: blocked treated as reversible because unclear what direction the reaction would go - for rid in reactions(model) - if has_reaction_grr(model, rid) && has_reaction_isozymes(model, rid) - if is_reaction_unidirectional(model, rid) - dir = is_reaction_forward_only(model, rid) ? 
"§FOR" : "§REV" - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") - else - @warn "Unhandled bound type for $rid" - end - else # no grr or single enzyme only - if is_reaction_unidirectional(model, rid) - dir = is_reaction_forward_only(model, rid) ? "§FOR" : "§REV" - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") - else - @warn "Unhandled bound type for $rid" - end - end - end - - S = sparse( - S_components.row_idxs, - S_components.col_idxs, - S_components.coeffs, - length(idxs.met_idxs), - length(idxs.rxn_idxs), - ) - - return S, S_components.lbs, S_components.ubs, idxs.rxn_idxs, idxs.met_idxs -end - -""" - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - -Add entries to the components that will be used to build the stoichiometric -matrix. Simple variant that does not deal with isozymes and arm reactions. -""" -function _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - idxs.rxn_idxs[rid*dir] = idxs.max_rxn_idx[1] - idxs.max_rxn_idx[1] += 1 - fix_sign = dir == "§FOR" ? 
1 : -1 # change direction of reaction - for (mid, coeff) in reaction_stoichiometry(model, rid) - if !haskey(idxs.met_idxs, mid) - idxs.met_idxs[mid] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - end - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[rid*dir]) - push!(S_components.coeffs, fix_sign * coeff) - end - lb, ub = abs.(reaction_bounds(model, rid)) # assumes lb < ub - if dir == "§FOR" - is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, lb) - push!(S_components.ubs, ub) - else - is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, ub) - push!(S_components.ubs, lb) - end -end - -""" - _add_isozyme_to_irrev_stoich_mat(model::GeckoModel, rid, idxs, S_components, dir) - -Add entries to the components that will be used to build the stoichiometric matrix. -Complex variant that deals with isozymes and arm reactions. -""" -function _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - # add pseudo metabolite - pm = "§PM$(idxs.pseudo_met_idx[1])" - idxs.pseudo_met_idx[1] += 1 - idxs.met_idxs[pm] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - # find half reactions to get arm reaction - lhs = [] - rhs = [] - for (mid, coeff) in reaction_stoichiometry(model, rid) - if !haskey(idxs.met_idxs, mid) - idxs.met_idxs[mid] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - end - if coeff <= 0 - push!(lhs, (mid, coeff)) - else - push!(rhs, (mid, coeff)) - end - end - product_half_reaction = dir == "§FOR" ? rhs : lhs - reagent_half_reaction = dir == "§FOR" ? lhs : rhs - # add arm reaction - fix_sign = dir == "§FOR" ? 1 : -1 # change direction of reaction - pr = rid * "§ARM" * dir - idxs.rxn_idxs[pr] = idxs.max_rxn_idx[1] #! 
this needs to get added first because of blocked possibility - idxs.max_rxn_idx[1] += 1 - push!(S_components.row_idxs, idxs.met_idxs[pm]) - push!(S_components.col_idxs, idxs.rxn_idxs[pr]) - push!(S_components.coeffs, 1) - for (mid, coeff) in reagent_half_reaction - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[pr]) - push!(S_components.coeffs, fix_sign * coeff) - end - # add bounds for ARM reaction that corresponds to original model's bounds - lb, ub = abs.(reaction_bounds(model, rid)) # assumes lb < ub - if dir == "§FOR" - is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, lb) - push!(S_components.ubs, ub) - else - is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, ub) - push!(S_components.ubs, lb) - end - # add isozyme reactions - for (i, _) in enumerate(reaction_gene_association(model, rid)) - iso_rid = rid * "§ISO$i" * dir - idxs.rxn_idxs[iso_rid] = idxs.max_rxn_idx[1] - idxs.max_rxn_idx[1] += 1 - push!(S_components.row_idxs, idxs.met_idxs[pm]) - push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) - push!(S_components.coeffs, -1) - for (mid, coeff) in product_half_reaction - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) - push!(S_components.coeffs, fix_sign * coeff) - end - # add bounds - push!(S_components.lbs, 0) - if is_reaction_blocked(model, rid) - push!(S_components.ubs, 0) - else - push!(S_components.ubs, 1000) # arbitrary upper bound - end - end -end - -""" - _add_enzyme_variable( - model::GeckoModel, - iso_num, - rid, - original_rid, - E_components, - col_idx, - protein_ids, - ) - -Helper function to add an column into the enzyme stoichiometric matrix. 
-""" -function _add_enzyme_variable( - model::GeckoModel, - iso_num, - rid, - original_rid, - E_components, - col_idx, - protein_ids, -) - grr = reaction_gene_association(model, original_rid)[iso_num] - pstoich = model.enzymedata.reaction_protein_stoichiometry[original_rid][iso_num] - kcat = - contains(rid, "§FOR") ? model.enzymedata.reaction_kcats[original_rid][iso_num][1] : - model.enzymedata.reaction_kcats[original_rid][iso_num][2] - for (idx, pid) in enumerate(grr) - push!(E_components.row_idxs, first(indexin([pid], protein_ids))) - push!(E_components.col_idxs, col_idx) - push!(E_components.coeffs, -pstoich[idx] / kcat) - end -end - -""" - _get_proteins_with_kcats(model::GeckoModel) - -Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats` field. -Assume that if a reaction has a kcat then each isozyme has a kcat. -""" -function _get_proteins_with_kcats(model::GeckoModel) - unique( - vcat( - vcat( - [ - reaction_gene_association(model.smodel, rid) for - rid in reactions(model.smodel) if - haskey(model.enzymedata.reaction_kcats, rid) - ]..., - )..., - ), - ) -end diff --git a/src/base/types/zSMomentModel.jl b/src/base/types/zSMomentModel.jl deleted file mode 100644 index fe569501e..000000000 --- a/src/base/types/zSMomentModel.jl +++ /dev/null @@ -1,301 +0,0 @@ -""" - mutable struct SMomentData - -Holds the already constructed SMOMENT problem. - -# Fields -``` -c::SparseVector{Float64, Int64} -E::SparseMatrixCSC{Float64, Int64} -d::SparseVector{Float64, Int64} -M::SparseMatrixCSC{Float64, Int64} -h::SparseVector{Float64, Int64} -reaction_map::Dict{String,Int} -metabolite_map::Dict{String,Int} -``` -""" -mutable struct SMomentData - c::SparseVector{Float64,Int64} - E::SparseMatrixCSC{Float64,Int64} - d::SparseVector{Float64,Int64} - M::SparseMatrixCSC{Float64,Int64} - h::SparseVector{Float64,Int64} - reaction_map::Dict{String,Int} - metabolite_map::Dict{String,Int} -end - -""" - SMomentData() - -Empty constructor. 
-""" -SMomentData() = SMomentData( - spzeros(0), - spzeros(0, 0), - spzeros(0), - spzeros(0, 0), - spzeros(0), - Dict{String,Int}(), - Dict{String,Int}(), -) - -""" - mutable struct SMomentModel <: MetabolicModel - -Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, -and Steffen Klamt. "Automatic construction of metabolic models with enzyme -constraints." BMC bioinformatics, 2020.` for implementation details. - -Note, `"§"` is reserved for internal use as a delimiter, no reaction id should -contain that character. Also note, SMOMENT assumes that each reaction only has a -single enzyme (one GRR) associated with it. It is required that a model be -modified to ensure that this condition is met. For ease-of-use, -[`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only -`modifications` that change attributes of the `optimizer` are supported. -""" -mutable struct SMomentModel <: MetabolicModel - smodel::StandardModel - smomentdata::SMomentData - enzymedata::EnzymeData -end - -""" - SMomentModel( - model::MetabolicModel; - reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), - reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), - protein_masses = Dict{String,Float64}(), - total_protein = 0.0, - flux_measurements = Dict{String,Tuple{Float64,Float64}}(), - ) - -Construct an `SMomentModel`. 
- -""" -function SMomentModel( - model::MetabolicModel; - reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), - reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), - protein_masses = Dict{String,Float64}(), - total_protein_mass = 0.0, - flux_measurements = Dict{String,Tuple{Float64,Float64}}(), -) - sm = convert(StandardModel, model) - # check that input data is in correct format for smoment - if any(length(v) > 1 for (rid, v) in reaction_kcats if has_reaction_grr(sm, rid)) || - any( - length(v) > 1 for (rid, v) in reaction_protein_stoichiometry if - haskey(reaction_kcats, rid) && has_reaction_grr(sm, rid) - ) - @warn( - "For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data." - ) - end - - smm = SMomentModel( - sm, - SMomentData(), # empty - EnzymeData( - reaction_kcats, - reaction_protein_stoichiometry, - protein_masses, - total_protein_mass; - flux_measurements, - ), - ) - - # build data in SMomentModel - build_smomentmodel_internals!(smm) - - return smm -end - -""" - stoichiometry(model::SMomentModel) - -Return stoichiometry matrix that includes enzymes as metabolites. -""" -function stoichiometry(model::SMomentModel) - build_smomentmodel_internals!(model) - return model.smomentdata.E -end - -""" - balance(model::SMomentModel) - -Return stoichiometric balance. -""" -balance(model::SMomentModel) = model.smomentdata.d - -""" - objective(model::SMomentModel) - -Return objective of `model`. 
-""" -objective(model::SMomentModel) = model.smomentdata.c - -@_inherit_model_methods SMomentModel () smodel () genes -@_inherit_model_methods SMomentModel (rid::String,) smodel (rid,) reaction_gene_association reaction_stoichiometry reaction_bounds is_reaction_reversible is_reaction_forward_only is_reaction_backward_only is_reaction_unidirectional is_reaction_blocked has_reaction_isozymes has_reaction_grr - -""" - reactions(model::SMomentModel) - -Returns reactions order according to stoichiometric matrix. Note, call [`genes`](@ref) -to get the order of the remaining variables. -""" -reactions(model::SMomentModel) = _order_id_to_idx_dict(model.smomentdata.reaction_map) - -""" - metabolites(model::SMomentModel) - -Returns the metabolites ordered according to the stoichiometric matrix. -""" -metabolites(model::SMomentModel) = _order_id_to_idx_dict(model.smomentdata.metabolite_map) - -""" - bounds(model::SMomentModel) - -Return variable bounds for `SMomentModel`. -""" -function bounds(model::SMomentModel) - n_rxns = length(model.smomentdata.reaction_map) - lbs = [-model.smomentdata.h[1:n_rxns]; 0] - ubs = [model.smomentdata.h[n_rxns.+(1:n_rxns)]; model.smomentdata.h[end]] - return lbs, ubs -end - -""" - build_smomentmodel_internals!(model::SMomentModel) - -Build internal data structures used to solve SMOMENT type flux -balance analysis problems. 
-""" -function build_smomentmodel_internals!(model::SMomentModel) - - S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model.smodel) - - #: size of resultant model - n_reactions = size(S, 2) - n_metabolites = size(S, 1) - n_vars = n_reactions + 1 - - #: equality lhs - Se = zeros(1, n_reactions) - - for (rid, col_idx) in reaction_map - original_rid = string(split(rid, "§")[1]) - - # skip these entries - !haskey(model.enzymedata.reaction_kcats, original_rid) && continue - # these entries have kcats, only one GRR by assumption - grr = first(reaction_gene_association(model, original_rid)) - pstoich = first(model.enzymedata.reaction_protein_stoichiometry[original_rid]) - mw = dot(pstoich, [model.enzymedata.protein_masses[gid] for gid in grr]) - kcat = - contains(rid, "§FOR") ? - first(model.enzymedata.reaction_kcats[original_rid])[1] : - first(model.enzymedata.reaction_kcats[original_rid])[2] - Se[1, col_idx] = -mw / kcat - end - - E = [ - S zeros(n_metabolites, 1) - Se 1.0 - ] - - #: equality rhs - d = zeros(n_metabolites + 1) - - #: find objective - obj_idx_orig = first(findnz(objective(model.smodel))[1]) - obj_id_orig = reactions(model.smodel)[obj_idx_orig] - obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective - c = zeros(n_vars) - obj_idx = reaction_map[obj_id] - c[obj_idx] = 1.0 - - #: inequality constraints - M, h = _smoment_build_inequality_constraints( - model, - n_reactions, - lb_fluxes, - ub_fluxes, - reaction_map, - ) - - #: overwrite geckomodel data - model.smomentdata = SMomentData( - sparse(c), - sparse(E), - sparse(d), - sparse(M), - sparse(h), - reaction_map, - metabolite_map, - ) - - return nothing -end - -""" - _smoment_build_inequality_constraints( - model::SMomentModel, - n_reactions, - lb_fluxes, - ub_fluxes, - reaction_map, - ) - -Helper function to return functions describing the inequality -constraints for smoment. 
-""" -function _smoment_build_inequality_constraints( - model::SMomentModel, - n_reactions, - lb_fluxes, - ub_fluxes, - reaction_map, -) - - #: inequality lhs - M = Array( - [ - -I(n_reactions) zeros(n_reactions, 1) - I(n_reactions) zeros(n_reactions, 1) - zeros(1, n_reactions) 1 - ], - ) - - #: inequality rhs - for original_rid in keys(model.enzymedata.flux_measurements) # only constrain if measurement available - lb = model.enzymedata.flux_measurements[original_rid][1] - ub = model.enzymedata.flux_measurements[original_rid][2] - rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] - - if lb > 0 # forward only - for rid in rids - contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) - contains(rid, "§FOR") && - (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) - end - elseif ub < 0 # reverse only - for rid in rids - contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) - contains(rid, "§REV") && - (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) - end - else # measurement does not rule our reversibility - for rid in rids - contains(rid, "§FOR") && - (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) - contains(rid, "§REV") && - (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) - end - end - end - - h = Array([-lb_fluxes; ub_fluxes; model.enzymedata.total_protein_mass]) - - return M, h -end diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index 34ee468c7..c2ca19ee0 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -4,125 +4,117 @@ Return a dictionary mapping protein concentrations to their ids. """ protein_dict(model::GeckoModel, opt_model) = - is_solved(opt_model) ? - last( - _map_irrev_to_rev_ids( - model.geckodata.reaction_map, - value.(opt_model[:x]); - protein_ids = model.geckodata.protein_ids, - ), - ) : nothing + is_solved(opt_model) ? 
Dict(model.gene_ids .=> value.(opt_model[:x][(length(model.irrev_reaction_ids)+1):end])) : nothing +# """ +# remove_slow_isozymes!( +# model::StandardModel; +# reaction_kcats = Dict(), +# protein_stoichiometry = Dict(), +# protein_masses = Dict(), +# ) -""" - remove_slow_isozymes!( - model::StandardModel; - reaction_kcats = Dict(), - protein_stoichiometry = Dict(), - protein_masses = Dict(), - ) +# Remove all but the fastest isozyme from each reaction in `model`. +# Use the largest kcat (for, rev) for these calculations. Modifies all +# the arguments in place. +# """ +# function remove_slow_isozymes!( +# model::StandardModel; +# reaction_kcats = Dict(), +# reaction_protein_stoichiometry = Dict(), +# protein_masses = Dict(), +# ) +# for rid in reactions(model) +# if has_reaction_grr(model, rid) && haskey(reaction_kcats, rid) +# kcat_effs = Float64[] +# grrs = reaction_gene_association(model, rid) +# for (i, grr) in enumerate(grrs) +# push!( +# kcat_effs, +# dot( +# reaction_protein_stoichiometry[rid][i], +# [protein_masses[gid] for gid in grr], +# ) / maximum(reaction_kcats[rid][i]), +# ) +# end +# idx = argmin(kcat_effs) -Remove all but the fastest isozyme from each reaction in `model`. -Use the largest kcat (for, rev) for these calculations. Modifies all -the arguments in place. 
-""" -function remove_slow_isozymes!( - model::StandardModel; - reaction_kcats = Dict(), - reaction_protein_stoichiometry = Dict(), - protein_masses = Dict(), -) - for rid in reactions(model) - if has_reaction_grr(model, rid) && haskey(reaction_kcats, rid) - kcat_effs = Float64[] - grrs = reaction_gene_association(model, rid) - for (i, grr) in enumerate(grrs) - push!( - kcat_effs, - dot( - reaction_protein_stoichiometry[rid][i], - [protein_masses[gid] for gid in grr], - ) / maximum(reaction_kcats[rid][i]), - ) - end - idx = argmin(kcat_effs) +# model.reactions[rid].grr = [grrs[idx]] +# reaction_kcats[rid] = [reaction_kcats[rid][idx]] +# reaction_protein_stoichiometry[rid] = [reaction_protein_stoichiometry[rid][idx]] +# end +# end - model.reactions[rid].grr = [grrs[idx]] - reaction_kcats[rid] = [reaction_kcats[rid][idx]] - reaction_protein_stoichiometry[rid] = [reaction_protein_stoichiometry[rid][idx]] - end - end +# curated_gids = String[] +# for rid in reactions(model) +# if has_reaction_grr(model, rid) +# for grr in reaction_gene_association(model, rid) +# append!(curated_gids, grr) +# end +# end +# end +# rm_gids = setdiff(genes(model), curated_gids) +# delete!(model.genes, rm_gids) # remove genes that were deleted - curated_gids = String[] - for rid in reactions(model) - if has_reaction_grr(model, rid) - for grr in reaction_gene_association(model, rid) - append!(curated_gids, grr) - end - end - end - rm_gids = setdiff(genes(model), curated_gids) - delete!(model.genes, rm_gids) # remove genes that were deleted +# return nothing +# end - return nothing -end +# """ +# remove_low_expressed_isozymes!( +# model::StandardModel; +# reaction_kcats = Dict(), +# protein_stoichiometry = Dict(), +# protein_masses = Dict(), +# gid_measurements = Dict(), +# ) -""" - remove_low_expressed_isozymes!( - model::StandardModel; - reaction_kcats = Dict(), - protein_stoichiometry = Dict(), - protein_masses = Dict(), - gid_measurements = Dict(), - ) - -Remove isozymes that are not 
expressed. If multiple isozymes are expressed, pick -one that has the highest expression. -""" -function remove_low_expressed_isozymes!( - model::StandardModel; - reaction_kcats = Dict(), - protein_stoichiometry = Dict(), - protein_masses = Dict(), - gid_measurements = Dict(), -) +# Remove isozymes that are not expressed. If multiple isozymes are expressed, pick +# one that has the highest expression. +# """ +# function remove_low_expressed_isozymes!( +# model::StandardModel; +# reaction_kcats = Dict(), +# protein_stoichiometry = Dict(), +# protein_masses = Dict(), +# gid_measurements = Dict(), +# ) - for rid in reactions(model) - if has_reaction_grr(model, rid) - measured_proteins = Float64[] - grrs = reaction_gene_association(model, rid) - for (i, grr) in enumerate(grrs) +# for rid in reactions(model) +# if has_reaction_grr(model, rid) +# measured_proteins = Float64[] +# grrs = reaction_gene_association(model, rid) +# for (i, grr) in enumerate(grrs) - push!( - measured_proteins, - sum( - map( - *, - protein_stoichiometry[rid][i], - [get(gid_measurements, gid, 0.0) for gid in grr], - [protein_masses[gid] for gid in grr], - ), - ), - ) - end - idx = argmax(measured_proteins) +# push!( +# measured_proteins, +# sum( +# map( +# *, +# protein_stoichiometry[rid][i], +# [get(gid_measurements, gid, 0.0) for gid in grr], +# [protein_masses[gid] for gid in grr], +# ), +# ), +# ) +# end +# idx = argmax(measured_proteins) - model.reactions[rid].grr = [grrs[idx]] - reaction_kcats[rid] = [reaction_kcats[rid][idx]] - protein_stoichiometry[rid] = [protein_stoichiometry[rid][idx]] - end - end +# model.reactions[rid].grr = [grrs[idx]] +# reaction_kcats[rid] = [reaction_kcats[rid][idx]] +# protein_stoichiometry[rid] = [protein_stoichiometry[rid][idx]] +# end +# end - curated_gids = String[] - for rid in reactions(model) - if has_reaction_grr(model, rid) - for grr in reaction_gene_association(model, rid) - append!(curated_gids, grr) - end - end - end - rm_gids = setdiff(genes(model), 
curated_gids) - delete!(model.genes, rm_gids) # remove genes that were deleted +# curated_gids = String[] +# for rid in reactions(model) +# if has_reaction_grr(model, rid) +# for grr in reaction_gene_association(model, rid) +# append!(curated_gids, grr) +# end +# end +# end +# rm_gids = setdiff(genes(model), curated_gids) +# delete!(model.genes, rm_gids) # remove genes that were deleted - return nothing -end +# return nothing +# end diff --git a/src/base/utils/irreversible_stoichiometry.jl b/src/base/utils/irreversible_stoichiometry.jl new file mode 100644 index 000000000..75369dcfd --- /dev/null +++ b/src/base/utils/irreversible_stoichiometry.jl @@ -0,0 +1,182 @@ +""" + _build_irreversible_stoichiometric_matrix(model::StandardModel) + +Return a stoichiometric matrix where all reactions are forward only i.e. only +positive fluxes are allowed. To accomplish this for models with isozymes, +so-called arm reactions are included. +""" +function _build_irreversible_stoichiometric_matrix(model::StandardModel) + # components used to build stoichiometric matrix + S_components = ( #TODO add size hints if possible + row_idxs = Vector{Int}(), + col_idxs = Vector{Int}(), + coeffs = Vector{Float64}(), + lbs = Vector{Float64}(), + ubs = Vector{Float64}(), + ) + + # establish the ordering in a named tuple + idxs = ( #: pseudo metabolites and reactions are added to model + met_idxs = Dict{String,Int}(), + rxn_idxs = Dict{String,Int}(), + max_rxn_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + max_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + pseudo_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple + ) + #TODO for the counter thing, basically I wanted e.g. max_rxn_idx = 1 and then update it, + #TODO but named tuples are immutable... 
:( + + # fill the matrix entries + #: blocked treated as reversible because unclear what direction the reaction would go + for rid in reactions(model) + if has_reaction_grr(model, rid) && has_reaction_isozymes(model, rid) + if is_reaction_unidirectional(model, rid) + dir = is_reaction_forward_only(model, rid) ? "§FOR" : "§REV" + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") + _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") + else + @warn "Unhandled bound type for $rid" + end + else # no grr or single enzyme only + if is_reaction_unidirectional(model, rid) + dir = is_reaction_forward_only(model, rid) ? "§FOR" : "§REV" + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") + else + @warn "Unhandled bound type for $rid" + end + end + end + + S = sparse( + S_components.row_idxs, + S_components.col_idxs, + S_components.coeffs, + length(idxs.met_idxs), + length(idxs.rxn_idxs), + ) + + return S, S_components.lbs, S_components.ubs, idxs.rxn_idxs, idxs.met_idxs +end + +""" + _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + +Add entries to the components that will be used to build the stoichiometric +matrix. Simple variant that does not deal with isozymes and arm reactions. +""" +function _add_enzyme_to_irrev_stoich_mat(model::StandardModel, rid, idxs, S_components, dir) + idxs.rxn_idxs[rid*dir] = idxs.max_rxn_idx[1] + idxs.max_rxn_idx[1] += 1 + fix_sign = dir == "§FOR" ? 
1 : -1 # change direction of reaction + for (mid, coeff) in reaction_stoichiometry(model, rid) + if !haskey(idxs.met_idxs, mid) + idxs.met_idxs[mid] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + end + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[rid*dir]) + push!(S_components.coeffs, fix_sign * coeff) + end + lb, ub = abs.(reaction_bounds(model, rid)) # assumes lb < ub + if dir == "§FOR" + is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, lb) + push!(S_components.ubs, ub) + else + is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, ub) + push!(S_components.ubs, lb) + end +end + +""" + _add_isozyme_to_irrev_stoich_mat( + model::StandardModel, + rid, + idxs, + S_components, + dir, + ) + +Add entries to the components that will be used to build the stoichiometric matrix. +Complex variant that deals with isozymes and arm reactions. +""" +function _add_isozyme_to_irrev_stoich_mat( + model::StandardModel, + rid, + idxs, + S_components, + dir, +) + # add pseudo metabolite + pm = "§PM$(idxs.pseudo_met_idx[1])" + idxs.pseudo_met_idx[1] += 1 + idxs.met_idxs[pm] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + # find half reactions to get arm reaction + lhs = [] + rhs = [] + for (mid, coeff) in reaction_stoichiometry(model, rid) + if !haskey(idxs.met_idxs, mid) + idxs.met_idxs[mid] = idxs.max_met_idx[1] + idxs.max_met_idx[1] += 1 + end + if coeff <= 0 + push!(lhs, (mid, coeff)) + else + push!(rhs, (mid, coeff)) + end + end + product_half_reaction = dir == "§FOR" ? rhs : lhs + reagent_half_reaction = dir == "§FOR" ? lhs : rhs + # add arm reaction + fix_sign = dir == "§FOR" ? 1 : -1 # change direction of reaction + pr = rid * "§ARM" * dir + idxs.rxn_idxs[pr] = idxs.max_rxn_idx[1] #! 
this needs to get added first because of blocked possibility + idxs.max_rxn_idx[1] += 1 + push!(S_components.row_idxs, idxs.met_idxs[pm]) + push!(S_components.col_idxs, idxs.rxn_idxs[pr]) + push!(S_components.coeffs, 1) + for (mid, coeff) in reagent_half_reaction + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[pr]) + push!(S_components.coeffs, fix_sign * coeff) + end + # add bounds for ARM reaction that corresponds to original model's bounds + lb, ub = abs.(reaction_bounds(model, rid)) # assumes lb < ub + if dir == "§FOR" + is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, lb) + push!(S_components.ubs, ub) + else + is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : + push!(S_components.lbs, ub) + push!(S_components.ubs, lb) + end + # add isozyme reactions + for (i, _) in enumerate(reaction_gene_association(model, rid)) + iso_rid = rid * "§ISO$i" * dir + idxs.rxn_idxs[iso_rid] = idxs.max_rxn_idx[1] + idxs.max_rxn_idx[1] += 1 + push!(S_components.row_idxs, idxs.met_idxs[pm]) + push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) + push!(S_components.coeffs, -1) + for (mid, coeff) in product_half_reaction + push!(S_components.row_idxs, idxs.met_idxs[mid]) + push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) + push!(S_components.coeffs, fix_sign * coeff) + end + # add bounds + push!(S_components.lbs, 0) + if is_reaction_blocked(model, rid) + push!(S_components.ubs, 0) + else + push!(S_components.ubs, 1000) # arbitrary upper bound + end + end +end diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 963d0dff2..5eb9c4f2b 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -1,29 +1,45 @@ @testset "GECKO" begin - smodel = load_model(StandardModel, model_paths["e_coli_core.json"]) - smodel.reactions["EX_glc__D_e"].lb = -1000.0 # unconstrain because enzyme constraints take over - protein_measurements = Dict("b2779" => (0.01, 0.06)) - 
flux_measurements = Dict("GLCpts" => (-1.0, 12.0)) + model = load_model(StandardModel, model_paths["e_coli_core.json"]) total_protein_mass = 100 # mg/gdW - model = GeckoModel( - smodel; - reaction_kcats = ecoli_core_reaction_kcats, - reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, - protein_masses = ecoli_core_protein_masses, - total_protein_mass = total_protein_mass, # mg/gdW - flux_measurements, - protein_measurements, + #: construct isozymes from model + rid_isozymes = Dict{String,Vector{Isozyme}}() + for (rid, kcats) in ecoli_core_reaction_kcats + grrs = reaction_gene_association(model, rid) + rid_isozymes[rid] = [ + Isozyme( + Dict(grrs[i] .=> ecoli_core_protein_stoichiometry[rid][i]), + (kcats[i][1], kcats[i][2]), + ) for i = 1:length(grrs) + ] + end + + #: add molar mass to genes in model + for (gid, g) in model.genes + model.genes[gid].molar_mass = get(ecoli_core_protein_masses, gid, nothing) + end + + gm = GeckoModel( + model; + rid_isozymes, + enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)], + ) + change_bounds( + gm, + ["EX_glc__D_e", "b2779", "GLCpts"]; + lbs = [-1000.0, 0.01, -1.0], + ubs = [nothing, 0.06, 12.0], ) opt_model = flux_balance_analysis( - model, + gm, Tulip.Optimizer; modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], sense = COBREXA.MOI.MAX_SENSE, ) - rxn_fluxes = flux_dict(model, opt_model) - prot_concens = protein_dict(model, opt_model) + rxn_fluxes = flux_dict(gm, opt_model) + prot_concens = protein_dict(gm, opt_model) @test isapprox( rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], @@ -35,3 +51,4 @@ @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) end + diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 80ab74e08..2606dc83c 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -1,37 +1,37 @@ -@testset "SMOMENT" begin - smodel = load_model(StandardModel, model_paths["e_coli_core.json"]) - 
smodel.reactions["EX_glc__D_e"].lb = -1000.0 # unconstrain because enzyme constraints take over - flux_measurements = Dict("GLCpts" => (-1.0, 12.0)) - total_protein_mass = 100 # mg/gdW +# @testset "SMOMENT" begin +# smodel = load_model(StandardModel, model_paths["e_coli_core.json"]) +# smodel.reactions["EX_glc__D_e"].lb = -1000.0 # unconstrain because enzyme constraints take over +# flux_measurements = Dict("GLCpts" => (-1.0, 12.0)) +# total_protein_mass = 100 # mg/gdW - remove_slow_isozymes!( - smodel; - reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, - protein_masses = ecoli_core_protein_masses, - reaction_kcats = ecoli_core_reaction_kcats, - ) +# remove_slow_isozymes!( +# smodel; +# reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, +# protein_masses = ecoli_core_protein_masses, +# reaction_kcats = ecoli_core_reaction_kcats, +# ) - model = SMomentModel( - smodel; - reaction_kcats = ecoli_core_reaction_kcats, - reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, - protein_masses = ecoli_core_protein_masses, - total_protein_mass = total_protein_mass, # mg/gdW - flux_measurements, - ) +# model = SMomentModel( +# smodel; +# reaction_kcats = ecoli_core_reaction_kcats, +# reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, +# protein_masses = ecoli_core_protein_masses, +# total_protein_mass = total_protein_mass, # mg/gdW +# flux_measurements, +# ) - opt_model = flux_balance_analysis( - model, - Tulip.Optimizer; - modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], - sense = COBREXA.MOI.MAX_SENSE, - ) +# opt_model = flux_balance_analysis( +# model, +# Tulip.Optimizer; +# modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], +# sense = COBREXA.MOI.MAX_SENSE, +# ) - rxn_fluxes = flux_dict(model, opt_model) +# rxn_fluxes = flux_dict(model, opt_model) - @test isapprox( - rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], - 0.8907273630431708, - atol = TEST_TOLERANCE, - ) -end 
+# @test isapprox( +# rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], +# 0.8907273630431708, +# atol = TEST_TOLERANCE, +# ) +# end From e846c58fd3af541fe327458029bd79cf603049e6 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Sun, 10 Apr 2022 19:25:49 +0200 Subject: [PATCH 052/109] add smoment and fixup gecko --- src/base/types/derivedmodels/GeckoModel.jl | 44 +- src/base/types/derivedmodels/SMomentModel.jl | 519 +++++++++---------- src/base/utils/enzyme.jl | 182 +++---- src/base/utils/irreversible_stoichiometry.jl | 34 +- test/analysis/smoment.jl | 73 +-- 5 files changed, 416 insertions(+), 436 deletions(-) diff --git a/src/base/types/derivedmodels/GeckoModel.jl b/src/base/types/derivedmodels/GeckoModel.jl index ca2e3eae1..ab0a682ba 100644 --- a/src/base/types/derivedmodels/GeckoModel.jl +++ b/src/base/types/derivedmodels/GeckoModel.jl @@ -11,8 +11,7 @@ forward direction) and `"§REV"` (for the reverse direction) is appended to each reaction internally. Hence, `"§"` is reserved for internal use as a delimiter, no reaction id should contain this character. -To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel` -to run an analysis on it. +To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel`. # Fields ``` @@ -73,7 +72,8 @@ objective(model::GeckoModel) = model.c """ reactions(model::GeckoModel) -Returns reactions order according to stoichiometric matrix. +Returns the reversible reactions in `model`. For +the irreversible reactions, use [`irreversible_reactions`][@ref]. """ reactions(model::GeckoModel) = model.reaction_ids @@ -84,6 +84,13 @@ Returns the number of reactions in the model. """ n_reactions(model::GeckoModel) = length(model.reaction_ids) +""" + irreversible_reactions(model::GeckoModel) + +Returns the irreversible reactions in `model`. 
+""" +irreversible_reactions(model::GeckoModel) = model.irrev_reaction_ids + """ genes(model::GeckoModel) @@ -190,7 +197,7 @@ function GeckoModel( enzyme_capacities = [(),], ) S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model) + _build_irreversible_stoichiometric_matrix(model, rid_isozymes) #: find all gene products that have kcats associated with them gene_ids = get_genes_with_kcats(rid_isozymes) @@ -331,35 +338,6 @@ function _add_enzyme_variable( end end -""" - get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) - -Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats` field. -Assume that if a reaction has a kcat then each isozyme has a kcat. -""" -function get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) - gids = String[] - for isozymes in values(rid_isozymes) - for isozyme in isozymes - append!(gids, collect(keys(isozyme.stoichiometry))) - end - end - return unique(gids) -end - -""" - _order_id_to_idx_dict(id_to_idx_dict) - -Return the keys of `id_to_idx_dict` sorted by the values, which -are taken to be the indices. This is a helper function for -[`reactions`](@ref) and [`metabolites`](@ref). -""" -function _order_id_to_idx_dict(dmap) - ks = collect(keys(dmap)) - vs = collect(values(dmap)) - return ks[sortperm(vs)] -end - """ change_bound(model::GeckoModel, id; lb=nothing, ub=nothing) diff --git a/src/base/types/derivedmodels/SMomentModel.jl b/src/base/types/derivedmodels/SMomentModel.jl index e39088d8e..111416aa7 100644 --- a/src/base/types/derivedmodels/SMomentModel.jl +++ b/src/base/types/derivedmodels/SMomentModel.jl @@ -1,260 +1,259 @@ -# """ -# mutable struct SMomentModel <: MetabolicModel - -# Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, -# and Steffen Klamt. "Automatic construction of metabolic models with enzyme -# constraints." BMC bioinformatics, 2020.` for implementation details. 
- -# Note, `"§"` is reserved for internal use as a delimiter, no reaction id should -# contain that character. Also note, SMOMENT assumes that each reaction only has a -# single enzyme (one GRR) associated with it. It is required that a model be -# modified to ensure that this condition is met. For ease-of-use, -# [`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only -# `modifications` that change attributes of the `optimizer` are supported. -# """ -# mutable struct SMomentModel <: MetabolicModel -# smodel::StandardModel -# smomentdata::SMomentData -# enzymedata::EnzymeData -# end - -# """ -# SMomentModel( -# model::MetabolicModel; -# reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), -# reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), -# protein_masses = Dict{String,Float64}(), -# total_protein = 0.0, -# flux_measurements = Dict{String,Tuple{Float64,Float64}}(), -# ) - -# Construct an `SMomentModel`. - -# """ -# function SMomentModel( -# model::MetabolicModel; -# reaction_kcats = Dict{String,Vector{Vector{Float64}}}(), -# reaction_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}(), -# protein_masses = Dict{String,Float64}(), -# total_protein_mass = 0.0, -# flux_measurements = Dict{String,Tuple{Float64,Float64}}(), -# ) -# sm = convert(StandardModel, model) -# # check that input data is in correct format for smoment -# if any(length(v) > 1 for (rid, v) in reaction_kcats if has_reaction_grr(sm, rid)) || -# any( -# length(v) > 1 for (rid, v) in reaction_protein_stoichiometry if -# haskey(reaction_kcats, rid) && has_reaction_grr(sm, rid) -# ) -# @warn( -# "For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data." 
-# ) -# end - -# smm = SMomentModel( -# sm, -# SMomentData(), # empty -# EnzymeData( -# reaction_kcats, -# reaction_protein_stoichiometry, -# protein_masses, -# total_protein_mass; -# flux_measurements, -# ), -# ) - -# # build data in SMomentModel -# build_smomentmodel_internals!(smm) - -# return smm -# end - -# """ -# stoichiometry(model::SMomentModel) - -# Return stoichiometry matrix that includes enzymes as metabolites. -# """ -# function stoichiometry(model::SMomentModel) -# build_smomentmodel_internals!(model) -# return model.smomentdata.E -# end - -# """ -# balance(model::SMomentModel) - -# Return stoichiometric balance. -# """ -# balance(model::SMomentModel) = model.smomentdata.d - -# """ -# objective(model::SMomentModel) - -# Return objective of `model`. -# """ -# objective(model::SMomentModel) = model.smomentdata.c - -# @_inherit_model_methods SMomentModel () smodel () genes -# @_inherit_model_methods SMomentModel (rid::String,) smodel (rid,) reaction_gene_association reaction_stoichiometry reaction_bounds is_reaction_reversible is_reaction_forward_only is_reaction_backward_only is_reaction_unidirectional is_reaction_blocked has_reaction_isozymes has_reaction_grr - -# """ -# reactions(model::SMomentModel) - -# Returns reactions order according to stoichiometric matrix. Note, call [`genes`](@ref) -# to get the order of the remaining variables. -# """ -# reactions(model::SMomentModel) = _order_id_to_idx_dict(model.smomentdata.reaction_map) - -# """ -# metabolites(model::SMomentModel) - -# Returns the metabolites ordered according to the stoichiometric matrix. -# """ -# metabolites(model::SMomentModel) = _order_id_to_idx_dict(model.smomentdata.metabolite_map) - -# """ -# bounds(model::SMomentModel) - -# Return variable bounds for `SMomentModel`. 
-# """ -# function bounds(model::SMomentModel) -# n_rxns = length(model.smomentdata.reaction_map) -# lbs = [-model.smomentdata.h[1:n_rxns]; 0] -# ubs = [model.smomentdata.h[n_rxns.+(1:n_rxns)]; model.smomentdata.h[end]] -# return lbs, ubs -# end - -# """ -# build_smomentmodel_internals!(model::SMomentModel) - -# Build internal data structures used to solve SMOMENT type flux -# balance analysis problems. -# """ -# function build_smomentmodel_internals!(model::SMomentModel) - -# S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = -# _build_irreversible_stoichiometric_matrix(model.smodel) - -# #: size of resultant model -# n_reactions = size(S, 2) -# n_metabolites = size(S, 1) -# n_vars = n_reactions + 1 - -# #: equality lhs -# Se = zeros(1, n_reactions) - -# for (rid, col_idx) in reaction_map -# original_rid = string(split(rid, "§")[1]) - -# # skip these entries -# !haskey(model.enzymedata.reaction_kcats, original_rid) && continue -# # these entries have kcats, only one GRR by assumption -# grr = first(reaction_gene_association(model, original_rid)) -# pstoich = first(model.enzymedata.reaction_protein_stoichiometry[original_rid]) -# mw = dot(pstoich, [model.enzymedata.protein_masses[gid] for gid in grr]) -# kcat = -# contains(rid, "§FOR") ? 
-# first(model.enzymedata.reaction_kcats[original_rid])[1] : -# first(model.enzymedata.reaction_kcats[original_rid])[2] -# Se[1, col_idx] = -mw / kcat -# end - -# E = [ -# S zeros(n_metabolites, 1) -# Se 1.0 -# ] - -# #: equality rhs -# d = zeros(n_metabolites + 1) - -# #: find objective -# obj_idx_orig = first(findnz(objective(model.smodel))[1]) -# obj_id_orig = reactions(model.smodel)[obj_idx_orig] -# obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective -# c = zeros(n_vars) -# obj_idx = reaction_map[obj_id] -# c[obj_idx] = 1.0 - -# #: inequality constraints -# M, h = _smoment_build_inequality_constraints( -# model, -# n_reactions, -# lb_fluxes, -# ub_fluxes, -# reaction_map, -# ) - -# #: overwrite geckomodel data -# model.smomentdata = SMomentData( -# sparse(c), -# sparse(E), -# sparse(d), -# sparse(M), -# sparse(h), -# reaction_map, -# metabolite_map, -# ) - -# return nothing -# end - -# """ -# _smoment_build_inequality_constraints( -# model::SMomentModel, -# n_reactions, -# lb_fluxes, -# ub_fluxes, -# reaction_map, -# ) - -# Helper function to return functions describing the inequality -# constraints for smoment. 
-# """ -# function _smoment_build_inequality_constraints( -# model::SMomentModel, -# n_reactions, -# lb_fluxes, -# ub_fluxes, -# reaction_map, -# ) - -# #: inequality lhs -# M = Array( -# [ -# -I(n_reactions) zeros(n_reactions, 1) -# I(n_reactions) zeros(n_reactions, 1) -# zeros(1, n_reactions) 1 -# ], -# ) - -# #: inequality rhs -# for original_rid in keys(model.enzymedata.flux_measurements) # only constrain if measurement available -# lb = model.enzymedata.flux_measurements[original_rid][1] -# ub = model.enzymedata.flux_measurements[original_rid][2] -# rids = [rid for rid in keys(reaction_map) if startswith(rid, original_rid)] - -# if lb > 0 # forward only -# for rid in rids -# contains(rid, "§REV") && (ub_fluxes[reaction_map[rid]] = 0.0) -# contains(rid, "§FOR") && -# (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = lb) -# end -# elseif ub < 0 # reverse only -# for rid in rids -# contains(rid, "§FOR") && (ub_fluxes[reaction_map[rid]] = 0.0) -# contains(rid, "§REV") && -# (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = -ub) -# end -# else # measurement does not rule our reversibility -# for rid in rids -# contains(rid, "§FOR") && -# (ub_fluxes[reaction_map[rid]] = ub; lb_fluxes[reaction_map[rid]] = 0) -# contains(rid, "§REV") && -# (ub_fluxes[reaction_map[rid]] = -lb; lb_fluxes[reaction_map[rid]] = 0) -# end -# end -# end - -# h = Array([-lb_fluxes; ub_fluxes; model.enzymedata.total_protein_mass]) - -# return M, h -# end +""" + mutable struct SMomentModel <: MetabolicModel + +Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, +and Steffen Klamt. "Automatic construction of metabolic models with enzyme +constraints." BMC bioinformatics, 2020.` for implementation details. + +Note, `"§"` is reserved for internal use as a delimiter, no reaction id should +contain that character. Also note, SMOMENT assumes that each reaction only has a +single enzyme (one GRR) associated with it. 
It is required that a model be +modified to ensure that this condition is met. For ease-of-use, +[`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only +`modifications` that change attributes of the `optimizer` are supported. + +# Fields +``` +reaction_ids::Vector{String} +irrev_reaction_ids::Vector{String} +metabolites::Vector{String} +c::SparseVec +S::SparseMat +b::SparseVec +xl::SparseVec +xu::SparseVec +C::SparseMat +cl::Vector{Float64} +cu::Vector{Float64} +``` +""" +mutable struct SMomentModel <: MetabolicModel + reaction_ids::Vector{String} + irrev_reaction_ids::Vector{String} + metabolites::Vector{String} + c::SparseVec + S::SparseMat + b::SparseVec + xl::SparseVec + xu::SparseVec +end + +""" + stoichiometry(model::SMomentModel) + +Return stoichiometry matrix that includes enzymes as metabolites. +""" +stoichiometry(model::SMomentModel) = model.S + +""" + balance(model::SMomentModel) + +Return stoichiometric balance. +""" +balance(model::SMomentModel) = model.b + +""" + objective(model::SMomentModel) + +Return objective of `model`. +""" +objective(model::SMomentModel) = model.c + +""" + reactions(model::SMomentModel) + +Returns the reversible reactions in `model`. For +the irreversible reactions, use [`irreversible_reactions`][@ref]. +""" +reactions(model::SMomentModel) = model.reaction_ids + +""" + n_reactions(model::SMomentModel) + +Returns the number of reactions in the model. +""" +n_reactions(model::SMomentModel) = length(model.reaction_ids) + +""" + irreversible_reactions(model::SMomentModel) + +Returns the irreversible reactions in `model`. +""" +irreversible_reactions(model::SMomentModel) = model.irrev_reaction_ids + +""" + metabolites(model::SMomentModel) + +Return the metabolites in `model`. +""" +metabolites(model::SMomentModel) = model.metabolites + +""" + n_metabolites(model::SMomentModel) = + +Return the number of metabolites in `model`. 
+""" +n_metabolites(model::SMomentModel) = length(metabolites(model)) + +""" + bounds(model::SMomentModel) + +Return variable bounds for `SMomentModel`. +""" +bounds(model::SMomentModel) = (model.xl, model.xu) + +""" + reaction_flux(model::MetabolicModel) + +Helper function to get fluxes from optimization problem. +""" +function reaction_flux(model::SMomentModel) + R = spzeros(n_reactions(model), length(model.irrev_reaction_ids) + 1) + for (i, rid) in enumerate(reactions(model)) + for_idx = findfirst(x -> x == rid*"§ARM§FOR" || x == rid*"§FOR", model.irrev_reaction_ids) + rev_idx = findfirst(x -> x == rid*"§ARM§REV" || x == rid*"§REV", model.irrev_reaction_ids) + !isnothing(for_idx) && (R[i, for_idx] = 1.0) + !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) + end + return R' +end + +""" + SMomentModel( + model::StandardModel; + rid_isozymes = Dict{String, Vector{Isozyme}}(), + ) + +Construct an `SMomentModel`. + +""" +function SMomentModel( + model::StandardModel; + rid_isozymes = Dict{String, Vector{Isozyme}}(), + enzyme_capacity = 0.0, +) + + # check that input data is in correct format for smoment + if any(length(v) > 1 for v in values(rid_isozymes)) + @warn( + "For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data." + ) + end + + irrevS, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = + _build_irreversible_stoichiometric_matrix(model) + + #: size of resultant model + num_reactions = size(irrevS, 2) + num_metabolites = size(irrevS, 1) + num_vars = num_reactions + 1 + + #: equality lhs + Se = zeros(1, num_reactions) + + for (rid, col_idx) in reaction_map + original_rid = string(split(rid, "§")[1]) + + # skip these entries + !haskey(rid_isozymes, original_rid) && continue + # these entries have kcats, only one GRR by assumption + isozyme = first(rid_isozymes[original_rid]) + mw = sum([model.genes[gid].molar_mass * ps for (gid, ps) in isozyme.stoichiometry]) + kcat = contains(rid, "§FOR") ? 
first(isozyme.kcats) : last(isozyme.kcats) + Se[1, col_idx] = -mw / kcat + end + + S = [ + irrevS zeros(num_metabolites, 1) + Se 1.0 + ] + + #: equality rhs + b = zeros(num_metabolites + 1) + + #: find objective + obj_idx_orig = first(findnz(objective(model))[1]) + obj_id_orig = reactions(model)[obj_idx_orig] + obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective + c = spzeros(num_vars) + obj_idx = reaction_map[obj_id] + c[obj_idx] = 1.0 + + #: bounds + xl = sparse([lb_fluxes; 0.0]) + xu = sparse([ub_fluxes; enzyme_capacity]) + + return SMomentModel( + reactions(model), + _order_id_to_idx_dict(reaction_map), + _order_id_to_idx_dict(metabolite_map), + c, + S, + b, + xl, + xu, + ) +end + +""" + change_bound(model::SMomentModel, id; lb=nothing, ub=nothing) + +Change the bound of variable in `model`. Does not change the bound if respective +bound is `nothing`. Note, for `SMomentModel`s, if the model used to construct the +`SMomentModel` has irreversible reactions, then these reactions will be +permanently irreversible in the model, i.e. changing their bounds to make them +reversible will have no effect. 
+""" +function change_bound(model::SMomentModel, id; lb=nothing, ub=nothing) + + + flux_for_idx = findfirst(x -> x == id*"§ARM§FOR" || x == id*"§FOR", model.irrev_reaction_ids) + if !isnothing(flux_for_idx) + if !isnothing(lb) + if lb <= 0 + model.xl[flux_for_idx] = 0 + else + model.xl[flux_for_idx] = lb + end + end + if !isnothing(ub) + if ub <= 0 + model.xu[flux_for_idx] = 0 + else + model.xu[flux_for_idx] = ub + end + end + end + + flux_rev_idx = findfirst(x -> x == id*"§ARM§REV" || x == id*"§REV", model.irrev_reaction_ids) + if !isnothing(flux_rev_idx) + if !isnothing(lb) + if lb >= 0 + model.xu[flux_rev_idx] = 0 + else + model.xu[flux_rev_idx] = -lb + end + if !isnothing(ub) + if ub >= 0 + model.xl[flux_rev_idx] = 0 + else + model.xl[flux_rev_idx] = -ub + end + end + end + end + + return nothing +end + +""" + change_bounds(model::SMomentModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids))) + +Change the bounds of multiple variables in `model` simultaneously. See +[`change_bound`](@ref) for details. +""" +function change_bounds(model::SMomentModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids))) + for (id, lb, ub) in zip(ids, lbs, ubs) + change_bound(model, id; lb=lb, ub=ub) + end +end diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index c2ca19ee0..427283cb2 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -6,115 +6,89 @@ Return a dictionary mapping protein concentrations to their ids. protein_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? Dict(model.gene_ids .=> value.(opt_model[:x][(length(model.irrev_reaction_ids)+1):end])) : nothing -# """ -# remove_slow_isozymes!( -# model::StandardModel; -# reaction_kcats = Dict(), -# protein_stoichiometry = Dict(), -# protein_masses = Dict(), -# ) - -# Remove all but the fastest isozyme from each reaction in `model`. -# Use the largest kcat (for, rev) for these calculations. Modifies all -# the arguments in place. 
-# """ -# function remove_slow_isozymes!( -# model::StandardModel; -# reaction_kcats = Dict(), -# reaction_protein_stoichiometry = Dict(), -# protein_masses = Dict(), -# ) -# for rid in reactions(model) -# if has_reaction_grr(model, rid) && haskey(reaction_kcats, rid) -# kcat_effs = Float64[] -# grrs = reaction_gene_association(model, rid) -# for (i, grr) in enumerate(grrs) -# push!( -# kcat_effs, -# dot( -# reaction_protein_stoichiometry[rid][i], -# [protein_masses[gid] for gid in grr], -# ) / maximum(reaction_kcats[rid][i]), -# ) -# end -# idx = argmin(kcat_effs) - -# model.reactions[rid].grr = [grrs[idx]] -# reaction_kcats[rid] = [reaction_kcats[rid][idx]] -# reaction_protein_stoichiometry[rid] = [reaction_protein_stoichiometry[rid][idx]] -# end -# end - -# curated_gids = String[] -# for rid in reactions(model) -# if has_reaction_grr(model, rid) -# for grr in reaction_gene_association(model, rid) -# append!(curated_gids, grr) -# end -# end -# end -# rm_gids = setdiff(genes(model), curated_gids) -# delete!(model.genes, rm_gids) # remove genes that were deleted +""" + get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) -# return nothing -# end +Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats` field. +Assume that if a reaction has a kcat then each isozyme has a kcat. +""" +function get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) + gids = String[] + for isozymes in values(rid_isozymes) + for isozyme in isozymes + append!(gids, collect(keys(isozyme.stoichiometry))) + end + end + return unique(gids) +end -# """ -# remove_low_expressed_isozymes!( -# model::StandardModel; -# reaction_kcats = Dict(), -# protein_stoichiometry = Dict(), -# protein_masses = Dict(), -# gid_measurements = Dict(), -# ) +""" + remove_slow_isozymes!( + model::StandardModel, + rid_isozymes = Dict{String, Vector{Isozyme}}(), + ) -# Remove isozymes that are not expressed. 
If multiple isozymes are expressed, pick -# one that has the highest expression. -# """ -# function remove_low_expressed_isozymes!( -# model::StandardModel; -# reaction_kcats = Dict(), -# protein_stoichiometry = Dict(), -# protein_masses = Dict(), -# gid_measurements = Dict(), -# ) +Remove all but the fastest isozymes from `rid_isozymes`. Use the largest kcat +(for, rev) for these calculations. Modifies `rid_isozymes` in place. +""" +function remove_slow_isozymes!( + model::StandardModel, + rid_isozymes = Dict{String, Vector{Isozyme}}(), +) + for (rid, isozymes) in rid_isozymes + kcat_effs = Float64[] + for isozyme in isozymes + gid_stoich = isozyme.stoichiometry + kcats = isozyme.kcats + push!( + kcat_effs, + dot( + [stoich for stoich in values(gid_stoich)], + [model.genes[gid].molar_mass for gid in keys(gid_stoich)], + ) / maximum(kcats), + ) + end + idx = argmin(kcat_effs) + rid_isozymes[rid] = [rid_isozymes[rid][idx]] + end -# for rid in reactions(model) -# if has_reaction_grr(model, rid) -# measured_proteins = Float64[] -# grrs = reaction_gene_association(model, rid) -# for (i, grr) in enumerate(grrs) + return nothing +end -# push!( -# measured_proteins, -# sum( -# map( -# *, -# protein_stoichiometry[rid][i], -# [get(gid_measurements, gid, 0.0) for gid in grr], -# [protein_masses[gid] for gid in grr], -# ), -# ), -# ) -# end -# idx = argmax(measured_proteins) +""" + remove_low_expressed_isozymes!( + model::StandardModel, + rid_isozymes = Dict{String, Vector{Isozyme}}() + gid_measurements = Dict(), + ) -# model.reactions[rid].grr = [grrs[idx]] -# reaction_kcats[rid] = [reaction_kcats[rid][idx]] -# protein_stoichiometry[rid] = [protein_stoichiometry[rid][idx]] -# end -# end +Modify `rid_isozymes` in place by keeping only the highest expressed isozyme. 
+""" +function remove_low_expressed_isozymes!( + model::StandardModel, + rid_isozymes = Dict{String, Vector{Isozyme}}(), + gid_measurements = Dict(), +) -# curated_gids = String[] -# for rid in reactions(model) -# if has_reaction_grr(model, rid) -# for grr in reaction_gene_association(model, rid) -# append!(curated_gids, grr) -# end -# end -# end -# rm_gids = setdiff(genes(model), curated_gids) -# delete!(model.genes, rm_gids) # remove genes that were deleted + for (rid, isozymes) in rid_isozymes + measured_proteins = Float64[] + for isozyme in isozymes + gid_stoich = isozyme.stoichiometry + push!( + measured_proteins, + sum( + map( + *, + collect(values(gid_stoich)), + [get(gid_measurements, gid, 0.0) for gid in keys(gid_stoich)], + [model.genes[gid].molar_mass for gid in keys(gid_stoich)], + ), + ), + ) + end + idx = argmax(measured_proteins) + rid_isozymes[rid] = [rid_isozymes[rid][idx]] + end -# return nothing -# end + return nothing +end diff --git a/src/base/utils/irreversible_stoichiometry.jl b/src/base/utils/irreversible_stoichiometry.jl index 75369dcfd..b1978a203 100644 --- a/src/base/utils/irreversible_stoichiometry.jl +++ b/src/base/utils/irreversible_stoichiometry.jl @@ -3,9 +3,15 @@ Return a stoichiometric matrix where all reactions are forward only i.e. only positive fluxes are allowed. To accomplish this for models with isozymes, -so-called arm reactions are included. +so-called arm reactions are included. Note, reactions that are irreversible +in the original model will be irreversible in this model. E.g., if a reaction +is forward only in the original model, then there will be no reverse component +for this reaction in the irreversible stoichiometric matrix. 
""" -function _build_irreversible_stoichiometric_matrix(model::StandardModel) +function _build_irreversible_stoichiometric_matrix( + model::StandardModel, + rid_isozymes = Dict{String, Vector{Isozyme}}(), +) # components used to build stoichiometric matrix S_components = ( #TODO add size hints if possible row_idxs = Vector{Int}(), @@ -29,13 +35,13 @@ function _build_irreversible_stoichiometric_matrix(model::StandardModel) # fill the matrix entries #: blocked treated as reversible because unclear what direction the reaction would go for rid in reactions(model) - if has_reaction_grr(model, rid) && has_reaction_isozymes(model, rid) + if haskey(rid_isozymes, rid) && length(rid_isozymes[rid]) > 1 if is_reaction_unidirectional(model, rid) dir = is_reaction_forward_only(model, rid) ? "§FOR" : "§REV" - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) + _add_isozyme_to_irrev_stoich_mat(model, rid_isozymes, rid, idxs, S_components, dir) elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") - _add_isozyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") + _add_isozyme_to_irrev_stoich_mat(model, rid_isozymes, rid, idxs, S_components, "§FOR") + _add_isozyme_to_irrev_stoich_mat(model, rid_isozymes, rid, idxs, S_components, "§REV") else @warn "Unhandled bound type for $rid" end @@ -108,6 +114,7 @@ Complex variant that deals with isozymes and arm reactions. 
""" function _add_isozyme_to_irrev_stoich_mat( model::StandardModel, + rid_isoyzmes, rid, idxs, S_components, @@ -159,7 +166,7 @@ function _add_isozyme_to_irrev_stoich_mat( push!(S_components.ubs, lb) end # add isozyme reactions - for (i, _) in enumerate(reaction_gene_association(model, rid)) + for (i, _) in enumerate(rid_isoyzmes[rid]) iso_rid = rid * "§ISO$i" * dir idxs.rxn_idxs[iso_rid] = idxs.max_rxn_idx[1] idxs.max_rxn_idx[1] += 1 @@ -180,3 +187,16 @@ function _add_isozyme_to_irrev_stoich_mat( end end end + +""" + _order_id_to_idx_dict(id_to_idx_dict) + +Return the keys of `id_to_idx_dict` sorted by the values, which +are taken to be the indices. This is a helper function for +[`reactions`](@ref) and [`metabolites`](@ref). +""" +function _order_id_to_idx_dict(dmap) + ks = collect(keys(dmap)) + vs = collect(values(dmap)) + return ks[sortperm(vs)] +end \ No newline at end of file diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 2606dc83c..3719cc2b1 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -1,37 +1,46 @@ -# @testset "SMOMENT" begin -# smodel = load_model(StandardModel, model_paths["e_coli_core.json"]) -# smodel.reactions["EX_glc__D_e"].lb = -1000.0 # unconstrain because enzyme constraints take over -# flux_measurements = Dict("GLCpts" => (-1.0, 12.0)) -# total_protein_mass = 100 # mg/gdW +@testset "SMOMENT" begin + model = load_model(StandardModel, model_paths["e_coli_core.json"]) + total_protein_mass = 100 # mg/gdW -# remove_slow_isozymes!( -# smodel; -# reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, -# protein_masses = ecoli_core_protein_masses, -# reaction_kcats = ecoli_core_reaction_kcats, -# ) + #: construct isozymes from model + rid_isozymes = Dict{String,Vector{Isozyme}}() + for (rid, kcats) in ecoli_core_reaction_kcats + grrs = reaction_gene_association(model, rid) + rid_isozymes[rid] = [ + Isozyme( + Dict(grrs[i] .=> ecoli_core_protein_stoichiometry[rid][i]), + (kcats[i][1], 
kcats[i][2]), + ) for i = 1:length(grrs) + ] + end -# model = SMomentModel( -# smodel; -# reaction_kcats = ecoli_core_reaction_kcats, -# reaction_protein_stoichiometry = ecoli_core_protein_stoichiometry, -# protein_masses = ecoli_core_protein_masses, -# total_protein_mass = total_protein_mass, # mg/gdW -# flux_measurements, -# ) + #: add molar mass to genes in model + for (gid, g) in model.genes + model.genes[gid].molar_mass = get(ecoli_core_protein_masses, gid, nothing) + end -# opt_model = flux_balance_analysis( -# model, -# Tulip.Optimizer; -# modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], -# sense = COBREXA.MOI.MAX_SENSE, -# ) + remove_slow_isozymes!( + model, + rid_isozymes + ) -# rxn_fluxes = flux_dict(model, opt_model) + smm = SMomentModel( + model; + rid_isozymes, + enzyme_capacity = total_protein_mass + ) -# @test isapprox( -# rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], -# 0.8907273630431708, -# atol = TEST_TOLERANCE, -# ) -# end + change_bounds(smm, ["EX_glc__D_e", "GLCpts"]; lbs=[-1000.0, -1.0], ubs=[nothing, 12.0]) + + rxn_fluxes = flux_balance_analysis_dict( + smm, + Tulip.Optimizer; + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], + ) + + @test isapprox( + rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], + 0.8907273630431708, + atol = TEST_TOLERANCE, + ) +end From 45346049ae621882c68f7a1746b8b84c6dbb778e Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Sun, 10 Apr 2022 19:41:42 +0200 Subject: [PATCH 053/109] format --- src/base/types/Gene.jl | 9 ++- src/base/types/Isozyme.jl | 6 +- src/base/types/derivedmodels/GeckoModel.jl | 81 ++++++++++++-------- src/base/types/derivedmodels/SMomentModel.jl | 47 ++++++++---- src/base/utils/enzyme.jl | 11 ++- src/base/utils/irreversible_stoichiometry.jl | 33 ++++++-- test/analysis/smoment.jl | 18 ++--- 7 files changed, 130 insertions(+), 75 deletions(-) diff --git a/src/base/types/Gene.jl b/src/base/types/Gene.jl index a8f3a5da1..9a7ec1315 100644 --- a/src/base/types/Gene.jl +++ b/src/base/types/Gene.jl @@ -17,6 +17,11 @@ mutable struct Gene annotations::Annotations molar_mass::Maybe{Float64} - Gene(id::String = ""; name = nothing, notes = Notes(), annotations = Annotations(), molar_mass=nothing) = - new(id, name, notes, annotations, molar_mass) + Gene( + id::String = ""; + name = nothing, + notes = Notes(), + annotations = Annotations(), + molar_mass = nothing, + ) = new(id, name, notes, annotations, molar_mass) end diff --git a/src/base/types/Isozyme.jl b/src/base/types/Isozyme.jl index b7f253191..94945dcf8 100644 --- a/src/base/types/Isozyme.jl +++ b/src/base/types/Isozyme.jl @@ -12,6 +12,6 @@ kcats :: Tuple{Float64, Float64} ```` """ mutable struct Isozyme - stoichiometry :: Dict{String, Int} - kcats :: Tuple{Float64, Float64} -end \ No newline at end of file + stoichiometry::Dict{String,Int} + kcats::Tuple{Float64,Float64} +end diff --git a/src/base/types/derivedmodels/GeckoModel.jl b/src/base/types/derivedmodels/GeckoModel.jl index ab0a682ba..569a6ddc2 100644 --- a/src/base/types/derivedmodels/GeckoModel.jl +++ b/src/base/types/derivedmodels/GeckoModel.jl @@ -149,8 +149,14 @@ Helper function to get fluxes from optimization problem. 
function reaction_flux(model::GeckoModel) R = spzeros(n_reactions(model), n_genes(model) + length(model.irrev_reaction_ids)) for (i, rid) in enumerate(reactions(model)) - for_idx = findfirst(x -> x == rid*"§ARM§FOR" || x == rid*"§FOR", model.irrev_reaction_ids) - rev_idx = findfirst(x -> x == rid*"§ARM§REV" || x == rid*"§REV", model.irrev_reaction_ids) + for_idx = findfirst( + x -> x == rid * "§ARM§FOR" || x == rid * "§FOR", + model.irrev_reaction_ids, + ) + rev_idx = findfirst( + x -> x == rid * "§ARM§REV" || x == rid * "§REV", + model.irrev_reaction_ids, + ) !isnothing(for_idx) && (R[i, for_idx] = 1.0) !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) end @@ -193,8 +199,8 @@ prot_concens = protein_dict(gm, opt_model) """ function GeckoModel( model::StandardModel; - rid_isozymes = Dict{String, Vector{Isozyme}}(), - enzyme_capacities = [(),], + rid_isozymes = Dict{String,Vector{Isozyme}}(), + enzyme_capacities = [()], ) S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = _build_irreversible_stoichiometric_matrix(model, rid_isozymes) @@ -255,12 +261,10 @@ function GeckoModel( num_reactions, ) - stoich_mat = sparse( - [ - S zeros(num_metabolites, num_genes) - Se I(num_genes) - ] - ) + stoich_mat = sparse([ + S zeros(num_metabolites, num_genes) + Se I(num_genes) + ]) #: equality rhs b = spzeros(num_metabolites + num_genes) @@ -285,7 +289,7 @@ function GeckoModel( for (i, enz_cap) in enumerate(enzyme_capacities) enz_idxs = indexin(first(enz_cap), gene_ids) - C[i, num_reactions .+ enz_idxs] .= mw_proteins[enz_idxs] + C[i, num_reactions.+enz_idxs] .= mw_proteins[enz_idxs] cu[i] = last(enz_cap) end @@ -294,13 +298,13 @@ function GeckoModel( _order_id_to_idx_dict(reaction_map), _order_id_to_idx_dict(metabolite_map), gene_ids, - c, - stoich_mat, - b, - xl, + c, + stoich_mat, + b, + xl, xu, - C, - cl, + C, + cl, cu, ) end @@ -347,51 +351,57 @@ bound is `nothing`. Note, for `GeckoModel`s, if the model used to construct the permanently irreversible in the model, i.e. 
changing their bounds to make them reversible will have no effect. """ -function change_bound(model::GeckoModel, id; lb=nothing, ub=nothing) +function change_bound(model::GeckoModel, id; lb = nothing, ub = nothing) gene_idx = first(indexin([id], model.gene_ids)) - + if isnothing(gene_idx) - flux_for_idx = findfirst(x -> x == id*"§ARM§FOR" || x == id*"§FOR", model.irrev_reaction_ids) + flux_for_idx = findfirst( + x -> x == id * "§ARM§FOR" || x == id * "§FOR", + model.irrev_reaction_ids, + ) if !isnothing(flux_for_idx) if !isnothing(lb) - if lb <= 0 + if lb <= 0 model.xl[flux_for_idx] = 0 else model.xl[flux_for_idx] = lb end end if !isnothing(ub) - if ub <= 0 + if ub <= 0 model.xu[flux_for_idx] = 0 else model.xu[flux_for_idx] = ub end end end - - flux_rev_idx = findfirst(x -> x == id*"§ARM§REV" || x == id*"§REV", model.irrev_reaction_ids) + + flux_rev_idx = findfirst( + x -> x == id * "§ARM§REV" || x == id * "§REV", + model.irrev_reaction_ids, + ) if !isnothing(flux_rev_idx) if !isnothing(lb) - if lb >= 0 + if lb >= 0 model.xu[flux_rev_idx] = 0 else model.xu[flux_rev_idx] = -lb end if !isnothing(ub) - if ub >= 0 + if ub >= 0 model.xl[flux_rev_idx] = 0 else model.xl[flux_rev_idx] = -ub end end - end + end end else n = length(model.irrev_reaction_ids) - !isnothing(lb) && (model.xl[n + gene_idx] = lb) - !isnothing(ub) && (model.xu[n + gene_idx] = ub) + !isnothing(lb) && (model.xl[n+gene_idx] = lb) + !isnothing(ub) && (model.xu[n+gene_idx] = ub) end - + return nothing end @@ -401,8 +411,13 @@ end Change the bounds of multiple variables in `model` simultaneously. See [`change_bound`](@ref) for details. 
""" -function change_bounds(model::GeckoModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids))) - for (id, lb, ub) in zip(ids, lbs, ubs) - change_bound(model, id; lb=lb, ub=ub) +function change_bounds( + model::GeckoModel, + ids; + lbs = fill(nothing, length(ids)), + ubs = fill(nothing, length(ids)), +) + for (id, lb, ub) in zip(ids, lbs, ubs) + change_bound(model, id; lb = lb, ub = ub) end end diff --git a/src/base/types/derivedmodels/SMomentModel.jl b/src/base/types/derivedmodels/SMomentModel.jl index 111416aa7..b56024260 100644 --- a/src/base/types/derivedmodels/SMomentModel.jl +++ b/src/base/types/derivedmodels/SMomentModel.jl @@ -110,8 +110,14 @@ Helper function to get fluxes from optimization problem. function reaction_flux(model::SMomentModel) R = spzeros(n_reactions(model), length(model.irrev_reaction_ids) + 1) for (i, rid) in enumerate(reactions(model)) - for_idx = findfirst(x -> x == rid*"§ARM§FOR" || x == rid*"§FOR", model.irrev_reaction_ids) - rev_idx = findfirst(x -> x == rid*"§ARM§REV" || x == rid*"§REV", model.irrev_reaction_ids) + for_idx = findfirst( + x -> x == rid * "§ARM§FOR" || x == rid * "§FOR", + model.irrev_reaction_ids, + ) + rev_idx = findfirst( + x -> x == rid * "§ARM§REV" || x == rid * "§REV", + model.irrev_reaction_ids, + ) !isnothing(for_idx) && (R[i, for_idx] = 1.0) !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) end @@ -129,7 +135,7 @@ Construct an `SMomentModel`. """ function SMomentModel( model::StandardModel; - rid_isozymes = Dict{String, Vector{Isozyme}}(), + rid_isozymes = Dict{String,Vector{Isozyme}}(), enzyme_capacity = 0.0, ) @@ -204,45 +210,47 @@ bound is `nothing`. Note, for `SMomentModel`s, if the model used to construct th permanently irreversible in the model, i.e. changing their bounds to make them reversible will have no effect. 
""" -function change_bound(model::SMomentModel, id; lb=nothing, ub=nothing) +function change_bound(model::SMomentModel, id; lb = nothing, ub = nothing) - - flux_for_idx = findfirst(x -> x == id*"§ARM§FOR" || x == id*"§FOR", model.irrev_reaction_ids) + + flux_for_idx = + findfirst(x -> x == id * "§ARM§FOR" || x == id * "§FOR", model.irrev_reaction_ids) if !isnothing(flux_for_idx) if !isnothing(lb) - if lb <= 0 + if lb <= 0 model.xl[flux_for_idx] = 0 else model.xl[flux_for_idx] = lb end end if !isnothing(ub) - if ub <= 0 + if ub <= 0 model.xu[flux_for_idx] = 0 else model.xu[flux_for_idx] = ub end end end - - flux_rev_idx = findfirst(x -> x == id*"§ARM§REV" || x == id*"§REV", model.irrev_reaction_ids) + + flux_rev_idx = + findfirst(x -> x == id * "§ARM§REV" || x == id * "§REV", model.irrev_reaction_ids) if !isnothing(flux_rev_idx) if !isnothing(lb) - if lb >= 0 + if lb >= 0 model.xu[flux_rev_idx] = 0 else model.xu[flux_rev_idx] = -lb end if !isnothing(ub) - if ub >= 0 + if ub >= 0 model.xl[flux_rev_idx] = 0 else model.xl[flux_rev_idx] = -ub end end - end + end end - + return nothing end @@ -252,8 +260,13 @@ end Change the bounds of multiple variables in `model` simultaneously. See [`change_bound`](@ref) for details. """ -function change_bounds(model::SMomentModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids))) - for (id, lb, ub) in zip(ids, lbs, ubs) - change_bound(model, id; lb=lb, ub=ub) +function change_bounds( + model::SMomentModel, + ids; + lbs = fill(nothing, length(ids)), + ubs = fill(nothing, length(ids)), +) + for (id, lb, ub) in zip(ids, lbs, ubs) + change_bound(model, id; lb = lb, ub = ub) end end diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index 427283cb2..8a9acba5a 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -4,7 +4,10 @@ Return a dictionary mapping protein concentrations to their ids. """ protein_dict(model::GeckoModel, opt_model) = - is_solved(opt_model) ? 
Dict(model.gene_ids .=> value.(opt_model[:x][(length(model.irrev_reaction_ids)+1):end])) : nothing + is_solved(opt_model) ? + Dict( + model.gene_ids .=> value.(opt_model[:x][(length(model.irrev_reaction_ids)+1):end]), + ) : nothing """ get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) @@ -12,7 +15,7 @@ protein_dict(model::GeckoModel, opt_model) = Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats` field. Assume that if a reaction has a kcat then each isozyme has a kcat. """ -function get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) +function get_genes_with_kcats(rid_isozymes::Dict{String,Vector{Isozyme}}) gids = String[] for isozymes in values(rid_isozymes) for isozyme in isozymes @@ -33,7 +36,7 @@ Remove all but the fastest isozymes from `rid_isozymes`. Use the largest kcat """ function remove_slow_isozymes!( model::StandardModel, - rid_isozymes = Dict{String, Vector{Isozyme}}(), + rid_isozymes = Dict{String,Vector{Isozyme}}(), ) for (rid, isozymes) in rid_isozymes kcat_effs = Float64[] @@ -66,7 +69,7 @@ Modify `rid_isozymes` in place by keeping only the highest expressed isozyme. """ function remove_low_expressed_isozymes!( model::StandardModel, - rid_isozymes = Dict{String, Vector{Isozyme}}(), + rid_isozymes = Dict{String,Vector{Isozyme}}(), gid_measurements = Dict(), ) diff --git a/src/base/utils/irreversible_stoichiometry.jl b/src/base/utils/irreversible_stoichiometry.jl index b1978a203..beac91060 100644 --- a/src/base/utils/irreversible_stoichiometry.jl +++ b/src/base/utils/irreversible_stoichiometry.jl @@ -9,8 +9,8 @@ is forward only in the original model, then there will be no reverse component for this reaction in the irreversible stoichiometric matrix. 
""" function _build_irreversible_stoichiometric_matrix( - model::StandardModel, - rid_isozymes = Dict{String, Vector{Isozyme}}(), + model::StandardModel, + rid_isozymes = Dict{String,Vector{Isozyme}}(), ) # components used to build stoichiometric matrix S_components = ( #TODO add size hints if possible @@ -38,10 +38,31 @@ function _build_irreversible_stoichiometric_matrix( if haskey(rid_isozymes, rid) && length(rid_isozymes[rid]) > 1 if is_reaction_unidirectional(model, rid) dir = is_reaction_forward_only(model, rid) ? "§FOR" : "§REV" - _add_isozyme_to_irrev_stoich_mat(model, rid_isozymes, rid, idxs, S_components, dir) + _add_isozyme_to_irrev_stoich_mat( + model, + rid_isozymes, + rid, + idxs, + S_components, + dir, + ) elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) - _add_isozyme_to_irrev_stoich_mat(model, rid_isozymes, rid, idxs, S_components, "§FOR") - _add_isozyme_to_irrev_stoich_mat(model, rid_isozymes, rid, idxs, S_components, "§REV") + _add_isozyme_to_irrev_stoich_mat( + model, + rid_isozymes, + rid, + idxs, + S_components, + "§FOR", + ) + _add_isozyme_to_irrev_stoich_mat( + model, + rid_isozymes, + rid, + idxs, + S_components, + "§REV", + ) else @warn "Unhandled bound type for $rid" end @@ -199,4 +220,4 @@ function _order_id_to_idx_dict(dmap) ks = collect(keys(dmap)) vs = collect(values(dmap)) return ks[sortperm(vs)] -end \ No newline at end of file +end diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 3719cc2b1..5a87535ab 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -19,19 +19,17 @@ model.genes[gid].molar_mass = get(ecoli_core_protein_masses, gid, nothing) end - remove_slow_isozymes!( - model, - rid_isozymes - ) + remove_slow_isozymes!(model, rid_isozymes) + + smm = SMomentModel(model; rid_isozymes, enzyme_capacity = total_protein_mass) - smm = SMomentModel( - model; - rid_isozymes, - enzyme_capacity = total_protein_mass + change_bounds( + smm, + ["EX_glc__D_e", "GLCpts"]; + lbs 
= [-1000.0, -1.0], + ubs = [nothing, 12.0], ) - change_bounds(smm, ["EX_glc__D_e", "GLCpts"]; lbs=[-1000.0, -1.0], ubs=[nothing, 12.0]) - rxn_fluxes = flux_balance_analysis_dict( smm, Tulip.Optimizer; From df1d4b37edcc4b2c72d2b04ab0de720091d4e5ab Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Mon, 11 Apr 2022 22:47:24 +0200 Subject: [PATCH 054/109] fix args --- src/base/types/derivedmodels/GeckoModel.jl | 42 +++++++++++----------- test/analysis/gecko.jl | 4 +-- test/analysis/smoment.jl | 4 +-- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/base/types/derivedmodels/GeckoModel.jl b/src/base/types/derivedmodels/GeckoModel.jl index 569a6ddc2..cb5304ae3 100644 --- a/src/base/types/derivedmodels/GeckoModel.jl +++ b/src/base/types/derivedmodels/GeckoModel.jl @@ -343,7 +343,7 @@ function _add_enzyme_variable( end """ - change_bound(model::GeckoModel, id; lb=nothing, ub=nothing) + change_bound(model::GeckoModel, id; lower=nothing, upper=nothing) Change the bound of variable in `model`. Does not change the bound if respective bound is `nothing`. Note, for `GeckoModel`s, if the model used to construct the @@ -351,7 +351,7 @@ bound is `nothing`. Note, for `GeckoModel`s, if the model used to construct the permanently irreversible in the model, i.e. changing their bounds to make them reversible will have no effect. 
""" -function change_bound(model::GeckoModel, id; lb = nothing, ub = nothing) +function change_bound(model::GeckoModel, id; lower = nothing, upper = nothing) gene_idx = first(indexin([id], model.gene_ids)) if isnothing(gene_idx) @@ -360,18 +360,18 @@ function change_bound(model::GeckoModel, id; lb = nothing, ub = nothing) model.irrev_reaction_ids, ) if !isnothing(flux_for_idx) - if !isnothing(lb) - if lb <= 0 + if !isnothing(lower) + if lower <= 0 model.xl[flux_for_idx] = 0 else - model.xl[flux_for_idx] = lb + model.xl[flux_for_idx] = lower end end - if !isnothing(ub) - if ub <= 0 + if !isnothing(upper) + if upper <= 0 model.xu[flux_for_idx] = 0 else - model.xu[flux_for_idx] = ub + model.xu[flux_for_idx] = upper end end end @@ -381,32 +381,32 @@ function change_bound(model::GeckoModel, id; lb = nothing, ub = nothing) model.irrev_reaction_ids, ) if !isnothing(flux_rev_idx) - if !isnothing(lb) - if lb >= 0 + if !isnothing(lower) + if lower >= 0 model.xu[flux_rev_idx] = 0 else - model.xu[flux_rev_idx] = -lb + model.xu[flux_rev_idx] = -lower end - if !isnothing(ub) - if ub >= 0 + if !isnothing(upper) + if upper >= 0 model.xl[flux_rev_idx] = 0 else - model.xl[flux_rev_idx] = -ub + model.xl[flux_rev_idx] = -upper end end end end else n = length(model.irrev_reaction_ids) - !isnothing(lb) && (model.xl[n+gene_idx] = lb) - !isnothing(ub) && (model.xu[n+gene_idx] = ub) + !isnothing(lower) && (model.xl[n+gene_idx] = lower) + !isnothing(upper) && (model.xu[n+gene_idx] = upper) end return nothing end """ - change_bounds(model::GeckoModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids))) + change_bounds(model::GeckoModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) Change the bounds of multiple variables in `model` simultaneously. See [`change_bound`](@ref) for details. @@ -414,10 +414,10 @@ Change the bounds of multiple variables in `model` simultaneously. 
See function change_bounds( model::GeckoModel, ids; - lbs = fill(nothing, length(ids)), - ubs = fill(nothing, length(ids)), + lower = fill(nothing, length(ids)), + upper = fill(nothing, length(ids)), ) - for (id, lb, ub) in zip(ids, lbs, ubs) - change_bound(model, id; lb = lb, ub = ub) + for (id, lower, upper) in zip(ids, lower, upper) + change_bound(model, id; lower = lower, upper = upper) end end diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 5eb9c4f2b..63e505245 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -27,8 +27,8 @@ change_bounds( gm, ["EX_glc__D_e", "b2779", "GLCpts"]; - lbs = [-1000.0, 0.01, -1.0], - ubs = [nothing, 0.06, 12.0], + lower = [-1000.0, 0.01, -1.0], + upper = [nothing, 0.06, 12.0], ) opt_model = flux_balance_analysis( diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 5a87535ab..9a5b41feb 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -26,8 +26,8 @@ change_bounds( smm, ["EX_glc__D_e", "GLCpts"]; - lbs = [-1000.0, -1.0], - ubs = [nothing, 12.0], + lower = [-1000.0, -1.0], + upper = [nothing, 12.0], ) rxn_fluxes = flux_balance_analysis_dict( From 3ff2ab67ae01b809787804731061309517a5c1a2 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Mon, 11 Apr 2022 22:55:13 +0200 Subject: [PATCH 055/109] fix func name --- src/base/types/derivedmodels/GeckoModel.jl | 8 ++++---- src/base/types/derivedmodels/SMomentModel.jl | 10 +++++----- test/analysis/gecko.jl | 2 +- test/analysis/smoment.jl | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/base/types/derivedmodels/GeckoModel.jl b/src/base/types/derivedmodels/GeckoModel.jl index cb5304ae3..da416dc43 100644 --- a/src/base/types/derivedmodels/GeckoModel.jl +++ b/src/base/types/derivedmodels/GeckoModel.jl @@ -343,7 +343,7 @@ function _add_enzyme_variable( end """ - change_bound(model::GeckoModel, id; lower=nothing, upper=nothing) + change_bound!(model::GeckoModel, id; lower=nothing, upper=nothing) Change the bound of variable in `model`. Does not change the bound if respective bound is `nothing`. Note, for `GeckoModel`s, if the model used to construct the @@ -351,7 +351,7 @@ bound is `nothing`. Note, for `GeckoModel`s, if the model used to construct the permanently irreversible in the model, i.e. changing their bounds to make them reversible will have no effect. """ -function change_bound(model::GeckoModel, id; lower = nothing, upper = nothing) +function change_bound!(model::GeckoModel, id; lower = nothing, upper = nothing) gene_idx = first(indexin([id], model.gene_ids)) if isnothing(gene_idx) @@ -406,12 +406,12 @@ function change_bound(model::GeckoModel, id; lower = nothing, upper = nothing) end """ - change_bounds(model::GeckoModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) + change_bounds!(model::GeckoModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) Change the bounds of multiple variables in `model` simultaneously. See [`change_bound`](@ref) for details. 
""" -function change_bounds( +function change_bounds!( model::GeckoModel, ids; lower = fill(nothing, length(ids)), diff --git a/src/base/types/derivedmodels/SMomentModel.jl b/src/base/types/derivedmodels/SMomentModel.jl index b56024260..83158e653 100644 --- a/src/base/types/derivedmodels/SMomentModel.jl +++ b/src/base/types/derivedmodels/SMomentModel.jl @@ -202,7 +202,7 @@ function SMomentModel( end """ - change_bound(model::SMomentModel, id; lb=nothing, ub=nothing) + change_bound!(model::SMomentModel, id; lb=nothing, ub=nothing) Change the bound of variable in `model`. Does not change the bound if respective bound is `nothing`. Note, for `SMomentModel`s, if the model used to construct the @@ -210,7 +210,7 @@ bound is `nothing`. Note, for `SMomentModel`s, if the model used to construct th permanently irreversible in the model, i.e. changing their bounds to make them reversible will have no effect. """ -function change_bound(model::SMomentModel, id; lb = nothing, ub = nothing) +function change_bound!(model::SMomentModel, id; lb = nothing, ub = nothing) flux_for_idx = @@ -255,18 +255,18 @@ function change_bound(model::SMomentModel, id; lb = nothing, ub = nothing) end """ - change_bounds(model::SMomentModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids))) + change_bounds!(model::SMomentModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids))) Change the bounds of multiple variables in `model` simultaneously. See [`change_bound`](@ref) for details. 
""" -function change_bounds( +function change_bounds!( model::SMomentModel, ids; lbs = fill(nothing, length(ids)), ubs = fill(nothing, length(ids)), ) for (id, lb, ub) in zip(ids, lbs, ubs) - change_bound(model, id; lb = lb, ub = ub) + change_bound!(model, id; lb = lb, ub = ub) end end diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 63e505245..24fa52e2d 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -24,7 +24,7 @@ rid_isozymes, enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)], ) - change_bounds( + change_bounds!( gm, ["EX_glc__D_e", "b2779", "GLCpts"]; lower = [-1000.0, 0.01, -1.0], diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 9a5b41feb..cc5f331e8 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -23,7 +23,7 @@ smm = SMomentModel(model; rid_isozymes, enzyme_capacity = total_protein_mass) - change_bounds( + change_bounds!( smm, ["EX_glc__D_e", "GLCpts"]; lower = [-1000.0, -1.0], From fa0044301a4740bfa7a649aadfa03eb7f073f612 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Tue, 12 Apr 2022 09:27:47 +0200 Subject: [PATCH 056/109] fix funcs more --- src/base/types/derivedmodels/GeckoModel.jl | 2 +- src/base/types/derivedmodels/SMomentModel.jl | 38 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/base/types/derivedmodels/GeckoModel.jl b/src/base/types/derivedmodels/GeckoModel.jl index da416dc43..79b8d813e 100644 --- a/src/base/types/derivedmodels/GeckoModel.jl +++ b/src/base/types/derivedmodels/GeckoModel.jl @@ -418,6 +418,6 @@ function change_bounds!( upper = fill(nothing, length(ids)), ) for (id, lower, upper) in zip(ids, lower, upper) - change_bound(model, id; lower = lower, upper = upper) + change_bound!(model, id; lower = lower, upper = upper) end end diff --git a/src/base/types/derivedmodels/SMomentModel.jl b/src/base/types/derivedmodels/SMomentModel.jl index 83158e653..1297293f7 100644 --- a/src/base/types/derivedmodels/SMomentModel.jl +++ b/src/base/types/derivedmodels/SMomentModel.jl @@ -202,7 +202,7 @@ function SMomentModel( end """ - change_bound!(model::SMomentModel, id; lb=nothing, ub=nothing) + change_bound!(model::SMomentModel, id; lower=nothing, upper=nothing) Change the bound of variable in `model`. Does not change the bound if respective bound is `nothing`. Note, for `SMomentModel`s, if the model used to construct the @@ -210,24 +210,24 @@ bound is `nothing`. Note, for `SMomentModel`s, if the model used to construct th permanently irreversible in the model, i.e. changing their bounds to make them reversible will have no effect. 
""" -function change_bound!(model::SMomentModel, id; lb = nothing, ub = nothing) +function change_bound!(model::SMomentModel, id; lower = nothing, upper = nothing) flux_for_idx = findfirst(x -> x == id * "§ARM§FOR" || x == id * "§FOR", model.irrev_reaction_ids) if !isnothing(flux_for_idx) - if !isnothing(lb) - if lb <= 0 + if !isnothing(lower) + if lower <= 0 model.xl[flux_for_idx] = 0 else - model.xl[flux_for_idx] = lb + model.xl[flux_for_idx] = lower end end - if !isnothing(ub) - if ub <= 0 + if !isnothing(upper) + if upper <= 0 model.xu[flux_for_idx] = 0 else - model.xu[flux_for_idx] = ub + model.xu[flux_for_idx] = upper end end end @@ -235,17 +235,17 @@ function change_bound!(model::SMomentModel, id; lb = nothing, ub = nothing) flux_rev_idx = findfirst(x -> x == id * "§ARM§REV" || x == id * "§REV", model.irrev_reaction_ids) if !isnothing(flux_rev_idx) - if !isnothing(lb) - if lb >= 0 + if !isnothing(lower) + if lower >= 0 model.xu[flux_rev_idx] = 0 else - model.xu[flux_rev_idx] = -lb + model.xu[flux_rev_idx] = -lower end - if !isnothing(ub) - if ub >= 0 + if !isnothing(upper) + if upper >= 0 model.xl[flux_rev_idx] = 0 else - model.xl[flux_rev_idx] = -ub + model.xl[flux_rev_idx] = -upper end end end @@ -255,7 +255,7 @@ function change_bound!(model::SMomentModel, id; lb = nothing, ub = nothing) end """ - change_bounds!(model::SMomentModel, ids; lbs=fill(nothing, length(ids)), ubs=fill(nothing, length(ids))) + change_bounds!(model::SMomentModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) Change the bounds of multiple variables in `model` simultaneously. See [`change_bound`](@ref) for details. @@ -263,10 +263,10 @@ Change the bounds of multiple variables in `model` simultaneously. 
See function change_bounds!( model::SMomentModel, ids; - lbs = fill(nothing, length(ids)), - ubs = fill(nothing, length(ids)), + lower = fill(nothing, length(ids)), + upper = fill(nothing, length(ids)), ) - for (id, lb, ub) in zip(ids, lbs, ubs) - change_bound!(model, id; lb = lb, ub = ub) + for (id, lower, upper) in zip(ids, lower, upper) + change_bound!(model, id; lower = lower, upper = upper) end end From d8ac8c692656cf04078d09c404fd88dc190a7317 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 12 Apr 2022 11:07:55 +0200 Subject: [PATCH 057/109] implement reviews --- src/COBREXA.jl | 2 +- src/analysis/gecko.jl | 178 ++++++++ src/analysis/smoment.jl | 75 ++++ src/base/solver.jl | 7 + src/base/types/Isozyme.jl | 6 +- src/base/types/MetabolicModel.jl | 11 - src/base/types/StandardModel.jl | 8 +- src/base/types/derivedmodels/GeckoModel.jl | 423 ------------------- src/base/types/derivedmodels/SMomentModel.jl | 272 ------------ src/base/types/wrappers/GeckoModel.jl | 171 ++++++++ src/base/types/wrappers/SMomentModel.jl | 132 ++++++ src/base/utils/enzyme.jl | 11 +- src/reconstruction/gecko.jl | 79 ++++ src/reconstruction/smoment.jl | 69 +++ 14 files changed, 729 insertions(+), 715 deletions(-) create mode 100644 src/analysis/gecko.jl create mode 100644 src/analysis/smoment.jl delete mode 100644 src/base/types/derivedmodels/GeckoModel.jl delete mode 100644 src/base/types/derivedmodels/SMomentModel.jl create mode 100644 src/base/types/wrappers/GeckoModel.jl create mode 100644 src/base/types/wrappers/SMomentModel.jl create mode 100644 src/reconstruction/gecko.jl create mode 100644 src/reconstruction/smoment.jl diff --git a/src/COBREXA.jl b/src/COBREXA.jl index 76c912cd8..74d4cc16b 100644 --- a/src/COBREXA.jl +++ b/src/COBREXA.jl @@ -32,7 +32,7 @@ _inc_all.( joinpath("base", "logging"), joinpath("base", "macros"), joinpath("base", "types"), - joinpath("base", "types", "derivedmodels"), + joinpath("base", "types", "wrappers"), "base", "io", joinpath("io", 
"show"), diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl new file mode 100644 index 000000000..fbfea61c6 --- /dev/null +++ b/src/analysis/gecko.jl @@ -0,0 +1,178 @@ +""" + make_geckomodel( + model::StandardModel; + rid_isozymes = Dict{String, Vector{Isozyme}}(), + enzyme_capacities = [(),], + ) + +Construct a `GeckoModel` based on `model` using the kinetic data encoded by +`rid_isozymes`. Enzyme capacity constraints can be added through `enzyme_capacities`, +which is a vector of tuples. In the first position of the tuple is a list of gene ids, +and the second position is mass upperbound of the sum of these gene ids. + +The units of the fluxes and protein concentration depend on those used in +`rid_isozymes` for the kcats and the molar masses encoded in the genes of +`model`. Currently only `modifications` that change attributes of the +`optimizer` are supported. + +# Example +``` +gm = make_geckomodel( + model; + rid_isozymes, + enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)], +) + +opt_model = flux_balance_analysis( + gm, + Tulip.Optimizer +) + +rxn_fluxes = flux_dict(gm, opt_model) +prot_concens = protein_dict(gm, opt_model) +``` +""" +function make_geckomodel( + model::StandardModel; + rid_isozymes = Dict{String,Vector{Isozyme}}(), + enzyme_capacities = [()], +) + S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = + _build_irreversible_stoichiometric_matrix(model, rid_isozymes) + + #: find all gene products that have kcats associated with them + gene_ids = get_genes_with_kcats(rid_isozymes) + + #: size of resultant model + num_reactions = size(S, 2) + num_genes = length(gene_ids) + num_metabolites = size(S, 1) + num_vars = num_reactions + num_genes + + #: equality lhs + E_components = ( #TODO add size hints if possible + row_idxs = Vector{Int}(), + col_idxs = Vector{Int}(), + coeffs = Vector{Float64}(), + ) + + for (rid, col_idx) in reaction_map + original_rid = string(split(rid, "§")[1]) + + # skip these entries + 
contains(rid, "§ARM") && continue + !haskey(rid_isozymes, original_rid) && continue + + # these entries have kcats + if contains(rid, "§ISO") + iso_num = parse( + Int, + replace( + first(filter(startswith("ISO"), split(rid, "§")[2:end])), + "ISO" => "", + ), + ) + else # only one enzyme + iso_num = 1 + end + + # add all entries to column of matrix + _add_enzyme_variable( + rid_isozymes, + iso_num, # only one enzyme + rid, + original_rid, + E_components, + col_idx, + gene_ids, + ) + end + + Se = sparse( + E_components.row_idxs, + E_components.col_idxs, + E_components.coeffs, + num_genes, + num_reactions, + ) + + stoich_mat = sparse([ + S zeros(num_metabolites, num_genes) + Se I(num_genes) + ]) + + #: equality rhs + b = spzeros(num_metabolites + num_genes) + + #: find objective (assume objective is forward) + obj_idx_orig = first(findnz(objective(model))[1]) + obj_id_orig = reactions(model)[obj_idx_orig] + obj_id = obj_id_orig * "§FOR" + c = spzeros(num_vars) + obj_idx = reaction_map[obj_id] + c[obj_idx] = 1.0 + + #: inequality constraints + xl = sparse([lb_fluxes; fill(0.0, num_genes)]) + xu = sparse([ub_fluxes; fill(1000.0, num_genes)]) + + #: enzyme capacity constraints + mw_proteins = [model.genes[pid].molar_mass for pid in gene_ids] + C = spzeros(length(enzyme_capacities), num_vars) + cl = spzeros(length(enzyme_capacities)) + cu = spzeros(length(enzyme_capacities)) + + for (i, enz_cap) in enumerate(enzyme_capacities) + enz_idxs = indexin(first(enz_cap), gene_ids) + C[i, num_reactions.+enz_idxs] .= mw_proteins[enz_idxs] + cu[i] = last(enz_cap) + end + + return GeckoModel( + reactions(model), + _order_id_to_idx_dict(reaction_map), + _order_id_to_idx_dict(metabolite_map), + gene_ids, + c, + stoich_mat, + b, + xl, + xu, + C, + cl, + cu, + ) +end + +""" + _add_enzyme_variable( + rid_isozymes, + iso_num, + rid, + original_rid, + E_components, + col_idx, + gene_ids, + ) + +Helper function to add an column into the enzyme stoichiometric matrix. 
+""" +function _add_enzyme_variable( + rid_isozymes, + iso_num, + rid, + original_rid, + E_components, + col_idx, + gene_ids, +) + pstoich = rid_isozymes[original_rid][iso_num].stoichiometry + kcat = + contains(rid, "§FOR") ? rid_isozymes[original_rid][iso_num].kcats[1] : + rid_isozymes[original_rid][iso_num].kcats[2] + for (pid, pst) in pstoich + push!(E_components.row_idxs, first(indexin([pid], gene_ids))) + push!(E_components.col_idxs, col_idx) + push!(E_components.coeffs, -pst / kcat) + end +end diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl new file mode 100644 index 000000000..040dcb9e7 --- /dev/null +++ b/src/analysis/smoment.jl @@ -0,0 +1,75 @@ +""" + make_smomentmodel( + model::StandardModel; + rid_isozymes = Dict{String, Vector{Isozyme}}(), + ) + +Construct an `SMomentModel` model using `model` and `rid_isozymes`. +""" +function make_smomentmodel( + model::StandardModel; + rid_isozymes = Dict{String,Vector{Isozyme}}(), + enzyme_capacity = 0.0, +) + + # check that input data is in correct format for smoment + if any(length(v) > 1 for v in values(rid_isozymes)) + @warn( + "For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data." + ) + end + + irrevS, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = + _build_irreversible_stoichiometric_matrix(model) + + #: size of resultant model + num_reactions = size(irrevS, 2) + num_metabolites = size(irrevS, 1) + num_vars = num_reactions + 1 + + #: equality lhs + Se = zeros(1, num_reactions) + + for (rid, col_idx) in reaction_map + original_rid = string(split(rid, "§")[1]) + + # skip these entries + !haskey(rid_isozymes, original_rid) && continue + # these entries have kcats, only one GRR by assumption + isozyme = first(rid_isozymes[original_rid]) + mw = sum([model.genes[gid].molar_mass * ps for (gid, ps) in isozyme.stoichiometry]) + kcat = contains(rid, "§FOR") ? 
first(isozyme.kcats) : last(isozyme.kcats) + Se[1, col_idx] = -mw / kcat + end + + S = [ + irrevS zeros(num_metabolites, 1) + Se 1.0 + ] + + #: equality rhs + b = zeros(num_metabolites + 1) + + #: find objective + obj_idx_orig = first(findnz(objective(model))[1]) + obj_id_orig = reactions(model)[obj_idx_orig] + obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective + c = spzeros(num_vars) + obj_idx = reaction_map[obj_id] + c[obj_idx] = 1.0 + + #: bounds + xl = sparse([lb_fluxes; 0.0]) + xu = sparse([ub_fluxes; enzyme_capacity]) + + return SMomentModel( + reactions(model), + _order_id_to_idx_dict(reaction_map), + _order_id_to_idx_dict(metabolite_map), + c, + S, + b, + xl, + xu, + ) +end \ No newline at end of file diff --git a/src/base/solver.jl b/src/base/solver.jl index e79ef39a1..2c4ff84e8 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -124,3 +124,10 @@ flux_dict(model, flux_balance_analysis(model, ...)) flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = is_solved(opt_model) ? Dict(reactions(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing + +""" + flux_dict(model::MetabolicModel) + +A pipeable variant of `flux_dict` +""" +flux_dict(model::MetabolicModel) = x -> flux_dict(model, x) \ No newline at end of file diff --git a/src/base/types/Isozyme.jl b/src/base/types/Isozyme.jl index 94945dcf8..b3fdcd6bc 100644 --- a/src/base/types/Isozyme.jl +++ b/src/base/types/Isozyme.jl @@ -1,9 +1,9 @@ """ mutable struct Isozyme -Struct containing isozyme information. Here, `stoichiometry` is a -dictionary of gene ids to their stoichiometry in the isozyme complex, -and `kcats` is a tuple of the forward and reverse kcats of the isozyme. +Struct containing isozyme information. Here, `stoichiometry` is a dictionary of +gene ids to their stoichiometry in the isozyme complex, and `kcats` is a tuple +of the forward and reverse turnover numbers of the isozyme. 
# Fields ```` diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl index 304fdadda..1ba000648 100644 --- a/src/base/types/MetabolicModel.jl +++ b/src/base/types/MetabolicModel.jl @@ -313,17 +313,6 @@ function gene_notes(model::MetabolicModel, gene_id::String)::Notes return Dict() end -""" - enzyme_capacity(model::MetabolicModel) - -Return enzyme capacity inequality constraint vector and bound, or nothing -if it doesn't exist in the model. -""" -function enzyme_capacity(model::MetabolicModel) - #TODO this needs a type - nothing, nothing -end - """ reaction_name(model::MetabolicModel, rid::String) diff --git a/src/base/types/StandardModel.jl b/src/base/types/StandardModel.jl index 31d981901..f4dd74e82 100644 --- a/src/base/types/StandardModel.jl +++ b/src/base/types/StandardModel.jl @@ -444,21 +444,21 @@ function is_reaction_blocked(model::StandardModel, rid::String) end """ - has_reaction_isozymes(model::StandardModel, rid::String) + reaction_has_multiple_isozymes(model::StandardModel, rid::String) Check if reaction `rid` in `model` is catalyzed by multiple enzymes, i.e. it has isozymes according to the gene reaction rules. """ -function has_reaction_isozymes(model::StandardModel, rid::String) +function reaction_has_multiple_isozymes(model::StandardModel, rid::String) length(reaction_gene_association(model, rid)) > 1 end """ - reaction_has_grr(model::StandardModel, rid::String) + reaction_has_valid_gene_association(model::StandardModel, rid::String) Check if reaction `rid` in `model` has a gene reaction rule entry. 
""" -function has_reaction_grr(model::StandardModel, rid::String) +function reaction_has_valid_gene_association(model::StandardModel, rid::String) #TODO simplify this once COBREXA enforces universal rules for GRR representation haskey(model.reactions, rid) && !isnothing(reaction_gene_association(model, rid)) && diff --git a/src/base/types/derivedmodels/GeckoModel.jl b/src/base/types/derivedmodels/GeckoModel.jl deleted file mode 100644 index 79b8d813e..000000000 --- a/src/base/types/derivedmodels/GeckoModel.jl +++ /dev/null @@ -1,423 +0,0 @@ -""" - mutable struct GeckoModel <: MetabolicModel - -A model that incorporates enzyme capacity and kinetic constraints via the GECKO -formulation. See `Sánchez, Benjamín J., et al. "Improving the phenotype -predictions of a yeast genome‐scale metabolic model by incorporating enzymatic -constraints." Molecular systems biology, 2017.` for implementation details. - -Note, since the model uses irreversible reactions internally, `"§FOR"` (for the -forward direction) and `"§REV"` (for the reverse direction) is appended to each -reaction internally. Hence, `"§"` is reserved for internal use as a delimiter, -no reaction id should contain this character. - -To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel`. 
- -# Fields -``` -reaction_ids::Vector{String} -irrev_reaction_ids::Vector{String} -metabolites::Vector{String} -gene_ids::Vector{String} -c::SparseVec -S::SparseMat -b::SparseVec -xl::SparseVec -xu::SparseVec -C::SparseMat -cl::Vector{Float64} -cu::Vector{Float64} -``` -""" -mutable struct GeckoModel <: MetabolicModel - reaction_ids::Vector{String} - irrev_reaction_ids::Vector{String} - metabolites::Vector{String} - gene_ids::Vector{String} - - # gecko - c::SparseVec - S::SparseMat - b::SparseVec - xl::SparseVec - xu::SparseVec - - # enzyme capacity constraints - C::SparseMat - cl::Vector{Float64} - cu::Vector{Float64} -end - -""" - stoichiometry(model::GeckoModel) - -Return stoichiometry matrix that includes enzymes as metabolites. -""" -stoichiometry(model::GeckoModel) = model.S - -""" - balance(model::GeckoModel) - -Return stoichiometric balance. -""" -balance(model::GeckoModel) = model.b - -""" - objective(model::GeckoModel) - -Return objective of `model`. -""" -objective(model::GeckoModel) = model.c - -""" - reactions(model::GeckoModel) - -Returns the reversible reactions in `model`. For -the irreversible reactions, use [`irreversible_reactions`][@ref]. -""" -reactions(model::GeckoModel) = model.reaction_ids - -""" - n_reactions(model::GeckoModel) - -Returns the number of reactions in the model. -""" -n_reactions(model::GeckoModel) = length(model.reaction_ids) - -""" - irreversible_reactions(model::GeckoModel) - -Returns the irreversible reactions in `model`. -""" -irreversible_reactions(model::GeckoModel) = model.irrev_reaction_ids - -""" - genes(model::GeckoModel) - -Returns the genes (proteins) in the order as they appear as variables in the -model. -""" -genes(model::GeckoModel) = model.gene_ids - -""" - n_genes(model::GeckoModel) - -Returns the number of genes in the model. -""" -n_genes(model::GeckoModel) = length(model.gene_ids) - -""" - metabolites(model::GeckoModel) - -Return the metabolites in `model`. 
-""" -metabolites(model::GeckoModel) = model.metabolites - -""" - n_metabolites(model::GeckoModel) = - -Return the number of metabolites in `model`. -""" -n_metabolites(model::GeckoModel) = length(metabolites(model)) - -""" - bounds(model::GeckoModel) - -Return variable bounds for `GeckoModel`. -""" -bounds(model::GeckoModel) = (model.xl, model.xu) - -""" - coupling(model::GeckoModel) - -Coupling constraint matrix for a `GeckoModel`. -""" -coupling(model::GeckoModel) = model.C - -""" - coupling_bounds(model::GeckoModel) - -Coupling bounds for a `GeckoModel`. -""" -coupling_bounds(model::GeckoModel) = (model.cl, model.cu) - -""" - reaction_flux(model::MetabolicModel) - -Helper function to get fluxes from optimization problem. -""" -function reaction_flux(model::GeckoModel) - R = spzeros(n_reactions(model), n_genes(model) + length(model.irrev_reaction_ids)) - for (i, rid) in enumerate(reactions(model)) - for_idx = findfirst( - x -> x == rid * "§ARM§FOR" || x == rid * "§FOR", - model.irrev_reaction_ids, - ) - rev_idx = findfirst( - x -> x == rid * "§ARM§REV" || x == rid * "§REV", - model.irrev_reaction_ids, - ) - !isnothing(for_idx) && (R[i, for_idx] = 1.0) - !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) - end - return R' -end - -""" - GeckoModel( - model::StandardModel; - rid_isozymes = Dict{String, Vector{Isozyme}}(), - enzyme_capacities = [(),], - ) - -Construct a `GeckoModel` based on `model` using the kinetic data encoded by -`rid_isozymes`. Enzyme capacity constraints can be added through `enzyme_capacities`, -which is a vector of tuples. In the first position of the tuple is a list of gene ids, -and the second position is mass upperbound of the sum of these gene ids. - -The units of the fluxes and protein concentration depend on those used in -`rid_isozymes` for the kcats and the molar masses encoded in the genes of -`model`. Currently only `modifications` that change attributes of the -`optimizer` are supported. 
- -# Example -``` -gm = GeckoModel( - model; - rid_isozymes, - enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)], -) - -opt_model = flux_balance_analysis( - gm, - Tulip.Optimizer -) - -rxn_fluxes = flux_dict(gm, opt_model) -prot_concens = protein_dict(gm, opt_model) -``` -""" -function GeckoModel( - model::StandardModel; - rid_isozymes = Dict{String,Vector{Isozyme}}(), - enzyme_capacities = [()], -) - S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model, rid_isozymes) - - #: find all gene products that have kcats associated with them - gene_ids = get_genes_with_kcats(rid_isozymes) - - #: size of resultant model - num_reactions = size(S, 2) - num_genes = length(gene_ids) - num_metabolites = size(S, 1) - num_vars = num_reactions + num_genes - - #: equality lhs - E_components = ( #TODO add size hints if possible - row_idxs = Vector{Int}(), - col_idxs = Vector{Int}(), - coeffs = Vector{Float64}(), - ) - - for (rid, col_idx) in reaction_map - original_rid = string(split(rid, "§")[1]) - - # skip these entries - contains(rid, "§ARM") && continue - !haskey(rid_isozymes, original_rid) && continue - - # these entries have kcats - if contains(rid, "§ISO") - iso_num = parse( - Int, - replace( - first(filter(startswith("ISO"), split(rid, "§")[2:end])), - "ISO" => "", - ), - ) - else # only one enzyme - iso_num = 1 - end - - # add all entries to column of matrix - COBREXA._add_enzyme_variable( - rid_isozymes, - iso_num, # only one enzyme - rid, - original_rid, - E_components, - col_idx, - gene_ids, - ) - end - - Se = sparse( - E_components.row_idxs, - E_components.col_idxs, - E_components.coeffs, - num_genes, - num_reactions, - ) - - stoich_mat = sparse([ - S zeros(num_metabolites, num_genes) - Se I(num_genes) - ]) - - #: equality rhs - b = spzeros(num_metabolites + num_genes) - - #: find objective (assume objective is forward) - obj_idx_orig = first(findnz(objective(model))[1]) - obj_id_orig = 
reactions(model)[obj_idx_orig] - obj_id = obj_id_orig * "§FOR" - c = spzeros(num_vars) - obj_idx = reaction_map[obj_id] - c[obj_idx] = 1.0 - - #: inequality constraints - xl = sparse([lb_fluxes; fill(0.0, num_genes)]) - xu = sparse([ub_fluxes; fill(1000.0, num_genes)]) - - #: enzyme capacity constraints - mw_proteins = [model.genes[pid].molar_mass for pid in gene_ids] - C = spzeros(length(enzyme_capacities), num_vars) - cl = spzeros(length(enzyme_capacities)) - cu = spzeros(length(enzyme_capacities)) - - for (i, enz_cap) in enumerate(enzyme_capacities) - enz_idxs = indexin(first(enz_cap), gene_ids) - C[i, num_reactions.+enz_idxs] .= mw_proteins[enz_idxs] - cu[i] = last(enz_cap) - end - - return GeckoModel( - reactions(model), - _order_id_to_idx_dict(reaction_map), - _order_id_to_idx_dict(metabolite_map), - gene_ids, - c, - stoich_mat, - b, - xl, - xu, - C, - cl, - cu, - ) -end - -""" - _add_enzyme_variable( - rid_isozymes, - iso_num, - rid, - original_rid, - E_components, - col_idx, - gene_ids, - ) - -Helper function to add an column into the enzyme stoichiometric matrix. -""" -function _add_enzyme_variable( - rid_isozymes, - iso_num, - rid, - original_rid, - E_components, - col_idx, - gene_ids, -) - pstoich = rid_isozymes[original_rid][iso_num].stoichiometry - kcat = - contains(rid, "§FOR") ? rid_isozymes[original_rid][iso_num].kcats[1] : - rid_isozymes[original_rid][iso_num].kcats[2] - for (pid, pst) in pstoich - push!(E_components.row_idxs, first(indexin([pid], gene_ids))) - push!(E_components.col_idxs, col_idx) - push!(E_components.coeffs, -pst / kcat) - end -end - -""" - change_bound!(model::GeckoModel, id; lower=nothing, upper=nothing) - -Change the bound of variable in `model`. Does not change the bound if respective -bound is `nothing`. Note, for `GeckoModel`s, if the model used to construct the -`GeckoModel` has irreversible reactions, then these reactions will be -permanently irreversible in the model, i.e. 
changing their bounds to make them -reversible will have no effect. -""" -function change_bound!(model::GeckoModel, id; lower = nothing, upper = nothing) - gene_idx = first(indexin([id], model.gene_ids)) - - if isnothing(gene_idx) - flux_for_idx = findfirst( - x -> x == id * "§ARM§FOR" || x == id * "§FOR", - model.irrev_reaction_ids, - ) - if !isnothing(flux_for_idx) - if !isnothing(lower) - if lower <= 0 - model.xl[flux_for_idx] = 0 - else - model.xl[flux_for_idx] = lower - end - end - if !isnothing(upper) - if upper <= 0 - model.xu[flux_for_idx] = 0 - else - model.xu[flux_for_idx] = upper - end - end - end - - flux_rev_idx = findfirst( - x -> x == id * "§ARM§REV" || x == id * "§REV", - model.irrev_reaction_ids, - ) - if !isnothing(flux_rev_idx) - if !isnothing(lower) - if lower >= 0 - model.xu[flux_rev_idx] = 0 - else - model.xu[flux_rev_idx] = -lower - end - if !isnothing(upper) - if upper >= 0 - model.xl[flux_rev_idx] = 0 - else - model.xl[flux_rev_idx] = -upper - end - end - end - end - else - n = length(model.irrev_reaction_ids) - !isnothing(lower) && (model.xl[n+gene_idx] = lower) - !isnothing(upper) && (model.xu[n+gene_idx] = upper) - end - - return nothing -end - -""" - change_bounds!(model::GeckoModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) - -Change the bounds of multiple variables in `model` simultaneously. See -[`change_bound`](@ref) for details. 
-""" -function change_bounds!( - model::GeckoModel, - ids; - lower = fill(nothing, length(ids)), - upper = fill(nothing, length(ids)), -) - for (id, lower, upper) in zip(ids, lower, upper) - change_bound!(model, id; lower = lower, upper = upper) - end -end diff --git a/src/base/types/derivedmodels/SMomentModel.jl b/src/base/types/derivedmodels/SMomentModel.jl deleted file mode 100644 index 1297293f7..000000000 --- a/src/base/types/derivedmodels/SMomentModel.jl +++ /dev/null @@ -1,272 +0,0 @@ -""" - mutable struct SMomentModel <: MetabolicModel - -Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, -and Steffen Klamt. "Automatic construction of metabolic models with enzyme -constraints." BMC bioinformatics, 2020.` for implementation details. - -Note, `"§"` is reserved for internal use as a delimiter, no reaction id should -contain that character. Also note, SMOMENT assumes that each reaction only has a -single enzyme (one GRR) associated with it. It is required that a model be -modified to ensure that this condition is met. For ease-of-use, -[`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only -`modifications` that change attributes of the `optimizer` are supported. - -# Fields -``` -reaction_ids::Vector{String} -irrev_reaction_ids::Vector{String} -metabolites::Vector{String} -c::SparseVec -S::SparseMat -b::SparseVec -xl::SparseVec -xu::SparseVec -C::SparseMat -cl::Vector{Float64} -cu::Vector{Float64} -``` -""" -mutable struct SMomentModel <: MetabolicModel - reaction_ids::Vector{String} - irrev_reaction_ids::Vector{String} - metabolites::Vector{String} - c::SparseVec - S::SparseMat - b::SparseVec - xl::SparseVec - xu::SparseVec -end - -""" - stoichiometry(model::SMomentModel) - -Return stoichiometry matrix that includes enzymes as metabolites. -""" -stoichiometry(model::SMomentModel) = model.S - -""" - balance(model::SMomentModel) - -Return stoichiometric balance. 
-""" -balance(model::SMomentModel) = model.b - -""" - objective(model::SMomentModel) - -Return objective of `model`. -""" -objective(model::SMomentModel) = model.c - -""" - reactions(model::SMomentModel) - -Returns the reversible reactions in `model`. For -the irreversible reactions, use [`irreversible_reactions`][@ref]. -""" -reactions(model::SMomentModel) = model.reaction_ids - -""" - n_reactions(model::SMomentModel) - -Returns the number of reactions in the model. -""" -n_reactions(model::SMomentModel) = length(model.reaction_ids) - -""" - irreversible_reactions(model::SMomentModel) - -Returns the irreversible reactions in `model`. -""" -irreversible_reactions(model::SMomentModel) = model.irrev_reaction_ids - -""" - metabolites(model::SMomentModel) - -Return the metabolites in `model`. -""" -metabolites(model::SMomentModel) = model.metabolites - -""" - n_metabolites(model::SMomentModel) = - -Return the number of metabolites in `model`. -""" -n_metabolites(model::SMomentModel) = length(metabolites(model)) - -""" - bounds(model::SMomentModel) - -Return variable bounds for `SMomentModel`. -""" -bounds(model::SMomentModel) = (model.xl, model.xu) - -""" - reaction_flux(model::MetabolicModel) - -Helper function to get fluxes from optimization problem. -""" -function reaction_flux(model::SMomentModel) - R = spzeros(n_reactions(model), length(model.irrev_reaction_ids) + 1) - for (i, rid) in enumerate(reactions(model)) - for_idx = findfirst( - x -> x == rid * "§ARM§FOR" || x == rid * "§FOR", - model.irrev_reaction_ids, - ) - rev_idx = findfirst( - x -> x == rid * "§ARM§REV" || x == rid * "§REV", - model.irrev_reaction_ids, - ) - !isnothing(for_idx) && (R[i, for_idx] = 1.0) - !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) - end - return R' -end - -""" - SMomentModel( - model::StandardModel; - rid_isozymes = Dict{String, Vector{Isozyme}}(), - ) - -Construct an `SMomentModel`. 
- -""" -function SMomentModel( - model::StandardModel; - rid_isozymes = Dict{String,Vector{Isozyme}}(), - enzyme_capacity = 0.0, -) - - # check that input data is in correct format for smoment - if any(length(v) > 1 for v in values(rid_isozymes)) - @warn( - "For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data." - ) - end - - irrevS, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model) - - #: size of resultant model - num_reactions = size(irrevS, 2) - num_metabolites = size(irrevS, 1) - num_vars = num_reactions + 1 - - #: equality lhs - Se = zeros(1, num_reactions) - - for (rid, col_idx) in reaction_map - original_rid = string(split(rid, "§")[1]) - - # skip these entries - !haskey(rid_isozymes, original_rid) && continue - # these entries have kcats, only one GRR by assumption - isozyme = first(rid_isozymes[original_rid]) - mw = sum([model.genes[gid].molar_mass * ps for (gid, ps) in isozyme.stoichiometry]) - kcat = contains(rid, "§FOR") ? first(isozyme.kcats) : last(isozyme.kcats) - Se[1, col_idx] = -mw / kcat - end - - S = [ - irrevS zeros(num_metabolites, 1) - Se 1.0 - ] - - #: equality rhs - b = zeros(num_metabolites + 1) - - #: find objective - obj_idx_orig = first(findnz(objective(model))[1]) - obj_id_orig = reactions(model)[obj_idx_orig] - obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective - c = spzeros(num_vars) - obj_idx = reaction_map[obj_id] - c[obj_idx] = 1.0 - - #: bounds - xl = sparse([lb_fluxes; 0.0]) - xu = sparse([ub_fluxes; enzyme_capacity]) - - return SMomentModel( - reactions(model), - _order_id_to_idx_dict(reaction_map), - _order_id_to_idx_dict(metabolite_map), - c, - S, - b, - xl, - xu, - ) -end - -""" - change_bound!(model::SMomentModel, id; lower=nothing, upper=nothing) - -Change the bound of variable in `model`. Does not change the bound if respective -bound is `nothing`. 
Note, for `SMomentModel`s, if the model used to construct the -`SMomentModel` has irreversible reactions, then these reactions will be -permanently irreversible in the model, i.e. changing their bounds to make them -reversible will have no effect. -""" -function change_bound!(model::SMomentModel, id; lower = nothing, upper = nothing) - - - flux_for_idx = - findfirst(x -> x == id * "§ARM§FOR" || x == id * "§FOR", model.irrev_reaction_ids) - if !isnothing(flux_for_idx) - if !isnothing(lower) - if lower <= 0 - model.xl[flux_for_idx] = 0 - else - model.xl[flux_for_idx] = lower - end - end - if !isnothing(upper) - if upper <= 0 - model.xu[flux_for_idx] = 0 - else - model.xu[flux_for_idx] = upper - end - end - end - - flux_rev_idx = - findfirst(x -> x == id * "§ARM§REV" || x == id * "§REV", model.irrev_reaction_ids) - if !isnothing(flux_rev_idx) - if !isnothing(lower) - if lower >= 0 - model.xu[flux_rev_idx] = 0 - else - model.xu[flux_rev_idx] = -lower - end - if !isnothing(upper) - if upper >= 0 - model.xl[flux_rev_idx] = 0 - else - model.xl[flux_rev_idx] = -upper - end - end - end - end - - return nothing -end - -""" - change_bounds!(model::SMomentModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) - -Change the bounds of multiple variables in `model` simultaneously. See -[`change_bound`](@ref) for details. -""" -function change_bounds!( - model::SMomentModel, - ids; - lower = fill(nothing, length(ids)), - upper = fill(nothing, length(ids)), -) - for (id, lower, upper) in zip(ids, lower, upper) - change_bound!(model, id; lower = lower, upper = upper) - end -end diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl new file mode 100644 index 000000000..993f16bdc --- /dev/null +++ b/src/base/types/wrappers/GeckoModel.jl @@ -0,0 +1,171 @@ +""" + mutable struct GeckoModel <: MetabolicModel + +A model that incorporates enzyme capacity and kinetic constraints via the GECKO +formulation. 
See `Sánchez, Benjamín J., et al. "Improving the phenotype +predictions of a yeast genome‐scale metabolic model by incorporating enzymatic +constraints." Molecular systems biology, 2017.` for implementation details. + +Note, since the model uses irreversible reactions internally, `"§FOR"` (for the +forward direction) and `"§REV"` (for the reverse direction) is appended to each +reaction internally. Hence, `"§"` is reserved for internal use as a delimiter, +no reaction id should contain this character. + +To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel`. + +# Fields +``` +reaction_ids::Vector{String} +irrev_reaction_ids::Vector{String} +metabolites::Vector{String} +gene_ids::Vector{String} +c::SparseVec +S::SparseMat +b::SparseVec +xl::SparseVec +xu::SparseVec +C::SparseMat +cl::Vector{Float64} +cu::Vector{Float64} +``` +""" +mutable struct GeckoModel <: MetabolicModel + reaction_ids::Vector{String} + irrev_reaction_ids::Vector{String} + metabolites::Vector{String} + gene_ids::Vector{String} + + # gecko + c::SparseVec + S::SparseMat + b::SparseVec + xl::SparseVec + xu::SparseVec + + # enzyme capacity constraints + C::SparseMat + cl::Vector{Float64} + cu::Vector{Float64} +end + +""" + stoichiometry(model::GeckoModel) + +Return stoichiometry matrix that includes enzymes as metabolites. +""" +stoichiometry(model::GeckoModel) = model.S + +""" + balance(model::GeckoModel) + +Return stoichiometric balance. +""" +balance(model::GeckoModel) = model.b + +""" + objective(model::GeckoModel) + +Return objective of `model`. +""" +objective(model::GeckoModel) = model.c + +""" + fluxes(model::GeckoModel) + +Returns the reversible reactions in `model`. For +the irreversible reactions, use [`reactions`][@ref]. +""" +fluxes(model::GeckoModel) = model.reaction_ids + +""" + n_reactions(model::GeckoModel) + +Returns the number of reversible reactions in the model. 
+""" +n_fluxes(model::GeckoModel) = length(model.reaction_ids) + +""" + reactions(model::GeckoModel) + +Returns the irreversible reactions in `model`. +""" +reactions(model::GeckoModel) = model.irrev_reaction_ids + +""" + reactions(model::GeckoModel) + +Returns the number of all irreversible reactions in `model`. +""" +n_reactions(model::GeckoModel) = length(model.irrev_reaction_ids) + +""" + genes(model::GeckoModel) + +Returns the genes (proteins) in the order as they appear as variables in the +model. +""" +genes(model::GeckoModel) = model.gene_ids + +""" + n_genes(model::GeckoModel) + +Returns the number of genes in the model. +""" +n_genes(model::GeckoModel) = length(model.gene_ids) + +""" + metabolites(model::GeckoModel) + +Return the metabolites in `model`. +""" +metabolites(model::GeckoModel) = model.metabolites + +""" + n_metabolites(model::GeckoModel) = + +Return the number of metabolites in `model`. +""" +n_metabolites(model::GeckoModel) = length(metabolites(model)) + +""" + bounds(model::GeckoModel) + +Return variable bounds for `GeckoModel`. +""" +bounds(model::GeckoModel) = (model.xl, model.xu) + +""" + coupling(model::GeckoModel) + +Coupling constraint matrix for a `GeckoModel`. +""" +coupling(model::GeckoModel) = model.C + +""" + coupling_bounds(model::GeckoModel) + +Coupling bounds for a `GeckoModel`. +""" +coupling_bounds(model::GeckoModel) = (model.cl, model.cu) + +""" + reaction_flux(model::MetabolicModel) + +Helper function to get fluxes from optimization problem. 
+""" +function reaction_flux(model::GeckoModel) + R = spzeros(n_fluxes(model), n_genes(model) + n_reactions(model)) + for (i, rid) in enumerate(fluxes(model)) + for_idx = findfirst( + x -> x == rid * "§ARM§FOR" || x == rid * "§FOR", + model.irrev_reaction_ids, + ) + rev_idx = findfirst( + x -> x == rid * "§ARM§REV" || x == rid * "§REV", + model.irrev_reaction_ids, + ) + !isnothing(for_idx) && (R[i, for_idx] = 1.0) + !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) + end + return R' +end diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl new file mode 100644 index 000000000..82a343ca8 --- /dev/null +++ b/src/base/types/wrappers/SMomentModel.jl @@ -0,0 +1,132 @@ +""" + mutable struct SMomentModel <: MetabolicModel + +Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, +and Steffen Klamt. "Automatic construction of metabolic models with enzyme +constraints." BMC bioinformatics, 2020.` for implementation details. + +Note, `"§"` is reserved for internal use as a delimiter, no reaction id should +contain that character. Also note, SMOMENT assumes that each reaction only has a +single enzyme (one GRR) associated with it. It is required that a model be +modified to ensure that this condition is met. For ease-of-use, +[`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only +`modifications` that change attributes of the `optimizer` are supported. 
+ +# Fields +``` +reaction_ids::Vector{String} +irrev_reaction_ids::Vector{String} +metabolites::Vector{String} +c::SparseVec +S::SparseMat +b::SparseVec +xl::SparseVec +xu::SparseVec +C::SparseMat +cl::Vector{Float64} +cu::Vector{Float64} +``` +""" +mutable struct SMomentModel <: MetabolicModel + reaction_ids::Vector{String} + irrev_reaction_ids::Vector{String} + metabolites::Vector{String} + c::SparseVec + S::SparseMat + b::SparseVec + xl::SparseVec + xu::SparseVec +end + +""" + stoichiometry(model::SMomentModel) + +Return stoichiometry matrix that includes enzymes as metabolites. +""" +stoichiometry(model::SMomentModel) = model.S + +""" + balance(model::SMomentModel) + +Return stoichiometric balance. +""" +balance(model::SMomentModel) = model.b + +""" + objective(model::SMomentModel) + +Return objective of `model`. +""" +objective(model::SMomentModel) = model.c + +""" + fluxes(model::SMomentModel) + +Returns the reversible reactions in `model`. For +the irreversible reactions, use [`reactions`][@ref]. +""" +fluxes(model::SMomentModel) = model.reaction_ids + +""" + n_fluxes(model::SMomentModel) + +Returns the number of reversible reactions in the model. +""" +n_fluxes(model::SMomentModel) = length(model.reaction_ids) + +""" + irreversible_reactions(model::SMomentModel) + +Returns the irreversible reactions in `model`. +""" +reactions(model::SMomentModel) = model.irrev_reaction_ids + +""" + n_reactions(model::SMomentModel) + +Returns the number of irreversible reactions in `model`. +""" +n_reactions(model::SMomentModel) = length(model.irrev_reaction_ids) + +""" + metabolites(model::SMomentModel) + +Return the metabolites in `model`. +""" +metabolites(model::SMomentModel) = model.metabolites + +""" + n_metabolites(model::SMomentModel) = + +Return the number of metabolites in `model`. +""" +n_metabolites(model::SMomentModel) = length(metabolites(model)) + +""" + bounds(model::SMomentModel) + +Return variable bounds for `SMomentModel`. 
+""" +bounds(model::SMomentModel) = (model.xl, model.xu) + +""" + reaction_flux(model::MetabolicModel) + +Helper function to get fluxes from optimization problem. +""" +function reaction_flux(model::SMomentModel) + R = spzeros(n_fluxes(model), n_reactions(model) + 1) + for (i, rid) in enumerate(fluxes(model)) + for_idx = findfirst( + x -> x == rid * "§ARM§FOR" || x == rid * "§FOR", + model.irrev_reaction_ids, + ) + rev_idx = findfirst( + x -> x == rid * "§ARM§REV" || x == rid * "§REV", + model.irrev_reaction_ids, + ) + !isnothing(for_idx) && (R[i, for_idx] = 1.0) + !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) + end + return R' +end \ No newline at end of file diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index 8a9acba5a..b1ba7e8cf 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -1,7 +1,9 @@ """ protein_dict(model::GeckoModel, opt_model) -Return a dictionary mapping protein concentrations to their ids. +Return a dictionary mapping protein concentrations to their ids. The argument +`opt_model` is a solved optimization problem, typically returned by +[`flux_balance_analysis`](@ref). """ protein_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? @@ -9,6 +11,13 @@ protein_dict(model::GeckoModel, opt_model) = model.gene_ids .=> value.(opt_model[:x][(length(model.irrev_reaction_ids)+1):end]), ) : nothing +""" + protein_dict(model::GeckoModel) + +A pipe-able variant of `protein_dict`. +""" +protein_dict(model::GeckoModel) = x -> protein_dict(model, x) + """ get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) diff --git a/src/reconstruction/gecko.jl b/src/reconstruction/gecko.jl new file mode 100644 index 000000000..34c153a8d --- /dev/null +++ b/src/reconstruction/gecko.jl @@ -0,0 +1,79 @@ +""" + change_bound!(model::GeckoModel, id; lower=nothing, upper=nothing) + +Change the bound of variable in `model`. Does not change the bound if respective +bound is `nothing`. 
Note, for `GeckoModel`s, if the model used to construct the +`GeckoModel` has irreversible reactions, then these reactions will be +permanently irreversible in the model, i.e. changing their bounds to make them +reversible will have no effect. +""" +function change_bound!(model::GeckoModel, id; lower = nothing, upper = nothing) + gene_idx = first(indexin([id], model.gene_ids)) + + if isnothing(gene_idx) + flux_for_idx = findfirst( + x -> x == id * "§ARM§FOR" || x == id * "§FOR", + model.irrev_reaction_ids, + ) + if !isnothing(flux_for_idx) + if !isnothing(lower) + if lower <= 0 + model.xl[flux_for_idx] = 0 + else + model.xl[flux_for_idx] = lower + end + end + if !isnothing(upper) + if upper <= 0 + model.xu[flux_for_idx] = 0 + else + model.xu[flux_for_idx] = upper + end + end + end + + flux_rev_idx = findfirst( + x -> x == id * "§ARM§REV" || x == id * "§REV", + model.irrev_reaction_ids, + ) + if !isnothing(flux_rev_idx) + if !isnothing(lower) + if lower >= 0 + model.xu[flux_rev_idx] = 0 + else + model.xu[flux_rev_idx] = -lower + end + if !isnothing(upper) + if upper >= 0 + model.xl[flux_rev_idx] = 0 + else + model.xl[flux_rev_idx] = -upper + end + end + end + end + else + n = length(model.irrev_reaction_ids) + !isnothing(lower) && (model.xl[n+gene_idx] = lower) + !isnothing(upper) && (model.xu[n+gene_idx] = upper) + end + + return nothing +end + +""" + change_bounds!(model::GeckoModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) + +Change the bounds of multiple variables in `model` simultaneously. See +[`change_bound`](@ref) for details. 
+""" +function change_bounds!( + model::GeckoModel, + ids; + lower = fill(nothing, length(ids)), + upper = fill(nothing, length(ids)), +) + for (id, lower, upper) in zip(ids, lower, upper) + change_bound!(model, id; lower = lower, upper = upper) + end +end diff --git a/src/reconstruction/smoment.jl b/src/reconstruction/smoment.jl new file mode 100644 index 000000000..669053f1a --- /dev/null +++ b/src/reconstruction/smoment.jl @@ -0,0 +1,69 @@ +""" + change_bound!(model::SMomentModel, id; lower=nothing, upper=nothing) + +Change the bound of variable in `model`. Does not change the bound if respective +bound is `nothing`. Note, for `SMomentModel`s, if the model used to construct the +`SMomentModel` has irreversible reactions, then these reactions will be +permanently irreversible in the model, i.e. changing their bounds to make them +reversible will have no effect. +""" +function change_bound!(model::SMomentModel, id; lower = nothing, upper = nothing) + + + flux_for_idx = + findfirst(x -> x == id * "§ARM§FOR" || x == id * "§FOR", model.irrev_reaction_ids) + if !isnothing(flux_for_idx) + if !isnothing(lower) + if lower <= 0 + model.xl[flux_for_idx] = 0 + else + model.xl[flux_for_idx] = lower + end + end + if !isnothing(upper) + if upper <= 0 + model.xu[flux_for_idx] = 0 + else + model.xu[flux_for_idx] = upper + end + end + end + + flux_rev_idx = + findfirst(x -> x == id * "§ARM§REV" || x == id * "§REV", model.irrev_reaction_ids) + if !isnothing(flux_rev_idx) + if !isnothing(lower) + if lower >= 0 + model.xu[flux_rev_idx] = 0 + else + model.xu[flux_rev_idx] = -lower + end + if !isnothing(upper) + if upper >= 0 + model.xl[flux_rev_idx] = 0 + else + model.xl[flux_rev_idx] = -upper + end + end + end + end + + return nothing +end + +""" + change_bounds!(model::SMomentModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) + +Change the bounds of multiple variables in `model` simultaneously. See +[`change_bound`](@ref) for details. 
+""" +function change_bounds!( + model::SMomentModel, + ids; + lower = fill(nothing, length(ids)), + upper = fill(nothing, length(ids)), +) + for (id, lower, upper) in zip(ids, lower, upper) + change_bound!(model, id; lower = lower, upper = upper) + end +end From a7984e420768e341a2526aede867c457ed3fbca9 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 12 Apr 2022 11:11:47 +0200 Subject: [PATCH 058/109] format and fix test --- src/analysis/smoment.jl | 2 +- src/base/solver.jl | 2 +- src/base/types/wrappers/SMomentModel.jl | 2 +- test/analysis/gecko.jl | 2 +- test/analysis/smoment.jl | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index 040dcb9e7..f0f290cd2 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -72,4 +72,4 @@ function make_smomentmodel( xl, xu, ) -end \ No newline at end of file +end diff --git a/src/base/solver.jl b/src/base/solver.jl index 2c4ff84e8..9d1d9a6d9 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -130,4 +130,4 @@ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = A pipeable variant of `flux_dict` """ -flux_dict(model::MetabolicModel) = x -> flux_dict(model, x) \ No newline at end of file +flux_dict(model::MetabolicModel) = x -> flux_dict(model, x) diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index 82a343ca8..33c74e152 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -129,4 +129,4 @@ function reaction_flux(model::SMomentModel) !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) end return R' -end \ No newline at end of file +end diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 24fa52e2d..c4bde5836 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -19,7 +19,7 @@ model.genes[gid].molar_mass = get(ecoli_core_protein_masses, gid, nothing) end - gm = GeckoModel( + gm = 
make_geckomodel( model; rid_isozymes, enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)], diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index cc5f331e8..0bd6d8304 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -21,7 +21,7 @@ remove_slow_isozymes!(model, rid_isozymes) - smm = SMomentModel(model; rid_isozymes, enzyme_capacity = total_protein_mass) + smm = make_smomentmodel(model; rid_isozymes, enzyme_capacity = total_protein_mass) change_bounds!( smm, From 92e53439dd316bef919f841267462d44de3ad517 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 12 Apr 2022 11:21:16 +0200 Subject: [PATCH 059/109] get rid of kwargs --- src/analysis/flux_balance_analysis.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/analysis/flux_balance_analysis.jl b/src/analysis/flux_balance_analysis.jl index dfbcbbffb..3fb160a3d 100644 --- a/src/analysis/flux_balance_analysis.jl +++ b/src/analysis/flux_balance_analysis.jl @@ -75,10 +75,10 @@ function flux_balance_analysis( model::M, optimizer; modifications = [], - kwargs..., + sense = MOI.MAX_SENSE, ) where {M<:MetabolicModel} - opt_model = make_optimization_model(model, optimizer; kwargs...) + opt_model = make_optimization_model(model, optimizer; sense) for mod in modifications mod(model, opt_model) From 50dd32753bde712abbe301b7ab67163efda077b6 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 12 Apr 2022 11:34:14 +0200 Subject: [PATCH 060/109] fix flux_dict error --- src/base/solver.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/solver.jl b/src/base/solver.jl index 9d1d9a6d9..1d467a390 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -123,7 +123,7 @@ flux_dict(model, flux_balance_analysis(model, ...)) """ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = is_solved(opt_model) ? 
- Dict(reactions(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing + Dict(fluxes(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing """ flux_dict(model::MetabolicModel) From 6a1ba06d48c374714ff9eebdff9d8fb9dca201a9 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 12:39:18 +0200 Subject: [PATCH 061/109] fix the last formatting error --- test/analysis/gecko.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index c4bde5836..f7e29e7b2 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -51,4 +51,3 @@ @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) end - From 007645b8fe32e4606fd6fb13039562296c76b73d Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 12:42:13 +0200 Subject: [PATCH 062/109] clean up trailing blanks --- src/analysis/gecko.jl | 6 +++--- src/base/types/wrappers/GeckoModel.jl | 8 ++++---- src/base/types/wrappers/SMomentModel.jl | 4 ++-- src/base/utils/enzyme.jl | 2 +- src/base/utils/guesskey.jl | 2 +- src/base/utils/irreversible_stoichiometry.jl | 4 ++-- src/reconstruction/gecko.jl | 2 +- src/reconstruction/smoment.jl | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index fbfea61c6..b473003d7 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -3,12 +3,12 @@ model::StandardModel; rid_isozymes = Dict{String, Vector{Isozyme}}(), enzyme_capacities = [(),], - ) + ) -Construct a `GeckoModel` based on `model` using the kinetic data encoded by +Construct a `GeckoModel` based on `model` using the kinetic data encoded by `rid_isozymes`. Enzyme capacity constraints can be added through `enzyme_capacities`, which is a vector of tuples. In the first position of the tuple is a list of gene ids, -and the second position is mass upperbound of the sum of these gene ids. 
+and the second position is mass upperbound of the sum of these gene ids. The units of the fluxes and protein concentration depend on those used in `rid_isozymes` for the kcats and the molar masses encoded in the genes of diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 993f16bdc..11b535349 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -9,7 +9,7 @@ constraints." Molecular systems biology, 2017.` for implementation details. Note, since the model uses irreversible reactions internally, `"§FOR"` (for the forward direction) and `"§REV"` (for the reverse direction) is appended to each reaction internally. Hence, `"§"` is reserved for internal use as a delimiter, -no reaction id should contain this character. +no reaction id should contain this character. To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel`. @@ -35,7 +35,7 @@ mutable struct GeckoModel <: MetabolicModel metabolites::Vector{String} gene_ids::Vector{String} - # gecko + # gecko c::SparseVec S::SparseMat b::SparseVec @@ -72,7 +72,7 @@ objective(model::GeckoModel) = model.c """ fluxes(model::GeckoModel) -Returns the reversible reactions in `model`. For +Returns the reversible reactions in `model`. For the irreversible reactions, use [`reactions`][@ref]. """ fluxes(model::GeckoModel) = model.reaction_ids @@ -121,7 +121,7 @@ Return the metabolites in `model`. metabolites(model::GeckoModel) = model.metabolites """ - n_metabolites(model::GeckoModel) = + n_metabolites(model::GeckoModel) = Return the number of metabolites in `model`. """ diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index 33c74e152..bc5f01be1 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -62,7 +62,7 @@ objective(model::SMomentModel) = model.c """ fluxes(model::SMomentModel) -Returns the reversible reactions in `model`. 
For +Returns the reversible reactions in `model`. For the irreversible reactions, use [`reactions`][@ref]. """ fluxes(model::SMomentModel) = model.reaction_ids @@ -96,7 +96,7 @@ Return the metabolites in `model`. metabolites(model::SMomentModel) = model.metabolites """ - n_metabolites(model::SMomentModel) = + n_metabolites(model::SMomentModel) = Return the number of metabolites in `model`. """ diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index b1ba7e8cf..d7880cfd3 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -74,7 +74,7 @@ end gid_measurements = Dict(), ) -Modify `rid_isozymes` in place by keeping only the highest expressed isozyme. +Modify `rid_isozymes` in place by keeping only the highest expressed isozyme. """ function remove_low_expressed_isozymes!( model::StandardModel, diff --git a/src/base/utils/guesskey.jl b/src/base/utils/guesskey.jl index 01f85d252..4a9ad6302 100644 --- a/src/base/utils/guesskey.jl +++ b/src/base/utils/guesskey.jl @@ -25,7 +25,7 @@ end gets(collection, fail, keys) Return `fail` if key in `keys` is not in `collection`, otherwise -return `collection[key]`. Useful if may different keys need to be +return `collection[key]`. Useful if may different keys need to be tried due to non-standardized model formats. """ function gets(collection, fail, keys) diff --git a/src/base/utils/irreversible_stoichiometry.jl b/src/base/utils/irreversible_stoichiometry.jl index beac91060..8a1178167 100644 --- a/src/base/utils/irreversible_stoichiometry.jl +++ b/src/base/utils/irreversible_stoichiometry.jl @@ -3,9 +3,9 @@ Return a stoichiometric matrix where all reactions are forward only i.e. only positive fluxes are allowed. To accomplish this for models with isozymes, -so-called arm reactions are included. Note, reactions that are irreversible +so-called arm reactions are included. Note, reactions that are irreversible in the original model will be irreversible in this model. 
E.g., if a reaction -is forward only in the original model, then there will be no reverse component +is forward only in the original model, then there will be no reverse component for this reaction in the irreversible stoichiometric matrix. """ function _build_irreversible_stoichiometric_matrix( diff --git a/src/reconstruction/gecko.jl b/src/reconstruction/gecko.jl index 34c153a8d..523bf4a2e 100644 --- a/src/reconstruction/gecko.jl +++ b/src/reconstruction/gecko.jl @@ -64,7 +64,7 @@ end """ change_bounds!(model::GeckoModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) -Change the bounds of multiple variables in `model` simultaneously. See +Change the bounds of multiple variables in `model` simultaneously. See [`change_bound`](@ref) for details. """ function change_bounds!( diff --git a/src/reconstruction/smoment.jl b/src/reconstruction/smoment.jl index 669053f1a..c5c3b2fad 100644 --- a/src/reconstruction/smoment.jl +++ b/src/reconstruction/smoment.jl @@ -54,7 +54,7 @@ end """ change_bounds!(model::SMomentModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) -Change the bounds of multiple variables in `model` simultaneously. See +Change the bounds of multiple variables in `model` simultaneously. See [`change_bound`](@ref) for details. 
""" function change_bounds!( From 4280b31d47f840f121fc9d3b0cf6d919c279caee Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 12:45:19 +0200 Subject: [PATCH 063/109] optimization sense should be passed to FBA via modifications --- src/analysis/flux_balance_analysis.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/analysis/flux_balance_analysis.jl b/src/analysis/flux_balance_analysis.jl index 3fb160a3d..25a534b08 100644 --- a/src/analysis/flux_balance_analysis.jl +++ b/src/analysis/flux_balance_analysis.jl @@ -75,10 +75,9 @@ function flux_balance_analysis( model::M, optimizer; modifications = [], - sense = MOI.MAX_SENSE, ) where {M<:MetabolicModel} - opt_model = make_optimization_model(model, optimizer; sense) + opt_model = make_optimization_model(model, optimizer) for mod in modifications mod(model, opt_model) From 186da1dea94948bc35f1456044dbd5d4a02f9b0c Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 12:48:14 +0200 Subject: [PATCH 064/109] add an example for flux_dict --- src/base/solver.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/base/solver.jl b/src/base/solver.jl index 1d467a390..1fb5b3a15 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -128,6 +128,11 @@ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = """ flux_dict(model::MetabolicModel) -A pipeable variant of `flux_dict` +A pipeable variant of `flux_dict`. + +# Example +``` +flux_balance_analysis(model, ...) 
|> flux_dict(model) +``` """ flux_dict(model::MetabolicModel) = x -> flux_dict(model, x) From 2f8c0c93d1ea250ce14af7e5b3259f50fd8043cd Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 13:17:30 +0200 Subject: [PATCH 065/109] clean up struct Isozyme --- src/analysis/smoment.jl | 2 +- src/base/types/Isozyme.jl | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index f0f290cd2..10dea4c2e 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -38,7 +38,7 @@ function make_smomentmodel( # these entries have kcats, only one GRR by assumption isozyme = first(rid_isozymes[original_rid]) mw = sum([model.genes[gid].molar_mass * ps for (gid, ps) in isozyme.stoichiometry]) - kcat = contains(rid, "§FOR") ? first(isozyme.kcats) : last(isozyme.kcats) + kcat = contains(rid, "§FOR") ? isozyme.kcat_forward : isozyme.kcat_reverse Se[1, col_idx] = -mw / kcat end diff --git a/src/base/types/Isozyme.jl b/src/base/types/Isozyme.jl index b3fdcd6bc..685f86c35 100644 --- a/src/base/types/Isozyme.jl +++ b/src/base/types/Isozyme.jl @@ -1,17 +1,18 @@ """ mutable struct Isozyme -Struct containing isozyme information. Here, `stoichiometry` is a dictionary of -gene ids to their stoichiometry in the isozyme complex, and `kcats` is a tuple -of the forward and reverse turnover numbers of the isozyme. +Information about isozyme composition and activity. 
# Fields -```` -stoichiometry :: Dict{String, Int} -kcats :: Tuple{Float64, Float64} +- `gene_product_count :: Dict{String, Int}` assigns each gene product ID its + count in the isozyme complex (which is used to determine the total mass of + the isozyme) +- `kcat_forward`, `kcat_reverse` -- forward and reverse turnover numbers of the + isozyme ```` """ mutable struct Isozyme - stoichiometry::Dict{String,Int} - kcats::Tuple{Float64,Float64} + gene_product_count::Dict{String,Int} + kcat_forward::Float64 + kcat_reverse::Float64 end From ed05987872970974b3dec8a9b2714b3287497ccc Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 15:47:20 +0200 Subject: [PATCH 066/109] clean up solver stuff (we reexport the MOI constants right?) --- src/base/solver.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/base/solver.jl b/src/base/solver.jl index 1fb5b3a15..058ab5b40 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -3,7 +3,7 @@ make_optimization_model( model::MetabolicModel, optimizer; - sense = MOI.MAX_SENSE, + sense = MAX_SENSE, ) Convert `MetabolicModel`s to a JuMP model, place objectives and the equality @@ -11,7 +11,7 @@ constraint. Here coupling means inequality constraints coupling multiple variables together. """ -function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.MAX_SENSE) +function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_SENSE) precache!(model) @@ -135,4 +135,4 @@ A pipeable variant of `flux_dict`. flux_balance_analysis(model, ...) 
|> flux_dict(model) ``` """ -flux_dict(model::MetabolicModel) = x -> flux_dict(model, x) +flux_dict(model::MetabolicModel) = opt_model -> flux_dict(model, opt_model) From 29a13d629ba834a32b906c39e833d6f1fd35a104 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 15:48:16 +0200 Subject: [PATCH 067/109] reimplement sMOMENT as a wrapper around the model --- src/analysis/smoment.jl | 121 +++++++++---------- src/base/types/wrappers/SMomentModel.jl | 154 ++++++++++++------------ src/reconstruction/smoment.jl | 11 ++ 3 files changed, 145 insertions(+), 141 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index 10dea4c2e..4b0a9b80e 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -1,75 +1,72 @@ + """ - make_smomentmodel( - model::StandardModel; - rid_isozymes = Dict{String, Vector{Isozyme}}(), + make_smoment_model( + model::MetabolicModel; + reaction_isozymes, + gene_product_capacity_required, + total_enzyme_capacity = 0.0, ) -Construct an `SMomentModel` model using `model` and `rid_isozymes`. +Construct an [`SMomentModel`](@ref) model using the inner `model` and a map of +isozymes. """ -function make_smomentmodel( - model::StandardModel; - rid_isozymes = Dict{String,Vector{Isozyme}}(), - enzyme_capacity = 0.0, +function make_smoment_model( + model::MetabolicModel; + reaction_isozymes, + gene_product_molar_mass, + total_enzyme_capacity::Float64, ) + columns = Vector{_smoment_column}() + coupling_row_reaction = Int[] - # check that input data is in correct format for smoment - if any(length(v) > 1 for v in values(rid_isozymes)) - @warn( - "For SMOMENT to work correctly, no isozymes are allowed. Call `remove_slow_isozymes!` to fix the input data." 
- ) - end - - irrevS, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model) + (lbs, ubs) = bounds(model) + rids = reactions(model) - #: size of resultant model - num_reactions = size(irrevS, 2) - num_metabolites = size(irrevS, 1) - num_vars = num_reactions + 1 + for i = 1:n_reactions(model) + isozyme = reaction_isozymes(rids[i]) + if isnothing(isozyme) + # non-enzymatic reaction (or a totally ignored one) + push!(columns, _smoment_column(i, 0, 0, lbs[i], ubs[i], 0)) + else + # pick a new row for "arm reaction" coupling + coupling_row = length(coupling_row_reaction) + 1 + push!(coupling_row_reaction, i) - #: equality lhs - Se = zeros(1, num_reactions) + mw = sum( + gene_product_molar_mass(gid) * ps for (gid, ps) in isozyme.stoichiometry + ) - for (rid, col_idx) in reaction_map - original_rid = string(split(rid, "§")[1]) + if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance + # reaction can run in reverse + push!( + columns, + _smoment_column( + i, + -1, + coupling_row, + max(-ubs[i], 0), + -lbs[i], + mw / isozyme.kcat_reverse, + ), + ) + end - # skip these entries - !haskey(rid_isozymes, original_rid) && continue - # these entries have kcats, only one GRR by assumption - isozyme = first(rid_isozymes[original_rid]) - mw = sum([model.genes[gid].molar_mass * ps for (gid, ps) in isozyme.stoichiometry]) - kcat = contains(rid, "§FOR") ? 
isozyme.kcat_forward : isozyme.kcat_reverse - Se[1, col_idx] = -mw / kcat + if min(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance + # reaction can run forward + push!( + columns, + _smoment_column( + i, + 1, + coupling_row, + max(lbs[i], 0), + ubs[i], + mw / isozyme.kcat_forward, + ), + ) + end + end end - S = [ - irrevS zeros(num_metabolites, 1) - Se 1.0 - ] - - #: equality rhs - b = zeros(num_metabolites + 1) - - #: find objective - obj_idx_orig = first(findnz(objective(model))[1]) - obj_id_orig = reactions(model)[obj_idx_orig] - obj_id = obj_id_orig * "§FOR" # assume forward reaction is objective - c = spzeros(num_vars) - obj_idx = reaction_map[obj_id] - c[obj_idx] = 1.0 - - #: bounds - xl = sparse([lb_fluxes; 0.0]) - xu = sparse([ub_fluxes; enzyme_capacity]) - - return SMomentModel( - reactions(model), - _order_id_to_idx_dict(reaction_map), - _order_id_to_idx_dict(metabolite_map), - c, - S, - b, - xl, - xu, - ) + return SMomentModel(columns, coupling_row_reaction, total_enzyme_capacity, model) end diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index bc5f01be1..81db64ea9 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -1,5 +1,21 @@ + +""" + struct _smoment_column + +A helper type that describes the contents of [`SMomentModel`](@ref)s. 
""" - mutable struct SMomentModel <: MetabolicModel +struct _smoment_column + reaction_id::Int # number of the corresponding reaction in the inner model + direction::Int # 0 if "as is" and unique, -1 if reverse-only part, 1 if forward-only part + coupling_row::Int # number of row in the coupling (0 if direction==0) + lb::Float64 # must be 0 if the reaction is unidirectional (if direction!=0) + ub::Float64 + capacity_required::Float64 # must be 0 for bidirectional reactions (if direction==0) +end + +# TODO fix the docstring +""" + mutable struct SMomentModel <: ModelWrapper Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, and Steffen Klamt. "Automatic construction of metabolic models with enzyme @@ -11,122 +27,102 @@ single enzyme (one GRR) associated with it. It is required that a model be modified to ensure that this condition is met. For ease-of-use, [`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only `modifications` that change attributes of the `optimizer` are supported. +""" +mutable struct SMomentModel <: ModelWrapper + columns::Vector{_smoment_column} + coupling_row_reaction::Vector{Int} + total_enzyme_capacity::Float64 -# Fields -``` -reaction_ids::Vector{String} -irrev_reaction_ids::Vector{String} -metabolites::Vector{String} -c::SparseVec -S::SparseMat -b::SparseVec -xl::SparseVec -xu::SparseVec -C::SparseMat -cl::Vector{Float64} -cu::Vector{Float64} -``` -""" -mutable struct SMomentModel <: MetabolicModel - reaction_ids::Vector{String} - irrev_reaction_ids::Vector{String} - metabolites::Vector{String} - c::SparseVec - S::SparseMat - b::SparseVec - xl::SparseVec - xu::SparseVec + inner::MetabolicModel end -""" - stoichiometry(model::SMomentModel) - -Return stoichiometry matrix that includes enzymes as metabolites. 
-""" -stoichiometry(model::SMomentModel) = model.S +unwrap_model(model::SMomentModel) = model.inner """ - balance(model::SMomentModel) + stoichiometry(model::SMomentModel) -Return stoichiometric balance. +Return a stoichiometry of the [`SMomentModel`](@ref). The enzymatic reactions +are split into unidirectional forward and reverse ones. """ -balance(model::SMomentModel) = model.b +stoichiometry(model::SMomentModel) = + stoichiometry(model.inner) * _smoment_column_reactions(model) """ objective(model::SMomentModel) -Return objective of `model`. -""" -objective(model::SMomentModel) = model.c - -""" - fluxes(model::SMomentModel) - -Returns the reversible reactions in `model`. For -the irreversible reactions, use [`reactions`][@ref]. +Reconstruct an objective of the [`SMomentModel`](@ref). """ -fluxes(model::SMomentModel) = model.reaction_ids +objective(model::SMomentModel) = _smoment_column_reactions(model)' * objective(model.inner) """ - n_fluxes(model::SMomentModel) + irreversible_reactions(model::SMomentModel) -Returns the number of reversible reactions in the model. +Returns the irreversible reactions in `model`. """ -n_fluxes(model::SMomentModel) = length(model.reaction_ids) +reactions(model::SMomentModel) = + let inner_reactions = reactions(model.inner) + [ + _smoment_reaction_name(inner_reactions[col.reaction_id], col.direction) for + col in model.columns + ] + end """ - irreversible_reactions(model::SMomentModel) + n_reactions(model::SMomentModel) -Returns the irreversible reactions in `model`. +The number of reactions (including split ones) in [`SMomentModel`](@ref). """ -reactions(model::SMomentModel) = model.irrev_reaction_ids +n_reactions(model::SMomentModel) = length(model.columns) """ - n_reactions(model::SMomentModel) + bounds(model::SMomentModel) -Returns the number of irreversible reactions in `model`. +Return the variable bounds for [`SMomentModel`](@ref). 
""" -n_reactions(model::SMomentModel) = length(model.irrev_reaction_ids) +bounds(model::SMomentModel) = + ([col.lb for col in model.columns], [col.ub for col in model.columns]) """ - metabolites(model::SMomentModel) + reaction_flux(model::SMomentModel) -Return the metabolites in `model`. +Get the mapping of the reaction rates in [`SMomentModel`](@ref) to the original +fluxes in the wrapped model. """ -metabolites(model::SMomentModel) = model.metabolites +reaction_flux(model::SMomentModel) = + reaction_flux(model.inner)' * _smoment_column_reactions(model) """ - n_metabolites(model::SMomentModel) = + coupling(model::SMomentModel) -Return the number of metabolites in `model`. +Return the coupling of [`SMomentModel`](@ref). That combines the coupling of +the wrapped model, coupling for split reactions, and the coupling for the total +enzyme capacity. """ -n_metabolites(model::SMomentModel) = length(metabolites(model)) +coupling(model::SMomentModel) = vcat( + coupling(model.inner), + _smoment_reaction_coupling(model), + [col.capacity_required for col in model.columns]', +) """ - bounds(model::SMomentModel) + n_coupling_constraints(model::SMomentModel) -Return variable bounds for `SMomentModel`. +Count the coupling constraints in [`SMomentModel`](@ref) (refer to +[`coupling`](@ref) for details). """ -bounds(model::SMomentModel) = (model.xl, model.xu) +n_coupling_constraints(model::SMomentModel) = + n_coupling_constraints(model.inner) + _smoment_n_reaction_couplings(model) + 1 """ - reaction_flux(model::MetabolicModel) + coupling_bounds(model::SMomentModel) -Helper function to get fluxes from optimization problem. +The coupling bounds for [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for +details). 
""" -function reaction_flux(model::SMomentModel) - R = spzeros(n_fluxes(model), n_reactions(model) + 1) - for (i, rid) in enumerate(fluxes(model)) - for_idx = findfirst( - x -> x == rid * "§ARM§FOR" || x == rid * "§FOR", - model.irrev_reaction_ids, - ) - rev_idx = findfirst( - x -> x == rid * "§ARM§REV" || x == rid * "§REV", - model.irrev_reaction_ids, - ) - !isnothing(for_idx) && (R[i, for_idx] = 1.0) - !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) +coupling_bounds(model::SMomentModel) = + let + (ilb, iub) = + n_coupling_constraints(model.inner), (rlb, rub) = + _smoment_reaction_coupling_bounds(model) + (vcat(ilb, rlb, 0), vcat(iub, rub, model.total_enzyme_capacity)) end - return R' -end diff --git a/src/reconstruction/smoment.jl b/src/reconstruction/smoment.jl index c5c3b2fad..a5c5ae48c 100644 --- a/src/reconstruction/smoment.jl +++ b/src/reconstruction/smoment.jl @@ -1,3 +1,14 @@ + + +""" + with_smoment(; kwargs...) + +Specifies a model variant which adds extra semantics of the sMOMENT algorithm, +giving a [`SMomentModel`](@ref). The arguments are forwarded to +[`make_smoment_model`](@ref). Intended for usage with [`screen`](@ref). +""" +with_smoment(; kwargs...) = model -> make_smoment_model(model; kwargs...) + """ change_bound!(model::SMomentModel, id; lower=nothing, upper=nothing) From 964aeb16b535ad9434c57fe02e8520afec1e8902 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 15:55:22 +0200 Subject: [PATCH 068/109] mangle accordingly. --- src/base/types/wrappers/SMomentModel.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index 81db64ea9..be20acdd9 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -55,9 +55,11 @@ Reconstruct an objective of the [`SMomentModel`](@ref). 
objective(model::SMomentModel) = _smoment_column_reactions(model)' * objective(model.inner) """ - irreversible_reactions(model::SMomentModel) + reactions(model::SMomentModel) -Returns the irreversible reactions in `model`. +Returns the internal reactions in a [`SMomentModel`](@ref) (these may be split +to forward- and reverse-only parts; reactions IDs mangled accordingly with +suffixes). """ reactions(model::SMomentModel) = let inner_reactions = reactions(model.inner) From a2918bcac8acd7eb3e86d9c9a31bb90953f3a943 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 16:03:31 +0200 Subject: [PATCH 069/109] fix format --- src/base/types/wrappers/SMomentModel.jl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index be20acdd9..8a0f8f665 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -121,10 +121,8 @@ n_coupling_constraints(model::SMomentModel) = The coupling bounds for [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for details). 
""" -coupling_bounds(model::SMomentModel) = - let - (ilb, iub) = - n_coupling_constraints(model.inner), (rlb, rub) = - _smoment_reaction_coupling_bounds(model) - (vcat(ilb, rlb, 0), vcat(iub, rub, model.total_enzyme_capacity)) - end +function coupling_bounds(model::SMomentModel) + (ilb, iub) = n_coupling_constraints(model.inner) + (rlb, rub) = _smoment_reaction_coupling_bounds(model) + return (vcat(ilb, rlb, 0), vcat(iub, rub, model.total_enzyme_capacity)) +end From 75c1fca12a4ffbb59734b40bfbf29046f083a214 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 16:12:29 +0200 Subject: [PATCH 070/109] forgot a file --- src/base/utils/smoment.jl | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/base/utils/smoment.jl diff --git a/src/base/utils/smoment.jl b/src/base/utils/smoment.jl new file mode 100644 index 000000000..6c1315c89 --- /dev/null +++ b/src/base/utils/smoment.jl @@ -0,0 +1,57 @@ + +""" + _smoment_reaction_name(original_name::String, direction::Int) + +Internal helper for systematically naming reactions in [`SMomentModel`](@ref). +""" +_smoment_reaction_name(original_name::String, direction::Int) = + direction == 0 ? original_name : + direction > 0 ? "$original_name#forward" : "$original_name#reverse" + +""" + _smoment_column_reactions(model::SMomentModel) + +Retrieve a utility mapping between reactions and split reactions; rows +correspond to "original" reactions, columns correspond to "split" reactions. +""" +_smoment_column_reactions(model::SMomentModel) = sparse( + [col.reaction_id for col in model.columns], + 1:length(model.columns), + [col.direction >= 0 ? 1 : -1 for col in model.columns], + n_reactions(model.inner), + length(model.columns), +) + +""" + _smoment_reaction_coupling(model::SMomentModel) + +Compute the part of the coupling for [`SMomentModel`](@ref) that limits the +"arm" reactions (which group the individual split unidirectional reactions). 
+""" +_smoment_reaction_coupling(model::SMomentModel) = sparse( + [col.coupling_row for col in model.columns if col.direction != 0], + [i for (i, col) in enumerate(model.columns) if col.direction != 0], + [col.direction for col in model.columns if col.direction != 0], + _smoment_n_reaction_couplings(model), + length(model.columns), +) + +""" + _smoment_n_reaction_couplings(model::SMomentModel) + +Internal helper for determining the number of required couplings to account for +"arm" reactions. +""" +_smoment_n_reaction_couplings(model::SMomentModel) = + isempty(model.columns) ? 0 : maximum(col.coupling_row for col in model.columns) + +""" + _smoment_reaction_coupling_bounds(model::SMomentModel) + +Return bounds that limit the "arm" reactions in [`SMomentModel`](@ref). The +values are taken from the "original" inner model. +""" +_smoment_reaction_coupling_bounds(model::SMomentModel) = + let (lbs, ubs) = bounds(model.inner) + (lbs[coupling_row_reaction], ubs[coupling_row_reaction]) + end From 07783ed19343bbdc8b4e1eec16c17a0a0fb8d9a4 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 16:14:06 +0200 Subject: [PATCH 071/109] improve guessing of smoment coupling size --- src/base/utils/smoment.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/utils/smoment.jl b/src/base/utils/smoment.jl index 6c1315c89..b4d90df15 100644 --- a/src/base/utils/smoment.jl +++ b/src/base/utils/smoment.jl @@ -43,7 +43,7 @@ Internal helper for determining the number of required couplings to account for "arm" reactions. """ _smoment_n_reaction_couplings(model::SMomentModel) = - isempty(model.columns) ? 
0 : maximum(col.coupling_row for col in model.columns) + length(model.coupling_row_reaction) """ _smoment_reaction_coupling_bounds(model::SMomentModel) From f4067a143d73a1e528cb6915b32d33de6d893f3d Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 22:22:10 +0200 Subject: [PATCH 072/109] prepare make_smoment_model for overloading --- src/analysis/smoment.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index 4b0a9b80e..1398b809f 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -2,9 +2,9 @@ """ make_smoment_model( model::MetabolicModel; - reaction_isozymes, - gene_product_capacity_required, - total_enzyme_capacity = 0.0, + reaction_isozymes::Function, + gene_product_capacity_required::Function, + total_enzyme_capacity, ) Construct an [`SMomentModel`](@ref) model using the inner `model` and a map of @@ -12,8 +12,8 @@ isozymes. """ function make_smoment_model( model::MetabolicModel; - reaction_isozymes, - gene_product_molar_mass, + reaction_isozymes::Function, + gene_product_molar_mass::Function, total_enzyme_capacity::Float64, ) columns = Vector{_smoment_column}() From 1dd5d7b73bcaf4d6c8dac4b2ed0100272c37c50e Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 22:23:15 +0200 Subject: [PATCH 073/109] many small smoment fixes that should make it just work --- src/analysis/smoment.jl | 5 +- src/base/types/wrappers/SMomentModel.jl | 64 +++++-- src/base/utils/enzyme.jl | 33 ---- src/base/utils/smoment.jl | 37 +++- test/analysis/smoment.jl | 51 +++--- test/data_static.jl | 216 ++++++++++++------------ 6 files changed, 215 insertions(+), 191 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index 1398b809f..2cc86186b 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -33,7 +33,8 @@ function make_smoment_model( push!(coupling_row_reaction, i) mw = sum( - gene_product_molar_mass(gid) * ps for 
(gid, ps) in isozyme.stoichiometry + gene_product_molar_mass(gid) * ps for + (gid, ps) in isozyme.gene_product_count ) if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance @@ -51,7 +52,7 @@ function make_smoment_model( ) end - if min(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance + if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance # reaction can run forward push!( columns, diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index 8a0f8f665..4880c1991 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -15,20 +15,48 @@ end # TODO fix the docstring """ - mutable struct SMomentModel <: ModelWrapper - -Construct an enzyme capacity constrained model see `Bekiaris, Pavlos Stephanos, -and Steffen Klamt. "Automatic construction of metabolic models with enzyme -constraints." BMC bioinformatics, 2020.` for implementation details. - -Note, `"§"` is reserved for internal use as a delimiter, no reaction id should -contain that character. Also note, SMOMENT assumes that each reaction only has a -single enzyme (one GRR) associated with it. It is required that a model be -modified to ensure that this condition is met. For ease-of-use, -[`remove_slow_isozymes!`](@ref) is supplied to effect this. Currently only -`modifications` that change attributes of the `optimizer` are supported. -""" -mutable struct SMomentModel <: ModelWrapper + struct SMomentModel <: ModelWrapper + +Construct an enzyme-capacity constrained model using sMOMENT algorithm, as +described by *Bekiaris, Pavlos Stephanos, and Steffen Klamt, "Automatic +construction of metabolic models with enzyme constraints" BMC bioinformatics, +2020*. + +Use [`make_smoment_model`](@ref) or [`with_smoment`](@ref) to construct the +models. 
+ +The model is constructed as follows: +- stoichiometry of the original model is retained as much as possible, but + enzymatic reactions are split into forward and reverse parts (marked by a + suffix like `...#forward` and `...#reverse`), +- sums of forward and reverse reaction pair fluxes are constrained accordingly + to the original model, +- stoichiometry is expanded by a virtual metabolite "enzyme capacity" which is + consumed by all enzymatic reactions at a rate given by enzyme mass divided by + the corresponding kcat, +- the total consumption of the enzyme capacity is constrained by a fixed + maximum. + +The `SMomentModel` structure contains a worked-out representation of the +optimization problem atop a wrapped [`MetabolicModel`](@ref), in particular the +separation of certain reactions into unidirectional forward and reverse parts, +the grouping of these reactions together into virtual "arm" reactions constrained +by bounds from the inner model, an "enzyme capacity" required for each +reaction, and the value of the maximum capacity constraint. + +In the structure, field `columns` describes the correspondence of stoichiometry +columns to the stoichiometry and data of the internal wrapped model; field +`coupling_row_reaction` maps the generated coupling constraints to reaction +indexes in the wrapped model, and `total_enzyme_capacity` is the total bound on +the enzyme capacity consumption as specified in sMOMENT algorithm. + +This implementation allows easy access to fluxes from the split reactions +(available in `reactions(model)`), while the original "simple" reactions from +the wrapped model are retained as [`fluxes`](@ref). All additional constraints +are implemented using [`coupling`](@ref) and [`coupling_bounds`](@ref). +Original coupling is retained.
+""" +struct SMomentModel <: ModelWrapper columns::Vector{_smoment_column} coupling_row_reaction::Vector{Int} total_enzyme_capacity::Float64 @@ -91,7 +119,7 @@ Get the mapping of the reaction rates in [`SMomentModel`](@ref) to the original fluxes in the wrapped model. """ reaction_flux(model::SMomentModel) = - reaction_flux(model.inner)' * _smoment_column_reactions(model) + reaction_flux(model.inner) * _smoment_column_reactions(model) """ coupling(model::SMomentModel) @@ -101,7 +129,7 @@ the wrapped model, coupling for split reactions, and the coupling for the total enzyme capacity. """ coupling(model::SMomentModel) = vcat( - coupling(model.inner), + coupling(model.inner) * _smoment_column_reactions(model), _smoment_reaction_coupling(model), [col.capacity_required for col in model.columns]', ) @@ -122,7 +150,7 @@ The coupling bounds for [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for details). """ function coupling_bounds(model::SMomentModel) - (ilb, iub) = n_coupling_constraints(model.inner) + (ilb, iub) = coupling_bounds(model.inner) (rlb, rub) = _smoment_reaction_coupling_bounds(model) - return (vcat(ilb, rlb, 0), vcat(iub, rub, model.total_enzyme_capacity)) + return (vcat(ilb, rlb, [0.0]), vcat(iub, rub, [model.total_enzyme_capacity])) end diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index d7880cfd3..630a90ee2 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -34,39 +34,6 @@ function get_genes_with_kcats(rid_isozymes::Dict{String,Vector{Isozyme}}) return unique(gids) end -""" - remove_slow_isozymes!( - model::StandardModel, - rid_isozymes = Dict{String, Vector{Isozyme}}(), - ) - -Remove all but the fastest isozymes from `rid_isozymes`. Use the largest kcat -(for, rev) for these calculations. Modifies `rid_isozymes` in place. 
-""" -function remove_slow_isozymes!( - model::StandardModel, - rid_isozymes = Dict{String,Vector{Isozyme}}(), -) - for (rid, isozymes) in rid_isozymes - kcat_effs = Float64[] - for isozyme in isozymes - gid_stoich = isozyme.stoichiometry - kcats = isozyme.kcats - push!( - kcat_effs, - dot( - [stoich for stoich in values(gid_stoich)], - [model.genes[gid].molar_mass for gid in keys(gid_stoich)], - ) / maximum(kcats), - ) - end - idx = argmin(kcat_effs) - rid_isozymes[rid] = [rid_isozymes[rid][idx]] - end - - return nothing -end - """ remove_low_expressed_isozymes!( model::StandardModel, diff --git a/src/base/utils/smoment.jl b/src/base/utils/smoment.jl index b4d90df15..2f03575f2 100644 --- a/src/base/utils/smoment.jl +++ b/src/base/utils/smoment.jl @@ -42,8 +42,7 @@ _smoment_reaction_coupling(model::SMomentModel) = sparse( Internal helper for determining the number of required couplings to account for "arm" reactions. """ -_smoment_n_reaction_couplings(model::SMomentModel) = - length(model.coupling_row_reaction) +_smoment_n_reaction_couplings(model::SMomentModel) = length(model.coupling_row_reaction) """ _smoment_reaction_coupling_bounds(model::SMomentModel) @@ -53,5 +52,37 @@ values are taken from the "original" inner model. """ _smoment_reaction_coupling_bounds(model::SMomentModel) = let (lbs, ubs) = bounds(model.inner) - (lbs[coupling_row_reaction], ubs[coupling_row_reaction]) + (lbs[model.coupling_row_reaction], ubs[model.coupling_row_reaction]) end + +""" + smoment_isozyme_speed(isozyme::Isozyme, gene_product_molar_mass) + +Compute a "score" for picking the most viable isozyme for +[`make_smoment_model`](@ref), based on maximum kcat divided by relative mass of +the isozyme. This is used because sMOMENT algorithm can not handle multiple +isozymes for one reaction. 
+""" +smoment_isozyme_speed(isozyme::Isozyme, gene_product_molar_mass) = + max(isozyme.kcat_forward, isozyme.kcat_reverse) / sum( + count * gene_product_molar_mass(gene) for + (gene, count) in isozyme.gene_product_count + ) + +""" + smoment_isozyme_speed(gene_product_molar_mass::Function) + +A piping- and argmax-friendly overload of [`smoment_isozyme_speed`](@ref). + +# Example +``` +gene_mass_function = gid -> 1.234 + +best_isozyme_for_smoment = argmax( + smoment_isozyme_speed(gene_mass_function), + my_isozyme_vector, +) +``` +""" +smoment_isozyme_speed(gene_product_molar_mass::Function) = + isozyme -> smoment_isozyme_speed(isozyme, gene_product_molar_mass) diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 0bd6d8304..f14d2d2d6 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -1,37 +1,34 @@ @testset "SMOMENT" begin model = load_model(StandardModel, model_paths["e_coli_core.json"]) - total_protein_mass = 100 # mg/gdW - #: construct isozymes from model - rid_isozymes = Dict{String,Vector{Isozyme}}() - for (rid, kcats) in ecoli_core_reaction_kcats - grrs = reaction_gene_association(model, rid) - rid_isozymes[rid] = [ - Isozyme( - Dict(grrs[i] .=> ecoli_core_protein_stoichiometry[rid][i]), - (kcats[i][1], kcats[i][2]), - ) for i = 1:length(grrs) - ] - end + get_gene_product_mass = gid -> get(ecoli_core_protein_masses, gid, 0.0) - #: add molar mass to genes in model - for (gid, g) in model.genes - model.genes[gid].molar_mass = get(ecoli_core_protein_masses, gid, nothing) - end + get_reaction_isozyme = + rid -> + haskey(ecoli_core_reaction_kcats, rid) ? 
+ argmax( + smoment_isozyme_speed(get_gene_product_mass), + Isozyme( + Dict(grr .=> ecoli_core_protein_stoichiometry[rid][i]), + ecoli_core_reaction_kcats[rid][i]..., + ) for (i, grr) in enumerate(reaction_gene_association(model, rid)) + ) : nothing - remove_slow_isozymes!(model, rid_isozymes) - - smm = make_smomentmodel(model; rid_isozymes, enzyme_capacity = total_protein_mass) - - change_bounds!( - smm, - ["EX_glc__D_e", "GLCpts"]; - lower = [-1000.0, -1.0], - upper = [nothing, 12.0], - ) + smoment_model = + model |> + with_changed_bounds( + ["EX_glc__D_e", "GLCpts"], + lower = [-1000.0, -1.0], + upper = [nothing, 12.0], + ) |> + with_smoment( + reaction_isozymes = get_reaction_isozyme, + gene_product_molar_mass = get_gene_product_mass, + total_enzyme_capacity = 100.0, + ) rxn_fluxes = flux_balance_analysis_dict( - smm, + smoment_model, Tulip.Optimizer; modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], ) diff --git a/test/data_static.jl b/test/data_static.jl index ebe01c3c5..1492a5f14 100644 --- a/test/data_static.jl +++ b/test/data_static.jl @@ -73,7 +73,7 @@ test_toyModel() = CoreModel( ["m1[c]", "m3[c]", "m2[c]", "m1[e]", "m3[e]", "biomass[c]"], ) -const reaction_standard_gibbs_free_energies = Dict( +const reaction_standard_gibbs_free_energies = Dict{String,Float64}( #= ΔᵣG⁰ data from Equilibrator using the E. coli core model's reactions To generate this data manually, go to https://equilibrator.weizmann.ac.il/ and @@ -155,7 +155,7 @@ const reaction_standard_gibbs_free_energies = Dict( "FUM" => -3.424133018702122, ) -const ecoli_core_protein_masses = Dict( +const ecoli_core_protein_masses = Dict{String,Float64}( #= Data downloaded from Uniprot for E. coli K12, gene mass in kDa. 
To obtain these data yourself, go to @@ -300,7 +300,7 @@ const ecoli_core_protein_masses = Dict( "b2279" => 10.845, ) -const ecoli_core_protein_stoichiometry = Dict( +const ecoli_core_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}( #= Data made up, each isozyme is assumed to be composed of only one subunit each. @@ -381,7 +381,7 @@ const ecoli_core_protein_stoichiometry = Dict( "FUM" => [[1.0], [1.0], [1.0]], ) -const ecoli_core_reaction_kcats = Dict( +const ecoli_core_reaction_kcats = Dict{String,Vector{Tuple{Float64,Float64}}}( #= Data taken from Heckmann, David, et al. "Machine learning applied to enzyme turnover numbers reveals protein structural correlates and improves metabolic @@ -389,164 +389,164 @@ const ecoli_core_reaction_kcats = Dict( kcats are the same, and each isozyme has the same kcat. =# "ACALD" => - [[568.1130792316333, 568.1130792316333], [568.856126503717, 568.856126503717]], + [(568.1130792316333, 568.1130792316333), (568.856126503717, 568.856126503717)], "PTAr" => [ - [1171.9703624351055, 1171.9703624351055], - [1173.7231032615289, 1173.7231032615289], + (1171.9703624351055, 1171.9703624351055), + (1173.7231032615289, 1173.7231032615289), ], "ALCD2x" => [ - [75.9547881894345, 75.9547881894345], - [75.96334310351442, 75.96334310351442], - [76.1472359297987, 76.1472359297987], + (75.9547881894345, 75.9547881894345), + (75.96334310351442, 75.96334310351442), + (76.1472359297987, 76.1472359297987), ], - "PDH" => [[529.7610874857239, 529.7610874857239]], + "PDH" => [(529.7610874857239, 529.7610874857239)], "PYK" => [ - [422.0226052080562, 422.0226052080562], - [422.1332899347833, 422.1332899347833], + (422.0226052080562, 422.0226052080562), + (422.1332899347833, 422.1332899347833), ], - "MALt2_2" => [[234.03664660088714, 234.03664660088714]], - "CS" => [[113.29607453875758, 113.29607453875758]], + "MALt2_2" => [(234.03664660088714, 234.03664660088714)], + "CS" => [(113.29607453875758, 113.29607453875758)], "PGM" => [ - 
[681.4234715886669, 681.4234715886669], - [681.6540601244343, 681.6540601244343], - [680.5234799168278, 680.5234799168278], + (681.4234715886669, 681.4234715886669), + (681.6540601244343, 681.6540601244343), + (680.5234799168278, 680.5234799168278), ], "TKT1" => [ - [311.16139580671637, 311.16139580671637], - [311.20967965149947, 311.20967965149947], + (311.16139580671637, 311.16139580671637), + (311.20967965149947, 311.20967965149947), ], "ACONTa" => [ - [191.02308213992006, 191.02308213992006], - [191.03458045697235, 191.03458045697235], + (191.02308213992006, 191.02308213992006), + (191.03458045697235, 191.03458045697235), ], "GLNS" => [ - [89.83860937287024, 89.83860937287024], - [89.82177852142014, 89.82177852142014], + (89.83860937287024, 89.83860937287024), + (89.82177852142014, 89.82177852142014), ], - "ICL" => [[17.45922330097792, 17.45922330097792]], + "ICL" => [(17.45922330097792, 17.45922330097792)], "FBA" => [ - [373.425646787578, 373.425646787578], - [372.74936053215833, 372.74936053215833], - [372.88627228768166, 372.88627228768166], + (373.425646787578, 373.425646787578), + (372.74936053215833, 372.74936053215833), + (372.88627228768166, 372.88627228768166), ], "FORt2" => [ - [233.93045260179326, 233.93045260179326], - [233.84804009142908, 233.84804009142908], + (233.93045260179326, 233.93045260179326), + (233.84804009142908, 233.84804009142908), ], - "G6PDH2r" => [[589.3761070080022, 589.3761070080022]], - "AKGDH" => [[264.48071159327156, 264.48071159327156]], + "G6PDH2r" => [(589.3761070080022, 589.3761070080022)], + "AKGDH" => [(264.48071159327156, 264.48071159327156)], "TKT2" => [ - [467.4226876901618, 467.4226876901618], - [468.1440593542596, 468.1440593542596], + (467.4226876901618, 467.4226876901618), + (468.1440593542596, 468.1440593542596), ], - "FRD7" => [[90.20637824912605, 90.20637824912605]], - "SUCOAS" => [[18.494387648707622, 18.494387648707622]], + "FRD7" => [(90.20637824912605, 90.20637824912605)], + "SUCOAS" => 
[(18.494387648707622, 18.494387648707622)], "FBP" => [ - [568.5346256470805, 568.5346256470805], - [567.6367759041788, 567.6367759041788], + (568.5346256470805, 568.5346256470805), + (567.6367759041788, 567.6367759041788), ], - "ICDHyr" => [[39.62446791678959, 39.62446791678959]], - "AKGt2r" => [[234.99097804446805, 234.99097804446805]], - "GLUSy" => [[33.262997317319055, 33.262997317319055]], - "TPI" => [[698.301904211076, 698.301904211076]], + "ICDHyr" => [(39.62446791678959, 39.62446791678959)], + "AKGt2r" => [(234.99097804446805, 234.99097804446805)], + "GLUSy" => [(33.262997317319055, 33.262997317319055)], + "TPI" => [(698.301904211076, 698.301904211076)], "FORt" => [ - [234.38391855848187, 234.38391855848187], - [234.34725576182922, 234.34725576182922], + (234.38391855848187, 234.38391855848187), + (234.34725576182922, 234.34725576182922), ], "ACONTb" => [ - [159.74612206327865, 159.74612206327865], - [159.81975755249232, 159.81975755249232], + (159.74612206327865, 159.74612206327865), + (159.81975755249232, 159.81975755249232), ], - "GLNabc" => [[233.80358131677775, 233.80358131677775]], + "GLNabc" => [(233.80358131677775, 233.80358131677775)], "RPE" => [ - [1772.4850826683305, 1772.4850826683305], - [1768.8536177485582, 1768.8536177485582], + (1772.4850826683305, 1772.4850826683305), + (1768.8536177485582, 1768.8536177485582), ], "ACKr" => [ - [554.611547307207, 554.611547307207], - [555.112707891257, 555.112707891257], - [555.2464368932744, 555.2464368932744], + (554.611547307207, 554.611547307207), + (555.112707891257, 555.112707891257), + (555.2464368932744, 555.2464368932744), ], - "THD2" => [[24.739139801185537, 24.739139801185537]], + "THD2" => [(24.739139801185537, 24.739139801185537)], "PFL" => [ - [96.56316095411077, 96.56316095411077], - [96.65024313036014, 96.65024313036014], - [96.60761818004025, 96.60761818004025], - [96.49541118899961, 96.49541118899961], + (96.56316095411077, 96.56316095411077), + (96.65024313036014, 96.65024313036014), + 
(96.60761818004025, 96.60761818004025), + (96.49541118899961, 96.49541118899961), ], "RPI" => [ - [51.771578021074234, 51.771578021074234], - [51.81603467243345, 51.81603467243345], + (51.771578021074234, 51.771578021074234), + (51.81603467243345, 51.81603467243345), ], "D_LACt2" => [ - [233.51709131524734, 233.51709131524734], - [233.83187606098016, 233.83187606098016], + (233.51709131524734, 233.51709131524734), + (233.83187606098016, 233.83187606098016), ], "TALA" => [ - [109.05210545422884, 109.05210545422884], - [109.04246437049026, 109.04246437049026], + (109.05210545422884, 109.05210545422884), + (109.04246437049026, 109.04246437049026), ], - "PPCK" => [[218.4287805666016, 218.4287805666016]], - "PGL" => [[2120.4297518987964, 2120.4297518987964]], + "PPCK" => [(218.4287805666016, 218.4287805666016)], + "PGL" => [(2120.4297518987964, 2120.4297518987964)], "NADTRHD" => [ - [186.99387360624777, 186.99387360624777], - [187.16629305266423, 187.16629305266423], + (186.99387360624777, 186.99387360624777), + (187.16629305266423, 187.16629305266423), ], - "PGK" => [[57.641966636896335, 57.641966636896335]], + "PGK" => [(57.641966636896335, 57.641966636896335)], "LDH_D" => [ - [31.11118891764946, 31.11118891764946], - [31.12493425054357, 31.12493425054357], + (31.11118891764946, 31.11118891764946), + (31.12493425054357, 31.12493425054357), ], - "ME1" => [[487.0161203971232, 487.0161203971232]], + "ME1" => [(487.0161203971232, 487.0161203971232)], "PIt2r" => [ - [233.8651331835765, 233.8651331835765], - [234.27374798581067, 234.27374798581067], + (233.8651331835765, 233.8651331835765), + (234.27374798581067, 234.27374798581067), ], "ATPS4r" => [ - [7120.878030435999, 7120.878030435999], - [7116.751386037507, 7116.751386037507], + (7120.878030435999, 7120.878030435999), + (7116.751386037507, 7116.751386037507), ], "GLCpts" => [ - [233.9009878400008, 233.9009878400008], - [233.66656882114864, 233.66656882114864], - [233.66893882934883, 233.66893882934883], + 
(233.9009878400008, 233.9009878400008), + (233.66656882114864, 233.66656882114864), + (233.66893882934883, 233.66893882934883), ], - "GLUDy" => [[105.32811069172409, 105.32811069172409]], + "GLUDy" => [(105.32811069172409, 105.32811069172409)], "CYTBD" => [ - [153.18512795009505, 153.18512795009505], - [153.2429537682265, 153.2429537682265], - ], - "FUMt2_2" => [[234.37495609395967, 234.37495609395967]], - "FRUpts2" => [[234.1933863380989, 234.1933863380989]], - "GAPD" => [[128.76795529111456, 128.76795529111456]], - "PPC" => [[165.52424516841342, 165.52424516841342]], - "NADH16" => [[971.7487306963936, 971.7487306963936]], + (153.18512795009505, 153.18512795009505), + (153.2429537682265, 153.2429537682265), + ], + "FUMt2_2" => [(234.37495609395967, 234.37495609395967)], + "FRUpts2" => [(234.1933863380989, 234.1933863380989)], + "GAPD" => [(128.76795529111456, 128.76795529111456)], + "PPC" => [(165.52424516841342, 165.52424516841342)], + "NADH16" => [(971.7487306963936, 971.7487306963936)], "PFK" => [ - [1000.4626204522712, 1000.4626204522712], - [1000.5875517343595, 1000.5875517343595], - ], - "MDH" => [[25.931655783969283, 25.931655783969283]], - "PGI" => [[468.11833198138834, 468.11833198138834]], - "ME2" => [[443.0973626307168, 443.0973626307168]], - "GND" => [[240.1252264230952, 240.1252264230952]], - "SUCCt2_2" => [[234.18109388303225, 234.18109388303225]], + (1000.4626204522712, 1000.4626204522712), + (1000.5875517343595, 1000.5875517343595), + ], + "MDH" => [(25.931655783969283, 25.931655783969283)], + "PGI" => [(468.11833198138834, 468.11833198138834)], + "ME2" => [(443.0973626307168, 443.0973626307168)], + "GND" => [(240.1252264230952, 240.1252264230952)], + "SUCCt2_2" => [(234.18109388303225, 234.18109388303225)], "GLUN" => [ - [44.76358496525738, 44.76358496525738], - [44.84850207360875, 44.84850207360875], - [44.76185250415503, 44.76185250415503], + (44.76358496525738, 44.76358496525738), + (44.84850207360875, 44.84850207360875), + (44.76185250415503, 
44.76185250415503), ], - "ADK1" => [[111.64869652600649, 111.64869652600649]], - "SUCDi" => [[680.3193833053011, 680.3193833053011]], - "ENO" => [[209.35855069219886, 209.35855069219886]], + "ADK1" => [(111.64869652600649, 111.64869652600649)], + "SUCDi" => [(680.3193833053011, 680.3193833053011)], + "ENO" => [(209.35855069219886, 209.35855069219886)], "MALS" => [ - [252.7540503869977, 252.7540503869977], - [252.2359738678874, 252.2359738678874], + (252.7540503869977, 252.7540503869977), + (252.2359738678874, 252.2359738678874), ], - "GLUt2r" => [[234.22890837451837, 234.22890837451837]], - "PPS" => [[706.1455885214322, 706.1455885214322]], + "GLUt2r" => [(234.22890837451837, 234.22890837451837)], + "PPS" => [(706.1455885214322, 706.1455885214322)], "FUM" => [ - [1576.8372583425075, 1576.8372583425075], - [1576.233088455828, 1576.233088455828], - [1575.9638204848736, 1575.9638204848736], + (1576.8372583425075, 1576.8372583425075), + (1576.233088455828, 1576.233088455828), + (1575.9638204848736, 1575.9638204848736), ], ) From d708a3bf0a90e02ad4032cbf01735ae57924d70a Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 22:29:26 +0200 Subject: [PATCH 074/109] someone transposed the matrix for us! --- src/base/types/wrappers/SMomentModel.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index 4880c1991..e2ad0f450 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -119,8 +119,7 @@ Get the mapping of the reaction rates in [`SMomentModel`](@ref) to the original fluxes in the wrapped model. 
""" reaction_flux(model::SMomentModel) = - reaction_flux(model.inner) * _smoment_column_reactions(model) - + _smoment_column_reactions(model)' * reaction_flux(model.inner) """ coupling(model::SMomentModel) From d8a69f72eeee7ba80710cc8352b61aec6f712135 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Tue, 12 Apr 2022 22:34:59 +0200 Subject: [PATCH 075/109] document make_smoment_model --- src/analysis/smoment.jl | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index 2cc86186b..a82dd4a68 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -3,12 +3,24 @@ make_smoment_model( model::MetabolicModel; reaction_isozymes::Function, - gene_product_capacity_required::Function, - total_enzyme_capacity, + gene_product_molar_mass::Function, + total_enzyme_capacity::Float64, ) -Construct an [`SMomentModel`](@ref) model using the inner `model` and a map of -isozymes. +Construct a model with a structure given by sMOMENT algorithm; returns a +[`SMomentModel`](@ref) (see the documentation for details. + +`reaction_isozymes` parameter is a function that returns a single isozyme for +each reaction, or `nothing` if the reaction is not enzymatic. If the reaction +has multiple isozymes, use [`smoment_isozyme_score`](@ref) to select the "best" +one, as recommended by the sMOMENT approach. + +`gene_product_molar_mass` parameter is a function that returns a molar mass of +each gene product (relative to `total_enzyme_capacity` and the specified +kcats), as specified by sMOMENT. + +`total_enzyme_capacity` is the maximum "enzyme capacity" consumption of the +model. 
""" function make_smoment_model( model::MetabolicModel; From ea25d3149b2887dab8a5327ba64a468ee56846b2 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 13 Apr 2022 15:39:49 +0200 Subject: [PATCH 076/109] gecko stage 1 (still TODO: the mass fraction coupling) --- src/analysis/gecko.jl | 270 ++++++++++-------------- src/analysis/smoment.jl | 71 +++---- src/base/types/wrappers/GeckoModel.jl | 197 ++++++++--------- src/base/types/wrappers/SMomentModel.jl | 18 +- src/base/utils/smoment.jl | 23 +- src/reconstruction/gecko.jl | 11 + test/analysis/gecko.jl | 67 +++--- 7 files changed, 293 insertions(+), 364 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index b473003d7..1613e83b3 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -1,178 +1,126 @@ -""" - make_geckomodel( - model::StandardModel; - rid_isozymes = Dict{String, Vector{Isozyme}}(), - enzyme_capacities = [(),], - ) - -Construct a `GeckoModel` based on `model` using the kinetic data encoded by -`rid_isozymes`. Enzyme capacity constraints can be added through `enzyme_capacities`, -which is a vector of tuples. In the first position of the tuple is a list of gene ids, -and the second position is mass upperbound of the sum of these gene ids. - -The units of the fluxes and protein concentration depend on those used in -`rid_isozymes` for the kcats and the molar masses encoded in the genes of -`model`. Currently only `modifications` that change attributes of the -`optimizer` are supported. 
- -# Example -``` -gm = make_geckomodel( - model; - rid_isozymes, - enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)], -) - -opt_model = flux_balance_analysis( - gm, - Tulip.Optimizer -) - -rxn_fluxes = flux_dict(gm, opt_model) -prot_concens = protein_dict(gm, opt_model) -``` -""" -function make_geckomodel( +function make_gecko_model( model::StandardModel; - rid_isozymes = Dict{String,Vector{Isozyme}}(), - enzyme_capacities = [()], + reaction_isozymes::Function, + reaction_isozyme_masses::Function, + gene_product_limit::Function, + reaction_mass_group::Function = _ -> "uncategorized", + mass_faction_limit::Function, ) - S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map = - _build_irreversible_stoichiometric_matrix(model, rid_isozymes) - - #: find all gene products that have kcats associated with them - gene_ids = get_genes_with_kcats(rid_isozymes) - - #: size of resultant model - num_reactions = size(S, 2) - num_genes = length(gene_ids) - num_metabolites = size(S, 1) - num_vars = num_reactions + num_genes - #: equality lhs - E_components = ( #TODO add size hints if possible - row_idxs = Vector{Int}(), - col_idxs = Vector{Int}(), - coeffs = Vector{Float64}(), - ) + columns = Vector{_gecko_column}() + coupling_row_reaction = Int[] + coupling_row_gene_product = Int[] + coupling_row_mass_group = String[] - for (rid, col_idx) in reaction_map - original_rid = string(split(rid, "§")[1]) + gene_name_lookup = Dict(genes(model) .=> 1:n_genes(model)) + gene_row_lookup = Dict{Int,Int}() + mass_group_lookup = Dict{String,Int}() - # skip these entries - contains(rid, "§ARM") && continue - !haskey(rid_isozymes, original_rid) && continue + (lbs, ubs) = bounds(model) + rids = reactions(model) - # these entries have kcats - if contains(rid, "§ISO") - iso_num = parse( - Int, - replace( - first(filter(startswith("ISO"), split(rid, "§")[2:end])), - "ISO" => "", - ), - ) - else # only one enzyme - iso_num = 1 + for i = 1:n_reactions(model) + isozymes = 
reaction_isozymes(rids[i]) + if isempty(isozymes) + push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], 0, 0, 0, 0)) + continue end - # add all entries to column of matrix - _add_enzyme_variable( - rid_isozymes, - iso_num, # only one enzyme - rid, - original_rid, - E_components, - col_idx, - gene_ids, - ) + group = reaction_mass_group(rids[i]) + mass_group_row = + isnothing(group) ? 0 : + haskey(massgroup_lookup, group) ? mass_group_lookup[group] : + begin + push!(coupling_row_mass_group, group) + mass_group_lookup[group] = length(coupling_row_mass_group) + end + + push!(coupling_row_reaction, i) + reaction_coupling_row = length(coupling_row_reaction) + + masses = group > 0 ? reaction_isozyme_masses(rids[i]) : zeros(length(isozymes)) + + for (iidx, isozyme) in enumerate(isozymes) + if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance + # reaction can run in reverse + push!( + columns, + _gecko_column( + i, + iidx, + -1, + reaction_coupling_row, + max(-ubs[i], 0), + -lbs[i], + _gecko_make_gene_product_coupling( + isozyme.gene_product_count, + isozyme.kcat_reverse, + gene_name_lookup, + gene_row_lookup, + coupling_row_gene_product, + ), + mass_group_row, + masses[iidx] / isozyme.kcat_reverse, + ), + ) + end + if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance + # reaction can run forward + push!( + columns, + _gecko_column( + i, + iidx, + 1, + reaction_coupling_row, + max(lbs[i], 0), + ubs[i], + _gecko_make_gene_product_coupling( + isozyme.gene_product_count, + isozyme.kcat_forward, + gene_name_lookup, + gene_row_lookup, + coupling_row_gene_product, + ), + mass_group_row, + masses[iidx] / isozyme.kcat_forward, + ), + ) + end + end end - Se = sparse( - E_components.row_idxs, - E_components.col_idxs, - E_components.coeffs, - num_genes, - num_reactions, - ) + coupling_row_mass_group = + collect(zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group))) - stoich_mat = sparse([ - S zeros(num_metabolites, 
num_genes) - Se I(num_genes) - ]) - - #: equality rhs - b = spzeros(num_metabolites + num_genes) - - #: find objective (assume objective is forward) - obj_idx_orig = first(findnz(objective(model))[1]) - obj_id_orig = reactions(model)[obj_idx_orig] - obj_id = obj_id_orig * "§FOR" - c = spzeros(num_vars) - obj_idx = reaction_map[obj_id] - c[obj_idx] = 1.0 - - #: inequality constraints - xl = sparse([lb_fluxes; fill(0.0, num_genes)]) - xu = sparse([ub_fluxes; fill(1000.0, num_genes)]) - - #: enzyme capacity constraints - mw_proteins = [model.genes[pid].molar_mass for pid in gene_ids] - C = spzeros(length(enzyme_capacities), num_vars) - cl = spzeros(length(enzyme_capacities)) - cu = spzeros(length(enzyme_capacities)) - - for (i, enz_cap) in enumerate(enzyme_capacities) - enz_idxs = indexin(first(enz_cap), gene_ids) - C[i, num_reactions.+enz_idxs] .= mw_proteins[enz_idxs] - cu[i] = last(enz_cap) - end + coupling_row_gene_product = collect( + zip(coupling_row_gene_product, gene_product_limit.(coupling_row_gene_product)), + ) return GeckoModel( - reactions(model), - _order_id_to_idx_dict(reaction_map), - _order_id_to_idx_dict(metabolite_map), - gene_ids, - c, - stoich_mat, - b, - xl, - xu, - C, - cl, - cu, + columns, + coupling_row_reaction, + coupling_row_gene_product, + coupling_row_mass_group, + model, ) end -""" - _add_enzyme_variable( - rid_isozymes, - iso_num, - rid, - original_rid, - E_components, - col_idx, - gene_ids, - ) - -Helper function to add an column into the enzyme stoichiometric matrix. 
-""" -function _add_enzyme_variable( - rid_isozymes, - iso_num, - rid, - original_rid, - E_components, - col_idx, - gene_ids, +_gecko_make_gene_product_coupling( + gene_product_count::Dict{String,Int}, + kcat::Float64, + name_lookup::Dict{String,Int}, + row_lookup::Dict{Int,Int}, + rows::Vector{Int}, +) = collect( + begin + gidx = name_lookup[gene] + row_idx = if haskey(row_lookup, gidx) + row_lookup[gidx] + else + push!(rows, gidx) + row_lookup[gidx] = length(rows) + end + (row_idx, 1 / kcat) + end for (gene, count) in gene_product_count if haskey(name_lookup, gene) ) - pstoich = rid_isozymes[original_rid][iso_num].stoichiometry - kcat = - contains(rid, "§FOR") ? rid_isozymes[original_rid][iso_num].kcats[1] : - rid_isozymes[original_rid][iso_num].kcats[2] - for (pid, pst) in pstoich - push!(E_components.row_idxs, first(indexin([pid], gene_ids))) - push!(E_components.col_idxs, col_idx) - push!(E_components.coeffs, -pst / kcat) - end -end diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index a82dd4a68..185a82051 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -39,45 +39,44 @@ function make_smoment_model( if isnothing(isozyme) # non-enzymatic reaction (or a totally ignored one) push!(columns, _smoment_column(i, 0, 0, lbs[i], ubs[i], 0)) - else - # pick a new row for "arm reaction" coupling - coupling_row = length(coupling_row_reaction) + 1 - push!(coupling_row_reaction, i) + continue + end + # pick a new row for "arm reaction" coupling + push!(coupling_row_reaction, i) + coupling_row = length(coupling_row_reaction) - mw = sum( - gene_product_molar_mass(gid) * ps for - (gid, ps) in isozyme.gene_product_count - ) + mw = sum( + gene_product_molar_mass(gid) * ps for (gid, ps) in isozyme.gene_product_count + ) - if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance - # reaction can run in reverse - push!( - columns, - _smoment_column( - i, - -1, - coupling_row, - max(-ubs[i], 0), - -lbs[i], - mw / 
isozyme.kcat_reverse, - ), - ) - end + if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance + # reaction can run in reverse + push!( + columns, + _smoment_column( + i, + -1, + coupling_row, + max(-ubs[i], 0), + -lbs[i], + mw / isozyme.kcat_reverse, + ), + ) + end - if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance - # reaction can run forward - push!( - columns, - _smoment_column( - i, - 1, - coupling_row, - max(lbs[i], 0), - ubs[i], - mw / isozyme.kcat_forward, - ), - ) - end + if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance + # reaction can run forward + push!( + columns, + _smoment_column( + i, + 1, + coupling_row, + max(lbs[i], 0), + ubs[i], + mw / isozyme.kcat_forward, + ), + ) end end diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 11b535349..f2c3da6f4 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -1,155 +1,136 @@ -""" - mutable struct GeckoModel <: MetabolicModel - -A model that incorporates enzyme capacity and kinetic constraints via the GECKO -formulation. See `Sánchez, Benjamín J., et al. "Improving the phenotype -predictions of a yeast genome‐scale metabolic model by incorporating enzymatic -constraints." Molecular systems biology, 2017.` for implementation details. - -Note, since the model uses irreversible reactions internally, `"§FOR"` (for the -forward direction) and `"§REV"` (for the reverse direction) is appended to each -reaction internally. Hence, `"§"` is reserved for internal use as a delimiter, -no reaction id should contain this character. - -To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel`. 
- -# Fields -``` -reaction_ids::Vector{String} -irrev_reaction_ids::Vector{String} -metabolites::Vector{String} -gene_ids::Vector{String} -c::SparseVec -S::SparseMat -b::SparseVec -xl::SparseVec -xu::SparseVec -C::SparseMat -cl::Vector{Float64} -cu::Vector{Float64} -``` -""" -mutable struct GeckoModel <: MetabolicModel - reaction_ids::Vector{String} - irrev_reaction_ids::Vector{String} - metabolites::Vector{String} - gene_ids::Vector{String} - - # gecko - c::SparseVec - S::SparseMat - b::SparseVec - xl::SparseVec - xu::SparseVec - - # enzyme capacity constraints - C::SparseMat - cl::Vector{Float64} - cu::Vector{Float64} -end - -""" - stoichiometry(model::GeckoModel) - -Return stoichiometry matrix that includes enzymes as metabolites. -""" -stoichiometry(model::GeckoModel) = model.S -""" - balance(model::GeckoModel) +struct _gecko_column + reaction_idx::Int + isozyme_idx::Int + direction::Int + reaction_coupling_row::Int + lb::Float64 + ub::Float64 + gene_product_coupling::Vector{Tuple{Int,Float64}} + mass_group_row::Int + mass_required::Float64 +end -Return stoichiometric balance. -""" -balance(model::GeckoModel) = model.b +struct GeckoModel <: ModelWrapper + columns::Vector{_gecko_column} + coupling_row_reaction::Vector{Int} + coupling_row_gene_product::Vector{Tuple{Int,Float64}} + coupling_row_mass_group::Vector{Tuple{String,Float64}} #TODO add to matrices -""" - objective(model::GeckoModel) + inner::MetabolicModel +end -Return objective of `model`. -""" -objective(model::GeckoModel) = model.c +unwrap_model(model::GeckoModel) = model.inner """ - fluxes(model::GeckoModel) + stoichiometry(model::GeckoModel) -Returns the reversible reactions in `model`. For -the irreversible reactions, use [`reactions`][@ref]. +Return a stoichiometry of the [`GeckoModel`](@ref). The enzymatic reactions are +split into unidirectional forward and reverse ones, each of which may have +multiple variants per isozyme. 
""" -fluxes(model::GeckoModel) = model.reaction_ids +stoichiometry(model::GeckoModel) = + stoichiometry(model.inner) * _gecko_column_reactions(model) """ - n_reactions(model::GeckoModel) + objective(model::GeckoModel) -Returns the number of reversible reactions in the model. +Reconstruct an objective of the [`GeckoModel`](@ref), following the objective +of the inner model. """ -n_fluxes(model::GeckoModel) = length(model.reaction_ids) +objective(model::GeckoModel) = _gecko_column_reactions(model)' * objective(model.inner) """ reactions(model::GeckoModel) -Returns the irreversible reactions in `model`. -""" -reactions(model::GeckoModel) = model.irrev_reaction_ids +Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split +to forward- and reverse-only parts with different isozyme indexes; reactions +IDs are mangled accordingly with suffixes). +""" +reactions(model::GeckoModel) = + let inner_reactions = reactions(model.inner) + [ + _gecko_reaction_name( + inner_reactions[col.reaction_idx], + col.direction, + col.isozyme_idx, + ) for col in model.columns + ] + end """ reactions(model::GeckoModel) Returns the number of all irreversible reactions in `model`. """ -n_reactions(model::GeckoModel) = length(model.irrev_reaction_ids) - -""" - genes(model::GeckoModel) - -Returns the genes (proteins) in the order as they appear as variables in the -model. -""" -genes(model::GeckoModel) = model.gene_ids - -""" - n_genes(model::GeckoModel) +n_reactions(model::GeckoModel) = length(model.columns) -Returns the number of genes in the model. """ -n_genes(model::GeckoModel) = length(model.gene_ids) - -""" - metabolites(model::GeckoModel) + bounds(model::GeckoModel) -Return the metabolites in `model`. +Return variable bounds for [`GeckoModel`](@ref). 
""" -metabolites(model::GeckoModel) = model.metabolites +bounds(model::GeckoModel) = + ([col.lb for col in model.columns], [col.ub for col in model.columns]) """ - n_metabolites(model::GeckoModel) = + reaction_flux(model::GeckoModel) -Return the number of metabolites in `model`. +Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original +fluxes in the wrapped model. """ -n_metabolites(model::GeckoModel) = length(metabolites(model)) +reaction_flux(model::GeckoModel) = + _gecko_column_reactions(model)' * reaction_flux(model.inner) """ - bounds(model::GeckoModel) + coupling(model::GeckoModel) -Return variable bounds for `GeckoModel`. +Return the coupling of [`GeckoModel`](@ref). That combines the coupling of +the wrapped model, coupling for split reactions, and the coupling for the total +enzyme capacity. """ -bounds(model::GeckoModel) = (model.xl, model.xu) +coupling(model::GeckoModel) = vcat( + coupling(model.inner) * _gecko_column_reactions(model), + _gecko_reaction_coupling(model), + _gecko_gene_product_coupling(model), +) """ - coupling(model::GeckoModel) + n_coupling_constraints(model::GeckoModel) -Coupling constraint matrix for a `GeckoModel`. +Count the coupling constraints in [`GeckoModel`](@ref) (refer to +[`coupling`](@ref) for details). """ -coupling(model::GeckoModel) = model.C +n_coupling_constraints(model::GeckoModel) = + n_coupling_constraints(model.inner) + + length(model.coupling_row_reaction) + + length(model.coupling_row_gene_product) """ coupling_bounds(model::GeckoModel) -Coupling bounds for a `GeckoModel`. -""" -coupling_bounds(model::GeckoModel) = (model.cl, model.cu) +The coupling bounds for [`GeckoModel`](@ref) (refer to [`coupling`](@ref) for +details). 
+""" +function coupling_bounds(model::GeckoModel) + (iclb, icub) = coupling_bounds(model.inner) + (ilb, iub) = bounds(model.inner) + return ( + vcat( + iclb, + ilb[model.coupling_row_reaction], + [0.0 for _ in model.coupling_row_gene_product], + ), + vcat( + icub, + rub[model.coupling_row_reaction], + [c for (i, c) in model.coupling_row_gene_product], + ), + ) +end """ - reaction_flux(model::MetabolicModel) + reaction_flux(model::GeckoModel) Helper function to get fluxes from optimization problem. """ diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index e2ad0f450..995bb5afb 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -5,7 +5,7 @@ A helper type that describes the contents of [`SMomentModel`](@ref)s. """ struct _smoment_column - reaction_id::Int # number of the corresponding reaction in the inner model + reaction_idx::Int # number of the corresponding reaction in the inner model direction::Int # 0 if "as is" and unique, -1 if reverse-only part, 1 if forward-only part coupling_row::Int # number of row in the coupling (0 if direction==0) lb::Float64 # must be 0 if the reaction is unidirectional (if direction!=0) @@ -86,13 +86,13 @@ objective(model::SMomentModel) = _smoment_column_reactions(model)' * objective(m reactions(model::SMomentModel) Returns the internal reactions in a [`SMomentModel`](@ref) (these may be split -to forward- and reverse-only parts; reactions IDs mangled accordingly with +to forward- and reverse-only parts; reactions IDs are mangled accordingly with suffixes). """ reactions(model::SMomentModel) = let inner_reactions = reactions(model.inner) [ - _smoment_reaction_name(inner_reactions[col.reaction_id], col.direction) for + _smoment_reaction_name(inner_reactions[col.reaction_idx], col.direction) for col in model.columns ] end @@ -120,6 +120,7 @@ fluxes in the wrapped model. 
""" reaction_flux(model::SMomentModel) = _smoment_column_reactions(model)' * reaction_flux(model.inner) + """ coupling(model::SMomentModel) @@ -140,7 +141,7 @@ Count the coupling constraints in [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for details). """ n_coupling_constraints(model::SMomentModel) = - n_coupling_constraints(model.inner) + _smoment_n_reaction_couplings(model) + 1 + n_coupling_constraints(model.inner) + length(model.coupling_row_reaction) + 1 """ coupling_bounds(model::SMomentModel) @@ -149,7 +150,10 @@ The coupling bounds for [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for details). """ function coupling_bounds(model::SMomentModel) - (ilb, iub) = coupling_bounds(model.inner) - (rlb, rub) = _smoment_reaction_coupling_bounds(model) - return (vcat(ilb, rlb, [0.0]), vcat(iub, rub, [model.total_enzyme_capacity])) + (iclb, icub) = coupling_bounds(model.inner) + (ilb, iub) = bounds(model.inner) + return ( + vcat(iclb, ilb[model.coupling_row_reaction], [0.0]), + vcat(icub, iub[model.coupling_row_reaction], [model.total_enzyme_capacity]), + ) end diff --git a/src/base/utils/smoment.jl b/src/base/utils/smoment.jl index 2f03575f2..f654738c8 100644 --- a/src/base/utils/smoment.jl +++ b/src/base/utils/smoment.jl @@ -15,7 +15,7 @@ Retrieve a utility mapping between reactions and split reactions; rows correspond to "original" reactions, columns correspond to "split" reactions. """ _smoment_column_reactions(model::SMomentModel) = sparse( - [col.reaction_id for col in model.columns], + [col.reaction_idx for col in model.columns], 1:length(model.columns), [col.direction >= 0 ? 
1 : -1 for col in model.columns], n_reactions(model.inner), @@ -32,29 +32,10 @@ _smoment_reaction_coupling(model::SMomentModel) = sparse( [col.coupling_row for col in model.columns if col.direction != 0], [i for (i, col) in enumerate(model.columns) if col.direction != 0], [col.direction for col in model.columns if col.direction != 0], - _smoment_n_reaction_couplings(model), + length(model.coupling_row_reaction), length(model.columns), ) -""" - _smoment_n_reaction_couplings(model::SMomentModel) - -Internal helper for determining the number of required couplings to account for -"arm" reactions. -""" -_smoment_n_reaction_couplings(model::SMomentModel) = length(model.coupling_row_reaction) - -""" - _smoment_reaction_coupling_bounds(model::SMomentModel) - -Return bounds that limit the "arm" reactions in [`SMomentModel`](@ref). The -values are taken from the "original" inner model. -""" -_smoment_reaction_coupling_bounds(model::SMomentModel) = - let (lbs, ubs) = bounds(model.inner) - (lbs[model.coupling_row_reaction], ubs[model.coupling_row_reaction]) - end - """ smoment_isozyme_speed(isozyme::Isozyme, gene_product_molar_mass) diff --git a/src/reconstruction/gecko.jl b/src/reconstruction/gecko.jl index 523bf4a2e..488dcb382 100644 --- a/src/reconstruction/gecko.jl +++ b/src/reconstruction/gecko.jl @@ -1,3 +1,14 @@ + + +""" + with_gecko(; kwargs...) + +Specifies a model variant which adds extra semantics of the Gecko algorithm, +giving a [`GeckoModel`](@ref). The arguments are forwarded to +[`make_gecko_model`](@ref). Intended for usage with [`screen`](@ref). +""" +with_gecko(; kwargs...) = model -> make_gecko_model(model; kwargs...) 
+ """ change_bound!(model::GeckoModel, id; lower=nothing, upper=nothing) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index f7e29e7b2..129356fa2 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -1,41 +1,46 @@ @testset "GECKO" begin model = load_model(StandardModel, model_paths["e_coli_core.json"]) - total_protein_mass = 100 # mg/gdW - - #: construct isozymes from model - rid_isozymes = Dict{String,Vector{Isozyme}}() - for (rid, kcats) in ecoli_core_reaction_kcats - grrs = reaction_gene_association(model, rid) - rid_isozymes[rid] = [ - Isozyme( - Dict(grrs[i] .=> ecoli_core_protein_stoichiometry[rid][i]), - (kcats[i][1], kcats[i][2]), - ) for i = 1:length(grrs) - ] - end - - #: add molar mass to genes in model - for (gid, g) in model.genes - model.genes[gid].molar_mass = get(ecoli_core_protein_masses, gid, nothing) - end - - gm = make_geckomodel( - model; - rid_isozymes, - enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)], - ) - change_bounds!( - gm, - ["EX_glc__D_e", "b2779", "GLCpts"]; - lower = [-1000.0, 0.01, -1.0], - upper = [nothing, 0.06, 12.0], - ) + + get_reaction_isozymes = + rid -> + haskey(ecoli_core_reaction_kcats, rid) ? + collect( + Isozyme( + Dict(grr .=> ecoli_core_protein_stoichiometry[rid][i]), + ecoli_core_reaction_kcats[rid][i]..., + ) for (i, grr) in enumerate(reaction_gene_association(model, rid)) + ) : Isozyme[] + + get_reaction_isozyme_masses = + rid -> + haskey(ecoli_core_protein_stoichiometry, rid) ? 
+ [ + sum( + values(counts) .* + get.(Ref(ecoli_core_protein_masses), keys(counts), 0.0), + ) for (iidx, counts) in enumerate(ecoli_core_protein_stoichiometry[rid]) + ] : [] + + total_protein_mass = 100.0 + + gm = + model |> + with_changed_bounds( + ["EX_glc__D_e", "b2779", "GLCpts"]; + lower = [-1000.0, 0.01, -1.0], + upper = [nothing, 0.06, 12.0], + ) |> + with_gecko( + reaction_isozymes = get_reaction_isozymes, + reaction_isozyme_masses = get_reaction_oisozyme_masses, + gene_product_limit = _ -> 1.0, + mass_fraction_limit = _ -> total_protein_mass, + ) opt_model = flux_balance_analysis( gm, Tulip.Optimizer; modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], - sense = COBREXA.MOI.MAX_SENSE, ) rxn_fluxes = flux_dict(gm, opt_model) From 09ad6a21dd3102432e40230b3fa3e000b50f894f Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 13 Apr 2022 15:40:28 +0200 Subject: [PATCH 077/109] fixup --- src/analysis/gecko.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 1613e83b3..42ba41253 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -4,7 +4,7 @@ function make_gecko_model( reaction_isozyme_masses::Function, gene_product_limit::Function, reaction_mass_group::Function = _ -> "uncategorized", - mass_faction_limit::Function, + mass_fraction_limit::Function, ) columns = Vector{_gecko_column}() From 308190aad6b71eaa14cbb435217c535fff04c2e1 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 13 Apr 2022 17:21:57 +0200 Subject: [PATCH 078/109] fixups vol 2 --- src/analysis/gecko.jl | 17 ++++++---------- src/base/types/wrappers/GeckoModel.jl | 28 +++++---------------------- src/base/utils/smoment.jl | 21 +++++++++++++------- 3 files changed, 25 insertions(+), 41 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 42ba41253..541ac23f7 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -90,19 +90,14 @@ function 
make_gecko_model( end end - coupling_row_mass_group = - collect(zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group))) - - coupling_row_gene_product = collect( - zip(coupling_row_gene_product, gene_product_limit.(coupling_row_gene_product)), - ) - - return GeckoModel( + coupling_row_mass_group = return GeckoModel( columns, coupling_row_reaction, - coupling_row_gene_product, - coupling_row_mass_group, - model, + collect( + zip(coupling_row_gene_product, gene_product_limit.(coupling_row_gene_product)), + )collect( + zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group)), + )model, ) end diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index f2c3da6f4..9190a76be 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -93,6 +93,7 @@ coupling(model::GeckoModel) = vcat( coupling(model.inner) * _gecko_column_reactions(model), _gecko_reaction_coupling(model), _gecko_gene_product_coupling(model), + _gecko_mass_group_coupling(model), ) """ @@ -104,7 +105,8 @@ Count the coupling constraints in [`GeckoModel`](@ref) (refer to n_coupling_constraints(model::GeckoModel) = n_coupling_constraints(model.inner) + length(model.coupling_row_reaction) + - length(model.coupling_row_gene_product) + length(model.coupling_row_gene_product) + + length(model.coupling_row_mass_group) """ coupling_bounds(model::GeckoModel) @@ -120,33 +122,13 @@ function coupling_bounds(model::GeckoModel) iclb, ilb[model.coupling_row_reaction], [0.0 for _ in model.coupling_row_gene_product], + [0.0 for _ in model.coupling_row_mass_group], ), vcat( icub, rub[model.coupling_row_reaction], [c for (i, c) in model.coupling_row_gene_product], + [c for (i, c) in model.coupling_row_mass_group], ), ) end - -""" - reaction_flux(model::GeckoModel) - -Helper function to get fluxes from optimization problem. 
-""" -function reaction_flux(model::GeckoModel) - R = spzeros(n_fluxes(model), n_genes(model) + n_reactions(model)) - for (i, rid) in enumerate(fluxes(model)) - for_idx = findfirst( - x -> x == rid * "§ARM§FOR" || x == rid * "§FOR", - model.irrev_reaction_ids, - ) - rev_idx = findfirst( - x -> x == rid * "§ARM§REV" || x == rid * "§REV", - model.irrev_reaction_ids, - ) - !isnothing(for_idx) && (R[i, for_idx] = 1.0) - !isnothing(rev_idx) && (R[i, rev_idx] = -1.0) - end - return R' -end diff --git a/src/base/utils/smoment.jl b/src/base/utils/smoment.jl index f654738c8..02befa605 100644 --- a/src/base/utils/smoment.jl +++ b/src/base/utils/smoment.jl @@ -28,13 +28,20 @@ _smoment_column_reactions(model::SMomentModel) = sparse( Compute the part of the coupling for [`SMomentModel`](@ref) that limits the "arm" reactions (which group the individual split unidirectional reactions). """ -_smoment_reaction_coupling(model::SMomentModel) = sparse( - [col.coupling_row for col in model.columns if col.direction != 0], - [i for (i, col) in enumerate(model.columns) if col.direction != 0], - [col.direction for col in model.columns if col.direction != 0], - length(model.coupling_row_reaction), - length(model.columns), -) +_smoment_reaction_coupling(model::SMomentModel) = + let tmp = [ + (col.coupling_row, i, col.direction) for + (i, col) = enumerate(model.columns) if col.direction != 0 + ] + + sparse( + [row for (row, _, _) in tmp], + [col for (_, col, _) in tmp], + [val for (_, _, val) in tmp], + length(model.coupling_row_reaction), + length(model.columns), + ) + end """ smoment_isozyme_speed(isozyme::Isozyme, gene_product_molar_mass) From 6663c2f72ac95aa75a038e2d87e347a23befd8e5 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 13 Apr 2022 19:49:08 +0200 Subject: [PATCH 079/109] works partially --- src/analysis/gecko.jl | 12 ++--- src/base/types/wrappers/GeckoModel.jl | 12 ++--- src/base/utils/enzyme.jl | 64 +++++++-------------------- test/analysis/gecko.jl | 16 +++---- 
test/runtests.jl | 43 +++++++++--------- 5 files changed, 57 insertions(+), 90 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 541ac23f7..e9cfd0086 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -22,14 +22,14 @@ function make_gecko_model( for i = 1:n_reactions(model) isozymes = reaction_isozymes(rids[i]) if isempty(isozymes) - push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], 0, 0, 0, 0)) + push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [], 0, 0)) continue end group = reaction_mass_group(rids[i]) mass_group_row = isnothing(group) ? 0 : - haskey(massgroup_lookup, group) ? mass_group_lookup[group] : + haskey(mass_group_lookup, group) ? mass_group_lookup[group] : begin push!(coupling_row_mass_group, group) mass_group_lookup[group] = length(coupling_row_mass_group) @@ -38,7 +38,7 @@ function make_gecko_model( push!(coupling_row_reaction, i) reaction_coupling_row = length(coupling_row_reaction) - masses = group > 0 ? reaction_isozyme_masses(rids[i]) : zeros(length(isozymes)) + masses = mass_group_row > 0 ? 
reaction_isozyme_masses(rids[i]) : zeros(length(isozymes)) for (iidx, isozyme) in enumerate(isozymes) if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance @@ -95,9 +95,9 @@ function make_gecko_model( coupling_row_reaction, collect( zip(coupling_row_gene_product, gene_product_limit.(coupling_row_gene_product)), - )collect( - zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group)), - )model, + ), + collect( zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group))), + model, ) end diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 9190a76be..b3f276362 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -14,8 +14,8 @@ end struct GeckoModel <: ModelWrapper columns::Vector{_gecko_column} coupling_row_reaction::Vector{Int} - coupling_row_gene_product::Vector{Tuple{Int,Float64}} - coupling_row_mass_group::Vector{Tuple{String,Float64}} #TODO add to matrices + coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}} + coupling_row_mass_group::Vector{Tuple{String,Float64}} inner::MetabolicModel end @@ -121,14 +121,14 @@ function coupling_bounds(model::GeckoModel) vcat( iclb, ilb[model.coupling_row_reaction], - [0.0 for _ in model.coupling_row_gene_product], + [lb for (_, (lb, _)) in model.coupling_row_gene_product], [0.0 for _ in model.coupling_row_mass_group], ), vcat( icub, - rub[model.coupling_row_reaction], - [c for (i, c) in model.coupling_row_gene_product], - [c for (i, c) in model.coupling_row_mass_group], + iub[model.coupling_row_reaction], + [ub for (_, (_, ub)) in model.coupling_row_gene_product], + [ub for (_, ub) in model.coupling_row_mass_group], ), ) end diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index 630a90ee2..5ef817408 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -5,69 +5,35 @@ Return a dictionary mapping protein concentrations to their ids. 
The argument `opt_model` is a solved optimization problem, typically returned by [`flux_balance_analysis`](@ref). """ -protein_dict(model::GeckoModel, opt_model) = +protein_dict(model::GeckoModel, opt_model) = let gids = genes(model) is_solved(opt_model) ? Dict( - model.gene_ids .=> value.(opt_model[:x][(length(model.irrev_reaction_ids)+1):end]), + [gids[gidx] for (gidx,_) = model.coupling_row_gene_product] .=> _gecko_gene_product_coupling(model) * value.(opt_model[:x]), ) : nothing +end """ protein_dict(model::GeckoModel) -A pipe-able variant of `protein_dict`. +A pipe-able variant of [`protein_dict`](@ref). """ protein_dict(model::GeckoModel) = x -> protein_dict(model, x) -""" - get_genes_with_kcats(rid_isozymes::Dict{String, Vector{Isozyme}}) -Return all protein (gene ids) that have a kcat from `model` based on `reaction_kcats` field. -Assume that if a reaction has a kcat then each isozyme has a kcat. """ -function get_genes_with_kcats(rid_isozymes::Dict{String,Vector{Isozyme}}) - gids = String[] - for isozymes in values(rid_isozymes) - for isozyme in isozymes - append!(gids, collect(keys(isozyme.stoichiometry))) - end - end - return unique(gids) -end + mass_group_dict(model::GeckoModel, opt_model) +Extract the mass utilization in mass groups from a solved [`GeckoModel`](@ref). """ - remove_low_expressed_isozymes!( - model::StandardModel, - rid_isozymes = Dict{String, Vector{Isozyme}}() - gid_measurements = Dict(), - ) +mass_group_dict(model::GeckoModel, opt_model) = + is_solved(opt_model) ? + Dict( + (group for (group,_)=model.coupling_row_mass_group) .=> _gecko_mass_group_coupling(model) * value.(opt_model[:x]) + ) : nothing -Modify `rid_isozymes` in place by keeping only the highest expressed isozyme. 
""" -function remove_low_expressed_isozymes!( - model::StandardModel, - rid_isozymes = Dict{String,Vector{Isozyme}}(), - gid_measurements = Dict(), -) - - for (rid, isozymes) in rid_isozymes - measured_proteins = Float64[] - for isozyme in isozymes - gid_stoich = isozyme.stoichiometry - push!( - measured_proteins, - sum( - map( - *, - collect(values(gid_stoich)), - [get(gid_measurements, gid, 0.0) for gid in keys(gid_stoich)], - [model.genes[gid].molar_mass for gid in keys(gid_stoich)], - ), - ), - ) - end - idx = argmax(measured_proteins) - rid_isozymes[rid] = [rid_isozymes[rid][idx]] - end + mass_group_dict(model::GeckoModel) - return nothing -end +A pipe-able variant of [`mass_group_dict`](@ref). +""" +mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 129356fa2..9f86f80a7 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -16,9 +16,9 @@ haskey(ecoli_core_protein_stoichiometry, rid) ? [ sum( - values(counts) .* - get.(Ref(ecoli_core_protein_masses), keys(counts), 0.0), - ) for (iidx, counts) in enumerate(ecoli_core_protein_stoichiometry[rid]) + counts .* + get.(Ref(ecoli_core_protein_masses), gids, 0.0), + ) for (gids, counts) in zip(reaction_gene_association(model, rid), ecoli_core_protein_stoichiometry[rid]) ] : [] total_protein_mass = 100.0 @@ -26,14 +26,14 @@ gm = model |> with_changed_bounds( - ["EX_glc__D_e", "b2779", "GLCpts"]; - lower = [-1000.0, 0.01, -1.0], - upper = [nothing, 0.06, 12.0], + ["EX_glc__D_e", "GLCpts"]; + lower = [-1000.0, -1.0], + upper = [nothing, 12.0], ) |> with_gecko( reaction_isozymes = get_reaction_isozymes, - reaction_isozyme_masses = get_reaction_oisozyme_masses, - gene_product_limit = _ -> 1.0, + reaction_isozyme_masses = get_reaction_isozyme_masses, + gene_product_limit = g -> g == "b2779" ? 
(0.01, 0.06) : (0.0, 1.0), mass_fraction_limit = _ -> total_protein_mass, ) diff --git a/test/runtests.jl b/test/runtests.jl index 847c7f35e..e97783ea1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -37,16 +37,16 @@ end # set up the workers for Distributed, so that the tests that require more # workers do not unnecessarily load the stuff multiple times -W = addprocs(2) -t = @elapsed @everywhere using COBREXA, Tulip, JuMP -print_timing("import of packages", t) -t = @elapsed @everywhere begin - model = Model(Tulip.Optimizer) - @variable(model, 0 <= x <= 1) - @objective(model, Max, x) - optimize!(model) -end -print_timing("JuMP+Tulip code warmup", t) +#W = addprocs(2) +#t = @elapsed @everywhere using COBREXA, Tulip, JuMP +#print_timing("import of packages", t) +#t = @elapsed @everywhere begin +#model = Model(Tulip.Optimizer) +#@variable(model, 0 <= x <= 1) +#@objective(model, Max, x) +#optimize!(model) +#end +#print_timing("JuMP+Tulip code warmup", t) # make sure there's a directory for temporary data tmpdir = "tmpfiles" @@ -59,16 +59,17 @@ run_test_file("data_downloaded.jl") # import base files @testset "COBREXA test suite" begin - run_test_dir(joinpath("base", "types", "abstract"), "Abstract types") - run_test_dir(joinpath("base", "types"), "Base model types") - run_test_dir(joinpath("base", "logging"), "Logging") - run_test_dir("base", "Base functionality") - run_test_dir(joinpath("base", "utils"), "Utilities") - run_test_dir("io", "I/O functions") - run_test_dir("reconstruction") - run_test_dir("analysis") - run_test_dir(joinpath("analysis", "sampling"), "Sampling") - run_test_file("aqua.jl") + #run_test_dir(joinpath("base", "types", "abstract"), "Abstract types") + #run_test_dir(joinpath("base", "types"), "Base model types") + #run_test_dir(joinpath("base", "logging"), "Logging") + #run_test_dir("base", "Base functionality") + #run_test_dir(joinpath("base", "utils"), "Utilities") + #run_test_dir("io", "I/O functions") + #run_test_dir("reconstruction") + 
#run_test_dir("analysis") + #run_test_dir(joinpath("analysis", "sampling"), "Sampling") + #run_test_file("analysis/smoment.jl") + run_test_file("analysis/gecko.jl") end -rmprocs(W) +#rmprocs(W) From 54d9bed316a4164db27cff34726cfc1acc7c315b Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 13 Apr 2022 19:49:25 +0200 Subject: [PATCH 080/109] add gecko.jl (finally!) --- src/base/utils/gecko.jl | 85 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 src/base/utils/gecko.jl diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl new file mode 100644 index 000000000..bb3446a23 --- /dev/null +++ b/src/base/utils/gecko.jl @@ -0,0 +1,85 @@ + +""" + _gecko_reaction_name(original_name::String, direction::Int) + +Internal helper for systematically naming reactions in [`GeckoModel`](@ref). +""" +_gecko_reaction_name(original_name::String, direction::Int, isozyme_idx::Int) = + direction == 0 ? original_name : + direction > 0 ? "$original_name#forward#$isozyme_idx" : + "$original_name#reverse#$isozyme_idx" + +""" + _gecko_column_reactions(model::GeckoModel) + +Retrieve a utility mapping between reactions and split reactions; rows +correspond to "original" reactions, columns correspond to "split" reactions. +""" +_gecko_column_reactions(model::GeckoModel) = sparse( + [col.reaction_idx for col in model.columns], + 1:length(model.columns), + [col.direction >= 0 ? 1 : -1 for col in model.columns], + n_reactions(model.inner), + length(model.columns), +) + +""" + _gecko_reaction_coupling(model::GeckoModel) + +Compute the part of the coupling for [`GeckoModel`](@ref) that limits the +"arm" reactions (which group the individual split unidirectional reactions). 
+""" +_gecko_reaction_coupling(model::GeckoModel) = + let tmp = [ + (col.reaction_coupling_row, i, col.direction) for + (i, col) = enumerate(model.columns) if col.direction != 0 + ] + sparse( + [row for (row, _, _) in tmp], + [col for (_, col, _) in tmp], + [val for (_, _, val) in tmp], + length(model.coupling_row_reaction), + length(model.columns), + ) + end + +""" + _gecko_gene_product_coupling(model::GeckoModel) + +Compute the part of the coupling for GeckoModel that limits the amount of each +kind of protein available. +""" +_gecko_gene_product_coupling(model::GeckoModel) = + let + tmp = [ + (row, i, val) for (i, col) = enumerate(model.columns) if col.direction != 0 + for (row, val) in col.gene_product_coupling + ] + sparse( + [row for (row, _, _) in tmp], + [col for (_, col, _) in tmp], + [val for (_, _, val) in tmp], + length(model.coupling_row_gene_product), + length(model.columns), + ) + end + +""" + _gecko_mass_group_coupling(model::GeckoModel) + +Compute the part of the coupling for [`GeckoModel`](@ref) that limits the total +mass of enzymes available in each group of reactions. 
+""" +_gecko_mass_group_coupling(model::GeckoModel) = + let tmp = [ + (col.mass_group_row, i, col.mass_required) for + (i, col) = enumerate(model.columns) if col.direction != 0 + ] + sparse( + [row for (row, _, _) in tmp], + [col for (_, col, _) in tmp], + [val for (_, _, val) in tmp], + length(model.coupling_row_mass_group), + length(model.columns), + ) + end From a9677a09a6ef20c8a4abcf3ae72ed3156aad3d53 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 13 Apr 2022 20:55:49 +0200 Subject: [PATCH 081/109] switch gecko to gene-based mass groups (seems simpler) --- src/analysis/gecko.jl | 74 +++++++++++++++++++-------- src/base/types/wrappers/GeckoModel.jl | 3 +- src/base/utils/enzyme.jl | 28 +++++----- src/base/utils/gecko.jl | 9 ++-- test/analysis/gecko.jl | 12 +---- 5 files changed, 76 insertions(+), 50 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index e9cfd0086..c0f93749f 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -1,9 +1,9 @@ function make_gecko_model( model::StandardModel; reaction_isozymes::Function, - reaction_isozyme_masses::Function, + gene_product_mass::Function, gene_product_limit::Function, - reaction_mass_group::Function = _ -> "uncategorized", + gene_mass_group::Function = _ -> "uncategorized", mass_fraction_limit::Function, ) @@ -22,27 +22,15 @@ function make_gecko_model( for i = 1:n_reactions(model) isozymes = reaction_isozymes(rids[i]) if isempty(isozymes) - push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [], 0, 0)) + push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [], [])) continue end - group = reaction_mass_group(rids[i]) - mass_group_row = - isnothing(group) ? 0 : - haskey(mass_group_lookup, group) ? mass_group_lookup[group] : - begin - push!(coupling_row_mass_group, group) - mass_group_lookup[group] = length(coupling_row_mass_group) - end - push!(coupling_row_reaction, i) reaction_coupling_row = length(coupling_row_reaction) - masses = mass_group_row > 0 ? 
reaction_isozyme_masses(rids[i]) : zeros(length(isozymes)) - for (iidx, isozyme) in enumerate(isozymes) if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance - # reaction can run in reverse push!( columns, _gecko_column( @@ -59,13 +47,18 @@ function make_gecko_model( gene_row_lookup, coupling_row_gene_product, ), - mass_group_row, - masses[iidx] / isozyme.kcat_reverse, + _gecko_make_mass_group_coupling( + isozyme.gene_product_count, + isozyme.kcat_reverse, + gene_mass_group, + gene_product_mass, + mass_group_lookup, + coupling_row_mass_group, + ), ), ) end if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance - # reaction can run forward push!( columns, _gecko_column( @@ -82,8 +75,14 @@ function make_gecko_model( gene_row_lookup, coupling_row_gene_product, ), - mass_group_row, - masses[iidx] / isozyme.kcat_forward, + _gecko_make_mass_group_coupling( + isozyme.gene_product_count, + isozyme.kcat_forward, + gene_mass_group, + gene_product_mass, + mass_group_lookup, + coupling_row_mass_group, + ), ), ) end @@ -96,7 +95,9 @@ function make_gecko_model( collect( zip(coupling_row_gene_product, gene_product_limit.(coupling_row_gene_product)), ), - collect( zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group))), + collect( + zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group)), + ), model, ) end @@ -119,3 +120,34 @@ _gecko_make_gene_product_coupling( (row_idx, 1 / kcat) end for (gene, count) in gene_product_count if haskey(name_lookup, gene) ) + +function _gecko_make_mass_group_coupling( + gene_product_count::Dict{String,Int}, + kcat::Float64, + gene_row::Function, + gene_product_mass::Function, + row_lookup::Dict{String,Int}, + rows::Vector{String}, +) + gp_groups = gene_row.(keys(gene_product_count)) + gp_mass = gene_product_mass.(keys(gene_product_count)) + groups = unique(filter(!isnothing, gp_groups)) + group_idx = Dict(groups .=> 1:length(groups)) + vals = [0.0 for _ in groups] + + for 
(gpg, mass) in zip(gp_groups, gp_mass) + if !isnothing(gpg) + vals[group_idx[gpg]] += mass / kcat + end + end + + collect( + isnothing(group) ? 0 : begin + if !haskey(row_lookup, group) + push!(rows, group) + row_lookup[group] = length(rows) + end + (row_lookup[group], val) + end for (group, val) in zip(groups, vals) + ) +end diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index b3f276362..0893d25be 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -7,8 +7,7 @@ struct _gecko_column lb::Float64 ub::Float64 gene_product_coupling::Vector{Tuple{Int,Float64}} - mass_group_row::Int - mass_required::Float64 + mass_group_coupling::Vector{Tuple{Int,Float64}} end struct GeckoModel <: ModelWrapper diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index 5ef817408..8e76d257e 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -1,16 +1,17 @@ """ protein_dict(model::GeckoModel, opt_model) -Return a dictionary mapping protein concentrations to their ids. The argument -`opt_model` is a solved optimization problem, typically returned by +Return a dictionary mapping protein molar concentrations to their ids. The +argument `opt_model` is a solved optimization problem, typically returned by [`flux_balance_analysis`](@ref). """ -protein_dict(model::GeckoModel, opt_model) = let gids = genes(model) - is_solved(opt_model) ? - Dict( - [gids[gidx] for (gidx,_) = model.coupling_row_gene_product] .=> _gecko_gene_product_coupling(model) * value.(opt_model[:x]), - ) : nothing -end +protein_dict(model::GeckoModel, opt_model) = + let gids = genes(model) + is_solved(opt_model) ? 
+ Dict( + [gids[gidx] for (gidx, _) in model.coupling_row_gene_product] .=> _gecko_gene_product_coupling(model) * value.(opt_model[:x]), + ) : nothing + end """ protein_dict(model::GeckoModel) @@ -21,19 +22,20 @@ protein_dict(model::GeckoModel) = x -> protein_dict(model, x) """ - mass_group_dict(model::GeckoModel, opt_model) + protein_mass_group_dict(model::GeckoModel, opt_model) Extract the mass utilization in mass groups from a solved [`GeckoModel`](@ref). """ -mass_group_dict(model::GeckoModel, opt_model) = +protein_mass_group_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? Dict( - (group for (group,_)=model.coupling_row_mass_group) .=> _gecko_mass_group_coupling(model) * value.(opt_model[:x]) + (group for (group, _) in model.coupling_row_mass_group) .=> + _gecko_mass_group_coupling(model) * value.(opt_model[:x]), ) : nothing """ - mass_group_dict(model::GeckoModel) + protein_mass_group_dict(model::GeckoModel) A pipe-able variant of [`mass_group_dict`](@ref). """ -mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) +protein_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index bb3446a23..85d6453ca 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -68,12 +68,13 @@ _gecko_gene_product_coupling(model::GeckoModel) = _gecko_mass_group_coupling(model::GeckoModel) Compute the part of the coupling for [`GeckoModel`](@ref) that limits the total -mass of enzymes available in each group of reactions. +mass of each group of gene products. 
""" _gecko_mass_group_coupling(model::GeckoModel) = - let tmp = [ - (col.mass_group_row, i, col.mass_required) for - (i, col) = enumerate(model.columns) if col.direction != 0 + let + tmp = [ + (row, i, val) for (i, col) = enumerate(model.columns) if col.direction != 0 + for (row, val) in col.mass_group_coupling ] sparse( [row for (row, _, _) in tmp], diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 9f86f80a7..658f4e8b3 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -11,15 +11,7 @@ ) for (i, grr) in enumerate(reaction_gene_association(model, rid)) ) : Isozyme[] - get_reaction_isozyme_masses = - rid -> - haskey(ecoli_core_protein_stoichiometry, rid) ? - [ - sum( - counts .* - get.(Ref(ecoli_core_protein_masses), gids, 0.0), - ) for (gids, counts) in zip(reaction_gene_association(model, rid), ecoli_core_protein_stoichiometry[rid]) - ] : [] + get_gene_product_mass = gid -> get(ecoli_core_protein_masses, gid, 0.0) total_protein_mass = 100.0 @@ -32,8 +24,8 @@ ) |> with_gecko( reaction_isozymes = get_reaction_isozymes, - reaction_isozyme_masses = get_reaction_isozyme_masses, gene_product_limit = g -> g == "b2779" ? 
(0.01, 0.06) : (0.0, 1.0), + gene_product_mass = get_gene_product_mass, mass_fraction_limit = _ -> total_protein_mass, ) From 507350097e08f4b52ea09aa29397a020234fe296 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 13 Apr 2022 21:03:45 +0200 Subject: [PATCH 082/109] remove unused code, sprinkle TODOs --- src/base/types/StandardModel.jl | 83 ------- src/base/utils/enzyme.jl | 2 + src/base/utils/irreversible_stoichiometry.jl | 223 ------------------- src/reconstruction/gecko.jl | 56 +---- src/reconstruction/smoment.jl | 46 +--- test/runtests.jl | 43 ++-- 6 files changed, 27 insertions(+), 426 deletions(-) delete mode 100644 src/base/utils/irreversible_stoichiometry.jl diff --git a/src/base/types/StandardModel.jl b/src/base/types/StandardModel.jl index f4dd74e82..e842525e9 100644 --- a/src/base/types/StandardModel.jl +++ b/src/base/types/StandardModel.jl @@ -382,86 +382,3 @@ function Base.convert(::Type{StandardModel}, model::MetabolicModel) genes = modelgenes, ) end - -#TODO generalize these to other model types - -""" - reaction_bounds(model::StandardModel, rid::String) - -Return lower and upper bounds for `rid` in `model`. -""" -function reaction_bounds(model::StandardModel, rid::String) - model.reactions[rid].lb, model.reactions[rid].ub -end - -""" - is_reaction_reversible(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is reversible. -""" -function is_reaction_reversible(model::StandardModel, rid::String) - lb, ub = reaction_bounds(model, rid) - lb < 0 && ub > 0 -end - -""" - is_reaction_forward_only(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is forward only. -""" -function is_reaction_forward_only(model::StandardModel, rid::String) - lb, ub = reaction_bounds(model, rid) - lb >= 0 && ub > 0 -end - -""" - is_reaction_backward_only(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is backward only. 
-""" -function is_reaction_backward_only(model::StandardModel, rid::String) - lb, ub = reaction_bounds(model, rid) - lb < 0 && ub <= 0 -end - -""" - is_reaction_unidirectional(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is unidirectional. -""" -function is_reaction_unidirectional(model::StandardModel, rid::String) - is_reaction_forward_only(model, rid) || is_reaction_backward_only(model, rid) -end - -""" - is_reaction_blocked(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is blocked. -""" -function is_reaction_blocked(model::StandardModel, rid::String) - lb, ub = reaction_bounds(model, rid) - lb == ub == 0 -end - -""" - reaction_has_multiple_isozymes(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` is catalyzed by multiple enzymes, -i.e. it has isozymes according to the gene reaction rules. -""" -function reaction_has_multiple_isozymes(model::StandardModel, rid::String) - length(reaction_gene_association(model, rid)) > 1 -end - -""" - reaction_has_valid_gene_association(model::StandardModel, rid::String) - -Check if reaction `rid` in `model` has a gene reaction rule entry. -""" -function reaction_has_valid_gene_association(model::StandardModel, rid::String) - #TODO simplify this once COBREXA enforces universal rules for GRR representation - haskey(model.reactions, rid) && - !isnothing(reaction_gene_association(model, rid)) && - reaction_gene_association(model, rid) != [[]] && - !isempty(first(reaction_gene_association(model, rid))) -end diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index 8e76d257e..181d419e5 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -39,3 +39,5 @@ protein_mass_group_dict(model::GeckoModel, opt_model) = A pipe-able variant of [`mass_group_dict`](@ref). 
""" protein_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) + +#TODO: total protein mass for sMOMENT diff --git a/src/base/utils/irreversible_stoichiometry.jl b/src/base/utils/irreversible_stoichiometry.jl deleted file mode 100644 index 8a1178167..000000000 --- a/src/base/utils/irreversible_stoichiometry.jl +++ /dev/null @@ -1,223 +0,0 @@ -""" - _build_irreversible_stoichiometric_matrix(model::StandardModel) - -Return a stoichiometric matrix where all reactions are forward only i.e. only -positive fluxes are allowed. To accomplish this for models with isozymes, -so-called arm reactions are included. Note, reactions that are irreversible -in the original model will be irreversible in this model. E.g., if a reaction -is forward only in the original model, then there will be no reverse component -for this reaction in the irreversible stoichiometric matrix. -""" -function _build_irreversible_stoichiometric_matrix( - model::StandardModel, - rid_isozymes = Dict{String,Vector{Isozyme}}(), -) - # components used to build stoichiometric matrix - S_components = ( #TODO add size hints if possible - row_idxs = Vector{Int}(), - col_idxs = Vector{Int}(), - coeffs = Vector{Float64}(), - lbs = Vector{Float64}(), - ubs = Vector{Float64}(), - ) - - # establish the ordering in a named tuple - idxs = ( #: pseudo metabolites and reactions are added to model - met_idxs = Dict{String,Int}(), - rxn_idxs = Dict{String,Int}(), - max_rxn_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - max_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - pseudo_met_idx = [1], #TODO maybe fix, this is a dodgy way of adding a counter to a named tuple - ) - #TODO for the counter thing, basically I wanted e.g. max_rxn_idx = 1 and then update it, - #TODO but named tuples are immutable... 
:( - - # fill the matrix entries - #: blocked treated as reversible because unclear what direction the reaction would go - for rid in reactions(model) - if haskey(rid_isozymes, rid) && length(rid_isozymes[rid]) > 1 - if is_reaction_unidirectional(model, rid) - dir = is_reaction_forward_only(model, rid) ? "§FOR" : "§REV" - _add_isozyme_to_irrev_stoich_mat( - model, - rid_isozymes, - rid, - idxs, - S_components, - dir, - ) - elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) - _add_isozyme_to_irrev_stoich_mat( - model, - rid_isozymes, - rid, - idxs, - S_components, - "§FOR", - ) - _add_isozyme_to_irrev_stoich_mat( - model, - rid_isozymes, - rid, - idxs, - S_components, - "§REV", - ) - else - @warn "Unhandled bound type for $rid" - end - else # no grr or single enzyme only - if is_reaction_unidirectional(model, rid) - dir = is_reaction_forward_only(model, rid) ? "§FOR" : "§REV" - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - elseif is_reaction_reversible(model, rid) || is_reaction_blocked(model, rid) - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§FOR") - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, "§REV") - else - @warn "Unhandled bound type for $rid" - end - end - end - - S = sparse( - S_components.row_idxs, - S_components.col_idxs, - S_components.coeffs, - length(idxs.met_idxs), - length(idxs.rxn_idxs), - ) - - return S, S_components.lbs, S_components.ubs, idxs.rxn_idxs, idxs.met_idxs -end - -""" - _add_enzyme_to_irrev_stoich_mat(model, rid, idxs, S_components, dir) - -Add entries to the components that will be used to build the stoichiometric -matrix. Simple variant that does not deal with isozymes and arm reactions. -""" -function _add_enzyme_to_irrev_stoich_mat(model::StandardModel, rid, idxs, S_components, dir) - idxs.rxn_idxs[rid*dir] = idxs.max_rxn_idx[1] - idxs.max_rxn_idx[1] += 1 - fix_sign = dir == "§FOR" ? 
1 : -1 # change direction of reaction - for (mid, coeff) in reaction_stoichiometry(model, rid) - if !haskey(idxs.met_idxs, mid) - idxs.met_idxs[mid] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - end - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[rid*dir]) - push!(S_components.coeffs, fix_sign * coeff) - end - lb, ub = abs.(reaction_bounds(model, rid)) # assumes lb < ub - if dir == "§FOR" - is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, lb) - push!(S_components.ubs, ub) - else - is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, ub) - push!(S_components.ubs, lb) - end -end - -""" - _add_isozyme_to_irrev_stoich_mat( - model::StandardModel, - rid, - idxs, - S_components, - dir, - ) - -Add entries to the components that will be used to build the stoichiometric matrix. -Complex variant that deals with isozymes and arm reactions. -""" -function _add_isozyme_to_irrev_stoich_mat( - model::StandardModel, - rid_isoyzmes, - rid, - idxs, - S_components, - dir, -) - # add pseudo metabolite - pm = "§PM$(idxs.pseudo_met_idx[1])" - idxs.pseudo_met_idx[1] += 1 - idxs.met_idxs[pm] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - # find half reactions to get arm reaction - lhs = [] - rhs = [] - for (mid, coeff) in reaction_stoichiometry(model, rid) - if !haskey(idxs.met_idxs, mid) - idxs.met_idxs[mid] = idxs.max_met_idx[1] - idxs.max_met_idx[1] += 1 - end - if coeff <= 0 - push!(lhs, (mid, coeff)) - else - push!(rhs, (mid, coeff)) - end - end - product_half_reaction = dir == "§FOR" ? rhs : lhs - reagent_half_reaction = dir == "§FOR" ? lhs : rhs - # add arm reaction - fix_sign = dir == "§FOR" ? 1 : -1 # change direction of reaction - pr = rid * "§ARM" * dir - idxs.rxn_idxs[pr] = idxs.max_rxn_idx[1] #! 
this needs to get added first because of blocked possibility - idxs.max_rxn_idx[1] += 1 - push!(S_components.row_idxs, idxs.met_idxs[pm]) - push!(S_components.col_idxs, idxs.rxn_idxs[pr]) - push!(S_components.coeffs, 1) - for (mid, coeff) in reagent_half_reaction - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[pr]) - push!(S_components.coeffs, fix_sign * coeff) - end - # add bounds for ARM reaction that corresponds to original model's bounds - lb, ub = abs.(reaction_bounds(model, rid)) # assumes lb < ub - if dir == "§FOR" - is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, lb) - push!(S_components.ubs, ub) - else - is_reaction_reversible(model, rid) ? push!(S_components.lbs, 0) : - push!(S_components.lbs, ub) - push!(S_components.ubs, lb) - end - # add isozyme reactions - for (i, _) in enumerate(rid_isoyzmes[rid]) - iso_rid = rid * "§ISO$i" * dir - idxs.rxn_idxs[iso_rid] = idxs.max_rxn_idx[1] - idxs.max_rxn_idx[1] += 1 - push!(S_components.row_idxs, idxs.met_idxs[pm]) - push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) - push!(S_components.coeffs, -1) - for (mid, coeff) in product_half_reaction - push!(S_components.row_idxs, idxs.met_idxs[mid]) - push!(S_components.col_idxs, idxs.rxn_idxs[iso_rid]) - push!(S_components.coeffs, fix_sign * coeff) - end - # add bounds - push!(S_components.lbs, 0) - if is_reaction_blocked(model, rid) - push!(S_components.ubs, 0) - else - push!(S_components.ubs, 1000) # arbitrary upper bound - end - end -end - -""" - _order_id_to_idx_dict(id_to_idx_dict) - -Return the keys of `id_to_idx_dict` sorted by the values, which -are taken to be the indices. This is a helper function for -[`reactions`](@ref) and [`metabolites`](@ref). 
-""" -function _order_id_to_idx_dict(dmap) - ks = collect(keys(dmap)) - vs = collect(values(dmap)) - return ks[sortperm(vs)] -end diff --git a/src/reconstruction/gecko.jl b/src/reconstruction/gecko.jl index 488dcb382..e556166f4 100644 --- a/src/reconstruction/gecko.jl +++ b/src/reconstruction/gecko.jl @@ -19,57 +19,7 @@ permanently irreversible in the model, i.e. changing their bounds to make them reversible will have no effect. """ function change_bound!(model::GeckoModel, id; lower = nothing, upper = nothing) - gene_idx = first(indexin([id], model.gene_ids)) - - if isnothing(gene_idx) - flux_for_idx = findfirst( - x -> x == id * "§ARM§FOR" || x == id * "§FOR", - model.irrev_reaction_ids, - ) - if !isnothing(flux_for_idx) - if !isnothing(lower) - if lower <= 0 - model.xl[flux_for_idx] = 0 - else - model.xl[flux_for_idx] = lower - end - end - if !isnothing(upper) - if upper <= 0 - model.xu[flux_for_idx] = 0 - else - model.xu[flux_for_idx] = upper - end - end - end - - flux_rev_idx = findfirst( - x -> x == id * "§ARM§REV" || x == id * "§REV", - model.irrev_reaction_ids, - ) - if !isnothing(flux_rev_idx) - if !isnothing(lower) - if lower >= 0 - model.xu[flux_rev_idx] = 0 - else - model.xu[flux_rev_idx] = -lower - end - if !isnothing(upper) - if upper >= 0 - model.xl[flux_rev_idx] = 0 - else - model.xl[flux_rev_idx] = -upper - end - end - end - end - else - n = length(model.irrev_reaction_ids) - !isnothing(lower) && (model.xl[n+gene_idx] = lower) - !isnothing(upper) && (model.xu[n+gene_idx] = upper) - end - - return nothing + #TODO end """ @@ -84,7 +34,5 @@ function change_bounds!( lower = fill(nothing, length(ids)), upper = fill(nothing, length(ids)), ) - for (id, lower, upper) in zip(ids, lower, upper) - change_bound!(model, id; lower = lower, upper = upper) - end + #TODO end diff --git a/src/reconstruction/smoment.jl b/src/reconstruction/smoment.jl index a5c5ae48c..ba142b09c 100644 --- a/src/reconstruction/smoment.jl +++ b/src/reconstruction/smoment.jl @@ -19,47 
+19,7 @@ permanently irreversible in the model, i.e. changing their bounds to make them reversible will have no effect. """ function change_bound!(model::SMomentModel, id; lower = nothing, upper = nothing) - - - flux_for_idx = - findfirst(x -> x == id * "§ARM§FOR" || x == id * "§FOR", model.irrev_reaction_ids) - if !isnothing(flux_for_idx) - if !isnothing(lower) - if lower <= 0 - model.xl[flux_for_idx] = 0 - else - model.xl[flux_for_idx] = lower - end - end - if !isnothing(upper) - if upper <= 0 - model.xu[flux_for_idx] = 0 - else - model.xu[flux_for_idx] = upper - end - end - end - - flux_rev_idx = - findfirst(x -> x == id * "§ARM§REV" || x == id * "§REV", model.irrev_reaction_ids) - if !isnothing(flux_rev_idx) - if !isnothing(lower) - if lower >= 0 - model.xu[flux_rev_idx] = 0 - else - model.xu[flux_rev_idx] = -lower - end - if !isnothing(upper) - if upper >= 0 - model.xl[flux_rev_idx] = 0 - else - model.xl[flux_rev_idx] = -upper - end - end - end - end - - return nothing + # TODO end """ @@ -74,7 +34,5 @@ function change_bounds!( lower = fill(nothing, length(ids)), upper = fill(nothing, length(ids)), ) - for (id, lower, upper) in zip(ids, lower, upper) - change_bound!(model, id; lower = lower, upper = upper) - end + # TODO end diff --git a/test/runtests.jl b/test/runtests.jl index e97783ea1..847c7f35e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -37,16 +37,16 @@ end # set up the workers for Distributed, so that the tests that require more # workers do not unnecessarily load the stuff multiple times -#W = addprocs(2) -#t = @elapsed @everywhere using COBREXA, Tulip, JuMP -#print_timing("import of packages", t) -#t = @elapsed @everywhere begin -#model = Model(Tulip.Optimizer) -#@variable(model, 0 <= x <= 1) -#@objective(model, Max, x) -#optimize!(model) -#end -#print_timing("JuMP+Tulip code warmup", t) +W = addprocs(2) +t = @elapsed @everywhere using COBREXA, Tulip, JuMP +print_timing("import of packages", t) +t = @elapsed @everywhere begin + model = 
Model(Tulip.Optimizer) + @variable(model, 0 <= x <= 1) + @objective(model, Max, x) + optimize!(model) +end +print_timing("JuMP+Tulip code warmup", t) # make sure there's a directory for temporary data tmpdir = "tmpfiles" @@ -59,17 +59,16 @@ run_test_file("data_downloaded.jl") # import base files @testset "COBREXA test suite" begin - #run_test_dir(joinpath("base", "types", "abstract"), "Abstract types") - #run_test_dir(joinpath("base", "types"), "Base model types") - #run_test_dir(joinpath("base", "logging"), "Logging") - #run_test_dir("base", "Base functionality") - #run_test_dir(joinpath("base", "utils"), "Utilities") - #run_test_dir("io", "I/O functions") - #run_test_dir("reconstruction") - #run_test_dir("analysis") - #run_test_dir(joinpath("analysis", "sampling"), "Sampling") - #run_test_file("analysis/smoment.jl") - run_test_file("analysis/gecko.jl") + run_test_dir(joinpath("base", "types", "abstract"), "Abstract types") + run_test_dir(joinpath("base", "types"), "Base model types") + run_test_dir(joinpath("base", "logging"), "Logging") + run_test_dir("base", "Base functionality") + run_test_dir(joinpath("base", "utils"), "Utilities") + run_test_dir("io", "I/O functions") + run_test_dir("reconstruction") + run_test_dir("analysis") + run_test_dir(joinpath("analysis", "sampling"), "Sampling") + run_test_file("aqua.jl") end -#rmprocs(W) +rmprocs(W) From ab630f1eab696e2c85ee0583706e607be45b4de4 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 14 Apr 2022 11:48:26 +0200 Subject: [PATCH 083/109] remove the unnecessary reaction coupling from sMOMENT --- src/base/types/wrappers/SMomentModel.jl | 31 ++++++++----------------- src/base/utils/smoment.jl | 21 ----------------- 2 files changed, 10 insertions(+), 42 deletions(-) diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index 995bb5afb..c5aef75f8 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -7,13 +7,11 @@ A 
helper type that describes the contents of [`SMomentModel`](@ref)s. struct _smoment_column reaction_idx::Int # number of the corresponding reaction in the inner model direction::Int # 0 if "as is" and unique, -1 if reverse-only part, 1 if forward-only part - coupling_row::Int # number of row in the coupling (0 if direction==0) lb::Float64 # must be 0 if the reaction is unidirectional (if direction!=0) ub::Float64 capacity_required::Float64 # must be 0 for bidirectional reactions (if direction==0) end -# TODO fix the docstring """ struct SMomentModel <: ModelWrapper @@ -29,8 +27,6 @@ The model is constructed as follows: - stoichiometry of the original model is retained as much as possible, but enzymatic reations are split into forward and reverse parts (marked by a suffix like `...#forward` and `...#reverse`), -- sums of forward and reverse reaction pair fluxes are constrained accordingly - to the original model, - stoichiometry is expanded by a virtual metabolite "enzyme capacity" which is consumed by all enzymatic reactions at a rate given by enzyme mass divided by the corresponding kcat, @@ -40,25 +36,21 @@ The model is constructed as follows: The `SMomentModel` structure contains a worked-out representation of the optimization problem atop a wrapped [`MetabolicModel`](@ref), in particular the separation of certain reactions into unidirectional forward and reverse parts, -the grouping of these reactions together into virtual "arm" reactions constrained -by bounds from the inner model, an "enzyme capacity" required for each -reaction, and the value of the maximum capacity constraint. +an "enzyme capacity" required for each reaction, and the value of the maximum +capacity constraint. Original coupling is retained. 
In the structure, field `columns` describes the correspondence of stoichiometry -columns to the stoichiometry and data of the internal wrapped model; field -`coupling_row_reaction` maps the generated coupling constraints to reaction -indexes in the wrapped model, and `total_enzyme_capacity` is the total bound on -the enzyme capacity consumption as specified in sMOMENT algorithm. +columns to the stoichiometry and data of the internal wrapped model, and +`total_enzyme_capacity` is the total bound on the enzyme capacity consumption +as specified in sMOMENT algorithm. This implementation allows easy access to fluxes from the split reactions (available in `reactions(model)`), while the original "simple" reactions from the wrapped model are retained as [`fluxes`](@ref). All additional constraints are implemented using [`coupling`](@ref) and [`coupling_bounds`](@ref). -Original coupling is retained. """ struct SMomentModel <: ModelWrapper columns::Vector{_smoment_column} - coupling_row_reaction::Vector{Int} total_enzyme_capacity::Float64 inner::MetabolicModel @@ -130,7 +122,6 @@ enzyme capacity. """ coupling(model::SMomentModel) = vcat( coupling(model.inner) * _smoment_column_reactions(model), - _smoment_reaction_coupling(model), [col.capacity_required for col in model.columns]', ) @@ -141,7 +132,7 @@ Count the coupling constraints in [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for details). """ n_coupling_constraints(model::SMomentModel) = - n_coupling_constraints(model.inner) + length(model.coupling_row_reaction) + 1 + n_coupling_constraints(model.inner) + 1 """ coupling_bounds(model::SMomentModel) @@ -149,11 +140,9 @@ n_coupling_constraints(model::SMomentModel) = The coupling bounds for [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for details). 
""" -function coupling_bounds(model::SMomentModel) - (iclb, icub) = coupling_bounds(model.inner) - (ilb, iub) = bounds(model.inner) - return ( - vcat(iclb, ilb[model.coupling_row_reaction], [0.0]), - vcat(icub, iub[model.coupling_row_reaction], [model.total_enzyme_capacity]), +coupling_bounds(model::SMomentModel) = let (iclb, icub) = coupling_bounds(model.inner) + ( + vcat(iclb, [0.0]), + vcat(icub, [model.total_enzyme_capacity]), ) end diff --git a/src/base/utils/smoment.jl b/src/base/utils/smoment.jl index 02befa605..15751715c 100644 --- a/src/base/utils/smoment.jl +++ b/src/base/utils/smoment.jl @@ -22,27 +22,6 @@ _smoment_column_reactions(model::SMomentModel) = sparse( length(model.columns), ) -""" - _smoment_reaction_coupling(model::SMomentModel) - -Compute the part of the coupling for [`SMomentModel`](@ref) that limits the -"arm" reactions (which group the individual split unidirectional reactions). -""" -_smoment_reaction_coupling(model::SMomentModel) = - let tmp = [ - (col.coupling_row, i, col.direction) for - (i, col) = enumerate(model.columns) if col.direction != 0 - ] - - sparse( - [row for (row, _, _) in tmp], - [col for (_, col, _) in tmp], - [val for (_, _, val) in tmp], - length(model.coupling_row_reaction), - length(model.columns), - ) - end - """ smoment_isozyme_speed(isozyme::Isozyme, gene_product_molar_mass) From b06877fe04db610ea0740f17df3179a4ad7c0890 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 14 Apr 2022 11:55:17 +0200 Subject: [PATCH 084/109] formatting --- src/base/types/wrappers/SMomentModel.jl | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index c5aef75f8..f28a1ec0c 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -131,8 +131,7 @@ coupling(model::SMomentModel) = vcat( Count the coupling constraints in [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for 
details). """ -n_coupling_constraints(model::SMomentModel) = - n_coupling_constraints(model.inner) + 1 +n_coupling_constraints(model::SMomentModel) = n_coupling_constraints(model.inner) + 1 """ coupling_bounds(model::SMomentModel) @@ -140,9 +139,7 @@ n_coupling_constraints(model::SMomentModel) = The coupling bounds for [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for details). """ -coupling_bounds(model::SMomentModel) = let (iclb, icub) = coupling_bounds(model.inner) - ( - vcat(iclb, [0.0]), - vcat(icub, [model.total_enzyme_capacity]), - ) -end +coupling_bounds(model::SMomentModel) = + let (iclb, icub) = coupling_bounds(model.inner) + (vcat(iclb, [0.0]), vcat(icub, [model.total_enzyme_capacity])) + end From 86959197edd71093e9a9f3e6ea2515193f6ba595 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 14 Apr 2022 11:58:59 +0200 Subject: [PATCH 085/109] fixup: constructors --- src/analysis/smoment.jl | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index 185a82051..f4c5255f6 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -29,7 +29,6 @@ function make_smoment_model( total_enzyme_capacity::Float64, ) columns = Vector{_smoment_column}() - coupling_row_reaction = Int[] (lbs, ubs) = bounds(model) rids = reactions(model) @@ -38,12 +37,9 @@ function make_smoment_model( isozyme = reaction_isozymes(rids[i]) if isnothing(isozyme) # non-enzymatic reaction (or a totally ignored one) - push!(columns, _smoment_column(i, 0, 0, lbs[i], ubs[i], 0)) + push!(columns, _smoment_column(i, 0, lbs[i], ubs[i], 0)) continue end - # pick a new row for "arm reaction" coupling - push!(coupling_row_reaction, i) - coupling_row = length(coupling_row_reaction) mw = sum( gene_product_molar_mass(gid) * ps for (gid, ps) in isozyme.gene_product_count @@ -53,14 +49,7 @@ function make_smoment_model( # reaction can run in reverse push!( columns, - _smoment_column( - i, - -1, - 
coupling_row, - max(-ubs[i], 0), - -lbs[i], - mw / isozyme.kcat_reverse, - ), + _smoment_column(i, -1, max(-ubs[i], 0), -lbs[i], mw / isozyme.kcat_reverse), ) end @@ -68,17 +57,10 @@ function make_smoment_model( # reaction can run forward push!( columns, - _smoment_column( - i, - 1, - coupling_row, - max(lbs[i], 0), - ubs[i], - mw / isozyme.kcat_forward, - ), + _smoment_column(i, 1, max(lbs[i], 0), ubs[i], mw / isozyme.kcat_forward), ) end end - return SMomentModel(columns, coupling_row_reaction, total_enzyme_capacity, model) + return SMomentModel(columns, total_enzyme_capacity, model) end From 7244ee67e45656091feae92438ec170f6cce0a21 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 14 Apr 2022 14:48:59 +0200 Subject: [PATCH 086/109] simplify gecko a bit (remove unnecessary reaction arms) --- src/analysis/gecko.jl | 92 ++++++++++++++++------------------------ src/base/utils/enzyme.jl | 1 - src/base/utils/gecko.jl | 10 ++--- 3 files changed, 41 insertions(+), 62 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index c0f93749f..83118e41e 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -26,65 +26,45 @@ function make_gecko_model( continue end - push!(coupling_row_reaction, i) - reaction_coupling_row = length(coupling_row_reaction) + reaction_coupling_row = + length(isozymes) > 1 ? 
begin + push!(coupling_row_reaction, i) + length(coupling_row_reaction) + end : 0 for (iidx, isozyme) in enumerate(isozymes) - if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance - push!( - columns, - _gecko_column( - i, - iidx, - -1, - reaction_coupling_row, - max(-ubs[i], 0), - -lbs[i], - _gecko_make_gene_product_coupling( - isozyme.gene_product_count, - isozyme.kcat_reverse, - gene_name_lookup, - gene_row_lookup, - coupling_row_gene_product, + for (lb, ub, kcat, dir) in [ + (-ubs[i], -lbs[i], isozyme.kcat_reverse, -1), + (lbs[i], ubs[i], isozyme.kcat_forward, 1), + ] + if max(lb, ub) > 0 && kcat > _constants.tolerance + push!( + columns, + _gecko_column( + i, + iidx, + dir, + reaction_coupling_row, + max(lb, 0), + ub, + _gecko_make_gene_product_coupling( + isozyme.gene_product_count, + kcat, + gene_name_lookup, + gene_row_lookup, + coupling_row_gene_product, + ), + _gecko_make_mass_group_coupling( + isozyme.gene_product_count, + kcat, + gene_mass_group, + gene_product_mass, + mass_group_lookup, + coupling_row_mass_group, + ), ), - _gecko_make_mass_group_coupling( - isozyme.gene_product_count, - isozyme.kcat_reverse, - gene_mass_group, - gene_product_mass, - mass_group_lookup, - coupling_row_mass_group, - ), - ), - ) - end - if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance - push!( - columns, - _gecko_column( - i, - iidx, - 1, - reaction_coupling_row, - max(lbs[i], 0), - ubs[i], - _gecko_make_gene_product_coupling( - isozyme.gene_product_count, - isozyme.kcat_forward, - gene_name_lookup, - gene_row_lookup, - coupling_row_gene_product, - ), - _gecko_make_mass_group_coupling( - isozyme.gene_product_count, - isozyme.kcat_forward, - gene_mass_group, - gene_product_mass, - mass_group_lookup, - coupling_row_mass_group, - ), - ), - ) + ) + end end end end diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index 181d419e5..d6866569f 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -20,7 
+20,6 @@ A pipe-able variant of [`protein_dict`](@ref). """ protein_dict(model::GeckoModel) = x -> protein_dict(model, x) - """ protein_mass_group_dict(model::GeckoModel, opt_model) diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index 85d6453ca..fad105a13 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -32,7 +32,7 @@ Compute the part of the coupling for [`GeckoModel`](@ref) that limits the _gecko_reaction_coupling(model::GeckoModel) = let tmp = [ (col.reaction_coupling_row, i, col.direction) for - (i, col) = enumerate(model.columns) if col.direction != 0 + (i, col) = enumerate(model.columns) if col.reaction_coupling_row != 0 ] sparse( [row for (row, _, _) in tmp], @@ -52,8 +52,8 @@ kind of protein available. _gecko_gene_product_coupling(model::GeckoModel) = let tmp = [ - (row, i, val) for (i, col) = enumerate(model.columns) if col.direction != 0 - for (row, val) in col.gene_product_coupling + (row, i, val) for (i, col) in enumerate(model.columns) for + (row, val) in col.gene_product_coupling ] sparse( [row for (row, _, _) in tmp], @@ -73,8 +73,8 @@ mass of each group of gene products. 
_gecko_mass_group_coupling(model::GeckoModel) = let tmp = [ - (row, i, val) for (i, col) = enumerate(model.columns) if col.direction != 0 - for (row, val) in col.mass_group_coupling + (row, i, val) for (i, col) in enumerate(model.columns) for + (row, val) in col.mass_group_coupling ] sparse( [row for (row, _, _) in tmp], From f445a6d3850b81ad0a25f946036294abbdda657d Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 14 Apr 2022 15:00:49 +0200 Subject: [PATCH 087/109] add a shortcut for extracting sMOMENT mass utilization --- src/base/utils/enzyme.jl | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzyme.jl index d6866569f..eb834a3df 100644 --- a/src/base/utils/enzyme.jl +++ b/src/base/utils/enzyme.jl @@ -39,4 +39,20 @@ A pipe-able variant of [`mass_group_dict`](@ref). """ protein_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) -#TODO: total protein mass for sMOMENT + +""" + protein_mass(model::SMomentModel) + +Extract the total mass utilization in a solved [`SMomentModel`](@ref). +""" +protein_mass(model::SMomentModel, opt_model) = + is_solved(opt_model) ? + sum((col.capacity_required for col in model.columns) .* value.(opt_model[:x])) : nothing + +""" + protein_mass(model::SMomentModel) + + +A pipe-able variant of [`protein_mass`](@ref). 
+""" +protein_mass(model::SMomentModel) = x -> protein_mass(model, x) From f2b59ef916b28e98389adb588504680211e09136 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 14 Apr 2022 15:11:44 +0200 Subject: [PATCH 088/109] more cleaning --- src/analysis/gecko.jl | 115 ++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 66 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 83118e41e..c76fbaa80 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -26,6 +26,9 @@ function make_gecko_model( continue end + # if the reaction has multiple isozymes, it needs extra coupling to + # ensure that the total rate of the reaction doesn't exceed the + # "global" limit reaction_coupling_row = length(isozymes) > 1 ? begin push!(coupling_row_reaction, i) @@ -33,11 +36,54 @@ function make_gecko_model( end : 0 for (iidx, isozyme) in enumerate(isozymes) + # loop over both directions for all isozymes for (lb, ub, kcat, dir) in [ (-ubs[i], -lbs[i], isozyme.kcat_reverse, -1), (lbs[i], ubs[i], isozyme.kcat_forward, 1), ] if max(lb, ub) > 0 && kcat > _constants.tolerance + # prepare the coupling with gene product molar + gene_product_coupling = collect( + begin + gidx = gene_name_lookup[gene] + row_idx = if haskey(gene_row_lookup, gidx) + gene_row_lookup[gidx] + else + push!(coupling_row_gene_product, gidx) + gene_row_lookup[gidx] = + length(coupling_row_gene_product) + end + (row_idx, 1 / kcat) + end for (gene, count) in isozyme.gene_product_count if + haskey(gene_name_lookup, gene) + ) + + # prepare the coupling with the mass groups + gp_groups = gene_mass_group.(keys(isozyme.gene_product_count)) + gp_mass = gene_product_mass.(keys(isozyme.gene_product_count)) + groups = unique(filter(!isnothing, gp_groups)) + group_idx = Dict(groups .=> 1:length(groups)) + vals = [0.0 for _ in groups] + + for (gpg, mass) in zip(gp_groups, gp_mass) + if !isnothing(gpg) + vals[group_idx[gpg]] += mass / kcat + end + end + + 
mass_group_coupling = collect( + isnothing(group) ? 0 : + begin + if !haskey(mass_group_lookup, group) + push!(coupling_row_mass_group, group) + mass_group_lookup[group] = + length(coupling_row_mass_group) + end + (mass_group_lookup[group], val) + end for (group, val) in zip(groups, vals) + ) + + # make a new column push!( columns, _gecko_column( @@ -47,21 +93,8 @@ function make_gecko_model( reaction_coupling_row, max(lb, 0), ub, - _gecko_make_gene_product_coupling( - isozyme.gene_product_count, - kcat, - gene_name_lookup, - gene_row_lookup, - coupling_row_gene_product, - ), - _gecko_make_mass_group_coupling( - isozyme.gene_product_count, - kcat, - gene_mass_group, - gene_product_mass, - mass_group_lookup, - coupling_row_mass_group, - ), + gene_product_coupling, + mass_group_coupling, ), ) end @@ -69,7 +102,7 @@ function make_gecko_model( end end - coupling_row_mass_group = return GeckoModel( + GeckoModel( columns, coupling_row_reaction, collect( @@ -81,53 +114,3 @@ function make_gecko_model( model, ) end - -_gecko_make_gene_product_coupling( - gene_product_count::Dict{String,Int}, - kcat::Float64, - name_lookup::Dict{String,Int}, - row_lookup::Dict{Int,Int}, - rows::Vector{Int}, -) = collect( - begin - gidx = name_lookup[gene] - row_idx = if haskey(row_lookup, gidx) - row_lookup[gidx] - else - push!(rows, gidx) - row_lookup[gidx] = length(rows) - end - (row_idx, 1 / kcat) - end for (gene, count) in gene_product_count if haskey(name_lookup, gene) -) - -function _gecko_make_mass_group_coupling( - gene_product_count::Dict{String,Int}, - kcat::Float64, - gene_row::Function, - gene_product_mass::Function, - row_lookup::Dict{String,Int}, - rows::Vector{String}, -) - gp_groups = gene_row.(keys(gene_product_count)) - gp_mass = gene_product_mass.(keys(gene_product_count)) - groups = unique(filter(!isnothing, gp_groups)) - group_idx = Dict(groups .=> 1:length(groups)) - vals = [0.0 for _ in groups] - - for (gpg, mass) in zip(gp_groups, gp_mass) - if !isnothing(gpg) - 
vals[group_idx[gpg]] += mass / kcat - end - end - - collect( - isnothing(group) ? 0 : begin - if !haskey(row_lookup, group) - push!(rows, group) - row_lookup[group] = length(rows) - end - (row_lookup[group], val) - end for (group, val) in zip(groups, vals) - ) -end From 352df8652f94319f1350965f8c4846358d0a9df6 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 14 Apr 2022 15:45:15 +0200 Subject: [PATCH 089/109] please add function types --- src/analysis/gecko.jl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index c76fbaa80..e01d8659d 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -12,13 +12,14 @@ function make_gecko_model( coupling_row_gene_product = Int[] coupling_row_mass_group = String[] - gene_name_lookup = Dict(genes(model) .=> 1:n_genes(model)) - gene_row_lookup = Dict{Int,Int}() - mass_group_lookup = Dict{String,Int}() - + gids = genes(model) (lbs, ubs) = bounds(model) rids = reactions(model) + gene_name_lookup = Dict(gids .=> 1:length(gids)) + gene_row_lookup = Dict{Int,Int}() + mass_group_lookup = Dict{String,Int}() + for i = 1:n_reactions(model) isozymes = reaction_isozymes(rids[i]) if isempty(isozymes) @@ -106,7 +107,10 @@ function make_gecko_model( columns, coupling_row_reaction, collect( - zip(coupling_row_gene_product, gene_product_limit.(coupling_row_gene_product)), + zip( + coupling_row_gene_product, + gene_product_limit.(gids[coupling_row_gene_product]), + ), ), collect( zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group)), From 3bcccccc3ad778fbc8fd287d905b81f030d7f090 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Fri, 15 Apr 2022 10:34:23 +0200 Subject: [PATCH 090/109] regroup enzymatic model utilities, remove bounds-changing functions We might reintroduce bounds changing, but at the current state it is easier to change the bounds in the inner model and then just regenerate the top layer.
--- src/base/utils/{enzyme.jl => enzymes.jl} | 0 src/reconstruction/enzymes.jl | 17 +++++++++++ src/reconstruction/gecko.jl | 38 ------------------------ src/reconstruction/smoment.jl | 38 ------------------------ 4 files changed, 17 insertions(+), 76 deletions(-) rename src/base/utils/{enzyme.jl => enzymes.jl} (100%) create mode 100644 src/reconstruction/enzymes.jl delete mode 100644 src/reconstruction/gecko.jl delete mode 100644 src/reconstruction/smoment.jl diff --git a/src/base/utils/enzyme.jl b/src/base/utils/enzymes.jl similarity index 100% rename from src/base/utils/enzyme.jl rename to src/base/utils/enzymes.jl diff --git a/src/reconstruction/enzymes.jl b/src/reconstruction/enzymes.jl new file mode 100644 index 000000000..09dac3dc6 --- /dev/null +++ b/src/reconstruction/enzymes.jl @@ -0,0 +1,17 @@ +""" + with_smoment(; kwargs...) + +Specifies a model variant which adds extra semantics of the sMOMENT algorithm, +giving a [`SMomentModel`](@ref). The arguments are forwarded to +[`make_smoment_model`](@ref). Intended for usage with [`screen`](@ref). +""" +with_smoment(; kwargs...) = model -> make_smoment_model(model; kwargs...) + +""" + with_gecko(; kwargs...) + +Specifies a model variant which adds extra semantics of the Gecko algorithm, +giving a [`GeckoModel`](@ref). The arguments are forwarded to +[`make_gecko_model`](@ref). Intended for usage with [`screen`](@ref). +""" +with_gecko(; kwargs...) = model -> make_gecko_model(model; kwargs...) diff --git a/src/reconstruction/gecko.jl b/src/reconstruction/gecko.jl deleted file mode 100644 index e556166f4..000000000 --- a/src/reconstruction/gecko.jl +++ /dev/null @@ -1,38 +0,0 @@ - - -""" - with_gecko(; kwargs...) - -Specifies a model variant which adds extra semantics of the Gecko algorithm, -giving a [`GeckoModel`](@ref). The arguments are forwarded to -[`make_gecko_model`](@ref). Intended for usage with [`screen`](@ref). -""" -with_gecko(; kwargs...) = model -> make_gecko_model(model; kwargs...) 
- -""" - change_bound!(model::GeckoModel, id; lower=nothing, upper=nothing) - -Change the bound of variable in `model`. Does not change the bound if respective -bound is `nothing`. Note, for `GeckoModel`s, if the model used to construct the -`GeckoModel` has irreversible reactions, then these reactions will be -permanently irreversible in the model, i.e. changing their bounds to make them -reversible will have no effect. -""" -function change_bound!(model::GeckoModel, id; lower = nothing, upper = nothing) - #TODO -end - -""" - change_bounds!(model::GeckoModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) - -Change the bounds of multiple variables in `model` simultaneously. See -[`change_bound`](@ref) for details. -""" -function change_bounds!( - model::GeckoModel, - ids; - lower = fill(nothing, length(ids)), - upper = fill(nothing, length(ids)), -) - #TODO -end diff --git a/src/reconstruction/smoment.jl b/src/reconstruction/smoment.jl deleted file mode 100644 index ba142b09c..000000000 --- a/src/reconstruction/smoment.jl +++ /dev/null @@ -1,38 +0,0 @@ - - -""" - with_smoment(; kwargs...) - -Specifies a model variant which adds extra semantics of the sMOMENT algorithm, -giving a [`SMomentModel`](@ref). The arguments are forwarded to -[`make_smoment_model`](@ref). Intended for usage with [`screen`](@ref). -""" -with_smoment(; kwargs...) = model -> make_smoment_model(model; kwargs...) - -""" - change_bound!(model::SMomentModel, id; lower=nothing, upper=nothing) - -Change the bound of variable in `model`. Does not change the bound if respective -bound is `nothing`. Note, for `SMomentModel`s, if the model used to construct the -`SMomentModel` has irreversible reactions, then these reactions will be -permanently irreversible in the model, i.e. changing their bounds to make them -reversible will have no effect. 
-""" -function change_bound!(model::SMomentModel, id; lower = nothing, upper = nothing) - # TODO -end - -""" - change_bounds!(model::SMomentModel, ids; lower=fill(nothing, length(ids)), upper=fill(nothing, length(ids))) - -Change the bounds of multiple variables in `model` simultaneously. See -[`change_bound`](@ref) for details. -""" -function change_bounds!( - model::SMomentModel, - ids; - lower = fill(nothing, length(ids)), - upper = fill(nothing, length(ids)), -) - # TODO -end From a563a2ae80b871a31f9c915095513977f33daf01 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Fri, 15 Apr 2022 10:58:07 +0200 Subject: [PATCH 091/109] clean up --- src/analysis/gecko.jl | 131 ++++++++++++++++++------ src/analysis/smoment.jl | 44 ++++---- src/base/types/wrappers/GeckoModel.jl | 47 ++++++++- src/base/types/wrappers/SMomentModel.jl | 15 ++- test/analysis/gecko.jl | 37 +++++-- 5 files changed, 208 insertions(+), 66 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index e01d8659d..d7a111adf 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -1,11 +1,66 @@ +""" + make_gecko_model( + model::MetabolicModel; + reaction_isozymes::Union{Function,Dict{String,Isozyme}}, + gene_product_limit::Union{Function,Dict{String,Tuple{Float64,Float64}}}, + gene_product_molar_mass::Union{Function,Dict{String,Float64}}, + gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", + group_mass_limit::Union{Function,Dict{String,Float64}}, + relaxed_arm_reaction_bounds = false, + ) + +Wrap a model into a [`GeckoModel`](@ref), following the structure given by +GECKO algorithm (see [`GeckoModel`](@ref) documentation for details). + +# Arguments + +- `reaction_isozymes` is a function that returns a vector of [`Isozyme`](@ref)s + for each reaction, or empty vector if the reaction is not enzymatic. 
+- `gene_product_limit` is a function that returns lower and upper bound for + concentration for a given gene product (specified by the same string gene ID as in + `reaction_isozymes`), as `Tuple{Float64,Float64}`. +- `gene_product_molar_mass` is a function that returns a numeric molar mass of + a given gene product specified by string gene ID. +- `gene_mass_group` is a function that returns a string group identifier for a + given gene product, again specified by string gene ID. By default, all gene + products belong to group `"uncategorized"` which is the behavior of original + GECKO. +- `group_mass_limit` is a function that returns the maximum mass for a given + mass group. +- `relaxed_arm_reaction_bounds` is a boolean flag that relaxes the constraints + on the "arm" reactions specified by GECKO. By default (value `false`), there + is a separate constraint that limits the total flux through forward-direction + reaction for all its isozymes (ensuring that the sum of forward rates is less + than "global" upper bound), and another separate constraint that limits the + total flux through reverse-direction reaction isozymes. Value `true` groups + both forward and reverse reactions in a single constraint, allowing the total + forward flux to be actually greater than the upper bound IF the reverse flux + can balance it to fit into the upper and lower bound constraints (in turn, + more enzyme can be "wasted" by a reaction that runs in both directions). + +Alternatively, all function arguments may be also supplied as dictionaries that +provide the same data lookup. 
+""" function make_gecko_model( - model::StandardModel; - reaction_isozymes::Function, - gene_product_mass::Function, - gene_product_limit::Function, - gene_mass_group::Function = _ -> "uncategorized", - mass_fraction_limit::Function, + model::MetabolicModel; + reaction_isozymes::Union{Function,Dict{String,Isozyme}}, + gene_product_limit::Union{Function,Dict{String,Tuple{Float64,Float64}}}, + gene_product_molar_mass::Union{Function,Dict{String,Float64}}, + gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", + group_mass_limit::Union{Function,Dict{String,Float64}}, + relaxed_arm_reaction_bounds = false, ) + ris_ = + reaction_isozymes isa Function ? reaction_isozymes : (gid -> reaction_isozymes[gid]) + gpl_ = + gene_product_limit isa Function ? gene_product_limit : + (gid -> gene_product_limit[gid]) + gpmm_ = + gene_product_molar_mass isa Function ? gene_product_molar_mass : + (gid -> gene_product_molar_mass[gid]) + gmg_ = gene_mass_group isa Function ? gene_mass_group : (gid -> gene_mass_group[gid]) + gml_ = group_mass_limit isa Function ? group_mass_limit : (grp -> group_mass_limit[grp]) + # ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch columns = Vector{_gecko_column}() coupling_row_reaction = Int[] @@ -21,7 +76,7 @@ function make_gecko_model( mass_group_lookup = Dict{String,Int}() for i = 1:n_reactions(model) - isozymes = reaction_isozymes(rids[i]) + isozymes = ris_(rids[i]) if isempty(isozymes) push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [], [])) continue @@ -30,19 +85,40 @@ function make_gecko_model( # if the reaction has multiple isozymes, it needs extra coupling to # ensure that the total rate of the reaction doesn't exceed the # "global" limit - reaction_coupling_row = - length(isozymes) > 1 ? 
begin - push!(coupling_row_reaction, i) - length(coupling_row_reaction) - end : 0 - - for (iidx, isozyme) in enumerate(isozymes) - # loop over both directions for all isozymes - for (lb, ub, kcat, dir) in [ - (-ubs[i], -lbs[i], isozyme.kcat_reverse, -1), - (lbs[i], ubs[i], isozyme.kcat_forward, 1), - ] - if max(lb, ub) > 0 && kcat > _constants.tolerance + if relaxed_arm_reaction_bounds + reaction_coupling_row = + length(isozymes) > 1 ? begin + push!(coupling_row_reaction, i) + length(coupling_row_reaction) + end : 0 + end + + # loop over both directions for all isozymes + for (lb, ub, kcatf, dir) in [ + (-ubs[i], -lbs[i], i -> i.kcat_reverse, -1), + (lbs[i], ubs[i], i -> i.kcat_forward, 1), + ] + if !relaxed_arm_reaction_bounds + # In this case, the coefficients in the coupling matrix will be + # the same as in the combined case, only categorized in + # separate rows for negative and positive ones. Surprisingly, + # we do not need to explicitly remember the bounds, because the + # ones taken from the original model are perfectly okay -- the + # "reverse" direction is unreachable because of individual + # bounds on split reactions, and the "forward" direction is + # properly negated in the reverse case to work nicely with the + # global lower bound. + reaction_coupling_row = + ub > 0 && length(isozymes) > 1 ? 
begin + push!(coupling_row_reaction, i) + length(coupling_row_reaction) + end : 0 + end + + # all isozymes in this direction + for (iidx, isozyme) in enumerate(isozymes) + kcat = kcatf(isozyme) + if ub > 0 && kcat > _constants.tolerance # prepare the coupling with gene product molar gene_product_coupling = collect( begin @@ -60,8 +136,8 @@ function make_gecko_model( ) # prepare the coupling with the mass groups - gp_groups = gene_mass_group.(keys(isozyme.gene_product_count)) - gp_mass = gene_product_mass.(keys(isozyme.gene_product_count)) + gp_groups = gmg_.(keys(isozyme.gene_product_count)) + gp_mass = gpmm_.(keys(isozyme.gene_product_count)) groups = unique(filter(!isnothing, gp_groups)) group_idx = Dict(groups .=> 1:length(groups)) vals = [0.0 for _ in groups] @@ -106,15 +182,8 @@ function make_gecko_model( GeckoModel( columns, coupling_row_reaction, - collect( - zip( - coupling_row_gene_product, - gene_product_limit.(gids[coupling_row_gene_product]), - ), - ), - collect( - zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group)), - ), + collect(zip(coupling_row_gene_product, gpl_.(gids[coupling_row_gene_product]))), + collect(zip(coupling_row_mass_group, gml_.(coupling_row_mass_group))), model, ) end diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index f4c5255f6..df6236d90 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -2,48 +2,56 @@ """ make_smoment_model( model::MetabolicModel; - reaction_isozymes::Function, - gene_product_molar_mass::Function, + reaction_isozymes::Union{Function, Dict{String, Isozyme}}, + gene_product_molar_mass::Union{Function, Dict{String, Float64}}, total_enzyme_capacity::Float64, ) Construct a model with a structure given by sMOMENT algorithm; returns a -[`SMomentModel`](@ref) (see the documentation for details. +[`SMomentModel`](@ref) (see the documentation for details). 
-`reaction_isozymes` parameter is a function that returns a single isozyme for -each reaction, or `nothing` if the reaction is not enzymatic. If the reaction -has multiple isozymes, use [`smoment_isozyme_score`](@ref) to select the "best" -one, as recommended by the sMOMENT approach. +# Arguments -`gene_product_molar_mass` parameter is a function that returns a molar mass of -each gene product (relative to `total_enzyme_capacity` and the specified -kcats), as specified by sMOMENT. +- `reaction_isozymes` parameter is a function that returns a single + [`Isozyme`](@ref) for each reaction, or `nothing` if the reaction is not + enzymatic. If the reaction has multiple isozymes, use + [`smoment_isozyme_score`](@ref) to select the "best" one, as recommended by + the sMOMENT approach. +- `gene_product_molar_mass` parameter is a function that returns a molar mass + of each gene product (relative to `total_enzyme_capacity` and the specified + kcats), as specified by sMOMENT. +- `total_enzyme_capacity` is the maximum "enzyme capacity" consumption in the + model. -`total_enzyme_capacity` is the maximum "enzyme capacity" consumption of the -model. +Alternatively, all function arguments also accept dictionaries that are used to +provide the same data lookup. """ function make_smoment_model( model::MetabolicModel; - reaction_isozymes::Function, - gene_product_molar_mass::Function, + reaction_isozymes::Union{Function,Dict{String,Isozyme}}, + gene_product_molar_mass::::Union{Function,Dict{String,Float64}}, total_enzyme_capacity::Float64, ) + ris_ = + reaction_isozymes isa Function ? reaction_isozymes : (gid -> reaction_isozymes[gid]) + gpmm_ = + gene_product_molar_mass isa Function ? 
gene_product_molar_mass : + (gid -> gene_product_molar_mass[gid]) + columns = Vector{_smoment_column}() (lbs, ubs) = bounds(model) rids = reactions(model) for i = 1:n_reactions(model) - isozyme = reaction_isozymes(rids[i]) + isozyme = ris_(rids[i]) if isnothing(isozyme) # non-enzymatic reaction (or a totally ignored one) push!(columns, _smoment_column(i, 0, lbs[i], ubs[i], 0)) continue end - mw = sum( - gene_product_molar_mass(gid) * ps for (gid, ps) in isozyme.gene_product_count - ) + mw = sum(gpmm_(gid) * ps for (gid, ps) in isozyme.gene_product_count) if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance # reaction can run in reverse diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 0893d25be..507ffe497 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -1,4 +1,9 @@ +""" + struct _gecko_column + +A helper type for describing the contents of [`GeckoModel`](@ref)s. +""" struct _gecko_column reaction_idx::Int isozyme_idx::Int @@ -10,6 +15,44 @@ struct _gecko_column mass_group_coupling::Vector{Tuple{Int,Float64}} end +""" + struct GeckoModel <: ModelWrapper + +A model with complex enzyme concentration and capacity bounds, as described in +*Sánchez, Benjamín J., et al. "Improving the phenotype predictions of a yeast +genome‐scale metabolic model by incorporating enzymatic constraints." Molecular +systems biology 13.8 (2017): 935.* + +Use [`make_gecko_model`](@ref) or [`with_gecko`](@ref) to construct this kind +of models. 
+ +The model wraps another "internal" model, and adds following modifications: +- enzymatic reactions with known enzyme information are split into multiple + forward and reverse variants for each isozyme, +- reaction coupling is added to ensure the groups of isozyme reactions obey the + global reaction flux bounds from the original model, +- coupling is added to simulate available gene concentrations as "virtual + metabolites" consumed by each reaction by its gene product stoichiometry, + which can constrained by the user (to reflect realistic measurements such as + from mass spectrometry), +- additional coupling is added to simulate total masses of different proteins + grouped by type (e.g., membrane-bound and free-floating proteins), which can + be again constrained by the user (this is slightly generalized from original + GECKO algorithm, which only considers a single group of indiscernible + proteins). + +The structure contains fields `columns` that describe the contents of the +coupling columns, `coupling_row_reaction`, `coupling_row_gene_product` and +`coupling_row_mass_group` that describe correspondence of the coupling rows to +original model and determine the coupling bounds, and `inner`, which is the +original wrapped model. + +Implementation exposes the split reactions (available as `reactions(model)`), +but retains the original "simple" reactions accessible by [`fluxes`](@ref). All +constraints are implemented using [`coupling`](@ref) and +[`coupling_bounds`](@ref), i.e., all virtual metabolites described by GECKO are +purely virtual and do not occur in [`metabolites`](@ref). +""" struct GeckoModel <: ModelWrapper columns::Vector{_gecko_column} coupling_row_reaction::Vector{Int} @@ -84,8 +127,8 @@ reaction_flux(model::GeckoModel) = """ coupling(model::GeckoModel) -Return the coupling of [`GeckoModel`](@ref). 
That combines the coupling of -the wrapped model, coupling for split reactions, and the coupling for the total +Return the coupling of [`GeckoModel`](@ref). That combines the coupling of the +wrapped model, coupling for split reactions, and the coupling for the total enzyme capacity. """ coupling(model::GeckoModel) = vcat( diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index f28a1ec0c..543115eee 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -15,10 +15,9 @@ end """ struct SMomentModel <: ModelWrapper -Construct an enzyme-capacity constrained model using sMOMENT algorithm, as -described by *Bekiaris, Pavlos Stephanos, and Steffen Klamt, "Automatic -construction of metabolic models with enzyme constraints" BMC bioinformatics, -2020*. +An enzyme-capacity-constrained model using sMOMENT algorithm, as described by +*Bekiaris, Pavlos Stephanos, and Steffen Klamt, "Automatic construction of +metabolic models with enzyme constraints" BMC bioinformatics, 2020*. Use [`make_smoment_model`](@ref) or [`with_smoment`](@ref) to construct the models. @@ -27,10 +26,10 @@ The model is constructed as follows: - stoichiometry of the original model is retained as much as possible, but enzymatic reations are split into forward and reverse parts (marked by a suffix like `...#forward` and `...#reverse`), -- stoichiometry is expanded by a virtual metabolite "enzyme capacity" which is - consumed by all enzymatic reactions at a rate given by enzyme mass divided by - the corresponding kcat, -- the total consumption of the enzyme capacity is constrained by a fixed +- coupling is added to simulate a virtual metabolite "enzyme capacity", which + is consumed by all enzymatic reactions at a rate given by enzyme mass divided + by the corresponding kcat, +- the total consumption of the enzyme capacity is constrained to a fixed maximum. 
The `SMomentModel` structure contains a worked-out representation of the diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 658f4e8b3..6a928d50d 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -15,18 +15,19 @@ total_protein_mass = 100.0 - gm = - model |> - with_changed_bounds( + bounded_model = + model |> with_changed_bounds( ["EX_glc__D_e", "GLCpts"]; lower = [-1000.0, -1.0], upper = [nothing, 12.0], - ) |> - with_gecko( + ) + + gm = + bounded_model |> with_gecko( reaction_isozymes = get_reaction_isozymes, gene_product_limit = g -> g == "b2779" ? (0.01, 0.06) : (0.0, 1.0), - gene_product_mass = get_gene_product_mass, - mass_fraction_limit = _ -> total_protein_mass, + gene_product_molar_mass = get_gene_product_mass, + group_mass_limit = _ -> total_protein_mass, ) opt_model = flux_balance_analysis( @@ -47,4 +48,26 @@ prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens) @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) + + gm = + bounded_model |> with_gecko( + reaction_isozymes = get_reaction_isozymes, + gene_product_limit = _ -> (0.0, 0.05), + gene_product_molar_mass = get_gene_product_mass, + group_mass_limit = _ -> total_protein_mass, + relaxed_arm_reaction_bounds = true, + ) + + rxn_fluxes = flux_balance_analysis_dict( + gm, + Tulip.Optimizer; + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], + ) + + @test isapprox( + rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], + 0.1877932315030117, + atol = TEST_TOLERANCE, + ) + end From 73b8f61723635d6bb2d6219e0b23c343e397d5ed Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Fri, 15 Apr 2022 11:16:36 +0200 Subject: [PATCH 092/109] fix smoment syntax --- src/analysis/smoment.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index df6236d90..8e7c2819c 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -2,8 +2,8 @@ """ 
make_smoment_model( model::MetabolicModel; - reaction_isozymes::Union{Function, Dict{String, Isozyme}}, - gene_product_molar_mass::Union{Function, Dict{String, Float64}}, + reaction_isozymes::Union{Function,Dict{String,Isozyme}}, + gene_product_molar_mass::Union{Function,Dict{String,Float64}}, total_enzyme_capacity::Float64, ) @@ -29,7 +29,7 @@ provide the same data lookup. function make_smoment_model( model::MetabolicModel; reaction_isozymes::Union{Function,Dict{String,Isozyme}}, - gene_product_molar_mass::::Union{Function,Dict{String,Float64}}, + gene_product_molar_mass::Union{Function,Dict{String,Float64}}, total_enzyme_capacity::Float64, ) ris_ = From c9766ea3f3139e4630764aad48aeadfbb2cd0ea0 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Mon, 18 Apr 2022 20:40:08 +0200 Subject: [PATCH 093/109] Minor docstring fixes --- src/analysis/smoment.jl | 10 ++++------ src/base/types/wrappers/SMomentModel.jl | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index 8e7c2819c..bc130fb38 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -15,13 +15,11 @@ Construct a model with a structure given by sMOMENT algorithm; returns a - `reaction_isozymes` parameter is a function that returns a single [`Isozyme`](@ref) for each reaction, or `nothing` if the reaction is not enzymatic. If the reaction has multiple isozymes, use - [`smoment_isozyme_score`](@ref) to select the "best" one, as recommended by - the sMOMENT approach. + [`smoment_isozyme_speed`](@ref) to select the fastest one, as recommended by + the sMOMENT paper. - `gene_product_molar_mass` parameter is a function that returns a molar mass - of each gene product (relative to `total_enzyme_capacity` and the specified - kcats), as specified by sMOMENT. -- `total_enzyme_capacity` is the maximum "enzyme capacity" consumption in the - model. + of each gene product as specified by sMOMENT. 
+- `total_enzyme_capacity` is the maximum "enzyme capacity" in the model. Alternatively, all function arguments also accept dictionaries that are used to provide the same data lookup. diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl index 543115eee..bdf50a71d 100644 --- a/src/base/types/wrappers/SMomentModel.jl +++ b/src/base/types/wrappers/SMomentModel.jl @@ -36,9 +36,9 @@ The `SMomentModel` structure contains a worked-out representation of the optimization problem atop a wrapped [`MetabolicModel`](@ref), in particular the separation of certain reactions into unidirectional forward and reverse parts, an "enzyme capacity" required for each reaction, and the value of the maximum -capacity constraint. Original coupling is retained. +capacity constraint. Original coupling in the inner model is retained. -In the structure, field `columns` describes the correspondence of stoichiometry +In the structure, the field `columns` describes the correspondence of stoichiometry columns to the stoichiometry and data of the internal wrapped model, and `total_enzyme_capacity` is the total bound on the enzyme capacity consumption as specified in sMOMENT algorithm. From 36cdbcb549d28985329251f5f65cddb005b6d60c Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Mon, 18 Apr 2022 23:41:50 +0200 Subject: [PATCH 094/109] fix errors --- src/analysis/gecko.jl | 34 +++++++++++++-------------- src/analysis/smoment.jl | 8 +++---- src/base/types/wrappers/GeckoModel.jl | 2 +- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index d7a111adf..8685fa8fe 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -1,11 +1,11 @@ """ make_gecko_model( model::MetabolicModel; - reaction_isozymes::Union{Function,Dict{String,Isozyme}}, - gene_product_limit::Union{Function,Dict{String,Tuple{Float64,Float64}}}, + reaction_isozymes::Union{Function,Dict{String,Vector{Isozyme}}} + gene_product_bounds::Union{Function,Dict{String,Tuple{Float64,Float64}}}, gene_product_molar_mass::Union{Function,Dict{String,Float64}}, gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", - group_mass_limit::Union{Function,Dict{String,Float64}}, + gene_mass_group_bound::Union{Function,Dict{String,Float64}}, relaxed_arm_reaction_bounds = false, ) @@ -16,7 +16,7 @@ GECKO algorithm (see [`GeckoModel`](@ref) documentation for details). - `reaction_isozymes` is a function that returns a vector of [`Isozyme`](@ref)s for each reaction, or empty vector if the reaction is not enzymatic. -- `gene_product_limit` is a function that returns lower and upper bound for +- `gene_product_bounds` is a function that returns lower and upper bound for concentration for a given gene product (specified by the same string gene ID as in `reaction_isozymes`), as `Tuple{Float64,Float64}`. - `gene_product_molar_mass` is a function that returns a numeric molar mass of @@ -25,7 +25,7 @@ GECKO algorithm (see [`GeckoModel`](@ref) documentation for details). given gene product, again specified by string gene ID. By default, all gene products belong to group `"uncategorized"` which is the behavior of original GECKO. 
-- `group_mass_limit` is a function that returns the maximum mass for a given +- `gene_mass_group_bound` is a function that returns the maximum mass for a given mass group. - `relaxed_arm_reaction_bounds` is a boolean flag that relaxes the constraints on the "arm" reactions specified by GECKO. By default (value `false`), there @@ -43,23 +43,23 @@ provide the same data lookup. """ function make_gecko_model( model::MetabolicModel; - reaction_isozymes::Union{Function,Dict{String,Isozyme}}, - gene_product_limit::Union{Function,Dict{String,Tuple{Float64,Float64}}}, + reaction_isozymes::Union{Function,Dict{String,Vector{Isozyme}}}, + gene_product_bounds::Union{Function,Dict{String,Tuple{Float64,Float64}}}, gene_product_molar_mass::Union{Function,Dict{String,Float64}}, gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", - group_mass_limit::Union{Function,Dict{String,Float64}}, + gene_mass_group_bound::Union{Function,Dict{String,Float64}}, relaxed_arm_reaction_bounds = false, ) ris_ = - reaction_isozymes isa Function ? reaction_isozymes : (gid -> reaction_isozymes[gid]) - gpl_ = - gene_product_limit isa Function ? gene_product_limit : - (gid -> gene_product_limit[gid]) + reaction_isozymes isa Function ? reaction_isozymes : (rid -> get(reaction_isozymes, rid, [])) + gpb_ = + gene_product_bounds isa Function ? gene_product_bounds : + (gid -> gene_product_bounds[gid]) gpmm_ = gene_product_molar_mass isa Function ? gene_product_molar_mass : (gid -> gene_product_molar_mass[gid]) gmg_ = gene_mass_group isa Function ? gene_mass_group : (gid -> gene_mass_group[gid]) - gml_ = group_mass_limit isa Function ? group_mass_limit : (grp -> group_mass_limit[grp]) + gmgb_ = gene_mass_group_bound isa Function ? 
gene_mass_group_bound : (grp -> gene_mass_group_bound[grp]) # ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch columns = Vector{_gecko_column}() @@ -130,8 +130,8 @@ function make_gecko_model( gene_row_lookup[gidx] = length(coupling_row_gene_product) end - (row_idx, 1 / kcat) - end for (gene, count) in isozyme.gene_product_count if + (row_idx, stoich / kcat) + end for (gene, stoich) in isozyme.gene_product_count if haskey(gene_name_lookup, gene) ) @@ -182,8 +182,8 @@ function make_gecko_model( GeckoModel( columns, coupling_row_reaction, - collect(zip(coupling_row_gene_product, gpl_.(gids[coupling_row_gene_product]))), - collect(zip(coupling_row_mass_group, gml_.(coupling_row_mass_group))), + collect(zip(coupling_row_gene_product, gpb_.(gids[coupling_row_gene_product]))), + collect(zip(coupling_row_mass_group, gmgb_.(coupling_row_mass_group))), model, ) end diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index bc130fb38..dadea190b 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -2,7 +2,7 @@ """ make_smoment_model( model::MetabolicModel; - reaction_isozymes::Union{Function,Dict{String,Isozyme}}, + reaction_isozyme::Union{Function,Dict{String,Isozyme}}, gene_product_molar_mass::Union{Function,Dict{String,Float64}}, total_enzyme_capacity::Float64, ) @@ -12,7 +12,7 @@ Construct a model with a structure given by sMOMENT algorithm; returns a # Arguments -- `reaction_isozymes` parameter is a function that returns a single +- `reaction_isozyme` parameter is a function that returns a single [`Isozyme`](@ref) for each reaction, or `nothing` if the reaction is not enzymatic. If the reaction has multiple isozymes, use [`smoment_isozyme_speed`](@ref) to select the fastest one, as recommended by @@ -26,12 +26,12 @@ provide the same data lookup. 
""" function make_smoment_model( model::MetabolicModel; - reaction_isozymes::Union{Function,Dict{String,Isozyme}}, + reaction_isozyme::Union{Function,Dict{String,Isozyme}}, gene_product_molar_mass::Union{Function,Dict{String,Float64}}, total_enzyme_capacity::Float64, ) ris_ = - reaction_isozymes isa Function ? reaction_isozymes : (gid -> reaction_isozymes[gid]) + reaction_isozyme isa Function ? reaction_isozyme : (rid -> get(reaction_isozyme, rid, nothing)) gpmm_ = gene_product_molar_mass isa Function ? gene_product_molar_mass : (gid -> gene_product_molar_mass[gid]) diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 507ffe497..bfdd9bc6a 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -24,7 +24,7 @@ genome‐scale metabolic model by incorporating enzymatic constraints." Molecula systems biology 13.8 (2017): 935.* Use [`make_gecko_model`](@ref) or [`with_gecko`](@ref) to construct this kind -of models. +of model. The model wraps another "internal" model, and adds following modifications: - enzymatic reactions with known enzyme information are split into multiple From f2d37f03ff0ffebcc6ff7219c7640f0606dc7449 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Tue, 19 Apr 2022 10:12:06 +0200 Subject: [PATCH 095/109] fix test kwargs --- test/analysis/gecko.jl | 4 ++-- test/analysis/smoment.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 6a928d50d..e718184a8 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -52,9 +52,9 @@ gm = bounded_model |> with_gecko( reaction_isozymes = get_reaction_isozymes, - gene_product_limit = _ -> (0.0, 0.05), + gene_product_bounds = _ -> (0.0, 0.05), gene_product_molar_mass = get_gene_product_mass, - group_mass_limit = _ -> total_protein_mass, + gene_mass_group_bound = _ -> total_protein_mass, relaxed_arm_reaction_bounds = true, ) diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index f14d2d2d6..41323a900 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -22,7 +22,7 @@ upper = [nothing, 12.0], ) |> with_smoment( - reaction_isozymes = get_reaction_isozyme, + reaction_isozyme = get_reaction_isozyme, gene_product_molar_mass = get_gene_product_mass, total_enzyme_capacity = 100.0, ) From 64291b6a1ef5268e9459b27c06ed75ee91ec7f45 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 19 Apr 2022 10:21:54 +0200 Subject: [PATCH 096/109] fix more kwargs --- src/base/types/wrappers/GeckoModel.jl | 4 ++-- test/analysis/gecko.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index bfdd9bc6a..dea29bab7 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -162,13 +162,13 @@ function coupling_bounds(model::GeckoModel) return ( vcat( iclb, - ilb[model.coupling_row_reaction], + ilb[model.coupling_row_reaction], #! 
fix bound [lb for (_, (lb, _)) in model.coupling_row_gene_product], [0.0 for _ in model.coupling_row_mass_group], ), vcat( icub, - iub[model.coupling_row_reaction], + iub[model.coupling_row_reaction], #! fix bound [ub for (_, (_, ub)) in model.coupling_row_gene_product], [ub for (_, ub) in model.coupling_row_mass_group], ), diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index e718184a8..35c99fae7 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -25,9 +25,9 @@ gm = bounded_model |> with_gecko( reaction_isozymes = get_reaction_isozymes, - gene_product_limit = g -> g == "b2779" ? (0.01, 0.06) : (0.0, 1.0), + gene_product_bounds = g -> g == "b2779" ? (0.01, 0.06) : (0.0, 1.0), gene_product_molar_mass = get_gene_product_mass, - group_mass_limit = _ -> total_protein_mass, + gene_mass_group_bound = _ -> total_protein_mass, ) opt_model = flux_balance_analysis( From 54a6be83d8f982f5138e58c07e8f0f8b0e99e70f Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 19 Apr 2022 10:47:34 +0200 Subject: [PATCH 097/109] remove relaxed arms --- src/analysis/gecko.jl | 43 ++++++++++++++----------------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 8685fa8fe..4afccf6a3 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -48,7 +48,6 @@ function make_gecko_model( gene_product_molar_mass::Union{Function,Dict{String,Float64}}, gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", gene_mass_group_bound::Union{Function,Dict{String,Float64}}, - relaxed_arm_reaction_bounds = false, ) ris_ = reaction_isozymes isa Function ? 
reaction_isozymes : (rid -> get(reaction_isozymes, rid, [])) @@ -82,39 +81,25 @@ function make_gecko_model( continue end - # if the reaction has multiple isozymes, it needs extra coupling to - # ensure that the total rate of the reaction doesn't exceed the - # "global" limit - if relaxed_arm_reaction_bounds - reaction_coupling_row = - length(isozymes) > 1 ? begin - push!(coupling_row_reaction, i) - length(coupling_row_reaction) - end : 0 - end - # loop over both directions for all isozymes for (lb, ub, kcatf, dir) in [ (-ubs[i], -lbs[i], i -> i.kcat_reverse, -1), (lbs[i], ubs[i], i -> i.kcat_forward, 1), ] - if !relaxed_arm_reaction_bounds - # In this case, the coefficients in the coupling matrix will be - # the same as in the combined case, only categorized in - # separate rows for negative and positive ones. Surprisingly, - # we do not need to explicitly remember the bounds, because the - # ones taken from the original model are perfectly okay -- the - # "reverse" direction is unreachable because of individual - # bounds on split reactions, and the "forward" direction is - # properly negated in the reverse case to work nicely with the - # global lower bound. - reaction_coupling_row = - ub > 0 && length(isozymes) > 1 ? begin - push!(coupling_row_reaction, i) - length(coupling_row_reaction) - end : 0 - end - + # The coefficients in the coupling matrix will be categorized in + # separate rows for negative and positive reactions. Surprisingly, + # we do not need to explicitly remember the bounds, because the + # ones taken from the original model are perfectly okay -- the + # "reverse" direction is unreachable because of individual + # bounds on split reactions, and the "forward" direction is + # properly negated in the reverse case to work nicely with the + # global lower bound. + reaction_coupling_row = + ub > 0 && length(isozymes) > 1 ? 
begin + push!(coupling_row_reaction, i) + length(coupling_row_reaction) + end : 0 + # all isozymes in this direction for (iidx, isozyme) in enumerate(isozymes) kcat = kcatf(isozyme) From 0939347000f813ce43f7a85dc350c4b1912ce528 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 19 Apr 2022 10:48:36 +0200 Subject: [PATCH 098/109] remove relaxed arm test --- test/analysis/gecko.jl | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 35c99fae7..15ff176b8 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -48,26 +48,4 @@ prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens) @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) - - gm = - bounded_model |> with_gecko( - reaction_isozymes = get_reaction_isozymes, - gene_product_bounds = _ -> (0.0, 0.05), - gene_product_molar_mass = get_gene_product_mass, - gene_mass_group_bound = _ -> total_protein_mass, - relaxed_arm_reaction_bounds = true, - ) - - rxn_fluxes = flux_balance_analysis_dict( - gm, - Tulip.Optimizer; - modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], - ) - - @test isapprox( - rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], - 0.1877932315030117, - atol = TEST_TOLERANCE, - ) - end From c240484af0511cd381d9ed73d0d6c7c01431663b Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Tue, 19 Apr 2022 16:21:16 +0200 Subject: [PATCH 099/109] make gecko resemble published version --- src/analysis/gecko.jl | 81 ++++++++-------- src/base/types/wrappers/GeckoModel.jl | 128 ++++++++++++++++++-------- src/base/utils/gecko.jl | 27 +++--- 3 files changed, 141 insertions(+), 95 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 4afccf6a3..1f010f2a2 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -6,7 +6,6 @@ gene_product_molar_mass::Union{Function,Dict{String,Float64}}, gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", gene_mass_group_bound::Union{Function,Dict{String,Float64}}, - relaxed_arm_reaction_bounds = false, ) Wrap a model into a [`GeckoModel`](@ref), following the structure given by @@ -27,16 +26,6 @@ GECKO algorithm (see [`GeckoModel`](@ref) documentation for details). GECKO. - `gene_mass_group_bound` is a function that returns the maximum mass for a given mass group. -- `relaxed_arm_reaction_bounds` is a boolean flag that relaxes the constraints - on the "arm" reactions specified by GECKO. By default (value `false`), there - is a separate constraint that limits the total flux through forward-direction - reaction for all its isozymes (ensuring that the sum of forward rates is less - than "global" upper bound), and another separate constraint that limits the - total flux through reverse-direction reaction isozymes. Value `true` groups - both forward and reverse reactions in a single constraint, allowing the total - forward flux to be actually greater than the upper bound IF the reverse flux - can balance it to fit into the upper and lower bound constraints (in turn, - more enzyme can be "wasted" by a reaction that runs in both directions). Alternatively, all function arguments may be also supplied as dictionaries that provide the same data lookup. 
@@ -50,7 +39,8 @@ function make_gecko_model( gene_mass_group_bound::Union{Function,Dict{String,Float64}}, ) ris_ = - reaction_isozymes isa Function ? reaction_isozymes : (rid -> get(reaction_isozymes, rid, [])) + reaction_isozymes isa Function ? reaction_isozymes : + (rid -> get(reaction_isozymes, rid, [])) gpb_ = gene_product_bounds isa Function ? gene_product_bounds : (gid -> gene_product_bounds[gid]) @@ -58,13 +48,15 @@ function make_gecko_model( gene_product_molar_mass isa Function ? gene_product_molar_mass : (gid -> gene_product_molar_mass[gid]) gmg_ = gene_mass_group isa Function ? gene_mass_group : (gid -> gene_mass_group[gid]) - gmgb_ = gene_mass_group_bound isa Function ? gene_mass_group_bound : (grp -> gene_mass_group_bound[grp]) + gmgb_ = + gene_mass_group_bound isa Function ? gene_mass_group_bound : + (grp -> gene_mass_group_bound[grp]) # ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch columns = Vector{_gecko_column}() coupling_row_reaction = Int[] coupling_row_gene_product = Int[] - coupling_row_mass_group = String[] + # coupling_row_mass_group = String[] gids = genes(model) (lbs, ubs) = bounds(model) @@ -72,12 +64,11 @@ function make_gecko_model( gene_name_lookup = Dict(gids .=> 1:length(gids)) gene_row_lookup = Dict{Int,Int}() - mass_group_lookup = Dict{String,Int}() for i = 1:n_reactions(model) isozymes = ris_(rids[i]) if isempty(isozymes) - push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [], [])) + push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [])) continue end @@ -99,7 +90,7 @@ function make_gecko_model( push!(coupling_row_reaction, i) length(coupling_row_reaction) end : 0 - + # all isozymes in this direction for (iidx, isozyme) in enumerate(isozymes) kcat = kcatf(isozyme) @@ -120,31 +111,6 @@ function make_gecko_model( haskey(gene_name_lookup, gene) ) - # prepare the coupling with the mass groups - gp_groups = gmg_.(keys(isozyme.gene_product_count)) - gp_mass = 
gpmm_.(keys(isozyme.gene_product_count)) - groups = unique(filter(!isnothing, gp_groups)) - group_idx = Dict(groups .=> 1:length(groups)) - vals = [0.0 for _ in groups] - - for (gpg, mass) in zip(gp_groups, gp_mass) - if !isnothing(gpg) - vals[group_idx[gpg]] += mass / kcat - end - end - - mass_group_coupling = collect( - isnothing(group) ? 0 : - begin - if !haskey(mass_group_lookup, group) - push!(coupling_row_mass_group, group) - mass_group_lookup[group] = - length(coupling_row_mass_group) - end - (mass_group_lookup[group], val) - end for (group, val) in zip(groups, vals) - ) - # make a new column push!( columns, @@ -156,7 +122,6 @@ function make_gecko_model( max(lb, 0), ub, gene_product_coupling, - mass_group_coupling, ), ) end @@ -164,11 +129,37 @@ function make_gecko_model( end end - GeckoModel( + # prepare enzyme capacity constraints + mg_gid_lookup = Dict{String, Vector{String}}() + for gid in gids[coupling_row_gene_product] + mg = gmg_(gid) + if haskey(mg_gid_lookup, mg) + push!(mg_gid_lookup[mg], gid) + else + mg_gid_lookup[mg] = [gid] + end + end + coupling_row_mass_group = Vector{Tuple{Vector{Int}, Vector{Float64}, Float64}}() + for (grp, gs) in mg_gid_lookup + idxs = Int.(indexin(gs, gids)) + mms = gpmm_.(gs) + push!(coupling_row_mass_group, (idxs, mms, gmgb_(grp))) + end + + gm = GeckoModel( + spzeros(length(columns) + length(coupling_row_gene_product)), columns, coupling_row_reaction, collect(zip(coupling_row_gene_product, gpb_.(gids[coupling_row_gene_product]))), - collect(zip(coupling_row_mass_group, gmgb_.(coupling_row_mass_group))), + coupling_row_mass_group, model, ) + + # set objective (do separately because gene products can also be objectives) + gm.objective .= [ + _gecko_column_reactions(gm)' * objective(gm.inner) + spzeros(length(coupling_row_gene_product)) + ] + + return gm end diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index dea29bab7..0410defba 100644 --- 
a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -1,4 +1,3 @@ - """ struct _gecko_column @@ -12,7 +11,6 @@ struct _gecko_column lb::Float64 ub::Float64 gene_product_coupling::Vector{Tuple{Int,Float64}} - mass_group_coupling::Vector{Tuple{Int,Float64}} end """ @@ -20,7 +18,7 @@ end A model with complex enzyme concentration and capacity bounds, as described in *Sánchez, Benjamín J., et al. "Improving the phenotype predictions of a yeast -genome‐scale metabolic model by incorporating enzymatic constraints." Molecular +genome-scale metabolic model by incorporating enzymatic constraints." Molecular systems biology 13.8 (2017): 935.* Use [`make_gecko_model`](@ref) or [`with_gecko`](@ref) to construct this kind @@ -31,10 +29,9 @@ The model wraps another "internal" model, and adds following modifications: forward and reverse variants for each isozyme, - reaction coupling is added to ensure the groups of isozyme reactions obey the global reaction flux bounds from the original model, -- coupling is added to simulate available gene concentrations as "virtual - metabolites" consumed by each reaction by its gene product stoichiometry, - which can constrained by the user (to reflect realistic measurements such as - from mass spectrometry), +- gene concentrations specified by each reaction and its gene product stoichiometry, + can constrained by the user to reflect measurements, such as + from mass spectrometry, - additional coupling is added to simulate total masses of different proteins grouped by type (e.g., membrane-bound and free-floating proteins), which can be again constrained by the user (this is slightly generalized from original @@ -54,10 +51,11 @@ constraints are implemented using [`coupling`](@ref) and purely virtual and do not occur in [`metabolites`](@ref). 
""" struct GeckoModel <: ModelWrapper + objective::SparseVec columns::Vector{_gecko_column} coupling_row_reaction::Vector{Int} coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}} - coupling_row_mass_group::Vector{Tuple{String,Float64}} + coupling_row_mass_group::Vector{Tuple{Vector{Int}, Vector{Float64}, Float64}} inner::MetabolicModel end @@ -71,26 +69,33 @@ Return a stoichiometry of the [`GeckoModel`](@ref). The enzymatic reactions are split into unidirectional forward and reverse ones, each of which may have multiple variants per isozyme. """ -stoichiometry(model::GeckoModel) = - stoichiometry(model.inner) * _gecko_column_reactions(model) +function stoichiometry(model::GeckoModel) + irrevS = stoichiometry(model.inner) * COBREXA._gecko_column_reactions(model) + enzS = COBREXA._gecko_gene_product_coupling(model) + [ + irrevS spzeros(size(irrevS, 1), size(enzS, 1)) + -enzS I(size(enzS, 1)) + ] +end """ objective(model::GeckoModel) -Reconstruct an objective of the [`GeckoModel`](@ref), following the objective -of the inner model. +Reconstruct an objective of the [`GeckoModel`](@ref). """ -objective(model::GeckoModel) = _gecko_column_reactions(model)' * objective(model.inner) +objective(model::GeckoModel) = model.objective """ reactions(model::GeckoModel) Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split to forward- and reverse-only parts with different isozyme indexes; reactions -IDs are mangled accordingly with suffixes). +IDs are mangled accordingly with suffixes), as well as the genes associated +with enzymatic reactions. In the context of a GeckoModel, this is better described +as the variables in the problem. 
""" -reactions(model::GeckoModel) = - let inner_reactions = reactions(model.inner) +function reactions(model::GeckoModel) + rxnnames = let inner_reactions = reactions(model.inner) [ _gecko_reaction_name( inner_reactions[col.reaction_idx], @@ -99,21 +104,35 @@ reactions(model::GeckoModel) = ) for col in model.columns ] end + [rxnnames; genes(model)] +end """ - reactions(model::GeckoModel) + n_reactions(model::GeckoModel) -Returns the number of all irreversible reactions in `model`. +Returns the number of all irreversible reactions in `model` as well as the number of gene products +that take part in enzymatic reactions. In the context of a GeckoModel, this is better described +as the number of variables in the problem. """ -n_reactions(model::GeckoModel) = length(model.columns) +n_reactions(model::GeckoModel) = + length(model.columns) + length(model.coupling_row_gene_product) """ bounds(model::GeckoModel) Return variable bounds for [`GeckoModel`](@ref). """ -bounds(model::GeckoModel) = - ([col.lb for col in model.columns], [col.ub for col in model.columns]) +function bounds(model::GeckoModel) + lbs = [ + [col.lb for col in model.columns] + [lb for (_, (lb, _)) in model.coupling_row_gene_product] + ] + ubs = [ + [col.ub for col in model.columns] + [ub for (_, (_, ub)) in model.coupling_row_gene_product] + ] + (lbs, ubs) +end """ reaction_flux(model::GeckoModel) @@ -121,22 +140,32 @@ bounds(model::GeckoModel) = Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original fluxes in the wrapped model. """ -reaction_flux(model::GeckoModel) = - _gecko_column_reactions(model)' * reaction_flux(model.inner) +function reaction_flux(model::GeckoModel) + gecko_mat = _gecko_column_reactions(model)' + inner_mat = reaction_flux(model.inner) + [ + gecko_mat*inner_mat spzeros(size(gecko_mat, 1), n_genes(model)) + spzeros(n_genes(model), size(inner_mat, 2)) I(n_genes(model)) + ] +end """ coupling(model::GeckoModel) Return the coupling of [`GeckoModel`](@ref). 
That combines the coupling of the -wrapped model, coupling for split reactions, and the coupling for the total +wrapped model, coupling for split (arm) reactions, and the coupling for the total enzyme capacity. """ -coupling(model::GeckoModel) = vcat( - coupling(model.inner) * _gecko_column_reactions(model), - _gecko_reaction_coupling(model), - _gecko_gene_product_coupling(model), - _gecko_mass_group_coupling(model), -) +function coupling(model::GeckoModel) + innerC = coupling(model.inner) * _gecko_column_reactions(model) + rxnC = _gecko_reaction_coupling(model) + enzcap = _gecko_mass_group_coupling(model) + [ + innerC spzeros(size(innerC, 1), n_genes(model)) + rxnC spzeros(size(rxnC, 1), n_genes(model)) + spzeros(length(model.coupling_row_mass_group), length(model.columns)) enzcap + ] +end """ n_coupling_constraints(model::GeckoModel) @@ -147,7 +176,6 @@ Count the coupling constraints in [`GeckoModel`](@ref) (refer to n_coupling_constraints(model::GeckoModel) = n_coupling_constraints(model.inner) + length(model.coupling_row_reaction) + - length(model.coupling_row_gene_product) + length(model.coupling_row_mass_group) """ @@ -162,15 +190,43 @@ function coupling_bounds(model::GeckoModel) return ( vcat( iclb, - ilb[model.coupling_row_reaction], #! fix bound - [lb for (_, (lb, _)) in model.coupling_row_gene_product], + ilb[model.coupling_row_reaction], [0.0 for _ in model.coupling_row_mass_group], ), vcat( icub, - iub[model.coupling_row_reaction], #! fix bound - [ub for (_, (_, ub)) in model.coupling_row_gene_product], - [ub for (_, ub) in model.coupling_row_mass_group], + iub[model.coupling_row_reaction], + [ub for (_, _, ub) in model.coupling_row_mass_group], ), ) end + +""" + balance(model::GeckoModel) + +Return the balance of the inner model, concatenated with a vector of +zeros representing the enzyme balance of a [`GeckoModel`](@ref). 
+""" +balance(model::GeckoModel) = [balance(model.inner); spzeros(length(model.coupling_row_gene_product))] + +""" + n_genes(model::GeckoModel) + +Return the number of genes that have enzymatic constraints associated with them. +""" +n_genes(model::GeckoModel) = length(model.coupling_row_gene_product) + +""" + genes(model::GeckoModel) + +Return the gene ids of genes that have enzymatic constraints associated with them. +""" +genes(model::GeckoModel) = genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]] + +""" + fluxes(model::GeckoModel) + +Return the original reaction ids and the gene ids that were variables +in a [`GeckoModel`](@ref). +""" +fluxes(model::GeckoModel) = [reactions(model.inner); genes(model)] \ No newline at end of file diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index fad105a13..467417ba4 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -70,17 +70,16 @@ _gecko_gene_product_coupling(model::GeckoModel) = Compute the part of the coupling for [`GeckoModel`](@ref) that limits the total mass of each group of gene products. """ -_gecko_mass_group_coupling(model::GeckoModel) = - let - tmp = [ - (row, i, val) for (i, col) in enumerate(model.columns) for - (row, val) in col.mass_group_coupling - ] - sparse( - [row for (row, _, _) in tmp], - [col for (_, col, _) in tmp], - [val for (_, _, val) in tmp], - length(model.coupling_row_mass_group), - length(model.columns), - ) - end +function _gecko_mass_group_coupling(model::GeckoModel) + tmp = [ + (i, j, mm) for (i, mg) in enumerate(model.coupling_row_mass_group) for + (j, mm) in zip(mg[1], mg[2]) + ] + sparse( + [i for (i, _, _) in tmp], + [j for (_, j, _) in tmp], + [mm for (_, _, mm) in tmp], + length(model.coupling_row_mass_group), + n_genes(model), + ) +end \ No newline at end of file From 5c6c4f92f529cc84c5a02bbf8c0ff2cccef738a0 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Tue, 19 Apr 2022 17:19:42 +0200 Subject: [PATCH 100/109] fixed gecko --- src/analysis/gecko.jl | 6 +- src/base/types/wrappers/GeckoModel.jl | 44 +++++------ src/base/utils/enzymes.jl | 13 +--- src/base/utils/gecko.jl | 14 +++- test/analysis/gecko.jl | 104 ++++++++++++++++++++++++++ 5 files changed, 142 insertions(+), 39 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 1f010f2a2..aa833191d 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -139,11 +139,11 @@ function make_gecko_model( mg_gid_lookup[mg] = [gid] end end - coupling_row_mass_group = Vector{Tuple{Vector{Int}, Vector{Float64}, Float64}}() + coupling_row_mass_group = Vector{Tuple{String, Vector{Int}, Vector{Float64}, Float64}}() for (grp, gs) in mg_gid_lookup - idxs = Int.(indexin(gs, gids)) + idxs = [gene_row_lookup[x] for x in Int.(indexin(gs, gids))] mms = gpmm_.(gs) - push!(coupling_row_mass_group, (idxs, mms, gmgb_(grp))) + push!(coupling_row_mass_group, (grp, idxs, mms, gmgb_(grp))) end gm = GeckoModel( diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 0410defba..0c2b81fe7 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -55,7 +55,7 @@ struct GeckoModel <: ModelWrapper columns::Vector{_gecko_column} coupling_row_reaction::Vector{Int} coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}} - coupling_row_mass_group::Vector{Tuple{Vector{Int}, Vector{Float64}, Float64}} + coupling_row_mass_group::Vector{Tuple{String, Vector{Int}, Vector{Float64}, Float64}} inner::MetabolicModel end @@ -91,11 +91,10 @@ objective(model::GeckoModel) = model.objective Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split to forward- and reverse-only parts with different isozyme indexes; reactions IDs are mangled accordingly with suffixes), as well as the genes associated -with enzymatic reactions. 
In the context of a GeckoModel, this is better described -as the variables in the problem. +with enzymatic reactions. """ -function reactions(model::GeckoModel) - rxnnames = let inner_reactions = reactions(model.inner) +reactions(model::GeckoModel) = + let inner_reactions = reactions(model.inner) [ _gecko_reaction_name( inner_reactions[col.reaction_idx], @@ -104,18 +103,14 @@ function reactions(model::GeckoModel) ) for col in model.columns ] end - [rxnnames; genes(model)] -end """ n_reactions(model::GeckoModel) Returns the number of all irreversible reactions in `model` as well as the number of gene products -that take part in enzymatic reactions. In the context of a GeckoModel, this is better described -as the number of variables in the problem. +that take part in enzymatic reactions. """ -n_reactions(model::GeckoModel) = - length(model.columns) + length(model.coupling_row_gene_product) +n_reactions(model::GeckoModel) = length(reactions(model)) """ bounds(model::GeckoModel) @@ -140,14 +135,7 @@ end Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original fluxes in the wrapped model. """ -function reaction_flux(model::GeckoModel) - gecko_mat = _gecko_column_reactions(model)' - inner_mat = reaction_flux(model.inner) - [ - gecko_mat*inner_mat spzeros(size(gecko_mat, 1), n_genes(model)) - spzeros(n_genes(model), size(inner_mat, 2)) I(n_genes(model)) - ] -end +reaction_flux(model::GeckoModel) = _gecko_column_reactions(model)' * reaction_flux(model.inner) """ coupling(model::GeckoModel) @@ -196,7 +184,7 @@ function coupling_bounds(model::GeckoModel) vcat( icub, iub[model.coupling_row_reaction], - [ub for (_, _, ub) in model.coupling_row_mass_group], + [ub for (_, _, _, ub) in model.coupling_row_mass_group], ), ) end @@ -214,7 +202,7 @@ balance(model::GeckoModel) = [balance(model.inner); spzeros(length(model.couplin Return the number of genes that have enzymatic constraints associated with them. 
""" -n_genes(model::GeckoModel) = length(model.coupling_row_gene_product) +n_genes(model::GeckoModel) = length(genes(model)) """ genes(model::GeckoModel) @@ -224,9 +212,15 @@ Return the gene ids of genes that have enzymatic constraints associated with the genes(model::GeckoModel) = genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]] """ - fluxes(model::GeckoModel) + metabolites(model::GeckoModel) + +Return the ids of all metabolites, both real and pseudo, for a [`GeckoModel`](@ref). +""" +metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model).*"#supply"] + +""" + n_metabolites(model::GeckoModel) -Return the original reaction ids and the gene ids that were variables -in a [`GeckoModel`](@ref). +Return the number of metabolites, both real and pseudo, for a [`GeckoModel`](@ref). """ -fluxes(model::GeckoModel) = [reactions(model.inner); genes(model)] \ No newline at end of file +n_metabolites(model::GeckoModel) = length(metabolites(model)) \ No newline at end of file diff --git a/src/base/utils/enzymes.jl b/src/base/utils/enzymes.jl index eb834a3df..e6b73bc73 100644 --- a/src/base/utils/enzymes.jl +++ b/src/base/utils/enzymes.jl @@ -6,12 +6,8 @@ argument `opt_model` is a solved optimization problem, typically returned by [`flux_balance_analysis`](@ref). """ protein_dict(model::GeckoModel, opt_model) = - let gids = genes(model) - is_solved(opt_model) ? - Dict( - [gids[gidx] for (gidx, _) in model.coupling_row_gene_product] .=> _gecko_gene_product_coupling(model) * value.(opt_model[:x]), - ) : nothing - end + is_solved(opt_model) ? + Dict(genes(model) .=> value.(opt_model[:x])[(n_reactions(model)+1):end]) : nothing """ protein_dict(model::GeckoModel) @@ -28,8 +24,8 @@ Extract the mass utilization in mass groups from a solved [`GeckoModel`](@ref). protein_mass_group_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? 
Dict( - (group for (group, _) in model.coupling_row_mass_group) .=> - _gecko_mass_group_coupling(model) * value.(opt_model[:x]), + grp[1] => dot(value.(opt_model[:x])[n_reactions(model) .+ grp[2]], grp[3]) for + grp in model.coupling_row_mass_group ) : nothing """ @@ -39,7 +35,6 @@ A pipe-able variant of [`mass_group_dict`](@ref). """ protein_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) - """ protein_mass(model::SMomentModel) diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index 467417ba4..daac1026f 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -73,7 +73,7 @@ mass of each group of gene products. function _gecko_mass_group_coupling(model::GeckoModel) tmp = [ (i, j, mm) for (i, mg) in enumerate(model.coupling_row_mass_group) for - (j, mm) in zip(mg[1], mg[2]) + (j, mm) in zip(mg[2], mg[3]) ] sparse( [i for (i, _, _) in tmp], @@ -82,4 +82,14 @@ function _gecko_mass_group_coupling(model::GeckoModel) length(model.coupling_row_mass_group), n_genes(model), ) -end \ No newline at end of file +end + +""" + flux_dict(model::GeckoModel, opt_model) + +Returns the fluxes (not gene product concentrations) of the model as a +reaction-keyed dictionary, if solved. +""" +flux_dict(model::GeckoModel, opt_model) = + is_solved(opt_model) ? + Dict(fluxes(model) .=> reaction_flux(model)' * value.(opt_model[:x])[1:n_reactions(model)] ) : nothing \ No newline at end of file diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 15ff176b8..f39991fa1 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -49,3 +49,107 @@ @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) end + +@testset "GECKO small model" begin + #= + Implement the small model found in the supplment of the + original GECKO paper. This model is nice to troubleshoot with, + because the stoich matrix is small. 
+ =# + m = StandardModel("gecko") + m1 = Metabolite("m1") + m2 = Metabolite("m2") + m3 = Metabolite("m3") + m4 = Metabolite("m4") + + @add_reactions! m begin + "r1", nothing → m1, -100, 100 + "r2", nothing → m2, -100, 100 + "r3", m1 + m2 → m3, 0, 100 + "r4", m3 ↔ m4, -100, 100 # make reversible instead + "r5", m2 ↔ m4, -100, 100 + "r6", nothing → m4, 0, 100 + end + + gs = [Gene("g$i") for i in 1:4] + + m.reactions["r3"].grr = [["g1"]] + m.reactions["r4"].grr = [["g1"], ["g2"]] + m.reactions["r5"].grr = [["g3", "g4"]] + m.reactions["r4"].objective_coefficient = 1.0 + + add_genes!(m, gs) + add_metabolites!(m, [m1, m2, m3, m4]) + + reaction_isozymes = Dict( + "r3" => [ + Isozyme( + Dict("g1" => 1), + 1.0, + 1.0, + ), + ], + "r4" => [ + Isozyme( + Dict("g1" => 1), + 2.0, + 2.0, + ), + Isozyme( + Dict("g2" => 1), + 3.0, + 3.0, + ), + ], + "r5" => [ + Isozyme( + Dict("g3" => 1, "g4" => 2), + 5.0, + 5.0, + ), + ], + ) + gene_product_bounds = Dict( + "g1" => (0.0, 10.0), + "g2" => (0.0, 10.0), + "g3" => (0.0, 10.0), + "g4" => (0.0, 10.0), + ) + + gene_product_molar_mass = Dict( + "g1" => 1.0, + "g2" => 2.0, + "g3" => 3.0, + "g4" => 4.0, + ) + + gene_mass_group_bound = Dict("uncategorized" => 0.5) + + gm = make_gecko_model( + m; + reaction_isozymes, + gene_product_bounds, + gene_product_molar_mass, + gene_mass_group_bound, + ) + + S = stoichiometry(gm) + l, u = bounds(gm) + + coupling(gm) + cl, cu = coupling_bounds(gm) + + opt_model = flux_balance_analysis( + gm, + Tulip.Optimizer; + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], + ) + + rxn_fluxes = flux_dict(gm, opt_model) + gene_products = protein_dict(gm, opt_model) + mass_groups = protein_mass_group_dict(gm, opt_model) + + @test isapprox(rxn_fluxes["r4"], 0.142857, atol = TEST_TOLERANCE) + @test isapprox(gene_products["g3"], 0.0285714, atol = TEST_TOLERANCE) + @test isapprox(mass_groups["uncategorized"], 0.5, atol = TEST_TOLERANCE) +end \ No newline at end of file From 
6a5393731052f6f807c3ee077dcfa4260d41d54e Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 19 Apr 2022 17:21:00 +0200 Subject: [PATCH 101/109] format --- src/analysis/gecko.jl | 4 ++-- src/analysis/smoment.jl | 3 ++- src/base/types/wrappers/GeckoModel.jl | 21 ++++++++++++--------- src/base/utils/gecko.jl | 9 ++++++--- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index aa833191d..61b12b790 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -130,7 +130,7 @@ function make_gecko_model( end # prepare enzyme capacity constraints - mg_gid_lookup = Dict{String, Vector{String}}() + mg_gid_lookup = Dict{String,Vector{String}}() for gid in gids[coupling_row_gene_product] mg = gmg_(gid) if haskey(mg_gid_lookup, mg) @@ -139,7 +139,7 @@ function make_gecko_model( mg_gid_lookup[mg] = [gid] end end - coupling_row_mass_group = Vector{Tuple{String, Vector{Int}, Vector{Float64}, Float64}}() + coupling_row_mass_group = Vector{Tuple{String,Vector{Int},Vector{Float64},Float64}}() for (grp, gs) in mg_gid_lookup idxs = [gene_row_lookup[x] for x in Int.(indexin(gs, gids))] mms = gpmm_.(gs) diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl index dadea190b..391345fb9 100644 --- a/src/analysis/smoment.jl +++ b/src/analysis/smoment.jl @@ -31,7 +31,8 @@ function make_smoment_model( total_enzyme_capacity::Float64, ) ris_ = - reaction_isozyme isa Function ? reaction_isozyme : (rid -> get(reaction_isozyme, rid, nothing)) + reaction_isozyme isa Function ? reaction_isozyme : + (rid -> get(reaction_isozyme, rid, nothing)) gpmm_ = gene_product_molar_mass isa Function ? 
gene_product_molar_mass : (gid -> gene_product_molar_mass[gid]) diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 0c2b81fe7..c6702ba2a 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -55,7 +55,7 @@ struct GeckoModel <: ModelWrapper columns::Vector{_gecko_column} coupling_row_reaction::Vector{Int} coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}} - coupling_row_mass_group::Vector{Tuple{String, Vector{Int}, Vector{Float64}, Float64}} + coupling_row_mass_group::Vector{Tuple{String,Vector{Int},Vector{Float64},Float64}} inner::MetabolicModel end @@ -93,7 +93,7 @@ to forward- and reverse-only parts with different isozyme indexes; reactions IDs are mangled accordingly with suffixes), as well as the genes associated with enzymatic reactions. """ -reactions(model::GeckoModel) = +reactions(model::GeckoModel) = let inner_reactions = reactions(model.inner) [ _gecko_reaction_name( @@ -135,7 +135,8 @@ end Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original fluxes in the wrapped model. """ -reaction_flux(model::GeckoModel) = _gecko_column_reactions(model)' * reaction_flux(model.inner) +reaction_flux(model::GeckoModel) = + _gecko_column_reactions(model)' * reaction_flux(model.inner) """ coupling(model::GeckoModel) @@ -146,13 +147,13 @@ enzyme capacity. """ function coupling(model::GeckoModel) innerC = coupling(model.inner) * _gecko_column_reactions(model) - rxnC = _gecko_reaction_coupling(model) + rxnC = _gecko_reaction_coupling(model) enzcap = _gecko_mass_group_coupling(model) [ innerC spzeros(size(innerC, 1), n_genes(model)) rxnC spzeros(size(rxnC, 1), n_genes(model)) spzeros(length(model.coupling_row_mass_group), length(model.columns)) enzcap - ] + ] end """ @@ -195,7 +196,8 @@ end Return the balance of the inner model, concatenated with a vector of zeros representing the enzyme balance of a [`GeckoModel`](@ref). 
""" -balance(model::GeckoModel) = [balance(model.inner); spzeros(length(model.coupling_row_gene_product))] +balance(model::GeckoModel) = + [balance(model.inner); spzeros(length(model.coupling_row_gene_product))] """ n_genes(model::GeckoModel) @@ -209,18 +211,19 @@ n_genes(model::GeckoModel) = length(genes(model)) Return the gene ids of genes that have enzymatic constraints associated with them. """ -genes(model::GeckoModel) = genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]] +genes(model::GeckoModel) = + genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]] """ metabolites(model::GeckoModel) Return the ids of all metabolites, both real and pseudo, for a [`GeckoModel`](@ref). """ -metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model).*"#supply"] +metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model) .* "#supply"] """ n_metabolites(model::GeckoModel) Return the number of metabolites, both real and pseudo, for a [`GeckoModel`](@ref). """ -n_metabolites(model::GeckoModel) = length(metabolites(model)) \ No newline at end of file +n_metabolites(model::GeckoModel) = length(metabolites(model)) diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index daac1026f..1152fcbe3 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -80,8 +80,8 @@ function _gecko_mass_group_coupling(model::GeckoModel) [j for (_, j, _) in tmp], [mm for (_, _, mm) in tmp], length(model.coupling_row_mass_group), - n_genes(model), - ) + n_genes(model), + ) end """ @@ -92,4 +92,7 @@ reaction-keyed dictionary, if solved. """ flux_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? 
- Dict(fluxes(model) .=> reaction_flux(model)' * value.(opt_model[:x])[1:n_reactions(model)] ) : nothing \ No newline at end of file + Dict( + fluxes(model) .=> + reaction_flux(model)' * value.(opt_model[:x])[1:n_reactions(model)], + ) : nothing From a140d45e9705fe21f02b5baa8f1beb775510f435 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 19 Apr 2022 17:29:02 +0200 Subject: [PATCH 102/109] format again --- src/base/utils/enzymes.jl | 2 +- test/analysis/gecko.jl | 51 +++++++-------------------------------- 2 files changed, 10 insertions(+), 43 deletions(-) diff --git a/src/base/utils/enzymes.jl b/src/base/utils/enzymes.jl index e6b73bc73..bff8eee20 100644 --- a/src/base/utils/enzymes.jl +++ b/src/base/utils/enzymes.jl @@ -24,7 +24,7 @@ Extract the mass utilization in mass groups from a solved [`GeckoModel`](@ref). protein_mass_group_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? Dict( - grp[1] => dot(value.(opt_model[:x])[n_reactions(model) .+ grp[2]], grp[3]) for + grp[1] => dot(value.(opt_model[:x])[n_reactions(model).+grp[2]], grp[3]) for grp in model.coupling_row_mass_group ) : nothing diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index f39991fa1..0212c0963 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -68,10 +68,10 @@ end "r3", m1 + m2 → m3, 0, 100 "r4", m3 ↔ m4, -100, 100 # make reversible instead "r5", m2 ↔ m4, -100, 100 - "r6", nothing → m4, 0, 100 + "r6", nothing → m4, 0, 100 end - gs = [Gene("g$i") for i in 1:4] + gs = [Gene("g$i") for i = 1:4] m.reactions["r3"].grr = [["g1"]] m.reactions["r4"].grr = [["g1"], ["g2"]] @@ -82,32 +82,10 @@ end add_metabolites!(m, [m1, m2, m3, m4]) reaction_isozymes = Dict( - "r3" => [ - Isozyme( - Dict("g1" => 1), - 1.0, - 1.0, - ), - ], - "r4" => [ - Isozyme( - Dict("g1" => 1), - 2.0, - 2.0, - ), - Isozyme( - Dict("g2" => 1), - 3.0, - 3.0, - ), - ], - "r5" => [ - Isozyme( - Dict("g3" => 1, "g4" => 2), - 5.0, - 5.0, - ), - ], + "r3" => [Isozyme(Dict("g1" => 1), 1.0, 
1.0)], + "r4" => + [Isozyme(Dict("g1" => 1), 2.0, 2.0), Isozyme(Dict("g2" => 1), 3.0, 3.0)], + "r5" => [Isozyme(Dict("g3" => 1, "g4" => 2), 5.0, 5.0)], ) gene_product_bounds = Dict( "g1" => (0.0, 10.0), @@ -116,16 +94,11 @@ end "g4" => (0.0, 10.0), ) - gene_product_molar_mass = Dict( - "g1" => 1.0, - "g2" => 2.0, - "g3" => 3.0, - "g4" => 4.0, - ) + gene_product_molar_mass = Dict("g1" => 1.0, "g2" => 2.0, "g3" => 3.0, "g4" => 4.0) gene_mass_group_bound = Dict("uncategorized" => 0.5) - gm = make_gecko_model( + gm = make_gecko_model( m; reaction_isozymes, gene_product_bounds, @@ -133,12 +106,6 @@ end gene_mass_group_bound, ) - S = stoichiometry(gm) - l, u = bounds(gm) - - coupling(gm) - cl, cu = coupling_bounds(gm) - opt_model = flux_balance_analysis( gm, Tulip.Optimizer; @@ -152,4 +119,4 @@ end @test isapprox(rxn_fluxes["r4"], 0.142857, atol = TEST_TOLERANCE) @test isapprox(gene_products["g3"], 0.0285714, atol = TEST_TOLERANCE) @test isapprox(mass_groups["uncategorized"], 0.5, atol = TEST_TOLERANCE) -end \ No newline at end of file +end From 3fcc98a16a958dced3b1794622d162251849e057 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 19 Apr 2022 18:44:25 +0200 Subject: [PATCH 103/109] fix model --- test/analysis/gecko.jl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 0212c0963..188129941 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -63,20 +63,19 @@ end m4 = Metabolite("m4") @add_reactions! 
m begin - "r1", nothing → m1, -100, 100 - "r2", nothing → m2, -100, 100 + "r1", nothing → m1, 0, 100 + "r2", nothing → m2, 0, 100 "r3", m1 + m2 → m3, 0, 100 - "r4", m3 ↔ m4, -100, 100 # make reversible instead + "r4", m3 → m4, 0, 100 "r5", m2 ↔ m4, -100, 100 - "r6", nothing → m4, 0, 100 + "r6", m4 → nothing, 0, 100 end gs = [Gene("g$i") for i = 1:4] - m.reactions["r3"].grr = [["g1"]] m.reactions["r4"].grr = [["g1"], ["g2"]] m.reactions["r5"].grr = [["g3", "g4"]] - m.reactions["r4"].objective_coefficient = 1.0 + m.reactions["r6"].objective_coefficient = 1.0 add_genes!(m, gs) add_metabolites!(m, [m1, m2, m3, m4]) @@ -85,7 +84,7 @@ end "r3" => [Isozyme(Dict("g1" => 1), 1.0, 1.0)], "r4" => [Isozyme(Dict("g1" => 1), 2.0, 2.0), Isozyme(Dict("g2" => 1), 3.0, 3.0)], - "r5" => [Isozyme(Dict("g3" => 1, "g4" => 2), 5.0, 5.0)], + "r5" => [Isozyme(Dict("g3" => 1, "g4" => 2), 70.0, 70.0)], ) gene_product_bounds = Dict( "g1" => (0.0, 10.0), @@ -116,7 +115,7 @@ end gene_products = protein_dict(gm, opt_model) mass_groups = protein_mass_group_dict(gm, opt_model) - @test isapprox(rxn_fluxes["r4"], 0.142857, atol = TEST_TOLERANCE) - @test isapprox(gene_products["g3"], 0.0285714, atol = TEST_TOLERANCE) + @test isapprox(rxn_fluxes["r6"], 3.181818181753438, atol = TEST_TOLERANCE) + @test isapprox(gene_products["g4"], 0.09090909090607537, atol = TEST_TOLERANCE) @test isapprox(mass_groups["uncategorized"], 0.5, atol = TEST_TOLERANCE) end From 4242a2133a64af349e821968eb3768a9214b086c Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Tue, 19 Apr 2022 19:01:46 +0200 Subject: [PATCH 104/109] add more docstrings --- src/analysis/gecko.jl | 7 +++++-- src/base/types/wrappers/GeckoModel.jl | 15 +++++++++------ src/base/utils/enzymes.jl | 3 ++- src/base/utils/gecko.jl | 5 +++-- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index 61b12b790..ef9fb9ae6 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -56,7 +56,6 @@ function make_gecko_model( columns = Vector{_gecko_column}() coupling_row_reaction = Int[] coupling_row_gene_product = Int[] - # coupling_row_mass_group = String[] gids = genes(model) (lbs, ubs) = bounds(model) @@ -146,6 +145,7 @@ function make_gecko_model( push!(coupling_row_mass_group, (grp, idxs, mms, gmgb_(grp))) end + # create model with dummy objective gm = GeckoModel( spzeros(length(columns) + length(coupling_row_gene_product)), columns, @@ -155,7 +155,10 @@ function make_gecko_model( model, ) - # set objective (do separately because gene products can also be objectives) + #= + Set objective. This is a separate field because gene products can also be objectives. + This way they can be set as objectives by the user. + =# gm.objective .= [ _gecko_column_reactions(gm)' * objective(gm.inner) spzeros(length(coupling_row_gene_product)) diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index c6702ba2a..6c55d3ac3 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -81,7 +81,11 @@ end """ objective(model::GeckoModel) -Reconstruct an objective of the [`GeckoModel`](@ref). +Return the objective of the [`GeckoModel`](@ref). Note, the objective is with +respect to the internal variables, i.e. [`reactions(model)`](@ref) and +[`genes(model)`](@ref). To manually set the objective, index into +`model.objective` appropriately, and remember to set the previous coefficients +to zero. 
""" objective(model::GeckoModel) = model.objective @@ -90,8 +94,7 @@ objective(model::GeckoModel) = model.objective Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split to forward- and reverse-only parts with different isozyme indexes; reactions -IDs are mangled accordingly with suffixes), as well as the genes associated -with enzymatic reactions. +IDs are mangled accordingly with suffixes). """ reactions(model::GeckoModel) = let inner_reactions = reactions(model.inner) @@ -107,8 +110,8 @@ reactions(model::GeckoModel) = """ n_reactions(model::GeckoModel) -Returns the number of all irreversible reactions in `model` as well as the number of gene products -that take part in enzymatic reactions. +Returns the number of all irreversible reactions in `model` as well as the +number of gene products that take part in enzymatic reactions. """ n_reactions(model::GeckoModel) = length(reactions(model)) @@ -193,7 +196,7 @@ end """ balance(model::GeckoModel) -Return the balance of the inner model, concatenated with a vector of +Return the balance of the reactions in the inner model, concatenated with a vector of zeros representing the enzyme balance of a [`GeckoModel`](@ref). """ balance(model::GeckoModel) = diff --git a/src/base/utils/enzymes.jl b/src/base/utils/enzymes.jl index bff8eee20..d7760adf3 100644 --- a/src/base/utils/enzymes.jl +++ b/src/base/utils/enzymes.jl @@ -3,7 +3,8 @@ Return a dictionary mapping protein molar concentrations to their ids. The argument `opt_model` is a solved optimization problem, typically returned by -[`flux_balance_analysis`](@ref). +[`flux_balance_analysis`](@ref). See [`flux_dict`](@ref) for the corresponding +function that returns a dictionary of solved fluxes. """ protein_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? 
diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index 1152fcbe3..c7514e5a1 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -71,7 +71,7 @@ Compute the part of the coupling for [`GeckoModel`](@ref) that limits the total mass of each group of gene products. """ function _gecko_mass_group_coupling(model::GeckoModel) - tmp = [ + tmp = [ # mm = molar mass, mg = mass group, i = row idx, j = col idx (i, j, mm) for (i, mg) in enumerate(model.coupling_row_mass_group) for (j, mm) in zip(mg[2], mg[3]) ] @@ -88,7 +88,8 @@ end flux_dict(model::GeckoModel, opt_model) Returns the fluxes (not gene product concentrations) of the model as a -reaction-keyed dictionary, if solved. +reaction-keyed dictionary, if solved. See [`protein_dict`](@ref) for a +function to get the gene product concentrations. """ flux_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? From 0127dfc46c011c5f8a1c8f7fe50d90a53a46ad03 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Tue, 19 Apr 2022 19:04:15 +0200 Subject: [PATCH 105/109] format again --- src/analysis/gecko.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index ef9fb9ae6..dba19df3c 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -158,7 +158,7 @@ function make_gecko_model( #= Set objective. This is a separate field because gene products can also be objectives. This way they can be set as objectives by the user. - =# + =# gm.objective .= [ _gecko_column_reactions(gm)' * objective(gm.inner) spzeros(length(coupling_row_gene_product)) From efdd9fb1f721e9d348e26aa36b2b405672a0d971 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Wed, 20 Apr 2022 12:31:14 +0200 Subject: [PATCH 106/109] implement reviews --- src/analysis/gecko.jl | 24 ++++++++++--------- src/base/types/wrappers/GeckoModel.jl | 32 +++++++++++++++++++------ src/base/utils/enzymes.jl | 34 ++++++++++++++------------- src/base/utils/gecko.jl | 16 +------------ test/analysis/gecko.jl | 20 ++++++++-------- test/analysis/smoment.jl | 2 +- 6 files changed, 68 insertions(+), 60 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index dba19df3c..e3b109b9b 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -4,8 +4,8 @@ reaction_isozymes::Union{Function,Dict{String,Vector{Isozyme}}} gene_product_bounds::Union{Function,Dict{String,Tuple{Float64,Float64}}}, gene_product_molar_mass::Union{Function,Dict{String,Float64}}, - gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", - gene_mass_group_bound::Union{Function,Dict{String,Float64}}, + gene_product_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", + gene_product_mass_group_bound::Union{Function,Dict{String,Float64}}, ) Wrap a model into a [`GeckoModel`](@ref), following the structure given by @@ -20,11 +20,11 @@ GECKO algorithm (see [`GeckoModel`](@ref) documentation for details). `reaction_isozymes`), as `Tuple{Float64,Float64}`. - `gene_product_molar_mass` is a function that returns a numeric molar mass of a given gene product specified by string gene ID. -- `gene_mass_group` is a function that returns a string group identifier for a +- `gene_product_mass_group` is a function that returns a string group identifier for a given gene product, again specified by string gene ID. By default, all gene products belong to group `"uncategorized"` which is the behavior of original GECKO. -- `gene_mass_group_bound` is a function that returns the maximum mass for a given +- `gene_product_mass_group_bound` is a function that returns the maximum mass for a given mass group. 
Alternatively, all function arguments may be also supplied as dictionaries that @@ -35,8 +35,8 @@ function make_gecko_model( reaction_isozymes::Union{Function,Dict{String,Vector{Isozyme}}}, gene_product_bounds::Union{Function,Dict{String,Tuple{Float64,Float64}}}, gene_product_molar_mass::Union{Function,Dict{String,Float64}}, - gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", - gene_mass_group_bound::Union{Function,Dict{String,Float64}}, + gene_product_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", + gene_product_mass_group_bound::Union{Function,Dict{String,Float64}}, ) ris_ = reaction_isozymes isa Function ? reaction_isozymes : @@ -47,10 +47,12 @@ function make_gecko_model( gpmm_ = gene_product_molar_mass isa Function ? gene_product_molar_mass : (gid -> gene_product_molar_mass[gid]) - gmg_ = gene_mass_group isa Function ? gene_mass_group : (gid -> gene_mass_group[gid]) + gmg_ = + gene_product_mass_group isa Function ? gene_product_mass_group : + (gid -> gene_product_mass_group[gid]) gmgb_ = - gene_mass_group_bound isa Function ? gene_mass_group_bound : - (grp -> gene_mass_group_bound[grp]) + gene_product_mass_group_bound isa Function ? 
gene_product_mass_group_bound : + (grp -> gene_product_mass_group_bound[grp]) # ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch columns = Vector{_gecko_column}() @@ -138,11 +140,11 @@ function make_gecko_model( mg_gid_lookup[mg] = [gid] end end - coupling_row_mass_group = Vector{Tuple{String,Vector{Int},Vector{Float64},Float64}}() + coupling_row_mass_group = Vector{_gecko_capacity}() for (grp, gs) in mg_gid_lookup idxs = [gene_row_lookup[x] for x in Int.(indexin(gs, gids))] mms = gpmm_.(gs) - push!(coupling_row_mass_group, (grp, idxs, mms, gmgb_(grp))) + push!(coupling_row_mass_group, _gecko_capacity(grp, idxs, mms, gmgb_(grp))) end # create model with dummy objective diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 6c55d3ac3..2f0f35421 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -13,6 +13,19 @@ struct _gecko_column gene_product_coupling::Vector{Tuple{Int,Float64}} end +""" + struct _gecko_capacity + +A helper struct that contains the gene product capacity terms organized by +the grouping type, e.g. metabolic or membrane groups etc. +""" +struct _gecko_capacity + group_id::String + gene_product_idxs::Vector{Int} + gene_product_molar_masses::Vector{Float64} + group_upper_bound::Float64 +end + """ struct GeckoModel <: ModelWrapper @@ -55,7 +68,7 @@ struct GeckoModel <: ModelWrapper columns::Vector{_gecko_column} coupling_row_reaction::Vector{Int} coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}} - coupling_row_mass_group::Vector{Tuple{String,Vector{Int},Vector{Float64},Float64}} + coupling_row_mass_group::Vector{_gecko_capacity} inner::MetabolicModel end @@ -113,7 +126,7 @@ reactions(model::GeckoModel) = Returns the number of all irreversible reactions in `model` as well as the number of gene products that take part in enzymatic reactions. 
""" -n_reactions(model::GeckoModel) = length(reactions(model)) +n_reactions(model::GeckoModel) = length(model.columns) """ bounds(model::GeckoModel) @@ -138,8 +151,13 @@ end Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original fluxes in the wrapped model. """ -reaction_flux(model::GeckoModel) = - _gecko_column_reactions(model)' * reaction_flux(model.inner) +function reaction_flux(model::GeckoModel) + rxnmat = _gecko_column_reactions(model)' * reaction_flux(model.inner) + [ + rxnmat + spzeros(n_genes(model), size(rxnmat, 2)) + ] +end """ coupling(model::GeckoModel) @@ -188,7 +206,7 @@ function coupling_bounds(model::GeckoModel) vcat( icub, iub[model.coupling_row_reaction], - [ub for (_, _, _, ub) in model.coupling_row_mass_group], + [grp.group_upper_bound for grp in model.coupling_row_mass_group], ), ) end @@ -207,7 +225,7 @@ balance(model::GeckoModel) = Return the number of genes that have enzymatic constraints associated with them. """ -n_genes(model::GeckoModel) = length(genes(model)) +n_genes(model::GeckoModel) = length(model.coupling_row_gene_product) """ genes(model::GeckoModel) @@ -229,4 +247,4 @@ metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model) .* "#su Return the number of metabolites, both real and pseudo, for a [`GeckoModel`](@ref). """ -n_metabolites(model::GeckoModel) = length(metabolites(model)) +n_metabolites(model::GeckoModel) = n_metabolites(model.inner) + n_genes(model) diff --git a/src/base/utils/enzymes.jl b/src/base/utils/enzymes.jl index d7760adf3..6ee4091b0 100644 --- a/src/base/utils/enzymes.jl +++ b/src/base/utils/enzymes.jl @@ -1,54 +1,56 @@ """ - protein_dict(model::GeckoModel, opt_model) + gene_product_dict(model::GeckoModel, opt_model) Return a dictionary mapping protein molar concentrations to their ids. The argument `opt_model` is a solved optimization problem, typically returned by [`flux_balance_analysis`](@ref). 
See [`flux_dict`](@ref) for the corresponding function that returns a dictionary of solved fluxes. """ -protein_dict(model::GeckoModel, opt_model) = +gene_product_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? Dict(genes(model) .=> value.(opt_model[:x])[(n_reactions(model)+1):end]) : nothing """ - protein_dict(model::GeckoModel) + gene_product_dict(model::GeckoModel) -A pipe-able variant of [`protein_dict`](@ref). +A pipe-able variant of [`gene_product_dict`](@ref). """ -protein_dict(model::GeckoModel) = x -> protein_dict(model, x) +gene_product_dict(model::GeckoModel) = x -> gene_product_dict(model, x) """ - protein_mass_group_dict(model::GeckoModel, opt_model) + gene_product_mass_group_dict(model::GeckoModel, opt_model) Extract the mass utilization in mass groups from a solved [`GeckoModel`](@ref). """ -protein_mass_group_dict(model::GeckoModel, opt_model) = +gene_product_mass_group_dict(model::GeckoModel, opt_model) = is_solved(opt_model) ? Dict( - grp[1] => dot(value.(opt_model[:x])[n_reactions(model).+grp[2]], grp[3]) for - grp in model.coupling_row_mass_group + grp.group_id => dot( + value.(opt_model[:x])[n_reactions(model).+grp.gene_product_idxs], + grp.gene_product_molar_masses, + ) for grp in model.coupling_row_mass_group ) : nothing """ - protein_mass_group_dict(model::GeckoModel) + gene_product_mass_group_dict(model::GeckoModel) A pipe-able variant of [`mass_group_dict`](@ref). """ -protein_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) +gene_product_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) """ - protein_mass(model::SMomentModel) + gene_product_mass(model::SMomentModel) Extract the total mass utilization in a solved [`SMomentModel`](@ref). """ -protein_mass(model::SMomentModel, opt_model) = +gene_product_mass(model::SMomentModel, opt_model) = is_solved(opt_model) ? 
sum((col.capacity_required for col in model.columns) .* value.(opt_model[:x])) : nothing """ - protein_mass(model::SMomentModel) + gene_product_mass(model::SMomentModel) -A pipe-able variant of [`protein_mass`](@ref). +A pipe-able variant of [`gene_product_mass`](@ref). """ -protein_mass(model::SMomentModel) = x -> protein_mass(model, x) +gene_product_mass(model::SMomentModel) = x -> gene_product_mass(model, x) diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index c7514e5a1..239271f71 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -73,7 +73,7 @@ mass of each group of gene products. function _gecko_mass_group_coupling(model::GeckoModel) tmp = [ # mm = molar mass, mg = mass group, i = row idx, j = col idx (i, j, mm) for (i, mg) in enumerate(model.coupling_row_mass_group) for - (j, mm) in zip(mg[2], mg[3]) + (j, mm) in zip(mg.gene_product_idxs, mg.gene_product_molar_masses) ] sparse( [i for (i, _, _) in tmp], @@ -83,17 +83,3 @@ function _gecko_mass_group_coupling(model::GeckoModel) n_genes(model), ) end - -""" - flux_dict(model::GeckoModel, opt_model) - -Returns the fluxes (not gene product concentrations) of the model as a -reaction-keyed dictionary, if solved. See [`protein_dict`](@ref) for a -function to get the gene product concentrations. -""" -flux_dict(model::GeckoModel, opt_model) = - is_solved(opt_model) ? 
- Dict( - fluxes(model) .=> - reaction_flux(model)' * value.(opt_model[:x])[1:n_reactions(model)], - ) : nothing diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 188129941..8edc3f3fd 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -11,9 +11,9 @@ ) for (i, grr) in enumerate(reaction_gene_association(model, rid)) ) : Isozyme[] - get_gene_product_mass = gid -> get(ecoli_core_protein_masses, gid, 0.0) + get_gene_product_mass = gid -> get(ecoli_core_gene_product_masses, gid, 0.0) - total_protein_mass = 100.0 + total_gene_product_mass = 100.0 bounded_model = model |> with_changed_bounds( @@ -27,7 +27,7 @@ reaction_isozymes = get_reaction_isozymes, gene_product_bounds = g -> g == "b2779" ? (0.01, 0.06) : (0.0, 1.0), gene_product_molar_mass = get_gene_product_mass, - gene_mass_group_bound = _ -> total_protein_mass, + gene_product_mass_group_bound = _ -> total_gene_product_mass, ) opt_model = flux_balance_analysis( @@ -37,7 +37,7 @@ ) rxn_fluxes = flux_dict(gm, opt_model) - prot_concens = protein_dict(gm, opt_model) + prot_concens = gene_product_dict(gm, opt_model) @test isapprox( rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], @@ -45,9 +45,9 @@ atol = TEST_TOLERANCE, ) - prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens) + prot_mass = sum(ecoli_core_gene_product_masses[gid] * c for (gid, c) in prot_concens) - @test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE) + @test isapprox(prot_mass, total_gene_product_mass, atol = TEST_TOLERANCE) end @testset "GECKO small model" begin @@ -95,14 +95,14 @@ end gene_product_molar_mass = Dict("g1" => 1.0, "g2" => 2.0, "g3" => 3.0, "g4" => 4.0) - gene_mass_group_bound = Dict("uncategorized" => 0.5) + gene_product_mass_group_bound = Dict("uncategorized" => 0.5) gm = make_gecko_model( m; reaction_isozymes, gene_product_bounds, gene_product_molar_mass, - gene_mass_group_bound, + gene_product_mass_group_bound, ) opt_model = flux_balance_analysis( @@ -112,8 +112,8 @@ 
end ) rxn_fluxes = flux_dict(gm, opt_model) - gene_products = protein_dict(gm, opt_model) - mass_groups = protein_mass_group_dict(gm, opt_model) + gene_products = gene_product_dict(gm, opt_model) + mass_groups = gene_product_mass_group_dict(gm, opt_model) @test isapprox(rxn_fluxes["r6"], 3.181818181753438, atol = TEST_TOLERANCE) @test isapprox(gene_products["g4"], 0.09090909090607537, atol = TEST_TOLERANCE) diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl index 41323a900..cba241217 100644 --- a/test/analysis/smoment.jl +++ b/test/analysis/smoment.jl @@ -1,7 +1,7 @@ @testset "SMOMENT" begin model = load_model(StandardModel, model_paths["e_coli_core.json"]) - get_gene_product_mass = gid -> get(ecoli_core_protein_masses, gid, 0.0) + get_gene_product_mass = gid -> get(ecoli_core_gene_product_masses, gid, 0.0) get_reaction_isozyme = rid -> From b027b2db964e262149b135dae5b9ba77b2f63116 Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Wed, 20 Apr 2022 12:55:21 +0200 Subject: [PATCH 107/109] more comments --- src/analysis/gecko.jl | 8 ++++---- src/base/types/wrappers/GeckoModel.jl | 17 ++++++++++------- src/base/utils/gecko.jl | 4 ++-- test/data_static.jl | 2 +- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl index e3b109b9b..bf79ef980 100644 --- a/src/analysis/gecko.jl +++ b/src/analysis/gecko.jl @@ -55,7 +55,7 @@ function make_gecko_model( (grp -> gene_product_mass_group_bound[grp]) # ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch - columns = Vector{_gecko_column}() + columns = Vector{_gecko_reaction_column}() coupling_row_reaction = Int[] coupling_row_gene_product = Int[] @@ -69,7 +69,7 @@ function make_gecko_model( for i = 1:n_reactions(model) isozymes = ris_(rids[i]) if isempty(isozymes) - push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [])) + push!(columns, _gecko_reaction_column(i, 0, 0, 0, lbs[i], ubs[i], [])) continue end @@ -115,7 
+115,7 @@ function make_gecko_model( # make a new column push!( columns, - _gecko_column( + _gecko_reaction_column( i, iidx, dir, @@ -162,7 +162,7 @@ function make_gecko_model( This way they can be set as objectives by the user. =# gm.objective .= [ - _gecko_column_reactions(gm)' * objective(gm.inner) + _gecko_reaction_column_reactions(gm)' * objective(gm.inner) spzeros(length(coupling_row_gene_product)) ] diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index 2f0f35421..e68ce7887 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -1,9 +1,9 @@ """ - struct _gecko_column + struct _gecko_reaction_column A helper type for describing the contents of [`GeckoModel`](@ref)s. """ -struct _gecko_column +struct _gecko_reaction_column reaction_idx::Int isozyme_idx::Int direction::Int @@ -55,7 +55,10 @@ The structure contains fields `columns` that describe the contents of the coupling columns, `coupling_row_reaction`, `coupling_row_gene_product` and `coupling_row_mass_group` that describe correspondence of the coupling rows to original model and determine the coupling bounds, and `inner`, which is the -original wrapped model. +original wrapped model. Note, `objective` is the objective vector of the model, +special care needs to be taken to ensure that its length is `n_reactions(model) ++ n_genes(model)` when the user modifies it, where `model` is the GeckoModel in +question. Implementation exposes the split reactions (available as `reactions(model)`), but retains the original "simple" reactions accessible by [`fluxes`](@ref). All @@ -65,7 +68,7 @@ purely virtual and do not occur in [`metabolites`](@ref). 
""" struct GeckoModel <: ModelWrapper objective::SparseVec - columns::Vector{_gecko_column} + columns::Vector{_gecko_reaction_column} coupling_row_reaction::Vector{Int} coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}} coupling_row_mass_group::Vector{_gecko_capacity} @@ -83,7 +86,7 @@ split into unidirectional forward and reverse ones, each of which may have multiple variants per isozyme. """ function stoichiometry(model::GeckoModel) - irrevS = stoichiometry(model.inner) * COBREXA._gecko_column_reactions(model) + irrevS = stoichiometry(model.inner) * COBREXA._gecko_reaction_column_reactions(model) enzS = COBREXA._gecko_gene_product_coupling(model) [ irrevS spzeros(size(irrevS, 1), size(enzS, 1)) @@ -152,7 +155,7 @@ Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original fluxes in the wrapped model. """ function reaction_flux(model::GeckoModel) - rxnmat = _gecko_column_reactions(model)' * reaction_flux(model.inner) + rxnmat = _gecko_reaction_column_reactions(model)' * reaction_flux(model.inner) [ rxnmat spzeros(n_genes(model), size(rxnmat, 2)) @@ -167,7 +170,7 @@ wrapped model, coupling for split (arm) reactions, and the coupling for the tota enzyme capacity. 
""" function coupling(model::GeckoModel) - innerC = coupling(model.inner) * _gecko_column_reactions(model) + innerC = coupling(model.inner) * _gecko_reaction_column_reactions(model) rxnC = _gecko_reaction_coupling(model) enzcap = _gecko_mass_group_coupling(model) [ diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl index 239271f71..86500314b 100644 --- a/src/base/utils/gecko.jl +++ b/src/base/utils/gecko.jl @@ -10,12 +10,12 @@ _gecko_reaction_name(original_name::String, direction::Int, isozyme_idx::Int) = "$original_name#reverse#$isozyme_idx" """ - _gecko_column_reactions(model::GeckoModel) + _gecko_reaction_column_reactions(model::GeckoModel) Retrieve a utility mapping between reactions and split reactions; rows correspond to "original" reactions, columns correspond to "split" reactions. """ -_gecko_column_reactions(model::GeckoModel) = sparse( +_gecko_reaction_column_reactions(model::GeckoModel) = sparse( [col.reaction_idx for col in model.columns], 1:length(model.columns), [col.direction >= 0 ? 1 : -1 for col in model.columns], diff --git a/test/data_static.jl b/test/data_static.jl index 1492a5f14..a80d46d16 100644 --- a/test/data_static.jl +++ b/test/data_static.jl @@ -155,7 +155,7 @@ const reaction_standard_gibbs_free_energies = Dict{String,Float64}( "FUM" => -3.424133018702122, ) -const ecoli_core_protein_masses = Dict{String,Float64}( +const ecoli_core_gene_product_masses = Dict{String,Float64}( #= Data downloaded from Uniprot for E. coli K12, gene mass in kDa. To obtain these data yourself, go to From f406440e8ae4256ecc57b0a306b90c48a5c15ea7 Mon Sep 17 00:00:00 2001 From: "St. 
Elmo Wilken" Date: Thu, 21 Apr 2022 22:34:40 +0200 Subject: [PATCH 108/109] small fixes --- src/base/types/Gene.jl | 11 ++--------- src/base/types/wrappers/GeckoModel.jl | 8 ++++---- test/analysis/gecko.jl | 2 ++ 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/base/types/Gene.jl b/src/base/types/Gene.jl index 9a7ec1315..b27dec9fa 100644 --- a/src/base/types/Gene.jl +++ b/src/base/types/Gene.jl @@ -7,7 +7,6 @@ id :: String name :: Maybe{String} notes :: Dict{String, Vector{String}} annotation :: Dict{String, Union{Vector{String}, String}} -molar_mass :: Maybe{Float64} ```` """ mutable struct Gene @@ -15,13 +14,7 @@ mutable struct Gene name::Maybe{String} notes::Notes annotations::Annotations - molar_mass::Maybe{Float64} - Gene( - id::String = ""; - name = nothing, - notes = Notes(), - annotations = Annotations(), - molar_mass = nothing, - ) = new(id, name, notes, annotations, molar_mass) + Gene(id::String = ""; name = nothing, notes = Notes(), annotations = Annotations()) = + new(id, name, notes, annotations) end diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl index e68ce7887..42cc47aa8 100644 --- a/src/base/types/wrappers/GeckoModel.jl +++ b/src/base/types/wrappers/GeckoModel.jl @@ -55,10 +55,10 @@ The structure contains fields `columns` that describe the contents of the coupling columns, `coupling_row_reaction`, `coupling_row_gene_product` and `coupling_row_mass_group` that describe correspondence of the coupling rows to original model and determine the coupling bounds, and `inner`, which is the -original wrapped model. Note, `objective` is the objective vector of the model, -special care needs to be taken to ensure that its length is `n_reactions(model) -+ n_genes(model)` when the user modifies it, where `model` is the GeckoModel in -question. +original wrapped model. Note, `objective` is the objective vector of the model. 
+Special care needs to be taken to ensure that its length is the sum of +`n_reactions(model)` and `n_genes(model)` when the user modifies it, where +`model` is the GeckoModel in question. Implementation exposes the split reactions (available as `reactions(model)`), but retains the original "simple" reactions accessible by [`fluxes`](@ref). All diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl index 8edc3f3fd..58318405f 100644 --- a/test/analysis/gecko.jl +++ b/test/analysis/gecko.jl @@ -46,8 +46,10 @@ ) prot_mass = sum(ecoli_core_gene_product_masses[gid] * c for (gid, c) in prot_concens) + mass_groups = gene_product_mass_group_dict(gm, opt_model) @test isapprox(prot_mass, total_gene_product_mass, atol = TEST_TOLERANCE) + @test isapprox(prot_mass, mass_groups["uncategorized"], atol = TEST_TOLERANCE) end @testset "GECKO small model" begin From 89e34a82e885f5131ac790a1006aabcb6ecc1aab Mon Sep 17 00:00:00 2001 From: "St. Elmo Wilken" Date: Wed, 27 Apr 2022 15:35:53 +0200 Subject: [PATCH 109/109] bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index c422896eb..957cbb127 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "COBREXA" uuid = "babc4406-5200-4a30-9033-bf5ae714c842" authors = ["The developers of COBREXA.jl"] -version = "1.2.3" +version = "1.3.0" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"