diff --git a/Project.toml b/Project.toml index e5aac5090..957cbb127 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "COBREXA" uuid = "babc4406-5200-4a30-9033-bf5ae714c842" authors = ["The developers of COBREXA.jl"] -version = "1.2.3" +version = "1.3.0" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" @@ -21,9 +21,9 @@ StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] -DistributedData = "0.1.4" +DistributedData = "0.1.4, 0.2" JSON = "0.21" -JuMP = "0.21.0, 0.22.0" +JuMP = "0.21.0, 0.22.0, 0.23, 1" MAT = "0.10" MacroTools = "0.5.6" OSQP = "0.6" diff --git a/README.md b/README.md index e4bbf9e38..6f2945414 100644 --- a/README.md +++ b/README.md @@ -128,20 +128,20 @@ some reactions were disabled independently: ```julia # convert to a model type that is efficient to modify -m = convert(StandardModel, m) +m = convert(StandardModel, model) # find the model objective value if oxygen or carbon dioxide transports are disabled screen(m, # the base model variants=[ # this specifies how to generate the desired model variants [], # one with no modifications, i.e. the base case - [with_changed_bound("O2t", lower=0.0, upper=0.0)], # disable oxygen - [with_changed_bound("CO2t", lower=0.0, upper=0.0)], # disable CO2 - [with_changed_bound("O2t", lower=0.0, upper=0.0), - with_changed_bound("CO2t", lower=0.0, upper=0.0)], # disable both + [with_changed_bound("R_O2t", lower=0.0, upper=0.0)], # disable oxygen + [with_changed_bound("R_CO2t", lower=0.0, upper=0.0)], # disable CO2 + [with_changed_bound("R_O2t", lower=0.0, upper=0.0), + with_changed_bound("R_CO2t", lower=0.0, upper=0.0)], # disable both ], # this specifies what to do with the model variants (received as the argument `x`) analysis = x -> - flux_balance_analysis_dict(x, Tulip.Optimizer)["BIOMASS_Ecoli_core_w_GAM"], + flux_balance_analysis_dict(x, Tulip.Optimizer)["R_BIOMASS_Ecoli_core_w_GAM"], ) ``` You should receive a result showing that missing oxygen transport makes the diff --git a/docs/src/advanced.md b/docs/src/advanced.md index 957256b1a..870069e64 100644 --- a/docs/src/advanced.md +++ b/docs/src/advanced.md @@ -5,4 +5,3 @@ Pages = joinpath.("advanced", filter(x -> endswith(x, ".md"), readdir("advanced"))) Depth = 2 ``` - diff --git a/docs/src/tutorials.md b/docs/src/tutorials.md index 20ef6c374..75bc9ea37 100644 --- a/docs/src/tutorials.md +++ b/docs/src/tutorials.md @@ -5,4 +5,3 @@ Pages = joinpath.("tutorials", filter(x -> endswith(x, ".md"), readdir("tutorials"))) Depth = 2 ``` - diff --git a/src/COBREXA.jl b/src/COBREXA.jl index 2f26a9e43..74d4cc16b 100644 --- a/src/COBREXA.jl +++ b/src/COBREXA.jl @@ -18,9 +18,7 @@ import Base: findfirst, getindex, show import Pkg import SBML # conflict with Reaction struct name - include("banner.jl") -_print_banner() # autoloading const _inc(path...) = include(joinpath(path...)) @@ -34,6 +32,7 @@ _inc_all.( joinpath("base", "logging"), joinpath("base", "macros"), joinpath("base", "types"), + joinpath("base", "types", "wrappers"), "base", "io", joinpath("io", "show"), diff --git a/src/analysis/flux_balance_analysis.jl b/src/analysis/flux_balance_analysis.jl index ae096ea3e..25a534b08 100644 --- a/src/analysis/flux_balance_analysis.jl +++ b/src/analysis/flux_balance_analysis.jl @@ -1,5 +1,5 @@ """ - flux_balance_analysis_vec(args...)::Maybe{Vector{Float64}} + flux_balance_analysis_vec(model::MetabolicModel, args...)::Maybe{Vector{Float64}} A variant of FBA that returns a vector of fluxes in the same order as reactions of the model, if the solution is found. @@ -9,8 +9,12 @@ Arguments are passed to [`flux_balance_analysis`](@ref). This function is kept for backwards compatibility, use [`flux_vector`](@ref) instead. """ -flux_balance_analysis_vec(args...; kwargs...)::Maybe{Vector{Float64}} = - flux_vector(flux_balance_analysis(args...; kwargs...)) +flux_balance_analysis_vec( + model::MetabolicModel, + args...; + kwargs..., +)::Maybe{Vector{Float64}} = + flux_vector(model, flux_balance_analysis(model, args...; kwargs...)) """ flux_balance_analysis_dict(model::MetabolicModel, args...)::Maybe{Dict{String, Float64}} @@ -66,7 +70,6 @@ biomass_reaction_id = findfirst(model.reactions, "BIOMASS_Ecoli_core_w_GAM") modified_solution = flux_balance_analysis(model, GLPK.optimizer; modifications=[change_objective(biomass_reaction_id)]) ``` - """ function flux_balance_analysis( model::M, diff --git a/src/analysis/flux_variability_analysis.jl b/src/analysis/flux_variability_analysis.jl index 8228b16a7..7935da1bd 100644 --- a/src/analysis/flux_variability_analysis.jl +++ b/src/analysis/flux_variability_analysis.jl @@ -1,7 +1,7 @@ """ flux_variability_analysis( model::MetabolicModel, - reactions::Vector{Int}, + fluxes::Vector{Int}, optimizer; modifications = [], workers = [myid()], @@ -11,9 +11,9 @@ )::Matrix{Float64} Flux variability analysis solves a pair of optimization problems in `model` for -each flux listed in `reactions`: +each flux `f` described in `fluxes`: ``` - min,max xᵢ +min,max fᵀxᵢ s.t. S x = b xₗ ≤ x ≤ xᵤ cᵀx ≥ bounds(Z₀)[1] @@ -38,13 +38,13 @@ multithreaded variability computation can be used to improve resource allocation efficiency in many common use-cases. `ret` is a function used to extract results from optimized JuMP models of the -individual reactions. By default, it calls and returns the value of +individual fluxes. By default, it calls and returns the value of `JuMP.objective_value`. More information can be extracted e.g. by setting it to a function that returns a more elaborate data structure; such as `m -> (JuMP.objective_value(m), JuMP.value.(m[:x]))`. Returns a matrix of extracted `ret` values for minima and maxima, of total size -(`length(reactions)`,2). The optimizer result status is checked with +(`size(fluxes,2)`,2). The optimizer result status is checked with [`is_solved`](@ref); `nothing` is returned if the optimization failed for any reason. @@ -56,7 +56,7 @@ flux_variability_analysis(model, [1, 2, 3, 42], GLPK.optimizer) """ function flux_variability_analysis( model::MetabolicModel, - reactions::Vector{Int}, + fluxes::SparseMat, optimizer; modifications = [], workers = [myid()], @@ -64,8 +64,13 @@ function flux_variability_analysis( bounds = z -> (z, Inf), ret = objective_value, ) - if any(reactions .< 1) || any(reactions .> n_reactions(model)) - throw(DomainError(reactions, "Index exceeds number of reactions.")) + if size(fluxes, 1) != n_reactions(model) + throw( + DomainError( + size(fluxes, 1), + "Flux matrix size is not compatible with model reaction count.", + ), + ) end Z = bounds( @@ -75,6 +80,8 @@ function flux_variability_analysis( ) : optimal_objective_value, ) + flux_vector = [fluxes[:, i] for i = 1:size(fluxes, 2)] + return screen_optmodel_modifications( model, optimizer; @@ -93,12 +100,36 @@ function flux_variability_analysis( end, ], ), - args = Tuple.([-reactions reactions]), - analysis = (_, opt_model, ridx) -> _max_variability_flux(opt_model, ridx, ret), + args = tuple.([flux_vector flux_vector], [MIN_SENSE MAX_SENSE]), + analysis = (_, opt_model, flux, sense) -> + _max_variability_flux(opt_model, flux, sense, ret), workers = workers, ) end +""" + flux_variability_analysis(model::MetabolicModel, flux_indexes::Vector{Int}, optimizer; kwargs...) + +An overload of [`flux_variability_analysis`](@ref) that explores the fluxes specified by integer indexes +""" +function flux_variability_analysis( + model::MetabolicModel, + flux_indexes::Vector{Int}, + optimizer; + kwargs..., +) + if any((flux_indexes .< 1) .| (flux_indexes .> n_fluxes(model))) + throw(DomainError(flux_indexes, "Flux index out of range")) + end + + flux_variability_analysis( + model, + reaction_flux(model)[:, flux_indexes], + optimizer; + kwargs..., + ) +end + """ flux_variability_analysis( model::MetabolicModel, @@ -107,12 +138,10 @@ end ) A simpler version of [`flux_variability_analysis`](@ref) that maximizes and -minimizes all reactions in the model. Arguments are forwarded. +minimizes all declared fluxes in the model. Arguments are forwarded. """ -function flux_variability_analysis(model::MetabolicModel, optimizer; kwargs...) - n = n_reactions(model) - return flux_variability_analysis(model, collect(1:n), optimizer; kwargs...) -end +flux_variability_analysis(model::MetabolicModel, optimizer; kwargs...) = + flux_variability_analysis(model, reaction_flux(model), optimizer; kwargs...) """ flux_variability_analysis_dict( @@ -122,8 +151,8 @@ end ) A variant of [`flux_variability_analysis`](@ref) that returns the individual -maximized and minimized fluxes of all reactions as two dictionaries (of -dictionaries). All keyword arguments except `ret` are passed through. +maximized and minimized fluxes as two dictionaries (of dictionaries). All +keyword arguments except `ret` are passed through. # Example ``` @@ -140,24 +169,71 @@ mins, maxs = flux_variability_analysis_dict( ``` """ function flux_variability_analysis_dict(model::MetabolicModel, optimizer; kwargs...) - fluxes = flux_variability_analysis(model, optimizer; kwargs..., ret = flux_vector) - rxns = reactions(model) - dicts = zip.(Ref(rxns), fluxes) + vs = flux_variability_analysis( + model, + optimizer; + kwargs..., + ret = sol -> flux_vector(model, sol), + ) + flxs = fluxes(model) + dicts = zip.(Ref(flxs), vs) - return (Dict(rxns .=> Dict.(dicts[:, 1])), Dict(rxns .=> Dict.(dicts[:, 2]))) + return (Dict(flxs .=> Dict.(dicts[:, 1])), Dict(flxs .=> Dict.(dicts[:, 2]))) end """ - _max_variability_flux(opt_model, rid, ret) + _max_variability_flux(opt_model, flux, sense, ret) Internal helper for maximizing reactions in optimization model. """ -function _max_variability_flux(opt_model, rid, ret) - sense = rid > 0 ? MAX_SENSE : MIN_SENSE - var = all_variables(opt_model)[abs(rid)] - - @objective(opt_model, sense, var) +function _max_variability_flux(opt_model, flux, sense, ret) + @objective(opt_model, sense, sum(flux .* opt_model[:x])) optimize!(opt_model) is_solved(opt_model) ? ret(opt_model) : nothing end + +""" + reaction_variability_analysis(model::MetabolicModel, reaction_indexes::Vector{Int}, optimizer; kwargs...) + +A variant for [`flux_variability_analysis`](@ref) that examines actual +reactions (selected by their indexes in `reactions` argument) instead of whole +fluxes. This may be useful for models where the sets of reactions and fluxes +differ. +""" +function reaction_variability_analysis( + model::MetabolicModel, + reaction_indexes::Vector{Int}, + optimizer; + kwargs..., +) + if any((reaction_indexes .< 1) .| (reaction_indexes .> n_reactions(model))) + throw(DomainError(reaction_indexes, "Flux index out of range")) + end + + flux_variability_analysis( + model, + sparse( + reaction_indexes, + 1:length(reaction_indexes), + 1.0, + n_reactions(model), + length(reaction_indexes), + ), + optimizer; + kwargs..., + ) +end + +""" + reaction_variability_analysis( model::MetabolicModel, optimizer; kwargs...) + +Shortcut for [`reaction_variability_analysis`](@ref) that examines all reactions. +""" +reaction_variability_analysis(model::MetabolicModel, optimizer; kwargs...) = + reaction_variability_analysis( + model, + collect(1:n_reactions(model)), + optimizer; + kwargs..., + ) diff --git a/src/analysis/gecko.jl b/src/analysis/gecko.jl new file mode 100644 index 000000000..bf79ef980 --- /dev/null +++ b/src/analysis/gecko.jl @@ -0,0 +1,170 @@ +""" + make_gecko_model( + model::MetabolicModel; + reaction_isozymes::Union{Function,Dict{String,Vector{Isozyme}}} + gene_product_bounds::Union{Function,Dict{String,Tuple{Float64,Float64}}}, + gene_product_molar_mass::Union{Function,Dict{String,Float64}}, + gene_product_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", + gene_product_mass_group_bound::Union{Function,Dict{String,Float64}}, + ) + +Wrap a model into a [`GeckoModel`](@ref), following the structure given by +GECKO algorithm (see [`GeckoModel`](@ref) documentation for details). + +# Arguments + +- `reaction_isozymes` is a function that returns a vector of [`Isozyme`](@ref)s + for each reaction, or empty vector if the reaction is not enzymatic. +- `gene_product_bounds` is a function that returns lower and upper bound for + concentration for a given gene product (specified by the same string gene ID as in + `reaction_isozymes`), as `Tuple{Float64,Float64}`. +- `gene_product_molar_mass` is a function that returns a numeric molar mass of + a given gene product specified by string gene ID. +- `gene_product_mass_group` is a function that returns a string group identifier for a + given gene product, again specified by string gene ID. By default, all gene + products belong to group `"uncategorized"` which is the behavior of original + GECKO. +- `gene_product_mass_group_bound` is a function that returns the maximum mass for a given + mass group. + +Alternatively, all function arguments may be also supplied as dictionaries that +provide the same data lookup. +""" +function make_gecko_model( + model::MetabolicModel; + reaction_isozymes::Union{Function,Dict{String,Vector{Isozyme}}}, + gene_product_bounds::Union{Function,Dict{String,Tuple{Float64,Float64}}}, + gene_product_molar_mass::Union{Function,Dict{String,Float64}}, + gene_product_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized", + gene_product_mass_group_bound::Union{Function,Dict{String,Float64}}, +) + ris_ = + reaction_isozymes isa Function ? reaction_isozymes : + (rid -> get(reaction_isozymes, rid, [])) + gpb_ = + gene_product_bounds isa Function ? gene_product_bounds : + (gid -> gene_product_bounds[gid]) + gpmm_ = + gene_product_molar_mass isa Function ? gene_product_molar_mass : + (gid -> gene_product_molar_mass[gid]) + gmg_ = + gene_product_mass_group isa Function ? gene_product_mass_group : + (gid -> gene_product_mass_group[gid]) + gmgb_ = + gene_product_mass_group_bound isa Function ? gene_product_mass_group_bound : + (grp -> gene_product_mass_group_bound[grp]) + # ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch + + columns = Vector{_gecko_reaction_column}() + coupling_row_reaction = Int[] + coupling_row_gene_product = Int[] + + gids = genes(model) + (lbs, ubs) = bounds(model) + rids = reactions(model) + + gene_name_lookup = Dict(gids .=> 1:length(gids)) + gene_row_lookup = Dict{Int,Int}() + + for i = 1:n_reactions(model) + isozymes = ris_(rids[i]) + if isempty(isozymes) + push!(columns, _gecko_reaction_column(i, 0, 0, 0, lbs[i], ubs[i], [])) + continue + end + + # loop over both directions for all isozymes + for (lb, ub, kcatf, dir) in [ + (-ubs[i], -lbs[i], i -> i.kcat_reverse, -1), + (lbs[i], ubs[i], i -> i.kcat_forward, 1), + ] + # The coefficients in the coupling matrix will be categorized in + # separate rows for negative and positive reactions. Surprisingly, + # we do not need to explicitly remember the bounds, because the + # ones taken from the original model are perfectly okay -- the + # "reverse" direction is unreachable because of individual + # bounds on split reactions, and the "forward" direction is + # properly negated in the reverse case to work nicely with the + # global lower bound. + reaction_coupling_row = + ub > 0 && length(isozymes) > 1 ? begin + push!(coupling_row_reaction, i) + length(coupling_row_reaction) + end : 0 + + # all isozymes in this direction + for (iidx, isozyme) in enumerate(isozymes) + kcat = kcatf(isozyme) + if ub > 0 && kcat > _constants.tolerance + # prepare the coupling with gene product molar + gene_product_coupling = collect( + begin + gidx = gene_name_lookup[gene] + row_idx = if haskey(gene_row_lookup, gidx) + gene_row_lookup[gidx] + else + push!(coupling_row_gene_product, gidx) + gene_row_lookup[gidx] = + length(coupling_row_gene_product) + end + (row_idx, stoich / kcat) + end for (gene, stoich) in isozyme.gene_product_count if + haskey(gene_name_lookup, gene) + ) + + # make a new column + push!( + columns, + _gecko_reaction_column( + i, + iidx, + dir, + reaction_coupling_row, + max(lb, 0), + ub, + gene_product_coupling, + ), + ) + end + end + end + end + + # prepare enzyme capacity constraints + mg_gid_lookup = Dict{String,Vector{String}}() + for gid in gids[coupling_row_gene_product] + mg = gmg_(gid) + if haskey(mg_gid_lookup, mg) + push!(mg_gid_lookup[mg], gid) + else + mg_gid_lookup[mg] = [gid] + end + end + coupling_row_mass_group = Vector{_gecko_capacity}() + for (grp, gs) in mg_gid_lookup + idxs = [gene_row_lookup[x] for x in Int.(indexin(gs, gids))] + mms = gpmm_.(gs) + push!(coupling_row_mass_group, _gecko_capacity(grp, idxs, mms, gmgb_(grp))) + end + + # create model with dummy objective + gm = GeckoModel( + spzeros(length(columns) + length(coupling_row_gene_product)), + columns, + coupling_row_reaction, + collect(zip(coupling_row_gene_product, gpb_.(gids[coupling_row_gene_product]))), + coupling_row_mass_group, + model, + ) + + #= + Set objective. This is a separate field because gene products can also be objectives. + This way they can be set as objectives by the user. + =# + gm.objective .= [ + _gecko_reaction_column_reactions(gm)' * objective(gm.inner) + spzeros(length(coupling_row_gene_product)) + ] + + return gm +end diff --git a/src/analysis/minimize_metabolic_adjustment.jl b/src/analysis/minimize_metabolic_adjustment.jl index 73fd605e8..d15501f37 100644 --- a/src/analysis/minimize_metabolic_adjustment.jl +++ b/src/analysis/minimize_metabolic_adjustment.jl @@ -82,7 +82,7 @@ minimize_metabolic_adjustment(flux_ref_dict::Dict{String,Float64}) = ) """ - minimize_metabolic_adjustment_analysis_vec(args...; kwargs...) + minimize_metabolic_adjustment_analysis_vec(model::MetabolicModel, args...; kwargs...) Perform minimization of metabolic adjustment (MOMA) and return a vector of fluxes in the same order as the reactions in `model`. Arguments are forwarded to @@ -91,8 +91,8 @@ same order as the reactions in `model`. Arguments are forwarded to This function is kept for backwards compatibility, use [`flux_vector`](@ref) instead. """ -minimize_metabolic_adjustment_analysis_vec(args...; kwargs...) = - flux_vector(minimize_metabolic_adjustment_analysis(args...; kwargs...)) +minimize_metabolic_adjustment_analysis_vec(model::MetabolicModel, args...; kwargs...) = + flux_vector(model, minimize_metabolic_adjustment_analysis(model, args...; kwargs...)) """ minimize_metabolic_adjustment_analysis_dict(model::MetabolicModel, args...; kwargs...) diff --git a/src/analysis/modifications/moment.jl b/src/analysis/modifications/moment.jl index bcde28ca5..5c46da8d5 100644 --- a/src/analysis/modifications/moment.jl +++ b/src/analysis/modifications/moment.jl @@ -36,6 +36,9 @@ flux_balance_analysis( """ add_moment_constraints(kcats::Dict{String,Float64}, protein_mass_fraction::Float64) = (model, opt_model) -> begin + @warn( + "DEPRECATION WARNING: 'add_moment_constraints' will be removed in future versions of COBREXA.jl in favor of a GECKO-based formulation" + ) lbs, ubs = get_optmodel_bounds(opt_model) # to assign directions # get grrs and ignore empty blocks: TODO: fix importing to avoid this ugly conditional see #462 diff --git a/src/analysis/parsimonious_flux_balance_analysis.jl b/src/analysis/parsimonious_flux_balance_analysis.jl index 844b3b009..12475f15c 100644 --- a/src/analysis/parsimonious_flux_balance_analysis.jl +++ b/src/analysis/parsimonious_flux_balance_analysis.jl @@ -94,7 +94,7 @@ function parsimonious_flux_balance_analysis( end """ - parsimonious_flux_balance_analysis_vec(args...; kwargs...) + parsimonious_flux_balance_analysis_vec(model::MetabolicModel, args...; kwargs...) Perform parsimonious flux balance analysis on `model` using `optimizer`. Returns a vector of fluxes in the same order as the reactions in `model`. @@ -104,8 +104,8 @@ internally. This function is kept for backwards compatibility, use [`flux_vector`](@ref) instead. """ -parsimonious_flux_balance_analysis_vec(args...; kwargs...) = - flux_vector(parsimonious_flux_balance_analysis(args...; kwargs...)) +parsimonious_flux_balance_analysis_vec(model::MetabolicModel, args...; kwargs...) = + flux_vector(model, parsimonious_flux_balance_analysis(model, args...; kwargs...)) """ parsimonious_flux_balance_analysis_dict(model::MetabolicModel, args...; kwargs...) diff --git a/src/analysis/sampling/affine_hit_and_run.jl b/src/analysis/sampling/affine_hit_and_run.jl index ca524cef4..07644654b 100644 --- a/src/analysis/sampling/affine_hit_and_run.jl +++ b/src/analysis/sampling/affine_hit_and_run.jl @@ -11,9 +11,9 @@ Run a hit-and-run style sampling that starts from `warmup_points` and uses their affine combinations for generating the run directions to sample the space -delimited by `lbs` and `ubs`. The points that represent fluxes in -`warmup_points` should be organized in columns, i.e. `warmup_points[:,1]` is -the first warmup flux. +delimited by `lbs` and `ubs`. The reaction rate vectors in `warmup_points` +should be organized in columns, i.e. `warmup_points[:,1]` is the first set of +reaction rates. There are total `chains` of hit-and-run runs, each on a batch of `size(warmup_points, 2)` points. The runs are scheduled on `workers`, for good @@ -25,9 +25,9 @@ points is collected for output. For example, `sample_iters=[1,4,5]` causes the process run for 5 iterations, returning the sample batch that was produced by 1st, 4th and last (5th) iteration. -Returns a matrix of sampled fluxes (in columns), with all collected samples -horizontally concatenated. The total number of samples (columns) will be -`size(warmup_points,2) * chains * length(sample_iters)`. +Returns a matrix of sampled reaction rates (in columns), with all collected +samples horizontally concatenated. The total number of samples (columns) will +be `size(warmup_points,2) * chains * length(sample_iters)`. # Example ``` @@ -38,6 +38,9 @@ model = load_model(StandardModel, model_path) warmup, lbs, ubs = warmup_from_variability(model, Tulip.Optimizer, 100) samples = affine_hit_and_run(warmup, lbs, ubs, sample_iters = 1:3) + +# convert the result to flux (for models where the distinction matters): +fluxes = reaction_flux(model)' * samples ``` """ function affine_hit_and_run( diff --git a/src/analysis/sampling/warmup_variability.jl b/src/analysis/sampling/warmup_variability.jl index c34d7246b..cf66bb522 100644 --- a/src/analysis/sampling/warmup_variability.jl +++ b/src/analysis/sampling/warmup_variability.jl @@ -75,7 +75,7 @@ function warmup_from_variability( fluxes = hcat( dpmap( - rid -> :($COBREXA._max_variability_flux( + rid -> :($COBREXA._maximize_warmup_reaction( cobrexa_sampling_warmup_optmodel, $rid, om -> $COBREXA.JuMP.value.(om[:x]), @@ -96,3 +96,18 @@ function warmup_from_variability( return fluxes, lbs, ubs end + +""" + _maximize_warmup_reaction(opt_model, rid, ret) + +A helper function for finding warmup points from reaction variability. +""" +function _maximize_warmup_reaction(opt_model, rid, ret) + sense = rid > 0 ? MAX_SENSE : MIN_SENSE + var = all_variables(opt_model)[abs(rid)] + + @objective(opt_model, sense, var) + optimize!(opt_model) + + is_solved(opt_model) ? ret(opt_model) : nothing +end diff --git a/src/analysis/screening.jl b/src/analysis/screening.jl index acfd1daa8..55ba83020 100644 --- a/src/analysis/screening.jl +++ b/src/analysis/screening.jl @@ -75,7 +75,7 @@ from pure Julia structures, because they may be transferred over the network between the computation nodes. For that reason, functions that return whole JuMP models that contain pointers to allocated C structures (such as [`flux_balance_analysis`](@ref) used with `GLPK` or `Gurobi` otimizers) will -generally not in this context. +generally not work in this context. Note: this function is a thin argument-handling wrapper around [`_screen_impl`](@ref). diff --git a/src/analysis/smoment.jl b/src/analysis/smoment.jl new file mode 100644 index 000000000..391345fb9 --- /dev/null +++ b/src/analysis/smoment.jl @@ -0,0 +1,73 @@ + +""" + make_smoment_model( + model::MetabolicModel; + reaction_isozyme::Union{Function,Dict{String,Isozyme}}, + gene_product_molar_mass::Union{Function,Dict{String,Float64}}, + total_enzyme_capacity::Float64, + ) + +Construct a model with a structure given by sMOMENT algorithm; returns a +[`SMomentModel`](@ref) (see the documentation for details). + +# Arguments + +- `reaction_isozyme` parameter is a function that returns a single + [`Isozyme`](@ref) for each reaction, or `nothing` if the reaction is not + enzymatic. If the reaction has multiple isozymes, use + [`smoment_isozyme_speed`](@ref) to select the fastest one, as recommended by + the sMOMENT paper. +- `gene_product_molar_mass` parameter is a function that returns a molar mass + of each gene product as specified by sMOMENT. +- `total_enzyme_capacity` is the maximum "enzyme capacity" in the model. + +Alternatively, all function arguments also accept dictionaries that are used to +provide the same data lookup. +""" +function make_smoment_model( + model::MetabolicModel; + reaction_isozyme::Union{Function,Dict{String,Isozyme}}, + gene_product_molar_mass::Union{Function,Dict{String,Float64}}, + total_enzyme_capacity::Float64, +) + ris_ = + reaction_isozyme isa Function ? reaction_isozyme : + (rid -> get(reaction_isozyme, rid, nothing)) + gpmm_ = + gene_product_molar_mass isa Function ? gene_product_molar_mass : + (gid -> gene_product_molar_mass[gid]) + + columns = Vector{_smoment_column}() + + (lbs, ubs) = bounds(model) + rids = reactions(model) + + for i = 1:n_reactions(model) + isozyme = ris_(rids[i]) + if isnothing(isozyme) + # non-enzymatic reaction (or a totally ignored one) + push!(columns, _smoment_column(i, 0, lbs[i], ubs[i], 0)) + continue + end + + mw = sum(gpmm_(gid) * ps for (gid, ps) in isozyme.gene_product_count) + + if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance + # reaction can run in reverse + push!( + columns, + _smoment_column(i, -1, max(-ubs[i], 0), -lbs[i], mw / isozyme.kcat_reverse), + ) + end + + if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance + # reaction can run forward + push!( + columns, + _smoment_column(i, 1, max(lbs[i], 0), ubs[i], mw / isozyme.kcat_forward), + ) + end + end + + return SMomentModel(columns, total_enzyme_capacity, model) +end diff --git a/src/banner.jl b/src/banner.jl index 68c32620f..00f62a550 100644 --- a/src/banner.jl +++ b/src/banner.jl @@ -4,6 +4,12 @@ include_dependency(joinpath(_PKG_ROOT_DIR, "Project.toml")) const COBREXA_VERSION = VersionNumber(Pkg.TOML.parsefile(joinpath(_PKG_ROOT_DIR, "Project.toml"))["version"]) +function __init__() + if myid() == 1 && Base.JLOptions().banner != 0 + _print_banner() + end +end + function _print_banner() c = Base.text_colors n = c[:normal] @@ -11,16 +17,16 @@ function _print_banner() println( " - $(y)// $(n) | - \\\\\\\\\\ // $(y)// $(n) | $(c[:bold])COBREXA.jl $(c[:normal]) v$(COBREXA_VERSION) - \\\\ \\\\// $(y)// $(n) | - \\\\ \\/ $(y)// $(n) | $(c[:bold])CO$(c[:normal])nstraint-$(c[:bold])B$(c[:normal])ased $(c[:bold])R$(c[:normal])econstruction - \\\\ $(y)// $(n) | and $(c[:bold])EX$(c[:normal])ascale $(c[:bold])A$(c[:normal])nalysis in Julia - // $(y)\\\\ $(n) | - // $(y)/\\ \\\\ $(n) | See documentation and examples at: - // $(y)//\\\\ \\\\ $(n)| https://lcsb-biocore.github.io/COBREXA.jl - // $(y)// \\\\\\\\\\ $(n)| - // $(n)| + $(y)//$(n) | + \\\\\\\\\\ // $(y)//$(n) | $(c[:bold])COBREXA.jl $(c[:normal]) v$(COBREXA_VERSION) + \\\\ \\\\// $(y)//$(n) | + \\\\ \\/ $(y)//$(n) | $(c[:bold])CO$(c[:normal])nstraint-$(c[:bold])B$(c[:normal])ased $(c[:bold])R$(c[:normal])econstruction + \\\\ $(y)//$(n) | and $(c[:bold])EX$(c[:normal])ascale $(c[:bold])A$(c[:normal])nalysis in Julia + // $(y)\\\\$(n) | + // $(y)/\\ \\\\$(n) | See documentation and examples at: + // $(y)//\\\\ \\\\$(n) | https://lcsb-biocore.github.io/COBREXA.jl + // $(y)// \\\\\\\\\\$(n) | + // | ", ) end diff --git a/src/base/constants.jl b/src/base/constants.jl index 864a68847..56c8b36f2 100644 --- a/src/base/constants.jl +++ b/src/base/constants.jl @@ -24,6 +24,11 @@ const _constants = ( objective = ["c"], grrs = ["gene_reaction_rules", "grRules", "rules"], ids = ["id", "description"], + metformulas = ["metFormula", "metFormulas"], + metcharges = ["metCharge", "metCharges"], + metcompartments = ["metCompartment", "metCompartments"], + rxnnames = ["rxnNames"], + metnames = ["metNames"], ), gene_annotation_checks = ( "ncbigene", diff --git a/src/base/macros/model_wrapper.jl b/src/base/macros/model_wrapper.jl new file mode 100644 index 000000000..ccbdb0e29 --- /dev/null +++ b/src/base/macros/model_wrapper.jl @@ -0,0 +1,72 @@ + +""" + _inherit_model_methods_impl(mtype::Symbol, arglist, access, fwdlist, fns...) + +A helper backend for [`@_inherit_model_methods`](@ref) and +[`@_inherit_model_methods_fn`](@ref). +""" +function _inherit_model_methods_impl( + source, + mtype::Symbol, + arglist, + access, + fwdlist, + fns..., +) + Expr( + :block, + ( + begin + header = Expr(:call, fn, :(model::$mtype), arglist.args...) + call = Expr(:call, fn, access(:model), fwdlist.args...) + esc( + Expr( + :macrocall, + Symbol("@doc"), + source, + """ + $header + + Evaluates [`$fn`](@ref) on the model contained in $mtype. + """, + Expr(:(=), header, Expr(:block, source, call)), + ), + ) + end for fn in fns + )..., + ) +end + +""" + @_inherit_model_methods + +Generates trivial accessor functions listed in `fns` for a model that is +wrapped in type `mtype` as field `member`. +""" +macro _inherit_model_methods(mtype::Symbol, arglist, member::Symbol, fwdlist, fns...) + _inherit_model_methods_impl( + __source__, + mtype, + arglist, + sym -> :($sym.$member), + fwdlist, + fns..., + ) +end + +""" + @_inherit_model_methods_fn + +A more generic version of [`@_inherit_model_methods`](@ref) that accesses the +"inner" model using an accessor function name. +""" +macro _inherit_model_methods_fn(mtype::Symbol, arglist, accessor, fwdlist, fns...) + _inherit_model_methods_impl( + __source__, + mtype, + arglist, + sym -> :($accessor($sym)), + fwdlist, + fns..., + ) +end diff --git a/src/base/macros/serialized.jl b/src/base/macros/serialized.jl index 65ca54159..0883e5a91 100644 --- a/src/base/macros/serialized.jl +++ b/src/base/macros/serialized.jl @@ -2,8 +2,8 @@ @_serialized_change_unwrap function Creates a simple wrapper structure that calls the `function` transparently on -the internal precached model. Internal type is returned (because this would -break the consistency of serialization). +the internal precached model. The internal type is returned (otherwise this +would break the consistency of serialization). """ macro _serialized_change_unwrap(fn::Symbol) docstring = """ diff --git a/src/base/solver.jl b/src/base/solver.jl index 641e2bab2..058ab5b40 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -3,13 +3,15 @@ make_optimization_model( model::MetabolicModel, optimizer; - sense = MOI.MAX_SENSE, + sense = MAX_SENSE, ) Convert `MetabolicModel`s to a JuMP model, place objectives and the equality constraint. + +Here coupling means inequality constraints coupling multiple variables together. """ -function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.MAX_SENSE) +function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_SENSE) precache!(model) @@ -25,8 +27,8 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.M C = coupling(model) # empty if no coupling cl, cu = coupling_bounds(model) - isempty(C) || @constraint(optimization_model, c_lbs, cl .<= coupling(model) * x) # coupling lower bounds - isempty(C) || @constraint(optimization_model, c_ubs, coupling(model) * x .<= cu) # coupling upper bounds + isempty(C) || @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds + isempty(C) || @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds return optimization_model end @@ -83,7 +85,6 @@ function set_optmodel_bound!( isnothing(ub) || set_normalized_rhs(opt_model[:ubs][vidx], ub) end - """ solved_objective_value(opt_model)::Maybe{Float64} @@ -107,8 +108,8 @@ Returns a vector of fluxes of the model, if solved. flux_vector(flux_balance_analysis(model, ...)) ``` """ -flux_vector(opt_model)::Maybe{Vector{Float64}} = - is_solved(opt_model) ? value.(opt_model[:x]) : nothing +flux_vector(model::MetabolicModel, opt_model)::Maybe{Vector{Float64}} = + is_solved(opt_model) ? reaction_flux(model)' * value.(opt_model[:x]) : nothing """ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String, Float64}, Nothing} @@ -121,4 +122,17 @@ flux_dict(model, flux_balance_analysis(model, ...)) ``` """ flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} = - is_solved(opt_model) ? Dict(reactions(model) .=> value.(opt_model[:x])) : nothing + is_solved(opt_model) ? + Dict(fluxes(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing + +""" + flux_dict(model::MetabolicModel) + +A pipeable variant of `flux_dict`. + +# Example +``` +flux_balance_analysis(model, ...) |> flux_dict(model) +``` +""" +flux_dict(model::MetabolicModel) = opt_model -> flux_dict(model, opt_model) diff --git a/src/base/types/CoreModel.jl b/src/base/types/CoreModel.jl index dc8b2b19b..db696c42a 100644 --- a/src/base/types/CoreModel.jl +++ b/src/base/types/CoreModel.jl @@ -86,6 +86,26 @@ balance(a::CoreModel)::SparseVec = a.b """ objective(a::CoreModel)::SparseVec = a.c +""" + genes(a::CoreModel)::Vector{String} + +Collect all genes contained in the [`CoreModel`](@ref). The call is expensive +for large models, because the vector is not stored and instead gets rebuilt +each time this function is called. +""" +function genes(a::CoreModel)::Vector{String} + res = Set{String}() + for grr in a.grrs + isnothing(grr) && continue + for gs in grr + for g in gs + push!(res, g) + end + end + end + sort(collect(res)) +end + """ reaction_stoichiometry(model::CoreModel, rid::String)::Dict{String, Float64} diff --git a/src/base/types/CoreModelCoupled.jl b/src/base/types/CoreModelCoupled.jl index 69b4d0222..fd1fb4735 100644 --- a/src/base/types/CoreModelCoupled.jl +++ b/src/base/types/CoreModelCoupled.jl @@ -1,149 +1,100 @@ """ - struct CoreModelCoupled <: MetabolicModel + mutable struct CoreCoupling{M} <: ModelWrapper where {M<:MetabolicModel} -The linear model with additional coupling constraints in the form +A matrix-based wrap that adds reaction coupling matrix to the inner model. A +flux `x` feasible in this model must satisfy: ``` cₗ ≤ C x ≤ cᵤ ``` """ -mutable struct CoreModelCoupled <: MetabolicModel - lm::CoreModel +mutable struct CoreCoupling{M} <: ModelWrapper where {M<:MetabolicModel} + lm::M C::SparseMat cl::Vector{Float64} cu::Vector{Float64} - function CoreModelCoupled(lm::MetabolicModel, C::MatType, cl::VecType, cu::VecType) + function CoreCoupling( + lm::M, + C::MatType, + cl::VecType, + cu::VecType, + ) where {M<:MetabolicModel} length(cu) == length(cl) || throw(DimensionMismatch("`cl` and `cu` need to have the same size")) size(C) == (length(cu), n_reactions(lm)) || throw(DimensionMismatch("wrong dimensions of `C`")) - new(convert(CoreModel, lm), sparse(C), collect(cl), collect(cu)) + new{M}(lm, sparse(C), collect(cl), collect(cu)) end end """ - reactions(a::CoreModelCoupled) + unwrap_model(a::CoreCoupling) -Extract reactions from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). +Get the internal [`CoreModel`](@ref) out of [`CoreCoupling`](@ref). """ -reactions(a::CoreModelCoupled) = reactions(a.lm) +unwrap_model(a::CoreCoupling) = a.lm """ - metabolites(a::CoreModelCoupled) + coupling(a::CoreCoupling)::SparseMat -Extract metabolites from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). +Coupling constraint matrix for a `CoreCoupling`. """ -metabolites(a::CoreModelCoupled) = metabolites(a.lm) +coupling(a::CoreCoupling)::SparseMat = vcat(coupling(a.lm), a.C) """ - stoichiometry(a::CoreModelCoupled) + n_coupling_constraints(a::CoreCoupling)::Int -Extract stoichiometry from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). +The number of coupling constraints in a `CoreCoupling`. """ -stoichiometry(a::CoreModelCoupled) = stoichiometry(a.lm) +n_coupling_constraints(a::CoreCoupling)::Int = n_coupling_constraints(a.lm) + size(a.C, 1) """ - bounds(a::CoreModelCoupled) + coupling_bounds(a::CoreCoupling)::Tuple{Vector{Float64},Vector{Float64}} -Extract bounds from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). +Coupling bounds for a `CoreCoupling`. """ -bounds(a::CoreModelCoupled) = bounds(a.lm) +coupling_bounds(a::CoreCoupling)::Tuple{Vector{Float64},Vector{Float64}} = + vcat.(coupling_bounds(a.lm), (a.cl, a.cu)) """ - balance(a::CoreModelCoupled) + Base.convert(::Type{CoreCoupling{M}}, mm::MetabolicModel; clone_coupling = true) where {M} -Extract balance from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). +Make a `CoreCoupling` out of any compatible model type. """ -balance(a::CoreModelCoupled) = balance(a.lm) - -""" - objective(a::CoreModelCoupled) - -Extract objective from [`CoreModelCoupled`](@ref) (uses the internal -[`CoreModel`](@ref)). -""" -objective(a::CoreModelCoupled) = objective(a.lm) - -""" - coupling(a::CoreModelCoupled)::SparseMat - -Coupling constraint matrix for a `CoreModelCoupled`. -""" -coupling(a::CoreModelCoupled)::SparseMat = a.C - -""" - n_coupling_constraints(a::CoreModelCoupled)::Int - -The number of coupling constraints in a `CoreModelCoupled`. -""" -n_coupling_constraints(a::CoreModelCoupled)::Int = size(a.C, 1) - -""" - coupling_bounds(a::CoreModelCoupled)::Tuple{Vector{Float64},Vector{Float64}} - -Coupling bounds for a `CoreModelCoupled`. -""" -coupling_bounds(a::CoreModelCoupled)::Tuple{Vector{Float64},Vector{Float64}} = (a.cl, a.cu) - -""" - reaction_stoichiometry(model::CoreModelCoupled, rid::String)::Dict{String, Float64} - -Return the stoichiometry of reaction with ID `rid`. -""" -reaction_stoichiometry(m::CoreModelCoupled, rid::String) = reaction_stoichiometry(m.lm, rid) - -""" - reaction_stoichiometry(model::CoreModelCoupled, ridx)::Dict{String, Float64} - -Return the stoichiometry of reaction at index `ridx`. -""" -function reaction_stoichiometry(m::CoreModelCoupled, ridx)::Dict{String,Float64} - reaction_stoichiometry(m.lm, ridx) +function Base.convert( + ::Type{CoreCoupling{M}}, + mm::MetabolicModel; + clone_coupling = true, +) where {M} + if mm isa CoreCoupling{M} + mm + elseif mm isa CoreCoupling + CoreCoupling(convert(M, mm.lm), mm.C, mm.cl, mm.cu) + elseif clone_coupling + (cl, cu) = coupling_bounds(mm) + CoreCoupling(convert(M, mm), coupling(mm), cl, cu) + else + CoreCoupling(convert(M, mm), spzeros(0, n_reactions(mm)), spzeros(0), spzeros(0)) + end end """ - reaction_gene_association_vec(model::CoreModelCoupled)::Vector{Maybe{GeneAssociation}} - -Retrieve a vector of gene associations in a [`CoreModelCoupled`](@ref), in the -same order as `reactions(model)`. -""" -reaction_gene_association_vec(model::CoreModelCoupled)::Vector{Maybe{GeneAssociation}} = - reaction_gene_association_vec(model.lm) - -""" - reaction_gene_association(model::CoreModelCoupled, ridx::Int)::Maybe{GeneAssociation} - -Retrieve the [`GeneAssociation`](@ref) from [`CoreModelCoupled`](@ref) by reaction -index. -""" -reaction_gene_association(model::CoreModelCoupled, ridx::Int)::Maybe{GeneAssociation} = - reaction_gene_association(model.lm, ridx) - -""" - reaction_gene_association(model::CoreModelCoupled, rid::String)::Maybe{GeneAssociation} + const CoreModelCoupled = CoreCoupling{CoreModel} -Retrieve the [`GeneAssociation`](@ref) from [`CoreModelCoupled`](@ref) by reaction ID. -""" -reaction_gene_association(model::CoreModelCoupled, rid::String)::Maybe{GeneAssociation} = - reaction_gene_association(model.lm, rid) +A matrix-based linear model with additional coupling constraints in the form: +``` + cₗ ≤ C x ≤ cᵤ +``` +Internally, the model is implemented using [`CoreCoupling`](@ref) that contains a single [`CoreModel`](@ref). """ - Base.convert(::Type{CoreModelCoupled}, mm::MetabolicModel) +const CoreModelCoupled = CoreCoupling{CoreModel} -Make a `CoreModelCoupled` out of any compatible model type. -""" -function Base.convert(::Type{CoreModelCoupled}, mm::MetabolicModel) - if typeof(mm) == CoreModelCoupled - return mm - end +CoreModelCoupled(lm::CoreModel, C::MatType, cl::VecType, cu::VecType) = + CoreCoupling(lm, sparse(C), collect(cl), collect(cu)) - (cl, cu) = coupling_bounds(mm) - CoreModelCoupled(convert(CoreModel, mm), coupling(mm), cl, cu) -end +# these are special for CoreModel-ish models +@_inherit_model_methods CoreModelCoupled () lm () reaction_gene_association_vec +@_inherit_model_methods CoreModelCoupled (ridx::Int,) lm (ridx,) reaction_stoichiometry diff --git a/src/base/types/Isozyme.jl b/src/base/types/Isozyme.jl new file mode 100644 index 000000000..685f86c35 --- /dev/null +++ b/src/base/types/Isozyme.jl @@ -0,0 +1,18 @@ +""" + mutable struct Isozyme + +Information about isozyme composition and activity. + +# Fields +- `gene_product_count :: Dict{String, Int}` assigns each gene product ID its + count in the isozyme complex (which is used to determine the total mass of + the isozyme) +- `kcat_forward`, `kcat_reverse` -- forward and reverse turnover numbers of the + isozyme +```` +""" +mutable struct Isozyme + gene_product_count::Dict{String,Int} + kcat_forward::Float64 + kcat_reverse::Float64 +end diff --git a/src/base/types/MATModel.jl b/src/base/types/MATModel.jl index d55f09433..09200476d 100644 --- a/src/base/types/MATModel.jl +++ b/src/base/types/MATModel.jl @@ -144,7 +144,7 @@ Extract metabolite formula from key `metFormula` or `metFormulas`. """ metabolite_formula(m::MATModel, mid::String) = _maybemap( x -> _parse_formula(x[findfirst(==(mid), metabolites(m))]), - get(m.mat, "metFormula", get(m.mat, "metFormulas", nothing)), + gets(m.mat, nothing, _constants.keynames.metformulas), ) """ @@ -152,10 +152,13 @@ metabolite_formula(m::MATModel, mid::String) = _maybemap( Extract metabolite charge from `metCharge` or `metCharges`. """ -metabolite_charge(m::MATModel, mid::String) = _maybemap( - x -> x[findfirst(==(mid), metabolites(m))], - get(m.mat, "metCharge", get(m.mat, "metCharges", nothing)), -) +function metabolite_charge(m::MATModel, mid::String) + met_charge = _maybemap( + x -> x[findfirst(==(mid), metabolites(m))], + gets(m.mat, nothing, _constants.keynames.metcharges), + ) + isnan(met_charge) ? 0 : met_charge +end """ metabolite_compartment(m::MATModel, mid::String) @@ -164,10 +167,9 @@ Extract metabolite compartment from `metCompartment` or `metCompartments`. """ metabolite_compartment(m::MATModel, mid::String) = _maybemap( x -> x[findfirst(==(mid), metabolites(m))], - get(m.mat, "metCompartment", get(m.mat, "metCompartments", nothing)), + gets(m.mat, nothing, _constants.keynames.metcompartments), ) - """ reaction_stoichiometry(model::MATModel, rid::String)::Dict{String, Float64} @@ -244,3 +246,23 @@ function Base.convert(::Type{MATModel}, m::MetabolicModel) ), ) end + +""" + reaction_name(m::MATModel, mid::String) + +Extract reaction name from `rxnNames`. +""" +reaction_name(m::MATModel, rid::String) = _maybemap( + x -> x[findfirst(==(rid), reactions(m))], + gets(m.mat, nothing, _constants.keynames.rxnnames), +) + +""" + metabolite_name(m::MATModel, mid::String) + +Extract metabolite name from `metNames`. +""" +metabolite_name(m::MATModel, mid::String) = _maybemap( + x -> x[findfirst(==(mid), metabolites(m))], + gets(m.mat, nothing, _constants.keynames.metnames), +) diff --git a/src/base/types/MetabolicModel.jl b/src/base/types/MetabolicModel.jl new file mode 100644 index 000000000..1ba000648 --- /dev/null +++ b/src/base/types/MetabolicModel.jl @@ -0,0 +1,353 @@ + +# +# IMPORTANT +# +# This file provides a list of "officially supported" accessors that should +# work with all subtypes of [`MetabolicModel`](@ref). Keep this synced with the +# automatically derived methods for [`ModelWrapper`](@ref). +# + +_missing_impl_error(m, a) = throw(MethodError(m, a)) + +""" + reactions(a::MetabolicModel)::Vector{String} + +Return a vector of reaction identifiers in a model. +""" +function reactions(a::MetabolicModel)::Vector{String} + _missing_impl_error(reactions, (a,)) +end + +""" + metabolites(a::MetabolicModel)::Vector{String} + +Return a vector of metabolite identifiers in a model. +""" +function metabolites(a::MetabolicModel)::Vector{String} + _missing_impl_error(metabolites, (a,)) +end + +""" + n_reactions(a::MetabolicModel)::Int + +Get the number of reactions in a model. +""" +function n_reactions(a::MetabolicModel)::Int + length(reactions(a)) +end + +""" + n_metabolites(a::MetabolicModel)::Int + +Get the number of metabolites in a model. +""" +function n_metabolites(a::MetabolicModel)::Int + length(metabolites(a)) +end + +""" + stoichiometry(a::MetabolicModel)::SparseMat + +Get the sparse stoichiometry matrix of a model. A feasible solution `x` of a +model `m` is defined as satisfying the equations: + +- `stoichiometry(m) * x .== balance(m)` +- `x .>= lbs` +- `y .<= ubs` +- `(lbs, ubs) == bounds(m) +""" +function stoichiometry(a::MetabolicModel)::SparseMat + _missing_impl_error(stoichiometry, (a,)) +end + +""" + bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} + +Get the lower and upper solution bounds of a model. +""" +function bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} + _missing_impl_error(bounds, (a,)) +end + +""" + balance(a::MetabolicModel)::SparseVec + +Get the sparse balance vector of a model. +""" +function balance(a::MetabolicModel)::SparseVec + return spzeros(n_metabolites(a)) +end + +""" + objective(a::MetabolicModel)::SparseVec + +Get the objective vector of the model. Analysis functions, such as +[`flux_balance_analysis`](@ref), are supposed to maximize `dot(objective, x)` +where `x` is a feasible solution of the model. +""" +function objective(a::MetabolicModel)::SparseVec + _missing_impl_error(objective, (a,)) +end + +""" + fluxes(a::MetabolicModel)::Vector{String} + +In some models, the [`reactions`](@ref) that correspond to the columns of +[`stoichiometry`](@ref) matrix do not fully represent the semantic contents of +the model; for example, fluxes may be split into forward and reverse reactions, +reactions catalyzed by distinct enzymes, etc. Together with +[`reaction_flux`](@ref) (and [`n_fluxes`](@ref)) this specifies how the +flux is decomposed into individual reactions. + +By default (and in most models), fluxes and reactions perfectly correspond. +""" +function fluxes(a::MetabolicModel)::Vector{String} + reactions(a) +end + +function n_fluxes(a::MetabolicModel)::Int + n_reactions(a) +end + +""" + reaction_flux(a::MetabolicModel)::SparseMat + +Retrieve a sparse matrix that describes the correspondence of a solution of the +linear system to the fluxes (see [`fluxes`](@ref) for rationale). Returns a +sparse matrix of size `(n_reactions(a), n_fluxes(a))`. For most models, this is +an identity matrix. +""" +function reaction_flux(a::MetabolicModel)::SparseMat + nr = n_reactions(a) + nf = n_fluxes(a) + nr == nf || _missing_impl_error(reaction_flux, (a,)) + spdiagm(fill(1, nr)) +end + +""" + coupling(a::MetabolicModel)::SparseMat + +Get a matrix of coupling constraint definitions of a model. By default, there +is no coupling in the models. +""" +function coupling(a::MetabolicModel)::SparseMat + return spzeros(0, n_reactions(a)) +end + +""" + n_coupling_constraints(a::MetabolicModel)::Int + +Get the number of coupling constraints in a model. +""" +function n_coupling_constraints(a::MetabolicModel)::Int + size(coupling(a), 1) +end + +""" + coupling_bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} + +Get the lower and upper bounds for each coupling bound in a model, as specified +by `coupling`. By default, the model does not have any coupling bounds. +""" +function coupling_bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} + return (spzeros(0), spzeros(0)) +end + +""" + genes(a::MetabolicModel)::Vector{String} + +Return identifiers of all genes contained in the model. By default, there are +no genes. + +In SBML, these are usually called "gene products" but we write `genes` for +simplicity. +""" +function genes(a::MetabolicModel)::Vector{String} + return [] +end + +""" + n_genes(a::MetabolicModel)::Int + +Return the number of genes in the model (as returned by [`genes`](@ref)). If +you just need the number of the genes, this may be much more efficient than +calling [`genes`](@ref) and measuring the array. +""" +function n_genes(a::MetabolicModel)::Int + return length(genes(a)) +end + +""" + reaction_gene_association(a::MetabolicModel, gene_id::String)::Maybe{GeneAssociation} + +Returns the sets of genes that need to be present so that the reaction can work +(technically, a DNF on gene availability, with positive atoms only). + +For simplicity, `nothing` may be returned, meaning that the reaction always +takes place. (in DNF, that would be equivalent to returning `[[]]`.) +""" +function reaction_gene_association( + a::MetabolicModel, + reaction_id::String, +)::Maybe{GeneAssociation} + return nothing +end + +""" + reaction_subsystem(model::MetabolicModel, reaction_id::String)::Maybe{String} + +Return the subsystem of reaction `reaction_id` in `model` if it is assigned. If not, +return `nothing`. +""" +function reaction_subsystem(model::MetabolicModel, reaction_id::String)::Maybe{String} + return nothing +end + +""" + reaction_stoichiometry(model::MetaboliteModel, rid::String)::Dict{String, Float64} + +Return the stoichiometry of reaction with ID `rid` in the model. The dictionary +maps the metabolite IDs to their stoichiometric coefficients. +""" +function reaction_stoichiometry(m::MetabolicModel, rid::String)::Dict{String,Float64} + mets = metabolites(m) + Dict( + mets[k] => v for + (k, v) in zip(findnz(stoichiometry(m)[:, first(indexin([rid], reactions(m)))])...) + ) +end + +""" + metabolite_formula( + a::MetabolicModel, + metabolite_id::String, + )::Maybe{MetaboliteFormula} + +Return the formula of metabolite `metabolite_id` in `model`. +Return `nothing` in case the formula is not known or irrelevant. +""" +function metabolite_formula( + model::MetabolicModel, + metabolite_id::String, +)::Maybe{MetaboliteFormula} + return nothing +end + +""" + metabolite_charge(model::MetabolicModel, metabolite_id::String)::Maybe{Int} + +Return the charge associated with metabolite `metabolite_id` in `model`. +Returns `nothing` if charge not present. +""" +function metabolite_charge(model::MetabolicModel, metabolite_id::String)::Maybe{Int} + return nothing +end + +""" + metabolite_compartment(model::MetabolicModel, metabolite_id::String)::Maybe{String} + +Return the compartment of metabolite `metabolite_id` in `model` if it is assigned. If not, +return `nothing`. +""" +function metabolite_compartment(model::MetabolicModel, metabolite_id::String)::Maybe{String} + return nothing +end + +""" + reaction_annotations(a::MetabolicModel, reaction_id::String)::Annotations + +Return standardized names that may help identifying the reaction. The +dictionary assigns vectors of possible identifiers to identifier system names, +e.g. `"Reactome" => ["reactomeID123"]`. +""" +function reaction_annotations(a::MetabolicModel, reaction_id::String)::Annotations + return Dict() +end + +""" + metabolite_annotations(a::MetabolicModel, metabolite_id::String)::Annotations + +Return standardized names that may help to reliably identify the metabolite. The +dictionary assigns vectors of possible identifiers to identifier system names, +e.g. `"ChEMBL" => ["123"]` or `"PubChem" => ["CID123", "CID654645645"]`. +""" +function metabolite_annotations(a::MetabolicModel, metabolite_id::String)::Annotations + return Dict() +end + +""" + gene_annotations(a::MetabolicModel, gene_id::String)::Annotations + +Return standardized names that identify the corresponding gene or product. The +dictionary assigns vectors of possible identifiers to identifier system names, +e.g. `"PDB" => ["PROT01"]`. +""" +function gene_annotations(a::MetabolicModel, gene_id::String)::Annotations + return Dict() +end + +""" + reaction_notes(model::MetabolicModel, reaction_id::String)::Notes + +Return the notes associated with reaction `reaction_id` in `model`. +""" +function reaction_notes(model::MetabolicModel, reaction_id::String)::Notes + return Dict() +end + +""" + metabolite_notes(model::MetabolicModel, metabolite_id::String)::Notes + +Return the notes associated with metabolite `reaction_id` in `model`. +""" +function metabolite_notes(model::MetabolicModel, metabolite_id::String)::Notes + return Dict() +end + +""" + gene_notes(model::MetabolicModel, gene_id::String)::Notes + +Return the notes associated with the gene `gene_id` in `model`. +""" +function gene_notes(model::MetabolicModel, gene_id::String)::Notes + return Dict() +end + +""" + reaction_name(model::MetabolicModel, rid::String) + +Return the name of reaction with ID `rid`. +""" +reaction_name(model::MetabolicModel, rid::String) = nothing + +""" + metabolite_name(model::MetabolicModel, mid::String) + +Return the name of metabolite with ID `mid`. +""" +metabolite_name(model::MetabolicModel, mid::String) = nothing + +""" + gene_name(model::MetabolicModel, gid::String) + +Return the name of gene with ID `gid`. +""" +gene_name(model::MetabolicModel, gid::String) = nothing + +""" + precache!(a::MetabolicModel)::Nothing + +Do whatever is feasible to get the model into a state that can be read from +as-quickly-as-possible. This may include e.g. generating helper index +structures and loading delayed parts of the model from disk. The model should +be modified "transparently" in-place. Analysis functions call this right before +applying modifications or converting the model to the optimization model using +[`make_optimization_model`](@ref); usually on the same machine where the +optimizers (and, generally, the core analysis algorithms) will run. The calls +are done in a good hope that the performance will be improved. + +By default, it should be safe to do nothing. +""" +function precache!(a::MetabolicModel)::Nothing + nothing +end diff --git a/src/base/types/ModelWrapper.jl b/src/base/types/ModelWrapper.jl new file mode 100644 index 000000000..ccf289d3e --- /dev/null +++ b/src/base/types/ModelWrapper.jl @@ -0,0 +1,23 @@ + +""" + unwrap_model(a::ModelWrapper) + +A simple helper to pick the single w +""" +function unwrap_model(a::ModelWrapper) + _missing_impl_error(unwrap_model, (a,)) +end + +# +# IMPORTANT +# +# The list of inherited functions must be synced with the methods available for [`MetabolicModel`](@ref). +# + +@_inherit_model_methods_fn ModelWrapper () unwrap_model () reactions metabolites stoichiometry bounds balance objective fluxes n_fluxes reaction_flux coupling n_coupling_constraints coupling_bounds genes n_genes precache! + +@_inherit_model_methods_fn ModelWrapper (rid::String,) unwrap_model (rid,) reaction_gene_association reaction_subsystem reaction_stoichiometry reaction_annotations reaction_notes + +@_inherit_model_methods_fn ModelWrapper (mid::String,) unwrap_model (mid,) metabolite_formula metabolite_charge metabolite_compartment metabolite_annotations metabolite_notes + +@_inherit_model_methods_fn ModelWrapper (gid::String,) unwrap_model (gid,) gene_annotations gene_notes diff --git a/src/base/types/Serialized.jl b/src/base/types/Serialized.jl index 722cbd7f9..1d1741816 100644 --- a/src/base/types/Serialized.jl +++ b/src/base/types/Serialized.jl @@ -9,7 +9,7 @@ A meta-model that represents a model that is serialized on the disk. The internal model will be loaded on-demand by using any accessor, or by calling [`precache!`](@ref) directly. """ -mutable struct Serialized{M} <: MetabolicModel where {M<:MetabolicModel} +mutable struct Serialized{M} <: ModelWrapper where {M<:MetabolicModel} m::Maybe{M} filename::String @@ -18,37 +18,16 @@ mutable struct Serialized{M} <: MetabolicModel where {M<:MetabolicModel} new{T}(model, filename) end -function _on_precached(m::Serialized, f, args...) +""" + unwrap_model(m::Serialized) + +Unwrap the serialized model (precaching it transparently). +""" +function unwrap_model(m::Serialized) precache!(m) - f(m.m, args...) + m.m end -reactions(m::Serialized) = _on_precached(m, reactions) -n_reactions(m::Serialized) = _on_precached(m, n_reactions) -metabolites(m::Serialized) = _on_precached(m, metabolites) -n_metabolites(m::Serialized) = _on_precached(m, n_metabolites) -stoichiometry(m::Serialized) = _on_precached(m, stoichiometry) -bounds(m::Serialized) = _on_precached(m, bounds) -balance(m::Serialized) = _on_precached(m, balance) -objective(m::Serialized) = _on_precached(m, objective) -coupling(m::Serialized) = _on_precached(m, coupling) -n_coupling_constraints(m::Serialized) = _on_precached(m, n_coupling_constraints) -coupling_bounds(m::Serialized) = _on_precached(m, coupling_bounds) -genes(m::Serialized) = _on_precached(m, genes) -n_genes(m::Serialized) = _on_precached(m, n_genes) -metabolite_formula(m::Serialized, id::String) = _on_precached(m, metabolite_formula, id) -metabolite_charge(m::Serialized, id::String) = _on_precached(m, metabolite_charge, id) -reaction_annotations(m::Serialized, id::String) = _on_precached(m, reaction_annotations, id) -metabolite_annotations(m::Serialized, id::String) = - _on_precached(m, metabolite_annotations, id) -gene_annotations(m::Serialized, id::String) = _on_precached(m, gene_annotations, id) -reaction_notes(m::Serialized, id::String) = _on_precached(m, reaction_notes, id) -metabolite_notes(m::Serialized, id::String) = _on_precached(m, metabolite_notes, id) -gene_notes(m::Serialized, id::String) = _on_precached(m, gene_notes, id) -metabolite_compartment(m::Serialized, id::String) = - _on_precached(m, metabolite_compartment, id) -reaction_subsystem(m::Serialized, id::String) = _on_precached(m, reaction_subsystem, id) - """ precache!(model::Serialized{MetabolicModel})::Nothing diff --git a/src/base/types/abstract/MetabolicModel.jl b/src/base/types/abstract/MetabolicModel.jl index a598fa423..8b1fc4b77 100644 --- a/src/base/types/abstract/MetabolicModel.jl +++ b/src/base/types/abstract/MetabolicModel.jl @@ -2,8 +2,8 @@ """ abstract type MetabolicModel end -A helper supertype that wraps everything usable as a linear-like model for -COBREXA functions. +A helper supertype of everything usable as a linear-like model for COBREXA +functions. If you want your model type to work with COBREXA, add the `MetabolicModel` as its supertype, and implement the accessor functions. Accessors @@ -13,6 +13,14 @@ mandatory and default to safe "empty" values. """ abstract type MetabolicModel end +""" + abstract type ModelWrapper <: MetabolicModel end + +A helper supertype of all "wrapper" types that contain precisely one other +[`MetabolicModel`](@ref). +""" +abstract type ModelWrapper <: MetabolicModel end + const SparseMat = SparseMatrixCSC{Float64,Int} const SparseVec = SparseVector{Float64,Int} const MatType = AbstractMatrix{Float64} @@ -56,305 +64,3 @@ Free-form notes about something (e.g. a [`Gene`](@ref)), categorized by "topic". """ const Notes = Dict{String,Vector{String}} - -_missing_impl_error(m, a) = throw(MethodError(m, a)) - -""" - reactions(a::MetabolicModel)::Vector{String} - -Return a vector of reaction identifiers in a model. -""" -function reactions(a::MetabolicModel)::Vector{String} - _missing_impl_error(reactions, (a,)) -end - -""" - metabolites(a::MetabolicModel)::Vector{String} - -Return a vector of metabolite identifiers in a model. -""" -function metabolites(a::MetabolicModel)::Vector{String} - _missing_impl_error(metabolites, (a,)) -end - -""" - n_reactions(a::MetabolicModel)::Int - -Get the number of reactions in a model. -""" -function n_reactions(a::MetabolicModel)::Int - length(reactions(a)) -end - -""" - n_metabolites(a::MetabolicModel)::Int - -Get the number of metabolites in a model. -""" -function n_metabolites(a::MetabolicModel)::Int - length(metabolites(a)) -end - -""" - stoichiometry(a::MetabolicModel)::SparseMat - -Get the sparse stoichiometry matrix of a model. -""" -function stoichiometry(a::MetabolicModel)::SparseMat - _missing_impl_error(stoichiometry, (a,)) -end - -""" - bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} - -Get the lower and upper flux bounds of a model. -""" -function bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} - _missing_impl_error(bounds, (a,)) -end - -""" - balance(a::MetabolicModel)::SparseVec - -Get the sparse balance vector of a model (ie. the `b` from `S x = b`). -""" -function balance(a::MetabolicModel)::SparseVec - return spzeros(n_metabolites(a)) -end - -""" - objective(a::MetabolicModel)::SparseVec - -Get the objective vector of a model. -""" -function objective(a::MetabolicModel)::SparseVec - _missing_impl_error(objective, (a,)) -end - -""" - coupling(a::MetabolicModel)::SparseMat - -Get a matrix of coupling constraint definitions of a model. By default, there -is no coupling in the models. -""" -function coupling(a::MetabolicModel)::SparseMat - return spzeros(0, n_reactions(a)) -end - -""" - n_coupling_constraints(a::MetabolicModel)::Int - -Get the number of coupling constraints in a model. -""" -function n_coupling_constraints(a::MetabolicModel)::Int - size(coupling(a), 1) -end - -""" - coupling_bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} - -Get the lower and upper bounds for each coupling bound in a model, as specified -by `coupling`. By default, the model does not have any coupling bounds. -""" -function coupling_bounds(a::MetabolicModel)::Tuple{Vector{Float64},Vector{Float64}} - return (spzeros(0), spzeros(0)) -end - -""" - genes(a::MetabolicModel)::Vector{String} - -Return identifiers of all genes contained in the model. By default, there are -no genes. - -In SBML, these are usually called "gene products" but we write `genes` for -simplicity. -""" -function genes(a::MetabolicModel)::Vector{String} - return [] -end - -""" - n_genes(a::MetabolicModel)::Int - -Return the number of genes in the model (as returned by [`genes`](@ref)). If -you just need the number of the genes, this may be much more efficient than -calling [`genes`](@ref) and measuring the array. -""" -function n_genes(a::MetabolicModel)::Int - return length(genes(a)) -end - -""" - reaction_gene_association(a::MetabolicModel, gene_id::String)::Maybe{GeneAssociation} - -Returns the sets of genes that need to be present so that the reaction can work -(technically, a DNF on gene availability, with positive atoms only). - -For simplicity, `nothing` may be returned, meaning that the reaction always -takes place. (in DNF, that would be equivalent to returning `[[]]`.) -""" -function reaction_gene_association( - a::MetabolicModel, - reaction_id::String, -)::Maybe{GeneAssociation} - return nothing -end - -""" - metabolite_formula( - a::MetabolicModel, - metabolite_id::String, - )::Maybe{MetaboliteFormula} - -Return the formula of metabolite `metabolite_id` in `model`. -Return `nothing` in case the formula is not known or irrelevant. -""" -function metabolite_formula( - model::MetabolicModel, - metabolite_id::String, -)::Maybe{MetaboliteFormula} - return nothing -end - -""" - metabolite_charge(model::MetabolicModel, metabolite_id::String)::Maybe{Int} - -Return the charge associated with metabolite `metabolite_id` in `model`. -Returns `nothing` if charge not present. -""" -function metabolite_charge(model::MetabolicModel, metabolite_id::String)::Maybe{Int} - return nothing -end - -""" - reaction_annotations(a::MetabolicModel, reaction_id::String)::Annotations - -Return standardized names that may help identifying the reaction. The -dictionary assigns vectors of possible identifiers to identifier system names, -e.g. `"Reactome" => ["reactomeID123"]`. -""" -function reaction_annotations(a::MetabolicModel, reaction_id::String)::Annotations - return Dict() -end - -""" - metabolite_annotations(a::MetabolicModel, metabolite_id::String)::Annotations - -Return standardized names that may help to reliably identify the metabolite. The -dictionary assigns vectors of possible identifiers to identifier system names, -e.g. `"ChEMBL" => ["123"]` or `"PubChem" => ["CID123", "CID654645645"]`. -""" -function metabolite_annotations(a::MetabolicModel, metabolite_id::String)::Annotations - return Dict() -end - -""" - gene_annotations(a::MetabolicModel, gene_id::String)::Annotations - -Return standardized names that identify the corresponding gene or product. The -dictionary assigns vectors of possible identifiers to identifier system names, -e.g. `"PDB" => ["PROT01"]`. -""" -function gene_annotations(a::MetabolicModel, gene_id::String)::Annotations - return Dict() -end - -""" - reaction_notes(model::MetabolicModel, reaction_id::String)::Notes - -Return the notes associated with reaction `reaction_id` in `model`. -""" -function reaction_notes(model::MetabolicModel, reaction_id::String)::Notes - return Dict() -end - -""" - metabolite_notes(model::MetabolicModel, metabolite_id::String)::Notes - -Return the notes associated with metabolite `reaction_id` in `model`. -""" -function metabolite_notes(model::MetabolicModel, metabolite_id::String)::Notes - return Dict() -end - -""" - gene_notes(model::MetabolicModel, gene_id::String)::Notes - -Return the notes associated with the gene `gene_id` in `model`. -""" -function gene_notes(model::MetabolicModel, gene_id::String)::Notes - return Dict() -end - -""" - metabolite_compartment(model::MetabolicModel, metabolite_id::String)::Maybe{String} - -Return the compartment of metabolite `metabolite_id` in `model` if it is assigned. If not, -return `nothing`. -""" -function metabolite_compartment(model::MetabolicModel, metabolite_id::String)::Maybe{String} - return nothing -end - -""" - reaction_subsystem(model::MetabolicModel, reaction_id::String)::Maybe{String} - -Return the subsystem of reaction `reaction_id` in `model` if it is assigned. If not, -return `nothing`. -""" -function reaction_subsystem(model::MetabolicModel, reaction_id::String)::Maybe{String} - return nothing -end - -""" - reaction_stoichiometry(model::MetaboliteModel, rid::String)::Dict{String, Float64} - -Return the stoichiometry of reaction with ID `rid` in the model. The dictionary -maps the metabolite IDs to their stoichiometric coefficients. -""" -function reaction_stoichiometry(m::MetabolicModel, rid::String)::Dict{String,Float64} - mets = metabolites(m) - Dict( - mets[k] => v for - (k, v) in zip(findnz(stoichiometry(m)[:, first(indexin([rid], reactions(m)))])...) - ) -end - -""" - reaction_name(model::MetabolicModel, rid::String) - -Return the name of reaction with ID `rid`. -""" -reaction_name(model::MetabolicModel, rid::String) = nothing - -""" - metabolite_name(model::MetabolicModel, mid::String) - -Return the name of metabolite with ID `mid`. -""" -metabolite_name(model::MetabolicModel, mid::String) = nothing - -""" - gene_name(model::MetabolicModel, gid::String) - -Return the name of gene with ID `gid`. -""" -gene_name(model::MetabolicModel, gid::String) = nothing - -""" - precache!(a::MetabolicModel)::Nothing - -Do whatever is feasible to get the model into a state that can be read from -as-quickly-as-possible. This may include e.g. generating helper index -structures and loading delayed parts of the model from disk. The model should -be modified "transparently" in-place. Analysis functions call this right before -applying modifications or converting the model to the optimization model using -[`make_optimization_model`](@ref); usually on the same machine where the -optimizers (and, generally, the core analysis algorithms) will run. The calls -are done in a good hope that the performance will be improved. - -By default, it should be safe to do nothing. -""" -function precache!(a::MetabolicModel)::Nothing - nothing -end diff --git a/src/base/types/wrappers/GeckoModel.jl b/src/base/types/wrappers/GeckoModel.jl new file mode 100644 index 000000000..42cc47aa8 --- /dev/null +++ b/src/base/types/wrappers/GeckoModel.jl @@ -0,0 +1,253 @@ +""" + struct _gecko_reaction_column + +A helper type for describing the contents of [`GeckoModel`](@ref)s. +""" +struct _gecko_reaction_column + reaction_idx::Int + isozyme_idx::Int + direction::Int + reaction_coupling_row::Int + lb::Float64 + ub::Float64 + gene_product_coupling::Vector{Tuple{Int,Float64}} +end + +""" + struct _gecko_capacity + +A helper struct that contains the gene product capacity terms organized by +the grouping type, e.g. metabolic or membrane groups etc. +""" +struct _gecko_capacity + group_id::String + gene_product_idxs::Vector{Int} + gene_product_molar_masses::Vector{Float64} + group_upper_bound::Float64 +end + +""" + struct GeckoModel <: ModelWrapper + +A model with complex enzyme concentration and capacity bounds, as described in +*Sánchez, Benjamín J., et al. "Improving the phenotype predictions of a yeast +genome-scale metabolic model by incorporating enzymatic constraints." Molecular +systems biology 13.8 (2017): 935.* + +Use [`make_gecko_model`](@ref) or [`with_gecko`](@ref) to construct this kind +of model. + +The model wraps another "internal" model, and adds following modifications: +- enzymatic reactions with known enzyme information are split into multiple + forward and reverse variants for each isozyme, +- reaction coupling is added to ensure the groups of isozyme reactions obey the + global reaction flux bounds from the original model, +- gene concentrations specified by each reaction and its gene product stoichiometry, + can constrained by the user to reflect measurements, such as + from mass spectrometry, +- additional coupling is added to simulate total masses of different proteins + grouped by type (e.g., membrane-bound and free-floating proteins), which can + be again constrained by the user (this is slightly generalized from original + GECKO algorithm, which only considers a single group of indiscernible + proteins). + +The structure contains fields `columns` that describe the contents of the +coupling columns, `coupling_row_reaction`, `coupling_row_gene_product` and +`coupling_row_mass_group` that describe correspondence of the coupling rows to +original model and determine the coupling bounds, and `inner`, which is the +original wrapped model. Note, `objective` is the objective vector of the model. +Special care needs to be taken to ensure that its length is the sum of +`n_reactions(model)` and `n_genes(model)` when the user modifies it, where +`model` is the GeckoModel in question. + +Implementation exposes the split reactions (available as `reactions(model)`), +but retains the original "simple" reactions accessible by [`fluxes`](@ref). All +constraints are implemented using [`coupling`](@ref) and +[`coupling_bounds`](@ref), i.e., all virtual metabolites described by GECKO are +purely virtual and do not occur in [`metabolites`](@ref). +""" +struct GeckoModel <: ModelWrapper + objective::SparseVec + columns::Vector{_gecko_reaction_column} + coupling_row_reaction::Vector{Int} + coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}} + coupling_row_mass_group::Vector{_gecko_capacity} + + inner::MetabolicModel +end + +unwrap_model(model::GeckoModel) = model.inner + +""" + stoichiometry(model::GeckoModel) + +Return a stoichiometry of the [`GeckoModel`](@ref). The enzymatic reactions are +split into unidirectional forward and reverse ones, each of which may have +multiple variants per isozyme. +""" +function stoichiometry(model::GeckoModel) + irrevS = stoichiometry(model.inner) * COBREXA._gecko_reaction_column_reactions(model) + enzS = COBREXA._gecko_gene_product_coupling(model) + [ + irrevS spzeros(size(irrevS, 1), size(enzS, 1)) + -enzS I(size(enzS, 1)) + ] +end + +""" + objective(model::GeckoModel) + +Return the objective of the [`GeckoModel`](@ref). Note, the objective is with +respect to the internal variables, i.e. [`reactions(model)`](@ref) and +[`genes(model)`](@ref). To manually set the objective, index into +`model.objective` appropriately, and remember to set the previous coefficients +to zero. +""" +objective(model::GeckoModel) = model.objective + +""" + reactions(model::GeckoModel) + +Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split +to forward- and reverse-only parts with different isozyme indexes; reactions +IDs are mangled accordingly with suffixes). +""" +reactions(model::GeckoModel) = + let inner_reactions = reactions(model.inner) + [ + _gecko_reaction_name( + inner_reactions[col.reaction_idx], + col.direction, + col.isozyme_idx, + ) for col in model.columns + ] + end + +""" + n_reactions(model::GeckoModel) + +Returns the number of all irreversible reactions in `model` as well as the +number of gene products that take part in enzymatic reactions. +""" +n_reactions(model::GeckoModel) = length(model.columns) + +""" + bounds(model::GeckoModel) + +Return variable bounds for [`GeckoModel`](@ref). +""" +function bounds(model::GeckoModel) + lbs = [ + [col.lb for col in model.columns] + [lb for (_, (lb, _)) in model.coupling_row_gene_product] + ] + ubs = [ + [col.ub for col in model.columns] + [ub for (_, (_, ub)) in model.coupling_row_gene_product] + ] + (lbs, ubs) +end + +""" + reaction_flux(model::GeckoModel) + +Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original +fluxes in the wrapped model. +""" +function reaction_flux(model::GeckoModel) + rxnmat = _gecko_reaction_column_reactions(model)' * reaction_flux(model.inner) + [ + rxnmat + spzeros(n_genes(model), size(rxnmat, 2)) + ] +end + +""" + coupling(model::GeckoModel) + +Return the coupling of [`GeckoModel`](@ref). That combines the coupling of the +wrapped model, coupling for split (arm) reactions, and the coupling for the total +enzyme capacity. +""" +function coupling(model::GeckoModel) + innerC = coupling(model.inner) * _gecko_reaction_column_reactions(model) + rxnC = _gecko_reaction_coupling(model) + enzcap = _gecko_mass_group_coupling(model) + [ + innerC spzeros(size(innerC, 1), n_genes(model)) + rxnC spzeros(size(rxnC, 1), n_genes(model)) + spzeros(length(model.coupling_row_mass_group), length(model.columns)) enzcap + ] +end + +""" + n_coupling_constraints(model::GeckoModel) + +Count the coupling constraints in [`GeckoModel`](@ref) (refer to +[`coupling`](@ref) for details). +""" +n_coupling_constraints(model::GeckoModel) = + n_coupling_constraints(model.inner) + + length(model.coupling_row_reaction) + + length(model.coupling_row_mass_group) + +""" + coupling_bounds(model::GeckoModel) + +The coupling bounds for [`GeckoModel`](@ref) (refer to [`coupling`](@ref) for +details). +""" +function coupling_bounds(model::GeckoModel) + (iclb, icub) = coupling_bounds(model.inner) + (ilb, iub) = bounds(model.inner) + return ( + vcat( + iclb, + ilb[model.coupling_row_reaction], + [0.0 for _ in model.coupling_row_mass_group], + ), + vcat( + icub, + iub[model.coupling_row_reaction], + [grp.group_upper_bound for grp in model.coupling_row_mass_group], + ), + ) +end + +""" + balance(model::GeckoModel) + +Return the balance of the reactions in the inner model, concatenated with a vector of +zeros representing the enzyme balance of a [`GeckoModel`](@ref). +""" +balance(model::GeckoModel) = + [balance(model.inner); spzeros(length(model.coupling_row_gene_product))] + +""" + n_genes(model::GeckoModel) + +Return the number of genes that have enzymatic constraints associated with them. +""" +n_genes(model::GeckoModel) = length(model.coupling_row_gene_product) + +""" + genes(model::GeckoModel) + +Return the gene ids of genes that have enzymatic constraints associated with them. +""" +genes(model::GeckoModel) = + genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]] + +""" + metabolites(model::GeckoModel) + +Return the ids of all metabolites, both real and pseudo, for a [`GeckoModel`](@ref). +""" +metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model) .* "#supply"] + +""" + n_metabolites(model::GeckoModel) + +Return the number of metabolites, both real and pseudo, for a [`GeckoModel`](@ref). +""" +n_metabolites(model::GeckoModel) = n_metabolites(model.inner) + n_genes(model) diff --git a/src/base/types/wrappers/SMomentModel.jl b/src/base/types/wrappers/SMomentModel.jl new file mode 100644 index 000000000..bdf50a71d --- /dev/null +++ b/src/base/types/wrappers/SMomentModel.jl @@ -0,0 +1,144 @@ + +""" + struct _smoment_column + +A helper type that describes the contents of [`SMomentModel`](@ref)s. +""" +struct _smoment_column + reaction_idx::Int # number of the corresponding reaction in the inner model + direction::Int # 0 if "as is" and unique, -1 if reverse-only part, 1 if forward-only part + lb::Float64 # must be 0 if the reaction is unidirectional (if direction!=0) + ub::Float64 + capacity_required::Float64 # must be 0 for bidirectional reactions (if direction==0) +end + +""" + struct SMomentModel <: ModelWrapper + +An enzyme-capacity-constrained model using sMOMENT algorithm, as described by +*Bekiaris, Pavlos Stephanos, and Steffen Klamt, "Automatic construction of +metabolic models with enzyme constraints" BMC bioinformatics, 2020*. + +Use [`make_smoment_model`](@ref) or [`with_smoment`](@ref) to construct the +models. + +The model is constructed as follows: +- stoichiometry of the original model is retained as much as possible, but + enzymatic reations are split into forward and reverse parts (marked by a + suffix like `...#forward` and `...#reverse`), +- coupling is added to simulate a virtual metabolite "enzyme capacity", which + is consumed by all enzymatic reactions at a rate given by enzyme mass divided + by the corresponding kcat, +- the total consumption of the enzyme capacity is constrained to a fixed + maximum. + +The `SMomentModel` structure contains a worked-out representation of the +optimization problem atop a wrapped [`MetabolicModel`](@ref), in particular the +separation of certain reactions into unidirectional forward and reverse parts, +an "enzyme capacity" required for each reaction, and the value of the maximum +capacity constraint. Original coupling in the inner model is retained. + +In the structure, the field `columns` describes the correspondence of stoichiometry +columns to the stoichiometry and data of the internal wrapped model, and +`total_enzyme_capacity` is the total bound on the enzyme capacity consumption +as specified in sMOMENT algorithm. + +This implementation allows easy access to fluxes from the split reactions +(available in `reactions(model)`), while the original "simple" reactions from +the wrapped model are retained as [`fluxes`](@ref). All additional constraints +are implemented using [`coupling`](@ref) and [`coupling_bounds`](@ref). +""" +struct SMomentModel <: ModelWrapper + columns::Vector{_smoment_column} + total_enzyme_capacity::Float64 + + inner::MetabolicModel +end + +unwrap_model(model::SMomentModel) = model.inner + +""" + stoichiometry(model::SMomentModel) + +Return a stoichiometry of the [`SMomentModel`](@ref). The enzymatic reactions +are split into unidirectional forward and reverse ones. +""" +stoichiometry(model::SMomentModel) = + stoichiometry(model.inner) * _smoment_column_reactions(model) + +""" + objective(model::SMomentModel) + +Reconstruct an objective of the [`SMomentModel`](@ref). +""" +objective(model::SMomentModel) = _smoment_column_reactions(model)' * objective(model.inner) + +""" + reactions(model::SMomentModel) + +Returns the internal reactions in a [`SMomentModel`](@ref) (these may be split +to forward- and reverse-only parts; reactions IDs are mangled accordingly with +suffixes). +""" +reactions(model::SMomentModel) = + let inner_reactions = reactions(model.inner) + [ + _smoment_reaction_name(inner_reactions[col.reaction_idx], col.direction) for + col in model.columns + ] + end + +""" + n_reactions(model::SMomentModel) + +The number of reactions (including split ones) in [`SMomentModel`](@ref). +""" +n_reactions(model::SMomentModel) = length(model.columns) + +""" + bounds(model::SMomentModel) + +Return the variable bounds for [`SMomentModel`](@ref). +""" +bounds(model::SMomentModel) = + ([col.lb for col in model.columns], [col.ub for col in model.columns]) + +""" + reaction_flux(model::SMomentModel) + +Get the mapping of the reaction rates in [`SMomentModel`](@ref) to the original +fluxes in the wrapped model. +""" +reaction_flux(model::SMomentModel) = + _smoment_column_reactions(model)' * reaction_flux(model.inner) + +""" + coupling(model::SMomentModel) + +Return the coupling of [`SMomentModel`](@ref). That combines the coupling of +the wrapped model, coupling for split reactions, and the coupling for the total +enzyme capacity. +""" +coupling(model::SMomentModel) = vcat( + coupling(model.inner) * _smoment_column_reactions(model), + [col.capacity_required for col in model.columns]', +) + +""" + n_coupling_constraints(model::SMomentModel) + +Count the coupling constraints in [`SMomentModel`](@ref) (refer to +[`coupling`](@ref) for details). +""" +n_coupling_constraints(model::SMomentModel) = n_coupling_constraints(model.inner) + 1 + +""" + coupling_bounds(model::SMomentModel) + +The coupling bounds for [`SMomentModel`](@ref) (refer to [`coupling`](@ref) for +details). +""" +coupling_bounds(model::SMomentModel) = + let (iclb, icub) = coupling_bounds(model.inner) + (vcat(iclb, [0.0]), vcat(icub, [model.total_enzyme_capacity])) + end diff --git a/src/base/utils/enzymes.jl b/src/base/utils/enzymes.jl new file mode 100644 index 000000000..6ee4091b0 --- /dev/null +++ b/src/base/utils/enzymes.jl @@ -0,0 +1,56 @@ +""" + gene_product_dict(model::GeckoModel, opt_model) + +Return a dictionary mapping protein molar concentrations to their ids. The +argument `opt_model` is a solved optimization problem, typically returned by +[`flux_balance_analysis`](@ref). See [`flux_dict`](@ref) for the corresponding +function that returns a dictionary of solved fluxes. +""" +gene_product_dict(model::GeckoModel, opt_model) = + is_solved(opt_model) ? + Dict(genes(model) .=> value.(opt_model[:x])[(n_reactions(model)+1):end]) : nothing + +""" + gene_product_dict(model::GeckoModel) + +A pipe-able variant of [`gene_product_dict`](@ref). +""" +gene_product_dict(model::GeckoModel) = x -> gene_product_dict(model, x) + +""" + gene_product_mass_group_dict(model::GeckoModel, opt_model) + +Extract the mass utilization in mass groups from a solved [`GeckoModel`](@ref). +""" +gene_product_mass_group_dict(model::GeckoModel, opt_model) = + is_solved(opt_model) ? + Dict( + grp.group_id => dot( + value.(opt_model[:x])[n_reactions(model).+grp.gene_product_idxs], + grp.gene_product_molar_masses, + ) for grp in model.coupling_row_mass_group + ) : nothing + +""" + gene_product_mass_group_dict(model::GeckoModel) + +A pipe-able variant of [`mass_group_dict`](@ref). +""" +gene_product_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x) + +""" + gene_product_mass(model::SMomentModel) + +Extract the total mass utilization in a solved [`SMomentModel`](@ref). +""" +gene_product_mass(model::SMomentModel, opt_model) = + is_solved(opt_model) ? + sum((col.capacity_required for col in model.columns) .* value.(opt_model[:x])) : nothing + +""" + gene_product_mass(model::SMomentModel) + + +A pipe-able variant of [`gene_product_mass`](@ref). +""" +gene_product_mass(model::SMomentModel) = x -> gene_product_mass(model, x) diff --git a/src/base/utils/gecko.jl b/src/base/utils/gecko.jl new file mode 100644 index 000000000..86500314b --- /dev/null +++ b/src/base/utils/gecko.jl @@ -0,0 +1,85 @@ + +""" + _gecko_reaction_name(original_name::String, direction::Int) + +Internal helper for systematically naming reactions in [`GeckoModel`](@ref). +""" +_gecko_reaction_name(original_name::String, direction::Int, isozyme_idx::Int) = + direction == 0 ? original_name : + direction > 0 ? "$original_name#forward#$isozyme_idx" : + "$original_name#reverse#$isozyme_idx" + +""" + _gecko_reaction_column_reactions(model::GeckoModel) + +Retrieve a utility mapping between reactions and split reactions; rows +correspond to "original" reactions, columns correspond to "split" reactions. +""" +_gecko_reaction_column_reactions(model::GeckoModel) = sparse( + [col.reaction_idx for col in model.columns], + 1:length(model.columns), + [col.direction >= 0 ? 1 : -1 for col in model.columns], + n_reactions(model.inner), + length(model.columns), +) + +""" + _gecko_reaction_coupling(model::GeckoModel) + +Compute the part of the coupling for [`GeckoModel`](@ref) that limits the +"arm" reactions (which group the individual split unidirectional reactions). +""" +_gecko_reaction_coupling(model::GeckoModel) = + let tmp = [ + (col.reaction_coupling_row, i, col.direction) for + (i, col) = enumerate(model.columns) if col.reaction_coupling_row != 0 + ] + sparse( + [row for (row, _, _) in tmp], + [col for (_, col, _) in tmp], + [val for (_, _, val) in tmp], + length(model.coupling_row_reaction), + length(model.columns), + ) + end + +""" + _gecko_gene_product_coupling(model::GeckoModel) + +Compute the part of the coupling for GeckoModel that limits the amount of each +kind of protein available. +""" +_gecko_gene_product_coupling(model::GeckoModel) = + let + tmp = [ + (row, i, val) for (i, col) in enumerate(model.columns) for + (row, val) in col.gene_product_coupling + ] + sparse( + [row for (row, _, _) in tmp], + [col for (_, col, _) in tmp], + [val for (_, _, val) in tmp], + length(model.coupling_row_gene_product), + length(model.columns), + ) + end + +""" + _gecko_mass_group_coupling(model::GeckoModel) + +Compute the part of the coupling for [`GeckoModel`](@ref) that limits the total +mass of each group of gene products. +""" +function _gecko_mass_group_coupling(model::GeckoModel) + tmp = [ # mm = molar mass, mg = mass group, i = row idx, j = col idx + (i, j, mm) for (i, mg) in enumerate(model.coupling_row_mass_group) for + (j, mm) in zip(mg.gene_product_idxs, mg.gene_product_molar_masses) + ] + sparse( + [i for (i, _, _) in tmp], + [j for (_, j, _) in tmp], + [mm for (_, _, mm) in tmp], + length(model.coupling_row_mass_group), + n_genes(model), + ) +end diff --git a/src/base/utils/guesskey.jl b/src/base/utils/guesskey.jl index 94089daeb..4a9ad6302 100644 --- a/src/base/utils/guesskey.jl +++ b/src/base/utils/guesskey.jl @@ -20,3 +20,17 @@ function _guesskey(avail, possibilities) end return x[1] end + +""" + gets(collection, fail, keys) + +Return `fail` if key in `keys` is not in `collection`, otherwise +return `collection[key]`. Useful if may different keys need to be +tried due to non-standardized model formats. +""" +function gets(collection, fail, keys) + for key in keys + haskey(collection, key) && return collection[key] + end + return fail +end diff --git a/src/base/utils/looks_like.jl b/src/base/utils/looks_like.jl index b3f05577b..1b29f23f8 100644 --- a/src/base/utils/looks_like.jl +++ b/src/base/utils/looks_like.jl @@ -96,12 +96,12 @@ Shortcut for finding biomass reaction identifiers in a model; arguments are forwarded to [`looks_like_biomass_reaction`](@ref). """ find_biomass_reaction_ids(m::MetabolicModel; kwargs...) = - filter(id -> looks_like_biomass_reaction(id, kwargs...), reactions(m)) + filter(id -> looks_like_biomass_reaction(id; kwargs...), reactions(m)) """ looks_like_extracellular_metabolite(rxn_id::String; extracellular_suffixes = _constants.extracellular_suffixes, - )::Bool + )::Bool A predicate that matches metabolite identifiers that look like they are extracellular metabolites. Extracellular metabolites are identified by `extracellular_suffixes` at the end of the diff --git a/src/base/utils/smoment.jl b/src/base/utils/smoment.jl new file mode 100644 index 000000000..15751715c --- /dev/null +++ b/src/base/utils/smoment.jl @@ -0,0 +1,55 @@ + +""" + _smoment_reaction_name(original_name::String, direction::Int) + +Internal helper for systematically naming reactions in [`SMomentModel`](@ref). +""" +_smoment_reaction_name(original_name::String, direction::Int) = + direction == 0 ? original_name : + direction > 0 ? "$original_name#forward" : "$original_name#reverse" + +""" + _smoment_column_reactions(model::SMomentModel) + +Retrieve a utility mapping between reactions and split reactions; rows +correspond to "original" reactions, columns correspond to "split" reactions. +""" +_smoment_column_reactions(model::SMomentModel) = sparse( + [col.reaction_idx for col in model.columns], + 1:length(model.columns), + [col.direction >= 0 ? 1 : -1 for col in model.columns], + n_reactions(model.inner), + length(model.columns), +) + +""" + smoment_isozyme_speed(isozyme::Isozyme, gene_product_molar_mass) + +Compute a "score" for picking the most viable isozyme for +[`make_smoment_model`](@ref), based on maximum kcat divided by relative mass of +the isozyme. This is used because sMOMENT algorithm can not handle multiple +isozymes for one reaction. +""" +smoment_isozyme_speed(isozyme::Isozyme, gene_product_molar_mass) = + max(isozyme.kcat_forward, isozyme.kcat_reverse) / sum( + count * gene_product_molar_mass(gene) for + (gene, count) in isozyme.gene_product_count + ) + +""" + smoment_isozyme_speed(gene_product_molar_mass::Function) + +A piping- and argmax-friendly overload of [`smoment_isozyme_speed`](@ref). + +# Example +``` +gene_mass_function = gid -> 1.234 + +best_isozyme_for_smoment = argmax( + smoment_isozyme_speed(gene_mass_function), + my_isozyme_vector, +) +``` +""" +smoment_isozyme_speed(gene_product_molar_mass::Function) = + isozyme -> smoment_isozyme_speed(isozyme, gene_product_molar_mass) diff --git a/src/reconstruction/CoreModelCoupled.jl b/src/reconstruction/CoreModelCoupled.jl index 711cd849d..ad2b4a81e 100644 --- a/src/reconstruction/CoreModelCoupled.jl +++ b/src/reconstruction/CoreModelCoupled.jl @@ -165,13 +165,14 @@ function add_reactions( end """ -Add constraints of the following form to a CoreModelCoupled and return a modified one. + add_coupling_constraints(m::CoreCoupling, args...) -Add constraints to a [`CoreModelCoupled`](@ref) and return a modified one. +Add constraints of the following form to CoreCoupling and return the modified +model. The arguments are same as for in-place [`add_coupling_constraints!`](@ref). """ -function add_coupling_constraints(m::CoreModelCoupled, args...) +function add_coupling_constraints(m::CoreCoupling, args...) new_lp = deepcopy(m) add_coupling_constraints!(new_lp, args...) return new_lp @@ -183,12 +184,11 @@ end Add coupling constraints to a plain [`CoreModel`](@ref) (returns a [`CoreModelCoupled`](@ref)). """ -add_coupling_constraints(m::CoreModel, args...) = - add_coupling_constraints(convert(CoreModelCoupled, m), args...) +add_coupling_constraints(m::CoreModel, args...) = CoreModelCoupled(m, args...) """ add_coupling_constraints!( - m::CoreModelCoupled, + m::CoreCoupling, c::VecType, cl::AbstractFloat, cu::AbstractFloat, @@ -197,7 +197,7 @@ add_coupling_constraints(m::CoreModel, args...) = Overload for adding a single coupling constraint. """ function add_coupling_constraints!( - m::CoreModelCoupled, + m::CoreCoupling, c::VecType, cl::AbstractFloat, cu::AbstractFloat, @@ -207,7 +207,7 @@ end """ add_coupling_constraints!( - m::CoreModelCoupled, + m::CoreCoupling, C::MatType, cl::V, cu::V, @@ -219,7 +219,7 @@ In-place add a single coupling constraint in form ``` """ function add_coupling_constraints!( - m::CoreModelCoupled, + m::CoreCoupling, C::MatType, cl::V, cu::V, @@ -237,35 +237,34 @@ function add_coupling_constraints!( end """ - remove_coupling_constraints(m::CoreModelCoupled, args...) + remove_coupling_constraints(m::CoreCoupling, args...) Remove coupling constraints from the linear model, and return the modified model. Arguments are the same as for in-place version [`remove_coupling_constraints!`](@ref). """ -function remove_coupling_constraints(m::CoreModelCoupled, args...) +function remove_coupling_constraints(m::CoreCoupling, args...) new_model = deepcopy(m) remove_coupling_constraints!(new_model, args...) return new_model end """ - remove_coupling_constraints!(m::CoreModelCoupled, constraint::Int) + remove_coupling_constraints!(m::CoreCoupling, constraint::Int) -Removes a single coupling constraints from a [`CoreModelCoupled`](@ref) -in-place. +Removes a single coupling constraints from a [`CoreCoupling`](@ref) in-place. """ -remove_coupling_constraints!(m::CoreModelCoupled, constraint::Int) = +remove_coupling_constraints!(m::CoreCoupling, constraint::Int) = remove_coupling_constraints!(m, [constraint]) """ - remove_coupling_constraints!(m::CoreModelCoupled, constraints::Vector{Int}) + remove_coupling_constraints!(m::CoreCoupling, constraints::Vector{Int}) -Removes a set of coupling constraints from a [`CoreModelCoupled`](@ref) +Removes a set of coupling constraints from a [`CoreCoupling`](@ref) in-place. """ -function remove_coupling_constraints!(m::CoreModelCoupled, constraints::Vector{Int}) +function remove_coupling_constraints!(m::CoreCoupling, constraints::Vector{Int}) to_be_kept = filter(!in(constraints), 1:n_coupling_constraints(m)) m.C = m.C[to_be_kept, :] m.cl = m.cl[to_be_kept] @@ -275,7 +274,7 @@ end """ change_coupling_bounds!( - model::CoreModelCoupled, + model::CoreCoupling, constraints::Vector{Int}; cl::V = Float64[], cu::V = Float64[], @@ -285,7 +284,7 @@ Change the lower and/or upper bounds (`cl` and `cu`) for the given list of coupling constraints. """ function change_coupling_bounds!( - model::CoreModelCoupled, + model::CoreCoupling, constraints::Vector{Int}; cl::V = Float64[], cu::V = Float64[], @@ -309,131 +308,131 @@ function change_coupling_bounds!( nothing end -@_change_bounds_fn CoreModelCoupled Int inplace begin +# TODO see if some of these can be derived from ModelWrapper +@_change_bounds_fn CoreCoupling Int inplace begin change_bound!(model.lm, rxn_idx, lower = lower, upper = upper) end -@_change_bounds_fn CoreModelCoupled Int inplace plural begin +@_change_bounds_fn CoreCoupling Int inplace plural begin change_bounds!(model.lm, rxn_idxs, lower = lower, upper = upper) end -@_change_bounds_fn CoreModelCoupled String inplace begin +@_change_bounds_fn CoreCoupling String inplace begin change_bound!(model.lm, rxn_id, lower = lower, upper = upper) end -@_change_bounds_fn CoreModelCoupled String inplace plural begin +@_change_bounds_fn CoreCoupling String inplace plural begin change_bounds!(model.lm, rxn_ids, lower = lower, upper = upper) end -@_change_bounds_fn CoreModelCoupled Int begin +@_change_bounds_fn CoreCoupling Int begin n = copy(model) n.lm = change_bound(model.lm, rxn_idx, lower = lower, upper = upper) n end -@_change_bounds_fn CoreModelCoupled Int plural begin +@_change_bounds_fn CoreCoupling Int plural begin n = copy(model) n.lm = change_bounds(model.lm, rxn_idxs, lower = lower, upper = upper) n end -@_change_bounds_fn CoreModelCoupled String begin +@_change_bounds_fn CoreCoupling String begin n = copy(model) n.lm = change_bound(model.lm, rxn_id, lower = lower, upper = upper) n end -@_change_bounds_fn CoreModelCoupled String plural begin +@_change_bounds_fn CoreCoupling String plural begin n = copy(model) n.lm = change_bounds(model.lm, rxn_ids, lower = lower, upper = upper) n end -@_remove_fn reaction CoreModelCoupled Int inplace begin +@_remove_fn reaction CoreCoupling Int inplace begin remove_reactions!(model, [reaction_idx]) end -@_remove_fn reaction CoreModelCoupled Int inplace plural begin +@_remove_fn reaction CoreCoupling Int inplace plural begin orig_rxns = reactions(model.lm) remove_reactions!(model.lm, reaction_idxs) model.C = model.C[:, in.(orig_rxns, Ref(Set(reactions(model.lm))))] nothing end -@_remove_fn reaction CoreModelCoupled Int begin +@_remove_fn reaction CoreCoupling Int begin remove_reactions(model, [reaction_idx]) end -@_remove_fn reaction CoreModelCoupled Int plural begin +@_remove_fn reaction CoreCoupling Int plural begin n = copy(model) n.lm = remove_reactions(n.lm, reaction_idxs) n.C = n.C[:, in.(reactions(model.lm), Ref(Set(reactions(n.lm))))] return n end -@_remove_fn reaction CoreModelCoupled String inplace begin +@_remove_fn reaction CoreCoupling String inplace begin remove_reactions!(model, [reaction_id]) end -@_remove_fn reaction CoreModelCoupled String inplace plural begin +@_remove_fn reaction CoreCoupling String inplace plural begin remove_reactions!(model, Int.(indexin(reaction_ids, reactions(model)))) end -@_remove_fn reaction CoreModelCoupled String begin +@_remove_fn reaction CoreCoupling String begin remove_reactions(model, [reaction_id]) end -@_remove_fn reaction CoreModelCoupled String plural begin +@_remove_fn reaction CoreCoupling String plural begin remove_reactions(model, Int.(indexin(reaction_ids, reactions(model)))) end -@_remove_fn metabolite CoreModelCoupled Int inplace begin +@_remove_fn metabolite CoreCoupling Int inplace begin remove_metabolites!(model, [metabolite_idx]) end -@_remove_fn metabolite CoreModelCoupled Int plural inplace begin +@_remove_fn metabolite CoreCoupling Int plural inplace begin orig_rxns = reactions(model.lm) model.lm = remove_metabolites(model.lm, metabolite_idxs) model.C = model.C[:, in.(orig_rxns, Ref(Set(reactions(model.lm))))] nothing end -@_remove_fn metabolite CoreModelCoupled Int begin +@_remove_fn metabolite CoreCoupling Int begin remove_metabolites(model, [metabolite_idx]) end -@_remove_fn metabolite CoreModelCoupled Int plural begin - n = deepcopy(model) #almost everything gets changed anyway - remove_metabolites!(n, metabolite_idxs) +@_remove_fn metabolite CoreCoupling Int plural begin + n = copy(model) + n.lm = remove_metabolites(n.lm, metabolite_idxs) return n end -@_remove_fn metabolite CoreModelCoupled String inplace begin +@_remove_fn metabolite CoreCoupling String inplace begin remove_metabolites!(model, [metabolite_id]) end -@_remove_fn metabolite CoreModelCoupled String inplace plural begin +@_remove_fn metabolite CoreCoupling String inplace plural begin remove_metabolites!(model, Int.(indexin(metabolite_ids, metabolites(model)))) end -@_remove_fn metabolite CoreModelCoupled String begin +@_remove_fn metabolite CoreCoupling String begin remove_metabolites(model, [metabolite_id]) end -@_remove_fn metabolite CoreModelCoupled String plural begin +@_remove_fn metabolite CoreCoupling String plural begin remove_metabolites(model, Int.(indexin(metabolite_ids, metabolites(model)))) end """ change_objective!( - model::CoreModelCoupled, + model::CoreCoupling, args...; kwargs..., ) -Forwards arguments to [`change_objective!`](@ref) of the internal -[`CoreModel`](@ref). +Forwards arguments to [`change_objective!`](@ref) of the internal model. """ -function change_objective!(model::CoreModelCoupled, args...; kwargs...) +function change_objective!(model::CoreCoupling, args...; kwargs...) change_objective!(model.lm, args...; kwargs...) end diff --git a/src/reconstruction/enzymes.jl b/src/reconstruction/enzymes.jl new file mode 100644 index 000000000..09dac3dc6 --- /dev/null +++ b/src/reconstruction/enzymes.jl @@ -0,0 +1,17 @@ +""" + with_smoment(; kwargs...) + +Specifies a model variant which adds extra semantics of the sMOMENT algorithm, +giving a [`SMomentModel`](@ref). The arguments are forwarded to +[`make_smoment_model`](@ref). Intended for usage with [`screen`](@ref). +""" +with_smoment(; kwargs...) = model -> make_smoment_model(model; kwargs...) + +""" + with_gecko(; kwargs...) + +Specifies a model variant which adds extra semantics of the Gecko algorithm, +giving a [`GeckoModel`](@ref). The arguments are forwarded to +[`make_gecko_model`](@ref). Intended for usage with [`screen`](@ref). +""" +with_gecko(; kwargs...) = model -> make_gecko_model(model; kwargs...) diff --git a/test/analysis/flux_variability_analysis.jl b/test/analysis/flux_variability_analysis.jl index e624ba221..8d384ce5b 100644 --- a/test/analysis/flux_variability_analysis.jl +++ b/test/analysis/flux_variability_analysis.jl @@ -9,6 +9,9 @@ 2.0 2.0 ] + rates = reaction_variability_analysis(cp, optimizer) + @test fluxes == rates + fluxes = flux_variability_analysis(cp, [2], optimizer) @test size(fluxes) == (1, 2) diff --git a/test/analysis/gecko.jl b/test/analysis/gecko.jl new file mode 100644 index 000000000..58318405f --- /dev/null +++ b/test/analysis/gecko.jl @@ -0,0 +1,123 @@ +@testset "GECKO" begin + model = load_model(StandardModel, model_paths["e_coli_core.json"]) + + get_reaction_isozymes = + rid -> + haskey(ecoli_core_reaction_kcats, rid) ? + collect( + Isozyme( + Dict(grr .=> ecoli_core_protein_stoichiometry[rid][i]), + ecoli_core_reaction_kcats[rid][i]..., + ) for (i, grr) in enumerate(reaction_gene_association(model, rid)) + ) : Isozyme[] + + get_gene_product_mass = gid -> get(ecoli_core_gene_product_masses, gid, 0.0) + + total_gene_product_mass = 100.0 + + bounded_model = + model |> with_changed_bounds( + ["EX_glc__D_e", "GLCpts"]; + lower = [-1000.0, -1.0], + upper = [nothing, 12.0], + ) + + gm = + bounded_model |> with_gecko( + reaction_isozymes = get_reaction_isozymes, + gene_product_bounds = g -> g == "b2779" ? (0.01, 0.06) : (0.0, 1.0), + gene_product_molar_mass = get_gene_product_mass, + gene_product_mass_group_bound = _ -> total_gene_product_mass, + ) + + opt_model = flux_balance_analysis( + gm, + Tulip.Optimizer; + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], + ) + + rxn_fluxes = flux_dict(gm, opt_model) + prot_concens = gene_product_dict(gm, opt_model) + + @test isapprox( + rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], + 0.812827846796761, + atol = TEST_TOLERANCE, + ) + + prot_mass = sum(ecoli_core_gene_product_masses[gid] * c for (gid, c) in prot_concens) + mass_groups = gene_product_mass_group_dict(gm, opt_model) + + @test isapprox(prot_mass, total_gene_product_mass, atol = TEST_TOLERANCE) + @test isapprox(prot_mass, mass_groups["uncategorized"], atol = TEST_TOLERANCE) +end + +@testset "GECKO small model" begin + #= + Implement the small model found in the supplment of the + original GECKO paper. This model is nice to troubleshoot with, + because the stoich matrix is small. + =# + m = StandardModel("gecko") + m1 = Metabolite("m1") + m2 = Metabolite("m2") + m3 = Metabolite("m3") + m4 = Metabolite("m4") + + @add_reactions! m begin + "r1", nothing → m1, 0, 100 + "r2", nothing → m2, 0, 100 + "r3", m1 + m2 → m3, 0, 100 + "r4", m3 → m4, 0, 100 + "r5", m2 ↔ m4, -100, 100 + "r6", m4 → nothing, 0, 100 + end + + gs = [Gene("g$i") for i = 1:4] + + m.reactions["r4"].grr = [["g1"], ["g2"]] + m.reactions["r5"].grr = [["g3", "g4"]] + m.reactions["r6"].objective_coefficient = 1.0 + + add_genes!(m, gs) + add_metabolites!(m, [m1, m2, m3, m4]) + + reaction_isozymes = Dict( + "r3" => [Isozyme(Dict("g1" => 1), 1.0, 1.0)], + "r4" => + [Isozyme(Dict("g1" => 1), 2.0, 2.0), Isozyme(Dict("g2" => 1), 3.0, 3.0)], + "r5" => [Isozyme(Dict("g3" => 1, "g4" => 2), 70.0, 70.0)], + ) + gene_product_bounds = Dict( + "g1" => (0.0, 10.0), + "g2" => (0.0, 10.0), + "g3" => (0.0, 10.0), + "g4" => (0.0, 10.0), + ) + + gene_product_molar_mass = Dict("g1" => 1.0, "g2" => 2.0, "g3" => 3.0, "g4" => 4.0) + + gene_product_mass_group_bound = Dict("uncategorized" => 0.5) + + gm = make_gecko_model( + m; + reaction_isozymes, + gene_product_bounds, + gene_product_molar_mass, + gene_product_mass_group_bound, + ) + + opt_model = flux_balance_analysis( + gm, + Tulip.Optimizer; + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], + ) + + rxn_fluxes = flux_dict(gm, opt_model) + gene_products = gene_product_dict(gm, opt_model) + mass_groups = gene_product_mass_group_dict(gm, opt_model) + + @test isapprox(rxn_fluxes["r6"], 3.181818181753438, atol = TEST_TOLERANCE) + @test isapprox(gene_products["g4"], 0.09090909090607537, atol = TEST_TOLERANCE) + @test isapprox(mass_groups["uncategorized"], 0.5, atol = TEST_TOLERANCE) +end diff --git a/test/analysis/smoment.jl b/test/analysis/smoment.jl new file mode 100644 index 000000000..cba241217 --- /dev/null +++ b/test/analysis/smoment.jl @@ -0,0 +1,41 @@ +@testset "SMOMENT" begin + model = load_model(StandardModel, model_paths["e_coli_core.json"]) + + get_gene_product_mass = gid -> get(ecoli_core_gene_product_masses, gid, 0.0) + + get_reaction_isozyme = + rid -> + haskey(ecoli_core_reaction_kcats, rid) ? + argmax( + smoment_isozyme_speed(get_gene_product_mass), + Isozyme( + Dict(grr .=> ecoli_core_protein_stoichiometry[rid][i]), + ecoli_core_reaction_kcats[rid][i]..., + ) for (i, grr) in enumerate(reaction_gene_association(model, rid)) + ) : nothing + + smoment_model = + model |> + with_changed_bounds( + ["EX_glc__D_e", "GLCpts"], + lower = [-1000.0, -1.0], + upper = [nothing, 12.0], + ) |> + with_smoment( + reaction_isozyme = get_reaction_isozyme, + gene_product_molar_mass = get_gene_product_mass, + total_enzyme_capacity = 100.0, + ) + + rxn_fluxes = flux_balance_analysis_dict( + smoment_model, + Tulip.Optimizer; + modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)], + ) + + @test isapprox( + rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"], + 0.8907273630431708, + atol = TEST_TOLERANCE, + ) +end diff --git a/test/data_static.jl b/test/data_static.jl index 97699cadc..a80d46d16 100644 --- a/test/data_static.jl +++ b/test/data_static.jl @@ -73,7 +73,7 @@ test_toyModel() = CoreModel( ["m1[c]", "m3[c]", "m2[c]", "m1[e]", "m3[e]", "biomass[c]"], ) -const reaction_standard_gibbs_free_energies = Dict( +const reaction_standard_gibbs_free_energies = Dict{String,Float64}( #= ΔᵣG⁰ data from Equilibrator using the E. coli core model's reactions To generate this data manually, go to https://equilibrator.weizmann.ac.il/ and @@ -154,3 +154,399 @@ const reaction_standard_gibbs_free_energies = Dict( "PPS" => -6.0551989457468665, "FUM" => -3.424133018702122, ) + +const ecoli_core_gene_product_masses = Dict{String,Float64}( + #= + Data downloaded from Uniprot for E. coli K12, + gene mass in kDa. To obtain these data yourself, go to + Uniprot: https://www.uniprot.org/ + and search using these terms: + =# + "b4301" => 23.214, + "b1602" => 48.723, + "b4154" => 65.972, + "b3236" => 32.337, + "b1621" => 56.627, + "b1779" => 35.532, + "b3951" => 85.96, + "b1676" => 50.729, + "b3114" => 85.936, + "b1241" => 96.127, + "b2276" => 52.044, + "b1761" => 48.581, + "b3925" => 35.852, + "b3493" => 53.389, + "b3733" => 31.577, + "b2926" => 41.118, + "b0979" => 42.424, + "b4015" => 47.522, + "b2296" => 43.29, + "b4232" => 36.834, + "b3732" => 50.325, + "b2282" => 36.219, + "b2283" => 100.299, + "b0451" => 44.515, + "b2463" => 82.417, + "b0734" => 42.453, + "b3738" => 30.303, + "b3386" => 24.554, + "b3603" => 59.168, + "b2416" => 63.562, + "b0729" => 29.777, + "b0767" => 36.308, + "b3734" => 55.222, + "b4122" => 60.105, + "b2987" => 53.809, + "b2579" => 14.284, + "b0809" => 26.731, + "b1524" => 33.516, + "b3612" => 56.194, + "b3735" => 19.332, + "b3731" => 15.068, + "b1817" => 35.048, + "b1603" => 54.623, + "b1773" => 30.81, + "b4090" => 16.073, + "b0114" => 99.668, + "b3962" => 51.56, + "b2464" => 35.659, + "b2976" => 80.489, + "b1818" => 27.636, + "b2285" => 18.59, + "b1702" => 87.435, + "b1849" => 42.434, + "b1812" => 50.97, + "b0902" => 28.204, + "b3403" => 59.643, + "b1612" => 60.299, + "b1854" => 51.357, + "b0811" => 27.19, + "b0721" => 14.299, + "b2914" => 22.86, + "b1297" => 53.177, + "b0723" => 64.422, + "b3919" => 26.972, + "b3115" => 43.384, + "b4077" => 47.159, + "b3528" => 45.436, + "b0351" => 33.442, + "b2029" => 51.481, + "b1819" => 30.955, + "b0728" => 41.393, + "b2935" => 72.212, + "b2415" => 9.119, + "b0727" => 44.011, + "b0116" => 50.688, + "b0485" => 32.903, + "b3736" => 17.264, + "b0008" => 35.219, + "b3212" => 163.297, + "b3870" => 51.904, + "b4014" => 60.274, + "b2280" => 19.875, + "b2133" => 64.612, + "b2278" => 66.438, + "b0118" => 93.498, + "b2288" => 16.457, + "b3739" => 13.632, + "b3916" => 34.842, + "b3952" => 32.43, + "b2925" => 39.147, + "b2465" => 73.043, + "b2297" => 77.172, + "b2417" => 18.251, + "b4395" => 24.065, + "b3956" => 99.063, + "b0722" => 12.868, + "b2779" => 45.655, + "b0115" => 66.096, + "b0733" => 58.205, + "b1478" => 35.38, + "b2492" => 30.565, + "b0724" => 26.77, + "b0755" => 28.556, + "b1136" => 45.757, + "b2286" => 68.236, + "b0978" => 57.92, + "b1852" => 55.704, + "b2281" => 20.538, + "b2587" => 47.052, + "b2458" => 36.067, + "b0904" => 30.991, + "b1101" => 50.677, + "b0875" => 23.703, + "b3213" => 52.015, + "b2975" => 58.92, + "b0720" => 48.015, + "b0903" => 85.357, + "b1723" => 32.456, + "b2097" => 38.109, + "b3737" => 8.256, + "b0810" => 24.364, + "b4025" => 61.53, + "b1380" => 36.535, + "b0356" => 39.359, + "b2277" => 56.525, + "b1276" => 97.677, + "b4152" => 15.015, + "b1479" => 63.197, + "b4153" => 27.123, + "b4151" => 13.107, + "b2287" => 25.056, + "b0474" => 23.586, + "b2284" => 49.292, + "b1611" => 50.489, + "b0726" => 105.062, + "b2279" => 10.845, +) + +const ecoli_core_protein_stoichiometry = Dict{String,Vector{Vector{Float64}}}( + #= + Data made up, each isozyme is assumed to be composed of + only one subunit each. + =# + "ACALD" => [[1.0], [1.0]], + "PTAr" => [[1.0], [1.0]], + "ALCD2x" => [[1.0], [1.0], [1.0]], + "PDH" => [[1.0, 1.0, 1.0]], + "PYK" => [[1.0], [1.0]], + "CO2t" => [[1.0]], + "MALt2_2" => [[1.0]], + "CS" => [[1.0]], + "PGM" => [[1.0], [1.0], [1.0]], + "TKT1" => [[1.0], [1.0]], + "ACONTa" => [[1.0], [1.0]], + "GLNS" => [[1.0], [1.0]], + "ICL" => [[1.0]], + "FBA" => [[1.0], [1.0], [1.0]], + "FORt2" => [[1.0], [1.0]], + "G6PDH2r" => [[1.0]], + "AKGDH" => [[1.0, 1.0, 1.0]], + "TKT2" => [[1.0], [1.0]], + "FRD7" => [[1.0, 1.0, 1.0, 1.0]], + "SUCOAS" => [[1.0, 1.0]], + "FBP" => [[1.0], [1.0]], + "ICDHyr" => [[1.0]], + "AKGt2r" => [[1.0]], + "GLUSy" => [[1.0, 1.0]], + "TPI" => [[1.0]], + "FORt" => [[1.0], [1.0]], + "ACONTb" => [[1.0], [1.0]], + "GLNabc" => [[1.0, 1.0, 1.0]], + "RPE" => [[1.0], [1.0]], + "ACKr" => [[1.0], [1.0], [1.0]], + "THD2" => [[1.0, 1.0]], + "PFL" => [[1.0, 1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]], + "RPI" => [[1.0], [1.0]], + "D_LACt2" => [[1.0], [1.0]], + "TALA" => [[1.0], [1.0]], + "PPCK" => [[1.0]], + "ACt2r" => [[1.0]], + "NH4t" => [[1.0], [1.0]], + "PGL" => [[1.0]], + "NADTRHD" => [[1.0], [1.0, 1.0]], + "PGK" => [[1.0]], + "LDH_D" => [[1.0], [1.0]], + "ME1" => [[1.0]], + "PIt2r" => [[1.0], [1.0]], + "ATPS4r" => [ + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + ], + "GLCpts" => [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], + "GLUDy" => [[1.0]], + "CYTBD" => [[1.0, 1.0], [1.0, 1.0]], + "FUMt2_2" => [[1.0]], + "FRUpts2" => [[1.0, 1.0, 1.0, 1.0, 1.0]], + "GAPD" => [[1.0]], + "H2Ot" => [[1.0], [1.0]], + "PPC" => [[1.0]], + "NADH16" => [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]], + "PFK" => [[1.0], [1.0]], + "MDH" => [[1.0]], + "PGI" => [[1.0]], + "O2t" => [[1.0]], + "ME2" => [[1.0]], + "GND" => [[1.0]], + "SUCCt2_2" => [[1.0]], + "GLUN" => [[1.0], [1.0], [1.0]], + "ETOHt2r" => [[1.0]], + "ADK1" => [[1.0]], + "ACALDt" => [[1.0]], + "SUCDi" => [[1.0, 1.0, 1.0, 1.0]], + "ENO" => [[1.0]], + "MALS" => [[1.0], [1.0]], + "GLUt2r" => [[1.0]], + "PPS" => [[1.0]], + "FUM" => [[1.0], [1.0], [1.0]], +) + +const ecoli_core_reaction_kcats = Dict{String,Vector{Tuple{Float64,Float64}}}( + #= + Data taken from Heckmann, David, et al. "Machine learning applied to enzyme + turnover numbers reveals protein structural correlates and improves metabolic + models." Nature communications 9.1 (2018): 1-10. Assume forward and reverse + kcats are the same, and each isozyme has the same kcat. + =# + "ACALD" => + [(568.1130792316333, 568.1130792316333), (568.856126503717, 568.856126503717)], + "PTAr" => [ + (1171.9703624351055, 1171.9703624351055), + (1173.7231032615289, 1173.7231032615289), + ], + "ALCD2x" => [ + (75.9547881894345, 75.9547881894345), + (75.96334310351442, 75.96334310351442), + (76.1472359297987, 76.1472359297987), + ], + "PDH" => [(529.7610874857239, 529.7610874857239)], + "PYK" => [ + (422.0226052080562, 422.0226052080562), + (422.1332899347833, 422.1332899347833), + ], + "MALt2_2" => [(234.03664660088714, 234.03664660088714)], + "CS" => [(113.29607453875758, 113.29607453875758)], + "PGM" => [ + (681.4234715886669, 681.4234715886669), + (681.6540601244343, 681.6540601244343), + (680.5234799168278, 680.5234799168278), + ], + "TKT1" => [ + (311.16139580671637, 311.16139580671637), + (311.20967965149947, 311.20967965149947), + ], + "ACONTa" => [ + (191.02308213992006, 191.02308213992006), + (191.03458045697235, 191.03458045697235), + ], + "GLNS" => [ + (89.83860937287024, 89.83860937287024), + (89.82177852142014, 89.82177852142014), + ], + "ICL" => [(17.45922330097792, 17.45922330097792)], + "FBA" => [ + (373.425646787578, 373.425646787578), + (372.74936053215833, 372.74936053215833), + (372.88627228768166, 372.88627228768166), + ], + "FORt2" => [ + (233.93045260179326, 233.93045260179326), + (233.84804009142908, 233.84804009142908), + ], + "G6PDH2r" => [(589.3761070080022, 589.3761070080022)], + "AKGDH" => [(264.48071159327156, 264.48071159327156)], + "TKT2" => [ + (467.4226876901618, 467.4226876901618), + (468.1440593542596, 468.1440593542596), + ], + "FRD7" => [(90.20637824912605, 90.20637824912605)], + "SUCOAS" => [(18.494387648707622, 18.494387648707622)], + "FBP" => [ + (568.5346256470805, 568.5346256470805), + (567.6367759041788, 567.6367759041788), + ], + "ICDHyr" => [(39.62446791678959, 39.62446791678959)], + "AKGt2r" => [(234.99097804446805, 234.99097804446805)], + "GLUSy" => [(33.262997317319055, 33.262997317319055)], + "TPI" => [(698.301904211076, 698.301904211076)], + "FORt" => [ + (234.38391855848187, 234.38391855848187), + (234.34725576182922, 234.34725576182922), + ], + "ACONTb" => [ + (159.74612206327865, 159.74612206327865), + (159.81975755249232, 159.81975755249232), + ], + "GLNabc" => [(233.80358131677775, 233.80358131677775)], + "RPE" => [ + (1772.4850826683305, 1772.4850826683305), + (1768.8536177485582, 1768.8536177485582), + ], + "ACKr" => [ + (554.611547307207, 554.611547307207), + (555.112707891257, 555.112707891257), + (555.2464368932744, 555.2464368932744), + ], + "THD2" => [(24.739139801185537, 24.739139801185537)], + "PFL" => [ + (96.56316095411077, 96.56316095411077), + (96.65024313036014, 96.65024313036014), + (96.60761818004025, 96.60761818004025), + (96.49541118899961, 96.49541118899961), + ], + "RPI" => [ + (51.771578021074234, 51.771578021074234), + (51.81603467243345, 51.81603467243345), + ], + "D_LACt2" => [ + (233.51709131524734, 233.51709131524734), + (233.83187606098016, 233.83187606098016), + ], + "TALA" => [ + (109.05210545422884, 109.05210545422884), + (109.04246437049026, 109.04246437049026), + ], + "PPCK" => [(218.4287805666016, 218.4287805666016)], + "PGL" => [(2120.4297518987964, 2120.4297518987964)], + "NADTRHD" => [ + (186.99387360624777, 186.99387360624777), + (187.16629305266423, 187.16629305266423), + ], + "PGK" => [(57.641966636896335, 57.641966636896335)], + "LDH_D" => [ + (31.11118891764946, 31.11118891764946), + (31.12493425054357, 31.12493425054357), + ], + "ME1" => [(487.0161203971232, 487.0161203971232)], + "PIt2r" => [ + (233.8651331835765, 233.8651331835765), + (234.27374798581067, 234.27374798581067), + ], + "ATPS4r" => [ + (7120.878030435999, 7120.878030435999), + (7116.751386037507, 7116.751386037507), + ], + "GLCpts" => [ + (233.9009878400008, 233.9009878400008), + (233.66656882114864, 233.66656882114864), + (233.66893882934883, 233.66893882934883), + ], + "GLUDy" => [(105.32811069172409, 105.32811069172409)], + "CYTBD" => [ + (153.18512795009505, 153.18512795009505), + (153.2429537682265, 153.2429537682265), + ], + "FUMt2_2" => [(234.37495609395967, 234.37495609395967)], + "FRUpts2" => [(234.1933863380989, 234.1933863380989)], + "GAPD" => [(128.76795529111456, 128.76795529111456)], + "PPC" => [(165.52424516841342, 165.52424516841342)], + "NADH16" => [(971.7487306963936, 971.7487306963936)], + "PFK" => [ + (1000.4626204522712, 1000.4626204522712), + (1000.5875517343595, 1000.5875517343595), + ], + "MDH" => [(25.931655783969283, 25.931655783969283)], + "PGI" => [(468.11833198138834, 468.11833198138834)], + "ME2" => [(443.0973626307168, 443.0973626307168)], + "GND" => [(240.1252264230952, 240.1252264230952)], + "SUCCt2_2" => [(234.18109388303225, 234.18109388303225)], + "GLUN" => [ + (44.76358496525738, 44.76358496525738), + (44.84850207360875, 44.84850207360875), + (44.76185250415503, 44.76185250415503), + ], + "ADK1" => [(111.64869652600649, 111.64869652600649)], + "SUCDi" => [(680.3193833053011, 680.3193833053011)], + "ENO" => [(209.35855069219886, 209.35855069219886)], + "MALS" => [ + (252.7540503869977, 252.7540503869977), + (252.2359738678874, 252.2359738678874), + ], + "GLUt2r" => [(234.22890837451837, 234.22890837451837)], + "PPS" => [(706.1455885214322, 706.1455885214322)], + "FUM" => [ + (1576.8372583425075, 1576.8372583425075), + (1576.233088455828, 1576.233088455828), + (1575.9638204848736, 1575.9638204848736), + ], +)