Merge pull request #119 from jcrozum/block-expand

Block (scc) expansion
jcrozum · Jun 5, 2024 · 8322278 · 8322278 · github-actions · Jun 5, 2024
2 parents a6d545a + 26f09a8
commit 8322278
Show file tree

Hide file tree

Showing 14 changed files with 1,035 additions and 364 deletions.
diff --git a/biobalm/_sd_algorithms/expand_source_SCCs.py b/biobalm/_sd_algorithms/expand_source_SCCs.py
diff --git a/biobalm/_sd_algorithms/expand_source_blocks.py b/biobalm/_sd_algorithms/expand_source_blocks.py
@@ -0,0 +1,220 @@
+from __future__ import annotations
+
+import itertools as it
+from typing import TYPE_CHECKING, cast
+
+from biobalm.space_utils import percolate_network
+from biobalm.types import BooleanSpace
+from biobalm.interaction_graph_utils import source_nodes
+
+if TYPE_CHECKING:
+ from biobalm.succession_diagram import SuccessionDiagram
+
+
+def expand_source_blocks(
+ sd: SuccessionDiagram,
+ check_maa: bool = True,
+) -> bool:
+ """
+ Base correctness assumptions:
+
+ - Expanding two minimal blocks is always independent.
+ -
+ """
+
+ if sd.config["debug"]:
+ print(f"Start SD expansion using block decomposition on {sd.network}.")
+
+ root = sd.root()
+
+ current_level: set[int] = set([root])
+ next_level: set[int] = set()
+
+ # This already accounts for constant percolation.
+ node_space = sd.node_data(root)["space"]
+
+ # find source nodes
+ perc_bn = percolate_network(sd.network, node_space)
+ sources = source_nodes(perc_bn)
+
+ if sd.config["debug"]:
+ print(f" > Computed source/input variable(s): {sources}")
+
+ # get source nodes combinations and expand root node
+ if len(sources) != 0:
+ # If there are too many source nodes, this can generate an absurdly large SD.
+ # This would be a problem even without the SCC expansion, but we can just
+ # stop the whole thing faster because we know how many nodes it generates.
+ if 2 ** len(sources) > sd.config["max_motifs_per_node"]:
+ raise RuntimeError(
+ f"Exceeded the maximum amount of stable motifs per node ({sd.config['max_motifs_per_node']}; see `SuccessionDiagramConfiguration.max_motifs_per_node`)."
+ )
+ else:
+ if sd.config["debug"]:
+ print(
+ f" > Expanding {len(sources)} source node into {2 ** len(sources)} SD nodes."
+ )
+
+ bin_values_iter = it.product(range(2), repeat=len(sources))
+ for bin_values in bin_values_iter:
+ valuation = cast(BooleanSpace, dict(zip(sources, bin_values)))
+ sub_space = node_space | valuation
+
+ next_level.add(sd._ensure_node(root, sub_space)) # type: ignore
+
+ # This makes the root artificially "expanded". Also, there
+ # can be no attractors here because we are just fixing the source nodes.
+ sd.node_data(root)["expanded"] = True
+ sd.node_data(root)["attractor_seeds"] = []
+ sd.node_data(root)["attractor_sets"] = []
+ current_level = next_level
+ next_level = set()
+
+ bfs_depth = 0
+
+ while len(current_level) > 0:
+ bfs_depth += 1
+ if sd.config["debug"]:
+ print(
+ f" > Start block expansion of a BFS level {bfs_depth} with {len(current_level)} node(s)."
+ )
+
+ for node in sorted(current_level): # Sorted for determinism
+ if sd.node_data(node)["expanded"]:
+ # We re-discovered a previously expanded node.
+ continue
+
+ # Compute successors as if this was a normal expansion procedure.
+ successors = sd.node_successors(node, compute=True)
+ # Sort successors to avoid non-determinism.
+ successors = sorted(successors)
+
+ if len(successors) == 0:
+ # This is a minimal trap space.
+ continue
+
+ if len(successors) == 1 and not check_maa:
+ # This space is not minimal, but there is no "choice" to make here.
+ # We can just continue with that single choice.
+ #
+ # However, if we are checking for MAAs, we continue as normal, because
+ # we need to compute most of the non-trivial results anyway.
+
+ next_level.add(successors[0])
+ continue
+
+ node_bn = sd.node_percolated_network(node, compute=True)
+
+ # Split successors into "blocks" based on the regulatory component
+ # of the variables fixed by the stable motif.
+
+ # Maps a "block" (bwd-closed set of variables) to a list of node IDs (successor nodes).
+ blocks: list[tuple[set[str], list[int]]] = []
+ for s in successors:
+ motif = sd.edge_stable_motif(node, s, reduced=True)
+ motif_block = node_bn.backward_reachable(list(motif.keys()))
+ motif_block_names = {node_bn.get_variable_name(v) for v in motif_block}
+
+ found = False
+ for block, nodes in blocks:
+ if block == motif_block_names:
+ found = True
+ nodes.append(s)
+ break
+ if not found:
+ blocks.append((motif_block_names, [s]))
+
+ if sd.config["debug"]:
+ print(
+ f" > [{node}] Computed blocks: {[(len(k), len(v)) for (k, v) in blocks]}"
+ )
+
+ # Now remove all non-minimal blocks (minimal in terms of set inclusion).
+ # The reason why we are removing those is that they are not independent
+ # on the minimal blocks.
+
+ minimal_blocks: list[tuple[set[str], list[int]]] = []
+ if len(blocks) > 1:
+ for block, nodes in blocks:
+ is_minimal = True
+ for b2, _ in blocks:
+ if b2 < block:
+ is_minimal = False
+ break
+ if is_minimal:
+ minimal_blocks.append((block, nodes))
+ else:
+ minimal_blocks = blocks
+
+ # Sort minimal blocks by the number of successor nodes (we want to pick
+ # the one that leads leads to the minimal SD expansion).
+ minimal_blocks = sorted(minimal_blocks, key=lambda x: len(x[1]))
+
+ if sd.config["debug"]:
+ print(
+ f" > [{node}] Minimal blocks: {[(len(k), len(v)) for (k, v) in minimal_blocks]}"
+ )
+
+ if not check_maa:
+ # We will expand all nodes that are in the smallest block.
+ to_expand = minimal_blocks[0][1]
+
+ if sd.config["debug"]:
+ print(f" > [{node}] Final block ({len(to_expand)}): {to_expand}")
+
+ next_level = next_level | set(to_expand)
+ else:
+ # Here, we want to find the smallest block without any MAAs and choose it.
+ # If such block does not exist, we expand the whole node, because the MAAs
+ # can be either in this node, or in any of the child nodes. If a clean block
+ # is found, we know that it is safe to expand it without "missing" any MAAs.
+ clean_block_found = False
+ for block, block_nodes in minimal_blocks:
+ block_sd = sd.component_subdiagram(list(block), node)
+
+ # The succession diagram "restricted" to the considered block should have
+ # the same (restricted) successor nodes.
+ assert len(
+ block_sd.node_successors(block_sd.root(), compute=True)
+ ) == len(block_nodes)
+
+ # We could also consider using `seeds` instead of `candidates` here. Ultimately, this
+ # matters very rarely. The reasoning for why we use `candidates` is that we can (almost)
+ # always guarantee that the expansion is going to finish, even if some nodes do not
+ # have their MAAs eliminated. As such, one can try to use other techniques to disprove
+ # MAAs in the problematic nodes while using the nice properties of the expansion to
+ # still disprove MAAs in the remaining nodes. If we used `seeds`, the expansion could
+ # just get stuck on this node and the "partial" results wouldn't be usable.
+ block_sd_candidates = block_sd.node_attractor_candidates(
+ block_sd.root(), compute=True
+ )
+ if len(block_sd_candidates) == 0:
+ if sd.config["debug"]:
+ print(
+ f" > [{node}] Found clean block with no MAAs ({len(block_nodes)}): {block_nodes}"
+ )
+ clean_block_found = True
+ next_level = next_level | set(block_nodes)
+ sd.node_data(node)["attractor_seeds"] = []
+ sd.node_data(node)["attractor_sets"] = []
+ break
+ else:
+ if sd.config["debug"]:
+ print(
+ f"[{node}] > Found {len(block_sd_candidates)} MAA cnadidates in a block. Delaying expansion."
+ )
+ if not clean_block_found:
+ # If all blocks have MAAs, we expand all successors.
+ if sd.config["debug"]:
+ print(
+ f" > [{node}] No clean block found. Expanding all {len(successors)} successors."
+ )
+ next_level = next_level | set(successors)
+
+ current_level = next_level
+ next_level = set()
+
+ if sd.config["debug"]:
+ print(f" > Block expansion terminated with {len(sd)} node(s).")
+
+ return True
diff --git a/biobalm/_sd_attractors/attractor_candidates.py b/biobalm/_sd_attractors/attractor_candidates.py
@@ -302,18 +302,22 @@ def compute_attractor_candidates(
 
  # At this point, we know the candidate count increased and so we should
  # try to bring it back down.
- if sd.config["debug"]:
- print(f"[{node_id}] Optimizing partial retained set...")
- optimized = asp_greedy_retained_set_optimization(
- sd,
- node_id,
- petri_net=pn_reduced,
- retained_set=retained_set,
- candidate_states=candidate_states,
- avoid_dnf=child_motifs_reduced,
- )
- retained_set = optimized[0]
- candidate_states = optimized[1]
+ if (
+ len(candidate_states)
+ > sd.config["retained_set_optimization_threshold"]
+ ):
+ if sd.config["debug"]:
+ print(f"[{node_id}] Optimizing partial retained set...")
+ optimized = asp_greedy_retained_set_optimization(
+ sd,
+ node_id,
+ petri_net=pn_reduced,
+ retained_set=retained_set,
+ candidate_states=candidate_states,
+ avoid_dnf=child_motifs_reduced,
+ )
+ retained_set = optimized[0]
+ candidate_states = optimized[1]
 
  # Terminate if done.
  if len(candidate_states) == 0:
@@ -341,8 +345,12 @@ def compute_attractor_candidates(
 
  # Here, we gradually increase the iteration count while
  # the candidate set is being actively reduced. If the simulation
- # cannot reduce any further states, we are done.
- iterations = 1024
+ # cannot reduce any further states and exceeds the proposed budget,
+ # we are done.
+ iterations = 2**10
+ max_budget = (
+ sd.config["minimum_simulation_budget"] * bn_reduced.variable_count()
+ )
  while len(candidate_states) > 0:
  if sd.config["debug"]:
  print(
@@ -358,7 +366,10 @@ def compute_attractor_candidates(
  simulation_seed=123,
  )
 
- if len(reduced) == len(candidate_states):
+ if (
+ len(reduced) == len(candidate_states)
+ and (iterations * len(candidate_states)) > max_budget
+ ):
  candidate_states = reduced
  break
 

diff --git a/biobalm/interaction_graph_utils.py b/biobalm/interaction_graph_utils.py
@@ -14,7 +14,7 @@
 
 from typing import cast
 
-from biodivine_aeon import BooleanNetwork, RegulatoryGraph, SignType
+from biodivine_aeon import BooleanNetwork, RegulatoryGraph, SignType, SymbolicContext
 from networkx import DiGraph # type: ignore
 
 
@@ -181,3 +181,50 @@ def source_SCCs(bn: BooleanNetwork) -> list[list[str]]:
  result.append(scc_names)
 
  return sorted(result)
+
+
+def source_nodes(
+ network: BooleanNetwork, ctx: SymbolicContext | None = None
+) -> list[str]:
+ """
+ Identify the source nodes of a given `BooleanNetwork`.
+
+ Return the source nodes of a `BooleanNetwork`. That is, variables whose
+ value cannot change, but is not fixed to a `true`/`false` constant.
+
+ Note that this internally uses BDD translation to detect identity functions
+ semantically rather than syntactically. If you already have a
+ `SymbolicContext` for the given `network` available, you can supply it as
+ the second argument.
+
+ Parameters
+ ----------
+ network : BooleanNetwork
+ The Boolean network to be examined.
+ ctx : SymbolicContext
+ The context used to translate the network to BDDs. A
+ `biodivine_aeon.SymbolicContext` object.
+
+ Returns
+ -------
+ list[str]
+ The list of source nodes.
+ """
+ if ctx is None:
+ ctx = SymbolicContext(network)
+
+ result: list[str] = []
+ for var in network.variable_names():
+ update_function = network.get_update_function(var)
+ if update_function is None:
+ # This is an input variable with unspecified update
+ # (this defaults to identity in most tools).
+ assert len(network.predecessors(var)) == 0
+ result.append(var)
+ else:
+ fn_bdd = ctx.mk_update_function(update_function)
+ var_bdd = ctx.mk_network_variable(var)
+ if fn_bdd == var_bdd:
+ result.append(network.get_variable_name(var))
+
+ return result
File	Stmts	Miss	Cover	Missing
biobalm
_pint_reachability.py	61	50	18%	24, 40–54, 69–93, 101–146
control.py	114	14	88%	107, 119, 125, 129, 134, 143–159, 477, 480, 493
interaction_graph_utils.py	52	6	88%	11–13, 151–152, 222–223
petri_net_translation.py	149	11	93%	22–26, 79, 136, 308–309, 333–334, 343, 452
space_utils.py	132	20	85%	26–28, 104–110, 133–139, 347–350, 414, 462
succession_diagram.py	379	65	83%	6, 120, 210–215, 228, 275–282, 386–393, 410–411, 421, 427, 543, 630–636, 752, 755, 873–891, 923, 933, 936, 976, 983, 1034, 1048, 1170, 1329, 1340, 1348, 1376, 1391, 1403, 1408, 1414
symbolic_utils.py	32	5	84%	10, 39–44, 100, 128
trappist_core.py	184	27	85%	14–18, 55, 57, 92, 215, 217, 219, 247–250, 254–256, 276–282, 340, 342, 372, 420, 422
biobalm/_sd_algorithms
expand_attractor_seeds.py	60	7	88%	6, 28, 42, 109–114, 119
expand_bfs.py	28	1	96%	6
expand_dfs.py	30	1	97%	6
expand_minimal_spaces.py	37	2	95%	6, 31
expand_source_SCCs.py	111	16	86%	11–13, 50, 69, 77, 82, 103, 112, 120, 131, 140, 143, 167, 179, 242–243
expand_source_blocks.py	108	13	88%	11, 26, 41, 49, 54, 78, 128, 154, 163, 193, 203, 209, 218
expand_to_target.py	31	3	90%	6, 38, 43
biobalm/_sd_attractors
attractor_candidates.py	265	90	66%	13–15, 26–27, 93, 101, 107–108, 130, 152, 187, 193–204, 223, 239–320, 325, 329, 335, 341, 356, 383, 388, 392, 398, 400–438, 511, 582–583, 684
attractor_symbolic.py	114	16	86%	6–7, 75, 88–92, 103, 112, 144, 179, 191–193, 202, 230, 236
TOTAL	1974	347	82%