Skip to content

Commit

Permalink
Merge pull request #367 from nanglo123/add_graph_to_docker
Browse files Browse the repository at this point in the history
Add obo pickled graphs to docker build
  • Loading branch information
bgyori authored Sep 23, 2024
2 parents 8e39c17 + 9386894 commit 15708a9
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 20 deletions.
2 changes: 2 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ RUN python -m pip install --upgrade pip && \
python -m pip install --no-dependencies "lxml>=4.6.4" && \
python -m pip install --no-dependencies --ignore-requires-python sbmlmath

RUN python -m mira.dkg.generate_obo_graphs

# Copy the example json for reconstructing the ode semantics
RUN wget -O /sw/sir_flux_span.json https://raw.githubusercontent.com/gyorilab/mira/main/tests/sir_flux_span.json

Expand Down
29 changes: 9 additions & 20 deletions mira/dkg/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,8 @@
from pydantic import BaseModel, Field
from pyobo.struct import part_of, is_a
from pyobo.sources import ontology_resolver
from pyobo.getters import _ensure_ontology_path
from pyobo.api.utils import get_version
from pyobo.utils.path import prefix_directory_join
from obonet import read_obo
from tabulate import tabulate
from tqdm.auto import tqdm
from typing_extensions import Literal
Expand All @@ -63,6 +61,7 @@
from mira.dkg.resources.geonames import get_geonames_terms
from mira.dkg.resources.extract_eiffel_ontology import get_eiffel_ontology_terms
from mira.dkg.resources.uat import get_uat
from mira.dkg.generate_obo_graphs import download_convert_ncbitaxon_obo_to_graph

MODULE = pystow.module("mira")
DEMO_MODULE = MODULE.module("demo", "import")
Expand Down Expand Up @@ -434,14 +433,11 @@ def extract_ontology_subtree(curie: str, add_subtree: bool = False):
under the corresponding entry's subtree in its respective ontology.
Relation information is also extracted with this option.
Running this method for the first time for each specific resource will
take a long time (minutes) as the obo resource file has to be downloaded,
converted to a networkx graph, have their node indices normalized, and
pickled.
Subsequent runs of this method will take a few seconds as the pickled
Execution of this method will take a few seconds as the pickled
graph object has to be loaded.
Currently we only support the addition of ncbitaxon terms.
Parameters
----------
curie :
Expand All @@ -465,19 +461,12 @@ def extract_ontology_subtree(curie: str, add_subtree: bool = False):
type = "class"
version = get_version(resource_prefix)
cached_relabeled_obo_graph_path = prefix_directory_join(resource_prefix,
name="relabeled_obo_graph.pkl",
version=version)
name="relabeled_obo_graph.pkl",
version=version)
if not cached_relabeled_obo_graph_path.exists():
_, obo_path = _ensure_ontology_path(resource_prefix, force=False,
version=version)
obo_graph = read_obo(obo_path)
relabeled_graph = networkx.relabel_nodes(obo_graph,
lambda node_index: node_index.lower())
with open(cached_relabeled_obo_graph_path,'wb') as relabeled_graph_file:
pickle.dump(relabeled_graph, relabeled_graph_file)
else:
with open(cached_relabeled_obo_graph_path,'rb') as relabeled_graph_file:
relabeled_graph = pickle.load(relabeled_graph_file)
download_convert_ncbitaxon_obo_to_graph()
with open(cached_relabeled_obo_graph_path,'rb') as relabeled_graph_file:
relabeled_graph = pickle.load(relabeled_graph_file)
else:
return nodes, edges

Expand Down
34 changes: 34 additions & 0 deletions mira/dkg/generate_obo_graphs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from pyobo.api.utils import get_version
from pyobo.getters import _ensure_ontology_path
from pyobo.utils.path import prefix_directory_join
from obonet import read_obo
import networkx
import pickle


def download_convert_ncbitaxon_obo_to_graph():
    """Build and cache the pickled, relabeled ncbitaxon OBO graph.

    If the pickle ``relabeled_obo_graph.pkl`` does not yet exist in the
    versioned ncbitaxon directory, the ncbitaxon OBO file is obtained via
    ``_ensure_ontology_path``, parsed with ``obonet.read_obo``, its node
    identifiers are lower-cased, and the resulting graph is pickled to that
    location. If the pickle already exists, nothing is done.

    The pickle is first written to a temporary sibling file and only renamed
    to its final name once it has been written completely. This way an
    interrupted run never leaves behind a truncated cache file that later
    calls would mistake for a valid one.

    Returns
    -------
    None
    """
    resource_prefix = "ncbitaxon"
    version = get_version(resource_prefix)

    # Location of the cached pickle for this version of the resource
    cached_relabeled_obo_graph_path = prefix_directory_join(
        resource_prefix, name="relabeled_obo_graph.pkl", version=version
    )
    # Nothing to do if the pickled graph has already been generated
    if cached_relabeled_obo_graph_path.exists():
        return

    _, obo_path = _ensure_ontology_path(
        resource_prefix, force=False, version=version
    )
    obo_graph = read_obo(obo_path)

    # Normalize node indices
    relabeled_graph = networkx.relabel_nodes(
        obo_graph, lambda node_index: node_index.lower()
    )

    # NOTE(review): assumes prefix_directory_join returns a pathlib.Path
    # (the code already relies on its .exists() method) - confirm.
    temporary_path = cached_relabeled_obo_graph_path.with_name(
        cached_relabeled_obo_graph_path.name + ".tmp"
    )
    try:
        with open(temporary_path, "wb") as relabeled_graph_file:
            pickle.dump(relabeled_graph, relabeled_graph_file)
        # Rename into place only after the dump has fully succeeded
        temporary_path.replace(cached_relabeled_obo_graph_path)
    except BaseException:
        # Do not leave a partially written temporary file behind
        if temporary_path.exists():
            temporary_path.unlink()
        raise


# Script entry point: running this module directly (e.g. via
# ``python -m mira.dkg.generate_obo_graphs``) generates the pickled
# ncbitaxon graph if it is not already cached.
if __name__ == "__main__":
    download_convert_ncbitaxon_obo_to_graph()

0 comments on commit 15708a9

Please sign in to comment.