Skip to content

Commit

Permalink
Initial implementation of adding an ontology subtree
Browse files Browse the repository at this point in the history
  • Loading branch information
nanglo123 committed Sep 13, 2024
1 parent ebd00b9 commit 9f131b9
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 4 deletions.
23 changes: 22 additions & 1 deletion mira/dkg/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from mira.dkg.client import AskemEntity, Entity, Relation
from mira.dkg.utils import DKG_REFINER_RELS
from mira.dkg.construct import add_resource_to_dkg
from mira.dkg.construct import add_resource_to_dkg, extract_ontology_term

__all__ = [
"api_blueprint",
Expand Down Expand Up @@ -360,6 +360,27 @@ def add_relations(
for relation in relation_list:
request.app.state.client.add_relation(relation)

@api_blueprint.post(
    "/add_ontology_subtree",
    response_model=None,
    tags=["relations"],
)
def add_ontology_subtree(
    request: Request,
    curie_to_add: str = Query(..., example="ncbitaxon:9871"),
    add_subtree: bool = False
):
    """Add the ontology entry identified by a curie to the DKG as a node.

    The node and edge dictionaries are produced by
    ``extract_ontology_term``; each one is turned into an ``Entity`` or
    ``Relation`` and handed to the client stored on the application state.

    Parameters
    ----------
    request :
        The incoming request; the client is read from
        ``request.app.state.client``.
    curie_to_add :
        The curie of the ontology entry to add.
    add_subtree :
        Forwarded to ``extract_ontology_term``; when enabled, all entries
        under the entry's subtree are added as well.
    """
    node_records, edge_records = extract_ontology_term(
        curie_to_add, add_subtree
    )

    # Instantiate every model up front, so that an error raised while
    # constructing one happens before any add_node/add_relation call.
    new_entities = []
    for node_record in node_records:
        new_entities.append(Entity(**node_record))
    new_relations = []
    for edge_record in edge_records:
        new_relations.append(Relation(**edge_record))

    # Nodes are written first, then the relations.
    for new_entity in new_entities:
        request.app.state.client.add_node(new_entity)
    for new_relation in new_relations:
        request.app.state.client.add_relation(new_relation)


@api_blueprint.post(
"/add_resources",
Expand Down
109 changes: 106 additions & 3 deletions mira/dkg/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
import click
import pyobo
import pystow
import networkx
from bioontologies import obograph
from bioontologies.obograph import Xref
from bioregistry import manager
from pydantic import BaseModel, Field
from pyobo.struct import part_of
from pyobo.struct import part_of, is_a
from pyobo.sources import ontology_resolver
from tabulate import tabulate
from tqdm.auto import tqdm
Expand All @@ -54,7 +54,7 @@
from mira.dkg.physical_constants import get_physical_constant_terms
from mira.dkg.constants import EDGE_HEADER, NODE_HEADER
from mira.dkg.utils import PREFIXES
from mira.dkg.client import Synonym, Xref
from mira.dkg.models import Synonym, Xref
from mira.dkg.resources.cso import get_cso_obo
from mira.dkg.resources.geonames import get_geonames_terms
from mira.dkg.resources.extract_eiffel_ontology import get_eiffel_ontology_terms
Expand Down Expand Up @@ -421,6 +421,109 @@ def add_resource_to_dkg(resource_prefix: str):
# handle resource names that we don't process
return [], []

def _ontology_node_to_dict(node_curie, node_data):
    """Convert one ontology graph node into a DKG node dictionary.

    Parameters
    ----------
    node_curie :
        The curie identifying the node; it is lowercased for the ``id``.
    node_data :
        The attribute mapping stored on the graph node. ``name`` is
        required; ``synonym``, ``xref`` and ``property_value`` are optional
        lists of strings.

    Returns
    -------
    : dict
        The node information, ready to be unpacked into an ``Entity``.
    """
    synonyms = []
    for syn in node_data.get("synonym", []):
        # The synonym text is taken from between the first pair of double
        # quotes; fall back to the raw string if there are no quotes
        # instead of raising an IndexError.
        pieces = syn.split("\"")
        synonyms.append(
            Synonym(value=pieces[1] if len(pieces) > 1 else syn.strip(),
                    type="")
        )

    properties = {}
    for text in node_data.get("property_value", []):
        # Split on the first space only, so values that contain spaces
        # (or no space at all) don't break the key/value unpacking.
        key, _, value = text.partition(" ")
        properties[key] = value

    return {
        "id": node_curie.lower(),
        "name": node_data["name"],
        "type": "class",
        "description": "",
        "obsolete": False,
        "synonyms": synonyms,
        "alts": [],
        "xrefs": [Xref(id=xref_curie.lower(), type="")
                  for xref_curie in node_data.get("xref", [])],
        "properties": properties,
    }


def extract_ontology_term(curie: str, add_subtree: bool = False):
    """Takes in a curie and extracts the information from the
    entry in its respective resource ontology to add as a node into the
    Epidemiology DKG.

    There is an option to extract all the information from the entries
    under the corresponding entry's subtree in its respective ontology.
    Relation information is also extracted with this option.

    Only the ``ncbitaxon`` prefix is handled; any other prefix yields two
    empty lists. The curie is matched case-insensitively by lowercasing it
    before the lookup.

    Parameters
    ----------
    curie :
        The curie for the entry that will be added as a node to the
        Epidemiology DKG.
    add_subtree :
        Whether to add all the nodes and relations under the entry's subtree

    Returns
    -------
    nodes : List[dict]
        A list of node information added to the DKG, where each node is
        represented as a dictionary.
    edges : List[dict]
        A list of edge information added to the DKG, where each edge is
        represented as a dictionary.
    """
    nodes, edges = [], []
    # The graph is meant to be relabeled with lowercase identifiers (see
    # the place-holder notes below), so normalize the input once and use
    # the normalized form for every lookup and comparison.
    curie = curie.lower()
    resource_prefix = curie.split(":")[0]
    if resource_prefix != "ncbitaxon":
        # No graph exists for other resources; return early rather than
        # falling through to a lookup on an undefined graph.
        return nodes, edges

    # place-holder
    # load ncbitaxon.obo using pyobo
    # check to see if the obo is cached
    # load the obo file as a networkx graph using obonet
    # relabel the node indexes using lowercases
    # pickle the new graph with relabeled indexes
    # load the graph
    # check to see if the graph is pickled and stored
    graph = networkx.DiGraph()

    node = graph.nodes.get(curie)
    if not node:
        return nodes, edges

    if not add_subtree:
        nodes.append(_ontology_node_to_dict(curie, node))
        return nodes, edges

    for node_curie in networkx.ancestors(graph, curie) | {curie}:
        # Look the node up under its own label, lowercase only the output
        node_to_add = graph.nodes[node_curie]
        node_id = node_curie.lower()
        nodes.append(_ontology_node_to_dict(node_id, node_to_add))

        # Don't add relations where the original curie to add is the source
        # of an is_a relation. Root nodes won't have an is_a relation, and
        # neither will any other node that lacks an ``is_a`` entry.
        parents = node_to_add.get("is_a")
        if (
            node_id == curie
            or node_to_add["name"] == "root"
            or not parents
        ):
            continue
        edges.append(
            {
                "source_curie": node_id,
                "target_curie": parents[0].lower(),
                "type": is_a.name,
                "pred": is_a.curie,
                "source": resource_prefix,
                "graph": resource_prefix,
                "version": "",
            }
        )
    return nodes, edges

@click.command()
@click.option(
Expand Down
1 change: 1 addition & 0 deletions mira/dkg/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"HERE",
"get_resource_path",
"SLIMS",
"get_ncbitaxon"
]

HERE = os.path.dirname(os.path.abspath(__file__))
Expand Down

0 comments on commit 9f131b9

Please sign in to comment.