Skip to content

Commit

Permalink
Add wikidata nodes and edges, update add_resources endpoint example, …
Browse files Browse the repository at this point in the history
…use better variable names, revert kwarg addition to 'from_obo_path' call
  • Loading branch information
nanglo123 committed Jul 15, 2024
1 parent 725d5ad commit 55de2a2
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 11 deletions.
11 changes: 6 additions & 5 deletions mira/dkg/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from mira.dkg.client import AskemEntity, Entity, Relation
from mira.dkg.utils import DKG_REFINER_RELS
from mira.dkg.construct import process_resource
from mira.dkg.construct import add_resource_to_dkg

__all__ = [
"api_blueprint",
Expand Down Expand Up @@ -368,16 +368,17 @@ def add_relations(
)
def add_resources(
request: Request,
resource_list: List[str] = Body(
resource_prefix_list: List[str] = Body(
...,
description="A of resources to add to the DKG",
title="Resource Prefixes",
example=["probonto"],
example=["probonto", "wikidata", "eiffel", "geonames", "ncit",
"nbcbitaxon"],
)
):
for resource in resource_list:
for resource_prefix in resource_prefix_list:
# nodes and edges will be a list of dicts
nodes, edges = process_resource(resource.lower())
nodes, edges = add_resource_to_dkg(resource_prefix.lower())
# node_info and edge_info are dictionaries that will be
# unpacked when creating instances of entities and relations
entities = [Entity(**node_info) for node_info in nodes]
Expand Down
1 change: 0 additions & 1 deletion mira/dkg/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,6 @@ def add_relation(self, relation):

self.create_tx(create_relation_query)


def create_single_property_node_index(
self,
index_name: str,
Expand Down
50 changes: 48 additions & 2 deletions mira/dkg/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,10 +426,55 @@ def extract_cso_nodes_edges():


def extract_wikidata_nodes_edges():
    """Build DKG node records for Wikidata units and physical constants.

    Unit terms are read from ``get_unit_terms()`` and physical constant
    terms from ``get_physical_constant_terms()``. Every term becomes one
    node dictionary whose ``id`` is the Wikidata identifier prefixed with
    ``wikidata:``.

    Returns
    -------
    :
        A ``(nodes, edges)`` pair. ``nodes`` is the list of node
        dictionaries; ``edges`` is always an empty list because this
        function does not extract any relations. The pair is returned
        explicitly so that callers can unpack it as
        ``nodes, edges = ...`` like the other resource extractors.
    """
    wikidata_nodes, wikidata_edges = [], []

    # Units: synonyms carry an empty type string.
    for wikidata_id, label, description, synonyms, xrefs in tqdm(
        get_unit_terms(), unit="unit"
    ):
        unit_synonyms = [Synonym(value=syn, type="") for syn in synonyms]
        unit_xrefs = [
            Xref(id=xref_id, type="oboinowl:hasDbXref") for xref_id in xrefs
        ]
        wikidata_nodes.append(
            {
                "id": f"wikidata:{wikidata_id}",
                "name": label,
                "type": "class",
                "description": description,
                "synonyms": unit_synonyms,
                "xrefs": unit_xrefs,
                "obsolete": False,
            }
        )

    # Physical constants: ``_formula`` is part of each term tuple but is not
    # stored on the node.
    for (
        wikidata_id,
        label,
        description,
        synonyms,
        xrefs,
        value,
        _formula,
        symbols,
    ) in tqdm(get_physical_constant_terms()):
        # Plain synonyms come first, followed by the symbols; the two groups
        # are distinguished only by their synonym type.
        constant_synonyms = [
            Synonym(value=syn, type="oboInOwl:hasExactSynonym")
            for syn in synonyms
        ]
        constant_synonyms.extend(
            Synonym(value=symbol, type="debio:0000031") for symbol in symbols
        )
        constant_xrefs = [
            Xref(id=xref_id, type="oboinowl:hasDbXref") for xref_id in xrefs
        ]
        # A falsy value (e.g. None or an empty string) yields no properties.
        properties = {"debio:0000042": [str(value)]} if value else {}
        wikidata_nodes.append(
            {
                "id": f"wikidata:{wikidata_id}",
                "name": label,
                "obsolete": False,
                "type": "class",
                "description": description,
                "synonyms": constant_synonyms,
                "xrefs": constant_xrefs,
                "properties": properties,
            }
        )

    return wikidata_nodes, wikidata_edges


def process_resource(resource_prefix: str):
def add_resource_to_dkg(resource_prefix: str):
if resource_prefix == "probonto":
return extract_probonto_nodes_edges()
elif resource_prefix == "geonames":
Expand All @@ -443,6 +488,7 @@ def process_resource(resource_prefix: str):
elif resource_prefix == "cso":
return extract_cso_nodes_edges()
elif resource_prefix == "wikidata":
# combine retrieval of wikidata constants and units
return extract_wikidata_nodes_edges()


Expand Down
5 changes: 2 additions & 3 deletions mira/dkg/resources/cso.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ def get_cso_obo() -> Obo:
)
download(url=URL, path=PATH)
# use https://github.com/pyobo/pyobo/pull/159
kwargs = {"default_prefix": "cso"}
return from_obo_path(PATH, prefix="cso", strict=False, **kwargs)
return from_obo_path(PATH, prefix="cso", default_prefix="cso", strict=False)


if __name__ == "__main__":
for term in get_cso_obo():
print(term)
print(term)

0 comments on commit 55de2a2

Please sign in to comment.