diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index ad683e8c4..631e25415 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -564,8 +564,7 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: f"{manager.get_name(prefix)} ({len(_graphs)} graphs)", fg="green", bold=True ) for graph in tqdm(_graphs, unit="graph", desc=prefix, leave=False): - if not graph.id: - raise ValueError(f"graph in {prefix} missing an ID") + graph_id = graph.id or prefix version = graph.version if version == "imports": version = None @@ -623,7 +622,7 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: .replace(" ", " ") if node.name else "", - synonyms=";".join(synonym.val for synonym in node.synonyms), + synonyms=";".join(synonym.value for synonym in node.synonyms), deprecated="true" if node.deprecated else "false", # type:ignore # TODO better way to infer type based on hierarchy # (e.g., if rdfs:type available, consider as instance) @@ -639,7 +638,8 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: property_values=";".join(property_values), xref_types=";".join(xref_predicates), synonym_types=";".join( - synonym.pred for synonym in node.synonyms + synonym.predicate.curie if synonym.predicate else synonym.predicate_raw + for synonym in node.synonyms ), ) @@ -651,7 +651,7 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: "replaced_by", "iao:0100001", prefix, - graph.id, + graph_id, version or "", ) ) @@ -690,7 +690,7 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: "xref", "oboinowl:hasDbXref", prefix, - graph.id, + graph_id, version or "", ) ) @@ -713,12 +713,15 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: synonym_types="", ) - for provenance_curie in node.get_provenance(): + for provenance in node.get_provenance(): + if ":" in provenance.identifier: + tqdm.write(f"Malformed provenance for {node.curie}") + provenance_curie = provenance.curie node_sources[provenance_curie].add(prefix) if provenance_curie not in nodes: nodes[provenance_curie] = NodeInfo( curie=provenance_curie, - prefix=provenance_curie.split(":")[0], + prefix=provenance.prefix, label="", synonyms="", deprecated="false", @@ -739,7 +742,7 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: "has_citation", "debio:0000029", prefix, - graph.id, + graph_id, version or "", ) ) @@ -785,7 +788,7 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: _get_edge_name(edge.pred).lower().replace(" ", "_").replace("-", "_"), edge.pred, prefix, - graph.id, + graph_id, version or "", ) for edge in tqdm( diff --git a/mira/dkg/construct_rdf.py b/mira/dkg/construct_rdf.py index 38fa0da9a..76aaa15f1 100644 --- a/mira/dkg/construct_rdf.py +++ b/mira/dkg/construct_rdf.py @@ -27,6 +27,8 @@ # Should be fixed in https://github.com/geneontology/go-ontology/pull/24148 # and after HP re-imports GO "doi:10.1002/(SICI)1097-4687(199608)229:2<121::AID-JMOR1>3.0.CO;2-4", + # https://github.com/obophenotype/human-phenotype-ontology/pull/9812 + "pubmed:14645606|PMID:14647932|PMID:31669363", } REMAPPING = { REFERENCED_BY_SYMBOL: "debio:0000030",