From 6aec35442215fbd2a966cff2e5ff419ff47eb4cb Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Mon, 30 Sep 2024 09:58:22 -0400 Subject: [PATCH] Readd city term processing and don't add duplicate xref edges --- mira/dkg/construct.py | 6 +++--- mira/dkg/resources/geonames.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 35c4f2e1..c136f770 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -1112,8 +1112,7 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: if xref.value.prefix in obograph.PROVENANCE_PREFIXES: # Don't add provenance information as xrefs continue - edges.append( - ( + xref_edge_info = ( node.curie, xref.value.curie, "xref", @@ -1122,7 +1121,8 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: graph_id, version or "", ) - ) + if xref_edge_info not in edges: + edges.append(xref_edge_info) if xref.value.curie not in nodes: node_sources[node.replaced_by].add(prefix) nodes[xref.value.curie] = NodeInfo( diff --git a/mira/dkg/resources/geonames.py b/mira/dkg/resources/geonames.py index 542c2b9c..2b782158 100644 --- a/mira/dkg/resources/geonames.py +++ b/mira/dkg/resources/geonames.py @@ -122,6 +122,7 @@ def get_cities(code_to_country, code_to_admin1, code_to_admin2, *, minimum_popul cols = ["geonames_id", "name", "synonyms", "country_code", "admin1", "admin2", "population"] for identifier, name, synonyms, country, admin1, admin2, population in (cities_df[cols].values): + terms[identifier] = term = Term.from_triple("geonames", identifier,name) if synonyms and not isinstance(synonyms, float): for synoynm in synonyms: term.append_synonym(synoynm)