From 0767e94adaecbed3e37b82c16d561ba396ba8e8e Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 6 Jul 2023 20:27:43 +0200 Subject: [PATCH 1/6] Fix node header --- mira/dkg/constants.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mira/dkg/constants.py b/mira/dkg/constants.py index 57527304a..6088b6cf9 100644 --- a/mira/dkg/constants.py +++ b/mira/dkg/constants.py @@ -12,6 +12,11 @@ "xrefs:string[]", "alts:string[]", "version:string", + "property_predicates:string[]", + "property_values:string[]", + "xref_types:string[]", + "synonym_types:string[]", + "source:string", ) #: The used for the edges files in the neo4j bulk import From 60485ed096d28c33be970925a21fed73bd16e2c1 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 6 Jul 2023 20:40:23 +0200 Subject: [PATCH 2/6] Update Dockerfile --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 2707a2896..6e4ecdcf6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -25,7 +25,7 @@ RUN wget -O /sw/nodes.tsv.gz https://askem-mira.s3.amazonaws.com/dkg/$domain/bui neo4j-admin import --delimiter='TAB' --skip-duplicate-nodes=true --skip-bad-relationships=true --nodes /sw/nodes.tsv.gz --relationships /sw/edges.tsv.gz # Python packages -RUN python -m pip install git+https://github.com/indralab/mira.git@main#egg=mira[web,uvicorn] && \ +RUN python -m pip install git+https://github.com/indralab/mira.git@main#egg=mira[web,uvicorn,dkg-client] && \ python -m pip uninstall -y flask_bootstrap && \ python -m pip uninstall -y bootstrap_flask && \ python -m pip install bootstrap_flask From 56cda62b554593963d78586e1fadd9e2ed1acf8a Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 6 Jul 2023 20:42:18 +0200 Subject: [PATCH 3/6] Update Dockerfile --- docker/Dockerfile.local | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.local b/docker/Dockerfile.local index 4fd0673c0..abeb5b6ba 100644 --- a/docker/Dockerfile.local +++ b/docker/Dockerfile.local @@ -24,7 +24,7 @@ RUN sed -i 's/#dbms.default_listen_address/dbms.default_listen_address/' /etc/ne neo4j-admin import --delimiter='TAB' --skip-duplicate-nodes=true --skip-bad-relationships=true --nodes /sw/nodes.tsv.gz --relationships /sw/edges.tsv.gz # Python packages -RUN python -m pip install git+https://github.com/indralab/mira.git@$branch#egg=mira[web,uvicorn] && \ +RUN python -m pip install git+https://github.com/indralab/mira.git@$branch#egg=mira[web,uvicorn,dkg-client] && \ python -m pip uninstall -y flask_bootstrap && \ python -m pip uninstall -y bootstrap_flask && \ python -m pip install bootstrap_flask From 5a08c79f555c7972d370efc0b6a495f5ea674011 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 6 Jul 2023 20:58:25 +0200 Subject: [PATCH 4/6] Update setup.cfg --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index d394e597a..82da1fd6e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,6 +65,7 @@ web = gilda click neo4j + networkx pystow tabulate pygraphviz From b84028a820cb1a4637e49575f64ee7a18d4faac7 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Thu, 6 Jul 2023 15:51:57 -0400 Subject: [PATCH 5/6] Exclude terms that result in empty norm texts --- mira/dkg/client.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/mira/dkg/client.py b/mira/dkg/client.py index ba4a23b8d..aac78b49d 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -598,25 +598,29 @@ def get_terms( from gilda.process import normalize from gilda.term import Term - yield Term( - norm_text=normalize(name), - text=name, - db=prefix, - id=identifier, - entry_name=name, - status="name", - source=prefix, - ) - for synonym in synonyms or []: + norm_text = normalize(name) + if norm_text: yield Term( - norm_text=normalize(synonym), - text=synonym, + norm_text=norm_text, + text=name, db=prefix, id=identifier, entry_name=name, - status="synonym", + status="name", source=prefix, ) + for synonym in synonyms or []: + norm_text = normalize(synonym) + if norm_text: + yield Term( + norm_text=norm_text, + text=synonym, + db=prefix, + id=identifier, + entry_name=name, + status="synonym", + source=prefix, + ) def build_match_clause( From ac7f04f6b7ef82e099f8e599a31dbc5443166c6f Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 6 Jul 2023 21:56:13 +0200 Subject: [PATCH 6/6] Exclude strings with no norm text --- mira/dkg/client.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mira/dkg/client.py b/mira/dkg/client.py index aac78b49d..745962c56 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -382,13 +382,15 @@ def get_grounder(self, prefix: Union[str, List[str]]) -> "gilda.grounder.Grounde if isinstance(prefix, str): prefix = [prefix] - terms = list( - itt.chain.from_iterable( + terms = [ + term + for term in itt.chain.from_iterable( self.get_grounder_terms(p) for p in tqdm( prefix, desc="Caching grounding terms" ) ) - ) + if term.norm_text + ] return Grounder(terms) def get_node_counter(self) -> Counter: