Skip to content

Commit

Permalink
add an example for federated query
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Oct 21, 2024
1 parent 7a9d6aa commit 9000678
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 1 deletion.
49 changes: 49 additions & 0 deletions notebooks/test_expasy_chat.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,53 @@
"\n",
"\n",
" # New queries to test:\n",
" # What are the genes expressed in the human brain?\n",
" # FAILS to add filter for human\n",
" # What are the human genes expressed in the brain? WORKS as expected\n",
"\n",
" # Which are the human genes associated with cancer and their orthologs?\n",
" # This one does not work because, in the generated query, the variable used in the UniProt block (?protein) does not match the one used in the OMA block (?humanUniprot)...\n",
" # TODO: when we parse the query, check that there is a link between the two blocks (i.e. both blocks are using the same variable)\n",
" # https://sibkru.atlassian.net/jira/software/projects/E4/boards/6?selectedIssue=E4-34\n",
"# {\n",
"# \"question\": \"Which are the human genes associated with cancer and their orthologs? Return ?humanGeneName ?orthologUniprot, and limit to 10\",\n",
"# \"endpoint\": \"https://sparql.uniprot.org/sparql/\",\n",
"# \"query\": \"\"\"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
"# PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
"# PREFIX taxon: <http://purl.uniprot.org/taxonomy/>\n",
"# PREFIX up: <http://purl.uniprot.org/core/>\n",
"# PREFIX orth: <http://purl.org/net/orth#>\n",
"# PREFIX lscr: <http://purl.org/lscr#>\n",
"# SELECT DISTINCT ?humanGeneName ?orthologProtein ?orthologUniprot\n",
"# WHERE {\n",
"# # Retrieve human genes associated with cancer from UniProt\n",
"# ?humanUniprot a up:Protein ;\n",
"# up:organism taxon:9606 ;\n",
"# up:encodedBy ?gene ;\n",
"# up:annotation ?annotation .\n",
"# ?annotation a up:Disease_Annotation ;\n",
"# rdfs:comment ?diseaseComment .\n",
"# FILTER(CONTAINS(LCASE(?diseaseComment), \"cancer\"))\n",
"# ?gene skos:prefLabel ?humanGeneName .\n",
"\n",
"# # Find orthologs of these genes using OMA\n",
"# SERVICE <https://sparql.omabrowser.org/sparql> {\n",
"# ?cluster a orth:OrthologsCluster ;\n",
"# orth:hasHomologousMember ?node1 ;\n",
"# orth:hasHomologousMember ?node2 .\n",
"# ?node1 orth:hasHomologousMember* ?humanProtein .\n",
"# ?node2 orth:hasHomologousMember* ?orthologProtein .\n",
"# ?humanProtein lscr:xrefUniprot ?humanUniprot .\n",
"# ?orthologProtein lscr:xrefUniprot ?orthologUniprot .\n",
"# FILTER(?node1 != ?node2)\n",
"# } } LIMIT 10\"\"\",\n",
"# },\n",
"\n",
"\n",
" # List human genes that have known orthologs in the rat and are expressed in the brain?\n",
"\n",
"\n",
" # Which are the human genes associated with cancer and their orthologs expressed in the rat brain?\n",
" # Find all proteins linked to arachidonate (CHEBI:32395) and their associated pathways\n",
" # List all enzymes that have been experimentally validated and are involved in DNA repair\n",
" # Find all proteins that have a mutagenesis annotation affecting their active site\n",
Expand Down Expand Up @@ -383,6 +430,8 @@
" # } LIMIT 20\"\"\",\n",
" # },\n",
"\n",
" # Which are the human genes associated with lung cancer and their orthologs expressed in the rat brain?\n",
"\n",
" # {\n",
" # \"question\": \"Which are the human genes associated with cancer (which have cancer in their disease label) and their orthologs expressed in the rat brain? Return the disease label, human gene URI, human gene HGNC symbol, ortholog rat gene URI\",\n",
" # \"endpoint\": \"https://sparql.uniprot.org/sparql/\",\n",
Expand Down
1 change: 1 addition & 0 deletions src/sparql_llm/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ def chat_ui(request: Request) -> Any:
"How can I get the HGNC symbol for the protein P68871?",
"What are the rat orthologs of the human TP53?",
"Where is expressed the gene ACE2 in human?",
"List the genes in primates orthologous to genes expressed in the fruit fly's eye",
# "Say hi",
# "Which are the genes, expressed in the rat, corresponding to human genes associated with cancer?",
# "What is the gene associated with the protein P68871?",
Expand Down
3 changes: 2 additions & 1 deletion src/sparql_llm/embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ def load_schemaorg_description(endpoint: dict[str, str]) -> list[Document]:
g = ConjunctiveGraph()
for json_ld_tag in json_ld_tags:
json_ld_content = json_ld_tag.string
# print(json_ld_content)
if json_ld_content:
g.parse(data=json_ld_content, format="json-ld")
# json_ld_content = json.loads(json_ld_content)
question = f"What are the general metadata about {endpoint['label']} resource? (description, creators, license, dates, version, etc)"
question = f"What are the general metadata about {endpoint['label']} resource? (description, creators, maintainers, license, dates, version, etc)"
docs.append(
Document(
page_content=question,
Expand Down

0 comments on commit 9000678

Please sign in to comment.