diff --git a/notebooks/test_expasy_chat.ipynb b/notebooks/test_expasy_chat.ipynb
index 173bbd7..1910921 100644
--- a/notebooks/test_expasy_chat.ipynb
+++ b/notebooks/test_expasy_chat.ipynb
@@ -17,954 +17,14 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐งช Testing 13 queries\n",
- "\n",
- "๐ง Testing model: gpt-4o-mini\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ๏ธ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "\n",
- "Results mismatch. Ref: 4 != gen: 0\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ๏ธ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "\n",
- "Results mismatch. Ref: 4 != gen: 0\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "\n",
- "Results mismatch. Ref: 4 != gen: 0\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "\n",
- "Results mismatch. Ref: 4 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 2\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 2, Error: 0\n",
- "๐ฏ RAG with validation - Success: 0, Different results: 0, No results: 2, Error: 0\n",
- "โ
1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 4\n",
- "๐ฏ RAG without validation - Success: 2, Different results: 0, No results: 2, Error: 0\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 2, Error: 0\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ
1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n",
- "โ
2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n",
- "โ
1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n",
- "โ
2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 4\n",
- "๐ฏ RAG without validation - Success: 4, Different results: 0, No results: 2, Error: 0\n",
- "๐ฏ RAG with validation - Success: 4, Different results: 0, No results: 2, Error: 0\n",
- "Missing from reference: {'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n",
- "โ๏ธ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "\n",
- "Results mismatch. Ref: 710 != gen: 413\n",
- "\n",
- "Missing from reference: {'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n",
- "โ๏ธ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "\n",
- "Results mismatch. Ref: 710 != gen: 413\n",
- "\n",
- "Missing from reference: {'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n",
- "โ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "\n",
- "Results mismatch. Ref: 710 != gen: 413\n",
- "\n",
- "Missing from reference: {'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n",
- "โ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "\n",
- "Results mismatch. Ref: 710 != gen: 413\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 6\n",
- "๐ฏ RAG without validation - Success: 4, Different results: 2, No results: 2, Error: 0\n",
- "๐ฏ RAG with validation - Success: 4, Different results: 2, No results: 2, Error: 0\n",
- "Missing from reference: {'hgncSymbol': {'type': 'literal', 'value': 'AKT3'}, 'diseaseLabel': {'type': 'literal', 'value': 'AKT3 is a key modulator of several tumors like melanoma, glioma and ovarian cancer. Active AKT3 increases progressively during melanoma tumor progression with highest levels present in advanced-stage metastatic melanomas. Promotes melanoma tumorigenesis by decreasing apoptosis. Plays a key role in the genesis of ovarian cancers through modulation of G2/M phase transition. With AKT2, plays a pivotal role in the biology of glioblastoma.'}}\n",
- "โ๏ธ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 6308\n",
- "\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ๏ธ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 8\n",
- "๐ฏ RAG without validation - Success: 4, Different results: 3, No results: 3, Error: 0\n",
- "๐ฏ RAG with validation - Success: 4, Different results: 2, No results: 4, Error: 0\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ๏ธ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "\n",
- "Results mismatch. Ref: 10 != gen: 0\n",
- "\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ๏ธ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "\n",
- "Results mismatch. Ref: 10 != gen: 0\n",
- "\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "\n",
- "Results mismatch. Ref: 10 != gen: 0\n",
- "\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "\n",
- "Results mismatch. Ref: 10 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 10\n",
- "๐ฏ RAG without validation - Success: 4, Different results: 3, No results: 5, Error: 0\n",
- "๐ฏ RAG with validation - Success: 4, Different results: 2, No results: 6, Error: 0\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ๏ธ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ๏ธ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 12\n",
- "๐ฏ RAG without validation - Success: 4, Different results: 3, No results: 7, Error: 0\n",
- "๐ฏ RAG with validation - Success: 4, Different results: 2, No results: 8, Error: 0\n",
- "โ
1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n",
- "Missing from reference: {'orthologCluster': {'type': 'literal', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n",
- "โ๏ธ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "โ
1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n",
- "โ
2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 14\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 4, No results: 7, Error: 0\n",
- "๐ฏ RAG with validation - Success: 6, Different results: 2, No results: 8, Error: 0\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ๏ธ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ๏ธ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 16\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 4, No results: 9, Error: 0\n",
- "๐ฏ RAG with validation - Success: 6, Different results: 2, No results: 10, Error: 0\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ๏ธ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ๏ธ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 3, Error: 17\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 4, No results: 11, Error: 0\n",
- "๐ฏ RAG with validation - Success: 6, Different results: 2, No results: 12, Error: 0\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ๏ธ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ๏ธ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 3, Error: 19\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 4, No results: 13, Error: 0\n",
- "๐ฏ RAG with validation - Success: 6, Different results: 2, No results: 14, Error: 0\n",
- "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/138'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q16643'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Cell cortex'}}\n",
- "โ๏ธ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n",
- "โ๏ธ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/138'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q16643'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Cell cortex'}}\n",
- "โ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n",
- "โ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 3, Error: 21\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 5, No results: 14, Error: 0\n",
- "๐ฏ RAG with validation - Success: 6, Different results: 3, No results: 15, Error: 0\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "โ
1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "โ
2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "โ
1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "โ
2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 5, Error: 21\n",
- "๐ฏ RAG without validation - Success: 7, Different results: 5, No results: 14, Error: 0\n",
- "๐ฏ RAG with validation - Success: 8, Different results: 3, No results: 15, Error: 0\n",
- "\n",
- "๐ง Testing model: Llama3.1 8B\n",
- "\n",
- "โ๏ธ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0A%0ASELECT+DISTINCT+%3Fprotein%0AWHERE+%7B%0A++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++up%3Aorganism+taxon%3A9606+%3B%0A+++++++++++up%3AencodedBy+%3Fgene+.%0A++%3Fgene+skos%3AprefLabel+%22LCT%22+.%0A%7D\n",
- "โ๏ธ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0A%0ASELECT+DISTINCT+%3Fprotein%0AWHERE+%7B%0A++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++up%3Aorganism+taxon%3A9606+%3B%0A+++++++++++up%3AencodedBy+%3Fgene+.%0A++%3Fgene+skos%3AprefLabel+%22LCT%22+.%0A%7D\n",
- "โ
1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n",
- "โ
2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 2\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 0, Error: 2\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 0, Error: 0\n",
- "Missing from generated: {'sequence': {'type': 'literal', 'value': 'GPQQENMMEE'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n",
- "โ๏ธ 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'sequence': {'type': 'literal', 'value': 'GPQQENMMEE'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n",
- "โ๏ธ 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'sequence': {'type': 'literal', 'value': 'GPQQENMMEE'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n",
- "โ 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'sequence': {'type': 'literal', 'value': 'GPQQENMMEE'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n",
- "โ 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 4\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 2, Error: 2\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 2, Error: 0\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ๏ธ 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n",
- "\n",
- "Results mismatch. Ref: 11650 != gen: 0\n",
- "\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ๏ธ 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n",
- "\n",
- "Results mismatch. Ref: 11650 != gen: 0\n",
- "\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n",
- "\n",
- "Results mismatch. Ref: 11650 != gen: 0\n",
- "\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n",
- "\n",
- "Results mismatch. Ref: 11650 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 6\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 4, Error: 2\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 4, Error: 0\n",
- "โ๏ธ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "list index out of range\n",
- "โ๏ธ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "list index out of range\n",
- "โ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "list index out of range\n",
- "โ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "list index out of range\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 8\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 4, Error: 4\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 4, Error: 2\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ๏ธ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ๏ธ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 10\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 6, Error: 4\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 6, Error: 2\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ๏ธ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "400 Client Error: Bad Request for url: https://www.bgee.org/sparql/?query=%23+https%3A%2F%2Fwww.bgee.org%2Fsparql%2F%0APREFIX+genex%3A+%3Chttp%3A%2F%2Fpurl.org%2Fgenex%23%3E%0APREFIX+obo%3A+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0ASELECT+DISTINCT+%3Fgene+%3Fconfidence+%3Ffdr%0AWHERE+%7B%0A++%3Fgene+a+genex%3AExpression+%3B%0A++++genex%3AhasConfidenceLevel+%3Fconfidence+%3B%0A++++genex%3AhasFDRpvalue+%3Ffdr+.%0A++FILTER%28lang%28%3Fconfidence%29+%3D+%22en%22%29%0A++FILTER%28lang%28%3Ffdr%29+%3D+%22en%22%29%0A++LIMIT+10%0A%7D\n",
- "โ๏ธ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "400 Client Error: Bad Request for url: https://www.bgee.org/sparql/?query=%23+https%3A%2F%2Fwww.bgee.org%2Fsparql%2F%0APREFIX+genex%3A+%3Chttp%3A%2F%2Fpurl.org%2Fgenex%23%3E%0APREFIX+obo%3A+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0ASELECT+DISTINCT+%3Fgene+%3Fconfidence+%3Ffdr%0AWHERE+%7B%0A++%3Fgene+a+genex%3AExpression+%3B%0A++++genex%3AhasConfidenceLevel+%3Fconfidence+%3B%0A++++genex%3AhasFDRpvalue+%3Ffdr+.%0A++FILTER%28lang%28%3Fconfidence%29+%3D+%22en%22%29%0A++FILTER%28lang%28%3Ffdr%29+%3D+%22en%22%29%0A++LIMIT+10%0A%7D\n",
- "โ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "400 Client Error: Bad Request for url: https://www.bgee.org/sparql/?query=%23+https%3A%2F%2Fwww.bgee.org%2Fsparql%2F%0APREFIX+genex%3A+%3Chttp%3A%2F%2Fpurl.org%2Fgenex%23%3E%0APREFIX+obo%3A+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT+DISTINCT+%3Fgene+%3Fconfidence+%3Ffdr%0AWHERE+%0A%7B%0A++FILTER%28lang%28%3Fconfidence%29+%3D+%22en%22%29%0A++FILTER%28lang%28%3Ffdr%29+%3D+%22en%22%29%0A++%7B+%3Fgene+a+genex%3AExpression+%3B%0A++++++genex%3AhasConfidenceLevel+%3Fconfidence+%3B%0A++++++genex%3AhasFDRpvalue+%3Ffdr+.%0A++%7D+LIMIT+10%0A%7D\n",
- "โ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "HTTPSConnectionPool(host='www.bgee.org', port=443): Read timed out. (read timeout=200)\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 10\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 6, Error: 6\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 6, Error: 4\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ๏ธ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ๏ธ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 12\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 8, Error: 6\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 8, Error: 4\n",
- "โ๏ธ 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "400 Client Error: Bad Request for url: https://sparql.omabrowser.org/sparql/?query=%23+https%3A%2F%2Fsparql.omabrowser.org%2Fsparql%2F%0APREFIX+orth%3A+%3Chttp%3A%2F%2Fpurl.org%2Fnet%2Forth%23%3E%0ASELECT+%3ForthologCluster+%3FdatasetURI%0AWHERE%0A%7B%0A++%3ForthologCluster+a+orth%3AOrthologsCluster+%3B%0A++++++++++++++++++++orth%3AinDataset+%3Fdataset+.%0A++%3Fdataset+a+orth%3AOrthologyDataset+%3B%0A+++++++++++%3Chttp%3A%2F%2Fpurl.org%2Fpav%2Fversion%3E+%3Fversion+.%0A++FILTER+%28regex%28%3Fversion%2C+%22All.Jul2023%22%29%29%0A++LIMIT+20%0A%7D\n",
- "โ๏ธ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "400 Client Error: Bad Request for url: https://sparql.omabrowser.org/sparql/?query=%23+https%3A%2F%2Fsparql.omabrowser.org%2Fsparql%2F%0APREFIX+orth%3A+%3Chttp%3A%2F%2Fpurl.org%2Fnet%2Forth%23%3E%0ASELECT+%3ForthologCluster+%3FdatasetURI%0AWHERE%0A%7B%0A++%3ForthologCluster+a+orth%3AOrthologsCluster+%3B%0A++++++++++++++++++++orth%3AinDataset+%3Fdataset+.%0A++%3Fdataset+a+orth%3AOrthologyDataset+%3B%0A+++++++++++%3Chttp%3A%2F%2Fpurl.org%2Fpav%2Fversion%3E+%3Fversion+.%0A++FILTER+%28regex%28%3Fversion%2C+%22All.Jul2023%22%29%29%0A++LIMIT+20%0A%7D\n",
- "โ 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "Invalid URL 'None': No scheme supplied. Perhaps you meant https://None?\n",
- "Missing from reference: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'literal', 'value': 'Jul2023'}}\n",
- "โ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 1048576\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 14\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 8, Error: 8\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 1, No results: 8, Error: 5\n",
- "โ๏ธ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+taxon%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Ftaxonomy%2F%3E%0A%0ASELECT+%3Ftaxon+%3Fstrain+%3Fname%0AWHERE+%7B%0A++%3Ftaxon+a+up%3ATaxon+%3B%0A+++++++++up%3AscientificName+%22Escherichia+coli%22+%3B%0A+++++++++up%3Arank+up%3ASpecies+%3B%0A+++++++++up%3AhasStrain+%3Fstrain+.%0A++%3Fstrain+a+up%3AStrain+%3B%0A++++++++++up%3Aname+%3Fname+.%0A++FILTER+%28+%3Ftaxon+%3D+%3Fstrain+%29%0A++LIMIT+20%0A%7D\n",
- "โ๏ธ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+taxon%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Ftaxonomy%2F%3E%0A%0ASELECT+%3Ftaxon+%3Fstrain+%3Fname%0AWHERE+%7B%0A++%3Ftaxon+a+up%3ATaxon+%3B%0A+++++++++up%3AscientificName+%22Escherichia+coli%22+%3B%0A+++++++++up%3Arank+up%3ASpecies+%3B%0A+++++++++up%3AhasStrain+%3Fstrain+.%0A++%3Fstrain+a+up%3AStrain+%3B%0A++++++++++up%3Aname+%3Fname+.%0A++FILTER+%28+%3Ftaxon+%3D+%3Fstrain+%29%0A++LIMIT+20%0A%7D\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 16\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 8, Error: 10\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 1, No results: 10, Error: 5\n",
- "โ๏ธ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+upa%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fannotation%2F%3E%0A%0ASELECT+DISTINCT+%3FproteinURI+%3FproteinLabel+%3FpathwayLabel%0AWHERE+%7B%0A++SERVICE+%3Chttps%3A%2F%2Fsparql.uniprot.org%2Fsparql%3E+%7B%0A++++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++++up%3ArecommendedName%2Fup%3AfullName+%3FproteinLabel+.%0A++++%3Fprotein+up%3Aannotation%2Fup%3APathway_Annotation+%3Fpathway+.%0A++++%3Fpathway+up%3Asequence+%3Fsequence+.%0A++++%3Fpathway+rdfs%3Alabel+%3FpathwayLabel+.%0A++++%3Fpathway+skos%3AprefLabel+%3FpathwayLabel+.%0A++++%3Fpathway+upa%3AhasComponent+%3Fcomponent+.%0A++++%3Fcomponent+upa%3AhasComponent+%3Fglycolysis+.%0A++++%3Fglycolysis+rdfs%3Alabel+%22glycolysis%22+.%0A++%7D%0A++FILTER+%28regex%28%3FpathwayLabel%2C+%22glycolysis%22%29%29%0A++LIMIT+20%0A%7D\n",
- "โ๏ธ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "400 Client Error: Bad Request for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+upa%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fannotation%2F%3E%0A%0ASELECT+DISTINCT+%3FproteinURI+%3FproteinLabel+%3FpathwayLabel%0AWHERE+%7B%0A++SERVICE+%3Chttps%3A%2F%2Fsparql.uniprot.org%2Fsparql%3E+%7B%0A++++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++++up%3ArecommendedName%2Fup%3AfullName+%3FproteinLabel+.%0A++++%3Fprotein+up%3Aannotation%2Fup%3APathway_Annotation+%3Fpathway+.%0A++++%3Fpathway+up%3Asequence+%3Fsequence+.%0A++++%3Fpathway+rdfs%3Alabel+%3FpathwayLabel+.%0A++++%3Fpathway+skos%3AprefLabel+%3FpathwayLabel+.%0A++++%3Fpathway+upa%3AhasComponent+%3Fcomponent+.%0A++++%3Fcomponent+upa%3AhasComponent+%3Fglycolysis+.%0A++++%3Fglycolysis+rdfs%3Alabel+%22glycolysis%22+.%0A++%7D%0A++FILTER+%28regex%28%3FpathwayLabel%2C+%22glycolysis%22%29%29%0A++LIMIT+20%0A%7D\n",
- "โ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0A%0ASELECT+DISTINCT+%3FproteinURI+%3FproteinLabel+%3FpathwayLabel%0AWHERE+%7B%0A++SERVICE+%3Chttps%3A%2F%2Fsparql.uniprot.org%2Fsparql%3E+%7B%0A++++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++++up%3ArecommendedName%2Fup%3AfullName+%3FproteinLabel+%3B%0A+++++++++++++up%3Aidentifier+%3FproteinURI+.%0A++++%3Fprotein+up%3Aannotation%2Fup%3APathway_Annotation+%3Fpathway+.%0A++++%3Fpathway+up%3Asequence+%3Fsequence+.%0A++++%3Fpathway+rdfs%3Alabel+%3FpathwayLabel+.%0A++++FILTER+%28regex%28%3FpathwayLabel%2C+%22glycolysis%22%29+%7C%7C+regex%28%3FpathwayLabel%2C+%22Glycolysis%22%29+%7C%7C+regex%28%3FpathwayLabel%2C+%22glycolytic+pathway%22%29+%7C%7C+regex%28%3FpathwayLabel%2C+%22GLYCOLYTIC+PATHWAY%22%29%29%0A++%7D%0A++LIMIT+20%0A%7D\n",
- "โ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+upa%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fannotation%2F%3E%0A%0ASELECT+DISTINCT+%3FproteinURI+%3FproteinLabel+%3FpathwayLabel%0AWHERE+%7B%0A++%3Chttps%3A%2F%2Fsparql.uniprot.org%2Funiprot%3E+%7B%0A++++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++++up%3ArecommendedName%2Fup%3AfullName+%3FproteinLabel+.%0A++%7D%0A++%3Chttp%3A%2F%2Fsparql.uniprot.org%2Fpathways%2F%3E+%7B%0A++++%3Fpathway+up%3Asequence+%3Fsequence+.%0A++++%3Fpathway+rdfs%3Alabel+%3FpathwayLabel+.%0A++++%3Fpathway+skos%3AprefLabel+%3FpathwayLabel+.%0A++++%3Fpathway+upa%3AhasComponent+%3Fcomponent+.%0A++++%3Fcomponent+upa%3AhasComponent+%3Fglycolysis+.%0A++++%3Fglycolysis+rdfs%3Alabel+%22glycolysis%22+.%0A++%7D%0A++%3Chttps%3A%2F%2Fglycolysis.org%2Fpathways%2F%3E+%7B%0A++++%3Fpathway+a+%3Fclass+%3B%0A++++%3Fclass+rdfs%3AsubClassOf+%3Chttps%3A%2F%2Fglycolysis.org%2Fpathways%2Fglycolysis%3E+%3B%0A++++%3Fprotein+up%3Aannotation%2Fpathway+%3Fpathway+.%0A++++%3Fprotein+a+%3Fannotation+%3B%0A++++%3Fannotation+rdfs%3AseeAlso+%3Chttps%3A%2F%2Fglycolysis.org%2Fpathways%2Fglycolysis%3E+.%0A++++%3Fprotein+%3Fpdb+up%3Apdb%2Fmolecule+%3FproteinURI+.%0A++%7D++%0A++FILTER+%28regex%28STR%28%3FproteinLabel%29%2C+%22glycolysis%22%29+%26%26+STRstarts%28STR%28%3FproteinURI%29%2C+%22http%3A%2F%2Fpurl.uniprot.org%2Fpdb%2F%22%29%29%0A%7D\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 18\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 8, Error: 12\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 1, No results: 10, Error: 7\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ๏ธ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ๏ธ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 20\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 10, Error: 12\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 1, No results: 12, Error: 7\n",
- "โ๏ธ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT+%0A++++%3Fprotein+%0A++++%3Fdisease+%0A++++%3Flocation_inside_cell+%0A++++%3Flocation_inside_cell_uri%0AWHERE+%7B%0A++++%3Fprotein+up%3Aannotation+%3FdiseaseAnnotation+.%0A++++%3FdiseaseAnnotation+up%3Adisease%2Fskos%3AprefLabel+%3Fdisease+.%0A++++FILTER+%28regex%28%3Fdisease%2C+%22Alzheimer%22%2C+%22i%22%29%29%0A++++%3Fprotein+up%3Aannotation+%3FsubcellAnnotation+.%0A++++%3FsubcellAnnotation+up%3AlocatedIn%2Fup%3AcellularComponent+%3Fcellcmpt+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell_uri+.%0A++++LIMIT+20%0A%7D\n",
- "โ๏ธ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "400 Client Error: Bad Request for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT+%0A++++%3Fprotein+%0A++++%3Fdisease+%0A++++%3Flocation_inside_cell+%0A++++%3Flocation_inside_cell_uri%0AWHERE+%7B%0A++++%3Fprotein+up%3Aannotation+%3FdiseaseAnnotation+.%0A++++%3FdiseaseAnnotation+up%3Adisease%2Fskos%3AprefLabel+%3Fdisease+.%0A++++FILTER+%28regex%28%3Fdisease%2C+%22Alzheimer%22%2C+%22i%22%29%29%0A++++%3Fprotein+up%3Aannotation+%3FsubcellAnnotation+.%0A++++%3FsubcellAnnotation+up%3AlocatedIn%2Fup%3AcellularComponent+%3Fcellcmpt+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell_uri+.%0A++++LIMIT+20%0A%7D\n",
- "Missing from reference: {'location_inside_cell_uri': {'type': 'uri', 'value': 'http://www.uni-mainz.de/FB/Medizin/Anatomie/workshop/EM/EMMVBE.html'}, 'disease': {'type': 'literal', 'value': 'Alzheimer disease 2'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P02649'}, 'location_inside_cell_label': {'type': 'literal', 'value': 'Multivesicular body'}}\n",
- "โ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 2\n",
- "\n",
- "โ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT+%0A++++%3Fprotein+%0A++++%3Fdisease+%0A++++%3Flocation_inside_cell+%0A++++%3Flocation_inside_cell_uri%0AWHERE+%7B%0A++++%3Fprotein+up%3Aannotation+%3FdiseaseAnnotation+.%0A++++%3FdiseaseAnnotation+up%3Adisease%2Fskos%3AprefLabel+%3Fdisease+.%0A++++FILTER+%28regex%28%3Fdisease%2C+%22Alzheimer%22%2C+%22i%22%29%29%0A++++%3Fprotein+up%3Aannotation%2Fup%3AcatalyticActivity%2Fup%3AcatalyzedReaction+%3Freaction+.%0A++++%3Fprotein+%3Fannotation+subcellLocation+.%0A++++%3FsubcellLocation+rdfs%3Alabel+%3Flocation_inside_cell+.%0A++++%23+Add+condition+to+identify+location+of+interest%3A%0A++++%3FsubcellLocation+a+up%3ASubcellular_Location_Annotation+.%0A++++%3FsubcellLocation+up%3AlocatedIn%2Fup%3AcellularComponent+%3Fcellcmpt+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell+.%0A++++%3Fcellcmpt+up%3Aexact_location_uri+%3Flocation_inside_cell_uri+.%0A++++LIMIT+20%0A%7D\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 22\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 10, Error: 14\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 2, No results: 12, Error: 8\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "โ๏ธ 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n",
- "\n",
- "Results mismatch. Ref: 143 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "โ๏ธ 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n",
- "\n",
- "Results mismatch. Ref: 143 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "โ 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n",
- "\n",
- "Results mismatch. Ref: 143 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "โ 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n",
- "\n",
- "Results mismatch. Ref: 143 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 24\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 12, Error: 14\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 2, No results: 14, Error: 8\n",
- "\n",
- "๐ง Testing model: Mixtral 8x22B\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ๏ธ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "\n",
- "Results mismatch. Ref: 4 != gen: 0\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ๏ธ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "\n",
- "Results mismatch. Ref: 4 != gen: 0\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "\n",
- "Results mismatch. Ref: 4 != gen: 0\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n",
- "\n",
- "Results mismatch. Ref: 4 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 1, Error: 1\n",
- "๐ฏ RAG without validation - Success: 0, Different results: 0, No results: 2, Error: 0\n",
- "๐ฏ RAG with validation - Success: 0, Different results: 0, No results: 2, Error: 0\n",
- "โ
1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "Missing from reference: {'sequence': {'type': 'uri', 'value': 'http://purl.uniprot.org/isoforms/P83854-1'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n",
- "โ๏ธ 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "โ
1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 1, Error: 3\n",
- "๐ฏ RAG without validation - Success: 1, Different results: 1, No results: 2, Error: 0\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 2, Error: 0\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ๏ธ 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n",
- "\n",
- "Results mismatch. Ref: 11650 != gen: 4806\n",
- "\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ๏ธ 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n",
- "\n",
- "Results mismatch. Ref: 11650 != gen: 4806\n",
- "\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n",
- "\n",
- "Results mismatch. Ref: 11650 != gen: 4806\n",
- "\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n",
- "\n",
- "Results mismatch. Ref: 11650 != gen: 4806\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 4\n",
- "๐ฏ RAG without validation - Success: 1, Different results: 3, No results: 2, Error: 0\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 2, No results: 2, Error: 0\n",
- "โ
1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n",
- "โ
2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n",
- "Missing from reference: {'mouseProtein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/ACES_MOUSE'}, 'cluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0671680_33154#PG_13'}, 'reaction': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/10100'}, 'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'humanProtein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P19835'}}\n",
- "โ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "\n",
- "Results mismatch. Ref: 710 != gen: 31078\n",
- "\n",
- "Missing from reference: {'mouseProtein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/ACES_MOUSE'}, 'cluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0671680_33154#PG_13'}, 'reaction': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/10100'}, 'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'humanProtein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P19835'}}\n",
- "โ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n",
- "\n",
- "Results mismatch. Ref: 710 != gen: 31078\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 6\n",
- "๐ฏ RAG without validation - Success: 3, Different results: 3, No results: 2, Error: 0\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 4, No results: 2, Error: 0\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ๏ธ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ๏ธ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "โ
1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol = 121\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "\n",
- "Results mismatch. Ref: 121 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 3, Error: 7\n",
- "๐ฏ RAG without validation - Success: 3, Different results: 3, No results: 4, Error: 0\n",
- "๐ฏ RAG with validation - Success: 3, Different results: 4, No results: 3, Error: 0\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ
1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n",
- "โ
2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "\n",
- "Results mismatch. Ref: 10 != gen: 0\n",
- "\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n",
- "\n",
- "Results mismatch. Ref: 10 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 4, Error: 8\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 3, No results: 4, Error: 0\n",
- "๐ฏ RAG with validation - Success: 3, Different results: 4, No results: 5, Error: 0\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ๏ธ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ๏ธ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 1\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 1\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 6, Error: 8\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 3, No results: 6, Error: 0\n",
- "๐ฏ RAG with validation - Success: 3, Different results: 6, No results: 5, Error: 0\n",
- "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n",
- "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n",
- "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n",
- "โ๏ธ 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from reference: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'literal', 'value': 'Jul2023'}}\n",
- "โ๏ธ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n",
- "โ 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from reference: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'literal', 'value': 'Jul2023'}}\n",
- "โ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 8, Error: 8\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 4, No results: 7, Error: 0\n",
- "๐ฏ RAG with validation - Success: 3, Different results: 7, No results: 6, Error: 0\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ๏ธ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ๏ธ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 10, Error: 8\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 4, No results: 9, Error: 0\n",
- "๐ฏ RAG with validation - Success: 3, Different results: 7, No results: 8, Error: 0\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ๏ธ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ๏ธ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 12, Error: 8\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 4, No results: 11, Error: 0\n",
- "๐ฏ RAG with validation - Success: 3, Different results: 7, No results: 10, Error: 0\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ๏ธ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ๏ธ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "โ
1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "โ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n",
- "\n",
- "Results mismatch. Ref: 3 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 14, Error: 8\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 4, No results: 13, Error: 0\n",
- "๐ฏ RAG with validation - Success: 4, Different results: 7, No results: 11, Error: 0\n",
- "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n",
- "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n",
- "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/93'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 9'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q8IZY2'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Early endosome membrane'}}\n",
- "โ๏ธ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/138'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q16643'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Cell cortex'}}\n",
- "โ๏ธ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/134'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 9'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q8IZY2'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Golgi apparatus membrane'}}\n",
- "โ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/138'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q16643'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Cell cortex'}}\n",
- "โ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 16, Error: 8\n",
- "๐ฏ RAG without validation - Success: 5, Different results: 6, No results: 13, Error: 0\n",
- "๐ฏ RAG with validation - Success: 4, Different results: 9, No results: 11, Error: 0\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "โ
1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "โ
2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "โ
1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "โ
2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 18, Error: 8\n",
- "๐ฏ RAG without validation - Success: 7, Different results: 6, No results: 13, Error: 0\n",
- "๐ฏ RAG with validation - Success: 6, Different results: 9, No results: 11, Error: 0\n",
- "\n",
- "๐ง Testing model: gpt-4o\n",
- "\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n",
- "โ
1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n",
- "โ
2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n",
- "โ
1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n",
- "โ
2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n",
- "๐ฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 0\n",
- "๐ฏ RAG without validation - Success: 2, Different results: 0, No results: 0, Error: 0\n",
- "๐ฏ RAG with validation - Success: 2, Different results: 0, No results: 0, Error: 0\n",
- "โ
1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "โ
2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n",
- "๐ฏ No RAG - Success: 2, Different results: 0, No results: 2, Error: 0\n",
- "๐ฏ RAG without validation - Success: 4, Different results: 0, No results: 0, Error: 0\n",
- "๐ฏ RAG with validation - Success: 4, Different results: 0, No results: 0, Error: 0\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n",
- "โ
1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n",
- "โ
2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n",
- "โ
1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n",
- "โ
2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n",
- "๐ฏ No RAG - Success: 2, Different results: 0, No results: 4, Error: 0\n",
- "๐ฏ RAG without validation - Success: 6, Different results: 0, No results: 0, Error: 0\n",
- "๐ฏ RAG with validation - Success: 6, Different results: 0, No results: 0, Error: 0\n",
- "Missing from generated: {'reaction': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/10100'}, 'sterol': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n",
- "Missing from generated: {'reaction': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/10100'}, 'sterol': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n",
- "โ
1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n",
- "โ
2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n",
- "โ
1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n",
- "โ
2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n",
- "๐ฏ No RAG - Success: 2, Different results: 0, No results: 6, Error: 0\n",
- "๐ฏ RAG without validation - Success: 8, Different results: 0, No results: 0, Error: 0\n",
- "๐ฏ RAG with validation - Success: 8, Different results: 0, No results: 0, Error: 0\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n",
- "โ
1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol = 121\n",
- "โ๏ธ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n",
- "โ
1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol = 121\n",
- "โ
2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol = 121\n",
- "๐ฏ No RAG - Success: 2, Different results: 0, No results: 8, Error: 0\n",
- "๐ฏ RAG without validation - Success: 9, Different results: 0, No results: 0, Error: 1\n",
- "๐ฏ RAG with validation - Success: 10, Different results: 0, No results: 0, Error: 0\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n",
- "โ
1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n",
- "โ
2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n",
- "โ
1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n",
- "โ
2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n",
- "๐ฏ No RAG - Success: 2, Different results: 0, No results: 10, Error: 0\n",
- "๐ฏ RAG without validation - Success: 11, Different results: 0, No results: 0, Error: 1\n",
- "๐ฏ RAG with validation - Success: 12, Different results: 0, No results: 0, Error: 0\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ๏ธ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ๏ธ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n",
- "โ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n",
- "\n",
- "Results mismatch. Ref: 96 != gen: 0\n",
- "\n",
- "โ
2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI = 96\n",
- "๐ฏ No RAG - Success: 2, Different results: 0, No results: 12, Error: 0\n",
- "๐ฏ RAG without validation - Success: 11, Different results: 0, No results: 2, Error: 1\n",
- "๐ฏ RAG with validation - Success: 13, Different results: 0, No results: 1, Error: 0\n",
- "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n",
- "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n",
- "โ
1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n",
- "โ
2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n",
- "โ
1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n",
- "โ
2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n",
- "๐ฏ No RAG - Success: 2, Different results: 0, No results: 14, Error: 0\n",
- "๐ฏ RAG without validation - Success: 13, Different results: 0, No results: 2, Error: 1\n",
- "๐ฏ RAG with validation - Success: 15, Different results: 0, No results: 1, Error: 0\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ๏ธ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "โ๏ธ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n",
- "โ
1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20 = 20\n",
- "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n",
- "โ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "๐ฏ No RAG - Success: 2, Different results: 0, No results: 16, Error: 0\n",
- "๐ฏ RAG without validation - Success: 13, Different results: 0, No results: 3, Error: 2\n",
- "๐ฏ RAG with validation - Success: 16, Different results: 0, No results: 2, Error: 0\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "โ๏ธ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n",
- "โ๏ธ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "400 Client Error: Bad Request for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n",
- "โ
1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20 = 20\n",
- "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n",
- "❌ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 0\n",
- "\n",
- "🎯 No RAG - Success: 2, Different results: 0, No results: 18, Error: 0\n",
- "🎯 RAG without validation - Success: 13, Different results: 0, No results: 3, Error: 4\n",
- "🎯 RAG with validation - Success: 17, Different results: 0, No results: 3, Error: 0\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n",
- "✅ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n",
- "✅ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n",
- "✅ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n",
- "✅ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n",
- "🎯 No RAG - Success: 2, Different results: 0, No results: 20, Error: 0\n",
- "🎯 RAG without validation - Success: 15, Different results: 0, No results: 3, Error: 4\n",
- "🎯 RAG with validation - Success: 19, Different results: 0, No results: 3, Error: 0\n",
- "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n",
- "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n",
- "⚠️ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n",
- "⚠️ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n",
- "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/473'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 9'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q8IZY2'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Phagocytic cup'}}\n",
- "❌ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/473'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 9'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q8IZY2'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Phagocytic cup'}}\n",
- "❌ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n",
- "\n",
- "Results mismatch. Ref: 20 != gen: 20\n",
- "\n",
- "🎯 No RAG - Success: 2, Different results: 0, No results: 22, Error: 0\n",
- "🎯 RAG without validation - Success: 15, Different results: 0, No results: 3, Error: 6\n",
- "🎯 RAG with validation - Success: 19, Different results: 2, No results: 3, Error: 0\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n",
- "✅ 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "✅ 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "✅ 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "✅ 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n",
- "🎯 No RAG - Success: 2, Different results: 0, No results: 24, Error: 0\n",
- "🎯 RAG without validation - Success: 17, Different results: 0, No results: 3, Error: 6\n",
- "🎯 RAG with validation - Success: 21, Different results: 2, No results: 3, Error: 0\n",
- " Model RAG Approach Success Different Results \\\n",
- "0 gpt-4o-mini No RAG 0 0 \n",
- "1 gpt-4o-mini RAG without validation 7 5 \n",
- "2 gpt-4o-mini RAG with validation 8 3 \n",
- "3 Llama3.1 8B No RAG 0 0 \n",
- "4 Llama3.1 8B RAG without validation 0 0 \n",
- "5 Llama3.1 8B RAG with validation 2 2 \n",
- "6 Mixtral 8x22B No RAG 0 0 \n",
- "7 Mixtral 8x22B RAG without validation 7 6 \n",
- "8 Mixtral 8x22B RAG with validation 6 9 \n",
- "9 gpt-4o No RAG 2 0 \n",
- "10 gpt-4o RAG without validation 17 0 \n",
- "11 gpt-4o RAG with validation 21 2 \n",
- "\n",
- " No Results Errors Precision Price \n",
- "0 5 21 0.000000 0.00012 \n",
- "1 14 0 0.269231 0.00111 \n",
- "2 15 0 0.307692 0.00111 \n",
- "3 2 24 0.000000 0.00011 \n",
- "4 12 14 0.000000 0.00141 \n",
- "5 14 8 0.076923 0.00141 \n",
- "6 18 8 0.000000 0.00079 \n",
- "7 13 0 0.269231 0.01075 \n",
- "8 11 0 0.230769 0.01077 \n",
- "9 24 0 0.076923 0.00480 \n",
- "10 3 6 0.653846 0.03734 \n",
- "11 3 0 0.807692 0.03734 \n",
- "\n",
- "\n",
- "\n",
- "None\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"import os\n",
"from collections import defaultdict\n",
+ "import time\n",
+ "from weakref import ref\n",
"\n",
"import pandas as pd\n",
"import requests\n",
@@ -1182,17 +242,15 @@
"}\"\"\",\n",
" },\n",
" {\n",
- " \"question\": \"Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\",\n",
+ " \"question\": \"\"\"Retrieve all proteins that are associated with Alzheimer disease (http://purl.uniprot.org/diseases/3832) and where they are known to be located in the cell. Return ?proteinURI, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\"\"\",\n",
" \"endpoint\": \"https://sparql.uniprot.org/sparql/\",\n",
" \"query\": \"\"\"PREFIX up: <http://purl.uniprot.org/core/>\n",
"PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
- "SELECT ?proteinURI ?diseaseLabel ?locationInsideCellLabel ?locationInsideCellUri\n",
+ "SELECT ?proteinURI ?locationInsideCellLabel ?locationInsideCellUri\n",
"WHERE {\n",
" ?proteinURI a up:Protein ;\n",
" up:annotation ?diseaseAnnotation , ?subcellAnnotation .\n",
- " ?diseaseAnnotation up:disease ?disease .\n",
- " ?disease skos:prefLabel ?diseaseLabel .\n",
- " FILTER(CONTAINS(LCASE(?diseaseLabel), \"alzheimer\"))\n",
+ " ?diseaseAnnotation up:disease <http://purl.uniprot.org/diseases/3832> .\n",
" ?subcellAnnotation up:locatedIn/up:cellularComponent ?locationInsideCellUri .\n",
" ?locationInsideCellUri skos:prefLabel ?locationInsideCellLabel .\n",
"} LIMIT 20\"\"\",\n",
@@ -1205,7 +263,7 @@
"PREFIX orth: <http://purl.org/net/orth#>\n",
"PREFIX obo: <http://purl.obolibrary.org/obo/>\n",
"PREFIX lscr: <http://purl.org/lscr#>\n",
- "SELECT DISTINCT ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n",
+ "SELECT DISTINCT ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n",
"WHERE {\n",
" ?proteinOMA a orth:Protein ;\n",
" orth:organism/obo:RO_0002162/up:scientificName ?speciesLabel ;\n",
@@ -1379,15 +437,10 @@
"\n",
"# QLEVER_UNIPROT = \"https://qlever.cs.uni-freiburg.de/api/uniprot\"\n",
"\n",
- "# Price per M tokens, open source models based on fireworks.io pricing\n",
+ "# Price per million tokens, open source models based on fireworks.io pricing\n",
"# https://openai.com/api/pricing/\n",
"# https://fireworks.ai/pricing\n",
"models = {\n",
- " \"gpt-4o-mini\": {\n",
- " \"id\": \"gpt-4o-mini\",\n",
- " \"price_input\": 0.15,\n",
- " \"price_output\": 0.6,\n",
- " },\n",
" \"Llama3.1 8B\": {\n",
" \"id\": \"hf:meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
" \"price_input\": 0.2,\n",
@@ -1403,6 +456,11 @@
" \"price_input\": 5,\n",
" \"price_output\": 15,\n",
" },\n",
+ " \"gpt-4o-mini\": {\n",
+ " \"id\": \"gpt-4o-mini\",\n",
+ " \"price_input\": 0.15,\n",
+ " \"price_output\": 0.6,\n",
+ " },\n",
"}\n",
"\n",
"\n",
@@ -1416,9 +474,7 @@
" ],\n",
" stream=False,\n",
" temperature=0,\n",
- " # response_format={ \"type\": \"json_object\" },\n",
" )\n",
- " # response.choices[0].message.content\n",
" return response.to_dict()\n",
"\n",
"\n",
@@ -1476,10 +532,26 @@
" \"F1\": [],\n",
"}\n",
"\n",
- "number_of_tries = 2\n",
+ "number_of_tries = 3\n",
"\n",
"print(f\"🧪 Testing {len(example_queries)} queries\")\n",
"\n",
+ "# Get results for the reference queries\n",
+ "ref_results = []\n",
+ "for i, test_query in enumerate(example_queries):\n",
+ " res_ref_finally_pass = False\n",
+ " while not res_ref_finally_pass:\n",
+ " try:\n",
+ " query_start_time = time.time()\n",
+ " res_from_ref = query_sparql(test_query[\"query\"], test_query[\"endpoint\"], timeout=300)[\"results\"][\"bindings\"]\n",
+ " print(f\"Reference query {i} '{test_query['question']}' took {time.time() - query_start_time:.2f} seconds\")\n",
+ " ref_results.append(res_from_ref)\n",
+ " res_ref_finally_pass = True\n",
+ " except Exception as e:\n",
+ " print(f\"Timeout for reference query {i}: {e}, Trying again because we know if should work.\")\n",
+ " res_ref_finally_pass = False\n",
+ " # res_from_ref = query_sparql(test_query[\"query\"], QLEVER_UNIPROT)[\"results\"][\"bindings\"]\n",
+ "\n",
"for model_label, model in models.items():\n",
" print(f\"\\n🧠 Testing model: {model_label}\\n\")\n",
" res = defaultdict(dict)\n",
@@ -1487,10 +559,18 @@
" for approach in list_of_approaches:\n",
" res[approach] = defaultdict(int)\n",
"\n",
- " for _i, test_query in enumerate(example_queries):\n",
- " # Execute the reference query\n",
- " res_from_ref = query_sparql(test_query[\"query\"], test_query[\"endpoint\"], timeout=200)[\"results\"][\"bindings\"]\n",
- " # res_from_ref = query_sparql(test_query[\"query\"], QLEVER_UNIPROT)[\"results\"][\"bindings\"]\n",
+ " for query_num, test_query in enumerate(example_queries):\n",
+ " # # Execute the reference query.\n",
+ " # res_ref_finally_pass = False\n",
+ " # while not res_ref_finally_pass:\n",
+ " # try:\n",
+ " # res_from_ref = query_sparql(test_query[\"query\"], test_query[\"endpoint\"], timeout=300)[\"results\"][\"bindings\"]\n",
+ " # res_ref_finally_pass = True\n",
+ " # except Exception as e:\n",
+ " # print(f\"Error in reference query: {e}, Trying again because we know it should work :)\")\n",
+ " # res_ref_finally_pass = False\n",
+ " # # res_from_ref = query_sparql(test_query[\"query\"], test_query[\"endpoint\"], timeout=300)[\"results\"][\"bindings\"]\n",
+ " # # res_from_ref = query_sparql(test_query[\"query\"], QLEVER_UNIPROT)[\"results\"][\"bindings\"]\n",
"\n",
" for approach, approach_func in list_of_approaches.items():\n",
" for t in range(number_of_tries):\n",
@@ -1509,17 +589,17 @@
"\n",
" # Execute the generated query\n",
" res_from_generated = query_sparql(\n",
- " generated_sparql[\"query\"], generated_sparql[\"endpoint_url\"], timeout=200\n",
+ " generated_sparql[\"query\"], generated_sparql[\"endpoint_url\"], timeout=300\n",
" )[\"results\"][\"bindings\"]\n",
" # res_from_generated = query_sparql(generated_sparql[\"query\"], QLEVER_UNIPROT)[\"results\"][\"bindings\"]\n",
"\n",
- " if not result_sets_are_same(res_from_generated, res_from_ref):\n",
+ " if not result_sets_are_same(res_from_generated, ref_results[query_num]):\n",
" if len(res_from_generated) == 0:\n",
" res[approach][\"no_results\"] += 1\n",
" else:\n",
" res[approach][\"different_results\"] += 1\n",
" raise Exception(\n",
- " f\"\\nResults mismatch. Ref: {len(res_from_ref)} != gen: {len(res_from_generated)}\\n\"\n",
+ " f\"\\nResults mismatch. Ref: {len(ref_results[query_num])} != gen: {len(res_from_generated)}\\n\"\n",
" )\n",
" else:\n",
" print(f\"✅ {t+1}/{number_of_tries} {test_query['question']} = {len(res_from_generated)}\")\n",
@@ -1530,10 +610,10 @@
" if approach != \"No RAG\":\n",
" fail_emoji = \"❌\" if approach == \"RAG with validation\" else \"⚠️\"\n",
" print(f\"{fail_emoji} {t+1}/{number_of_tries} {test_query['question']}\\n{e}\")\n",
- " # print(generated_sparql[\"query\"])\n",
- " # print(\"Correct query:\")\n",
- " # print(test_query[\"query\"])\n",
- " # print(\"\")\n",
+ " print(generated_sparql[\"query\"])\n",
+ " print(\"Correct query:\")\n",
+ " print(test_query[\"query\"])\n",
+ " print(\"\")\n",
"\n",
" for approach in list_of_approaches:\n",
" print(\n",
@@ -1556,7 +636,10 @@
" results_data[\"Price\"].append(round(mean_price, 5))\n",
" # results_data['Precision'].append(precision)\n",
" # results_data['Recall'].append(recall)\n",
- " results_data[\"F1\"].append(2 * (precision * recall) / (precision + recall))\n",
+ " if precision + recall == 0:\n",
+ " results_data[\"F1\"].append(0)\n",
+ " else:\n",
+ " results_data[\"F1\"].append(round(2 * (precision * recall) / (precision + recall), 2))\n",
"\n",
"\n",
"df = pd.DataFrame(results_data)\n",
@@ -1566,7 +649,7 @@
"\n",
"# Output Latex table\n",
"latex_str = \"\"\n",
- "prev_model = models.keys()[0]\n",
+ "prev_model = list(models.keys())[0]\n",
"for index, row in df.iterrows():\n",
" row_str = \" & \".join([str(item) for item in row]) # Join all values in the row with \" & \"\n",
" row_str += \" \\\\\\\\\"\n",
diff --git a/src/sparql_llm/api.py b/src/sparql_llm/api.py
index 53dcfb5..d53f83c 100644
--- a/src/sparql_llm/api.py
+++ b/src/sparql_llm/api.py
@@ -9,7 +9,7 @@
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
-from openai import Stream, OpenAI
+from openai import OpenAI, Stream
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from pydantic import BaseModel
from qdrant_client.models import FieldCondition, Filter, MatchValue, ScoredPoint
@@ -194,10 +194,7 @@ async def chat(request: ChatCompletionRequest):
# Use messages from the request to keep memory of previous messages sent by the client
# Replace the question asked by the user with the big prompt with all contextual infos
request.messages[-1].content = prompt_with_context
- all_messages = [
- Message(role="system", content=settings.system_prompt),
- *request.messages
- ]
+ all_messages = [Message(role="system", content=settings.system_prompt), *request.messages]
# Send the prompt to OpenAI to get a response
response = client.chat.completions.create(
@@ -214,12 +211,10 @@ async def chat(request: ChatCompletionRequest):
stream_openai(response, query_hits + docs_hits, prompt_with_context), media_type="application/x-ndjson"
)
- print(response)
+ # print(response)
# print(response.choices[0].message.content)
response: ChatCompletion = (
- validate_and_fix_sparql(response, all_messages, client, request.model)
- if request.validate_output
- else response
+ validate_and_fix_sparql(response, all_messages, client, request.model) if request.validate_output else response
)
# NOTE: the response is similar to OpenAI API, but we add the list of hits and the full prompt used to ask the question
response.docs = query_hits + docs_hits
@@ -243,10 +238,12 @@ def validate_and_fix_sparql(
"""Recursive function to validate the SPARQL queries in the chat response and fix them if needed."""
if try_count >= settings.max_try_fix_sparql:
- resp.choices[0].message.content = f"{resp.choices[0].message.content}\n\nThe SPARQL query could not be fixed after multiple tries. Please do it yourself!"
+ resp.choices[
+ 0
+ ].message.content = f"{resp.choices[0].message.content}\n\nThe SPARQL query could not be fixed after multiple tries. Please do it yourself!"
return resp
generated_sparqls = extract_sparql_queries(resp.choices[0].message.content)
- print("generated_sparqls", generated_sparqls)
+ # print("generated_sparqls", generated_sparqls)
error_detected = False
for gen_query in generated_sparqls:
try:
@@ -261,7 +258,9 @@ def validate_and_fix_sparql(
except Exception as e:
if "Unknown namespace prefix" in str(e):
- md_resp = md_resp.replace(gen_query["query"], add_missing_prefixes(gen_query["query"], prefixes_map))
+ resp.choices[0].message.content = resp.choices[0].message.content.replace(
+ gen_query["query"], add_missing_prefixes(gen_query["query"], prefixes_map)
+ )
else:
# Ask the LLM to try to fix it
print(f"Error in SPARQL query try #{try_count}: {e}\n{gen_query['query']}")
@@ -279,7 +278,7 @@ def validate_and_fix_sparql(
# {md_resp}
messages.append({"role": "assistant", "content": fix_prompt})
fixing_resp = client.chat.completions.create(
- model=resp.model,
+ model=llm_model,
messages=messages,
stream=False,
)
@@ -289,7 +288,9 @@ def validate_and_fix_sparql(
resp.usage.prompt_tokens += fixing_resp.usage.prompt_tokens
resp.usage.completion_tokens += fixing_resp.usage.completion_tokens
resp.usage.total_tokens += fixing_resp.usage.total_tokens
- resp.choices[0].message.content = resp.choices[0].message.content.replace(gen_query["query"], fixed_query["query"])
+ resp.choices[0].message.content = resp.choices[0].message.content.replace(
+ gen_query["query"], fixed_query["query"]
+ )
if error_detected:
# Check again the fixed query
return validate_and_fix_sparql(resp, messages, client, llm_model, try_count)