diff --git a/data/zombie/zombie-health/zombie-diet.csv b/data/zombie/zombie-health/zombie-diet.csv new file mode 100644 index 0000000..d0b72a1 --- /dev/null +++ b/data/zombie/zombie-health/zombie-diet.csv @@ -0,0 +1,102 @@ +bitter herb,slugs,cooked veg,giant meat,raw veg,dessert,potatoes,green meat,eggs,beans,honey,fish,milk,chili,fried veg,cups water,region +1,0,0,1,0,0,1,1,1,1,0,0,0,0,1,5,north +1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,5,north +0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,forest +1,0,0,1,0,0,1,1,1,1,0,0,0,0,1,5,north +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,5,north +0,0,1,0,0,1,0,1,1,0,0,1,1,1,0,1,forest +0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,forest +1,0,0,1,0,0,0,1,1,1,0,0,0,1,0,5,north +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,3,highlands +0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,forest +0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,south +0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,5,south +0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,7,south +0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,3,highlands +1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,5,north +0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,forest +0,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,north +0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,3,highlands +0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,3,highlands +1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,5,north +0,1,1,0,1,0,0,0,1,1,0,0,1,0,1,3,highlands +0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,7,mountains +0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,5,coast +0,0,1,0,0,1,1,1,1,1,1,0,0,0,0,5,coast +1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,3,north +1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,3,north +0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,7,mountains +1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,5,north +1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,3,north +0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,3,highlands +0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,forest +1,0,0,1,0,0,0,1,1,1,0,0,1,1,0,5,north +1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,5,north +0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,3,highlands +0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,forest +1,0,1,0,1,0,0,0,0,1,1,0,0,1,0,7,mountains 
+1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,7,mountains +0,1,1,0,0,0,1,0,1,1,0,0,1,0,0,3,highlands +0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,7,mountains +0,1,1,0,1,0,0,0,1,1,0,0,1,0,0,3,highlands +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,7,south +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,1,1,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,5,north +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,7,mountains +0,0,1,0,0,1,1,1,1,1,0,0,1,0,0,5,coast +0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,9,south +1,0,0,1,0,0,1,1,1,1,0,0,1,0,0,5,north +1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,5,north +0,1,1,0,0,0,0,0,1,1,0,0,1,0,1,3,highlands +0,1,1,0,1,0,0,0,1,1,0,0,1,1,0,3,highlands +0,1,1,0,0,1,1,0,1,1,0,0,1,0,1,3,highlands +0,1,1,0,1,0,0,0,1,1,0,0,1,0,0,3,highlands +0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,forest +0,0,1,0,0,1,1,1,1,0,0,1,1,0,0,1,forest +0,0,1,0,0,0,1,1,1,1,1,0,1,0,0,1,savanna +1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,5,north +0,0,0,1,0,1,1,1,1,1,0,1,1,0,1,1,north +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,5,north +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +1,0,0,1,0,0,0,1,1,1,0,0,1,1,1,5,north +0,1,1,0,0,0,1,0,1,1,0,0,1,0,1,3,highlands +0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,9,south +0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,forest +1,0,0,1,0,1,1,1,1,1,0,1,0,0,1,1,north +1,0,0,1,0,1,1,1,1,1,0,1,1,0,1,3,north +0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,1,savanna +0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,south +0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,3,highlands +0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,3,highlands +0,0,1,0,0,0,1,1,1,1,0,0,1,0,0,1,savanna +0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,south +0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,forest +0,1,1,0,1,0,0,0,1,1,0,0,1,0,0,3,highlands +1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,3,north +0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,6,south +0,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,forest +0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,3,highlands +0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,7,mountains 
+0,0,1,0,0,1,0,1,1,1,0,0,0,0,0,5,coast +0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,5,savanna +0,0,1,0,0,0,1,1,1,1,0,0,1,0,0,5,savanna +0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,forest +1,0,0,1,1,0,0,1,1,1,0,0,1,0,0,3,north +1,0,0,1,0,0,0,1,1,1,0,0,1,0,0,5,north +0,1,1,0,1,0,1,0,1,1,0,0,1,0,1,3,highlands +1,0,0,1,0,0,0,1,1,1,0,0,1,0,1,3,north +1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,7,mountains +1,0,0,1,0,0,1,1,1,1,0,0,1,0,1,5,north +0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,south +0,1,1,0,1,0,0,0,1,1,0,0,1,0,0,3,highlands diff --git a/labs/2021/09-grafo-conhecimento/README.md b/labs/2021/09-grafo-conhecimento/README.md new file mode 100644 index 0000000..142590b --- /dev/null +++ b/labs/2021/09-grafo-conhecimento/README.md @@ -0,0 +1,21 @@ +# Modelo Lógico para Banco de Dados de Grafos +## Laboratório 9 + Trabalho Final Etapa 5 +*Lab de Bancos de Dados em 22/10/2021* + +* [Especificação em PDF](bd-trabalho-2021-05-etapa-05-laboratorio-v01.pdf) + +O objetivo é usar o espaço do laboratório para praticar conceitos que contribuam na elaboração do trabalho. Como é um laboratório, o conteúdo desta entrega pode ser debatido com a sua equipe, mas a entrega deve ser individual, diferente dos demais membros do grupo (apesar de poder haver partes semelhantes). Será aceito que membros diferentes de uma equipe tenham uma fonte de dados ou modelos equivalentes, mas os exemplos devem ser diferentes para cada membro. + +Baseado no tema escolhido pela equipe, considere que o banco compartilhado poderá oferecer dados na forma de grafo de conhecimento, ou se enriquecer a partir de um grafo de conhecimento. Nesse sentido, proponha: + +* Como você publicaria seus dados ou parte dos seus dados na forma de um grafo de conhecimento. Use um exemplo de um grafo (apresente na forma de imagem) para ilustrar o modelo de (recurso, propriedade, valor). + +ou + +* Que base que apresenta seus dados na forma de grafo de conhecimento você usaria para enriquecer seu banco de dados (mesmo que este não esteja na forma de um grafo de conhecimento). 
Use um exemplo para ilustrar como você usaria o grafo para enriquecer seu banco e mostre como o esquema do seu banco evoluiria para comportar esse enriquecimento. + +Apresente um conjunto de queries ou perguntas que podem ser respondidas pelo seu grafo – devem ser no mínimo três queries ou perguntas. + +Submeta a resposta conforme modelo no template: `/templates/2021/lab09`. + +A entrega deste laboratório deve ser feita no Github individual (não da equipe). Seus resultados poderão ser posteriormente usados para o trabalho da equipe. diff --git a/labs/2021/09-grafo-conhecimento/bd-trabalho-2021-05-etapa-05-laboratorio-v01.pdf b/labs/2021/09-grafo-conhecimento/bd-trabalho-2021-05-etapa-05-laboratorio-v01.pdf new file mode 100644 index 0000000..af0395e Binary files /dev/null and b/labs/2021/09-grafo-conhecimento/bd-trabalho-2021-05-etapa-05-laboratorio-v01.pdf differ diff --git a/labs/2021/10-regras-associacao/faers-association-rules.ipynb b/labs/2021/10-regras-associacao/faers-association-rules.ipynb new file mode 100644 index 0000000..25025f3 --- /dev/null +++ b/labs/2021/10-regras-associacao/faers-association-rules.ipynb @@ -0,0 +1,286 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%defaultDatasource jdbc:h2:mem:db" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DROP TABLE IF EXISTS Drug;\n", + "DROP TABLE IF EXISTS Pathology;\n", + "DROP TABLE IF EXISTS DrugUse;\n", + "\n", + "CREATE TABLE Drug (\n", + " code INT NOT NULL,\n", + " name VARCHAR(200) NOT NULL,\n", + " PRIMARY KEY(code)\n", + ") AS SELECT\n", + " code,\n", + " name\n", + "FROM CSVREAD('../../../data/faers-2017/drug.csv');\n", + "\n", + "CREATE TABLE Pathology (\n", + " code INT NOT NULL,\n", + " name VARCHAR(200) NOT NULL,\n", + " PRIMARY KEY(code)\n", + ") AS SELECT\n", + " code,\n", + " name\n", + "FROM CSVREAD('../../../data/faers-2017/pathology.csv');\n", + 
"\n", + "CREATE TABLE DrugUse (\n", + " idPerson INT NOT NULL,\n", + " codePathology INT NOT NULL,\n", + " codeDrug INT NOT NULL,\n", + " FOREIGN KEY(codePathology)\n", + " REFERENCES Pathology(code)\n", + " ON DELETE NO ACTION\n", + " ON UPDATE NO ACTION,\n", + " FOREIGN KEY(codeDrug)\n", + " REFERENCES Drug(code)\n", + " ON DELETE NO ACTION\n", + " ON UPDATE NO ACTION,\n", + ") AS SELECT\n", + " idPerson,\n", + " codePathology,\n", + " codeDrug\n", + "FROM CSVREAD('../../../data/faers-2017/drug-use.csv');\n", + "\n", + "CREATE TABLE SideEffect(\n", + " idPerson INT NOT NULL,\n", + " codePathology INT NOT NULL,\n", + " FOREIGN KEY(codePathology)\n", + " REFERENCES Pathology(code)\n", + " ON DELETE NO ACTION\n", + " ON UPDATE NO ACTION\n", + ") AS SELECT idPerson, codePathology\n", + "FROM CSVREAD('../../../data/faers-2017/sideeffect.csv');" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a8ab9864-6165-46fb-bd1c-22ced9e83936", + "version_major": 2, + "version_minor": 0 + }, + "method": "display_data" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7ab4c336-c43c-40ca-8708-cd96f77633a9", + "version_major": 2, + "version_minor": 0 + }, + "method": "display_data" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "63965d5d-7e66-4923-b7a2-7def42477f3f", + "version_major": 2, + "version_minor": 0 + }, + "method": "display_data" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5d281392-6296-494c-904d-2145f39f220f", + "version_major": 2, + "version_minor": 0 + }, + "method": "display_data" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "SELECT * FROM 
Drug;\n", + "SELECT * FROM Pathology;\n", + "SELECT * FROM DrugUse;\n", + "SELECT * FROM SideEffect;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FDA Adverse Event Reporting System (FAERS)\n", + "\n", + "Considere a base de dados que reúne relatos de efeitos adversos de drogas: [FDA Adverse Event Reporting System (FAERS)](https://open.fda.gov/data/faers/).\n", + "\n", + "As fontes de dados são publicadas e documentadas em: [Latest Quarterly Data Files](https://www.fda.gov/drugs/fda-adverse-event-reporting-system-faers/fda-adverse-event-reporting-system-faers-latest-quarterly-data-files)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interação entre medicamentos\n", + "Considere como uma interação toda vez que um medicamento A é tomado em conjunto com um medicamento B por uma pessoa. Crie uma consulta que retorne para cada par de medicamento A,B o número de interações, somente para aqueles medicamentos que são tomados em conjunto mais de 30 vezes (passaremos a chamar de **grupo mais 30**)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tabela Resultante\n", + "\n", + "Se estiver demorando muito para gerar a tabela, você poderá usar a seguinte tabela resultante pronta. Essa tabela tem todas as interações, você ainda precisará filtrar o **grupo mais 30**." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a26468b9-7984-4c42-9083-c5d29935c620", + "version_major": 2, + "version_minor": 0 + }, + "method": "display_data" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "CREATE TABLE DrugInteraction (\n", + " druga INT NOT NULL,\n", + " drugb INT NOT NULL,\n", + " weight INT NOT NULL,\n", + " PRIMARY KEY(druga, drugb)\n", + ") AS SELECT\n", + " druga,\n", + " drugb,\n", + " weight\n", + "FROM CSVREAD('../../../data/faers-2017/results/drug-interaction.csv');\n", + "\n", + "SELECT * FROM DrugInteraction;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Suporte\n", + "\n", + "### Qual o suporte que cada uma das associações tem?\n", + "\n", + "Gere uma consulta que apresente o suporte do **grupo mais 30**. Ordene o resultado por ordem de suporte. \n", + "\n", + "Observação: Para que o resultado da divisão seja fracionária, converta o numerador em `DOUBLE` usando:\n", + "~~~sql\n", + "CAST(campo AS DOUBLE)\n", + "~~~" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Confiança\n", + "\n", + "### Qual a confiança que cada uma das associações tem?\n", + "\n", + "Gere uma consulta que apresente a confiança do **grupo mais 30**. Ordene o resultado por ordem de confiança." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Efeito Colateral\n", + "\n", + "Para cada medicamento, qual o percentual do medicamento A que tem efeito colateral junto com o medicamento B, quando comparado com seu uso sem o medicamento B." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "SQL", + "language": "SQL", + "name": "sql" + }, + "language_info": { + "codemirror_mode": "sql", + "file_extension": ".sql", + "mimetype": "", + "name": "SQL", + "nbconverter_exporter": "", + "version": "" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": false, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sql/faers/respostas/faers-lab-01-resposta.ipynb b/sql/faers/respostas/faers-lab-01-resposta.ipynb index 6124236..c8e5c2d 100644 --- a/sql/faers/respostas/faers-lab-01-resposta.ipynb +++ b/sql/faers/respostas/faers-lab-01-resposta.ipynb @@ -74,7 +74,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c7a0b4bb-bd46-4b63-a773-f2ede958d764", + "model_id": "f0a4274e-3373-4651-9da4-95314be46a87", "version_major": 2, "version_minor": 0 }, @@ -86,7 +86,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0aa7a2f4-f214-4c2e-a1d6-0969644b3222", + "model_id": "2aa15fbb-fdac-4449-9bc9-3ed7bbb987a4", "version_major": 2, "version_minor": 0 }, @@ -98,7 +98,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "30c2ddc1-84bc-4d83-b647-651aba83f5fe", + "model_id": "947bcd3e-5214-4019-896d-e29742c6a0ac", "version_major": 2, "version_minor": 0 }, @@ -110,7 +110,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2d2c89ff-a4d5-4aa3-8f66-acaec1eec060", + "model_id": "7c23dd10-096c-437e-b934-4d62ffa8b776", "version_major": 2, "version_minor": 0 }, @@ -187,29 +187,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, 
"metadata": {}, - "outputs": [ - { - "ename": "org.h2.jdbc.JdbcSQLException", - "evalue": " Table \"DRUGINTERACTION\" already exists; SQL statement", - "output_type": "error", - "text": "org.h2.jdbc.JdbcSQLException: Table \"DRUGINTERACTION\" already exists; SQL statement:\nCREATE TABLE DrugInteraction (\ndruga INT NOT NULL,\ndrugb INT NOT NULL,\nweight INT NOT NULL,\nPRIMARY KEY(druga, drugb)\n) AS SELECT\ndruga,\ndrugb,\nweight\nFROM CSVREAD('../../../data/faers-2017/results/drug-interaction.csv') [42101-197]", - "traceback": [ - "\u001b[1;31morg.h2.jdbc.JdbcSQLException: Table \"DRUGINTERACTION\" already exists; SQL statement:\u001b[0;0m", - "\u001b[1;31mCREATE TABLE DrugInteraction (\u001b[0;0m", - "\u001b[1;31mdruga INT NOT NULL,\u001b[0;0m", - "\u001b[1;31mdrugb INT NOT NULL,\u001b[0;0m", - "\u001b[1;31mweight INT NOT NULL,\u001b[0;0m", - "\u001b[1;31mPRIMARY KEY(druga, drugb)\u001b[0;0m", - "\u001b[1;31m) AS SELECT\u001b[0;0m", - "\u001b[1;31mdruga,\u001b[0;0m", - "\u001b[1;31mdrugb,\u001b[0;0m", - "\u001b[1;31mweight\u001b[0;0m", - "\u001b[1;31mFROM CSVREAD('../../../data/faers-2017/results/drug-interaction.csv') [42101-197]\u001b[0;0m" - ] - } - ], + "outputs": [], "source": [ "CREATE TABLE DrugInteraction (\n", " druga INT NOT NULL,\n", @@ -225,22 +205,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "org.h2.jdbc.JdbcSQLException", - "evalue": " Column \"DI.WEIGHT\" not found; SQL statement", - "output_type": "error", - "text": "org.h2.jdbc.JdbcSQLException: Column \"DI.WEIGHT\" not found; SQL statement:\nCREATE VIEW DrugInteraction30 AS\nSELECT * FROM DrugInteraction DI\nWHERE DI.weight > 30 [42122-197]", - "traceback": [ - "\u001b[1;31morg.h2.jdbc.JdbcSQLException: Column \"DI.WEIGHT\" not found; SQL statement:\u001b[0;0m", - "\u001b[1;31mCREATE VIEW DrugInteraction30 AS\u001b[0;0m", - "\u001b[1;31mSELECT * FROM DrugInteraction DI\u001b[0;0m", - "\u001b[1;31mWHERE DI.weight > 
30 [42122-197]\u001b[0;0m" - ] - } - ], + "outputs": [], "source": [ "CREATE VIEW DrugInteraction30 AS\n", "SELECT * FROM DrugInteraction DI\n", diff --git a/templates/2021/lab09/README.md b/templates/2021/lab09/README.md new file mode 100644 index 0000000..f51e1a7 --- /dev/null +++ b/templates/2021/lab09/README.md @@ -0,0 +1,25 @@ +# Modelo para Apresentação do Lab09 - Grafo de Conhecimento + +Estrutura de pastas: + +~~~ +├── README.md <- arquivo apresentando a tarefa +│ +└── images <- arquivos de imagem usados na tarefa +~~~ + +# Aluno +* `RA`: `` + +## Exemplo de Grafo de Conhecimento - para publicar ou enriquecer +> Utilize a abordagem (recurso, propriedade, valor) para apresentar seu grafo exemplo. +> Coloque a imagem do PNG do seu modelo lógico como ilustrado abaixo (a imagem estará na pasta `images`): +> +> ![Modelo Lógico de Grafos](images/grafo-conhecimento.png) + +## Perguntas de Pesquisa ou Queries + +> Liste aqui as três perguntas de pesquisa ou queries +> * Pergunta 1 +> * Pergunta 2 +> * Pergunta 3 diff --git a/templates/2021/lab09/images/grafo-conhecimento.png b/templates/2021/lab09/images/grafo-conhecimento.png new file mode 100644 index 0000000..a47036a Binary files /dev/null and b/templates/2021/lab09/images/grafo-conhecimento.png differ diff --git a/xml/lab01-xpath-xquery.md b/xml/01-xpath-xquery.md similarity index 100% rename from xml/lab01-xpath-xquery.md rename to xml/01-xpath-xquery.md diff --git a/xml/lab02-xquery-publicacoes.ipynb b/xml/02-xquery-publicacoes.ipynb similarity index 100% rename from xml/lab02-xquery-publicacoes.ipynb rename to xml/02-xquery-publicacoes.ipynb diff --git a/xml/lab03-xquery-dron.ipynb b/xml/03-xquery-dron.ipynb similarity index 100% rename from xml/lab03-xquery-dron.ipynb rename to xml/03-xquery-dron.ipynb diff --git a/xml/lab04-xquery-drom-pubchem-questoes.md b/xml/04-xquery-drom-pubchem-questoes.md similarity index 100% rename from xml/lab04-xquery-drom-pubchem-questoes.md rename to 
xml/04-xquery-drom-pubchem-questoes.md diff --git a/xml/04-xquery-drom-pubchem-txqueryengine.md b/xml/04-xquery-drom-pubchem-txqueryengine.md new file mode 100644 index 0000000..f78b58d --- /dev/null +++ b/xml/04-xquery-drom-pubchem-txqueryengine.md @@ -0,0 +1,183 @@ +# PubChem + +PubChem is the world's largest collection of freely accessible chemical information. + +## Exploring + +Go to PubChem: https://pubchem.ncbi.nlm.nih.gov/ + +Look for: `acetylsalicylic acid` + +It is possible to do the same search on PubChem through an API. The details are here: + +https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest-tutorial + +Let us build an API request using the popular name aspirin: + +https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/aspirin/XML + +Retrieving from its PubChem code (2244): +https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/2244/XML + +Retrieving its synonyms: +https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/2244/synonyms/XML + +Retrieving a substance related to Acetylsalicylic Acid whose code is 49854366. It is described here: + +https://pubchem.ncbi.nlm.nih.gov/substance/49854366 + +It is possible to retrieve data from this substance with the REST request: + +https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/49854366/XML + +This substance will be used as an example in the following queries. + +## TXQueryEngine + +Since TXQueryEngine requires copy and paste straight from the result, the file was downloaded and is available here: + +https://github.com/santanche/lab2learn/blob/master/data/pubchem/pubchem-acetylsalicylic-acid.xml + +# Basics + +The following queries will be tested on https://www.videlibri.de/cgi-bin/xidelcgi + +Fetching the XML file: + +Synonyms of aspirin: +~~~xquery +//PC-Substance_synonyms_E +~~~ + +How many synonyms? 
+ +~~~xquery +count(//PC-Substance_synonyms_E) +~~~ + +One per line: + +~~~xquery +for $a in (//PC-Substance_synonyms_E) +return $a/text() +~~~ + +It is possible to request to PubChem all compounds whose structure is related to Acetylsalicylic Acid (2244) in the following way: + +https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastsubstructure/cid/2244/cids/XML + +## Accessing PubChem from Python + +This code is read and processed through Python in this notebook: + +https://github.com/santanche/lab2learn/blob/master/api/pubchem/pubchem-api.ipynb + +# DRON - The Drug Ontology + +A simplified version of DRON in XML can be found here: +[DRON in XML](https://github.com/santanche/lab2learn/blob/master/data/faers-2017-dron/dron.xml) + +Each element `<drug>` represents a drug or drug classification (group). When a drug is part of a classification its `<drug>` element is subordinated to the classification. It produces a hierarchy as further illustrated. + +## Exploring + +Go to the Ontology Search related to DRON Ontology: https://www.ebi.ac.uk/ols/ontologies/dron + +Look for: `acetylsalicylic acid` + +The tree view will look like this: + +![acetylsalicylic acid tree view](images/ontology-search-acetylsalicylic-acid.png) + +# Basics + +The following queries will be tested on https://www.videlibri.de/cgi-bin/xidelcgi + +Name of all the classifications one level up (direct parents) of `Acetylsalicylic Acid` (with repetitions). + +~~~xquery +//drug[drug/@name="ACETYLSALICYLIC ACID"]/@name +~~~ + +Same as the previous query, but iterating each classification element in a FOR. + +~~~xquery +for $d in (//drug[drug/@name="ACETYLSALICYLIC ACID"]) +return data($d/@name) +~~~ + +Using the GROUP BY clause to avoid repetitions. 
+ +~~~xquery +for $d in (//drug[drug/@name="ACETYLSALICYLIC ACID"]) +let $gr := $d/@name +group by $gr +return data($gr) +~~~ + +Expanding to all levels up of classification, i.e., all (parent) classifications related to `Acetylsalicylic Acid` (paths from the `Acetylsalicylic Acid` to the root), without repetition. + +~~~xquery +for $d in (//drug[drug//@name="ACETYLSALICYLIC ACID"]) +let $parent := $d/@name +group by $parent +order by $parent +return data($parent) +~~~ + +# Integrating PubChem to DRON + +A list of all PubChem elements that have cross-reference with ChEBI was retrieved by the REST call: + +https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sourceall/ChEBI/xrefs/RegistryID/XML + +The result of this call was materialized at: + +https://raw.githubusercontent.com/santanche/lab2learn/master/data/pubchem/pubchem-chebi.xml + +The following XQuery fetches the list and displays the ChEBI codes. It uses substring to remove the CHEBI prefix, maintaining only the code: + +~~~xquery +let $pubchem := doc('https://raw.githubusercontent.com/santanche/lab2learn/master/data/pubchem/pubchem-chebi.xml') +for $p in ($pubchem//RegistryID) +return substring($p/text(), 7) +~~~ + +## JOIN + +The following code does a JOIN between the previous list, retrieved from PubChem and compounds in DRON. 
 It adds a URI prefix to be compatible with the DRON approach of defining the code: + +~~~xquery +let $pubchem := doc('https://raw.githubusercontent.com/santanche/lab2learn/master/data/pubchem/pubchem-chebi.xml') +let $dron := doc('https://raw.githubusercontent.com/santanche/lab2learn/master/data/faers-2017-dron/dron.xml') +for $p in ($pubchem//RegistryID), + $d in ($dron//drug) +where concat('http://purl.obolibrary.org/obo/CHEBI_',substring($p/text(), 7)) = $d/@id +let $gr := $d/@name +group by $gr +order by $gr +return {data($gr), ' '} +~~~ + +# Enriching XML with Python + +Departing from the list of all PubChem elements that have cross-reference with ChEBI, produced previously, it is possible to produce an XML file with the SIDs (PubChem) of the substances that appear in both: + +~~~xquery +let $pubchem := doc('https://raw.githubusercontent.com/santanche/lab2learn/master/data/pubchem/pubchem-chebi.xml') +let $dron := doc('https://raw.githubusercontent.com/santanche/lab2learn/master/data/faers-2017-dron/dron.xml') +return + +{for $p in ($pubchem//Information), + $d in ($dron//drug) +where concat('http://purl.obolibrary.org/obo/CHEBI_',substring($p/RegistryID/text(), 7)) = $d/@id +let $gr := $p/SID/text() +group by $gr +order by $gr +return {data($gr)} +} +~~~ + +In the following notebook, there is code that produces a REST request for each id to retrieve synonym names from PubChem. It illustrates how to explore Python to enrich XML resources. 
+ +https://github.com/santanche/lab2learn/blob/master/api/pubchem/pubchem-api.ipynb diff --git a/xml/lab04-xquery-drom-pubchem.md b/xml/04-xquery-drom-pubchem-zorba.md similarity index 97% rename from xml/lab04-xquery-drom-pubchem.md rename to xml/04-xquery-drom-pubchem-zorba.md index 9e335d1..14612ea 100644 --- a/xml/lab04-xquery-drom-pubchem.md +++ b/xml/04-xquery-drom-pubchem-zorba.md @@ -30,9 +30,11 @@ It is possible to retrieve data from this substance with the REST request: https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/49854366/XML -This substance will be used as an example in the following queries. +This substance will be used as an example in the following queries. -Since Zorba did not accept to fetch the data straight from the result, the file was downloaded and is available here: +## Zorba + +Since Zorba does not accept to fetch the data straight from the result, the file was downloaded and is available here: https://github.com/santanche/lab2learn/blob/master/data/pubchem/pubchem-acetylsalicylic-acid.xml @@ -200,4 +202,4 @@ return {data($gr)} In the following notebook, there is a code that produces a REST request for each id to retrieve synonym names from PubChem. It illustrates how to explore Python to enrich XML resources. -https://github.com/santanche/lab2learn/blob/master/api/pubchem/pubchem-api.ipynb \ No newline at end of file +https://github.com/santanche/lab2learn/blob/master/api/pubchem/pubchem-api.ipynb