diff --git a/backend/src/auth.py b/backend/src/auth.py index 48e3d86..30be6b6 100644 --- a/backend/src/auth.py +++ b/backend/src/auth.py @@ -41,8 +41,8 @@ async def __call__(self, request: Request) -> Optional[str]: auth_params = { - # "audience": "https://explorer.icare4cvd.eu", - "audience": "https://other-ihi-app", + "audience": "https://explorer.icare4cvd.eu", + # "audience": "https://other-ihi-app", "redirect_uri": settings.redirect_uri, } diff --git a/backend/src/explore.py b/backend/src/explore.py index 1c8d73e..5815e12 100644 --- a/backend/src/explore.py +++ b/backend/src/explore.py @@ -98,25 +98,25 @@ async def search_concepts( }} GRAPH ?cohortVarGraph {{ - ?cohort icare:has_variable ?variable . + ?cohort icare:hasVariable ?variable . ?variable a icare:Variable ; dc:identifier ?varName ; rdfs:label ?varLabel ; - icare:var_type ?varType ; + icare:varType ?varType ; icare:index ?index . OPTIONAL {{ ?variable icare:omop ?omopDomain }} }} {{ GRAPH ?cohortMappingsGraph {{ - ?variable icare:mapped_id ?mappedId . + ?variable icare:mappedId ?mappedId . }} }} UNION {{ GRAPH ?cohortVarGraph {{ ?variable icare:categories ?category. }} GRAPH ?cohortMappingsGraph {{ - ?category icare:mapped_id ?mappedId . + ?category icare:mappedId ?mappedId . 
def to_camelcase(s: str) -> str:
    """Convert a label such as a spreadsheet column header to lowerCamelCase.

    Runs of underscores and hyphens are treated as word separators, exactly
    like spaces. The text is then passed through ``str.title()``, the spaces
    are removed, and the first character of the result is lower-cased, e.g.
    ``"VARIABLE NAME"`` -> ``"variableName"`` and ``"var_type"`` -> ``"varType"``.

    Note that ``str.title()`` lower-cases every letter that does not start a
    word, so an input that is already camelCase loses its inner capitals
    (``"conceptId"`` -> ``"conceptid"``).

    Args:
        s: The text to convert.

    Returns:
        The lowerCamelCase form of ``s``. An empty string is returned when
        ``s`` is empty or consists only of separators.
    """
    # Normalise "_" / "-" runs to a single space so title() sees word boundaries,
    # then drop the spaces to glue the capitalised words together.
    joined = sub(r"(_|-)+", " ", s).title().replace(" ", "")
    # Slice rather than index: `joined[0]` raised IndexError for "" or "__".
    return joined[:1].lower() + joined[1:]
ICARE.categories, cat_uri, cohort_uri)) + g.add((cat_uri, RDF.type, ICARE.VariableCategory, cohort_uri)) g.add((cat_uri, RDF.value, Literal(category["value"]), cohort_uri)) g.add((cat_uri, RDFS.label, Literal(category["label"]), cohort_uri)) # TODO: add categories @@ -359,19 +364,19 @@ def cohorts_metadata_file_to_graph(filepath: str) -> Dataset: for email in row["Email"].split(";"): g.add((cohort_uri, ICARE.email, Literal(email.strip()), cohorts_graph)) if row["Type"]: - g.add((cohort_uri, ICARE.cohort_type, Literal(row["Type"]), cohorts_graph)) + g.add((cohort_uri, ICARE.cohortType, Literal(row["Type"]), cohorts_graph)) if row["Study type"]: - g.add((cohort_uri, ICARE.study_type, Literal(row["Study type"]), cohorts_graph)) + g.add((cohort_uri, ICARE.studyType, Literal(row["Study type"]), cohorts_graph)) if row["N"]: - g.add((cohort_uri, ICARE.study_participants, Literal(row["N"]), cohorts_graph)) + g.add((cohort_uri, ICARE.studyParticipants, Literal(row["N"]), cohorts_graph)) if row["Study duration"]: - g.add((cohort_uri, ICARE.study_duration, Literal(row["Study duration"]), cohorts_graph)) + g.add((cohort_uri, ICARE.studyDuration, Literal(row["Study duration"]), cohorts_graph)) if row["Ongoing"]: - g.add((cohort_uri, ICARE.study_ongoing, Literal(row["Ongoing"]), cohorts_graph)) + g.add((cohort_uri, ICARE.studyOngoing, Literal(row["Ongoing"]), cohorts_graph)) if row["Patient population"]: - g.add((cohort_uri, ICARE.study_population, Literal(row["Patient population"]), cohorts_graph)) + g.add((cohort_uri, ICARE.studyPopulation, Literal(row["Patient population"]), cohorts_graph)) if row["Primary objective"]: - g.add((cohort_uri, ICARE.study_objective, Literal(row["Primary objective"]), cohorts_graph)) + g.add((cohort_uri, ICARE.studyObjective, Literal(row["Primary objective"]), cohorts_graph)) return g diff --git a/backend/src/utils.py b/backend/src/utils.py index 9878d72..a8a780c 100644 --- a/backend/src/utils.py +++ b/backend/src/utils.py @@ -47,23 +47,23 @@ def 
run_query(query: str) -> dict[str, Any]: ?cohort a icare:Cohort ; dc:identifier ?cohortId ; icare:institution ?cohortInstitution . - OPTIONAL { ?cohort icare:cohort_type ?cohortType . } + OPTIONAL { ?cohort icare:cohortType ?cohortType . } OPTIONAL { ?cohort icare:email ?cohortEmail . } - OPTIONAL { ?cohort icare:study_type ?study_type . } - OPTIONAL { ?cohort icare:study_participants ?study_participants . } - OPTIONAL { ?cohort icare:study_duration ?study_duration . } - OPTIONAL { ?cohort icare:study_ongoing ?study_ongoing . } - OPTIONAL { ?cohort icare:study_population ?study_population . } - OPTIONAL { ?cohort icare:study_objective ?study_objective . } + OPTIONAL { ?cohort icare:studyType ?study_type . } + OPTIONAL { ?cohort icare:studyParticipants ?study_participants . } + OPTIONAL { ?cohort icare:studyDuration ?study_duration . } + OPTIONAL { ?cohort icare:studyOngoing ?study_ongoing . } + OPTIONAL { ?cohort icare:studyPopulation ?study_population . } + OPTIONAL { ?cohort icare:studyObjective ?study_objective . } } OPTIONAL { GRAPH ?cohortVarGraph { - ?cohort icare:has_variable ?variable . + ?cohort icare:hasVariable ?variable . ?variable a icare:Variable ; dc:identifier ?varName ; rdfs:label ?varLabel ; - icare:var_type ?varType ; + icare:varType ?varType ; icare:index ?index . OPTIONAL { ?variable icare:count ?count } OPTIONAL { ?variable icare:na ?na } @@ -72,7 +72,7 @@ def run_query(query: str) -> dict[str, Any]: OPTIONAL { ?variable icare:units ?units } OPTIONAL { ?variable icare:formula ?formula } OPTIONAL { ?variable icare:definition ?definition } - OPTIONAL { ?variable icare:concept_id ?conceptId } + OPTIONAL { ?variable icare:conceptId ?conceptId } OPTIONAL { ?variable icare:omop ?omopDomain } OPTIONAL { ?variable icare:visits ?visits } OPTIONAL { @@ -86,11 +86,11 @@ def run_query(query: str) -> dict[str, Any]: OPTIONAL { GRAPH ?cohortMappingsGraph { OPTIONAL { - ?variable icare:mapped_id ?mappedId . + ?variable icare:mappedId ?mappedId . 
OPTIONAL { ?mappedId rdfs:label ?mappedLabel } } OPTIONAL { - ?category icare:mapped_id ?categoryMappedId . + ?category icare:mappedId ?categoryMappedId . OPTIONAL { ?categoryMappedId rdfs:label ?categoryMappedLabel } } } diff --git a/cohort-explorer-ontology.ttl b/cohort-explorer-ontology.ttl index ae1f387..ab30074 100644 --- a/cohort-explorer-ontology.ttl +++ b/cohort-explorer-ontology.ttl @@ -9,148 +9,158 @@ icare: a owl:Ontology ; - rdfs:label "OWL ontology for the iCARE4CVD project, to represent Cohorts and their Variables"^^xsd:string ; + rdfs:label "iCARE4CVD ontology" ; dcterms:license ; vann:preferredNamespacePrefix "icare" ; vann:preferredNamespaceUri "https://w3id.org/icare4cvd/" ; - rdfs:comment "OWL ontology for the iCARE4CVD project, to represent Cohorts tabular file containing Variables for individuals in the cohort (metadata, measurement, etc)"^^xsd:string . + rdfs:comment "OWL ontology for the iCARE4CVD project, to represent Cohorts tabular file containing Variables for individuals in the cohort (metadata, measurement, etc)"@en . # Classes icare:Cohort a owl:Class ; - rdfs:subClassOf owl:Thing ; + # rdfs:subClassOf owl:Thing ; rdfs:label "Cohort" ; - rdfs:comment "A study cohort, represented by a tabular data file (usually rows are for patients infos and measurements)" . + rdfs:comment "A study cohort, represented by a tabular data file (usually rows are for patients infos and measurements)"@en ; + owl:disjointWith icare:Variable, icare:VariableCategory . icare:Variable a owl:Class ; - rdfs:subClassOf owl:Thing ; rdfs:label "Variable" ; - rdfs:comment "A variable in a cohort, represented by a column in the cohort tabular data file" . + rdfs:comment "A variable in a cohort, represented by a column in the cohort tabular data file"@en ; + owl:disjointWith icare:Cohort, icare:VariableCategory . icare:VariableCategory a owl:Class ; - rdfs:subClassOf owl:Thing ; rdfs:label "Variable category" ; - rdfs:comment "Categories for categorical variables" . 
+ rdfs:comment "Categories for categorical variables"@en ; + owl:disjointWith icare:Cohort, icare:Variable . # Cohort Properties icare:institution a owl:DatatypeProperty ; rdfs:label "Institution" ; - rdfs:comment "The institution that conducted the study" ; + rdfs:comment "The institution that conducted the study"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:string . -icare:cohort_type a owl:DatatypeProperty ; +icare:cohortType a owl:DatatypeProperty ; rdfs:label "Cohort type" ; - rdfs:comment "The type of cohort" ; + rdfs:comment "The type of cohort"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:string . icare:email a owl:DatatypeProperty ; rdfs:label "Contact email" ; - rdfs:comment "Email of the contact for this cohort" ; + rdfs:comment "Email of the contact for this cohort"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:string . -icare:study_type a owl:DatatypeProperty ; +icare:studyType a owl:DatatypeProperty ; rdfs:label "Study type" ; - rdfs:comment "The type of study" ; + rdfs:comment "The type of study"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:string . -icare:study_participants a owl:DatatypeProperty ; +icare:studyParticipants a owl:DatatypeProperty ; rdfs:label "Number of study participants" ; - rdfs:comment "The number of participants in the study" ; + rdfs:comment "The number of participants in the study"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:integer . -icare:study_duration a owl:DatatypeProperty ; +icare:studyDuration a owl:DatatypeProperty ; rdfs:label "Study duration" ; - rdfs:comment "The duration of the study" ; + rdfs:comment "The duration of the study"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:string . -icare:study_ongoing a owl:DatatypeProperty ; +icare:studyOngoing a owl:DatatypeProperty ; rdfs:label "Ongoing study" ; - rdfs:comment "Is the study ongoing? Either yes or no" ; + rdfs:comment "Is the study ongoing? Either yes or no"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:string . 
-icare:study_population a owl:DatatypeProperty ; +icare:studyPopulation a owl:DatatypeProperty ; rdfs:label "Study population" ; - rdfs:comment "Description of the population studied" ; + rdfs:comment "Description of the population studied"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:string . -icare:study_objective a owl:DatatypeProperty ; +icare:studyObjective a owl:DatatypeProperty ; rdfs:label "Study objective" ; - rdfs:comment "Description of the study objective" ; + rdfs:comment "Description of the study objective"@en ; rdfs:domain icare:Cohort ; rdfs:range xsd:string . -icare:has_variable a owl:ObjectProperty ; +icare:hasVariable a owl:ObjectProperty ; rdfs:label "Has variable" ; - rdfs:comment "The variables that are included in this cohort" ; + rdfs:comment "The variables that are included in this cohort"@en ; rdfs:domain icare:Cohort ; rdfs:range icare:Variable . +# Should we add inverse? +# icare:isVariableOf a owl:ObjectProperty ; +# owl:inverseOf icare:hasVariable ; +# rdfs:label "Is variable of" ; +# rdfs:comment "The cohort this variable belongs to"@en ; +# rdfs:domain icare:Variable ; +# rdfs:range icare:Cohort . + # Variable Properties -icare:var_type a owl:DatatypeProperty ; +icare:varType a owl:DatatypeProperty ; rdfs:label "Variable type" ; - rdfs:comment "The data type of the variable: STR, INT, FLOAT, DATETIME" ; + rdfs:comment "The data type of the variable: STR, INT, FLOAT, DATETIME"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:string . icare:count a owl:DatatypeProperty ; rdfs:label "Value count" ; - dc:description "The number of rows with values for this variable" ; + rdfs:comment "The number of rows with values for this variable"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:integer . icare:na a owl:DatatypeProperty ; rdfs:label "n/a count" ; - rdfs:comment "The number of n/a rows for this variable" ; + rdfs:comment "The number of n/a rows for this variable"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:integer . 
icare:index a owl:DatatypeProperty ; rdfs:label "Variable index" ; - rdfs:comment "The index of the variable in the cohort tabular data file" ; + rdfs:comment "The index of the variable in the cohort tabular data file"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:integer . icare:max a owl:DatatypeProperty ; rdfs:label "Maximum value" ; - rdfs:comment "The maximum value for this variable" ; + rdfs:comment "The maximum value for this variable"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:decimal . # int + float = decimal icare:min a owl:DatatypeProperty ; rdfs:label "Minimum value" ; - rdfs:comment "The minimum value for this variable" ; + rdfs:comment "The minimum value for this variable"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:decimal . icare:units a owl:DatatypeProperty ; rdfs:label "Units" ; - rdfs:comment "The units of the variable (e.g. kg, mmHg, etc)" ; + rdfs:comment "The units of the variable (e.g. kg, mmHg, etc)"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:string . icare:formula a owl:DatatypeProperty ; rdfs:label "Formula" ; - rdfs:comment "The formula used to calculate this variable" ; + rdfs:comment "The formula used to calculate this variable"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:string . icare:definition a owl:DatatypeProperty ; rdfs:label "Definition" ; - rdfs:comment "A reference to the standard used to define this variable" ; + rdfs:comment "A reference to the standard used to define this variable"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:string . icare:visits a owl:DatatypeProperty ; rdfs:label "Visits" ; - rdfs:comment "Details about the study visit for this variable (e.g. M1, M2, M6)" ; + rdfs:comment "Details about the study visit for this variable (e.g. M1, M2, M6)"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:string . 
# Mapping properties -icare:concept_id a owl:ObjectProperty ; +icare:conceptId a owl:ObjectProperty ; rdfs:label "Concept ID" ; - rdfs:comment "The concept ID for this variable, as provided in the original cohort metadata file" ; + rdfs:comment "The concept ID for this variable, as provided in the original cohort metadata file"@en ; rdfs:domain icare:Variable ; rdfs:range owl:Thing . -icare:mapped_id a owl:DatatypeProperty ; + +icare:mappedId a owl:ObjectProperty ; rdfs:label "Mapped ID" ; - rdfs:comment "The mapped concept ID for this variable or category, defined through the Cohort Explorer service" ; + rdfs:comment "The mapped concept ID for this variable or category, defined through the Cohort Explorer service"@en ; rdfs:domain [ owl:unionOf ( icare:Variable icare:VariableCategory ) ] ; rdfs:range owl:Thing . # Should we limit to OMOP concepts? @@ -158,19 +168,21 @@ icare:mapped_id a owl:DatatypeProperty ; icare:omop a owl:DatatypeProperty ; rdfs:label "OMOP domain" ; - rdfs:comment "The OMOP domain for this variable, such as Measurement, Condition, etc" ; + rdfs:comment "The OMOP domain for this variable, such as Measurement, Condition, etc"@en ; rdfs:domain icare:Variable ; rdfs:range xsd:string . icare:categories a owl:ObjectProperty ; rdfs:label "Variable categories" ; - rdfs:comment "Categories for this variable, if it is a categorical variable" ; + rdfs:comment "Categories for this variable, if it is a categorical variable"@en ; rdfs:domain icare:Variable ; rdfs:range icare:VariableCategory . # Category Properties rdf:value a owl:DatatypeProperty ; + rdfs:label "Category value" ; + rdfs:comment "Value for a category"@en ; rdfs:domain icare:VariableCategory ; rdfs:range xsd:string . @@ -178,6 +190,6 @@ rdf:value a owl:DatatypeProperty ; # Misc. properties rdfs:label a owl:DatatypeProperty ; rdfs:label "Label" ; - rdfs:comment "A concept label" ; + rdfs:comment "A concept label"@en ; rdfs:domain owl:Thing ; rdfs:range xsd:string . 
diff --git a/scripts/docs-build.sh b/scripts/docs-build.sh index ed08cae..973056f 100755 --- a/scripts/docs-build.sh +++ b/scripts/docs-build.sh @@ -14,8 +14,7 @@ rm -r widoco mv docs/index-en.html docs/index.html # Generate JSON-LD context -# java -jar owl2jsonld.jar https://raw.githubusercontent.com/vemonet/omop-cdm-owl/main/$ONTOLOGY_FILE > docs/context.jsonld - +java -jar owl2jsonld.jar https://raw.githubusercontent.com/MaastrichtU-IDS/cohort-explorer/main/$ONTOLOGY_FILE > docs/context.jsonld # Generate docs with Ontospy mkdir -p docs/browse