From 41aef7d3519cc6f80c4a86af37532fbaa76adbee Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Wed, 10 Apr 2024 17:35:22 +0200 Subject: [PATCH] improve airlock handling --- README.md | 4 ++-- backend/src/upload.py | 2 +- backend/src/utils.py | 5 +++-- cohort-explorer-ontology.ttl | 7 +++++++ 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 37fb904..54d0250 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ It aims to enable data owners and data scientists to: This platform is composed of 3 main components: * **[Oxigraph](https://github.com/oxigraph/oxigraph) triplestore** containing the cohorts and their variables metadata, exposing a SPARQL endpoint only available to the backend API. - * The data stored in the triplestore complies with the custom **[iCARE4CVD OWL ontology](https://maastrichtu-ids.github.io/cohort-explorer/)**. It contains 3 classes: Cohort, Variable, and Variable category. + * The data stored in the triplestore complies with the custom **[iCARE4CVD OWL ontology](https://maastrichtu-ids.github.io/cohort-explorer/)**. It contains 3 classes: Cohort, Variable, and Variable category. You can explore the ontology classes and properties [here](https://maastrichtu-ids.github.io/cohort-explorer/browse). * **`backend/` server**, built with python, FastAPI and RDFLib. * **`frontend/` web app** running on the client, built with TypeScript, NextJS, ReactJS, TailwindCSS, and DaisyUI. @@ -214,7 +214,7 @@ docker compose exec backend curl -X POST -T /data/triplestore_dump_20240225.nq - If you need to move the app to a different server, just copy the whole `data/` folder. -### ✨ Automatically generate variables metadata +## ✨ Automatically generate variables metadata Experimental: you can use the [`csvw-ontomap`](https://github.com/vemonet/csvw-ontomap) python package to automatically generate a CSV metadata file for your data file, with the format expected by iCARE4CVD. It will automatically fill the following columns: var name, var type, categorical, min, max. But it does not properly extract datetime data types. diff --git a/backend/src/upload.py b/backend/src/upload.py index 293fb78..f8d6e2f 100644 --- a/backend/src/upload.py +++ b/backend/src/upload.py @@ -220,7 +220,7 @@ def load_cohort_dict_file(dict_path: str, cohort_id: str, airlock: bool) -> Data g = init_graph() g.add((cohort_uri, RDF.type, ICARE.Cohort, cohort_uri)) g.add((cohort_uri, DC.identifier, Literal(cohort_id), cohort_uri)) - g.add((cohort_uri, ICARE.previewEnabled, Literal(str(airlock).lower()), cohort_uri)) + g.add((cohort_uri, ICARE.previewEnabled, Literal(str(airlock).lower(), datatype=XSD.boolean), cohort_uri)) # Record all errors and raise them at the end errors = [] diff --git a/backend/src/utils.py b/backend/src/utils.py index 21789b7..c88cedc 100644 --- a/backend/src/utils.py +++ b/backend/src/utils.py @@ -104,10 +104,11 @@ def run_query(query: str) -> dict[str, Any]: def get_value(key: str, row: dict[str, Any]) -> str | None: return str(row[key]["value"]) if key in row and row[key]["value"] else None - def get_int_value(key: str, row: dict[str, Any]) -> int | None: return int(row[key]["value"]) if key in row and row[key]["value"] else None +def get_bool_value(key: str, row: dict[str, Any]) -> bool: + return str(row[key]["value"]).lower() == "true" if key in row and row[key]["value"] else False def get_curie_value(key: str, row: dict[str, Any]) -> int | None: return converter.compress(get_value(key, row)) if get_value(key, row) else None @@ -140,7 +141,7 @@ def retrieve_cohorts_metadata(user_email: str) -> dict[str, Cohort]: study_population=get_value("study_population", row), study_objective=get_value("study_objective", row), variables={}, # You might want to populate this separately, depending on your data structure - airlock=get_value("airlock", row), + airlock=get_bool_value("airlock", row), can_edit=user_email in [*settings.admins_list, get_value("cohortEmail", row)], ) elif get_value("cohortEmail", row) not in target_dict[cohort_id].cohort_email: diff --git a/cohort-explorer-ontology.ttl b/cohort-explorer-ontology.ttl index ef9ab5f..c0b8e0f 100644 --- a/cohort-explorer-ontology.ttl +++ b/cohort-explorer-ontology.ttl @@ -104,6 +104,13 @@ icare:studyObjective a owl:DatatypeProperty ; rdfs:range xsd:string ; rdfs:isDefinedBy icare: . +icare:previewEnabled a owl:DatatypeProperty ; + rdfs:label "Data preview enabled"@en ; + rdfs:comment "If data preview (airlock) is enabled for this cohort"@en ; + rdfs:domain icare:Cohort ; + rdfs:range xsd:boolean ; + rdfs:isDefinedBy icare: . + icare:hasVariable a owl:ObjectProperty ; rdfs:label "Has variable"@en ; rdfs:comment "The variables that are included in this cohort"@en ;