From 41aef7d3519cc6f80c4a86af37532fbaa76adbee Mon Sep 17 00:00:00 2001
From: Vincent Emonet <vincent.emonet@gmail.com>
Date: Wed, 10 Apr 2024 17:35:22 +0200
Subject: [PATCH] improve airlock handling: store previewEnabled as xsd:boolean

---
 README.md                    | 4 ++--
 backend/src/upload.py        | 2 +-
 backend/src/utils.py         | 5 +++--
 cohort-explorer-ontology.ttl | 7 +++++++
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 37fb904..54d0250 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ It aims to enable data owners and data scientists to:
 This platform is composed of 3 main components:
 
 *   **[Oxigraph](https://github.com/oxigraph/oxigraph) triplestore** containing the cohorts and their variables metadata, exposing a SPARQL endpoint only available to the backend API.
-    *   The data stored in the triplestore complies with the custom **[iCARE4CVD OWL ontology](https://maastrichtu-ids.github.io/cohort-explorer/)**. It contains 3 classes: Cohort, Variable, and Variable category.
+    *   The data stored in the triplestore complies with the custom **[iCARE4CVD OWL ontology](https://maastrichtu-ids.github.io/cohort-explorer/)**. It contains 3 classes: Cohort, Variable, and Variable category. You can explore the ontology classes and properties in the [ontology browser](https://maastrichtu-ids.github.io/cohort-explorer/browse).
 
 *   **`backend/` server**, built with python, FastAPI and RDFLib.
 *   **`frontend/` web app** running on the client, built with TypeScript, NextJS, ReactJS, TailwindCSS, and DaisyUI.
@@ -214,7 +214,7 @@ docker compose exec backend curl -X POST -T /data/triplestore_dump_20240225.nq -
 
 If you need to move the app to a different server, just copy the whole `data/` folder.
 
-### ✨ Automatically generate variables metadata
+## ✨ Automatically generate variables metadata
 
 Experimental: you can use the [`csvw-ontomap`](https://github.com/vemonet/csvw-ontomap) python package to automatically generate a CSV metadata file for your data file, with the format expected by iCARE4CVD. It will automatically fill the following columns: var name, var type, categorical, min, max. But it does not properly extract datetime data types.
 
diff --git a/backend/src/upload.py b/backend/src/upload.py
index 293fb78..f8d6e2f 100644
--- a/backend/src/upload.py
+++ b/backend/src/upload.py
@@ -220,7 +220,7 @@ def load_cohort_dict_file(dict_path: str, cohort_id: str, airlock: bool) -> Data
         g = init_graph()
         g.add((cohort_uri, RDF.type, ICARE.Cohort, cohort_uri))
         g.add((cohort_uri, DC.identifier, Literal(cohort_id), cohort_uri))
-        g.add((cohort_uri, ICARE.previewEnabled, Literal(str(airlock).lower()), cohort_uri))
+        g.add((cohort_uri, ICARE.previewEnabled, Literal(str(airlock).lower(), datatype=XSD.boolean), cohort_uri))
 
         # Record all errors and raise them at the end
         errors = []
diff --git a/backend/src/utils.py b/backend/src/utils.py
index 21789b7..c88cedc 100644
--- a/backend/src/utils.py
+++ b/backend/src/utils.py
@@ -104,10 +104,11 @@ def run_query(query: str) -> dict[str, Any]:
 def get_value(key: str, row: dict[str, Any]) -> str | None:
     return str(row[key]["value"]) if key in row and row[key]["value"] else None
 
-
 def get_int_value(key: str, row: dict[str, Any]) -> int | None:
     return int(row[key]["value"]) if key in row and row[key]["value"] else None
 
+def get_bool_value(key: str, row: dict[str, Any]) -> bool:
+    return str(row[key]["value"]).lower() == "true" if key in row and row[key]["value"] else False
 
 def get_curie_value(key: str, row: dict[str, Any]) -> int | None:
     return converter.compress(get_value(key, row)) if get_value(key, row) else None
@@ -140,7 +141,7 @@ def retrieve_cohorts_metadata(user_email: str) -> dict[str, Cohort]:
                 study_population=get_value("study_population", row),
                 study_objective=get_value("study_objective", row),
                 variables={},  # You might want to populate this separately, depending on your data structure
-                airlock=get_value("airlock", row),
+                airlock=get_bool_value("airlock", row),
                 can_edit=user_email in [*settings.admins_list, get_value("cohortEmail", row)],
             )
         elif get_value("cohortEmail", row) not in target_dict[cohort_id].cohort_email:
diff --git a/cohort-explorer-ontology.ttl b/cohort-explorer-ontology.ttl
index ef9ab5f..c0b8e0f 100644
--- a/cohort-explorer-ontology.ttl
+++ b/cohort-explorer-ontology.ttl
@@ -104,6 +104,13 @@ icare:studyObjective a owl:DatatypeProperty ;
     rdfs:range xsd:string ;
     rdfs:isDefinedBy icare: .
 
+icare:previewEnabled a owl:DatatypeProperty ;
+	rdfs:label "Data preview enabled"@en ;
+	rdfs:comment "Whether data preview (airlock) is enabled for this cohort"@en ;
+    rdfs:domain icare:Cohort ;
+    rdfs:range xsd:boolean ;
+    rdfs:isDefinedBy icare: .
+
 icare:hasVariable a owl:ObjectProperty ;
 	rdfs:label "Has variable"@en ;
 	rdfs:comment "The variables that are included in this cohort"@en ;