Skip to content

Commit

Permalink
add button to download DCR definition file directly. upgrade decentriq-platform dep to 0.33
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Oct 24, 2024
1 parent 40be5ba commit 08108e6
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 72 deletions.
4 changes: 3 additions & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ classifiers = [
]

dependencies = [
"decentriq_platform >=0.26.3",
"decentriq_platform >=0.33.0",
"curies",
"pandas[excel,spss]",
"pyarrow",
Expand Down Expand Up @@ -123,6 +123,8 @@ cov-check = [
]
compile = "pip-compile -o requirements.txt pyproject.toml"
# TODO: use uv
# uv venv
# uv pip install .
# uv pip compile pyproject.toml -o requirements.txt
# uv run ruff format
# uv run ruff check src --fix
Expand Down
81 changes: 44 additions & 37 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ cbor2==5.6.2
# via decentriq-platform
certifi==2023.11.17
# via
# decentriq-platform
# httpcore
# httpx
# requests
Expand All @@ -45,7 +44,7 @@ click==8.1.7
# via uvicorn
coloredlogs==15.0.1
# via onnxruntime
cryptography==42.0.5
cryptography==42.0.8
# via
# decentriq-platform
# pyopenssl
Expand All @@ -54,10 +53,12 @@ curies==0.7.9
# via cohort-explorer-backend (pyproject.toml)
dataclasses-json==0.6.7
# via langchain-community
decentriq-dcr-compiler==0.7.1
decentriq-dcr-compiler==0.13.0
# via decentriq-platform
decentriq-platform==0.27.2
decentriq-platform==0.33.0
# via cohort-explorer-backend (pyproject.toml)
decentriq-transparency-verification==0.1.0.dev2
# via decentriq-platform
defusedxml==0.7.1
# via odfpy
diskcache==5.6.3
Expand Down Expand Up @@ -123,6 +124,7 @@ httpx==0.27.0
# cohort-explorer-backend (pyproject.toml)
# groq
# langsmith
# ollama
# openai
# qdrant-client
huggingface-hub==0.25.1
Expand All @@ -136,9 +138,10 @@ humanfriendly==10.0
# via coloredlogs
hyperframe==6.0.1
# via h2
idna==3.6
idna==3.10
# via
# anyio
# decentriq-platform
# httpx
# requests
# yarl
Expand Down Expand Up @@ -166,6 +169,7 @@ langchain-core==0.2.41
# langchain-community
# langchain-groq
# langchain-huggingface
# langchain-ollama
# langchain-openai
# langchain-qdrant
# langchain-text-splitters
Expand All @@ -174,6 +178,8 @@ langchain-groq==0.1.10
# via cohort-explorer-backend (pyproject.toml)
langchain-huggingface==0.0.3
# via cohort-explorer-backend (pyproject.toml)
langchain-ollama==0.1.3
# via cohort-explorer-backend (pyproject.toml)
langchain-openai==0.1.20
# via
# cohort-explorer-backend (pyproject.toml)
Expand Down Expand Up @@ -224,40 +230,42 @@ numpy==1.26.4
# rank-bm25
# scikit-learn
# scipy
# # transformers
# nvidia-cublas-cu12==12.1.3.1
# transformers
nvidia-cublas-cu12==12.1.3.1
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
# nvidia-cuda-cupti-cu12==12.1.105
# # via torch
# nvidia-cuda-nvrtc-cu12==12.1.105
# # via torch
# nvidia-cuda-runtime-cu12==12.1.105
# # via torch
# nvidia-cudnn-cu12==9.1.0.70
# # via torch
# nvidia-cufft-cu12==11.0.2.54
# # via torch
# nvidia-curand-cu12==10.3.2.106
# # via torch
# nvidia-cusolver-cu12==11.4.5.107
# # via torch
# nvidia-cusparse-cu12==12.1.0.106
# # via
# # nvidia-cusolver-cu12
# # torch
# nvidia-nccl-cu12==2.20.5
# # via torch
# nvidia-nvjitlink-cu12==12.6.77
# # via
# # nvidia-cusolver-cu12
# # nvidia-cusparse-cu12
# nvidia-nvtx-cu12==12.1.105
nvidia-cuda-cupti-cu12==12.1.105
# via torch
nvidia-cuda-nvrtc-cu12==12.1.105
# via torch
nvidia-cuda-runtime-cu12==12.1.105
# via torch
nvidia-cudnn-cu12==9.1.0.70
# via torch
nvidia-cufft-cu12==11.0.2.54
# via torch
nvidia-curand-cu12==10.3.2.106
# via torch
nvidia-cusolver-cu12==11.4.5.107
# via torch
nvidia-cusparse-cu12==12.1.0.106
# via
# nvidia-cusolver-cu12
# torch
nvidia-nccl-cu12==2.20.5
# via torch
nvidia-nvjitlink-cu12==12.6.77
# via
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
nvidia-nvtx-cu12==12.1.105
# via torch
odfpy==1.4.1
# via pandas
ollama==0.3.3
# via langchain-ollama
onnx==1.17.0
# via fastembed
onnxruntime==1.19.2
Expand Down Expand Up @@ -374,7 +382,7 @@ regex==2024.9.11
# via
# tiktoken
# transformers
requests==2.31.0
requests==2.32.3
# via
# curies
# decentriq-platform
Expand Down Expand Up @@ -431,8 +439,6 @@ sqlalchemy==2.0.35
# via
# langchain
# langchain-community
sqloxide==0.1.43
# via decentriq-platform
starlette==0.36.3
# via fastapi
sympy==1.13.3
Expand Down Expand Up @@ -473,7 +479,7 @@ transformers==4.43.4
# adapters
# langchain-huggingface
# sentence-transformers
# triton==3.0.0
triton==3.0.0
# via torch
typing-extensions==4.9.0
# via
Expand All @@ -496,8 +502,9 @@ typing-inspect==0.9.0
# via dataclasses-json
tzdata==2024.1
# via pandas
urllib3==2.2.1
urllib3==1.26.19
# via
# decentriq-platform
# qdrant-client
# requests
uvicorn==0.27.1
Expand Down
58 changes: 45 additions & 13 deletions backend/src/decentriq.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from copy import deepcopy
import json
from typing import Any

import decentriq_platform as dq
Expand Down Expand Up @@ -107,16 +108,11 @@ def pandas_script_merge_cohorts(merged_cohorts: dict[str, list[str]], all_cohort
return merge_script


@router.post(
"/create-dcr",
name="Create Data Clean Room for computing",
response_description="Upload result",
)
async def create_compute_dcr(
async def get_compute_dcr_definition(
cohorts_request: dict[str, Any],
user: Any = Depends(get_current_user),
) -> dict[str, Any]:
"""Create a Data Clean Room for computing with the cohorts requested using Decentriq SDK"""
user: Any,
client: Any,
) -> Any:
# users = [user["email"]]
# TODO: cohorts_request could also be a dict of union of cohorts to merge
# {"cohorts": {"cohort_id": ["var1", "var2"], "merged_cohort3": {"cohort1": ["weight", "sex"], "cohort2": ["gender", "patient weight"]}}}
Expand All @@ -143,8 +139,6 @@ async def create_compute_dcr(
else:
raise HTTPException(status_code=400, detail=f"Invalid structure for cohort {cohort_id}")

# Establish connection to Decentriq
client = dq.create_client(settings.decentriq_email, settings.decentriq_token)

# Creation of a Data Clean Room (DCR)
data_nodes = []
Expand Down Expand Up @@ -231,13 +225,51 @@ async def create_compute_dcr(
)

# Build and publish DCR
dcr_definition = builder.build()
return builder.build(), dcr_title



@router.post(
    "/create-compute-dcr",
    name="Create Data Clean Room for computing",
    response_description="Upload result",
)
async def create_compute_dcr(
    cohorts_request: dict[str, Any],
    user: Any = Depends(get_current_user),
) -> dict[str, Any]:
    """Create and publish a Data Clean Room for computing with the requested cohorts.

    Builds the DCR definition via `get_compute_dcr_definition`, publishes it to the
    Decentriq platform, and returns a summary dict containing the DCR URL and title,
    merged with the original request payload.

    :param cohorts_request: mapping of cohort IDs to requested variables
        (presumably ``{"cohorts": {cohort_id: [vars...]}}`` — see get_compute_dcr_definition).
    :param user: authenticated user injected by FastAPI (``get_current_user``).
    :return: dict with ``message``, ``dcr_url``, ``dcr_title`` plus the echoed request.
    """
    # Establish connection to Decentriq using service credentials from settings
    client = dq.create_client(settings.decentriq_email, settings.decentriq_token)

    # Build (but do not yet publish) the DCR definition for the requested cohorts
    dcr_definition, dcr_title = await get_compute_dcr_definition(cohorts_request, user, client)

    # Publish the definition; the resulting DCR id forms the platform URL
    dcr = client.publish_analytics_dcr(dcr_definition)
    dcr_url = f"https://platform.decentriq.com/datarooms/p/{dcr.id}"
    return {
        "message": f"Data Clean Room available for compute at {dcr_url}",
        "dcr_url": dcr_url,
        "dcr_title": dcr_title,
        # "merge_script": pandas_script,
        # NOTE(review): pandas_script is not defined in this function (it was computed
        # in the pre-refactor monolithic endpoint); the stale uncommented reference
        # would raise NameError, so only the commented-out placeholder is kept.
        **cohorts_request,
    }


@router.post(
    "/get-compute-dcr-definition",
    name="Get the Data Clean Room definition for computing as JSON",
    response_description="Upload result",
)
async def api_get_compute_dcr_definition(
    cohorts_request: dict[str, Any],
    user: Any = Depends(get_current_user),
) -> Any:
    """Return the Data Clean Room definition for the requested cohorts as JSON.

    Builds the DCR definition with the Decentriq SDK and returns its high-level
    representation directly, without publishing the DCR to the platform.
    """
    # Connect to Decentriq with the service credentials configured in settings
    client = dq.create_client(settings.decentriq_email, settings.decentriq_token)

    # The title is not needed when only downloading the definition
    definition, _ = await get_compute_dcr_definition(cohorts_request, user, client)

    return definition.high_level
Loading

0 comments on commit 08108e6

Please sign in to comment.