
Commit

added accession number to sort parameters for export (#273)
sherwoodf authored Jan 3, 2025
1 parent be3fda4 commit d05a9c1
Showing 3 changed files with 20 additions and 14 deletions.
14 changes: 12 additions & 2 deletions bia-export/bia_export/website_export/export_all.py
@@ -5,6 +5,7 @@
 from bia_integrator_api.models import Study
 from .generic_object_retrieval import read_api_json_file
 import logging
+import re
 
 logger = logging.getLogger("__main__." + __name__)

@@ -39,15 +40,24 @@ def fetch_studies_from_api(
 
 
 def get_study_ids(root_directory: Optional[Path] = None):
+
+    def get_accno(acc_id):
+        match = re.search(r"\d+$", acc_id)
+        return int(match.group()) if match else None
+
     if root_directory:
         studies_list = find_local_studies(root_directory)
         sorted_studies = sorted(
-            studies_list, key=lambda study: study.release_date, reverse=True
+            studies_list,
+            key=lambda study: (study.release_date, get_accno(study.accession_id)),
+            reverse=True,
         )
         return [study.accession_id for study in sorted_studies]
     else:
         studies_list = fetch_studies_from_api(page_size=100)
         sorted_studies = sorted(
-            studies_list, key=lambda study: study.release_date, reverse=True
+            studies_list,
+            key=lambda study: (study.release_date, get_accno(study.accession_id)),
+            reverse=True,
        )
        return [study.uuid for study in sorted_studies]
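
The compound key sorts primarily by release_date and breaks ties with the numeric suffix of the accession id; reverse=True puts the newest dates and highest accession numbers first. A minimal sketch of that behaviour, using made-up accession ids and a stand-in dataclass rather than the real API Study model:

    import re
    from dataclasses import dataclass

    def get_accno(acc_id):
        # Pull the trailing digits from an accession id, e.g. "S-BIAD123" -> 123.
        match = re.search(r"\d+$", acc_id)
        return int(match.group()) if match else None

    @dataclass
    class FakeStudy:  # hypothetical stand-in for bia_integrator_api.models.Study
        accession_id: str
        release_date: str

    studies = [
        FakeStudy("S-BIAD9", "2025-01-02"),
        FakeStudy("S-BIAD1001", "2025-01-02"),
        FakeStudy("S-BIAD500", "2024-12-31"),
    ]
    ordered = sorted(
        studies,
        key=lambda s: (s.release_date, get_accno(s.accession_id)),
        reverse=True,
    )
    print([s.accession_id for s in ordered])
    # ['S-BIAD1001', 'S-BIAD9', 'S-BIAD500']

On a date tie the tuple comparison falls back to the integer accession number, so "S-BIAD1001" now reliably sorts above "S-BIAD9" instead of depending on input order.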
16 changes: 6 additions & 10 deletions bia-export/bia_export/website_export/studies/retrieve.py
@@ -110,11 +110,9 @@ def write_to_cache(dataset_uuid, aggregation_fields):
     logging.info(f"writing to dataset aggregation cache for dataset: {dataset_uuid}")
 
     cache_file = (
-        Path(__file__)
-        .parent.parent.parent.parent.join(
-            "cached_computed_data", "dataset_aggregate_fields.json"
-        )
-        .absolute()
+        Path(__file__).parents[3].absolute()
+        / "cached_computed_data"
+        / "dataset_aggregate_fields.json"
     )
     with open(cache_file, "r") as object_file:
         try:
@@ -131,11 +129,9 @@ def write_to_cache(dataset_uuid, aggregation_fields):
 def aggregate_file_list_data(context: StudyCLIContext) -> None:
     if context.cache_use == CacheUse.READ_CACHE:
         cache_file = (
-            Path(__file__)
-            .parent.parent.parent.parent.join(
-                "cached_computed_data", "dataset_aggregate_fields.json"
-            )
-            .absolute()
+            Path(__file__).parents[3].absolute()
+            / "cached_computed_data"
+            / "dataset_aggregate_fields.json"
         )
 
         with open(cache_file, "r") as object_file:
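
Both rewritten blocks fix the same bug: pathlib.Path has no join() method (the pathlib spellings are joinpath() or the / operator), so the old chain would have raised AttributeError whenever the cache was touched. parents[3] also replaces the four chained .parent calls in one step. A short sketch under an assumed checkout path; the path literal is illustrative only:

    from pathlib import Path

    p = Path("/repo/bia-export/bia_export/website_export/studies/retrieve.py")

    # parents[3] is the fourth ancestor, identical to chaining .parent four times.
    assert p.parents[3] == p.parent.parent.parent.parent == Path("/repo/bia-export")

    cache_file = (
        p.parents[3].absolute()
        / "cached_computed_data"
        / "dataset_aggregate_fields.json"
    )
    print(cache_file)
    # /repo/bia-export/cached_computed_data/dataset_aggregate_fields.json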
4 changes: 2 additions & 2 deletions bia-export/bia_export/website_export/studies/transform.py
@@ -4,7 +4,7 @@
     Dataset,
     Study,
     StudyCLIContext,
-    CacheUse
+    CacheUse,
 )
 from bia_export.website_export.studies.retrieve import (
     retrieve_study,
@@ -30,8 +30,8 @@
 
 
 def transform_study(context: StudyCLIContext) -> Study:
-
     api_study = retrieve_study(context)
+    logger.info(f"Processing study: {api_study.accession_id}")
     study_dict = api_study.model_dump()
 
     # Collect file list information prior to creating eid if reading locally to avoid reading them multiple times.
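
For context on the surrounding lines: model_dump() is the Pydantic v2 call that converts the validated API model into a plain dict, which transform_study() goes on to reshape into the website Study. A tiny sketch with a made-up model standing in for the real one:

    from pydantic import BaseModel

    class MiniStudy(BaseModel):  # hypothetical stand-in for the API Study model
        accession_id: str
        title: str

    api_study = MiniStudy(accession_id="S-BIAD123", title="Example study")
    print(f"Processing study: {api_study.accession_id}")  # mirrors the new log line
    study_dict = api_study.model_dump()
    # {'accession_id': 'S-BIAD123', 'title': 'Example study'}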
