
Commit

added accession number to sort parameters for export (#273)
sherwoodf authored Jan 3, 2025
1 parent be3fda4 commit d05a9c1
Showing 3 changed files with 20 additions and 14 deletions.
14 changes: 12 additions & 2 deletions bia-export/bia_export/website_export/export_all.py
@@ -5,6 +5,7 @@
 from bia_integrator_api.models import Study
 from .generic_object_retrieval import read_api_json_file
 import logging
+import re
 
 logger = logging.getLogger("__main__." + __name__)

@@ -39,15 +40,24 @@ def fetch_studies_from_api(
 
 
 def get_study_ids(root_directory: Optional[Path] = None):
+
+    def get_accno(acc_id):
+        match = re.search(r"\d+$", acc_id)
+        return int(match.group()) if match else None
+
     if root_directory:
         studies_list = find_local_studies(root_directory)
         sorted_studies = sorted(
-            studies_list, key=lambda study: study.release_date, reverse=True
+            studies_list,
+            key=lambda study: (study.release_date, get_accno(study.accession_id)),
+            reverse=True,
         )
         return [study.accession_id for study in sorted_studies]
     else:
         studies_list = fetch_studies_from_api(page_size=100)
         sorted_studies = sorted(
-            studies_list, key=lambda study: study.release_date, reverse=True
+            studies_list,
+            key=lambda study: (study.release_date, get_accno(study.accession_id)),
+            reverse=True,
        )
        return [study.uuid for study in sorted_studies]
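
The compound key sorts primarily by release_date and breaks ties with the numeric suffix of the accession id; reverse=True puts the newest dates and highest accession numbers first. A minimal sketch of that behaviour, using made-up accession ids and a stand-in dataclass rather than the real API Study model:

    import re
    from dataclasses import dataclass

    def get_accno(acc_id):
        # Pull the trailing digits from an accession id, e.g. "S-BIAD123" -> 123.
        match = re.search(r"\d+$", acc_id)
        return int(match.group()) if match else None

    @dataclass
    class FakeStudy:  # hypothetical stand-in for bia_integrator_api.models.Study
        accession_id: str
        release_date: str

    studies = [
        FakeStudy("S-BIAD9", "2025-01-02"),
        FakeStudy("S-BIAD1001", "2025-01-02"),
        FakeStudy("S-BIAD500", "2024-12-31"),
    ]
    ordered = sorted(
        studies,
        key=lambda s: (s.release_date, get_accno(s.accession_id)),
        reverse=True,
    )
    print([s.accession_id for s in ordered])
    # ['S-BIAD1001', 'S-BIAD9', 'S-BIAD500']

On a date tie the tuple comparison falls back to the integer accession number, so "S-BIAD1001" now reliably sorts above "S-BIAD9" instead of depending on input order.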
16 changes: 6 additions & 10 deletions bia-export/bia_export/website_export/studies/retrieve.py
@@ -110,11 +110,9 @@ def write_to_cache(dataset_uuid, aggregation_fields):
     logging.info(f"writing to dataset aggregation cache for dataset: {dataset_uuid}")
 
     cache_file = (
-        Path(__file__)
-        .parent.parent.parent.parent.join(
-            "cached_computed_data", "dataset_aggregate_fields.json"
-        )
-        .absolute()
+        Path(__file__).parents[3].absolute()
+        / "cached_computed_data"
+        / "dataset_aggregate_fields.json"
     )
     with open(cache_file, "r") as object_file:
         try:
@@ -131,11 +129,9 @@ def write_to_cache(dataset_uuid, aggregation_fields):
 def aggregate_file_list_data(context: StudyCLIContext) -> None:
     if context.cache_use == CacheUse.READ_CACHE:
         cache_file = (
-            Path(__file__)
-            .parent.parent.parent.parent.join(
-                "cached_computed_data", "dataset_aggregate_fields.json"
-            )
-            .absolute()
+            Path(__file__).parents[3].absolute()
+            / "cached_computed_data"
+            / "dataset_aggregate_fields.json"
         )
 
         with open(cache_file, "r") as object_file:
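
Both rewritten blocks fix the same bug: pathlib.Path has no join() method (the pathlib spellings are joinpath() or the / operator), so the old chain would have raised AttributeError whenever the cache was touched. parents[3] also replaces the four chained .parent calls in one step. A short sketch under an assumed checkout path; the path literal is illustrative only:

    from pathlib import Path

    p = Path("/repo/bia-export/bia_export/website_export/studies/retrieve.py")

    # parents[3] is the fourth ancestor, identical to chaining .parent four times.
    assert p.parents[3] == p.parent.parent.parent.parent == Path("/repo/bia-export")

    cache_file = (
        p.parents[3].absolute()
        / "cached_computed_data"
        / "dataset_aggregate_fields.json"
    )
    print(cache_file)
    # /repo/bia-export/cached_computed_data/dataset_aggregate_fields.json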
4 changes: 2 additions & 2 deletions bia-export/bia_export/website_export/studies/transform.py
@@ -4,7 +4,7 @@
     Dataset,
     Study,
     StudyCLIContext,
-    CacheUse
+    CacheUse,
 )
 from bia_export.website_export.studies.retrieve import (
     retrieve_study,
@@ -30,8 +30,8 @@
 
 
 def transform_study(context: StudyCLIContext) -> Study:
-
     api_study = retrieve_study(context)
+    logger.info(f"Processing study: {api_study.accession_id}")
     study_dict = api_study.model_dump()
 
     # Collect file list information prior to creating eid if reading locally to avoid reading them multiple times.
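
For context on the surrounding lines: model_dump() is the Pydantic v2 call that converts the validated API model into a plain dict, which transform_study() goes on to reshape into the website Study. A tiny sketch with a made-up model standing in for the real one:

    from pydantic import BaseModel

    class MiniStudy(BaseModel):  # hypothetical stand-in for the API Study model
        accession_id: str
        title: str

    api_study = MiniStudy(accession_id="S-BIAD123", title="Example study")
    print(f"Processing study: {api_study.accession_id}")  # mirrors the new log line
    study_dict = api_study.model_dump()
    # {'accession_id': 'S-BIAD123', 'title': 'Example study'}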
