From aaa33a0cc8625457d990e62a50d644a066d22c2a Mon Sep 17 00:00:00 2001
From: Tamara Slosarek
Date: Thu, 4 Jul 2024 15:56:57 +0200
Subject: [PATCH] feat(scripts): write JSON body properly

---
 anni/README.md               | 16 ++++++++++++++++
 anni/backupper/README.md     |  2 +-
 scripts/README.md            | 29 +++++++++++++++--------------
 scripts/common/write_data.py | 22 +++++++++++++++++++++-
 4 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/anni/README.md b/anni/README.md
index ec9210c1c..1f5c1b830 100644
--- a/anni/README.md
+++ b/anni/README.md
@@ -14,3 +14,19 @@ interface.
 ## Getting Started
 
 See our [Contribution Guide](CONTRIBUTING.md) to get started.
+
+## Updating
+
+To update data from external sources, execute the following steps (API
+request can be executed with Postman):
+
+* Get current backup using the `GET /api/backup` route and save the response to
+  `./backup.base64.json`
+* Init external sources using the `POST /api/init-external` route (overwrites
+  present data, make sure it was backed up properly)
+* Get overwritten backup using the `/api/backup` route again and save the
+  response to `./initiated.base64.json`
+* Run update script
+  `python update.py ./backup.base64.json ./initiated.base64.json`
+* Upload the updated backup `./backup_updated_[timestamp].base64.json` with
+  `POST /api/backup`
diff --git a/anni/backupper/README.md b/anni/backupper/README.md
index 0f0df89fa..732918f59 100644
--- a/anni/backupper/README.md
+++ b/anni/backupper/README.md
@@ -33,5 +33,5 @@ Backupper has its own environment file!).
 
 Then, run the following command from the repository's root:
 ```sh
-docker compose --file anni/docker-compose.yaml --profile production --profile with-backupper up
+docker compose --file anni/docker-compose.yaml --profile production --profile with-backupper up -d
 ```
diff --git a/scripts/README.md b/scripts/README.md
index 7fc361473..341990f5e 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -15,16 +15,17 @@ pip install -r requirements.txt
 🗒️ _Note: for VS Code, you might need to set the Python interpreter for_
 📜 _Scripts to the created `.venv`._
 
-As input, Anni backup data is assumed, either in JSON format, or Base64 format
-containing a zipped JSON (depending on the script).
+As input, Anni backup data is assumed, either (1) in JSON format, or (2) as a
+JSON response with Base64 text containing the zipped JSON (depending on the
+script).
 
 Tests can be executed by running `pytest`.
 
 ## Update external data
 
-Run `pyhthon update.py .base64
-.base64` to receive
-`_updated_.base64` and
+Run `python update.py .base64.json
+.base64.json` to receive
+`_updated_.base64.json` and
 `_updated__log.md`.
 
 The script will update external data of the old backup based on the recently
@@ -35,21 +36,21 @@ initialized external data and describe updates in the log.
 Sometimes updates get too large to upload again, then it helps to reset the
 data by deleting the history data and published data.
 
-Run `python reset.py .base64` to receive
-`_reset_.base64`.
+Run `python reset.py .base64.json` to receive
+`_reset_.base64.json`.
 
 ## Unstage data
 
 This is probably a use case only relevant once, we want to unstage all data and
 do a second review.
 
-Run `python unstage.py .base64` to receive
-`_reset_.base64`.
+Run `python unstage.py .base64.json` to receive
+`_reset_.base64.json`.
 
 ## Migrate data
 
-Run `pyhthon migrate.py [.json|.base64]` to receive
-`_migrated_.base64`.
+Run `python migrate.py [.json|.base64.json]` to receive
+`_migrated_.base64.json`.
 
 **⚠️ Migrating data will remove the data history, including
 published versions!**
@@ -68,13 +69,13 @@ published versions!**
 
 ## Decode Base64
 
-Run `python decode.py .base64` to receive
+Run `python decode.py .base64.json` to receive
 `_decoded_.json`.
 
 ## Encode Base64
 
 Run `python encode.py .json` to receive
-`_encoded_.base64`.
+`_encoded_.base64.json`.
 
 ## Clean script outputs
 
@@ -86,7 +87,7 @@ Run `python clean.py` to remove the `scripts/temp` directory and all files in
 
 Run `python analyze.py [--correct]` to analyze annotations and optionally
 correct what can be corrected easily in
-`_corrected_.base64`.
+`_corrected_.base64.json`.
 
 | Check | Description | `--correct`ed | Only for single-gene results* |
 | ----- | ----------- | ------------- | ----------------------------- |
diff --git a/scripts/common/write_data.py b/scripts/common/write_data.py
index aeed07439..bb18fcb22 100644
--- a/scripts/common/write_data.py
+++ b/scripts/common/write_data.py
@@ -35,17 +35,37 @@ def write_json_file(data, file_path):
         json.dump(data, json_file)
 
 def write_data(data, postfix=''):
+    empty_tables = []
+    for table_name in data.keys():
+        if len(data[table_name]) == 0:
+            empty_tables.append(table_name)
+    for table_name in empty_tables:
+        del data[table_name]
     json_temp_path = get_output_file_path(
         postfix, file_ending=JSON_ENDING, temp=True)
     zip_temp_path = get_output_file_path(
         postfix, file_ending=ZIP_ENDING, temp=True)
+    base64_temp_path = get_output_file_path(
+        postfix, file_ending='.base64', temp=True)
     output_path = get_output_file_path(postfix)
     write_json_file(data, json_temp_path)
     with zipfile.ZipFile(zip_temp_path, 'w') as zip_file:
         zip_file.write(json_temp_path, arcname=get_archive_name())
     with open(zip_temp_path, 'rb') as zip_file:
-        with open(output_path, 'wb') as base64_file:
+        with open(base64_temp_path, 'wb') as base64_file:
             base64.encode(zip_file, base64_file)
+    with open(base64_temp_path, 'r') as base64_file:
+        with open(output_path, 'w') as output_file:
+            base64_string = ''
+            for line in base64_file.readlines():
+                base64_part = line.strip()
+                if base64_part != '':
+                    base64_string += base64_part
+            json.dump({
+                'data': {
+                    'base64': base64_string
+                }
+            }, output_file)
 
 def write_log(log_content, postfix):
     log_file_postfix = postfix + '_log'