From 5584566773ac2e48e86893c79ca08605c6510a74 Mon Sep 17 00:00:00 2001 From: Trapsilo Bumi Date: Fri, 20 Jan 2023 22:04:57 +0700 Subject: [PATCH] Miscellaneous updates and fixes (#39) * Update readme & TODO * Update some logic and fix typos --- README.md | 29 +++++++++++++++++++---------- TODO | 1 + src/glacier_upload/archives.py | 26 +++++++++++--------------- src/glacier_upload/inventories.py | 5 +++++ src/glacier_upload/upload.py | 6 +++--- 5 files changed, 39 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index f6dd74a..fd5eaf5 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ A helper tool to upload and manage archives in [Amazon S3 Glacier](https://docs.aws.amazon.com/amazonglacier/latest/dev/introduction.html) -Vaults. +Vaults. Amazon S3 Glacier is a cloud storage service that is optimized for long +term storage for a relatively cheap price. ## Installation @@ -20,9 +21,7 @@ $ pip install glacier_upload ### Prerequisites -Amazon S3 Glacier is a cloud storage service that is optimized for long term -storage for a relatively cheap price. To upload an archive to Amazon S3 Glacier -vault, ensure you have: +To upload an archive to Amazon S3 Glacier vault, ensure you have: - Created an AWS account - Created an Amazon S3 Glacier vault from the AWS CLI tool or the Management @@ -58,6 +57,15 @@ There are additional options to customize your upload, such as adding a description to the archive or configuring the number of threads or the size of parts. Run `glacier upload --help` for more information. +If a multipart upload is interrupted in the middle (because of an exception, +interrupted manually, or other reason), the script will show you the upload ID. +That upload ID can be used to resume the upload, using the same command but +adding the `--upload-id` option, like so: + +``` +glacier upload --upload-id UPLOAD_ID VAULT_NAME FILE_NAME [FILE_NAME ...] +``` + ### Retrieving an archive Retrieving an archive in glacier requires two steps. 
First, initiate a @@ -84,7 +92,7 @@ a vault contains, you need to request an inventory of the archive, in a similar manner to retrieving an archive. To initiate an inventory, run: ``` -glacier archive init-retrieval VAULT_NAME ARCHIVE_ID +glacier inventory init-retrieval VAULT_NAME ``` Then, the inventory job will take some time to complete. Run the next step to @@ -92,14 +100,15 @@ both check whether the job is complete and retrieve the inventory if it has been completed. ``` -glacier archive get VAULT_NAME JOB_ID FILE_NAME +glacier inventory get VAULT_NAME JOB_ID ``` -### Deleting an archive, deleting an upload, creating/deleting a vault etc. +### Deleting an archive, deleting an upload job, creating/deleting a vault, etc. -Anything that is not listed above can be done using the AWS CLI. Those -functionalities are not implemented here to avoid duplication of work, and -minimize maintenance efforts of this package. +All jobs other than uploading an archive and requesting/downloading an inventory +or archive can be done using the AWS CLI. Those functionalities are not +implemented here to avoid duplication of work, and minimize maintenance efforts +of this package. ## Contributing diff --git a/TODO b/TODO index 377722c..2361afe 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,7 @@ - Unit tests - https://help.github.com/en/articles/setting-guidelines-for-repository-contributors +- Support multipart download of archives like in #25 - Add progress indication for archiving - implement auto-retry on multipart upload failure - it should ask the user for confirmation to continue diff --git a/src/glacier_upload/archives.py b/src/glacier_upload/archives.py index 6aaad9c..60c2332 100644 --- a/src/glacier_upload/archives.py +++ b/src/glacier_upload/archives.py @@ -14,7 +14,6 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-import json import os import boto3 @@ -46,6 +45,11 @@ def get(vault_name, job_id, file_name): except glacier.exceptions.ResourceNotFoundException as e: raise click.ClickException(e.response["Error"]["Message"]) + if response["Action"] != "ArchiveRetrieval": + raise click.ClickException( + "Job is not an archive retrieval. Check the Job ID again." + ) + click.echo(f"Job status: {response['StatusCode']}") if not response["Completed"]: @@ -60,20 +64,12 @@ def get(vault_name, job_id, file_name): click.echo("Retrieving job data...") response = glacier.get_job_output(vaultName=vault_name, jobId=job_id) - if response["contentType"] == "application/json": - inventory_json = json.load(response["body"]) - click.echo(json.dumps(inventory_json, indent=2)) - elif response["contentType"] == "text/csv": - click.echo(response["body"].read()) - else: - content_length = int( - response["ResponseMetadata"]["HTTPHeaders"]["content-length"] - ) - response_stream = response["body"] - try: - download_archive(content_length, response_stream, file_name) - finally: - response_stream.close() + content_length = int(response["ResponseMetadata"]["HTTPHeaders"]["content-length"]) + response_stream = response["body"] + try: + download_archive(content_length, response_stream, file_name) + finally: + response_stream.close() def download_archive(content_length, response_stream, file_name): diff --git a/src/glacier_upload/inventories.py b/src/glacier_upload/inventories.py index 51a6f6d..c1eacf6 100644 --- a/src/glacier_upload/inventories.py +++ b/src/glacier_upload/inventories.py @@ -45,6 +45,11 @@ def get(vault_name, job_id): except glacier.exceptions.ResourceNotFoundException as e: raise click.ClickException(e.response["Error"]["Message"]) + if response["Action"] != "InventoryRetrieval": + raise click.ClickException( + "Job is not an inventory retrieval. Check the Job ID again." 
+ ) + click.echo(f"Inventory status: {response['StatusCode']}") if not response["Completed"]: diff --git a/src/glacier_upload/upload.py b/src/glacier_upload/upload.py index 464f09b..428fac7 100644 --- a/src/glacier_upload/upload.py +++ b/src/glacier_upload/upload.py @@ -189,14 +189,14 @@ def multipart_upload( futures_list, return_when=concurrent.futures.FIRST_EXCEPTION ) if len(not_done) > 0: - # an exception occured + # an exception occurred for future in not_done: future.cancel() for future in done: exc = future.exception() if exc is not None: exc_string = "".join(traceback.format_exception(exc)) - click.secho(f"Exception occured: {exc_string}", err=True, fg="red") + click.secho(f"Exception occurred: {exc_string}", err=True, fg="red") click.echo(f"Upload can still be resumed. Upload ID: {upload_id}") raise click.Abort else: @@ -240,6 +240,7 @@ def upload_part( range_header = f"bytes {start_pos}-{end_pos}/{file_size_bytes}" part_num = start_pos // part_size_bytes percentage = part_num / num_parts + checksum = calculate_tree_hash(part, part_size_bytes) click.echo(f"Uploading part {part_num + 1} of {num_parts}... ({percentage:.2%})") @@ -249,7 +250,6 @@ def upload_part( response = glacier.upload_multipart_part( vaultName=vault_name, uploadId=upload_id, range=range_header, body=part ) - checksum = calculate_tree_hash(part, part_size_bytes) if checksum != response["checksum"]: raise Exception("Local checksum does not match Glacier checksum")