# main.py
#
# Reconstructed from a whitespace-mangled git patch (commit "upload to s3
# complete"): this is the post-patch main.py, cleaned up.  Invoked by the
# GitHub Action as:
#   python main.py <globus_auth_client_id> <globus_auth_secret>
#                  <files_to_publish> <mdf_source_id> <mdf_title>
#                  <mdf_authors> <mdf_affiliations> <mdf_publication_year>
#                  <staging_object_store_url> <aws_access_key_id>
#                  <aws_secret_access_key> <s3_bucket_id> <s3_bucket_path>
#
# SECURITY NOTE(review): the accompanying workflow hunk commits a plaintext
# aws_secret_access_key into .github/workflows/main.yml — that credential
# should be rotated and moved into GitHub encrypted secrets.
import sys

# boto3 is imported lazily inside main() so this module can be imported
# (e.g. by tests) on machines without boto3 installed.

# Module-level S3 client; initialised by main() once credentials are known.
# upload_s3() reads this global, matching the original design.
s3 = None


def mask_secret(secret):
    """Return a redacted form of *secret* that is safe to echo into CI logs.

    Shows at most the first 4 characters so the log is still useful for
    telling credentials apart, without leaking the full value.
    """
    if not secret:
        return '(empty)'
    return f'{secret[:4]}...(redacted)'


def upload_s3(bucket_id, object_path, file):
    """Upload *file* to ``s3://bucket_id/object_path``; return a presigned GET URL.

    Uses the module-level ``s3`` client (set up by :func:`main`).  The
    presigned URL uses boto3's default expiry (3600 seconds).
    """
    with open(file, "rb") as f:
        s3.upload_fileobj(f, bucket_id, object_path)

    url = s3.generate_presigned_url(
        ClientMethod='get_object',
        Params={
            'Bucket': bucket_id,
            'Key': object_path,
        })
    return url


def main():
    """Entry point: parse action inputs, upload each file, print their URLs."""
    global s3

    # Parse the 14 positional action inputs here (not at module level) so
    # importing this file has no side effects, and a bad argument count
    # fails with a clear message instead of an unpacking ValueError.
    if len(sys.argv) != 15:
        sys.exit(f'main.py: expected 14 arguments, got {len(sys.argv) - 1}')
    _, globus_auth_client_id, globus_auth_secret, \
        files_to_publish, mdf_source_id, mdf_title, mdf_authors, mdf_affiliations, \
        mdf_publication_year, staging_object_store_url, aws_access_key_id, \
        aws_secret_access_key, s3_bucket_id, s3_bucket_path = sys.argv

    # Never echo raw credentials into CI logs — print redacted forms.
    # (The original printed globus_auth_secret and aws_secret_access_key
    # verbatim, leaking them into the public Actions log.)
    print('Input Data:-')
    print(f'Globus Auth Client ID: {globus_auth_client_id}')
    print(f'Globus Auth Secret: {mask_secret(globus_auth_secret)}')
    print(f'Files to Publish: {files_to_publish}')
    print(f'MDF Source ID: {mdf_source_id}')
    print(f'MDF Title: {mdf_title}')
    print(f'MDF Authors: {mdf_authors}')
    print(f'MDF Affiliations: {mdf_affiliations}')
    print(f'MDF Publication Year: {mdf_publication_year}')
    print(f'Staging Object Store URL: {staging_object_store_url}')
    print(f'AWS Access Key ID: {mask_secret(aws_access_key_id)}')
    print(f'AWS Secret Access Key: {mask_secret(aws_secret_access_key)}')
    print(f'S3 Bucket ID: {s3_bucket_id}')
    print(f'S3 Bucket Path: {s3_bucket_path}')

    import boto3  # deferred: only needed when actually running the action
    s3 = boto3.client(
        's3',
        endpoint_url=staging_object_store_url,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key)

    # Tolerate spaces around the commas and skip empty entries
    # (e.g. a trailing comma in the action input).
    files_list = [f.strip() for f in files_to_publish.split(",") if f.strip()]

    source_urls = [
        upload_s3(s3_bucket_id, f"{s3_bucket_path}{file}", file)
        for file in files_list
    ]
    print(source_urls)


if __name__ == "__main__":
    main()