
Commit

upload to s3 complete
shriram192 committed Dec 11, 2023
1 parent c49eb6d commit cda94ab
Showing 2 changed files with 50 additions and 48 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/main.yml
@@ -26,5 +26,3 @@ jobs:
           aws_secret_access_key: "S8aT/cgh/TedtMlQlnvKr7BnsCBjno"
           s3_bucket_id: "che230022-bucket01"
           s3_bucket_path: "/qmc_graphene_stacking_fault/0_interlayer_energy/data/"
-      - name: Get the output logs
-        run: echo "${{ steps.push_publish.outputs.logs }}"
96 changes: 50 additions & 46 deletions main.py
@@ -1,56 +1,60 @@
 # main.py
 import sys
+import boto3
 
-def main():
-
 # Get action inputs
-    _, globus_auth_client_id, globus_auth_secret, \
+_, globus_auth_client_id, globus_auth_secret, \
     files_to_publish, mdf_source_id, mdf_title, mdf_authors, mdf_affiliations, \
     mdf_publication_year, staging_object_store_url, aws_access_key_id, \
     aws_secret_access_key, s3_bucket_id, s3_bucket_path = sys.argv
 
-    logs = []
-    logs.append('Input Data:-')
-    logs.append(f'Globus Auth Client ID: {globus_auth_client_id}')
-    logs.append(f'Globus Auth Secret: {globus_auth_secret}')
-    logs.append(f'Files to Publish: {files_to_publish}')
-    logs.append(f'MDF Source ID: {mdf_source_id}')
-    logs.append(f'MDF Title: {mdf_title}')
-    logs.append(f'MDF Authors: {mdf_authors}')
-    logs.append(f'MDF Affiliations: {mdf_affiliations}')
-    logs.append(f'MDF Publication Year: {mdf_publication_year}')
-    logs.append(f'Staging Object Store URL: {staging_object_store_url}')
-    logs.append(f'AWS Access Key ID: {aws_access_key_id}')
-    logs.append(f'AWS Secret Access Key: {aws_secret_access_key}')
-    logs.append(f'S3 Bucket ID: {s3_bucket_id}')
-    logs.append(f'S3 Bucket Path: {s3_bucket_path}')
-
-    # # Process data
-    # if source_format == 'csv':
-    #     df = pd.read_csv(source_url)
-    # elif source_format == 'json':
-    #     df = pd.read_json(source_url)
-
-    # if transform == 'normalize':
-    #     df = normalize(df)
-    # elif transform == 'aggregate':
-    #     df = aggregate(df)
-
-    # # Output results
-    # if output_format == 'csv':
-    #     out_csv = df.to_csv()
-    #     Path('/results/out.csv').write_text(out_csv)
-    #     print(f'::set-output name=results_url::https://store/results/out.csv')
-    # elif output_format == 'json':
-    #     out_json = df.to_json()
-    #     Path('/results/out.json').write_text(out_json)
-    #     print(f'::set-output name=results_url::https://store/results/out.json')
-
-    # # Output metrics
-    # row_count = len(df)
-
-    output_logs = "\n".join(logs)
-    print(f'::set-output name=logs::{output_logs}')
+s3 = boto3.client('s3',
+                  endpoint_url=staging_object_store_url,
+                  aws_access_key_id=aws_access_key_id,
+                  aws_secret_access_key=aws_secret_access_key)
+
+
+def upload_s3(bucket_id, object_path, file):
+    with open(file, "rb") as f:
+        s3.upload_fileobj(f, bucket_id, object_path)
+
+    url = s3.generate_presigned_url(
+        ClientMethod='get_object',
+        Params={
+            'Bucket': bucket_id,
+            'Key': object_path
+        })
+
+    return url
+
+
+def main():
+    print('Input Data:-')
+    print(f'Globus Auth Client ID: {globus_auth_client_id}')
+    print(f'Globus Auth Secret: {globus_auth_secret}')
+    print(f'Files to Publish: {files_to_publish}')
+    print(f'MDF Source ID: {mdf_source_id}')
+    print(f'MDF Title: {mdf_title}')
+    print(f'MDF Authors: {mdf_authors}')
+    print(f'MDF Affiliations: {mdf_affiliations}')
+    print(f'MDF Publication Year: {mdf_publication_year}')
+    print(f'Staging Object Store URL: {staging_object_store_url}')
+    print(f'AWS Access Key ID: {aws_access_key_id}')
+    print(f'AWS Secret Access Key: {aws_secret_access_key}')
+    print(f'S3 Bucket ID: {s3_bucket_id}')
+    print(f'S3 Bucket Path: {s3_bucket_path}')
+
+    files_list = files_to_publish.split(",")
+
+    source_urls = []
+    for file in files_list:
+        source_urls.append(upload_s3(s3_bucket_id, f"{s3_bucket_path}{file}", file))
+
+    print(source_urls)
+
+
 if __name__ == "__main__":
     main()
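
For reference, a minimal sketch of how the rewritten entrypoint could be invoked, assuming the action passes its inputs to main.py as positional command-line arguments in the order unpacked from sys.argv above. The credential and file values below are placeholders; only the bucket ID and path are taken from the workflow file in this commit.

# Hypothetical invocation of main.py; the argument order mirrors the sys.argv
# unpacking in the new code above. Credential and file values are placeholders.
import subprocess

subprocess.run(
    [
        "python", "main.py",
        "<globus_auth_client_id>", "<globus_auth_secret>",
        "results.csv,notes.txt",                     # files_to_publish, comma-separated
        "<mdf_source_id>", "<mdf_title>", "<mdf_authors>", "<mdf_affiliations>",
        "<mdf_publication_year>",
        "https://object-store.example.org",          # staging_object_store_url (placeholder)
        "<aws_access_key_id>", "<aws_secret_access_key>",
        "che230022-bucket01",                        # s3_bucket_id (from the workflow)
        "/qmc_graphene_stacking_fault/0_interlayer_energy/data/",  # s3_bucket_path (from the workflow)
    ],
    check=True,
)

Each file listed in files_to_publish is uploaded to the bucket and a presigned GET URL is printed for it; since upload_s3 passes no ExpiresIn argument, boto3's generate_presigned_url applies its default 3600-second expiry.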