Skip to content

Commit

Permalink
Upload (#13)
Browse files Browse the repository at this point in the history
upload to staging bucket
  • Loading branch information
paul-butcher authored Jun 21, 2024
1 parent ac4132b commit 44a273e
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 17 deletions.
9 changes: 6 additions & 3 deletions src/transferrer/make_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ def generate_metadata_csv(csvfile, accession_id):
The accession id is placed in the final cell
>>> import io
>>> generate_metadata_csv(io.StringIO(), shoot_number_to_accession_id("2754", "CP000159")).getvalue()
'filename,collection_reference,accession_number\\r\\n/objects,WT,2754_CP000159\\r\\n'
'filename,collection_reference,accession_number\\r\\nobjects/,WT,2754_CP000159\\r\\n'
"""
writer = csv.DictWriter(
csvfile, fieldnames=["filename", "collection_reference", "accession_number"]
)
writer.writeheader()
writer.writerow(
{
"filename": "/objects",
"filename": "objects/",
"collection_reference": "WT",
"accession_number": accession_id,
}
Expand Down Expand Up @@ -90,7 +90,6 @@ def create_born_digital_zips(
source_directory, target_directory, accession_number, shoot_number, max_batch_size
):
filenames = files_in_folder(source_directory)

if len(filenames) == 0:
raise FileNotFoundError(
"Attempt to build born digital accession zip from empty folder"
Expand Down Expand Up @@ -134,3 +133,7 @@ def batched(iterable, n):
while batch := tuple(itertools.islice(iterator, n)):
yield batch


if __name__ == "__main__":
    import sys

    # CLI entry point: argv[1] is the source directory, argv[2] the shoot number.
    # The accession number ("1234") and batch size (99) are fixed for ad-hoc runs.
    # Iterate to exhaustion so the generator actually produces all the zips.
    for _ in create_born_digital_zips(sys.argv[1], '.', "1234", sys.argv[2], 99):
        pass
23 changes: 19 additions & 4 deletions src/transferrer/upload.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@

import logging
import os
import sys

import boto3
def get_bucket(bucket_name="wellcomecollection-workflow-stage-upload"):
    """Return a boto3 Bucket resource for *bucket_name*.

    Uses the default boto3 session/credentials. The default bucket is the
    Workflow staging upload bucket (per its name — confirm with infra).

    :param bucket_name: name of the S3 bucket to wrap (generalised from the
        previously hard-coded value; default preserves existing behaviour).
    :return: a boto3 ``s3.Bucket`` resource.
    """
    session = boto3.Session()
    # lowercase local name per PEP 8 (was `S3`, which reads as a constant)
    s3 = session.resource('s3')
    return s3.Bucket(bucket_name)

logger = logging.getLogger(__name__)


def upload(s3, zipfile_path, target_bucket_name="wellcomecollection-archivematica-staging-transfer-source"):
    """Upload the zip at *zipfile_path* into the born-digital-accessions/ prefix of the target bucket.

    :param s3: a boto3 S3 service resource (anything exposing ``.Bucket(name)``).
    :param zipfile_path: local path of the zip; only its basename is used for the S3 key.
    :param target_bucket_name: destination bucket; defaults to the Archivematica staging transfer-source bucket.
    :raises FileNotFoundError: if *zipfile_path* does not exist (propagated from ``upload_file``).
    """
    # Use lazy %-style args rather than an f-string so formatting only
    # happens when INFO logging is actually enabled.
    logger.info("uploading %s to %s", zipfile_path, target_bucket_name)
    get_target_bucket(s3, target_bucket_name).upload_file(
        zipfile_path, f"born-digital-accessions/{os.path.basename(zipfile_path)}"
    )


def get_target_bucket(s3, target_bucket):
    """Resolve *target_bucket* (a bucket name) to a Bucket resource on the given S3 resource."""
    bucket = s3.Bucket(target_bucket)
    return bucket


if __name__ == "__main__":
    # CLI entry point: argv[1] is the path of the zip to upload.
    # Credentials come from the profile named in AWS_TARGET_PROFILE.
    session = boto3.Session(profile_name=os.environ["AWS_TARGET_PROFILE"])
    upload(session.resource('s3'), sys.argv[1])
31 changes: 22 additions & 9 deletions test/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os.path
from contextlib import contextmanager
import moto
import boto3
import pytest
Expand All @@ -7,29 +8,41 @@

HERE = os.path.dirname(os.path.abspath(__file__))


@pytest.fixture
def moto_s3():
    """Run moto's AWS mock for the duration of a test and yield a fake S3 resource.

    This span previously mixed pre- and post-change diff lines; this is the
    committed (added-lines) version of the fixture.
    """
    moto_fake = moto.mock_aws()
    try:
        moto_fake.start()
        # All buckets in these tests live in eu-west-1.
        yield boto3.resource(service_name='s3', region_name="eu-west-1")
    finally:
        # Always stop the mock so fake AWS state does not leak between tests.
        moto_fake.stop()

@pytest.fixture
def empty_bucket(moto_s3):
    """Yield a factory context manager that creates a named, empty fake S3 bucket."""
    @contextmanager
    def _make_bucket(bucket_name):
        created = moto_s3.create_bucket(
            CreateBucketConfiguration={'LocationConstraint': 'eu-west-1'},
            Bucket=bucket_name,
        )
        yield created

    yield _make_bucket


@pytest.fixture
def available_shoot_bucket(empty_bucket):
    """Yield a fake source bucket populated with shoot objects in standard storage.

    This span previously mixed pre- and post-change diff lines; this is the
    committed (added-lines) version of the fixture.
    """
    with empty_bucket("wellcomecollection-editorial-photography") as bucket:
        yield populate_bucket(bucket, extra_args={})


@pytest.fixture
def glacier_shoot_bucket(empty_bucket):
    """Yield a fake source bucket whose shoot objects are in the GLACIER storage class.

    This span previously mixed pre- and post-change diff lines; this is the
    committed (added-lines) version of the fixture.
    """
    with empty_bucket("wellcomecollection-editorial-photography") as bucket:
        yield populate_bucket(bucket, extra_args={'StorageClass': 'GLACIER'})


@pytest.fixture
def target_bucket(empty_bucket):
    """Yield an empty fake bucket standing in for the Archivematica staging transfer-source bucket."""
    bucket_name = "wellcomecollection-archivematica-staging-transfer-source"
    with empty_bucket(bucket_name) as bucket:
        yield bucket


def populate_bucket(bucket, extra_args):
Expand Down
2 changes: 1 addition & 1 deletion test/test_make_zips.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def assert_csv_has_accession_id(csv_path, accession_id):
csv_file
) # Ensures that the header row is present and correct
assert next(reader) == {
"filename": "/objects",
"filename": "objects/",
"collection_reference": "WT",
"accession_number": accession_id,
}
Expand Down
18 changes: 18 additions & 0 deletions test/test_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pytest
import pyfakefs
import boto3
from transferrer.upload import upload
from transferrer.upload import upload


def test_raises_on_missing_zip(moto_s3, target_bucket, fs):
    """Uploading a zip that does not exist on the (fake) filesystem fails loudly."""
    pytest.raises(FileNotFoundError, upload, moto_s3, "missing.zip")


def test_uploads_to_accessions_folder_in_bucket(moto_s3, target_bucket, fs):
    """An uploaded zip lands under the born-digital-accessions/ prefix of the target bucket."""
    fs.create_file("present.zip")
    upload(moto_s3, "present.zip")
    uploaded_keys = [obj.key for obj in target_bucket.objects.all()]
    assert uploaded_keys == ["born-digital-accessions/present.zip"]


0 comments on commit 44a273e

Please sign in to comment.