From 31b956bc77237dce31e7f46a4009b031dc4b4bd0 Mon Sep 17 00:00:00 2001 From: Paul Butcher Date: Mon, 22 Jul 2024 17:07:17 +0100 Subject: [PATCH 1/2] deploy with Terraform --- terraform/.terraform.lock.hcl | 43 +++++++++++ terraform/main.tf | 36 +++++++++ terraform/modules/notification_queue/main.tf | 22 ++++++ .../modules/notification_queue/outputs.tf | 3 + .../modules/notification_queue/provider.tf | 7 ++ .../modules/notification_queue/variables.tf | 11 +++ terraform/modules/sqs_lambda_trigger/main.tf | 33 +++++++++ .../modules/sqs_lambda_trigger/provider.tf | 7 ++ .../modules/sqs_lambda_trigger/variables.tf | 19 +++++ terraform/modules/transferrer_lambda/main.tf | 74 +++++++++++++++++++ .../modules/transferrer_lambda/outputs.tf | 7 ++ .../modules/transferrer_lambda/provider.tf | 7 ++ .../modules/transferrer_lambda/variables.tf | 16 ++++ terraform/modules/transferrer_pipe/main.tf | 20 +++++ .../modules/transferrer_pipe/provider.tf | 7 ++ .../modules/transferrer_pipe/variables.tf | 20 +++++ terraform/provider.tf | 8 ++ terraform/variables.tf | 0 18 files changed, 340 insertions(+) create mode 100644 terraform/.terraform.lock.hcl create mode 100644 terraform/main.tf create mode 100644 terraform/modules/notification_queue/main.tf create mode 100644 terraform/modules/notification_queue/outputs.tf create mode 100644 terraform/modules/notification_queue/provider.tf create mode 100644 terraform/modules/notification_queue/variables.tf create mode 100644 terraform/modules/sqs_lambda_trigger/main.tf create mode 100644 terraform/modules/sqs_lambda_trigger/provider.tf create mode 100644 terraform/modules/sqs_lambda_trigger/variables.tf create mode 100644 terraform/modules/transferrer_lambda/main.tf create mode 100644 terraform/modules/transferrer_lambda/outputs.tf create mode 100644 terraform/modules/transferrer_lambda/provider.tf create mode 100644 terraform/modules/transferrer_lambda/variables.tf create mode 100644 terraform/modules/transferrer_pipe/main.tf create mode 
100644 terraform/modules/transferrer_pipe/provider.tf
 create mode 100644 terraform/modules/transferrer_pipe/variables.tf
 create mode 100644 terraform/provider.tf
 create mode 100644 terraform/variables.tf

diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl
new file mode 100644
index 0000000..9136804
--- /dev/null
+++ b/terraform/.terraform.lock.hcl
@@ -0,0 +1,43 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/archive" {
+  version = "2.4.2"
+  hashes = [
+    "h1:WfIjVbYA9s/uN2FwhGoiffT7CLFydy7MT1waFbt9YrY=",
+    "zh:08faed7c9f42d82bc3d406d0d9d4971e2d1c2d34eae268ad211b8aca57b7f758",
+    "zh:3564112ed2d097d7e0672378044a69b06642c326f6f1584d81c7cdd32ebf3a08",
+    "zh:53cd9afd223c15828c1916e68cb728d2be1cbccb9545568d6c2b122d0bac5102",
+    "zh:5ae4e41e3a1ce9d40b6458218a85bbde44f21723943982bca4a3b8bb7c103670",
+    "zh:5b65499218b315b96e95c5d3463ea6d7c66245b59461217c99eaa1611891cd2c",
+    "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
+    "zh:7f45b35a8330bebd184c2545a41782ff58240ed6ba947274d9881dd5da44b02e",
+    "zh:87e67891033214e55cfead1391d68e6a3bf37993b7607753237e82aa3250bb71",
+    "zh:de3590d14037ad81fc5cedf7cfa44614a92452d7b39676289b704a962050bc5e",
+    "zh:e7e6f2ea567f2dbb3baa81c6203be69f9cd6aeeb01204fd93e3cf181e099b610",
+    "zh:fd24d03c89a7702628c2e5a3c732c0dede56fa75a08da4a1efe17b5f881c88e2",
+    "zh:febf4b7b5f3ff2adff0573ef6361f09b6638105111644bdebc0e4f575373935f",
+  ]
+}
+
+provider "registry.terraform.io/hashicorp/aws" {
+  version = "5.57.0"
+  hashes = [
+    "h1:9yi3yb3XOMjj/xsSbOfscfmQzPUQ7sZqSYSBfGSfkBA=",
+    "zh:03761bedb72290599aef0040d3cefb77842f0ef4338673a7e5b53557b0ca4960",
+    "zh:1c70c050116370688abd239979b06f33c5c8cb7f6e59e89f60cf08ee01666064",
+    "zh:1cc3b259028a65b2f68ffc25df876bbb0f46d108f262b8ec7c56fc597ac697af",
+    "zh:3bcdf1415b37f39b71e07d4d92977cf8697f07602382d63687d5f683fee0231a",
+    "zh:40b1774a2cacc84002ac88ef30fb017c273009456d7a1f9f7c5a4a057041ec75",
+    "zh:46d51fa066c6441594a1e242c9491cc31dbb2dc85f1acf8bc54ad6faa4de524b",
+    "zh:550e5635b0cd5d98fa66c2afd5dbb1563a8e019be9f760bd1543fbcca763f0c1",
+    "zh:7acc8357b5e02ed3eb478125614d049511d6faeb9850c084d6e6519db875f0d1",
+    "zh:7f7367299811ddf5560a0586e525d57dd52f1a0ca37e42e2c5284308069bf2b6",
+    "zh:8766cc10c83b1fc2e971c4e645bc4d3c871d4758eb54b0a3216600c66e3db681",
+    "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
+    "zh:a1e85b1fb9004d8ffab7600304e02bce4aa14cea9f0ad77fbd7b84aae6390760",
+    "zh:bcf2fc83bd9e20e5a930d9d596eb813c319f2b007c620b1818e574c1702eb9a9",
+    "zh:d2538fcb20dc2afc04b716f67969944eef7f4fc4296410116d5b7af1811100f2",
+    "zh:e0e47c5d8710bbfcfe4db1cfa81c67e320056006d08063e69640cd2d492c6f64",
+  ]
+}
diff --git a/terraform/main.tf b/terraform/main.tf
new file mode 100644
index 0000000..5325e07
--- /dev/null
+++ b/terraform/main.tf
@@ -0,0 +1,36 @@
+locals {
+  event_batching_window_timeout = 20
+  lambda_timeout                = 300 // five minutes; must be >= the function timeout set in modules/transferrer_lambda, or the SQS trigger is rejected
+
+  # The lambda event source pulls messages from SQS in batches, finally triggering the lambda
+  # when either it has enough messages, or enough time has elapsed.
+  # A message becomes invisible when it joins the event source buffer, so could wait for
+  # the whole timeout window plus the whole execution time before being confirmed.
+  # The value of visibility timeout must be at least 20 seconds more than the lambda timeout
+  # This doesn't necessarily need to exist with a longer batching window, but
+  # always adding 20 here should mean that you can safely set batching window to 0
+  # if you wish.
+  # See: https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html
+  # "Lambda might wait for up to 20 seconds before invoking your function."
+  queue_visibility_timeout = local.event_batching_window_timeout + local.lambda_timeout + 20
+}
+
+data "archive_file" "lambda_zip" {
+  type        = "zip"
+  output_path = "lambda.zip"
+  source_dir  = "../src"
+}
+
+module "staging_lambda" {
+  source                   = "./modules/transferrer_pipe"
+  environment              = "staging"
+  queue_visibility_timeout = local.queue_visibility_timeout
+  lambda_zip               = data.archive_file.lambda_zip
+}
+
+module "production_lambda" {
+  source                   = "./modules/transferrer_pipe"
+  environment              = "production"
+  queue_visibility_timeout = local.queue_visibility_timeout
+  lambda_zip               = data.archive_file.lambda_zip
+}
\ No newline at end of file
diff --git a/terraform/modules/notification_queue/main.tf b/terraform/modules/notification_queue/main.tf
new file mode 100644
index 0000000..e87bfb4
--- /dev/null
+++ b/terraform/modules/notification_queue/main.tf
@@ -0,0 +1,22 @@
+
+module "transfer_shoots_topic" {
+  source = "github.com/wellcomecollection/terraform-aws-sns-topic.git?ref=v1.0.1"
+  name   = "transfer-shoots-${var.environment}"
+}
+
+module "dlq_alarm_topic" {
+  source = "github.com/wellcomecollection/terraform-aws-sns-topic.git?ref=v1.0.1"
+  name   = "transfer-shoots-alarm-${var.environment}"
+}
+
+module "input_queue" {
+  source = "github.com/wellcomecollection/terraform-aws-sqs//queue?ref=v1.2.1"
+
+  queue_name = "transfer-shoots-${var.environment}"
+
+  topic_arns                 = [module.transfer_shoots_topic.arn]
+  visibility_timeout_seconds = var.queue_visibility_timeout
+  max_receive_count          = 1
+  message_retention_seconds  = 1200 # 20 minutes
+  alarm_topic_arn            = module.dlq_alarm_topic.arn
+}
diff --git a/terraform/modules/notification_queue/outputs.tf b/terraform/modules/notification_queue/outputs.tf
new file mode 100644
index 0000000..d8a0c89
--- /dev/null
+++ b/terraform/modules/notification_queue/outputs.tf
@@ -0,0 +1,3 @@
+output "queue_arn" {
+  value = module.input_queue.arn
+}
\ No newline at end of file
diff --git a/terraform/modules/notification_queue/provider.tf
b/terraform/modules/notification_queue/provider.tf
new file mode 100644
index 0000000..7afdcf4
--- /dev/null
+++ b/terraform/modules/notification_queue/provider.tf
@@ -0,0 +1,7 @@
+terraform {
+  required_providers {
+    aws = {
+      source = "hashicorp/aws"
+    }
+  }
+}
\ No newline at end of file
diff --git a/terraform/modules/notification_queue/variables.tf b/terraform/modules/notification_queue/variables.tf
new file mode 100644
index 0000000..f625bfb
--- /dev/null
+++ b/terraform/modules/notification_queue/variables.tf
@@ -0,0 +1,11 @@
+variable "queue_visibility_timeout" {
+  type = number # passed straight through as the queue's visibility_timeout_seconds
+}
+
+variable "environment" {
+  type = string
+  validation {
+    condition     = contains(["staging", "production"], var.environment)
+    error_message = "environment must be one of staging or production"
+  }
+}
\ No newline at end of file
diff --git a/terraform/modules/sqs_lambda_trigger/main.tf b/terraform/modules/sqs_lambda_trigger/main.tf
new file mode 100644
index 0000000..814f226
--- /dev/null
+++ b/terraform/modules/sqs_lambda_trigger/main.tf
@@ -0,0 +1,33 @@
+data "aws_iam_policy_document" "allow_sqs_pull" {
+  statement {
+    actions = [
+      "sqs:ReceiveMessage",
+      "sqs:DeleteMessage",
+      "sqs:GetQueueAttributes"
+    ]
+    resources = [
+      var.queue_arn
+    ]
+  }
+}
+
+resource "aws_iam_role_policy" "allow_sqs_pull" {
+  name   = "${var.trigger_name}-pull-from-queue"
+  role   = var.role_name
+  policy = data.aws_iam_policy_document.allow_sqs_pull.json
+}
+
+resource "aws_lambda_event_source_mapping" "lambda_trigger" {
+  event_source_arn = var.queue_arn
+  enabled          = true
+  function_name    = var.function_name
+  batch_size       = var.batch_size
+  depends_on       = [aws_iam_role_policy.allow_sqs_pull] # the role must be able to read the queue before the mapping is created
+}
+
+resource "aws_lambda_permission" "allow_lambda_sqs_trigger" { # NOTE(review): SQS mappings are polled via the execution role; confirm this permission is needed
+  action        = "lambda:InvokeFunction"
+  function_name = var.function_name
+  principal     = "sqs.amazonaws.com"
+  source_arn    = var.queue_arn
+}
\ No newline at end of file
diff --git a/terraform/modules/sqs_lambda_trigger/provider.tf b/terraform/modules/sqs_lambda_trigger/provider.tf
new file mode 100644
index 0000000..7afdcf4
--- /dev/null
+++ b/terraform/modules/sqs_lambda_trigger/provider.tf
@@ -0,0 +1,7 @@
+terraform {
+  required_providers {
+    aws = {
+      source = "hashicorp/aws"
+    }
+  }
+}
\ No newline at end of file
diff --git a/terraform/modules/sqs_lambda_trigger/variables.tf b/terraform/modules/sqs_lambda_trigger/variables.tf
new file mode 100644
index 0000000..4a6277c
--- /dev/null
+++ b/terraform/modules/sqs_lambda_trigger/variables.tf
@@ -0,0 +1,19 @@
+variable "queue_arn" {
+  type = string
+}
+
+variable "function_name" {
+  type = string
+}
+
+variable "role_name" {
+  type = string
+}
+
+variable "trigger_name" {
+  type = string
+}
+variable "batch_size" {
+  type    = number
+  default = 1
+}
\ No newline at end of file
diff --git a/terraform/modules/transferrer_lambda/main.tf b/terraform/modules/transferrer_lambda/main.tf
new file mode 100644
index 0000000..6feb827
--- /dev/null
+++ b/terraform/modules/transferrer_lambda/main.tf
@@ -0,0 +1,74 @@
+locals {
+  lambda_name    = "editorial-photography-transfer-${var.environment}"
+  lambda_timeout = 300 // five minutes; keep in sync with lambda_timeout in terraform/main.tf, which sizes the queue visibility timeout
+  buckets = tomap(
+    {
+      staging    = "wellcomecollection-archivematica-staging-transfer-source",
+      production = "wellcomecollection-archivematica-transfer-source"
+    }
+  )
+  target_bucket = local.buckets[var.environment] # var.environment is validated to be one of the map's keys
+
+}
+
+
+module "transfer_lambda" {
+  source = "git@github.com:wellcomecollection/terraform-aws-lambda?ref=v1.2.0"
+
+  name    = local.lambda_name
+  runtime = "python3.12"
+  handler = "lambda_function.lambda_handler"
+
+  filename    = var.lambda_zip.output_path
+  memory_size = 2048
+  timeout     = local.lambda_timeout
+
+  environment = {
+    variables = {
+      ACCESSION_NUMBER = "2754"
+      TARGET_BUCKET    = local.target_bucket
+    }
+  }
+  source_code_hash = var.lambda_zip.output_base64sha256 # forces a redeploy whenever the zip contents change
+  ephemeral_storage = {
+    size = 4096
+  }
+}
+
+resource "aws_iam_role_policy" "write_to_archivematica_transfer_source" {
+  role = module.transfer_lambda.lambda_role.name
+  name = "write_to_archivematica_transfer_source-${var.environment}"
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        "Effect"   = "Allow",
+        "Action"   = "s3:PutObject",
+        "Resource" = "arn:aws:s3:::${local.target_bucket}/*"
+      },
+    ]
+    }
+  )
+}
+
+resource "aws_iam_role_policy" "read_from_editorial_photography" {
+  role = module.transfer_lambda.lambda_role.name
+  name = "read_from_editorial_photography-${var.environment}"
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        "Effect" = "Allow",
+        "Action" = [
+          "s3:GetObject",
+          "s3:ListBucket"
+        ],
+        "Resource" = [
+          "arn:aws:s3:::wellcomecollection-editorial-photography",
+          "arn:aws:s3:::wellcomecollection-editorial-photography/*"
+        ],
+      },
+    ]
+  })
+
+}
\ No newline at end of file
diff --git a/terraform/modules/transferrer_lambda/outputs.tf b/terraform/modules/transferrer_lambda/outputs.tf
new file mode 100644
index 0000000..f504240
--- /dev/null
+++ b/terraform/modules/transferrer_lambda/outputs.tf
@@ -0,0 +1,7 @@
+output "lambda" {
+  value = module.transfer_lambda.lambda
+}
+
+output "role" {
+  value = module.transfer_lambda.lambda_role
+}
\ No newline at end of file
diff --git a/terraform/modules/transferrer_lambda/provider.tf b/terraform/modules/transferrer_lambda/provider.tf
new file mode 100644
index 0000000..7afdcf4
--- /dev/null
+++ b/terraform/modules/transferrer_lambda/provider.tf
@@ -0,0 +1,7 @@
+terraform {
+  required_providers {
+    aws = {
+      source = "hashicorp/aws"
+    }
+  }
+}
\ No newline at end of file
diff --git a/terraform/modules/transferrer_lambda/variables.tf b/terraform/modules/transferrer_lambda/variables.tf
new file mode 100644
index 0000000..537c342
--- /dev/null
+++ b/terraform/modules/transferrer_lambda/variables.tf
@@ -0,0 +1,16 @@
+variable "environment" {
+  type = string
+  validation {
+    condition     = contains(["staging", "production"], var.environment)
+    error_message = "environment must be one of staging or production"
+  }
+}
+
+variable "lambda_zip" {
+  type = object(
+    {
+      output_path         = string,
+      output_base64sha256 = string
+    }
+  )
+}
\ No newline at end of file
diff --git a/terraform/modules/transferrer_pipe/main.tf b/terraform/modules/transferrer_pipe/main.tf
new file mode 100644
index 0000000..45d66bb
--- /dev/null
+++ b/terraform/modules/transferrer_pipe/main.tf
@@ -0,0 +1,20 @@
+
+module "transfer_lambda" {
+  source      = "../transferrer_lambda"
+  environment = var.environment
+  lambda_zip  = var.lambda_zip
+}
+
+module "input_queue" {
+  source                   = "../notification_queue"
+  environment              = var.environment
+  queue_visibility_timeout = var.queue_visibility_timeout
+}
+
+module "trigger" {
+  source        = "../sqs_lambda_trigger"
+  queue_arn     = module.input_queue.queue_arn
+  function_name = module.transfer_lambda.lambda.function_name
+  role_name     = module.transfer_lambda.role.name
+  trigger_name  = "editorial-photography-${var.environment}"
+}
\ No newline at end of file
diff --git a/terraform/modules/transferrer_pipe/provider.tf b/terraform/modules/transferrer_pipe/provider.tf
new file mode 100644
index 0000000..7afdcf4
--- /dev/null
+++ b/terraform/modules/transferrer_pipe/provider.tf
@@ -0,0 +1,7 @@
+terraform {
+  required_providers {
+    aws = {
+      source = "hashicorp/aws"
+    }
+  }
+}
\ No newline at end of file
diff --git a/terraform/modules/transferrer_pipe/variables.tf b/terraform/modules/transferrer_pipe/variables.tf
new file mode 100644
index 0000000..2e70377
--- /dev/null
+++ b/terraform/modules/transferrer_pipe/variables.tf
@@ -0,0 +1,20 @@
+variable "environment" {
+  type = string
+  validation {
+    condition     = contains(["staging", "production"], var.environment)
+    error_message = "environment must be one of staging or production"
+  }
+}
+
+variable "queue_visibility_timeout" {
+  type = number
+}
+
+variable "lambda_zip" {
+  type = object(
+    {
+      output_path         = string,
+      output_base64sha256 = string
+    }
+  )
+}
\ No newline at end of file
diff --git a/terraform/provider.tf b/terraform/provider.tf
new file mode 100644
index
0000000..474feb9
--- /dev/null
+++ b/terraform/provider.tf
@@ -0,0 +1,8 @@
+
+provider "aws" {
+  region = "eu-west-1"
+
+  assume_role {
+    role_arn = "arn:aws:iam::404315009621:role/digitisation-developer"
+  }
+}
diff --git a/terraform/variables.tf b/terraform/variables.tf
new file mode 100644
index 0000000..e69de29
From e557e95070cadc818d33bfd79762e33409321376 Mon Sep 17 00:00:00 2001
From: Paul Butcher
Date: Tue, 23 Jul 2024 14:55:10 +0100
Subject: [PATCH 2/2] Add Makefile (#21)

* Add Makefile
* set the splitter to 20
* neater permission
---
 Makefile | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 Makefile

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f847df3
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,49 @@
+.PHONY: shoots/clean %.sliced
+.SECONDARY:
+
+# Remove intermediate/final files from the shoots folder (-f: a category with no files must not abort the clean)
+shoots/clean:
+	rm -f shoots/*restored
+	rm -f shoots/*transferred
+	rm -f shoots/*slice*
+
+# Request the Glacier restoration of the shoots in the given file
+# The file is expected to contain one shoot identifier per line.
+# In order to run this, set your AWS profile to one with authority in the workflow account.
+%.restored : %
+	cat $< | python src/restore.py
+	cp $< $@
+
+
+# Request the Glacier transfer of the shoots in the given file
+# This rule depends on restoration having completed, which is not guaranteed
+# (or even likely) if you run this rule without having previously requested the restoration
+# Any shoots that are not yet fully restored will result in a DLQ message that can eventually
+# be redriven when the s3 objects are finally available for download
+# In order to run this, set your AWS profile to one with authority in the digitisation account.
+
+# transfer to staging (see above)
+%.transferred.staging: %.restored
+	cat $< | python src/start_transfers.py staging
+	cp $< $@
+
+
+# transfer to production (see above)
+%.transferred.production: %.restored
+	cat $< | python src/start_transfers.py production
+	cp $< $@
+
+# Slice a given input file into manageable chunks, so that you can run them through the
+# transfer process separately without overwhelming the target system.
+# The right number for archivematica is probably about 20.
+
+%.sliced: %
+	split -l 20 $< $<.
+
+# Touch the files already on AWS. This will stimulate the corresponding transfer lambdas
+# In order to run this, set your AWS profile to one with authority in the digitisation account.
+%.touched.staging: %
+	cat $< | python src/touch.py staging
+# As above, for production. ($< is the shoots file; a bare % is not expanded inside a recipe.)
+%.touched.production: %
+	cat $< | python src/touch.py production