From bc70c8e856387470e0095d581bf7a82d71c3cfbf Mon Sep 17 00:00:00 2001 From: Robert Kenny Date: Wed, 3 Jul 2024 10:12:14 +0100 Subject: [PATCH] Move EBSCO Adapter to ECS (#2675) * initial shift to ecs for ebsco adapter * add task definition * remove lambda traces, and add alerting * remove deployment hacks * Apply auto-formatting rules --------- Co-authored-by: Buildkite on behalf of Wellcome Collection --- .buildkite/pipeline.deploy-adapters.yml | 5 +- .buildkite/pipeline.yml | 17 +++- ebsco_adapter/ebsco_adapter/Dockerfile | 8 ++ .../ebsco_adapter/docker-compose.yml | 21 +++++ ebsco_adapter/ebsco_adapter/run_local.sh | 25 ++++++ ebsco_adapter/ebsco_adapter/src/ebsco_ftp.py | 9 +- ebsco_adapter/ebsco_adapter/src/main.py | 73 ++++++++-------- ebsco_adapter/ebsco_adapter/src/metrics.py | 36 ++++++++ .../ebsco_adapter/src/test_fixtures.py | 3 + ebsco_adapter/terraform/alarm_lambda.tf | 62 ++++++++++++++ ebsco_adapter/terraform/alarms.tf | 15 ++++ ebsco_adapter/terraform/cluster.tf | 3 + ebsco_adapter/terraform/data.tf | 32 +++++++ ebsco_adapter/terraform/data/.gitignore | 2 + .../data/cloudwatch_alarm_to_slack.py | 37 ++++++++ ebsco_adapter/terraform/ecr.tf | 27 ++++++ ebsco_adapter/terraform/ftp_lambda.tf | 84 ------------------- ebsco_adapter/terraform/ftp_task.tf | 67 +++++++++++++++ ebsco_adapter/terraform/iam.tf | 77 ++++++++++++++++- ebsco_adapter/terraform/locals.tf | 15 ++++ ebsco_adapter/terraform/outputs.tf | 2 +- ebsco_adapter/terraform/parameters.tf | 14 ++++ ebsco_adapter/terraform/security_groups.tf | 20 +++++ infrastructure/modules/task/output.tf | 19 +++++ .../modules/task/task_definition.tf | 37 ++++++++ infrastructure/modules/task/variables.tf | 24 ++++++ 26 files changed, 605 insertions(+), 129 deletions(-) create mode 100644 ebsco_adapter/ebsco_adapter/Dockerfile create mode 100755 ebsco_adapter/ebsco_adapter/run_local.sh create mode 100644 ebsco_adapter/ebsco_adapter/src/metrics.py create mode 100644 ebsco_adapter/terraform/alarm_lambda.tf create 
mode 100644 ebsco_adapter/terraform/alarms.tf create mode 100644 ebsco_adapter/terraform/cluster.tf create mode 100644 ebsco_adapter/terraform/data/.gitignore create mode 100644 ebsco_adapter/terraform/data/cloudwatch_alarm_to_slack.py create mode 100644 ebsco_adapter/terraform/ecr.tf delete mode 100644 ebsco_adapter/terraform/ftp_lambda.tf create mode 100644 ebsco_adapter/terraform/ftp_task.tf create mode 100644 ebsco_adapter/terraform/security_groups.tf create mode 100644 infrastructure/modules/task/output.tf create mode 100644 infrastructure/modules/task/task_definition.tf create mode 100644 infrastructure/modules/task/variables.tf diff --git a/.buildkite/pipeline.deploy-adapters.yml b/.buildkite/pipeline.deploy-adapters.yml index 3ff518f38d..8b1311b9b6 100644 --- a/.buildkite/pipeline.deploy-adapters.yml +++ b/.buildkite/pipeline.deploy-adapters.yml @@ -1,9 +1,8 @@ steps: - label: deploy EBSCO adapter command: | - ./builds/deploy_lambda_zip.sh \ - ebsco_adapter/ebsco_adapter \ - ebsco-adapter-ftp + ENV_TAG=env.prod ./builds/update_ecr_image_tag.sh \ + uk.ac.wellcome/ebsco_adapter plugins: - wellcomecollection/aws-assume-role#v0.2.2: diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 767abc2ee1..7b113c1397 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -151,11 +151,26 @@ steps: matrix: - "calm_adapter/calm_window_generator" - "calm_adapter/calm_deletion_check_initiator" - - "ebsco_adapter/ebsco_adapter" - "ebsco_adapter/ebsco_indexer" - "common/window_generator" - "tei_adapter/tei_updater" + - label: "{{ matrix }} (Publish/Image)" + branches: "main" + plugins: + - wellcomecollection/aws-assume-role#v0.2.2: + role: "arn:aws:iam::760097843905:role/platform-ci" + - ecr#v2.5.0: + login: true + - docker-compose#v4.16.0: + config: "{{ matrix }}/docker-compose.yml" + cli-version: 2 + push: + - publish:760097843905.dkr.ecr.eu-west-1.amazonaws.com/uk.ac.wellcome/ebsco_adapter:ref.${BUILDKITE_COMMIT} + - 
publish:760097843905.dkr.ecr.eu-west-1.amazonaws.com/uk.ac.wellcome/ebsco_adapter:latest + matrix: + - "ebsco_adapter/ebsco_adapter" + - wait - label: trigger adapter deployments diff --git a/ebsco_adapter/ebsco_adapter/Dockerfile b/ebsco_adapter/ebsco_adapter/Dockerfile new file mode 100644 index 0000000000..d52b9bbfb5 --- /dev/null +++ b/ebsco_adapter/ebsco_adapter/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3.10 + +WORKDIR /app + +ADD src /app +RUN pip install -r requirements.txt + +ENTRYPOINT ["python", "main.py"] diff --git a/ebsco_adapter/ebsco_adapter/docker-compose.yml b/ebsco_adapter/ebsco_adapter/docker-compose.yml index b576a5d581..5b51475a30 100644 --- a/ebsco_adapter/ebsco_adapter/docker-compose.yml +++ b/ebsco_adapter/ebsco_adapter/docker-compose.yml @@ -4,3 +4,24 @@ services: context: . dockerfile: ../../builds/test.python.Dockerfile command: ["py.test"] + dev: + build: + context: . + dockerfile: ./Dockerfile + volumes: + - $HOME/.aws:/root/.aws:ro + environment: + - AWS_PROFILE=platform-developer + - OUTPUT_TOPIC_ARN=${OUTPUT_TOPIC_ARN} + - CUSTOMER_ID=${CUSTOMER_ID} + - S3_BUCKET=${S3_BUCKET} + - S3_PREFIX=${S3_PREFIX} + - FTP_SERVER=${FTP_SERVER} + - FTP_USERNAME=${FTP_USERNAME} + - FTP_PASSWORD=${FTP_PASSWORD} + - FTP_REMOTE_DIR=${FTP_REMOTE_DIR} + command: ["python", "main.py"] + publish: + build: + context: . 
+ dockerfile: ./Dockerfile diff --git a/ebsco_adapter/ebsco_adapter/run_local.sh b/ebsco_adapter/ebsco_adapter/run_local.sh new file mode 100755 index 0000000000..59d4acb3e5 --- /dev/null +++ b/ebsco_adapter/ebsco_adapter/run_local.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +# Usage: ./run_local.sh + +AWS_PROFILE=platform-developer + +export AWS_PROFILE + +FTP_PASSWORD=$(aws ssm get-parameter --name /catalogue_pipeline/ebsco_adapter/ftp_password --with-decryption --query "Parameter.Value" --output text) +FTP_SERVER=$(aws ssm get-parameter --name /catalogue_pipeline/ebsco_adapter/ftp_server --query "Parameter.Value" --output text) +FTP_USERNAME=$(aws ssm get-parameter --name /catalogue_pipeline/ebsco_adapter/ftp_username --query "Parameter.Value" --output text) +CUSTOMER_ID=$(aws ssm get-parameter --name /catalogue_pipeline/ebsco_adapter/customer_id --query "Parameter.Value" --output text) +FTP_REMOTE_DIR=$(aws ssm get-parameter --name /catalogue_pipeline/ebsco_adapter/ftp_remote_dir --query "Parameter.Value" --output text) +S3_BUCKET=$(aws ssm get-parameter --name /catalogue_pipeline/ebsco_adapter/bucket_name --query "Parameter.Value" --output text) +OUTPUT_TOPIC_ARN=$(aws ssm get-parameter --name /catalogue_pipeline/ebsco_adapter/output_topic_arn --query "Parameter.Value" --output text) + +# Update the S3_PREFIX to be the environment (use dev for local testing) +S3_PREFIX=prod + +export FTP_PASSWORD FTP_SERVER FTP_USERNAME CUSTOMER_ID FTP_REMOTE_DIR S3_BUCKET S3_PREFIX OUTPUT_TOPIC_ARN + +# Ensure the docker image is up to date +docker-compose --log-level ERROR build dev + +docker-compose run dev "$@" diff --git a/ebsco_adapter/ebsco_adapter/src/ebsco_ftp.py b/ebsco_adapter/ebsco_adapter/src/ebsco_ftp.py index 19c738bdae..c0caec5731 100644 --- a/ebsco_adapter/ebsco_adapter/src/ebsco_ftp.py +++ b/ebsco_adapter/ebsco_adapter/src/ebsco_ftp.py @@ -8,11 +8,13 @@ def __init__(self, ftp_server, ftp_username, ftp_password, ftp_remote_dir): self.ftp_username = ftp_username 
self.ftp_password = ftp_password self.ftp_remote_dir = ftp_remote_dir + self.ftp_connection_open = False def __enter__(self): self.ftp = FTP(self.ftp_server) self.ftp.login(self.ftp_username, self.ftp_password) self.ftp.cwd(self.ftp_remote_dir) + self.ftp_connection_open = True return self def list_files(self, valid_suffixes): @@ -32,5 +34,10 @@ def download_file(self, file, temp_dir): return os.path.join(temp_dir, file) - def __exit__(self, exc_type, exc_val, exc_tb): + def quit(self): self.ftp.quit() + self.ftp_connection_open = False + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.ftp_connection_open: + self.ftp.quit() diff --git a/ebsco_adapter/ebsco_adapter/src/main.py b/ebsco_adapter/ebsco_adapter/src/main.py index f00cdad076..7cd6f24038 100644 --- a/ebsco_adapter/ebsco_adapter/src/main.py +++ b/ebsco_adapter/ebsco_adapter/src/main.py @@ -13,6 +13,7 @@ from extract_marc import extract_marc_records from compare_uploads import compare_uploads, find_notified_and_completed_flag from update_notifier import update_notifier +from metrics import ProcessMetrics ftp_server = os.environ.get("FTP_SERVER") ftp_username = os.environ.get("FTP_USERNAME") @@ -30,6 +31,12 @@ def run_process(temp_dir, ebsco_ftp, s3_store, sns_publisher, invoked_at): print("Running regular process ...") available_files = sync_and_list_files(temp_dir, ftp_s3_prefix, ebsco_ftp, s3_store) + + # Holding the connection open for the next step + # is unnecessary, if we close here we avoid any + # potential timeout issues with the connection. 
+ ebsco_ftp.quit() + updates = compare_uploads( available_files, extract_marc_records, xml_s3_prefix, temp_dir, s3_store ) @@ -109,34 +116,6 @@ def _get_iso8601_invoked_at(): return invoked_at -def lambda_handler(event, context): - invoked_at = _get_iso8601_invoked_at() - if "invoked_at" in event: - invoked_at = event["invoked_at"] - - print(f"Starting lambda_handler @ {invoked_at}, got event: {event}") - - with tempfile.TemporaryDirectory() as temp_dir: - with EbscoFtp( - ftp_server, ftp_username, ftp_password, ftp_remote_dir - ) as ebsco_ftp: - s3_store = S3Store(s3_bucket) - sns_publisher = SnsPublisher(sns_topic_arn) - - if event is not None and "reindex_type" in event: - return run_reindex( - s3_store, - sns_publisher, - invoked_at, - event["reindex_type"], - event.get("reindex_ids"), - ) - else: - return run_process( - temp_dir, ebsco_ftp, s3_store, sns_publisher, invoked_at - ) - - if __name__ == "__main__": event = None context = SimpleNamespace(invoked_function_arn=None) @@ -154,16 +133,40 @@ def lambda_handler(event, context): type=str, help="Comma-separated list of IDs to reindex (for partial)", ) + parser.add_argument( + "--scheduled-invoke", + action="store_true", + help="To run a regular process invocation, without reindexing.", + ) + invoked_at = _get_iso8601_invoked_at() args = parser.parse_args() + + process_type = None + reindex_ids = None if args.reindex_type: - reindex_ids = None + process_type = f"reindex-{args.reindex_type}" if args.reindex_ids: reindex_ids = args.reindex_ids.split(",") reindex_ids = [rid.strip() for rid in reindex_ids] - - # This is the event that will be passed to the lambda handler. - # When invoking the function, use this structure to trigger reindexing. 
- event = {"reindex_type": args.reindex_type, "reindex_ids": reindex_ids} - - lambda_handler(event, None) + elif args.scheduled_invoke: + process_type = "scheduled" + + with ProcessMetrics( + process_type + ) as metrics, tempfile.TemporaryDirectory() as temp_dir, EbscoFtp( + ftp_server, ftp_username, ftp_password, ftp_remote_dir + ) as ebsco_ftp: + s3_store = S3Store(s3_bucket) + sns_publisher = SnsPublisher(sns_topic_arn) + + if args.reindex_type: + run_reindex( + s3_store, + sns_publisher, + invoked_at, + args.reindex_type, + reindex_ids, + ) + elif args.scheduled_invoke: + run_process(temp_dir, ebsco_ftp, s3_store, sns_publisher, invoked_at) diff --git a/ebsco_adapter/ebsco_adapter/src/metrics.py b/ebsco_adapter/ebsco_adapter/src/metrics.py new file mode 100644 index 0000000000..e411a348a4 --- /dev/null +++ b/ebsco_adapter/ebsco_adapter/src/metrics.py @@ -0,0 +1,36 @@ +import boto3 +import time + + +class ProcessMetrics: + def __init__(self, process_name: str): + self.process_name = process_name + + def __enter__(self): + self.start_time = time.time() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + end_time = time.time() + duration = end_time - self.start_time + + if exc_type is not None: + self.put_metric("ProcessDurationFailure", duration) + else: + self.put_metric("ProcessDurationSuccess", duration) + + def put_metric(self, metric_name: str, value: float): + print(f"Putting metric {metric_name} ({self.process_name}) with value {value}s") + boto3.client("cloudwatch").put_metric_data( + Namespace="ebsco_adapter", + MetricData=[ + { + "MetricName": metric_name, + "Dimensions": [ + {"Name": "process_name", "Value": self.process_name} + ], + "Value": value, + "Unit": "Seconds", + } + ], + ) diff --git a/ebsco_adapter/ebsco_adapter/src/test_fixtures.py b/ebsco_adapter/ebsco_adapter/src/test_fixtures.py index 912f19edb4..7a80d86442 100644 --- a/ebsco_adapter/ebsco_adapter/src/test_fixtures.py +++ b/ebsco_adapter/ebsco_adapter/src/test_fixtures.py 
@@ -46,6 +46,9 @@ def download_file(self, file, temp_dir): f.write(self.files[file]) return os.path.join(temp_dir, file) + def quit(self): + pass + def __exit__(self, exc_type, exc_val, exc_tb): pass diff --git a/ebsco_adapter/terraform/alarm_lambda.tf b/ebsco_adapter/terraform/alarm_lambda.tf new file mode 100644 index 0000000000..f0e2392aa3 --- /dev/null +++ b/ebsco_adapter/terraform/alarm_lambda.tf @@ -0,0 +1,62 @@ +module "cloudwatch_alarm_to_slack_lambda" { + source = "git@github.com:wellcomecollection/terraform-aws-lambda?ref=v1.2.0" + + name = "cloudwatch-alarm-to-slack" + description = "Sends CloudWatch alarms to a Slack channel." + runtime = "python3.10" + + filename = data.archive_file.cloudwatch_alarm_to_slack.output_path + handler = "cloudwatch_alarm_to_slack.lambda_handler" + memory_size = 512 + timeout = 60 // 1 minute + + source_code_hash = data.archive_file.cloudwatch_alarm_to_slack.output_base64sha256 + + error_alarm_topic_arn = data.terraform_remote_state.monitoring.outputs["platform_lambda_error_alerts_topic_arn"] + + environment = { + variables = { + HOOK_URL = aws_ssm_parameter.cloudwatch_alarm_to_slack_hook_url.value + SLACK_CHANNEL = aws_ssm_parameter.cloudwatch_alarm_to_slack_channel.value + } + } +} + +resource "aws_ssm_parameter" "cloudwatch_alarm_to_slack_hook_url" { + name = "/catalogue_pipeline/ebsco_adapter/cloudwatch_alarm_to_slack_hook_url" + description = "The URL of the Slack webhook to send messages to" + type = "String" + value = "placeholder" + + lifecycle { + ignore_changes = [ + value + ] + } +} + +resource "aws_ssm_parameter" "cloudwatch_alarm_to_slack_channel" { + name = "/catalogue_pipeline/ebsco_adapter/cloudwatch_alarm_to_slack_channel" + description = "The Slack channel to send messages to" + type = "String" + value = "wc-platform-alerts" +} + +module "cloudwatch_alarm_to_slack_topic" { + source = "github.com/wellcomecollection/terraform-aws-sns-topic.git?ref=v1.0.0" + name = "cloudwatch_alarm_to_slack" +} + +resource 
"aws_sns_topic_subscription" "cloudwatch_alarm_to_slack_subscription" {
+  topic_arn = module.cloudwatch_alarm_to_slack_topic.arn
+  protocol  = "lambda"
+  endpoint  = module.cloudwatch_alarm_to_slack_lambda.lambda.arn
+}
+
+resource "aws_lambda_permission" "allow_execution_from_sns" {
+  statement_id  = "AllowExecutionFromSNS"
+  action        = "lambda:InvokeFunction"
+  function_name = module.cloudwatch_alarm_to_slack_lambda.lambda.function_name
+  principal     = "sns.amazonaws.com"
+  source_arn    = module.cloudwatch_alarm_to_slack_topic.arn
+}
diff --git a/ebsco_adapter/terraform/alarms.tf b/ebsco_adapter/terraform/alarms.tf
new file mode 100644
index 0000000000..d99b62cdc6
--- /dev/null
+++ b/ebsco_adapter/terraform/alarms.tf
@@ -0,0 +1,16 @@
+resource "aws_cloudwatch_metric_alarm" "ebsco_adapter_no_success_metric" {
+  alarm_name          = "ebsco-adapter-no-success-metric"
+  comparison_operator = "LessThanThreshold"
+  evaluation_periods  = 1
+  metric_name         = "ProcessDurationSuccess"
+  namespace           = "ebsco_adapter"
+  period              = 86400
+  statistic           = "Sum"
+  threshold           = 1
+  alarm_description   = "No success metrics have been sent in the last 24 hours"
+  alarm_actions       = [module.cloudwatch_alarm_to_slack_topic.arn]
+  treat_missing_data  = "breaching"
+  dimensions = {
+    process_name = "scheduled"
+  }
+}
diff --git a/ebsco_adapter/terraform/cluster.tf b/ebsco_adapter/terraform/cluster.tf
new file mode 100644
index 0000000000..a1a445fa7f
--- /dev/null
+++ b/ebsco_adapter/terraform/cluster.tf
@@ -0,0 +1,3 @@
+resource "aws_ecs_cluster" "cluster" {
+  name = local.namespace
+}
diff --git a/ebsco_adapter/terraform/data.tf b/ebsco_adapter/terraform/data.tf
index ad466c0c34..f4a9bd5904 100644
--- a/ebsco_adapter/terraform/data.tf
+++ b/ebsco_adapter/terraform/data.tf
@@ -6,3 +6,35 @@ data "archive_file" "empty_zip" {
     filename = "lambda.py"
   }
 }
+
+data "archive_file" "cloudwatch_alarm_to_slack" {
+  output_path = "data/cloudwatch_alarm_to_slack.zip"
+  type        = "zip"
+  source {
+    content  = file("${path.module}/data/cloudwatch_alarm_to_slack.py")
+    
filename = "cloudwatch_alarm_to_slack.py" + } +} + +data "terraform_remote_state" "accounts_catalogue" { + backend = "s3" + + config = { + role_arn = "arn:aws:iam::760097843905:role/platform-read_only" + + bucket = "wellcomecollection-platform-infra" + key = "terraform/aws-account-infrastructure/catalogue.tfstate" + region = "eu-west-1" + } +} + +data "terraform_remote_state" "shared_infra" { + backend = "s3" + + config = { + role_arn = "arn:aws:iam::760097843905:role/platform-read_only" + bucket = "wellcomecollection-platform-infra" + key = "terraform/platform-infrastructure/shared.tfstate" + region = "eu-west-1" + } +} diff --git a/ebsco_adapter/terraform/data/.gitignore b/ebsco_adapter/terraform/data/.gitignore new file mode 100644 index 0000000000..2472d8f475 --- /dev/null +++ b/ebsco_adapter/terraform/data/.gitignore @@ -0,0 +1,2 @@ +# This directory contains generated files, ignore the zip files +*.zip diff --git a/ebsco_adapter/terraform/data/cloudwatch_alarm_to_slack.py b/ebsco_adapter/terraform/data/cloudwatch_alarm_to_slack.py new file mode 100644 index 0000000000..c4a5f349d5 --- /dev/null +++ b/ebsco_adapter/terraform/data/cloudwatch_alarm_to_slack.py @@ -0,0 +1,37 @@ +import json +import logging +import os + +from urllib.request import Request, urlopen +from urllib.error import URLError, HTTPError + +HOOK_URL = os.environ['HOOK_URL'] +SLACK_CHANNEL = os.environ['SLACK_CHANNEL'] + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + + +def lambda_handler(event, context): + logger.info("Event: " + str(event)) + message = json.loads(event['Records'][0]['Sns']['Message']) + logger.info("Message: " + str(message)) + + alarm_name = message['AlarmName'] + new_state = message['NewStateValue'] + reason = message['NewStateReason'] + + slack_message = { + 'channel': SLACK_CHANNEL, + 'text': "%s state is now %s: %s" % (alarm_name, new_state, reason) + } + + req = Request(HOOK_URL, json.dumps(slack_message).encode('utf-8')) + try: + response = urlopen(req) 
+ response.read() + logger.info("Message posted to %s", slack_message['channel']) + except HTTPError as e: + logger.error("Request failed: %d %s", e.code, e.reason) + except URLError as e: + logger.error("Server connection failed: %s", e.reason) diff --git a/ebsco_adapter/terraform/ecr.tf b/ebsco_adapter/terraform/ecr.tf new file mode 100644 index 0000000000..cdb610552b --- /dev/null +++ b/ebsco_adapter/terraform/ecr.tf @@ -0,0 +1,27 @@ +locals { + ecr_policy_only_keep_the_last_100_images = jsonencode({ + rules = [ + { + rulePriority = 1 + description = "Only keep the last 100 images in a repo" + selection = { + tagStatus = "any" + countType = "imageCountMoreThan" + countNumber = 100 + } + action = { + type = "expire" + } + } + ] + }) +} + +resource "aws_ecr_repository" "ebsco_adapter" { + name = "uk.ac.wellcome/ebsco_adapter" +} + +resource "aws_ecr_lifecycle_policy" "ebsco_adapter" { + repository = aws_ecr_repository.ebsco_adapter.name + policy = local.ecr_policy_only_keep_the_last_100_images +} diff --git a/ebsco_adapter/terraform/ftp_lambda.tf b/ebsco_adapter/terraform/ftp_lambda.tf deleted file mode 100644 index 2ff3c78b25..0000000000 --- a/ebsco_adapter/terraform/ftp_lambda.tf +++ /dev/null @@ -1,84 +0,0 @@ -module "ftp_lambda" { - source = "git@github.com:wellcomecollection/terraform-aws-lambda?ref=v1.2.0" - - name = "ebsco-adapter-ftp" - runtime = "python3.10" - - filename = data.archive_file.empty_zip.output_path - handler = "main.lambda_handler" - memory_size = 512 - timeout = 10 * 60 // 10 minutes - - environment = { - variables = { - S3_BUCKET = aws_s3_bucket.ebsco_adapter.bucket - S3_PREFIX = "prod" - - FTP_SERVER = aws_ssm_parameter.ebsco_adapter_ftp_server.value - FTP_USERNAME = aws_ssm_parameter.ebsco_adapter_ftp_username.value - FTP_PASSWORD = aws_ssm_parameter.ebsco_adapter_ftp_password.value - FTP_REMOTE_DIR = aws_ssm_parameter.ebsco_adapter_ftp_remote_dir.value - CUSTOMER_ID = aws_ssm_parameter.ebsco_adapter_customer_id.value - OUTPUT_TOPIC_ARN 
= module.ebsco_adapter_output_topic.arn - } - } - - depends_on = [ - aws_s3_bucket.ebsco_adapter, - aws_ssm_parameter.ebsco_adapter_ftp_server, - aws_ssm_parameter.ebsco_adapter_ftp_username, - aws_ssm_parameter.ebsco_adapter_ftp_password, - aws_ssm_parameter.ebsco_adapter_ftp_remote_dir, - aws_ssm_parameter.ebsco_adapter_customer_id - ] -} - -data "aws_iam_policy_document" "rw_ebsco_adapter_bucket" { - statement { - actions = [ - "s3:GetObject", - "s3:PutObject", - "s3:DeleteObject", - "s3:List*" - ] - - resources = [ - "${aws_s3_bucket.ebsco_adapter.arn}/*" - ] - } - - statement { - actions = [ - "s3:ListBucket" - ] - - resources = [ - "${aws_s3_bucket.ebsco_adapter.arn}" - ] - } -} - -resource "aws_iam_role_policy" "ftp_lambda_policy" { - role = module.ftp_lambda.lambda_role.name - policy = data.aws_iam_policy_document.rw_ebsco_adapter_bucket.json -} - -# The lambda source is updated much less frequently than once a day (every 7 days) -# but it's still a good idea to trigger the lambda to run at least once a day to ensure -# that it's always up to date. -resource "aws_cloudwatch_event_rule" "every_day_at_6am" { - name = "trigger_ftp_lambda" - schedule_expression = "cron(0 6 * * ? 
*)"
-}
-
-resource "aws_lambda_permission" "allow_reporter_cloudwatch_trigger" {
-  action        = "lambda:InvokeFunction"
-  function_name = module.ftp_lambda.lambda.function_name
-  principal     = "events.amazonaws.com"
-  source_arn    = aws_cloudwatch_event_rule.every_day_at_6am.arn
-}
-
-resource "aws_cloudwatch_event_target" "event_trigger" {
-  rule = aws_cloudwatch_event_rule.every_day_at_6am.name
-  arn  = module.ftp_lambda.lambda.arn
-}
diff --git a/ebsco_adapter/terraform/ftp_task.tf b/ebsco_adapter/terraform/ftp_task.tf
new file mode 100644
index 0000000000..df84b53e3e
--- /dev/null
+++ b/ebsco_adapter/terraform/ftp_task.tf
@@ -0,0 +1,67 @@
+module "ftp_task" {
+  source = "../../infrastructure/modules/task"
+
+  task_name = "ebsco-adapter-ftp"
+
+  image = "${aws_ecr_repository.ebsco_adapter.repository_url}:latest"
+
+  environment = {
+    FTP_SERVER       = aws_ssm_parameter.ebsco_adapter_ftp_server.value
+    FTP_USERNAME     = aws_ssm_parameter.ebsco_adapter_ftp_username.value
+    FTP_REMOTE_DIR   = aws_ssm_parameter.ebsco_adapter_ftp_remote_dir.value
+    CUSTOMER_ID      = aws_ssm_parameter.ebsco_adapter_customer_id.value
+    FTP_PASSWORD     = aws_ssm_parameter.ebsco_adapter_ftp_password.value
+    OUTPUT_TOPIC_ARN = module.ebsco_adapter_output_topic.arn
+    S3_BUCKET        = aws_s3_bucket.ebsco_adapter.bucket
+    S3_PREFIX        = "prod"
+  }
+
+  cpu    = 2048
+  memory = 4096
+}
+
+resource "aws_scheduler_schedule" "ftp_task_schedule" {
+  name       = "ebsco-adapter-ftp-schedule"
+  group_name = "default"
+
+  flexible_time_window {
+    mode = "OFF"
+  }
+
+  schedule_expression = "rate(1 day)"
+
+  # Disable the schedule for now
+  state = "DISABLED"
+
+  target {
+    arn      = aws_ecs_cluster.cluster.arn
+    role_arn = aws_iam_role.eventbridge_task_scheduler.arn
+
+    ecs_parameters {
+      task_definition_arn = local.task_definition_arn_latest
+      launch_type         = "FARGATE"
+
+      network_configuration {
+        assign_public_ip = false
+        security_groups = [
+          aws_security_group.egress.id,
+          local.network_config.ec_privatelink_security_group_id
+        ]
+        subnets = 
local.network_config.subnets + } + } + + input = jsonencode({ + containerOverrides = [ + { + name = "ebsco-adapter-ftp" + command = ["--scheduled-invoke"] + } + ] + }) + + retry_policy { + maximum_retry_attempts = 3 + } + } +} diff --git a/ebsco_adapter/terraform/iam.tf b/ebsco_adapter/terraform/iam.tf index 03dfa66a9c..508ec9754b 100644 --- a/ebsco_adapter/terraform/iam.tf +++ b/ebsco_adapter/terraform/iam.tf @@ -1,6 +1,6 @@ -resource "aws_iam_role_policy" "publish_to_topic" { +resource "aws_iam_role_policy" "ftp_adapter_publish_to_topic" { policy = module.ebsco_adapter_output_topic.publish_policy - role = module.ftp_lambda.lambda_role.name + role = module.ftp_task.task_role_name } data "aws_iam_policy_document" "ebsco_s3_bucket_full_access" { @@ -25,13 +25,82 @@ data "aws_iam_policy_document" "ebsco_s3_bucket_full_access" { } } +# a policy for publishing cloudwatch metrics + +data "aws_iam_policy_document" "ebsco_adapter_publish_metrics" { + statement { + actions = [ + "cloudwatch:PutMetricData", + ] + + resources = ["*"] + } +} + +resource "aws_iam_policy" "ebsco_adapter_publish_metrics" { + name = "ebsco_adapter_publish_metrics" + description = "Allow the ebsco_adapter to publish metrics to CloudWatch" + policy = data.aws_iam_policy_document.ebsco_adapter_publish_metrics.json +} + +resource "aws_iam_role_policy_attachment" "ebsco_adapter_publish_metrics" { + policy_arn = aws_iam_policy.ebsco_adapter_publish_metrics.arn + role = module.ftp_task.task_role_name +} + resource "aws_iam_policy" "ebsco_s3_bucket_full_access" { name = "ebsco_s3_bucket_full_access" description = "Allow full access to the ebsco_adapter S3 bucket" policy = data.aws_iam_policy_document.ebsco_s3_bucket_full_access.json } -resource "aws_iam_role_policy_attachment" "ebsco_s3_bucket_full_access" { +resource "aws_iam_role_policy_attachment" "ftp_adapter_s3_bucket_full_access" { policy_arn = aws_iam_policy.ebsco_s3_bucket_full_access.arn - role = module.ftp_lambda.lambda_role.name + role = 
module.ftp_task.task_role_name +} + +# EventBridge scheduler IAM resources + +resource "aws_iam_role" "eventbridge_task_scheduler" { + name = "eventbridge-task-scheduler-role" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Principal = { + Service = ["scheduler.amazonaws.com"] + } + Action = "sts:AssumeRole" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "eventbridge_task_scheduler" { + policy_arn = aws_iam_policy.eventbridge_task_scheduler.arn + role = aws_iam_role.eventbridge_task_scheduler.name +} + +resource "aws_iam_policy" "eventbridge_task_scheduler" { + name = "eventbridge-task-scheduler-policy" + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow", + Action = [ + "ecs:RunTask" + ] + Resource = ["${local.task_definition_arn_latest}:*"] + }, + { + Effect = "Allow", + Action = [ + "iam:PassRole" + ] + Resource = [module.ftp_task.task_role_arn, module.ftp_task.task_execution_role_arn] + }, + ] + }) } diff --git a/ebsco_adapter/terraform/locals.tf b/ebsco_adapter/terraform/locals.tf index 26017c3635..9076f1d317 100644 --- a/ebsco_adapter/terraform/locals.tf +++ b/ebsco_adapter/terraform/locals.tf @@ -1,3 +1,18 @@ locals { namespace = "ebsco-adapter" + + catalogue_vpcs = data.terraform_remote_state.accounts_catalogue.outputs + shared_infra = data.terraform_remote_state.shared_infra.outputs + + _task_definition_split = split(":", module.ftp_task.task_definition_arn) + task_definition_version = element(local._task_definition_split, length(local._task_definition_split) - 1) + task_definition_arn_latest = trimsuffix(module.ftp_task.task_definition_arn, ":${local.task_definition_version}") + + network_config = { + vpc_id = local.catalogue_vpcs["catalogue_vpc_delta_id"] + subnets = local.catalogue_vpcs["catalogue_vpc_delta_private_subnets"] + + + ec_privatelink_security_group_id = local.shared_infra["ec_platform_privatelink_sg_id"] + } } diff --git 
a/ebsco_adapter/terraform/outputs.tf b/ebsco_adapter/terraform/outputs.tf index 685531e906..321a7e0920 100644 --- a/ebsco_adapter/terraform/outputs.tf +++ b/ebsco_adapter/terraform/outputs.tf @@ -8,4 +8,4 @@ output "ebsco_adapter_topic_arn" { output "ebsco_adapter_bucket_name" { value = aws_s3_bucket.ebsco_adapter.bucket -} \ No newline at end of file +} diff --git a/ebsco_adapter/terraform/parameters.tf b/ebsco_adapter/terraform/parameters.tf index 5f288e3843..23eef2a1a6 100644 --- a/ebsco_adapter/terraform/parameters.tf +++ b/ebsco_adapter/terraform/parameters.tf @@ -62,3 +62,17 @@ resource "aws_ssm_parameter" "ebsco_adapter_customer_id" { ] } } + +resource "aws_ssm_parameter" "ebsco_adapter_output_topic_arn" { + name = "/catalogue_pipeline/ebsco_adapter/output_topic_arn" + description = "The ARN of the SNS topic to publish messages to" + type = "String" + value = module.ebsco_adapter_output_topic.arn +} + +resource "aws_ssm_parameter" "ebsco_adapter_bucket_name" { + name = "/catalogue_pipeline/ebsco_adapter/bucket_name" + description = "The name of the S3 bucket to write files to" + type = "String" + value = aws_s3_bucket.ebsco_adapter.bucket +} diff --git a/ebsco_adapter/terraform/security_groups.tf b/ebsco_adapter/terraform/security_groups.tf new file mode 100644 index 0000000000..a218368081 --- /dev/null +++ b/ebsco_adapter/terraform/security_groups.tf @@ -0,0 +1,20 @@ +resource "aws_security_group" "egress" { + name = "${local.namespace}_egress" + description = "Allow egress traffic from the services" + vpc_id = local.network_config.vpc_id + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + + cidr_blocks = [ + "0.0.0.0/0", + ] + } + + tags = { + Name = "${local.namespace}_egress" + } +} + diff --git a/infrastructure/modules/task/output.tf b/infrastructure/modules/task/output.tf new file mode 100644 index 0000000000..2b47430f9d --- /dev/null +++ b/infrastructure/modules/task/output.tf @@ -0,0 +1,19 @@ +output "task_role_arn" { + value = 
module.task_definition.task_role_arn +} + +output "task_execution_role_arn" { + value = module.task_definition.task_execution_role_arn +} + +output "task_role_name" { + value = module.task_definition.task_role_name +} + +output "task_execution_role_name" { + value = module.task_definition.task_execution_role_name +} + +output "task_definition_arn" { + value = module.task_definition.arn +} diff --git a/infrastructure/modules/task/task_definition.tf b/infrastructure/modules/task/task_definition.tf new file mode 100644 index 0000000000..5c87126b23 --- /dev/null +++ b/infrastructure/modules/task/task_definition.tf @@ -0,0 +1,37 @@ +module "app_container_definition" { + source = "git::github.com/wellcomecollection/terraform-aws-ecs-service.git//modules/container_definition?ref=v3.13.1" + name = "ebsco-adapter-ftp" + image = var.image + + environment = var.environment + + log_configuration = module.log_router_container.container_log_configuration +} + +module "log_router_container" { + source = "git::github.com/wellcomecollection/terraform-aws-ecs-service.git//modules/firelens?ref=v3.13.1" + namespace = var.task_name + + use_privatelink_endpoint = true +} + +module "log_router_container_secrets_permissions" { + source = "git::github.com/wellcomecollection/terraform-aws-ecs-service.git//modules/secrets?ref=v3.13.1" + secrets = module.log_router_container.shared_secrets_logging + role_name = module.task_definition.task_execution_role_name +} + +module "task_definition" { + source = "git::github.com/wellcomecollection/terraform-aws-ecs-service.git//modules/task_definition?ref=v3.13.1" + + cpu = var.cpu + memory = var.memory + + container_definitions = [ + module.log_router_container.container_definition, + module.app_container_definition.container_definition + ] + + launch_types = ["FARGATE"] + task_name = var.task_name +} diff --git a/infrastructure/modules/task/variables.tf b/infrastructure/modules/task/variables.tf new file mode 100644 index 0000000000..5ed77d7a39 --- 
/dev/null +++ b/infrastructure/modules/task/variables.tf @@ -0,0 +1,24 @@ +variable "environment" { + type = map(string) + description = "A map of environment variables to pass to the container" +} + +variable "image" { + type = string + description = "The container image to use for the container" +} + +variable "cpu" { + type = number + description = "The number of CPU units to reserve for the container" +} + +variable "memory" { + type = number + description = "The amount of memory to reserve for the container" +} + +variable "task_name" { + type = string + description = "The name of the task" +}