diff --git a/index_config/mappings.images_indexed.2024-11-06.json b/index_config/mappings.images_indexed.2024-11-06.json index 04828d30aa..fed85cb206 100644 --- a/index_config/mappings.images_indexed.2024-11-06.json +++ b/index_config/mappings.images_indexed.2024-11-06.json @@ -1,6 +1,6 @@ { "dynamic": "strict", - "properties": { + "properties": { "aggregatableValues": { "properties": { "locations": { @@ -47,29 +47,77 @@ }, "filterableValues": { "properties": { - "locations.license.id": { - "type": "keyword" - }, - "source.contributors.agent.label": { - "type": "keyword" - }, - "source.contributors.agent.id": { - "type": "keyword" - }, - "source.genres.label": { - "type": "keyword" - }, - "source.genres.concepts.id": { - "type": "keyword" - }, - "source.subjects.label": { - "type": "keyword" - }, - "source.subjects.concepts.id": { - "type": "keyword" + "locations": { + "properties": { + "license": { + "properties": { + "id": { + "type": "keyword" + } + } + } + } }, - "source.production.dates.range.from": { - "type": "date" + "source": { + "properties": { + "contributors": { + "properties": { + "agent": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + } + } + }, + "genres": { + "properties": { + "concepts": { + "properties": { + "id": { + "type": "keyword" + } + } + }, + "label": { + "type": "keyword" + } + } + }, + "production": { + "properties": { + "dates": { + "properties": { + "range": { + "properties": { + "from": { + "type": "date" + } + } + } + } + } + } + }, + "subjects": { + "properties": { + "concepts": { + "properties": { + "id": { + "type": "keyword" + } + } + }, + "label": { + "type": "keyword" + } + } + } + } } } }, @@ -77,11 +125,15 @@ "properties": { "features1": { "type": "dense_vector", - "dims": 2048 + "dims": 2048, + "index": true, + "similarity": "cosine" }, "features2": { "type": "dense_vector", - "dims": 2048 + "dims": 2048, + "index": true, + "similarity": "cosine" }, "paletteEmbedding": { "type": "dense_vector", @@ -200,13 +252,17 @@ } } }, - "contributors.agent.label": { - "type": "text", - "analyzer": "english" - }, - "contributors.agent.id": { - "type": "keyword", - "normalizer": "lowercase" + "contributors": { + "properties": { + "agent": { + "properties": { + "label": { + "type": "text", + "analyzer": "english" + } + } + } + } }, "description": { "type": "text", @@ -262,25 +318,37 @@ "type": "text", "analyzer": "english" }, - "genres.concepts.label": { - "type": "text", - "analyzer": "english" - }, - "genres.concepts.id": { - "type": "keyword", - "normalizer": "lowercase" + "genres": { + "properties": { + "concepts": { + "properties": { + "label": { + "type": "text", + "analyzer": "english" + } + } + } + } }, "id": { "type": "keyword", "normalizer": "lowercase" }, - "sourceIdentifier.value": { - "type": "keyword", - "normalizer": "lowercase" + "sourceIdentifier": { + "properties": { + "value": { + "type": "keyword", + "normalizer": "lowercase" + } + } }, - "identifiers.value": { - "type": "keyword", - "normalizer": "lowercase" + "identifiers": { + "properties": { + "value": { + "type": "keyword", + "normalizer": "lowercase" + } + } }, "images": { "properties": { @@ -288,9 +356,13 @@ "type": "keyword", "normalizer": "lowercase" }, - "identifiers.value": { - "type": "keyword", - "normalizer": "lowercase" + "identifiers": { + "properties": { + "value": { + "type": "keyword", + "normalizer": "lowercase" + } + } } } }, @@ -300,9 +372,13 @@ "type": "keyword", "normalizer": "lowercase" }, - "identifiers.value": { - "type": "keyword", - "normalizer": "lowercase" + "identifiers": { + "properties": { + "value": { + "type": "keyword", + "normalizer": "lowercase" + } + } }, "shelfmark": { "properties": { @@ -326,9 +402,13 @@ } } }, - "languages.label": { - "type": "text", - "analyzer": "lowercase" + "languages": { + "properties": { + "label": { + "type": "text", + "analyzer": "lowercase" + } + } }, "lettering": { "type": "text", @@ -384,111 +464,119 @@ } } }, - "notes.contents": { - "type": "text", - "analyzer": "lowercase", - "fields": { - "arabic": { - "type": "text", - "analyzer": "arabic" - }, - "bengali": { - "type": "text", - "analyzer": "bengali" - }, - "english": { - "type": "text", - "analyzer": "english" - }, - "french": { - "type": "text", - "analyzer": "french" - }, - "german": { - "type": "text", - "analyzer": "german" - }, - "hindi": { - "type": "text", - "analyzer": "hindi" - }, - "italian": { - "type": "text", - "analyzer": "italian" - }, - "persian": { - "type": "text", - "analyzer": "persian" - }, - "spanish": { - "type": "text", - "analyzer": "spanish" - }, - "swappable_characters": { - "type": "text", - "analyzer": "swappable_characters" - }, - "cased": { - "type": "text", - "analyzer": "cased" - }, - "base": { + "notes": { + "properties": { + "contents": { "type": "text", - "analyzer": "base" + "analyzer": "lowercase", + "fields": { + "arabic": { + "type": "text", + "analyzer": "arabic" + }, + "bengali": { + "type": "text", + "analyzer": "bengali" + }, + "english": { + "type": "text", + "analyzer": "english" + }, + "french": { + "type": "text", + "analyzer": "french" + }, + "german": { + "type": "text", + "analyzer": "german" + }, + "hindi": { + "type": "text", + "analyzer": "hindi" + }, + "italian": { + "type": "text", + "analyzer": "italian" + }, + "persian": { + "type": "text", + "analyzer": "persian" + }, + "spanish": { + "type": "text", + "analyzer": "spanish" + }, + "swappable_characters": { + "type": "text", + "analyzer": "swappable_characters" + }, + "cased": { + "type": "text", + "analyzer": "cased" + }, + "base": { + "type": "text", + "analyzer": "base" + } + } } } }, - "partOf.title": { - "type": "text", - "analyzer": "lowercase", - "fields": { - "arabic": { - "type": "text", - "analyzer": "arabic" - }, - "bengali": { - "type": "text", - "analyzer": "bengali" - }, - "english": { - "type": "text", - "analyzer": "english" - }, - "french": { - "type": "text", - "analyzer": "french" - }, - "german": { - "type": "text", - "analyzer": "german" - }, - "hindi": { - "type": "text", - "analyzer": "hindi" - }, - "italian": { - "type": "text", - "analyzer": "italian" - }, - "persian": { - "type": "text", - "analyzer": "persian" - }, - "spanish": { - "type": "text", - "analyzer": "spanish" - }, - "swappable_characters": { - "type": "text", - "analyzer": "swappable_characters" - }, - "cased": { - "type": "text", - "analyzer": "cased" - }, - "base": { + "partOf": { + "properties": { + "title": { "type": "text", - "analyzer": "base" + "analyzer": "lowercase", + "fields": { + "arabic": { + "type": "text", + "analyzer": "arabic" + }, + "bengali": { + "type": "text", + "analyzer": "bengali" + }, + "english": { + "type": "text", + "analyzer": "english" + }, + "french": { + "type": "text", + "analyzer": "french" + }, + "german": { + "type": "text", + "analyzer": "german" + }, + "hindi": { + "type": "text", + "analyzer": "hindi" + }, + "italian": { + "type": "text", + "analyzer": "italian" + }, + "persian": { + "type": "text", + "analyzer": "persian" + }, + "spanish": { + "type": "text", + "analyzer": "spanish" + }, + "swappable_characters": { + "type": "text", + "analyzer": "swappable_characters" + }, + "cased": { + "type": "text", + "analyzer": "cased" + }, + "base": { + "type": "text", + "analyzer": "base" + } + } } } }, @@ -496,57 +584,61 @@ "type": "text", "analyzer": "english" }, - "production.label": { - "type": "text", - "analyzer": "lowercase", - "fields": { - "arabic": { - "type": "text", - "analyzer": "arabic" - }, - "bengali": { - "type": "text", - "analyzer": "bengali" - }, - "english": { - "type": "text", - "analyzer": "english" - }, - "french": { - "type": "text", - "analyzer": "french" - }, - "german": { - "type": "text", - "analyzer": "german" - }, - "hindi": { - "type": "text", - "analyzer": "hindi" - }, - "italian": { - "type": "text", - "analyzer": "italian" - }, - "persian": { - "type": "text", - "analyzer": "persian" - }, - "spanish": { - "type": "text", - "analyzer": "spanish" - }, - "swappable_characters": { - "type": "text", - "analyzer": "swappable_characters" - }, - "cased": { - "type": "text", - "analyzer": "cased" - }, - "base": { + "production": { + "properties": { + "label": { "type": "text", - "analyzer": "base" + "analyzer": "lowercase", + "fields": { + "arabic": { + "type": "text", + "analyzer": "arabic" + }, + "bengali": { + "type": "text", + "analyzer": "bengali" + }, + "english": { + "type": "text", + "analyzer": "english" + }, + "french": { + "type": "text", + "analyzer": "french" + }, + "german": { + "type": "text", + "analyzer": "german" + }, + "hindi": { + "type": "text", + "analyzer": "hindi" + }, + "italian": { + "type": "text", + "analyzer": "italian" + }, + "persian": { + "type": "text", + "analyzer": "persian" + }, + "spanish": { + "type": "text", + "analyzer": "spanish" + }, + "swappable_characters": { + "type": "text", + "analyzer": "swappable_characters" + }, + "cased": { + "type": "text", + "analyzer": "cased" + }, + "base": { + "type": "text", + "analyzer": "base" + } + } } } }, @@ -561,13 +653,17 @@ } } }, - "subjects.concepts.label": { - "type": "text", - "analyzer": "english" - }, - "subjects.concepts.id": { - "type": "keyword", - "normalizer": "lowercase" + "subjects": { + "properties": { + "concepts": { + "properties": { + "label": { + "type": "text", + "analyzer": "english" + } + } + } + } }, "title": { "type": "text", diff --git a/pipeline/ingestor/ingestor_images/docker-compose.yml b/pipeline/ingestor/ingestor_images/docker-compose.yml index cb4a517a16..6bb7a6f9c6 100644 --- a/pipeline/ingestor/ingestor_images/docker-compose.yml +++ b/pipeline/ingestor/ingestor_images/docker-compose.yml @@ -9,7 +9,7 @@ services: ports: - "4566:4566" elasticsearch: - image: "docker.elastic.co/elasticsearch/elasticsearch:8.5.0" + image: "docker.elastic.co/elasticsearch/elasticsearch:8.11.4" ports: - "9200:9200" - "9300:9300" diff --git a/pipeline/terraform/2024-11-05/main.tf b/pipeline/terraform/2024-11-05/main.tf index 040589fed5..96cfb5aa80 100644 --- a/pipeline/terraform/2024-11-05/main.tf +++ b/pipeline/terraform/2024-11-05/main.tf @@ -2,7 +2,7 @@ module "pipeline" { source = "../modules/stack" reindexing_state = { - listen_to_reindexer = true + listen_to_reindexer = false scale_up_tasks = false scale_up_elastic_cluster = false scale_up_id_minter_db = false diff --git a/pipeline/terraform/modules/stack/service_id_minter.tf b/pipeline/terraform/modules/stack/service_id_minter.tf index 36a7e977a7..ac83e2b260 100644 --- a/pipeline/terraform/modules/stack/service_id_minter.tf +++ b/pipeline/terraform/modules/stack/service_id_minter.tf @@ -45,15 +45,8 @@ module "id_minter" { cpu = 2048 memory = 4096 - # The total number of connections to RDS across all tasks from all ID minter - # services must not exceed the maximum supported by the RDS instance. min_capacity = var.min_capacity - max_capacity = min( - floor( - local.id_minter_rds_max_connections / local.id_minter_task_max_connections - ), - local.max_capacity - ) + max_capacity = local.max_capacity fargate_service_boilerplate = local.fargate_service_boilerplate } diff --git a/pipeline/terraform/modules/stack/service_work_batcher.tf b/pipeline/terraform/modules/stack/service_work_batcher.tf index d15a867445..9efb3fefcb 100644 --- a/pipeline/terraform/modules/stack/service_work_batcher.tf +++ b/pipeline/terraform/modules/stack/service_work_batcher.tf @@ -52,12 +52,18 @@ module "batcher_lambda" { max_batch_size = 40 } + timeout = 60 * 10 # 10 Minutes + queue_config = { topic_arns = [ module.router_path_output_topic.arn, module.path_concatenator_output_topic.arn, ] visibility_timeout_seconds = (local.wait_minutes + 5) * 60 + + maximum_concurrency = 20 + batch_size = 2500 + batching_window_seconds = 60 } ecr_repository_name = "uk.ac.wellcome/batcher"