-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add failure list compilation * add es to requirements * polish up pending/failure lists * polish up pending list check * move failure/pending lists * improve structure * end to end scheduling * tidy * tidy * improve commentary * remove broken diagram * fix diagram * tidy * print -> log * help GH find the client-only code * help GH find the client-only code * fix stuff found in review * longer timeout
- Loading branch information
1 parent
3350d40
commit 8902b09
Showing
37 changed files
with
870 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
""" | ||
Produce a list of records that have failed since a given date/time. | ||
Usage: | ||
> python compile_failure_list.py 2024-09-05T14:00:00 | ||
This will print out the S3 keys of all the zips that have been | ||
transferred to Archivematica, but failed to fully process, since 1400 on the 5th of September 2024. | ||
""" | ||
import boto3 | ||
import datetime | ||
from reporting_client import get_es_client | ||
|
||
|
||
def get_failures_since(session, since_time):
    """
    Print the S3 keys of zips whose ingest failed at or after ``since_time``.

    Searches the ``storage_ingests`` index with the filter built by
    ``get_query`` and prints one ``born-digital-accessions/<id>.zip`` key
    per hit, newline separated.  The search asks for at most 100 hits.

    :param session: session handed to ``get_es_client`` to build the client.
    :param since_time: lower bound for ``lastModifiedDate``.
    """
    client = get_es_client(session)
    failed_since = get_query(since_time)
    wanted_fields = ["bag.info.externalIdentifier", "lastModifiedDate"]
    result = client.search(
        index="storage_ingests",
        size=100,
        query=failed_since,
        source=False,
        fields=wanted_fields,
    )
    matching_hits = result["hits"]["hits"]
    listing = "\n".join(get_zip_paths(matching_hits))
    print(listing)
|
||
|
||
def get_zip_paths(hits):
    """
    Lazily turn search hits into the S3 keys of their zips.

    Each hit contributes ``born-digital-accessions/<identifier>.zip``, where
    the identifier is the first ``bag.info.externalIdentifier`` field value.
    """
    identifiers = (
        hit["fields"]["bag.info.externalIdentifier"][0] for hit in hits
    )
    return (
        f"born-digital-accessions/{identifier}.zip"
        for identifier in identifiers
    )
|
||
|
||
def get_query(since_time):
    """
    Build the search filter for failed ingests modified at or after
    ``since_time``.
    """
    is_failed = {"term": {"status.id": "failed"}}
    modified_since = {"range": {"lastModifiedDate": {"gte": since_time}}}
    return {"bool": {"filter": [is_failed, modified_since]}}
|
||
|
||
def main():
    """
    Command-line entry point: report failures since the ISO date/time
    given as the first argument.
    """
    import sys

    aws_session = boto3.Session()
    since_time = datetime.datetime.fromisoformat(sys.argv[1])
    get_failures_since(aws_session, since_time)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
""" | ||
Compile a list of the ingested status of the requested shoots. | ||
Given a list of shoots that you want to have been ingested, | ||
this will check whether they have all been successfully ingested (True) | ||
or not (False). | ||
A shoot may not have been ingested because of a failure, or because it | ||
is yet to be transferred (either in progress or not yet started). | ||
This contrasts with compile_failure_list.py, which produces a list of recent failures. | ||
Usage: | ||
Provide a newline separated list of shoot identifiers on STDIN, | ||
e.g. given a file myfile.txt: | ||
``` | ||
CP1G00D1 | ||
CP1BAAD1 | ||
CP000001 | ||
CP999999 | ||
``` | ||
where | ||
* CP1G00D1 and CP000001 have both been ingested, | ||
* CP1BAAD1 is somehow broken | ||
* CP999999 is yet to be ingested | ||
$ cat myfile.txt | python compile_pending_list.py | ||
Output: | ||
``` | ||
2754_CP1G00D1, True | ||
2754_CP1BAAD1, False | ||
2754_CP000001, True | ||
2754_CP999999, False | ||
``` | ||
""" | ||
|
||
import boto3 | ||
from reporting_client import get_es_client | ||
|
||
|
||
def get_successful_list(session, expected):
    """
    Print ``<identifier>, <True|False>`` for every identifier in ``expected``.

    One search finds which of the expected identifiers have a succeeded
    ingest (at most 1000 hits are requested).  Any identifier not found that
    way is checked individually with ``is_cracked_shoot_successful``.

    :param session: session handed to ``get_es_client`` to build the client.
    :param expected: list of external identifiers to report on.
    """
    es = get_es_client(session)
    wanted_fields = ["bag.info.externalIdentifier", "lastModifiedDate"]
    result = es.search(
        index="storage_ingests",
        size=1000,
        query=find_shoots_query(expected),
        source=False,
        fields=wanted_fields,
    )
    succeeded = get_identifiers(result["hits"]["hits"])
    for shoot in expected:
        if shoot not in succeeded:
            # Not ingested under its own identifier; look for ingests
            # whose identifier starts with it instead.
            print(f'{shoot}, {is_cracked_shoot_successful(es, shoot)}')
            continue
        print(f'{shoot}, True')
|
||
|
||
def is_cracked_shoot_successful(es, shoot):
    """
    Report whether every ingest matched by ``find_subshoots_query(shoot)``
    has succeeded.

    :param es: Elasticsearch client used to run the search.
    :param shoot: external identifier passed to ``find_subshoots_query``.
    :return: ``True`` only when there is at least one hit and every hit has
        a ``status.id`` of ``succeeded``; ``False`` otherwise.
    """
    response = es.search(
        index="storage_ingests",
        size=1000,
        query=find_subshoots_query(shoot),
        source=False,
        fields=["bag.info.externalIdentifier", "lastModifiedDate", "status.id"],
    )
    hits = response["hits"]["hits"]
    # Values requested through the search `fields` option always come back
    # wrapped in a list, so take the first element before comparing.
    # Comparing the list itself to "succeeded" can never be true.
    return bool(hits) and all(
        hit["fields"]["status.id"][0] == "succeeded" for hit in hits
    )
|
||
|
||
def get_identifiers(hits):
    """
    Collect the first ``bag.info.externalIdentifier`` value of each hit,
    in hit order.
    """
    identifiers = []
    for hit in hits:
        values = hit["fields"]["bag.info.externalIdentifier"]
        identifiers.append(values[0])
    return identifiers
|
||
|
||
def find_shoots_query(shoots):
    """
    Build the search filter for succeeded ingests whose external identifier
    is one of ``shoots``.
    """
    has_succeeded = {"term": {"status.id": "succeeded"}}
    is_requested = {"terms": {"bag.info.externalIdentifier": shoots}}
    return {"bool": {"filter": [has_succeeded, is_requested]}}
|
||
|
||
def find_subshoots_query(shoot):
    """
    Build the search filter for ingests whose external identifier starts
    with ``shoot``.
    """
    starts_with_shoot = {"prefix": {"bag.info.externalIdentifier": shoot}}
    return {"bool": {"filter": [starts_with_shoot]}}
|
||
|
||
def main():
    """
    Command-line entry point: read shoot identifiers from STDIN, one per
    line, and report the ingested status of each.

    Each identifier is stripped of surrounding whitespace and prefixed with
    ``2754_`` before being looked up.
    """
    import sys

    # Skip blank lines: an empty identifier would become the bare prefix
    # "2754_", which is not a shoot and would prefix-match unrelated ingests
    # in the per-shoot fallback search.
    shoots = [
        f"2754_{line.strip()}" for line in sys.stdin.readlines() if line.strip()
    ]
    get_successful_list(boto3.Session(), shoots)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
|
||
import botocore | ||
|
||
# Transfer-source bucket name for each environment name.
BUCKETS = dict(
    staging="wellcomecollection-archivematica-staging-transfer-source",
    production="wellcomecollection-archivematica-transfer-source",
)
|
||
|
||
def find_objects(session, bucket, object_keys, yield_on_found):
    """
    Yield the full S3 keys of the requested objects that are present in
    (or absent from) ``bucket``.

    :param session: object providing ``client('s3')``.
    :param bucket: name of the bucket to look in.
    :param object_keys: iterable of identifiers; each is stripped of
        surrounding whitespace and expanded to
        ``born-digital-accessions/<identifier>.zip``.
    :param yield_on_found: when truthy, yield the keys that exist;
        when falsy, yield the keys that do not.
    :raises botocore.exceptions.ClientError: for any ``head_object`` failure
        other than a 404.
    """
    # One client serves every lookup; there is no need to build a new one
    # for each key.
    s3 = session.client('s3')
    want_found = bool(yield_on_found)
    for object_key in object_keys:
        full_key = f"born-digital-accessions/{object_key.strip()}.zip"
        # Keep the try body down to the one call that can raise, so that
        # nothing happening at the yield is mistaken for a lookup failure.
        try:
            s3.head_object(Bucket=bucket, Key=full_key)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] != '404':
                raise
            found = False
        else:
            found = True
        if found == want_found:
            yield full_key
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from elasticsearch import Elasticsearch | ||
|
||
def get_es_client(session):
    """
    Returns an Elasticsearch client for the reporting cluster.

    The username, password and host are each read with
    ``get_secret_string``; the client connects to ``https://<host>`` using
    basic authentication.
    """
    secret_ids = (
        "reporting/read_only/es_username",
        "reporting/read_only/es_password",
        "reporting/es_host",
    )
    username, password, host = [
        get_secret_string(session, secret_id=secret_id)
        for secret_id in secret_ids
    ]
    url = f"https://{host}"
    return Elasticsearch(url, basic_auth=(username, password))
|
||
|
||
def get_secret_string(session, *, secret_id):
    """
    Return the ``SecretString`` of the secret ``secret_id``, fetched through
    the session's ``secretsmanager`` client.
    """
    secrets_client = session.client("secretsmanager")
    secret = secrets_client.get_secret_value(SecretId=secret_id)
    return secret["SecretString"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import sys | ||
import boto3 | ||
|
||
|
||
def post_messages(session, shoot_numbers):
    """
    Publish one restore request per shoot number to the production
    ``restore_shoots`` SNS topic.

    :param session: object providing ``resource("sns")``.
    :param shoot_numbers: iterable of shoot numbers, e.g. lines read from
        STDIN.  Surrounding whitespace is removed and blank entries are
        skipped.
    """
    sns = session.resource("sns")
    topic = sns.Topic("arn:aws:sns:eu-west-1:760097843905:restore_shoots-production")
    for line in shoot_numbers:
        # Strip before logging as well as before publishing, so the log line
        # does not carry the input's trailing newline.
        shoot_number = line.strip()
        if not shoot_number:
            # A blank line is not a shoot; do not publish an empty message.
            continue
        print(f"requesting restore of {shoot_number}")
        topic.publish(Message=shoot_number)
|
||
|
||
if __name__ == "__main__":
    # Shoot numbers arrive on STDIN, one per line.
    aws_session = boto3.Session()
    requested_shoots = sys.stdin.readlines()
    post_messages(aws_session, requested_shoots)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import sys | ||
import boto3 | ||
|
||
|
||
def post_messages(session, environment, shoot_numbers):
    """
    Publish one touch request per shoot number to the ``touch_shoots`` SNS
    topic of the given environment.

    :param session: object providing ``resource("sns")``.
    :param environment: suffix of the topic name
        (``touch_shoots-<environment>``).
    :param shoot_numbers: iterable of shoot numbers, e.g. lines read from
        STDIN.  Surrounding whitespace is removed and blank entries are
        skipped.
    """
    sns = session.resource("sns")
    topic = sns.Topic(f"arn:aws:sns:eu-west-1:404315009621:touch_shoots-{environment}")
    for line in shoot_numbers:
        # Strip before logging as well as before publishing, so the log line
        # does not carry the input's trailing newline.
        shoot_number = line.strip()
        if not shoot_number:
            # A blank line is not a shoot; do not publish an empty message.
            continue
        print(f"requesting touch of {shoot_number}")
        topic.publish(Message=shoot_number)
|
||
|
||
if __name__ == "__main__":
    # The environment is the first argument; shoot numbers arrive on STDIN.
    aws_session = boto3.Session()
    environment = sys.argv[1]
    requested_shoots = sys.stdin.readlines()
    post_messages(aws_session, environment, requested_shoots)
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import boto3 | ||
import sys | ||
|
||
from objects_on_target import find_objects, BUCKETS | ||
|
||
if __name__ == '__main__':
    # Print the keys, from those named on STDIN, that exist in the bucket
    # for the environment given as the first argument.
    aws_session = boto3.Session()
    bucket = BUCKETS[sys.argv[1]]
    requested = sys.stdin.readlines()
    present = find_objects(aws_session, bucket, requested, True)
    print("\n".join(present))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import boto3 | ||
import sys | ||
|
||
from objects_on_target import find_objects, BUCKETS | ||
|
||
if __name__ == '__main__':
    # Print the keys, from those named on STDIN, that do not exist in the
    # bucket for the environment given as the first argument.
    aws_session = boto3.Session()
    bucket = BUCKETS[sys.argv[1]]
    requested = sys.stdin.readlines()
    absent = find_objects(aws_session, bucket, requested, False)
    print("\n".join(absent))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.