From 78097758ce619022fc12f3c7432335d5fad74023 Mon Sep 17 00:00:00 2001 From: Arnab Manna Date: Mon, 12 Apr 2021 11:10:27 +0530 Subject: [PATCH] Added failure handling --- .idea/.gitignore | 3 +++ ...erless-large-scale-document-processing.iml | 8 ++++++ .../inspectionProfiles/profiles_settings.xml | 6 +++++ .idea/modules.xml | 8 ++++++ .idea/vcs.xml | 6 +++++ src/datastore.py | 5 ++-- src/jobresultsproc.py | 27 +++++++++++++++++++ .../lib/textract-pipeline-stack.ts | 1 + 8 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/amazon-textract-serverless-large-scale-document-processing.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..26d33521 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/amazon-textract-serverless-large-scale-document-processing.iml b/.idea/amazon-textract-serverless-large-scale-document-processing.iml new file mode 100644 index 00000000..d0876a78 --- /dev/null +++ b/.idea/amazon-textract-serverless-large-scale-document-processing.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000..105ce2da --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..2aca4c44 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/src/datastore.py b/src/datastore.py index d4ff570c..be311f0a 100644 --- a/src/datastore.py +++ b/src/datastore.py @@ -48,10 +48,11 @@ def updateDocumentStatus(self, documentId, documentStatus): try: table.update_item( Key = { 'documentId': documentId }, - UpdateExpression = 'SET documentStatus= :documentstatusValue', + UpdateExpression = 'SET documentStatus= :documentstatusValue, documentCompletedOn = :documentCompletedOnValue', ConditionExpression = 'attribute_exists(documentId)', ExpressionAttributeValues = { - ':documentstatusValue': documentStatus + ':documentstatusValue': documentStatus, + ':documentCompletedOnValue': str(datetime.datetime.utcnow()) } ) except ClientError as e: diff --git a/src/jobresultsproc.py b/src/jobresultsproc.py index cdc8c260..17bed4fd 100644 --- a/src/jobresultsproc.py +++ b/src/jobresultsproc.py @@ -55,6 +55,32 @@ def processRequest(request): objectName = request['objectName'] outputTable = request["outputTable"] documentsTable = request["documentsTable"] + qUrl = request["dlqQueueUrl"] + + if jobStatus == 'FAILED': + + print("DocumentId: {}".format(jobTag)) + + ds = datastore.DocumentStore(documentsTable, outputTable) + ds.updateDocumentStatus(jobTag, jobStatus) + + output = "Processed -> Document: {}, Object: {}/{} processed.".format(jobTag, bucketName, objectName) + print(output) + + features = ["Text", "Forms", "Tables"] + jsonMessage = {'documentId': jobTag, + "features": features, + 'bucketName': bucketName, + 'objectName': objectName} + + client = AwsHelper().getClient('sqs') + + message = json.dumps(jsonMessage) + client.send_message(QueueUrl=qUrl, MessageBody=message) + + print("Submitted message to DLQ queue: {}".format(message)) + + return pages = getJobResults(jobAPI, jobId) @@ -109,6 +135,7 @@ def lambda_handler(event, context): request["outputTable"] = os.environ['OUTPUT_TABLE'] request["documentsTable"] = os.environ['DOCUMENTS_TABLE'] + request["dlqQueueUrl"] = os.environ['DLQ_QUEUE_URL'] return processRequest(request) diff --git a/textract-pipeline/lib/textract-pipeline-stack.ts b/textract-pipeline/lib/textract-pipeline-stack.ts index c730463d..b0760aba 100644 --- a/textract-pipeline/lib/textract-pipeline-stack.ts +++ b/textract-pipeline/lib/textract-pipeline-stack.ts @@ -286,6 +286,7 @@ export class TextractPipelineStack extends cdk.Stack { environment: { OUTPUT_TABLE: outputTable.tableName, DOCUMENTS_TABLE: documentsTable.tableName, + DLQ_QUEUE_URL: dlq.queueUrl, AWS_DATA_PATH : "models" } });