From 4daa903c9d63d12ccae3a6382b8c8aa551bc8d94 Mon Sep 17 00:00:00 2001
From: Powell
Date: Mon, 17 Jul 2023 00:27:19 -0600
Subject: [PATCH] Cleaned up text reading services, added code to amr endpoint.

---
 api/server.py         | 12 ++++++
 workers/operations.py | 93 ++++++++++++++++++++++++++++++++++++-------
 workers/utils.py      | 25 +++++-------
 3 files changed, 99 insertions(+), 31 deletions(-)

diff --git a/api/server.py b/api/server.py
index 40501b9..a3ef006 100644
--- a/api/server.py
+++ b/api/server.py
@@ -66,6 +66,18 @@ def mathml_to_amr(payload: List[str], model: str = "petrinet"):
     return resp
 
 
+@app.post("/code_to_amr")
+def code_to_amr(artifact_id: str):
+    from utils import create_job
+
+    operation_name = "operations.code_to_amr"
+    options = {"artifact_id": artifact_id}
+
+    resp = create_job(operation_name=operation_name, options=options)
+
+    return resp
+
+
 @app.post("/pdf_extractions")
 async def pdf_extractions(
     artifact_id: str,
diff --git a/workers/operations.py b/workers/operations.py
index dd76fad..6fd3186 100644
--- a/workers/operations.py
+++ b/workers/operations.py
@@ -51,7 +51,6 @@ def put_mathml_to_skema(*args, **kwargs):
         logger.error("Failed to parse response from TA1 Service")
 
     if amr_response.status_code == 200 and amr_json:
-
         tds_responses = put_amr_to_tds(amr_json)
 
         response = {
@@ -75,6 +74,7 @@
     return response
 
 
+# dccde3a0-0132-430c-afd8-c67953298f48
 def pdf_extractions(*args, **kwargs):
     # Get options
     artifact_id = kwargs.get("artifact_id")
@@ -92,22 +92,22 @@
     unified_text_reading_url = f"{UNIFIED_API}/text-reading/integrated-pdf-extractions?annotate_skema={annotate_skema}&annotate_mit={annotate_mit}"
     # headers = {"Content-Type": "application/json"}
 
-    put_payload = [("pdfs", (
-        filename,
-        io.BytesIO(downloaded_artifact),
-        "application/pdf"
-    ))]
+    put_payload = [
+        ("pdfs", (filename, io.BytesIO(downloaded_artifact), "application/pdf"))
+    ]
 
     try:
         logger.info(f"Sending PDF to TA1 service with artifact id: {artifact_id}")
         response = requests.post(
             unified_text_reading_url,
             files=put_payload,
-            #headers=headers,
+            # headers=headers,
+        )
+        logger.info(
+            f"Response received from TA1 with status code: {response.status_code}"
         )
-        logger.info(f"Response received from TA1 with status code: {response.status_code}")
         extraction_json = response.json()
-        outputs = extraction_json['outputs']
+        outputs = extraction_json["outputs"]
 
         if isinstance(outputs, dict):
             if extraction_json.get("outputs", {"data": None}).get("data", None) is None:
@@ -126,7 +126,7 @@
         }
 
     artifact_response = put_artifact_extraction_to_tds(
-        artifact_id = artifact_id,
+        artifact_id=artifact_id,
         name=name if name is not None else artifact_json.get("name"),
         description=description if description is not None
@@ -153,6 +153,7 @@
     return response
 
 
+# 2931748e-3932-4cef-b5d7-d0d7e9e7740b
 def dataset_profiling(*args, **kwargs):
     openai_key = os.getenv("OPENAI_API_KEY")
     dataset_id = kwargs.get("dataset_id")
@@ -249,28 +250,33 @@ def dataset_profiling_with_document(*args, **kwargs):
     return resp.json()
 
 
+# dccde3a0-0132-430c-afd8-c67953298f48
+# 77a2dffb-08b3-4f6e-bfe5-83d27ed259c4
 def link_amr(*args, **kwargs):
     artifact_id = kwargs.get("artifact_id")
     model_id = kwargs.get("model_id")
 
     artifact_json, downloaded_artifact = get_artifact_from_tds(artifact_id=artifact_id)
 
-    tds_models_url = f"{TDS_API}/models"
+    tds_models_url = f"{TDS_API}/models/{model_id}"
 
-    model = requests.get(tds_models_url, data={"model_id": model_id})
+    model = requests.get(tds_models_url)
     model_json = model.json()
-
     model_amr = model_json.get("model")
 
+    logger.info(model_amr)
+
+    jsonified_amr = json.dumps(model_amr).encode("utf-8")
+
     files = {
         "amr_file": (
             "amr.json",
-            io.BytesIO(json.dumps(model_amr, ensure_ascii=False).encode("utf-8")),
+            io.BytesIO(jsonified_amr),
             "application/json",
         ),
         "text_extractions_file": (
             "extractions.json",
-            downloaded_artifact,
+            io.BytesIO(downloaded_artifact),
             "application/json",
         ),
     }
@@ -297,3 +303,60 @@ def link_amr(*args, **kwargs):
             "status": update_response.status_code,
             "message": "Model enriched and updated in TDS",
         }
+    else:
+        logger.error("Response from TA1 service was not 200")
+
+        return {
+            "status": response.status_code,
+            "message": f"Response from TA1 service was not 200: {response.text}",
+        }
+
+
+# 60e539e4-6969-4369-a358-c601a3a583da
+def code_to_amr(*args, **kwargs):
+    artifact_id = kwargs.get("artifact_id")
+
+    artifact_json, downloaded_artifact = get_artifact_from_tds(artifact_id=artifact_id)
+
+    code_blob = downloaded_artifact.decode("utf-8")
+
+    code_amr_workflow_url = f"{UNIFIED_API}/workflows/code/snippets-to-pn-amr"
+
+    request_payload = {
+        "files": [artifact_json.get("file_names")[0]],
+        "blobs": [code_blob],
+    }
+
+    amr_response = requests.post(
+        code_amr_workflow_url, json=request_payload
+    )
+
+    amr_json = None
+
+    try:
+        amr_json = amr_response.json()
+    except ValueError:
+        logger.error("Failed to parse response from TA1 Service")
+
+    if amr_response.status_code == 200 and amr_json:
+        tds_responses = put_amr_to_tds(amr_json)
+
+        response = {
+            "status_code": amr_response.status_code,
+            "amr": amr_json,
+            "tds_model_id": tds_responses.get("model_id"),
+            "tds_configuration_id": tds_responses.get("configuration_id"),
+            "error": None,
+        }
+
+        return response
+    else:
+        response = {
+            "status_code": amr_response.status_code,
+            "amr": None,
+            "tds_model_id": None,
+            "tds_configuration_id": None,
+            "error": amr_response.text,
+        }
+
+        return response
diff --git a/workers/utils.py b/workers/utils.py
index 041bb2b..00a8c6e 100644
--- a/workers/utils.py
+++ b/workers/utils.py
@@ -23,10 +23,10 @@ def put_amr_to_tds(amr_payload):
 
     headers = {"Content-Type": "application/json"}
 
-    print(amr_payload)
+    logger.info(amr_payload)
 
     # Create TDS model
-    tds_models = TDS_API + "/models"
+    tds_models = f"{TDS_API}/models"
     model_response = requests.post(tds_models, json=amr_payload, headers=headers)
 
     model_id = model_response.json().get("id")
@@ -54,8 +54,7 @@ def put_amr_to_tds(amr_payload):
 
 def put_artifact_extraction_to_tds(
     artifact_id, name, description, filename, extractions
-):  # TODO change to get artifact from TDS via filename and artifact id maybe
-
+):
     artifact_payload = {
         "username": "extraction_service",
         "name": name,
@@ -66,9 +65,7 @@ def put_artifact_extraction_to_tds(
     logger.info(f"Storing extraction to TDS for artifact: {artifact_id}")
     # Create TDS artifact
     tds_artifact = f"{TDS_API}/artifacts/{artifact_id}"
-    artifact_response = requests.put(
-        tds_artifact, json=artifact_payload
-    )
+    artifact_response = requests.put(tds_artifact, json=artifact_payload)
     logger.info(f"TDS response: {artifact_response.text}")
 
     artifact_put_status = artifact_response.status_code
@@ -90,13 +87,11 @@ def get_artifact_from_tds(artifact_id):
 
     presigned_download = artifact_download_url.json().get("url")
 
-    print(presigned_download)
-    sys.stdout.flush()
+    logger.info(presigned_download)
 
     downloaded_artifact = requests.get(artifact_download_url.json().get("url"))
 
-    print(f"ARTIFACT RETRIEVAL STATUS:{downloaded_artifact.status_code}")
-    sys.stdout.flush()
+    logger.info(f"ARTIFACT RETRIEVAL STATUS:{downloaded_artifact.status_code}")
 
     return artifact_json, downloaded_artifact.content
 
@@ -107,20 +102,18 @@ def get_dataset_from_tds(dataset_id):
     dataset = requests.get(tds_datasets_url)
     dataset_json = dataset.json()
 
-    print(f"DATASET RESPONSE JSON: {dataset_json}")
-    sys.stdout.flush()
+    logger.info(f"DATASET RESPONSE JSON: {dataset_json}")
 
     dataframes = []
     for filename in dataset_json.get("file_names", []):
         gen_download_url = f"{TDS_API}/datasets/{dataset_id}/download-url?dataset_id={dataset_id}&filename={filename}"
         dataset_download_url = requests.get(gen_download_url)
-        print(dataset_download_url)
+        logger.info(f"{dataset_download_url} {dataset_download_url.json().get('url')}")
 
         downloaded_dataset = requests.get(dataset_download_url.json().get("url"))
-        print(downloaded_dataset)
-        sys.stdout.flush()
+        logger.info(downloaded_dataset)
 
         dataset_file = io.BytesIO(downloaded_dataset.content)
         dataset_file.seek(0)