Skip to content

Commit

Permalink
Cleaned up text reading services, added code to amr endpoint.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sorrento110 committed Jul 17, 2023
1 parent ed7e8d7 commit 4daa903
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 31 deletions.
12 changes: 12 additions & 0 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,18 @@ def mathml_to_amr(payload: List[str], model: str = "petrinet"):
return resp


@app.post("/code_to_amr")
def code_to_amr(artifact_id: str):
from utils import create_job

operation_name = "operations.code_to_amr"
options = {"artifact_id": artifact_id}

resp = create_job(operation_name=operation_name, options=options)

return resp


@app.post("/pdf_extractions")
async def pdf_extractions(
artifact_id: str,
Expand Down
93 changes: 78 additions & 15 deletions workers/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def put_mathml_to_skema(*args, **kwargs):
logger.error("Failed to parse response from TA1 Service")

if amr_response.status_code == 200 and amr_json:

tds_responses = put_amr_to_tds(amr_json)

response = {
Expand All @@ -75,6 +74,7 @@ def put_mathml_to_skema(*args, **kwargs):
return response


# dccde3a0-0132-430c-afd8-c67953298f48
def pdf_extractions(*args, **kwargs):
# Get options
artifact_id = kwargs.get("artifact_id")
Expand All @@ -92,22 +92,22 @@ def pdf_extractions(*args, **kwargs):
unified_text_reading_url = f"{UNIFIED_API}/text-reading/integrated-pdf-extractions?annotate_skema={annotate_skema}&annotate_mit={annotate_mit}"
# headers = {"Content-Type": "application/json"}

put_payload = [("pdfs", (
filename,
io.BytesIO(downloaded_artifact),
"application/pdf"
))]
put_payload = [
("pdfs", (filename, io.BytesIO(downloaded_artifact), "application/pdf"))
]

try:
logger.info(f"Sending PDF to TA1 service with artifact id: {artifact_id}")
response = requests.post(
unified_text_reading_url,
files=put_payload,
#headers=headers,
# headers=headers,
)
logger.info(
f"Response received from TA1 with status code: {response.status_code}"
)
logger.info(f"Response received from TA1 with status code: {response.status_code}")
extraction_json = response.json()
outputs = extraction_json['outputs']
outputs = extraction_json["outputs"]

if isinstance(outputs, dict):
if extraction_json.get("outputs", {"data": None}).get("data", None) is None:
Expand All @@ -126,7 +126,7 @@ def pdf_extractions(*args, **kwargs):
}

artifact_response = put_artifact_extraction_to_tds(
artifact_id = artifact_id,
artifact_id=artifact_id,
name=name if name is not None else artifact_json.get("name"),
description=description
if description is not None
Expand All @@ -153,6 +153,7 @@ def pdf_extractions(*args, **kwargs):
return response


# 2931748e-3932-4cef-b5d7-d0d7e9e7740b
def dataset_profiling(*args, **kwargs):
openai_key = os.getenv("OPENAI_API_KEY")
dataset_id = kwargs.get("dataset_id")
Expand Down Expand Up @@ -249,28 +250,33 @@ def dataset_profiling_with_document(*args, **kwargs):
return resp.json()


# dccde3a0-0132-430c-afd8-c67953298f48
# 77a2dffb-08b3-4f6e-bfe5-83d27ed259c4
def link_amr(*args, **kwargs):
artifact_id = kwargs.get("artifact_id")
model_id = kwargs.get("model_id")

artifact_json, downloaded_artifact = get_artifact_from_tds(artifact_id=artifact_id)

tds_models_url = f"{TDS_API}/models"
tds_models_url = f"{TDS_API}/models/{model_id}"

model = requests.get(tds_models_url, data={"model_id": model_id})
model = requests.get(tds_models_url)
model_json = model.json()

model_amr = model_json.get("model")

logging.info(model_amr)

jsonified_amr = json.dumps(model_amr).encode("utf-8")

files = {
"amr_file": (
"amr.json",
io.BytesIO(json.dumps(model_amr, ensure_ascii=False).encode("utf-8")),
io.BytesIO(jsonified_amr),
"application/json",
),
"text_extractions_file": (
"extractions.json",
downloaded_artifact,
io.BytesIO(downloaded_artifact),
"application/json",
),
}
Expand All @@ -297,3 +303,60 @@ def link_amr(*args, **kwargs):
"status": update_response.status_code,
"message": "Model enriched and updated in TDS",
}
else:
logging.error("Response from TA1 service was not 200")

return {
"status": response.status_code,
"message": f"Response from TA1 service was not 200: {response.text}",
}


# 60e539e4-6969-4369-a358-c601a3a583da
def code_to_amr(*args, **kwargs):
artifact_id = kwargs.get("artifact_id")

artifact_json, downloaded_artifact = get_artifact_from_tds(artifact_id=artifact_id)

code_blob = downloaded_artifact.decode("utf-8")

code_amr_workflow_url = f"{UNIFIED_API}/workflows/code/snippets-to-pn-amr"

request_payload = {
"files": [artifact_json.get("file_names")[0]],
"blobs": [code_blob],
}

amr_response = requests.post(
code_amr_workflow_url, json=json.loads(json.dumps(request_payload))
)

amr_json = amr_response

try:
amr_json = amr_response.json()
except:
logger.error("Failed to parse response from TA1 Service")

if amr_response.status_code == 200 and amr_json:
tds_responses = put_amr_to_tds(amr_json)

response = {
"status_code": amr_response.status_code,
"amr": amr_json,
"tds_model_id": tds_responses.get("model_id"),
"tds_configuration_id": tds_responses.get("configuration_id"),
"error": None,
}

return response
else:
response = {
"status_code": amr_response.status_code,
"amr": None,
"tds_model_id": None,
"tds_configuration_id": None,
"error": amr_response.text,
}

return response
25 changes: 9 additions & 16 deletions workers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ def put_amr_to_tds(amr_payload):

headers = {"Content-Type": "application/json"}

print(amr_payload)
logger.info(amr_payload)

# Create TDS model
tds_models = TDS_API + "/models"
tds_models = f"{TDS_API}/models"
model_response = requests.post(tds_models, json=amr_payload, headers=headers)

model_id = model_response.json().get("id")
Expand Down Expand Up @@ -54,8 +54,7 @@ def put_amr_to_tds(amr_payload):

def put_artifact_extraction_to_tds(
artifact_id, name, description, filename, extractions
): # TODO change to get artifact from TDS via filename and artifact id maybe

):
artifact_payload = {
"username": "extraction_service",
"name": name,
Expand All @@ -66,9 +65,7 @@ def put_artifact_extraction_to_tds(
logger.info(f"Storing extraction to TDS for artifact: {artifact_id}")
# Create TDS artifact
tds_artifact = f"{TDS_API}/artifacts/{artifact_id}"
artifact_response = requests.put(
tds_artifact, json=artifact_payload
)
artifact_response = requests.put(tds_artifact, json=artifact_payload)
logger.info(f"TDS response: {artifact_response.text}")
artifact_put_status = artifact_response.status_code

Expand All @@ -90,13 +87,11 @@ def get_artifact_from_tds(artifact_id):

presigned_download = artifact_download_url.json().get("url")

print(presigned_download)
sys.stdout.flush()
logger.info(presigned_download)

downloaded_artifact = requests.get(artifact_download_url.json().get("url"))

print(f"ARTIFACT RETRIEVAL STATUS:{downloaded_artifact.status_code}")
sys.stdout.flush()
logger.info(f"ARTIFACT RETRIEVAL STATUS:{downloaded_artifact.status_code}")

return artifact_json, downloaded_artifact.content

Expand All @@ -107,20 +102,18 @@ def get_dataset_from_tds(dataset_id):
dataset = requests.get(tds_datasets_url)
dataset_json = dataset.json()

print(f"DATASET RESPONSE JSON: {dataset_json}")
sys.stdout.flush()
logger.info(f"DATASET RESPONSE JSON: {dataset_json}")

dataframes = []
for filename in dataset_json.get("file_names", []):
gen_download_url = f"{TDS_API}/datasets/{dataset_id}/download-url?dataset_id={dataset_id}&filename={filename}"
dataset_download_url = requests.get(gen_download_url)

print(dataset_download_url)
logger.info(f"{dataset_download_url} {dataset_download_url.json().get('url')}")

downloaded_dataset = requests.get(dataset_download_url.json().get("url"))

print(downloaded_dataset)
sys.stdout.flush()
logger.info(downloaded_dataset)

dataset_file = io.BytesIO(downloaded_dataset.content)
dataset_file.seek(0)
Expand Down

0 comments on commit 4daa903

Please sign in to comment.