Skip to content

Commit

Permalink
first version of open parser
Browse files Browse the repository at this point in the history
  • Loading branch information
Sdddell authored and CambioML committed Mar 31, 2024
1 parent 8db4d2d commit 6236936
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 2 deletions.
1 change: 1 addition & 0 deletions open_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from open_parser.base import OpenParser
69 changes: 69 additions & 0 deletions open_parser/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import json

import requests

# Endpoint queried (GET) to obtain a presigned upload URL plus user/job ids.
CAMBIO_UPLOAD_URL = "https://p1iz3c1c77.execute-api.us-west-2.amazonaws.com/v1/cambio_api/upload"
# Endpoint called (POST) to request extraction of a previously uploaded file.
CAMBIO_EXTRACT_URL = "https://p1iz3c1c77.execute-api.us-west-2.amazonaws.com/v1/cambio_api/extract"


class OpenParser:
    """Client that uploads a file to the Cambio API and fetches its extraction.

    The workflow has two steps: request a presigned upload URL and push the
    file to it, then ask the extract endpoint to process the uploaded object.
    Every request to the Cambio endpoints carries the key in the
    ``x-api-key`` header.
    """

    def __init__(self, apiKey: str = "") -> None:
        """Store the endpoint URLs and build the request header from *apiKey*."""
        self._uploadurl = CAMBIO_UPLOAD_URL
        self._extracturl = CAMBIO_EXTRACT_URL
        self._request_header = {"x-api-key": apiKey}

    def setAPIKey(self, apiKey: str) -> None:
        """Replace the API key sent with subsequent requests."""
        self._request_header = {"x-api-key": apiKey}

    def extract_pdf_content(self, file_path: str):
        """Upload the file at *file_path* and return its extracted content.

        Returns:
            The ``"file_content"`` entry of the decoded extraction result.

        Raises:
            Exception: If the upload-URL request, the upload itself, or the
                extraction request is rejected by the remote service.
        """
        user_id, job_id, s3_key = self._request_and_upload_by_apiKey(file_path)
        result = self._request_file_extraction(user_id, job_id, s3_key)
        return result["file_content"]

    def _error_handler(self, response):
        """Raise an ``Exception`` describing a non-successful API *response*.

        This method never returns normally: every branch raises, so callers
        can rely on it to abort the current operation.

        Raises:
            Exception: ``"Invalid API Key"`` for HTTP 403, ``"API Key limit
                exceeded"`` for HTTP 429, otherwise a message carrying the
                status code and response text.
        """
        if response.status_code == 403:
            raise Exception("Invalid API Key")
        if response.status_code == 429:
            raise Exception("API Key limit exceeded")
        raise Exception(f"Error: {response.status_code} {response.text}")

    def _request_and_upload_by_apiKey(self, file_path: str):
        """Obtain a presigned URL for *file_path* and upload the file to it.

        Returns:
            A ``(user_id, job_id, s3_key)`` tuple taken from the upload
            endpoint's JSON response.

        Raises:
            Exception: If the upload endpoint does not answer with HTTP 200,
                or if the presigned upload answers with an error status.
        """
        params = {"fileName": file_path}
        response = requests.get(
            self._uploadurl, headers=self._request_header, params=params
        )

        if response.status_code != 200:
            self._error_handler(response)

        # Decode the body once instead of re-parsing it for every field.
        body = response.json()
        url_info = body["presignedUrl"]
        uid = body["userId"]
        jid = body["jobId"]

        with open(file_path, "rb") as file_to_upload:
            files = {"file": (file_path, file_to_upload)}
            upload_response = requests.post(
                url_info["url"], data=url_info["fields"], files=files
            )
        print(f"Upload response: {upload_response.status_code}")

        # Without this check a failed upload would only surface later as a
        # confusing extraction error for an object that was never stored.
        if not upload_response.ok:
            raise Exception(
                f"Upload failed: {upload_response.status_code} "
                f"{upload_response.text}"
            )

        return uid, jid, url_info["fields"]["key"]

    def _request_file_extraction(self, user_id, job_id, s3_key):
        """Ask the extract endpoint to process an uploaded file.

        Returns:
            The JSON-decoded ``"result"`` field of the endpoint's response
            (that field is itself a JSON-encoded string).

        Raises:
            Exception: If the extract endpoint does not answer with HTTP 200.
        """
        payload = {
            "userId": user_id,
            "jobId": job_id,
            "fileKey": s3_key,
        }
        response = requests.post(
            self._extracturl, headers=self._request_header, json=payload
        )

        if response.status_code != 200:
            self._error_handler(response)

        print("Extraction success.")
        return json.loads(response.json()["result"])
165 changes: 165 additions & 0 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 6236936

Please sign in to comment.