-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: file preprocessor simple service (#7)
* chore(fileprocessor): add additional error handling for prod kong-request-id differently than dev * fix(fileprocessor): bug where in dev mode it will ref kong request id * chore(fileprocessor): added even more extensive logging based on environment. Added test cases * fix(fileprocessor): removed random uuid in client side code in development to ensure readability * fix(fileprocessor): added validation for filesize over 5 MB
- Loading branch information
1 parent
0744334
commit 22a5871
Showing
16 changed files
with
181 additions
and
223 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
# Generate the Python gRPC stubs for file_processor.proto into /app.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so the success message below is only printed
# when generation actually succeeded.
set -euo pipefail

# Navigate to the directory containing the proto file.
# Stop immediately if it is missing rather than running protoc elsewhere.
cd /app/protos || exit 1

# Generate Python files from the .proto file
python -m grpc_tools.protoc -I. --python_out=/app --grpc_python_out=/app file_processor.proto

# Move generated Python files to the desired directory (if needed)
# In this case, files are already generated directly in the /app directory based on the protoc command above,
# so moving files is not necessary. This step is mentioned for clarity and future reference.

echo "Proto files have been processed and Python files are generated in /app directory."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
grpcio | ||
grpcio-tools | ||
tika | ||
langdetect | ||
pypdf2 | ||
pypdf2 | ||
python-dotenv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import asyncio | ||
import grpc | ||
import logging | ||
import file_processor_pb2 | ||
import file_processor_pb2_grpc | ||
import uuid | ||
|
||
# Configure root logging once at import time: DEBUG level, and every record is
# prefixed with its timestamp, logger name and level.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||
async def process_file_test(user_id, file_path, filename, file_id, channel_options):
    """Upload one local file to the FileProcessor gRPC service and report the outcome.

    Args:
        user_id: Value sent as ``userId`` in the request.
        file_path: Path of the local file whose bytes are uploaded.
        filename: Value sent as ``filename`` in the request; also used in log messages.
        file_id: Identifier sent as ``fileId`` after conversion with ``str()``.
        channel_options: gRPC channel options passed to ``grpc.aio.insecure_channel``.

    Returns:
        True when the ``ProcessFile`` call completes without error; False when
        the file cannot be read or the call raises ``grpc.aio.AioRpcError``.
    """
    try:
        with open(file_path, 'rb') as file:
            file_content = file.read()
    except FileNotFoundError:
        logging.error(f"File {file_path} not found.")
        return False
    except OSError as e:
        # A directory, a permission problem, or any other I/O failure must not
        # abort the whole test run; report it as a failed case instead.
        logging.error(f"Could not read {file_path}: {e}")
        return False

    # Establish gRPC channel and process file
    async with grpc.aio.insecure_channel('localhost:50053', options=channel_options) as channel:
        stub = file_processor_pb2_grpc.FileProcessorStub(channel)
        request = file_processor_pb2.FileUploadRequest(
            userId=user_id,
            file=file_content,
            filename=filename,
            fileId=str(file_id),
        )
        try:
            # The response payload is not inspected; only success/failure matters here.
            await stub.ProcessFile(request)
        except grpc.aio.AioRpcError as e:
            logging.error(f"Failure processing {filename}: {e.details()}")
            return False  # Indicate failure
        else:
            logging.info(f"Successfully processed {filename}.")
            return True  # Indicate success
|
||
async def file_type_test(channel_options):
    """Push each sample file through ``process_file_test`` and log a verdict per file.

    One user id is generated for the whole run and a fresh file id is generated
    for every sample. The sample name is used both as the path to read and as
    the filename sent to the service.

    Args:
        channel_options: gRPC channel options forwarded to ``process_file_test``.

    NOTE(review): the verdict is derived only from whether the upload succeeded,
    so the unsupported/corrupt samples are logged as "failed" when the service
    rejects them -- confirm that this is the intended reporting.
    """
    # Sample file name -> human-readable description used in the log output.
    samples = {
        "file1.pdf": "Valid PDF",
        "file2.pdf": "Valid PDF with complex layout",
        "file3.pdf": "Valid PDF with non english char",
        "file4.pdf": "Valid PDF with complex layout and non english char",
        "file5.jpg": "JPG image file",
        "file6.png": "No resolution metadata PNG image file",
        "file7.exe": "Unsupported executable file",
        "file8.zip": "Unsupported archive file",
        "file9_corrupt.pdf": "Corrupt PDF file",
        "file10_malicious.pdf": "PDF with potential security threat",
    }
    shared_user_id = str(uuid.uuid4())

    for sample_name, label in samples.items():
        upload_id = uuid.uuid4()
        passed = await process_file_test(
            shared_user_id, sample_name, sample_name, upload_id, channel_options
        )
        if not passed:
            logging.error(f"Test failed for {label}")
            continue
        logging.info(f"Test passed for {label}")
|
||
async def main():
    """Assemble the keepalive channel options and run the file-type test pass."""
    # Kept as a mapping for readability; gRPC receives it as a list of
    # (name, value) pairs in the same order.
    keepalive_settings = {
        'grpc.keepalive_time_ms': 30000,
        'grpc.keepalive_timeout_ms': 10000,
        'grpc.keepalive_permit_without_calls': True,
        'grpc.http2.min_time_between_pings_ms': 30000,
        'grpc.http2.min_ping_interval_without_data_ms': 5000,
    }
    await file_type_test(list(keepalive_settings.items()))
|
||
# Script entry point: run the async test client only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
File renamed without changes.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file was deleted.
Oops, something went wrong.
69 changes: 0 additions & 69 deletions
69
backend/simple/fileprocessor/src/file_processor_pb2_grpc.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.