-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
110 lines (84 loc) · 3.09 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python
####################
# Required Modules #
####################
# Generic/Built-in
import os
# Libs
import streamlit as st
from dotenv import load_dotenv
# Custom
from src.download import download_xlsx, download_zip
from src.process import acs_main, brc_main, gw_main, island_main, panu_main, sinmix_main
from src.session import initialize_session_state, next_session_state
from src.uploads import copy_uploads, show_uploads
from src.utils import dropdown_options, get_file_paths, print_result
##################
# Configurations #
##################
# Load environment variables, then read the directories configured for the
# app. Each value is None when its variable is not set.
load_dotenv()
upload_path = os.environ.get('UPLOAD_PATH')
output_path = os.environ.get('OUTPUT_PATH')
##########
# Script #
##########
# Set up Streamlit page.
# Done before anything else so that page configuration remains the first
# Streamlit command of the run, regardless of what initialize_session_state()
# (defined in src.session) does internally.
st.set_page_config(
    page_title="Invoice Info Extraction",
    page_icon="💸",
    layout="centered",
)

# Clear and initialize session the first time the app starts up.
# The presence of the "uploaded_files" key is used as the "already
# initialized" marker, so later runs of the script skip this step.
if "uploaded_files" not in st.session_state:
    initialize_session_state()

st.title("Extract Information from Invoices")

# The widget key is read from session state rather than hard-coded.
# NOTE(review): presumably next_session_state() changes this key to reset the
# uploader -- confirm against src.session.
uploaded_file = st.file_uploader(
    "Upload your invoices for information extraction!",
    type=["zip", "pdf", "xlsx"],
    key=st.session_state["file_uploader_key"],
)
if uploaded_file:
    # Copy the uploaded file into the upload_path
    copy_uploads(uploaded_file, upload_path)

    # Record the file name in session state, once per distinct name
    uploaded_names = st.session_state["uploaded_files"]
    if uploaded_file.name not in uploaded_names:
        uploaded_names.append(uploaded_file.name)

# Display the list of uploaded files.
# NOTE(review): the original indentation was lost; this and the button below
# are reconstructed as top-level statements so they run on every pass, not
# only while a file sits in the uploader -- confirm against the original.
show_uploads()

# Clear all files
if st.button("Clear uploaded files"):
    next_session_state()
# Get file paths of uploads
pdf_file_paths, excel_file_paths = get_file_paths()

# Get option to process data from dropdown menu
option = dropdown_options()

# Process data
if st.button("Process"):
    # `result` holds the output of the selected processor, when it returns one.
    # `result_zipped` is set for the option that offers a zip download instead.
    result = None
    result_zipped = False
    pdf_count = len(pdf_file_paths)

    if option == "ACS":
        result = acs_main(pdf_file_paths, excel_file_paths)
        print_result(option, pdf_count)
    elif option == "BRC":
        # BRC additionally returns error files, which are passed to the summary
        result, error_files = brc_main(pdf_file_paths)
        print_result(option, pdf_count, error_files=error_files)
    elif option == "GW":
        result = gw_main(pdf_file_paths)
        print_result(option, pdf_count)
    elif option == "ISLAND":
        result = island_main(pdf_file_paths, excel_file_paths)
        print_result(option, pdf_count)
    elif option == "PANU":
        result = panu_main(pdf_file_paths, excel_file_paths)
        print_result(option, pdf_count)
    elif option == "SINMIX":
        # SINMIX returns only an error dict; its output is offered as a zip
        error_dict = sinmix_main(pdf_file_paths)
        print_result(option, pdf_count, error_dict=error_dict)
        result_zipped = True

    # The download steps stay inside the "Process" branch because `result`
    # and `result_zipped` are only bound here.

    # Download result in Excel format
    if result is not None:
        download_xlsx(option, result)

    # Download zipped file containing processed PDFs
    if result_zipped:
        download_zip(option)