-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser_fave.py
116 lines (95 loc) · 3.05 KB
/
parser_fave.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import csv
import json
import os
from pathlib import Path
from bs4 import BeautifulSoup
def parse_fave_html(html_str: str) -> dict:
    """Extract transaction fields from the HTML body of a FavePay receipt.

    The stripped text of every ``<p>`` element is collected in document
    order and scanned once.  A paragraph containing ``"Where"``,
    ``"Total"`` or ``"Receipt ID"`` acts as a label whose value is taken
    from the *next* paragraph; a paragraph containing ``"AM"`` or ``"PM"``
    is treated as the timestamp line.  Scanning stops as soon as the amount
    has been read.

    Args:
        html_str: HTML markup of the receipt e-mail body.

    Returns:
        A dict with the keys ``txn_type`` (always ``"Fave"``), ``txn_id``,
        ``txn_date`` (always ``None`` here), ``txn_time``, ``txn_amount``
        (formatted with two decimals), ``txn_from`` (always ``"me"``) and
        ``txn_to``.  Fields that were not found stay ``None``.

    Raises:
        ValueError: If the paragraph after the "Total" label, minus its
            first two characters, is not a valid float literal.
    """
    soup = BeautifulSoup(html_str, "html.parser")

    # ``tag.string`` can be None (e.g. a <p> that wraps nested markup);
    # such paragraphs carry no usable text and are skipped.
    paragraphs = []
    for tag in soup.find_all("p"):
        text = tag.string
        if text is not None:
            paragraphs.append(text.strip())

    # "expect_*" flags mean: the previous paragraph was the matching label,
    # so the current paragraph holds the value.
    expect_id = expect_merchant = expect_amount = False
    txn_id = txn_to = txn_time = txn_amount = None

    for text in paragraphs:
        # Pending values are consumed before label detection so that a
        # label paragraph is never mistaken for its own value.
        if expect_id:
            txn_id = text
            expect_id = False
        if expect_merchant:
            txn_to = text
            expect_merchant = False

        if "AM" in text or "PM" in text:
            # The time is the segment after the first comma.  Paragraphs
            # that merely contain "AM"/"PM" without a comma (for instance
            # an upper-case merchant name) are ignored instead of raising
            # IndexError.
            segments = text.split(",")
            if len(segments) > 1:
                # NOTE(review): dropping the last two characters presumably
                # removes the AM/PM marker - confirm this is intended, as
                # the meridiem is lost.
                txn_time = segments[1].strip()[:-2]

        if expect_amount:
            # The first two characters are skipped, presumably a
            # two-character currency prefix - confirm against a receipt.
            txn_amount = f"{float(text[2:]):.2f}"
            break

        if "Where" in text:
            expect_merchant = True
        elif "Total" in text:
            expect_amount = True
        elif "Receipt ID" in text:
            expect_id = True

    return {
        "txn_type": "Fave",
        "txn_id": txn_id,
        "txn_date": None,
        "txn_time": txn_time,
        "txn_amount": txn_amount,
        "txn_from": "me",
        "txn_to": txn_to,
    }
def main(output_dir="output"):
    """Merge the FavePay receipts under ``<output_dir>/fave`` into master files.

    Every ``*.json`` file in ``<output_dir>/fave`` is loaded.  Files whose
    ``subject`` is missing or does not start with ``"Your FavePay Receipt"``
    are skipped.  For the rest, ``body`` is handed to ``parse_fave_html``
    and the e-mail's ``date`` is stored as ``txn_date``.  The records are
    sorted by ``txn_date`` and written to ``master_fave.json`` and
    ``master_fave.csv`` inside ``output_dir``.

    Args:
        output_dir: Directory that contains the ``fave`` sub-directory and
            receives the two master files.

    Raises:
        FileNotFoundError: If ``<output_dir>/fave`` does not exist.
        KeyError: If a receipt e-mail has no ``date`` entry.
    """
    output_dir = Path(output_dir)
    fave_dir = output_dir / "fave"

    # os.listdir() returns names in arbitrary order; sorting them keeps the
    # relative order of records with equal txn_date stable between runs.
    fave_files = [
        fave_dir / name
        for name in sorted(os.listdir(fave_dir))
        if name.endswith(".json")
    ]

    all_data_dicts = []
    for fave_file in fave_files:
        with open(fave_file, "r") as f:
            email_data = json.load(f)

        # A missing or null subject cannot be a receipt; treat it as an
        # empty string instead of failing on None.startswith().
        subject = email_data.get("subject") or ""
        if not subject.startswith("Your FavePay Receipt"):
            continue

        data_dict = parse_fave_html(email_data.get("body"))
        if not data_dict:
            print(f"Skipping '{fave_file}' (likely not a transaction email)")
            continue

        data_dict["txn_date"] = email_data["date"]
        all_data_dicts.append(data_dict)

    # NOTE(review): this compares the raw date values; for strings that is
    # chronological only if the format sorts lexicographically - confirm.
    all_data_dicts.sort(key=lambda record: record["txn_date"])

    out_json = output_dir / "master_fave.json"
    with out_json.open("w") as f:
        json.dump(all_data_dicts, f, indent=4)
    print(f"Saved {len(all_data_dicts)} transactions to {out_json}")

    fieldnames = [
        "txn_type",
        "txn_id",
        "txn_date",
        "txn_time",
        "txn_amount",
        "txn_from",
        "txn_to",
    ]
    out_csv = output_dir / "master_fave.csv"
    # newline="" lets the csv module control line endings itself; without
    # it, platforms that translate "\n" emit a blank line after every row.
    with out_csv.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_data_dicts)
    print(f"Saved {len(all_data_dicts)} transactions to {out_csv}")
# Script entry point: run the aggregation with main()'s default arguments
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()