-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
executable file
·92 lines (70 loc) · 2.65 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import re
import categories
import pandas as pd
def search_budget_id(category, budget_dict):
    """Return the budget id whose name overlaps with *category*.

    Each budget name is tried as a regular expression against *category*
    and *category* is tried as a regular expression against the budget
    name; either hit counts as a match.  When several names match, the
    last one in ``budget_dict`` iteration order wins.  When nothing matches
    (or the matched value is ``None``), the value of the last entry in
    ``budget_dict`` is returned as a catch-all.

    Args:
        category: Category text to look up.
        budget_dict: Mapping of budget name to budget id.

    Returns:
        The matched budget id, the last entry's id as a fallback, or
        ``None`` when ``budget_dict`` is empty.
    """
    if not budget_dict:
        # No entries means there is no catch-all to fall back on either.
        return None

    res = None
    for budget_name, budget_id in budget_dict.items():
        if _pattern_found(budget_name, category) or _pattern_found(
            category, budget_name
        ):
            res = budget_id
    return budget_dict[list(budget_dict)[-1]] if res is None else res


def _pattern_found(pattern, text):
    """Return True if *pattern* is found in *text*, tolerating bad regexes."""
    try:
        return re.search(pattern, text) is not None
    except re.error:
        # Names such as "Fees (bank" are not valid regular expressions;
        # treat them as literal text instead of crashing.
        return pattern in text
def update_df_cols(
    df, description_column, description_query, update_column, update_value
):
    """Write a title-cased value into ``update_column`` for matching rows.

    Rows are selected with ``df_find_by`` on ``description_column`` using
    ``description_query``.  The frame is modified in place and returned.
    """
    new_value = update_value.title()
    matching_rows = df_find_by(df, description_column, description_query)
    df.loc[matching_rows.index, update_column] = new_value
    return df
def categoriser(
    df,
    avail_categories,
    desc_column="Description",
    cat_column="Category",
    bud_column="Budget",
):
    """Assign a category and budget to every row matching a known description.

    ``avail_categories`` maps a budget to a mapping of category to either a
    list of description queries, or a dict of sub-category to description
    queries.  For each query the category label (``"<cat> - <sub>"`` for
    sub-categories) and then the budget are written through
    ``update_df_cols``.  Category values of any other type are skipped.
    """
    # FIXME: There must be a better way.
    for budget, cats in avail_categories.items():
        for cat, desc in cats.items():
            for label, query in _labelled_queries(cat, desc):
                df = update_df_cols(df, desc_column, query, cat_column, label)
                df = update_df_cols(df, desc_column, query, bud_column, budget)
    return df


def _labelled_queries(cat, desc):
    """Yield ``(category label, description query)`` pairs for one entry."""
    if isinstance(desc, list):
        for query in desc:
            yield cat, query
    elif isinstance(desc, dict):
        for sub_cat, queries in desc.items():
            for query in queries:
                yield f"{cat} - {sub_cat}", query
def categorise_statement(df, cat_column="Category", bud_column="Budget"):
    """Add category and budget columns to a statement and populate them.

    Empty columns named ``cat_column`` and ``bud_column`` are inserted at
    positions 3 and 4, then filled by ``categoriser`` using
    ``categories.CATEGORIES``.

    Args:
        df: Statement frame; modified in place by the column inserts.
        cat_column: Name of the category column to create and fill.
        bud_column: Name of the budget column to create and fill.

    Returns:
        The frame returned by ``categoriser``.
    """
    df.insert(3, cat_column, None)
    df.insert(4, bud_column, None)
    # Forward the column names: otherwise custom names would leave the
    # inserted columns empty while the defaults get written instead.
    return categoriser(
        df, categories.CATEGORIES, cat_column=cat_column, bud_column=bud_column
    )
def convert_str_float(df, column_name):
    """Turn a column of space-separated number strings into absolute floats.

    Space characters are stripped from every value, the result is cast to
    ``float`` and the sign is discarded.  The frame is modified in place
    and returned.
    """
    without_spaces = df[column_name].str.replace(" ", "")
    df[column_name] = without_spaces
    as_floats = df[column_name].astype(float)
    df[column_name] = as_floats.abs()
    return df
def df_find_by(df, column_name, query):
    """Return the rows of *df* whose ``column_name`` text contains *query*.

    Matching is case-insensitive and *query* is passed to
    ``Series.str.contains`` unchanged, so it is interpreted as a regular
    expression by default.

    Args:
        df: Frame to search.
        column_name: Name of the text column to search in.
        query: Pattern to look for.

    Returns:
        A frame containing only the matching rows, with the original index.
    """
    # na=False: missing values never match.  Without it the mask contains
    # NaN and boolean indexing raises ValueError.
    mask = df[column_name].str.contains(query, case=False, na=False)
    return df[mask]
def format_df_date(df, column_name, date_format="%d/%m/%Y"):
    """Parse a column into datetimes using ``date_format``.

    Values that cannot be parsed are coerced to ``NaT``.  The frame is
    modified in place and returned.
    """
    raw_dates = df[column_name]
    parsed_dates = pd.to_datetime(raw_dates, errors="coerce", format=date_format)
    df[column_name] = parsed_dates
    return df
def print_all(df):
    """Raise pandas' ``display.max_rows`` above the frame's row count.

    Nothing is printed here; the option is set globally to one more than
    the number of rows and the frame is returned unchanged.
    """
    row_limit = df.shape[0] + 1
    pd.set_option("display.max_rows", row_limit)
    return df
def rename_df_cols(df, new_names=None, prefix=None, suffix=None):
    """Rename the columns of *df* and normalise spaces to underscores.

    Args:
        df: Frame whose columns are renamed; its ``columns`` attribute is
            reassigned in place.
        new_names: Optional replacement column names.  Applied only when
            non-empty and the same length as the existing columns;
            otherwise silently ignored.
        prefix: Optional string prepended to every column name.
        suffix: Optional string appended to every column name.

    Returns:
        *df* itself, or the result of ``add_prefix`` / ``add_suffix`` when
        a prefix or suffix is given.
    """
    # None instead of a mutable [] default; list() also lets tuples, Index
    # objects and arrays through without ambiguous truth-value errors.
    names = [] if new_names is None else list(new_names)
    if names and len(df.columns) == len(names):
        df.columns = names
    df.columns = df.columns.str.replace(" ", "_")
    if prefix:
        df = df.add_prefix(prefix)
    if suffix:
        df = df.add_suffix(suffix)
    return df
def update_if_nan(df, column_name):
    """Fill missing values in ``column_name`` with the preceding row's value.

    Only the named column is touched; missing values in other columns are
    left alone.  A missing value with no earlier valid value (for example
    in the first row) stays missing.

    Args:
        df: Frame to read from; it is not modified.
        column_name: Name of the column to forward-fill.

    Returns:
        *df* itself when the column has no missing values, otherwise a
        copy with the column forward-filled.
    """
    column = df[column_name]
    if not column.isnull().any():
        return df
    # Forward-fill works positionally, so it does not depend on an integer
    # index and handles runs of consecutive missing values.
    filled = df.copy()
    filled[column_name] = column.ffill()
    return filled