-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.py
49 lines (40 loc) · 1.53 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from io import BytesIO
import requests
import functools
import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame
class DataHandler:
    """Load a Google Sheets spreadsheet as a DataFrame and query it.

    The sheet is downloaded once, at construction time, through the
    spreadsheet CSV export URL and kept in ``self.df``.
    """

    def __init__(self, gss_key, timeout: float = 30.0) -> None:
        """Download the spreadsheet identified by *gss_key* into ``self.df``.

        Args:
            gss_key: Spreadsheet key interpolated into the CSV export URL.
            timeout: Seconds to wait for the HTTP response before giving up.

        Raises:
            requests.RequestException: On timeout, connection failure or an
                HTTP error status.
        """
        self.df = self.__get_dataframe(gss_key, timeout)

    def __get_dataframe(self, gss_key, timeout: float = 30.0):
        """Fetch the sheet as CSV and parse it into a DataFrame.

        ``seeding_date`` and ``purchase_date`` are parsed as dates in
        ``%d.%m.%Y`` format; ``uid`` is kept as a string.
        """
        # Without a timeout a stalled connection would block forever.
        response = requests.get(
            f'https://docs.google.com/spreadsheet/ccc?key={ gss_key }&output=csv',
            timeout=timeout,
        )
        # Fail loudly instead of feeding an HTML error page to the CSV parser.
        response.raise_for_status()
        return pd.read_csv(
            BytesIO(response.content),
            parse_dates=['seeding_date', 'purchase_date'],
            dayfirst=True,
            date_format='%d.%m.%Y',
            dtype={'uid': str},
        )

    def __search(self, df: DataFrame, request: str, case=False) -> DataFrame:
        """Return the rows of *df* where any text column contains *request*.

        Args:
            df: Frame to filter.
            request: Substring to look for; matched literally, not as a regex.
            case: Whether the match is case sensitive.

        Returns:
            The matching rows; empty when nothing matches or when *df* has
            no text columns.
        """
        # 'string' is included so dedicated string dtypes are searched as
        # well as plain object columns.
        text_columns = df.select_dtypes(include=['object', 'string']).columns
        column_masks = (
            # regex=False: user input such as "(" must not be parsed as a
            # pattern. na=False: missing / non-string cells never match.
            df[column].str.contains(request, case=case, na=False, regex=False).astype(bool)
            for column in text_columns
        )
        # The all-False seed keeps reduce() valid when there are no text columns.
        no_match = pd.Series(False, index=df.index)
        mask = functools.reduce(np.logical_or, column_masks, no_match)
        return df.loc[mask]

    def search(self, request: str) -> DataFrame:
        """Return rows matching every whitespace-separated word of *request*.

        Each word narrows the previous result, so a row is kept only if all
        words occur somewhere in its text columns (case-insensitively). An
        empty or blank *request* returns the whole frame.
        """
        df_filtered = self.df
        for term in request.split():
            df_filtered = self.__search(df_filtered, term)
        return df_filtered

    def get_plant_by_uid(self, uid: str) -> DataFrame:
        """Return the rows whose ``uid`` equals *uid*.

        The result is an empty DataFrame (not ``None``) when no row matches.
        *uid* is converted to ``str`` because the column is loaded as strings.
        """
        return self.df.loc[self.df['uid'] == str(uid)]