-
Notifications
You must be signed in to change notification settings - Fork 0
/
vrs_module.py
39 lines (32 loc) · 1.91 KB
/
vrs_module.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
from idatamation_module import IdatamationFlow
class VRSIdatamation(IdatamationFlow):
    """VRS-specific configuration and transformation on top of ``IdatamationFlow``.

    The instance declares the fab folder, the data-source tag and the column
    dtype/rename tables, and implements ``data_transformat``, which reshapes
    one input frame and forwards it to ``mongo_insert_data`` under the name
    ``"vrs_lot"``.
    """

    def __init__(self):
        # These attributes are assigned before the base initializer runs, as
        # in the original ordering. NOTE(review): presumably
        # IdatamationFlow.__init__ reads them -- confirm in idatamation_module.
        self.fab_folder = "S3"
        self.data_source = "VRS"
        # Dtypes keyed by the raw (not yet upper-cased) input column names.
        self.type_dict = {
            "廠別": str,
            "PartNum": str,
            "LotNum": str,
            "Layer": str,
            "站別": str,
            "Parameter": str,
        }
        super().__init__()

        # After the data is imported, the first step is to capitalize column
        # names, which is why the ASCII keys below are upper-case.
        self.replace_column_list = {
            "廠別": "FAB_ID",
            "PARTNUM": "PROD_ID_RAW",
            "LOTNUM": "LOT_ID",
            "站別": "STATION",
            "PARAMETER": "PARAMETER_ID",
            "TIMESTEMP": "TIME",
        }
        # Output schema: the columns kept by data_transformat (in this order)
        # and the dtype passed to data_type_check for each of them.
        self.data_type = {
            "FAB_ID": str,
            "PROD_ID_RAW": str,
            "LOT_ID": str,
            "TIME": object,
            "STEP": str,
            "PARAMETER_ID": str,
            "VALUE": int,
            "PROD_ID": str,
            "LOT_TYPE": str,
            "LAYER": str,
            "STATION": str,
        }

    def data_transformat(self, df, filename):
        """Normalize one VRS frame and hand it to ``mongo_insert_data``.

        Args:
            df: DataFrame that contains at least the columns "TIMESTEMP",
                "廠別", "LOTNUM", "LAYER" and "站別". The first assignments
                below write into this object, so the caller's frame is
                modified for "TIMESTEMP", "廠別", "LOTNUM" and "STEP".
            filename: Passed through unchanged to ``mongo_insert_data``.

        Returns:
            The number of rows in the frame given to ``mongo_insert_data``.

        Raises:
            KeyError: If a required column is missing.
            AttributeError: If "廠別" or "LOTNUM" holds a value without a
                ``strip`` method (for example NaN).
        """
        # TODO: Changing the time zone is not required.
        df["TIMESTEMP"] = pd.to_datetime(df["TIMESTEMP"])
        # "Etc/GMT-8" uses the POSIX sign convention and therefore means
        # UTC+08:00; the naive timestamps are tagged with it, then moved to UTC.
        df["TIMESTEMP"] = df["TIMESTEMP"].dt.tz_localize("Etc/GMT-8").dt.tz_convert("UTC")

        # Trim whitespace and drop the "廠" character from the fab name.
        df["廠別"] = df["廠別"].apply(lambda x: x.strip().replace("廠", ""))
        df["LOTNUM"] = df["LOTNUM"].apply(lambda x: x.strip())
        # STEP is "<layer>-<station>".
        df["STEP"] = df["LAYER"].astype(str) + "-" + df["站別"].astype(str)

        df = df.rename(columns=self.replace_column_list)
        # Inherited helper; the output schema expects it to provide the
        # PROD_ID and LOT_TYPE columns selected just below.
        df = self.get_prodID_and_lotTYPE(df)

        # Take an explicit copy of the column subset: assigning a column on
        # the bare selection is chained assignment and can raise pandas'
        # SettingWithCopyWarning.
        df = df[list(self.data_type)].copy()

        # Final check that the column types are correct.
        df["TIME"] = pd.to_datetime(df["TIME"])
        df = self.data_type_check(df, self.data_type)

        key_col = {'FAB_ID', 'STEP', 'PROD_ID', 'LOT_ID', 'PARAMETER_ID', 'TIME'}
        update_col = {'VALUE', "PROD_ID_RAW", "LOT_TYPE", "LAYER", "STATION"}
        self.mongo_insert_data(df, "vrs_lot", filename, key_col, update_col)
        return df.shape[0]
# Script entry: build the VRS flow and start it. There is no `__main__` guard,
# so these statements also run whenever this module is imported.
# NOTE(review): main_function is not defined in this file; presumably it is
# inherited from IdatamationFlow -- confirm what it does there.
process_data = VRSIdatamation()
process_data.main_function()