-
Notifications
You must be signed in to change notification settings - Fork 1
/
input.py
65 lines (47 loc) · 2.66 KB
/
input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from src.stochastic_compositions import make_composition
import pickle
import numpy as np
def _property_keywords(property_to_train):
    """Return the lower-case substrings that identify a property's output columns.

    The lookup is case-insensitive on ``property_to_train``.

    Args:
        property_to_train: Name of the property, e.g. ``"sg"``, ``"mu"``,
            ``"viscosity"``, ``"dynamic viscosity"``, ``"kinematic viscosity"``,
            ``"density"``, ``"d20"``, ``"surface tension"`` or ``"st"``.

    Returns:
        A tuple of substrings; an output column is selected when its
        lower-cased name contains at least one of them.

    Raises:
        ValueError: If the property name is not one of the supported names.
            The explanation is also printed, as before, and is now carried by
            the exception as well.
    """
    requested = property_to_train.lower()
    if requested == "sg":
        return ("sg", "specific gravity")
    if requested in ("mu", "viscosity", "dynamic viscosity"):
        return ("mu", "dynamic viscosity", "viscosity")
    if requested == "kinematic viscosity":
        return ("kinematic viscosity",)
    if requested in ("density", "d20"):
        return ("d20", "density")
    if requested in ("surface tension", "st"):
        return ("st", "surface tension")

    message = ("\n\nPrediction of mixture {} values is currently not yet supported. The property can be added in "
               "input.py or another property name can be used.\n\n".format(property_to_train))
    print(message)
    raise ValueError(message.strip())


def load_data(property_to_train):
    """Load the training inputs and the target values for one mixture property.

    Reads ``Data/labeled_library.pickle``, ``Data/pyl_input.pickle`` and
    ``Data/pyl_output.pickle``, builds the lump compositions through
    ``make_composition`` and extracts the boiling-point and target columns
    from the output table. As a side effect the boiling points are written
    to ``bp_to_predict.txt`` in the current working directory.

    Args:
        property_to_train: Case-insensitive property name; see
            ``_property_keywords`` for the accepted values.

    Returns:
        A 7-tuple ``(compositions, boiling_points, output_sg, smiles_dict,
        weight_dict, df, lumps)`` where

        * ``compositions`` is ``df_input.to_numpy()``,
        * ``boiling_points`` holds the output columns whose name contains ``"BP"``,
        * ``output_sg`` is the flattened target column(s) multiplied by 1000,
        * ``smiles_dict`` and ``weight_dict`` are the two values returned by
          ``make_composition``,
        * ``df`` is the unpickled labeled library,
        * ``lumps`` is the list of keys of the input table.

    Raises:
        ValueError: If ``property_to_train`` is not supported. This is checked
            before any file is opened.
    """
    # Validate first so a misspelled property fails immediately instead of
    # after loading three pickles and building the compositions.
    keywords = _property_keywords(property_to_train)

    # NOTE: pickle.load can execute arbitrary code while deserialising; these
    # files must come from a trusted source.
    with open("Data/labeled_library.pickle", "rb") as lib_f:
        df = pickle.load(lib_f)
    with open("Data/pyl_input.pickle", "rb") as inp_f:
        df_input = pickle.load(inp_f)
    with open("Data/pyl_output.pickle", "rb") as oup_f:
        df_output = pickle.load(oup_f)

    lumps = df_input.keys().tolist()
    all_lumps = list(df["Lump"].unique())
    # Union of both lump lists, de-duplicated in first-seen order. A plain
    # set union has no stable iteration order for string labels, which made
    # the list handed to make_composition differ from run to run.
    selected_lumps = list(dict.fromkeys(lumps + all_lumps))
    smiles_dict, weight_dict = make_composition(df, selected_lumps)  # TODO: Selected_lumps!
    compositions = df_input.to_numpy()

    header = df_output.columns
    bp_columns = [col for col in header if "BP" in col]
    target_columns = [col for col in header
                      if any(key in col.lower() for key in keywords)]

    boiling_points = df_output[bp_columns].to_numpy()
    np.savetxt("bp_to_predict.txt", boiling_points, fmt="%.4f")
    # NOTE(review): the factor 1000 is applied for every property, not only
    # for specific gravity -- confirm this is intended.
    output_sg = df_output[target_columns].to_numpy().reshape(-1) * 1000
    print("All data is loaded!")
    return compositions, boiling_points, output_sg, smiles_dict, weight_dict, df, lumps
def load_test_data():
    """Load the inputs needed to run predictions on the stored mixtures.

    Reads ``Data/labeled_library.pickle`` and ``Data/pyl_input.pickle`` and
    passes the library together with the input table's keys to
    ``make_composition``.

    Returns:
        A 5-tuple ``(compositions, smiles_dict, weight_dict, df, lumps)``:
        the input table converted with ``to_numpy()``, the two values
        returned by ``make_composition``, the unpickled library object and
        the list of keys of the input table.
    """
    def _unpickle(path):
        # Read one pickled object from disk; the file is closed on exit.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    library = _unpickle("Data/labeled_library.pickle")
    mixture_input = _unpickle("Data/pyl_input.pickle")

    lump_names = mixture_input.keys().tolist()
    smiles_by_lump, weights_by_lump = make_composition(library, lump_names)  # TODO: Selected_lumps!
    return mixture_input.to_numpy(), smiles_by_lump, weights_by_lump, library, lump_names