-
Notifications
You must be signed in to change notification settings - Fork 4
/
evaluate.py
91 lines (68 loc) · 2.66 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import json
from pathlib import Path
import tarfile
import sys
import pandas as pd
from pycaret.regression import load_model, predict_model, load_config
from sklearn.metrics import accuracy_score, f1_score
class EvalConfig:
    """Filesystem layout for this SageMaker processing job.

    All paths live under the processing container's standard mount root.
    """

    # Root mounted into the processing container by SageMaker.
    ROOT_DIR = Path("/opt/ml/processing")

    # Inputs: extracted model artifact and the held-out test split.
    IN_MODEL_DIR = ROOT_DIR / "model"
    IN_MODEL_TAR = IN_MODEL_DIR / "model.tar.gz"
    IN_TEST_DIR = ROOT_DIR / "test"
    IN_TEST_CSV = IN_TEST_DIR / "test.csv"

    # Output: evaluation report. The directory is created eagerly at class
    # definition time so evaluate() can write without any existence check.
    OUT_EVAL_DIR = ROOT_DIR / "evaluation"
    OUT_EVAL_DIR.mkdir(parents=True, exist_ok=True)
    OUT_EVAL_JSON = OUT_EVAL_DIR / "eval.json"
def inspect_input():
    """Log the contents of both input directories (model and test) for debugging."""
    logger.info("Start inspect_input")
    # Same listing/log pattern for each input directory.
    for directory in (EvalConfig.IN_MODEL_DIR, EvalConfig.IN_TEST_DIR):
        entries = list_dir(directory)
        logger.info(f"{directory.as_posix()}: {entries}")
def evaluate():
    """Score the saved PyCaret model on the test split and write the report.

    Reads the test CSV and the saved PyCaret config/model from the input
    directories, runs predictions, computes accuracy and weighted F1, and
    writes a SageMaker-convention evaluation JSON to OUT_EVAL_JSON.
    """
    logger.info("Start evaluate")
    test_df = pd.read_csv(EvalConfig.IN_TEST_CSV)

    # NOTE(review): classification metrics are computed below, yet the file
    # imports from pycaret.regression — confirm whether this should be
    # pycaret.classification.
    logger.info("Pycaret load_config")
    config_path = EvalConfig.IN_MODEL_DIR / "final-config"
    load_config(config_path.as_posix())

    logger.info("Pycaret load_model")
    model_path = EvalConfig.IN_MODEL_DIR / "final-model"
    saved_model = load_model(model_path.as_posix())

    logger.info("Pycaret predict_model")
    pred_df = predict_model(saved_model, data=test_df)

    logger.info("Compute f1, accuracy")
    y_test = pred_df["target"]
    # "Label" is the prediction column name in PyCaret 2.x; PyCaret 3.x
    # renames it to "prediction_label" — TODO confirm the pinned version.
    y_pred = pred_df["Label"]
    # Fix: the report key below is "weighted_f1", so compute the weighted
    # average. The original used average="macro", contradicting the key name
    # that downstream pipeline condition steps read.
    f1 = f1_score(y_test, y_pred, average="weighted")
    acc = accuracy_score(y_test, y_pred)

    # MUST follow the SageMaker model-quality report convention.
    report_dict = {
        "multiclass_classification_metrics": {
            "accuracy": {"value": acc, "standard_deviation": "NaN"},
            "weighted_f1": {"value": f1, "standard_deviation": "NaN"},
        }
    }
    logger.info(f"report_dict: {report_dict}")
    logger.info(f"Save report_dict to: {EvalConfig.OUT_EVAL_JSON}")
    with open(EvalConfig.OUT_EVAL_JSON.as_posix(), "w") as f:
        f.write(json.dumps(report_dict))
def inspect_output():
    """Log the contents of the evaluation output directory for debugging."""
    logger.info("Start inspect_output")
    out_dir = EvalConfig.OUT_EVAL_DIR
    logger.info(f"{out_dir.as_posix()}: {list_dir(out_dir)}")
if __name__ == "__main__":
    # utils (providing logger, list_dir, ...) lives in the processing image
    # outside the package path, so it is imported by absolute location here.
    sys.path.append("/opt/program")
    from utils import *

    # Fix: removed unused local `model_dir` (was never read).
    model_path = EvalConfig.IN_MODEL_TAR
    if not model_path.exists():
        logger.error(f"{model_path} does not exist!")
        sys.exit(1)

    logger.info(f"Extracting model from path: {model_path}")
    # NOTE(review): extractall on an untrusted archive is vulnerable to path
    # traversal; pass filter="data" once the runtime is Python >= 3.12.
    with tarfile.open(model_path.as_posix()) as tar:
        tar.extractall(path=EvalConfig.IN_MODEL_DIR)

    inspect_input()
    evaluate()
    inspect_output()
    sys.exit(0)