-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
103 lines (81 loc) · 3.01 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import logging
import os
import shutil
import subprocess
from datetime import datetime

import yaml
from sqlalchemy.engine.url import URL
from sqlalchemy.pool import NullPool
from triage.component.architect.feature_generators import FeatureGenerator
from triage.component.timechop import Timechop
from triage.component.timechop.plotting import visualize_chops
from triage.experiments import MultiCoreExperiment, SingleThreadedExperiment
from triage.util.db import create_engine
# Root-logger setup: every record at INFO or above is appended to a per-run
# log file in the current working directory.
# (The disabled `os.chdir('donors-choose')` stays disabled, so all relative
# paths in this script resolve against the directory it is launched from.)
now = datetime.now()
logger = logging.getLogger()

# Record layout: logger name, timestamp, level, process id, file and line.
_log_record_layout = (
    '%(name)-30s %(asctime)s %(levelname)10s %(process)6d '
    '%(filename)-24s %(lineno)4d: %(message)s'
)
_log_date_layout = '%d/%m/%Y %I:%M:%S %p'
formatter = logging.Formatter(fmt=_log_record_layout, datefmt=_log_date_layout)

# NOTE(review): str(now) contains a space and colons, so the resulting file
# name is awkward in shells and invalid on Windows - consider a strftime stamp.
fh = logging.FileHandler(filename=f'triage_{now}.log', mode='w')
fh.setFormatter(formatter)

logger.addHandler(fh)
logger.setLevel(logging.INFO)
# Database engine.
# Connection settings are read from the PG* environment variables; a variable
# that is not set is passed through as None.
# The disabled alternative read the same settings from 'database.yaml'
# (keys: host, user, db, pass, port) via yaml.safe_load.
# TODO - Create a function that either uses the environmental variables or a database.yaml file
_pg_env_by_url_field = {
    'host': 'PGHOST',
    'username': 'PGUSER',
    'database': 'PGDATABASE',
    'password': 'PGPASSWORD',
    'port': 'PGPORT',
}
_connection_settings = {
    url_field: os.getenv(env_name)
    for url_field, env_name in _pg_env_by_url_field.items()
}
db_url = URL('postgres', **_connection_settings)
db_engine = create_engine(db_url)
# Load the experiment configuration from YAML into `config`.
config_file = 'donors-choose-config.yaml'
# Alternative configuration file (currently disabled):
# config_file = 'donors-choose-config-small.yaml'
# Decode explicitly as UTF-8 so that parsing does not depend on the locale
# default encoding of the machine running the script.
with open(config_file, 'r', encoding='utf-8') as fin:
    config = yaml.safe_load(fin)
# Generate the temporal-config plot from the 'temporal_config' section.
chopper = Timechop(**config['temporal_config'])
# We aren't interested in seeing the entire feature_start_time represented
# in our timechop plot. That would hide the interesting information. So we
# set it to equal label_start_time for the plot.
chopper.feature_start_time = chopper.label_start_time
timechop_plot_path = 'triage_output/timechop.png'
# Make sure the target directory exists before the plot is saved, in case the
# plotting call does not create it itself; a no-op when it is already there.
os.makedirs(os.path.dirname(timechop_plot_path), exist_ok=True)
visualize_chops(chopper, save_target=timechop_plot_path)
# Build, validate and run the experiment.
# The disabled alternative is a MultiCoreExperiment taking the same arguments
# plus n_processes=4 and n_db_processes=2.
experiment_settings = {
    'config': config,
    'db_engine': db_engine,
    'project_path': 's3://dsapp-education-migrated/donors-choose',
    'replace': False,
    'save_predictions': False,
}
experiment = SingleThreadedExperiment(**experiment_settings)
# Validation is invoked first, then the run itself.
experiment.validate()
experiment.run()
# Create the Triage experiment report: copy the notebook template to a
# timestamped file, execute the copy in place, then export it to HTML.
template_path = 'notebooks/triage_experiment_report_template.ipynb'
timestamp = datetime.now().strftime('%Y%m%d_%H%M')
output_path = f'notebooks/triage_experiment_report_{timestamp}.ipynb'
shutil.copyfile(template_path, output_path)

# Run nbconvert through argument lists rather than interpolated shell strings,
# and report a failing step in the log instead of discarding its exit status.
# Both steps are still attempted, as before, so report generation stays
# best-effort and never masks a completed experiment run.
nbconvert_commands = (
    ['jupyter', 'nbconvert', '--inplace', '--execute', '--to', 'notebook', output_path],
    ['jupyter', 'nbconvert', '--to', 'html', output_path],
)
for nbconvert_command in nbconvert_commands:
    command_text = ' '.join(nbconvert_command)
    try:
        exit_status = subprocess.run(nbconvert_command, check=False).returncode
    except OSError as err:
        # Raised when the executable cannot be started (e.g. not on PATH).
        logger.error('Could not start "%s": %s', command_text, err)
        continue
    if exit_status != 0:
        logger.error('"%s" exited with status %d', command_text, exit_status)