"""
Run analysis on a set of map points given times to nearby hospitals
"""
import os
import argparse
import collections
import multiprocessing as mp
import data_io
from stroke.patient import Patient
from stroke import severity, constants, stroke_model as sm
import numpy as np
try:
    get_ipython  # defined only inside IPython/Jupyter sessions
from tqdm import tqdm_notebook as tqdm
except NameError:
from tqdm import tqdm
import paths
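# Result columns that do not hold per-hospital optimal-strategy counts
# (presumably used by downstream analysis when reading result files).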
NON_COUNT_COLS = [
'Location', 'Patient', 'Varying Hospitals', 'PSC Count', 'CSC Count',
'Sex', 'Age', 'Symptoms', 'RACE'
]
def results_name(base_dir, times_file, hospitals_file, fix_performance,
simulation_count, sex):
"""Get the name for the file storing results for the given arguments."""
times_file = os.path.basename(times_file)
hospitals_file = os.path.basename(hospitals_file)
    # Note: str.strip removes a *set of characters*, not a suffix, so use
    # os.path.splitext to drop the extension safely.
    out_name = f'times={os.path.splitext(times_file)[0]}'
    out_name += f'_hospitals={os.path.splitext(hospitals_file)[0]}'
out_name += '_fixed' if fix_performance else '_random'
out_name += '_' + str(sex)
out_name += '.csv'
out_dir = os.path.join(base_dir, 'output')
if not os.path.isdir(out_dir):
os.makedirs(out_dir)
out_file = os.path.join(out_dir, out_name)
return out_file
def _instantiate_patients(patient_count, **kwargs):
    # Build one fully-specified patient when all characteristics (including
    # a severity score) are given; otherwise generate random patients.
    patient_characteristics = list(kwargs.keys())
    have_pc_and_race = np.isin(['age', 'sex', 'time_since_symptoms', 'race'],
                               patient_characteristics).all()
    have_pc_and_nihss = np.isin(['age', 'sex', 'time_since_symptoms', 'nihss'],
                                patient_characteristics).all()
    if have_pc_and_race:
        patients = [Patient.with_RACE(**kwargs)]
    elif have_pc_and_nihss:
        patients = [Patient.with_NIHSS(**kwargs)]
    else:
        patients = [Patient.random(**kwargs) for _ in range(patient_count)]
    return patients
def run_model_default_dtn(
times_file,
hospitals_file,
fix_performance=False,
patient_count=10,
simulation_count=1000,
cores=None,
        base_dir='',  # default: current working directory
        locations=None,  # default: run for all locations in times_file
res_name=None,
**kwargs):
    '''Run the model on the given map points for the given hospitals.

    The times file should be in data/travel_times and contain travel times
    to appropriate hospitals. The hospitals file should be in data/hospitals
    and contain transfer destinations and times for all primary hospitals.
    kwargs -- passed through to inputs.Inputs.random to hold parameters
              constant

    This method uses a travel_time file generated from the hospital list
    Kori provided, but instead of using DTN times from AHA, it uses default
    times generated from a uniform distribution.
    For now, use fix_performance=True to keep the comparison fair across
    hospitals.
    '''
hospitals = data_io.get_hospitals(hospitals_file)
hospital_lists = [
(False, hospitals)
    ]  # False means the same DTN distribution is used for all hospitals
    patients = _instantiate_patients(patient_count, **kwargs)
sex = patients[0].sex
times = data_io.get_times(times_file)
    if locations:  # keep only the requested locations
times = {loc: time for loc, time in times.items() if loc in locations}
if not res_name:
res_name = results_name(base_dir, times_file, hospitals_file,
fix_performance, simulation_count, sex)
    if cores is False:
        pool = None  # run scenarios serially in this process
    else:
        # Use the requested core count; default to all but one core
        pool = mp.Pool(cores if cores else mp.cpu_count() - 1)
    for patient in tqdm(patients, desc='Patients'):
patient_results = []
for point, these_times in tqdm(
times.items(), desc='Map Points', leave=False):
for uses_hospital_performance, hospital_list in hospital_lists:
if pool:
results = pool.apply_async(
run_one_scenario,
(patient, point, these_times, hospital_list,
uses_hospital_performance, simulation_count,
fix_performance, res_name))
else:
results = run_one_scenario(
patient, point, these_times, hospital_list,
uses_hospital_performance, simulation_count,
fix_performance, res_name)
patient_results.append(results)
if pool:
to_fetch = tqdm(patient_results, desc='Map Points', leave=False)
patient_results = [job.get() for job in to_fetch]
# Save after each patient in case we cancel or crash
data_io.save_patient(res_name, patient_results, hospitals)
if pool:
pool.close()
return
def run_model_real_data(
times_file,
hospitals_file,
dtn_file=paths.DTN_FILE,
fix_performance=False,
patient_count=10,
simulation_count=1000,
cores=None,
base_dir='', # default: current working directory
res_name=None,
        locations=None,  # default: run for all locations in times_file
patients=None,
**kwargs):
    '''Run the model on the given map points for the given hospitals.

    The times file should be in data/travel_times and contain travel times
    to appropriate hospitals. The hospitals file should be in data/hospitals
    and contain transfer destinations and times for all primary hospitals.
    kwargs -- passed through to inputs.Inputs.random to hold parameters
              constant

    A dtn_file is also needed here so that real hospital performance data
    can be used.
    '''
hospitals = data_io.get_hospitals(hospitals_file, dtn_file)
    hospital_lists = [(True, hospitals)]  # True: use real per-hospital DTN data
    if patients is None:  # honor explicitly supplied patients
        patients = _instantiate_patients(patient_count, **kwargs)
sex = patients[0].sex
times = data_io.get_times(times_file)
    if locations:  # keep only the requested locations
times = {loc: time for loc, time in times.items() if loc in locations}
if not res_name:
res_name = results_name(base_dir, times_file, hospitals_file,
fix_performance, simulation_count, sex)
    if cores is False:
        pool = None  # run scenarios serially in this process
    else:
        # Use the requested core count; default to all but one core
        pool = mp.Pool(cores if cores else mp.cpu_count() - 1)
    for patient in tqdm(patients, desc='Patients'):
patient_results = []
for point, these_times in tqdm(
times.items(), desc='Map Points', leave=False):
for uses_hospital_performance, hospital_list in hospital_lists:
if pool:
results = pool.apply_async(
run_one_scenario,
(patient, point, these_times, hospital_list,
uses_hospital_performance, simulation_count,
fix_performance, res_name))
else:
results = run_one_scenario(
patient, point, these_times, hospital_list,
uses_hospital_performance, simulation_count,
fix_performance, res_name)
patient_results.append(results)
if pool:
to_fetch = tqdm(patient_results, desc='Map Points', leave=False)
patient_results = [job.get() for job in to_fetch]
# Save after each patient in case we cancel or crash
data_io.save_patient(res_name, patient_results, hospitals)
if pool:
pool.close()
return
def run_one_scenario(patient,
point,
these_times,
hospital_list,
uses_hospital_performance,
simulation_count,
fix_performance,
res_name=None):
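    """Simulate one (patient, map point) scenario and return a result row.

    Builds a StrokeModel for the patient and hospital list, runs the model
    simulation_count times (or via model.run_new when simulation_count is
    'auto'), optionally writes aggregated Markov outcomes to res_name, and
    returns an OrderedDict of the columns used in the output file.
    """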
model = sm.StrokeModel(patient, hospital_list)
model.set_times(these_times)
    if str(simulation_count) == 'auto':
        # 'auto' defers the number of runs to model.run_new
        model_run = model.run_new
    else:
        try:
            simulation_count = int(simulation_count)
        except ValueError:
            raise ValueError("simulation_count must be an integer or 'auto'")
        model_run = model.run
these_results, markov_results, ais_times = model_run(
n=simulation_count, fix_performance=fix_performance)
    if res_name:
        # The commented-out call writes per-simulation details (cost and
        # QALY; rows are simulation runs, columns are hospitals); the
        # active call writes aggregated outcomes instead.
        # data_io.write_detailed_markov_outcomes(
        #     markov_results, res_name, point, times=ais_times,
        #     optimal_strategy=str(these_results.optimal_strategy), write=True)
        data_io.write_aggregated_markov_outcomes(
            markov_results, res_name, point, times=ais_times,
            optimal_strategy=str(these_results.optimal_strategy), write=True)
results = collections.OrderedDict()
results['Location'] = point
results['Patient'] = patient.pid
results['Use Real DTN'] = uses_hospital_performance
results['Varying Hospitals'] = not fix_performance
results['PSC Count'] = len(model.primaries)
results['CSC Count'] = len(model.comprehensives)
results['Sex'] = 'male' if patient.sex == constants.Sex.MALE else 'female'
results['Age'] = patient.age
results['Symptoms'] = patient.symptom_time
    if isinstance(patient.severity, severity.NIHSS):
results['NIHSS'] = patient.severity.score
else:
results['RACE'] = patient.severity.score
cbc = these_results.counts_by_center
cbc = {str(center): count for center, count in cbc.items()}
results.update(cbc)
    # Add NaN for hospitals that are never optimal
    zero_c = {
        str(hospital): float('nan')
        for hospital in hospital_list if str(hospital) not in results
    }
results.update(zero_c)
return results
def parse_extra_inputs(args):
    """Collect optional patient-characteristic arguments into model kwargs."""
    kwargs = {}
if hasattr(args, 'sex'):
kwargs['sex'] = args.sex
if hasattr(args, 'age'):
kwargs['age'] = args.age
if hasattr(args, 'race'):
kwargs['race'] = args.race
if hasattr(args, 'nihss'):
kwargs['nihss'] = args.nihss
if hasattr(args, 'time_since_symptoms'):
kwargs['time_since_symptoms'] = args.time_since_symptoms
return kwargs
def main(args):
times_file = args.times_file
hospitals_file = args.hospital_file
patient_count = args.patients
simulation_count = args.simulations
kwargs = parse_extra_inputs(args)
    base_dir = ''  # default: current working directory
    locations = getattr(args, 'locations', None)
    res_name = getattr(args, 'res_name', None)
    # cores=None -> pool with a default size; cores=False -> single process
    if args.multicore:
        cores = None
    else:
        cores = False
run_model_real_data(
times_file,
hospitals_file,
patient_count=patient_count,
fix_performance=False,
simulation_count=simulation_count,
cores=cores,
base_dir=base_dir,
locations=locations,
res_name=res_name,
**kwargs)
def main_default_dtn(args):
times_file = args.times_file
hospitals_file = args.hospital_file
patient_count = args.patients
simulation_count = args.simulations
kwargs = parse_extra_inputs(args)
    base_dir = ''  # default: current working directory
    locations = getattr(args, 'locations', None)
    res_name = getattr(args, 'res_name', None)
if args.multicore:
cores = None
else:
cores = False
    run_model_default_dtn(
times_file,
hospitals_file,
patient_count=patient_count,
fix_performance=False,
simulation_count=simulation_count,
cores=cores,
base_dir=base_dir,
locations=locations,
res_name=res_name,
**kwargs)
if __name__ == '__main__':
p_default = 2
s_default = 1000
parser = argparse.ArgumentParser()
parser.add_argument(
'hospital_file', help='full path to file with hospital information')
parser.add_argument(
'times_file', help='full path to file with travel times')
p_help = 'number of random patients to run at each location'
p_help += f' (default {p_default})'
parser.add_argument(
'-p', '--patients', type=int, default=p_default, help=p_help)
s_help = f'number of model runs for each scenario (default {s_default})'
parser.add_argument(
        '-s', '--simulations', type=int, default=s_default, help=s_help)
parser.add_argument(
'-m',
'--multicore',
action='store_true',
        help='use multiple CPU cores (all but one)')
args = parser.parse_args()
main(args)