-
Notifications
You must be signed in to change notification settings - Fork 6
/
associate_biomarkers_with_dcb.py
60 lines (51 loc) · 2.49 KB
/
associate_biomarkers_with_dcb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python
################################################################################
# SETUP
################################################################################
# Load required modules
import sys, os, argparse, logging, pandas as pd, numpy as np, json
from i_o import getLogger
# Parse command-line arguments. Help strings describe how each file is used
# further down in this script so that `--help` is self-explanatory.
parser = argparse.ArgumentParser(
    description='Compare biomarkers (including model predictions) against '
                'patient benefit and write per-patient plot items as JSON.')
parser.add_argument('-ff', '--feature_file', type=str, required=True,
                    help='Tab-separated feature table whose first column is '
                         'the patient index; the "benefit" column and the '
                         'biomarker columns are read from it.')
parser.add_argument('-rf', '--results_file', type=str, required=True,
                    help='JSON file with "patients" and "preds" entries; the '
                         'predictions are exponentiated before use.')
parser.add_argument('-of', '--output_file', type=str, required=True,
                    help='Path of the JSON file to write.')
parser.add_argument('-v', '--verbosity', type=int, required=False,
                    default=logging.INFO,
                    help='Logging level handed to getLogger '
                         '(defaults to logging.INFO).')
# parse_args() reads sys.argv[1:] when called without an argument list
args = parser.parse_args()

# Set up logger
logger = getLogger(args.verbosity)
################################################################################
# EXAMINE THE ASSOCIATION BETWEEN PREDICTIONS AND DCB
################################################################################
# Load the input files: the results JSON supplies parallel 'patients' and
# 'preds' entries; the feature file is a tab-separated table indexed by its
# first column.
with open(args.results_file, 'r') as IN:
    results = json.load(IN)
patients = results['patients']
preds = results['preds']
df = pd.read_csv(args.feature_file, sep='\t', index_col=0)

# Index and exponentiate predictions so they can be merged with the dataframe
exp_preds = pd.DataFrame(np.exp(pd.Series(
    preds, index=patients,
    name='Predicted N Expanded Clones that were TILs A->B')))
patients_with_preds = set(exp_preds.index)
all_patients = set(df.index)
patients_missing_preds = all_patients - patients_with_preds
patients_missing_features = patients_with_preds - all_patients

# Add predictions to the dataframe. The outer merge creates a row for every
# patient found in either input, so rows lacking a prediction AND rows lacking
# features are both removed afterwards. A prediction-only row would carry a
# NaN 'benefit', which is truthy and would be treated as a benefit patient by
# the code below.
df = pd.merge(df, exp_preds, how='outer', left_index=True, right_index=True)
df = df.drop(index=list(patients_missing_preds | patients_missing_features))
# Build one plot item per (biomarker, patient) pair. For each biomarker the
# threshold is the smallest value observed among patients with benefit, so
# every such patient with a known value sits at or above it; a patient is
# labelled "Treated": "Yes" when their value reaches that threshold.
biomarkers = ['N Expanded Clones that were TILs A->B',
              'Predicted N Expanded Clones that were TILs A->B',
              'expressed_neoantigen_count', 'missense_snv_count', 'PD-L1']

# The benefit column does not depend on the biomarker, so read it once
benefit_values = df['benefit'].tolist()
# astype(bool) applies the same truthiness test as `if b` on each entry
benefit_mask = df['benefit'].astype(bool)
if not benefit_mask.any():
    raise ValueError('No patients with benefit: cannot derive a biomarker '
                     'threshold.')

biomarker_plot_items = []
for biomarker in biomarkers:
    biomarker_values = df[biomarker]
    # Series.min() skips missing values. The builtin min() returns NaN or a
    # real number depending on where the NaN happens to sit in the sequence,
    # which made the threshold depend on row order.
    all_dcb_val = biomarker_values[benefit_mask].min()
    for x, y in zip(benefit_values, biomarker_values.tolist()):
        biomarker_plot_items.append({
            "Biomarker": biomarker,
            "Benefit": bool(x),
            "Biomarker value": y,
            # A missing value (NaN) never compares >=, so it is labelled "No"
            "Treated": "Yes" if y >= all_dcb_val else "No"
        })
# Write the plot items, together with the parsed command-line parameters,
# to the requested output file as JSON
output_payload = {
    'Biomarkers': biomarker_plot_items,
    'params': vars(args),
}
with open(args.output_file, 'w') as out_handle:
    json.dump(output_payload, out_handle)