forked from comprna/SUPPA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsignificanceCalculator.py
191 lines (151 loc) · 7.05 KB
/
significanceCalculator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# -*- coding: utf-8 -*-
"""
Created on Wed May 25 04:20:00 CEST 2016
@authors: Juan C Entizne
@email: juancarlos.entizne01[at]estudiant.upf.edu
Modified by Juan L. Trincado
@email: juanluis.trincado[at].upf.edu
"""
import os
import logging
from lib.diff_tools import multiple_conditions_analysis
from argparse import *
# Text shown at the top of the generated usage/help output.
description = (
    "Description:\n"
    "This tool calculates the significance to the change in mean PSI values between conditions, across multiple conditions.\n"
    "The conditions are tested in a sequential order specified as input.\n"
)

# RawTextHelpFormatter keeps the explicit newlines of the description above.
# add_help=False: no -h/--help option is registered on this parser.
# NOTE(review): presumably because the parser is reused as a parent parser
# elsewhere in the project -- confirm before changing.
parser = ArgumentParser(description=description,
                        formatter_class=RawTextHelpFormatter,
                        add_help=False)

# Statistical method; the only mandatory option declared at parser level.
parser.add_argument('-m', '--method',
                    dest="method",
                    action="store",
                    required=True,
                    choices=['empirical', 'classical'],
                    help="Method to test significance. Required.")

# One or more PSI files (stored as a list under ``conds``).
parser.add_argument('-p', '--psi',
                    dest="conds",
                    action="store",
                    nargs="+",
                    help="Path of the PSI files. PSI files and the transcript expression (TPM) files "
                         "must have the same order. "
                         "The conditions files and the tpm files must have the same order.")

# One or more TPM files (stored as a list under ``tpms``).
parser.add_argument('-e', '--tpm',
                    dest="tpms",
                    action="store",
                    nargs="+",
                    help="Path of the transcript expression (TPM) files. Conditions files and the transcript expression "
                         "(TPM) files must have the same order. "
                         "The conditions files and the tpm files must have the same order.")

# The options below that use nargs=1 store a one-element list, so their
# defaults are one-element lists too; consumers read the value with ``[0]``.
parser.add_argument('-i', '--input',
                    dest="iox",
                    action="store",
                    nargs=1,
                    default=None,
                    help="Input file with the event-transcripts equivalence (.ioe or .ioi format)")

parser.add_argument('-a', '--area',
                    dest="area",
                    action="store",
                    nargs=1,
                    type=int,
                    default=[1000],
                    help="Number indicating the number of points in the local area distribution. (default: 1000)")

parser.add_argument('-l', '--lower-bound',
                    dest="lower_bound",
                    action="store",
                    nargs=1,
                    type=float,
                    default=[0],
                    help="Lower-bound for the absolute delta PSI value to test for significance. (Default: 0).")

parser.add_argument('-pa', '--paired',
                    dest="paired",
                    action="store_true",
                    default=False,
                    help="Boolean. Indicates if replicates in conditions are paired. (Default: False).")

parser.add_argument('-gc', '--gene-correction',
                    dest="gene_cor",
                    action="store_true",
                    default=False,
                    help="Boolean. If True, SUPPA correct the p-values by gene. (Default: False).")

parser.add_argument('-al', '--alpha',
                    dest="alpha",
                    action="store",
                    nargs=1,
                    type=float,
                    default=[0.05],
                    help="Family-wise error rate to use for the multiple test correction. (Default: 0.05).")

# No explicit dest: argparse derives ``save_tpm_events`` from the long flag.
parser.add_argument('-s', '--save_tpm_events',
                    action="store_true",
                    default=False,
                    help="Boolean. If True, the average log TPM of the events will be saved in an external file (Default: False).")

# Note that the destination attribute is ``seq`` although the flag is
# called --combination.
parser.add_argument('-c', '--combination',
                    action="store_true",
                    dest="seq",
                    default=False,
                    help="Boolean. If True, SUPPA perform the analysis between all the possible combinations of conditions (Default: False).")

parser.add_argument('-me', '--median',
                    dest="median",
                    action="store_true",
                    default=False,
                    help="Boolean. If True, SUPPA use the median to calculate the Delta PSI. (Default: False).")

# The help text states the default that is actually configured (0.0).
parser.add_argument('-th', '--tpm-threshold',
                    dest="tpm_th",
                    action="store",
                    nargs=1,
                    type=float,
                    default=[0.0],
                    help="Minimum transcript average TPM value within-replicates and between-conditions to be included in the analysis. (Default: 0.0).")
def nan_threshold_type(x):
    """Convert a command-line value to a threshold in the closed range [0, 1].

    Meant to be used as an argparse ``type=`` callable.

    Args:
        x: Raw value to convert; anything accepted by ``float()``.

    Returns:
        The converted float, guaranteed to satisfy ``0.0 <= value <= 1.0``.

    Raises:
        ArgumentTypeError: If the value is outside [0, 1] or is NaN.
        ValueError: If ``x`` cannot be converted to a float.
    """
    value = float(x)
    # A chained comparison is False for NaN, so "nan" is rejected too; a test
    # of the form ``value < 0.0 or value > 1.0`` would silently accept it.
    if not 0.0 <= value <= 1.0:
        raise ArgumentTypeError("nan_threshold should be a float number between 0 and 1")
    return value
# Allowed share of NaN samples per condition; the raw value is converted and
# range-checked by nan_threshold_type. Stored as a one-element list.
parser.add_argument(
    "-nan", "--nan-threshold",
    type=nan_threshold_type,
    nargs=1,
    default=[0.0],
    dest="nan_th",
    help="Percentage allowed of samples per condition with nan values for returning a DeltaPSI (Default: 0, no missing values allowed).",
)

# Name for the output files; stays None when the option is not given.
parser.add_argument(
    "-o", "--output",
    dest="output",
    help="Name of the output files.",
)

# Logging verbosity, given as the name of a logging level.
parser.add_argument(
    "-mo", "--mode",
    default="INFO",
    help="to choose from DEBUG, INFO, WARNING, ERROR and CRITICAL",
)
def create_path(lst):
    """Return the given file paths with relative ones anchored at the CWD.

    Args:
        lst: Iterable of path strings.

    Returns:
        A new list, in the same order as ``lst``, in which absolute paths are
        kept unchanged and every relative path is joined onto the current
        working directory.
    """
    cwd = os.getcwd()
    # os.path.join supplies the platform separator and does not double it
    # when the working directory already ends with one (e.g. the root "/").
    return [fl if os.path.isabs(fl) else os.path.join(cwd, fl) for fl in lst]
def main():
    """Parse the command line and run the multiple-conditions analysis.

    Exits with status 2 (through ``parser.error``) when the logging mode is
    not a known level name or when one of the -p/-e/-i file arguments is
    missing, and with status 1 when the event file does not carry an
    ``ioe`` or ``ioi`` suffix.
    """
    # Parsing arguments
    args = parser.parse_args()

    # Setting logging preferences.
    logger = logging.getLogger(__name__)
    # The level is resolved by name through setLevel() rather than by calling
    # eval() on user-supplied text; setLevel raises ValueError for a name that
    # is not a registered logging level.
    try:
        logger.setLevel(args.mode.upper())
    except ValueError:
        parser.error("invalid logging mode %r: choose from DEBUG, INFO, WARNING, ERROR and CRITICAL"
                     % args.mode)

    # These options are optional at parser level but the analysis cannot run
    # without them; report the omission instead of failing on None below.
    missing = [flag
               for flag, value in (("-p/--psi", args.conds),
                                   ("-e/--tpm", args.tpms),
                                   ("-i/--input", args.iox))
               if value is None]
    if missing:
        parser.error("the following arguments are required: " + ", ".join(missing))

    # Check if path is absolute, if not the program use the current working path
    cond_files = create_path(args.conds)
    expr_files = create_path(args.tpms)
    ioe_fl = create_path(args.iox)

    # Check extension of input file
    id_type = ioe_fl[0].split(".")[-1].strip("\n")
    if id_type not in ("ioe", "ioi"):
        # Logged at ERROR level: no handler is configured here, and the
        # logging module's last-resort handler only emits WARNING and above,
        # so an INFO record would never reach the user.
        logger.error("Invalid input file. Input file has to be either IOE or IOI format "
                     "it must present the appropriate suffix.")
        raise SystemExit(1)

    # -a, -l, -al, -th and -nan are declared with nargs=1, hence the [0].
    multiple_conditions_analysis(args.method, cond_files, expr_files, ioe_fl[0], args.area[0],
                                 args.lower_bound[0], args.paired, args.gene_cor, args.alpha[0],
                                 args.save_tpm_events, args.seq, args.median, args.tpm_th[0],
                                 args.nan_th[0], args.output)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()