-
Notifications
You must be signed in to change notification settings - Fork 6
/
univariate_analysis.py
executable file
·65 lines (46 loc) · 2.53 KB
/
univariate_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3
"""Script to perform the mass-univariate analysis"""
import argparse
from pathlib import Path
import numpy as np
import pandas as pd
from scipy import stats
from utils import COLUMNS_NAME, load_dataset, cliff_delta
# Base directory for the data/ and outputs/ paths built in this script; it is
# the current working directory at the time the module is loaded.
PROJECT_ROOT = Path.cwd()
def main(dataset_name, disease_label):
    """Run the mass-univariate comparison between two diagnostic groups.

    Every region listed in ``COLUMNS_NAME`` is divided by the subject's
    ``EstimatedTotalIntraCranialVol``. For each region, a Mann-Whitney U test
    and the effect size returned by ``cliff_delta`` are then computed between
    the rows with ``Diagn == 1`` (``hc_label``) and the rows with
    ``Diagn == disease_label``. One row per region is written to
    ``outputs/univariate_analysis/<dataset_name>_<hc_label>_vs_<disease_label>.csv``.

    Args:
        dataset_name: Name of the dataset directory under ``data/``; also used
            to locate ``outputs/<dataset_name>_homogeneous_ids.csv``.
        disease_label: Value of the ``Diagn`` column selecting the group that
            is compared against the ``hc_label`` group.
    """
    # ----------------------------------------------------------------------------
    participants_path = PROJECT_ROOT / 'data' / dataset_name / 'participants.tsv'
    freesurfer_path = PROJECT_ROOT / 'data' / dataset_name / 'freesurferData.csv'

    # Diagn value of the reference group (presumably healthy controls, going by
    # the variable name -- confirm against the dataset's label scheme).
    hc_label = 1

    # ----------------------------------------------------------------------------
    # Create directories structure
    # ----------------------------------------------------------------------------
    ids_path = PROJECT_ROOT / 'outputs' / (dataset_name + '_homogeneous_ids.csv')
    univariate_dir = PROJECT_ROOT / 'outputs' / 'univariate_analysis'
    univariate_dir.mkdir(exist_ok=True)

    # ----------------------------------------------------------------------------
    # Loading data
    clinical_df = load_dataset(participants_path, ids_path, freesurfer_path)

    # Divide each regional value by the subject's total intracranial volume;
    # the extra axis makes the per-subject vector broadcast across the columns.
    x_dataset = clinical_df[COLUMNS_NAME].values
    tiv = clinical_df['EstimatedTotalIntraCranialVol'].values
    tiv = tiv[:, np.newaxis]
    clinical_df[COLUMNS_NAME] = (np.true_divide(x_dataset, tiv)).astype('float32')

    # The group masks do not depend on the region, so split the frame once
    # instead of re-filtering it several times per loop iteration.
    hc_df = clinical_df[clinical_df['Diagn'] == hc_label]
    disease_df = clinical_df[clinical_df['Diagn'] == disease_label]

    # Collect plain dicts and build the frame once: DataFrame.append was removed
    # in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for region in COLUMNS_NAME:
        hc_values = hc_df[region]
        disease_values = disease_df[region]

        # NOTE(review): the default `alternative` of mannwhitneyu differs between
        # SciPy releases -- confirm the intended sidedness of the test.
        _, pvalue = stats.mannwhitneyu(hc_values, disease_values)
        effect_size = cliff_delta(hc_values.values, disease_values.values)

        rows.append({'regions': region, 'effect size': effect_size, 'p-value': pvalue})

    results = pd.DataFrame(rows, columns=['regions', 'effect size', 'p-value'])
    results.to_csv(univariate_dir / '{}_{}_vs_{}.csv'.format(dataset_name, hc_label, disease_label), index=False)
if __name__ == "__main__":
    # Command-line entry point. Both options are mandatory: without a dataset
    # name the input paths cannot be built, and without a disease label there
    # is no second group to compare against.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-D', '--dataset_name',
                        dest='dataset_name',
                        required=True,
                        help='Dataset name to perform univariate analysis.')
    parser.add_argument('-L', '--disease_label',
                        dest='disease_label',
                        required=True,
                        help='Disease label to perform univariate analysis.',
                        type=int)
    args = parser.parse_args()

    main(args.dataset_name, args.disease_label)