-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathAdjHE.py
97 lines (81 loc) · 2.56 KB
/
AdjHE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Delete this
import statsmodels.api as sm
import os
import numpy as np
import pandas as pd
import timeit
import resource
from functions.AdjHE_estimator import AdjHE_estimator
#os.chdir("/home/christian/Research/Stat_gen/AdjHE/")
from functions.load_data import sum_n_vec, ReadGRMBin, multirange, read_datas, load_data
from functions.AdjHE_parser import prefix, npc
from functions.AdjHE_parser import *
# good stuff
# from argparse import RawTextHelpFormatter
print(args)
# %% for troubleshooting
# os.chdir("/home/christian/Scripts/Basu_herit")
# prefix = "Example/grm"
# pheno = "Example/pheno.phen"
# covar = "Example/covar.csv"
# PC = "Example/pcas.eigenvec"
# k = 0
# npc = 2
# mpheno = 1
# std = False
# out = "Example/results"
print("reaading GRM")
# %% Read GRM
G = ReadGRMBin(prefix)
ids = G['id']
ids = ids.rename(columns = {0:"FID", 1:"IID"})
n_phen_nona = ids.shape[0]
print("loading data")
#%%
df = load_data(pheno_file = pheno, cov_file=covar, PC_file=PC, npc = npc)
# dropping nas for ease of use
df = df.dropna()
#%%
# only regress out covariates if they are entered
# NOTE: I added to drop missing values here to make it run!!!! Might want to change in the future
res_y = sm.OLS(endog=df.loc[:,"Pheno_" + str(mpheno)], exog=df.drop(["Pheno_" + str(mpheno), "FID", "IID"], 1)).fit().resid
# %%
start_read = timeit.default_timer()
nmarkers = G['N']
x = G['diag'].astype('float64')
n_phen_nona = G['diag'].size
GRM_array_nona = np.zeros((n_phen_nona, n_phen_nona))
temp_i = 0
temp = 0
# k= args.k
if(k == 0):
k = n_phen_nona
l = list(range(k, n_phen_nona, k))
l.append(n_phen_nona)
for i in l:
cor = multirange(range(temp_i, i))
GRM_array_nona[cor['b'], cor['a']] = G['off'][temp:temp+len(cor['b'])]
GRM_array_nona.T[cor['b'], cor['a']] = G['off'][temp:temp+len(cor['b'])]
temp = temp + len(cor['b'])
del(cor)
temp_i = i
GRM_array_nona[np.diag_indices(n_phen_nona)] = G['diag']
#%%
print("Calculating heritibility")
res_y.name = "Residual"
df["Residual"] = res_y
# keep portion of GRM without missingess
nonmissing = ids.IID.isin(df.IID)
GRM_nonmissing = GRM_array_nona[nonmissing,:][:,nonmissing]
#%%
h2, se = AdjHE_estimator(A= GRM_nonmissing, data = df, npc=npc, std=std)
# %%
results = {"h2" : h2[0],
"SE" : se,
"Time for analysis(s)" : timeit.default_timer() - start_read,
"Memory usage" : resource.getrusage(resource.RUSAGE_SELF).ru_maxrss}
print("Heritability estimate: " + str(h2[0]))
print("With Standard error: " + str(se))
print("Writing results")
results= pd.DataFrame(results, index =[0])
results.to_csv(out + ".csv", index = False )