-
Notifications
You must be signed in to change notification settings - Fork 0
/
gwasfunctions.r
600 lines (542 loc) · 35.9 KB
/
gwasfunctions.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
library('ggplot2')
options(stringsAsFactors=FALSE)
extractAutosomal <- function(currentdir,bimfile){
data <- read.table(sprintf('%s/%s.bim',currentdir,bimfile),h=F,stringsAsFactors=F)
write.table(data[which(data$V1 < 23),2],sprintf('%s/%s_AutosomalSnps.txt',currentdir,bimfile),quote=F,row.names=F,col.names = F)
if( length(which(data$V1 ==23)) > 0 ) {
write.table(data[which(data$V1 == 23),2],sprintf('%s/%s_SexChromosomeSnps.txt',currentdir,bimfile),quote=F,row.names=F,col.names = F)
} else {
temp <- c('NoXChr')
write.table(temp,sprintf('%s/%s_SexChromosomeSnps.txt',currentdir,bimfile),quote=F,row.names=F,col.names = F)
}
}
extractATCGsnps <- function(currentdir,bimname){
bimfile <- read.table(sprintf("%s/%s.bim",currentdir,bimname),h=F,stringsAsFactors=F)
bimfile$Alleles <- as.vector(paste(bimfile$V5,bimfile$V6,sep=""))
ATorCGsnps <- bimfile[which(bimfile$Alleles %in% c('AT','TA','GC','CG')),2]
write.table(ATorCGsnps,sprintf("%s/%s_ATorCGsnplist.txt",currentdir,bimname),quote=F,row.names=F,col.names=F)
rm(bimfile,ATorCGsnps)
}
printstats <- function(currentdir,bimname,stepname,cohortname){
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
bimfile$V7 <- as.vector(paste(bimfile$V1,bimfile$V4,sep='-'))
temp <- bimfile[which(bimfile$V1 != 0),c(2,7)]
famfile <- read.table(sprintf('%s/%s.fam',currentdir,bimname),h=F,stringsAsFactors=F)
sink(sprintf('%s/%s_stats.txt',currentdir,cohortname),append=T)
cat("=============================\n")
cat(sprintf("%s",stepname),"\n")
cat("=============================\n")
cat('No:of SNPs ',dim(bimfile)[1],"\n") # No:of SNPs
print('Different SNP names:')
print(table(gsub('^([A-za-z]*)(.*?)$','\\1',bimfile$V2,perl=T))) # Type of SNP names
print('Type of SNPs with their Number')
print(table(paste(bimfile$V5,bimfile$V6,sep=""))) # Type of SNPs with their number.
cat('No:of SNPs with duplicated names ',length(which(duplicated(bimfile$V2,fromLast=T))),"\n") # No:of SNPs with duplicated names
cat('No:of SNPs with duplicated positions ',length(which(duplicated(temp$V7,fromLast=T))),"\n") # No:of SNPs with duplicated positions
print('No:of SNPs per Chromosome')
print(table(bimfile$V1)) # No:of SNPs per Chromosome
cat('No:of Samples ',dim(famfile)[1],"\n") # No:of Samples
print('Table of Populations:')
print(table(famfile$V1)) # No:of Populations
print('No:of Cases/Controls : 1 - Control, 2 - Case, -9/0 - Unknown')
print(table(famfile$V6)) # No:of Cases/Controls
print('Table of Sex status: 1 - Male, 2 - Female, -9/0 - Unknown')
print(table(famfile$V5)) # Table of Sex Status
print('Table of Sex status per Population:')
print(table(famfile$V1,famfile$V5)) # Table of Sex Status per Population
print('Table of Case/Control status per Population:')
print(table(famfile$V1,famfile$V6)) # Table of Sex Status per Population
sink()
}
bimcheck <- function(currentdir,bimname){
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
bimfile$V7 <- as.vector(paste(bimfile$V1,bimfile$V4,sep='-'))
temp <- bimfile[which(bimfile$V1 != 0),c(2,7)]
write.table(bimfile[which(bimfile$V1 > 23 | bimfile$V1 == 0),2],sprintf('%s/%s_snpsremove.txt',currentdir,bimname),quote=F,row.names=F,col.names=F) # Remove SNPs with Chromosome 0, Y, XY, MT
write.table(bimfile[which(duplicated(bimfile$V2,fromLast=T)),2],sprintf('%s/%s_snpsremove.txt',currentdir,bimname),quote=F,row.names=F,col.names=F,append=T) # Remove SNPs with duplicated names
write.table(bimfile[which(bimfile$V5 %in% c("I","D") | bimfile$V6 %in% c("I","D")),2],sprintf('%s/%s_snpsremove.txt',currentdir,bimname),quote=F,row.names=F,col.names=F,append=T) # Remove Indels and Deletions
write.table(temp[which(duplicated(temp$V7,fromLast=T)),1],sprintf('%s/%s_snpsremove.txt',currentdir,bimname),quote=F,col.names=F,row.names=F,append=T) # Remove SNPs with duplicated chr-positions
}
checksnps <- function(currentdir1,bimname1,currentdir2,bimname2){
bimfile1 <- read.table(sprintf('%s/%s.bim',currentdir1,bimname1),h=F,stringsAsFactors=F)
bimfile2 <- read.table(sprintf('%s/%s.bim',currentdir2,bimname2),h=F,stringsAsFactors=F)
bimfile1$Alleles <- paste(paste(paste(bimfile1$V5,bimfile1$V6,sep=''),paste(bimfile2$V5,bimfile2$V6,sep=''),sep=''))
bimfile1$Count <- sapply(bimfile1$Alleles,function(x) length(unique(strsplit(x,"")[[1]])))
triallelicsnpstoremove <- bimfile1[which(bimfile1$Count == 3 & !grepl('0',bimfile1$Alleles)),2] ### SNPs that need to be excluded which are tri-allelic
write.table(triallelicsnpstoremove,sprintf("%s/%s_snpsremove.txt",currentdir1,bimname1),quote=F,row.names=F,col.names=F)
snpstoflip <- bimfile1[which(bimfile1$Count == 4 & grepl('0',bimfile1$Alleles)),2] ### SNPs that need to be flipped
snpstoflip <- c(snpstoflip,bimfile1[which(bimfile1$Count == 4 & !grepl('0',bimfile1$Alleles)),2]) ### SNPs that need to be flipped
write.table(snpstoflip,sprintf("%s/%s_flipsnps.txt",currentdir1,bimname1),quote=F,row.names=F,col.names=F)
}
checkmafdiff <- function(currentdir1,bimname1,currentdir2,bimname2,mafdifference,appendoperator){
bimfile1 <- read.table(sprintf('%s/%s.frq',currentdir1,bimname1),h=T,stringsAsFactors=F)
rownames(bimfile1) <- bimfile1[,2]
colnames(bimfile1) <- c('hc_chr','hc_snp','hc_A1','hc_A2','hc_MAF','hc_NCHROBS')
bimfile2 <- read.table(sprintf('%s/%s.frq',currentdir2,bimname2),h=T,stringsAsFactors=F)
rownames(bimfile2) <- bimfile2[,2]
colnames(bimfile2) <- c('onekg_chr','onekg_snp','onekg_A1','onekg_A2','onekg_MAF','onekg_NCHROBS')
bimfile2 <- bimfile2[rownames(bimfile1),]
bimfile <- merge(bimfile1,bimfile2,by="row.names",sort=FALSE)
bimfile$Row.names <- NULL
bimfile$onekg_chr <- NULL
bimfile$onekg_snp <- NULL
bimfile <- bimfile[,c(1:4,7:8,5,9,6,10)]
bimfile$MAFdiff <- abs(bimfile$hc_MAF - bimfile$onekg_MAF)
snpsfrqfail <- bimfile[which(bimfile$MAFdiff > mafdifference),2] # SNPs that need to be excluded whihc failed the MAF difference
if(appendoperator == "Append"){
write.table(snpsfrqfail,sprintf('%s/%s_snpsmaffail.txt',currentdir1,bimname1),quote=F,row.names=F,col.names=F,append=T)
} else {
write.table(snpsfrqfail,sprintf('%s/%s_snpsmaffail.txt',currentdir1,bimname1),quote=F,row.names=F,col.names=F)
}
}
snpstoremove <- function(cohortdir,cohortname,onekgdir,onekg,mafdiffstatus,mafdifference){
hcfrq <- read.table(sprintf("%s/%sfreq.frq",cohortdir,cohortname),h=T,stringsAsFactors=F)
rownames(hcfrq) <- hcfrq[,2]
colnames(hcfrq) <- c('hc_chr','hc_snp','hc_A1','hc_A2','hc_MAF','hc_NCHROBS')
onekfrq <- read.table(sprintf("%s/%sfreq.frq",onekgdir,onekg),h=T,stringsAsFactors=F)
rownames(onekfrq) <- onekfrq[,2]
colnames(onekfrq) <- c('onekg_chr','onekg_snp','onekg_A1','onekg_A2','onekg_MAF','onekg_NCHROBS')
onekfrq <- onekfrq[rownames(hcfrq),]
hconekfrq <- merge(hcfrq,onekfrq,by="row.names",sort=FALSE)
hconekfrq$Row.names <- NULL
hconekfrq$onekg_chr <- NULL
hconekfrq$onekg_snp <- NULL
hconekfrq <- hconekfrq[,c(1:4,7:8,5,9,6,10)]
hconekfrq$MAFdiff <- abs(hconekfrq$hc_MAF - hconekfrq$onekg_MAF)
snpsfrqfail <- hconekfrq[which(hconekfrq$MAFdiff > mafdifference),2] # SNPs that need to be excluded whihc failed the MAF difference
hconekfrq$Alleles <- paste(paste(paste(hconekfrq$hc_A1,hconekfrq$hc_A2,sep=''),paste(hconekfrq$onekg_A1,hconekfrq$onekg_A2,sep=''),sep=''))
hconekfrq$Count <- sapply(hconekfrq$Alleles,function(x) length(unique(strsplit(x,"")[[1]])))
triallelicsnpstoremove <- hconekfrq[which(hconekfrq$Count == 3 & !grepl('0',hconekfrq$Alleles)),2] ### SNPs that need to be excluded which are tri-allelic
snpstoflip <- hconekfrq[which(hconekfrq$Count == 4 & grepl('0',hconekfrq$Alleles)),2] ### SNPs that need to be flipped
snpstoflip <- c(snpstoflip,hconekfrq[which(hconekfrq$Count == 4 & !grepl('0',hconekfrq$Alleles)),2]) ### SNPs that need to be flipped
write.table(snpstoflip,sprintf("%s/%s_flipsnps.txt",cohortdir,cohortname),quote=F,row.names=F,col.names=F)
hcmiss <- read.table(sprintf("%s/%smiss.lmiss",cohortdir,cohortname),h=T,stringsAsFactors=F)
rownames(hcmiss) <- hcmiss[,2]
colnames(hcmiss) <- c('hc_snp','hc_chr','hc_NMISS','hc_NGENO','hc_FMISS')
onekmiss <- read.table(sprintf("%s/%smiss.lmiss",onekgdir,onekg),h=T,stringsAsFactors=F)
rownames(onekmiss) <- onekmiss[,2]
colnames(onekmiss) <- c('onekg_snp','onekg_chr','onekg_NMISS','onekg_NGENO','onekg_FMISS')
onekmiss <- onekmiss[rownames(hcmiss),]
hconekmiss <- merge(hcmiss,onekmiss,by="row.names",sort=FALSE)
hconekmiss$Row.names <- NULL
hconekmiss$onekg_chr <- NULL
hconekmiss$onekg_snp <- NULL
hconekmiss <- hconekmiss[,c(1:4,6,7,5,8)]
hconekmiss$hc_FMISS <- (1 - hconekmiss$hc_FMISS) * 100
hconekmiss$onekg_FMISS <- (1 - hconekmiss$onekg_FMISS) * 100
hconekmiss$FMISSdiff <- abs(hconekmiss$hc_FMISS - hconekmiss$onekg_FMISS)
snpsmissfail <- hconekmiss[which(hconekmiss$FMISSdiff > 2),2]
if(mafdiffstatus == 'domafdiff'){
write.table(unique(c(snpsfrqfail,snpsmissfail,triallelicsnpstoremove)),sprintf("%s/%s_snpsremove.txt",cohortdir,cohortname),quote=F,row.names=F,col.names=F)
} else {
write.table(unique(c(snpsmissfail,triallelicsnpstoremove)),sprintf("%s/%s_snpsremove.txt",cohortdir,cohortname),quote=F,row.names=F,col.names=F)
}
rm(list=ls())
}
pcaeigensoft <- function(cohortdir,filename,softwaredir,popoperator){
if(popoperator == 'bypop'){
system(sprintf('awk \'{print $1,$2,$3,$4,$5,$1}\' %s/%s.fam > %s/%s.eig.fam',cohortdir,filename,cohortdir,filename))
sink(sprintf('%s/%s_pca.parfile',cohortdir,filename))
cat(sprintf('genotypename: %s/%s.bed\n',cohortdir,filename))
cat(sprintf('snpname: %s/%s.bim\n',cohortdir,filename))
cat(sprintf('indivname: %s/%s.eig.fam\n',cohortdir,filename))
} else {
sink(sprintf('%s/%s_pca.parfile',cohortdir,filename))
cat(sprintf('genotypename: %s/%s.bed\n',cohortdir,filename))
cat(sprintf('snpname: %s/%s.bim\n',cohortdir,filename))
cat(sprintf('indivname: %s/%s.fam\n',cohortdir,filename))
}
cat(sprintf('evecoutname: %s/%sMDS.evec\n',cohortdir,filename))
cat(sprintf('evaloutname: %s/%sMDS.eval\n',cohortdir,filename))
cat('altnormstyle: NO\n')
cat('numoutevec: 20\n')
cat('numoutlieriter: 0\n')
cat('familynames: NO\n')
cat('numthreads: 8\n')
sink()
system(sprintf('%s -p %s/%s_pca.parfile',softwaredir,cohortdir,filename))
system(sprintf('sed -i -r \'s/^\\s+//g\' %s/%sMDS.evec',cohortdir,filename))
system(sprintf('sed -i -r \'s/\\s+/ /g\' %s/%sMDS.evec',cohortdir,filename))
}
ggmafplot <- function(cohortdir,freqfilename,plotdir){
mafdata <- read.table(sprintf('%s/%s.frq',cohortdir,freqfilename),h=T,stringsAsFactors=F)
pdf(sprintf("%s/%s_AllSnps.pdf",plotdir,freqfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
print(ggplot(mafdata,aes(x = MAF)) + xlab("MinorAlleleFrequency") + geom_histogram(colour="black",bins=50))
dev.off()
pdf(sprintf("%s/%s_RareSnps.pdf",plotdir,freqfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
print(ggplot(mafdata[which(mafdata$MAF < 0.01),],aes(x = MAF)) + xlab("MinorAlleleFrequency") + geom_histogram(colour="black",bins=20))
dev.off()
pdf(sprintf("%s/%s_CommonSnps.pdf",plotdir,freqfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
print(ggplot(mafdata[which(mafdata$MAF >= 0.01),],aes(x = MAF)) + xlab("MinorAlleleFrequency") + geom_histogram(colour="black",bins=49))
dev.off()
}
ggmissplot <- function(cohortdir,missfilename,plotdir){
imissdata <- read.table(sprintf('%s/%s.imiss',cohortdir,missfilename),h=T,stringsAsFactors=F)
lmissdata <- read.table(sprintf('%s/%s.lmiss',cohortdir,missfilename),h=T,stringsAsFactors=F)
pdf(sprintf("%s/%s_IndGenotypeRate.pdf",plotdir,missfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
print(ggplot(imissdata,aes(x = F_MISS)) + xlab("IndividualMissingness") + geom_histogram(colour="black",bins=60))
dev.off()
pdf(sprintf("%s/%s_LocusGenotypeRate.pdf",plotdir,missfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
print(ggplot(lmissdata,aes(x = F_MISS)) + xlab("SNPMissingness") + geom_histogram(colour="black",bins=100))
dev.off()
pdf(sprintf("%s/%s_LocusGenotypeRate_Zoomed.pdf",plotdir,missfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
print(ggplot(lmissdata,aes(x = F_MISS)) + xlab("SNPMissingness") + geom_histogram(colour="black",bins=200) + coord_cartesian(xlim = c(0,0.05)))
dev.off()
}
ggmafvsmissplot <- function(cohortdir,freqfilename,missfilename,plotdir){
mafdata <- read.table(sprintf('%s/%s.frq',cohortdir,freqfilename),h=T,stringsAsFactors=F)
lmissdata <- read.table(sprintf('%s/%s.lmiss',cohortdir,missfilename),h=T,stringsAsFactors=F)
mafdata$F_MISS <- lmissdata$F_MISS
mafdata$CallRate <- 0
mafdata[which(mafdata$F_MISS > 0.05),'CallRate'] <- " < 95%"
mafdata[which(mafdata$F_MISS <= 0.05 & mafdata$F_MISS > 0.02),'CallRate'] <- "95% - 98%"
mafdata[which(mafdata$F_MISS <= 0.02),'CallRate'] <- " > 98%"
pdf(sprintf("%s/%s_MAFvsLocusMissingness.pdf",plotdir,freqfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
print(ggplot(mafdata, aes(x=MAF)) + xlab("MinorAlleleFrequency") + geom_histogram(aes(fill=CallRate)))
dev.off()
}
# ggplot of PCA for Population status Selected
ggpcapop2d <- function(cohortdir,filename,cohortname,onkgcohorttype,pcasoftware,plotfilename,plotdir){
mycolors <- c("red","blue","green","purple","orange","black","cyan","brown","magenta","maroon","orchid","sienna",
"orchid4","maroon4","magenta4","brown4","cyan4","orange4","purple4","green4","blue4","red4",
"skyblue","darkturquoise", "dodgerblue2", "#FB9A99", # lt pink
"palegreen2", "deeppink1","blue1","steelblue4","green4","green1",
"darkorange4","skyblue2","gray70","gold1","khaki","pink","azure") # More colours "yellow4","yellow3","#CAB2D6" --# lt purple ,"#FDBF6F" --# lt orange
mymarkers <- c(0:2,5:14,0:2,5:14)
mycolors2 <- c("red","blue","green","purple","orange","black","cyan","brown","magenta","maroon","orchid","sienna",
"red4","blue4","green4","purple4","orange4","cyan4","brown4","magenta4","maroon4","orchid4",
"skyblue","darkturquoise", "dodgerblue2", "#FB9A99", # lt pink
"palegreen2", "deeppink1","blue1","steelblue4","green4","green1",
"darkorange4","skyblue2","gray70","gold1","khaki","pink","azure") # More colours "yellow4","yellow3","#CAB2D6" --# lt purple ,"#FDBF6F" --# lt orange
switch(onkgcohorttype,
selected = {
if(pcasoftware %in% c('EIGENSOFT','eigensoft','EIGENSTRAT','eigenstrat','smartpca')){
data <- read.table(sprintf("%s/%s1kgSLDMDS.evec",cohortdir,filename),h=F,stringsAsFactors=F)
data <- data[,c(dim(data)[2],1:(dim(data)[2]-1))]
}
if(pcasoftware %in% c('plinkpca')){
data <- read.table(sprintf("%s/%s1kgSLDMDS.eigenvec",cohortdir,filename),h=F,stringsAsFactors=F)
}
colnames(data)[1] <- 'FID'
colnames(data)[2] <- 'IID'
colnames(data)[3:22] <- c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14','C15','C16','C17','C18','C19','C20')
data$SOL <- 0
data <- data[,c(1:2,dim(data)[2],3:22)]
data[!(data$FID %in% c('CEU','CHB','FIN','GBR','GIH','IBS','TSI','YRI')),1] <- paste(cohortname,data[!(data$FID %in% c('CEU','CHB','FIN','GBR','GIH','IBS','TSI','YRI')),1],sep='-')
data[which(data$FID == "CEU"),1] <- "1000g-CentralEurope"
data[which(data$FID == "CHB"),1] <- "1000g-ChinaBeijing"
data[which(data$FID == "FIN"),1] <- "1000g-Finland"
data[which(data$FID == "GBR"),1] <- "1000g-GreatBritan"
data[which(data$FID == "GIH"),1] <- "1000g-IndiaGujarat"
data[which(data$FID == "IBS"),1] <- "1000g-SpainIberia"
data[which(data$FID == "TSI"),1] <- "1000g-ItalyTuscan"
data[which(data$FID == "YRI"),1] <- "1000g-NigeriaYoruba"
mymarkersub <- rep(0,length(levels(factor(data$FID))))
mycoloursub <- rep(0,length(levels(factor(data$FID))))
for(i in 1:length(levels(factor(data$FID)))){
mymarkersub[i] <- mymarkers[i %% (length(levels(factor(data$FID)))+1)]
names(mymarkersub)[i] <- levels(factor(data$FID))[i]
mycoloursub[i] <- mycolors[i %% (length(levels(factor(data$FID)))+1)]
names(mycoloursub)[i] <- levels(factor(data$FID))[i]
}
data$FID <- as.factor(data$FID)
colnames(data)[1] <- 'Population'
require(RColorBrewer)
pdf(sprintf("%s/%s_1kgS_PCAPop2D.pdf",plotdir,plotfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
for(i in 1:10){
for(j in i:10){
if(i != j){
print(ggplot(data,aes(data[,(i+3)],data[,(j+3)])) + geom_point(aes(shape = Population, colour=Population)) + scale_shape_manual(name="Cohort-Population",values=mymarkersub ) +
scale_color_manual(name="Cohort-Population",values=mycoloursub)+ theme(legend.text=element_text(size=5)) + xlab(sprintf("PC%s",i)) + ylab(sprintf("PC%s",j)) )
}
}
}
dev.off()
pdf(sprintf("%s/%s_1kgS_PCAPopIndPcs.pdf",plotdir,plotfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
for(i in 1:20){
print(ggplot(data,aes(c(1:nrow(data)),data[,i+3])) + geom_point(aes(shape = Population, colour=Population)) + scale_shape_manual(name="Cohort-Population",values=mymarkersub ) +
scale_color_manual(name="Cohort-Population",values=mycoloursub) + theme(legend.text=element_text(size=5)) + xlab("Samples") + ylab(sprintf("PC%s",i)))
}
dev.off()
}
)
}
# ggplot of PCA for Case-Control status
ggpcacc2d <- function(cohortdir,filename,pcasoftware,plotfilename,plotdir){
mycolors <- c("red","blue","green","purple","orange","black","cyan","brown","magenta","gray70",
"red4","green4","blue4","purple4","skyblue","orange4",
"maroon","darkturquoise", "dodgerblue2", "#FB9A99", # lt pink
"palegreen2", "deeppink1","blue1","steelblue4","green4","green1",
"darkorange4","skyblue2","gray70","gold1","khaki","pink","azure") # More colours "yellow4","yellow3","#CAB2D6" --# lt purple ,"#FDBF6F" --# lt orange
mymarkers <- c(0,1)
mycolors2 <- c("red","blue","green","purple","orange","black","cyan","brown","magenta","skyblue","orchid","sienna",
"red4","blue4","green4","purple4","orange4","cyan4","brown4","magenta4","maroon4","orchid4",
"maroon","darkturquoise", "dodgerblue2", "#FB9A99", # lt pink
"palegreen2", "deeppink1","blue1","steelblue4","green4","green1",
"darkorange4","skyblue2","gray70","gold1","khaki","pink","azure") # More colours "yellow4","yellow3","#CAB2D6" --# lt purple ,"#FDBF6F" --# lt orange
if(pcasoftware %in% c('EIGENSOFT','eigensoft','EIGENSTRAT','eigenstrat','smartpca')){
data <- read.table(sprintf("%s/%sMDS.evec",cohortdir,filename),h=F,stringsAsFactors=F)
data <- data[,c(dim(data)[2],1:(dim(data)[2]-1))]
}
if(pcasoftware %in% c('plinkpca')){
data <- read.table(sprintf("%s/%sMDS.eigenvec",cohortdir,filename),h=F,stringsAsFactors=F)
}
#data <- read.table(sprintf('%s/%sLDMDS.eigenvec',cohortdir,filename),h=F,stringsAsFactors=F)
colnames(data)[1] <- 'FID'
colnames(data)[2] <- 'IID'
colnames(data)[3:22] <- c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14','C15','C16','C17','C18','C19','C20')
data$SOL <- 0
data <- data[,c(1:2,13,3:22)]
famdat <- read.table(sprintf('%s/%s.fam',cohortdir,filename),h=F,stringsAsFactors=F)
rownames(famdat) <- as.vector(famdat[,2])
data$CCStaus <- famdat[data[,2],6]
data[which(data$CCStaus == 1),(dim(data)[2])] <- 'Control'
data[which(data$CCStaus == 2),(dim(data)[2])] <- 'Case'
mymarkersub <- rep(0,length(levels(factor(data$CCStaus))))
mycoloursub <- rep(0,length(levels(factor(data$FID))))
for(i in 1:length(levels(factor(data$CCStaus)))){
if(levels(factor(data$CCStaus))[i] == 'Case'){
mymarkersub[i] <- mymarkers[2]
names(mymarkersub)[i] <- levels(factor(data$CCStaus))[i]
}
else{
mymarkersub[i] <- mymarkers[1]
names(mymarkersub)[i] <- levels(factor(data$CCStaus))[i]
}
}
for(i in 1:length(levels(factor(data$FID)))){
mycoloursub[i] <- mycolors[i %% (length(levels(factor(data$FID)))+1)]
names(mycoloursub)[i] <- levels(factor(data$FID))[i]
}
data$FID <- as.factor(data$FID)
colnames(data)[1] <- "Population"
data$CCStaus <- as.factor(data$CCStaus)
require(RColorBrewer)
pdf(sprintf("%s/%s_PCACaseControl2D.pdf",plotdir,plotfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
for(i in 1:10){
for(j in i:10){
if(i != j){
print(ggplot(data,aes(data[,(i+3)],data[,(j+3)])) + geom_point(aes(shape = CCStaus, colour=Population)) + scale_shape_manual(name="AffectionStatus",values=mymarkersub ) +
scale_color_manual(name="Population",values=mycoloursub)+ xlab(sprintf("PC%s",i)) + ylab(sprintf("PC%s",j)) )
}
}
}
dev.off()
pdf(sprintf("%s/%s_PCACaseControlIndPcs.pdf",plotdir,plotfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
for(i in 1:20){
print(ggplot(data,aes(c(1:nrow(data)),data[,i+3])) + geom_point(aes(shape = CCStaus, colour=Population)) + scale_shape_manual(name="AffectionStatus",values=mymarkersub ) +
scale_color_manual(name="Population",values=mycoloursub) + xlab("Samples") + ylab(sprintf("PC%s",i)))
}
dev.off()
}
ggpcaccpop2d <- function(cohortdir,filename,pcasoftware,plotfilename,plotdir){
mycolors <- c("red","blue","green","purple","orange","black","cyan","brown","magenta","gray70",
"red3","green4","blue4","purple4","skyblue","orange4",
"maroon","darkturquoise", "dodgerblue2", "#FB9A99", # lt pink
"palegreen2", "deeppink1","blue1","steelblue4","green4","green1",
"darkorange4","skyblue2","gray70","gold1","khaki","pink","azure") # More colours "yellow4","yellow3","#CAB2D6" --# lt purple ,"#FDBF6F" --# lt orange
mymarkers <- c(0,1)
mycolors2 <- c("red","blue","green","purple","orange","black","cyan","brown","magenta","skyblue","orchid","sienna",
"red4","blue4","green4","purple4","orange4","cyan4","brown4","magenta4","maroon4","orchid4",
"maroon","darkturquoise", "dodgerblue2", "#FB9A99", # lt pink
"palegreen2", "deeppink1","blue1","steelblue4","green4","green1",
"darkorange4","skyblue2","gray70","gold1","khaki","pink","azure") # More colours "yellow4","yellow3","#CAB2D6" --# lt purple ,"#FDBF6F" --# lt orange
if(pcasoftware %in% c('EIGENSOFT','eigensoft','EIGENSTRAT','eigenstrat','smartpca')){
data <- read.table(sprintf("%s/%sMDS.evec",cohortdir,filename),h=F,stringsAsFactors=F)
data <- data[,c(dim(data)[2],1:(dim(data)[2]-1))]
}
if(pcasoftware %in% c('plinkpca')){
data <- read.table(sprintf("%s/%sMDS.eigenvec",cohortdir,filename),h=F,stringsAsFactors=F)
}
#data <- read.table(sprintf('%s/%sLDMDS.eigenvec',cohortdir,filename),h=F,stringsAsFactors=F)
colnames(data)[1] <- 'FID'
colnames(data)[2] <- 'IID'
colnames(data)[3:22] <- c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14','C15','C16','C17','C18','C19','C20')
data$SOL <- 0
data <- data[,c(1:2,13,3:22)]
famdat <- read.table(sprintf('%s/%s.fam',cohortdir,filename),h=F,stringsAsFactors=F)
rownames(famdat) <- as.vector(famdat[,2])
data$AffectionStatus <- famdat[data[,2],6]
data[which(data$AffectionStatus == 1),(dim(data)[2])] <- 'Control'
data[which(data$AffectionStatus == 2),(dim(data)[2])] <- 'Case'
colnames(data)[1] <- "Population"
data$AffectionStatus <- as.factor(data$AffectionStatus)
require(RColorBrewer)
pdf(sprintf("%s/%s_PCACCPop2D.pdf",plotdir,plotfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
for(i in 1:10){
for(j in i:10){
if(i != j){
print(ggplot(data,aes(data[,(i+3)],data[,(j+3)])) + geom_point(aes(shape = AffectionStatus, colour=AffectionStatus)) + xlab(sprintf("PC%s",i)) + ylab(sprintf("PC%s",j)) )
}
}
}
dev.off()
pdf(sprintf("%s/%s_PCACCPopIndPcs.pdf",plotdir,plotfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
for(i in 1:20){
print(ggplot(data,aes(c(1:nrow(data)),data[,i+3])) + geom_point(aes(shape = AffectionStatus, colour=AffectionStatus)) + xlab("Samples") + ylab(sprintf("PC%s",i)))
}
dev.off()
pdf(sprintf("%s/%s_PCACCIndPcsPerPop.pdf",plotdir,plotfilename), onefile = TRUE, bg = "white",family = "Times", width = 6, height = 6)
for(i in 1:2){
for( j in 1:length(levels(as.factor(data$Population))) ){
temp <- data[which(data$Population == levels(as.factor(data$Population))[j] | data$AffectionStatus == 'Control'),]
temp[!(temp$Population == levels(as.factor(data$Population))[j]),'Population'] <- levels(as.factor(data$Population))[j]
print(ggplot(temp,aes(c(1:nrow(temp)),temp[,i+3])) + geom_point(aes(shape = AffectionStatus, colour=AffectionStatus)) + xlab("Samples") +
ylab(sprintf("PC%s",i)) + labs( title = sprintf("%s",levels(as.factor(data$Population))[j])) )
}
}
dev.off()
}
gg_qqplot <- function(xs, ci=0.95, cohortname) {
N = length(xs)
df = data.frame(observed=-log10(sort(xs)),
expected=-log10(1:N / (N+1)),
cupper=-log10(qbeta(ci, 1:N, N - 1:N + 1)),
clower=-log10(qbeta(1 - ci, 1:N, N - 1:N + 1)))
log10Pe = expression(paste("Expected -log"[10], plain(P)))
log10Po = expression(paste("Observed -log"[10], plain(P)))
png(sprintf("~/Plots/pcaplots/%sqqplot.png",cohortname), width = 6, height = 6, units = "in", res = 300, colortype = "true", family = "Times")
print( ggplot(df) + geom_point(aes(expected, observed), shape=1, size=1) + geom_abline(intercept=0, slope=1, alpha=0.5) + geom_line(aes(expected, cupper), linetype=2) + geom_line(aes(expected, clower), linetype=2) +
xlab(log10Pe) + ylab(log10Po) )
dev.off()
}
qqmanhatonplot <- function(currentdir,filename,plotdir,plotfilename){
library("qqman")
library("GWASTools")
logisticresult <- read.table(sprintf('%s/%s.assoc.logistic',currentdir,filename),h=T,stringsAsFactors=F)
logisticresult <- logisticresult[which(logisticresult$TEST == 'ADD'),]
logisticresult <- logisticresult[which(!is.na(logisticresult$P)), ]
logisticresult <- logisticresult[order(logisticresult$P), ]
z <- qnorm(logisticresult[,"P"]/2)
png(sprintf('%s/%s_QQ.png',plotdir,plotfilename),units="in", width=8, height=8, res=150)
qqPlot(logisticresult$P,main=sprintf("QQ plot with %s = %s",expression(lambda),round(median(z^2)/0.456,3)),cex.axis =1.2,cex.main=1.5,cex.lab=1)
dev.off()
res <- logisticresult[,c(2,1,3,9)]
snpsOfInterest <- res[which(res$P < 0.00001),1]
png(sprintf('%s/%s_Manhattan.png',plotdir,plotfilename), units="in", width=20, height=10, res=150)
manhattan(res,suggestiveline = -log10(0.00001),main="TS and Controls",genomewideline = -log10(0.00000001),col = c("black","#666666","#CC6600"),cex=1,cex.axis=1.5,cex.lab=1,cex.main=2,highlight = snpsOfInterest,ylim=c(0,ceiling(max(-log10(logisticresult$P))))) #
dev.off()
}
updatesnps <- function(currentdir,bimname,beadarray){
switch(beadarray,
oe12v1 = {
data <- read.table('~/Illumina_Files/humanomniexpress-12v1/HumanOmniExpress-12-v1-0-K_Updated.csv',h=T,sep=',',stringsAsFactors=F)
rownames(data) <- as.vector(data[,'Name'])
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
temp <- data[bimfile[which(bimfile$V1 == 0),2],c('Name','Chr','MapInfo')]
temp <- na.omit(temp)
temp <- temp[-which(temp$Chr == 0),]
vgprobe <- read.table('~/Illumina_Files/humanomniexpress-12v1/HumanOmniExpress-12-v1-0-K-auxilliary-file.txt',h=T,stringsAsFactors=F)
vgprobe <- vgprobe[grep('^VG',vgprobe$Name,perl=T),]
vgprobe[which(vgprobe$RsID == '.'),2] <- vgprobe[which(vgprobe$RsID == '.'),1]
data[vgprobe$Name,2] <- vgprobe$RsID
write.table(vgprobe[,c(1,2)],sprintf('%s/%s_update_name.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
oe12v1_1 = {
data <- read.table('~/Illumina_Files/humanomniexpress-12v1-1/HumanOmniExpress-12-v1-1-C_Updated.csv',h=T,sep=',',stringsAsFactors=F)
rownames(data) <- as.vector(data[,'Name'])
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
temp <- data[bimfile[which(bimfile$V1 == 0),2],c('Name','Chr','MapInfo')]
temp <- na.omit(temp)
temp <- temp[-which(temp$Chr == 0),]
vgprobe <- read.table('~/Illumina_Files/humanomniexpress-12v1-1/HumanOmniExpress-12-v1-1-C-auxilliary-file.txt',h=T,stringsAsFactors=F)
vgprobe <- vgprobe[grep('^VG',vgprobe$Name,perl=T),]
vgprobe[which(vgprobe$RsID == '.'),2] <- vgprobe[which(vgprobe$RsID == '.'),1]
data[vgprobe$Name,2] <- vgprobe$RsID
write.table(vgprobe[,c(1,2)],sprintf('%s/%s_update_name.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
oe24v1_1 = {
data <- read.table('~/Illumina_Files/humanomniexpress-24v1-1/HumanOmniExpress-24v1-1_A_annotated_Updated.txt',h=T,sep='\t',stringsAsFactors=F)
rownames(data) <- as.vector(data[,'Name'])
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
temp <- data[bimfile[which(bimfile$V1 == 0),2],c('Name','Chr','MapInfo')]
temp <- na.omit(temp)
temp <- temp[-which(temp$Chr == 0),]
write.table(temp[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
oe24v1_2 = {
data <- read.table('~/Illumina_Files/infiniumomniexpress-24v1-2/InfiniumOmniExpress-24v1-2_A1_Updated.csv',h=T,sep=',',stringsAsFactors=F)
rownames(data) <- as.vector(data[,'Name'])
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
temp <- data[bimfile[which(bimfile$V1 == 0),2],c('Name','Chr','MapInfo')]
temp <- na.omit(temp)
temp <- temp[-which(temp$Chr == 0),]
write.table(temp[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
test <- read.table('~/Illumina_Files/infiniumomniexpress-24v1-2/InfiniumOmniExpress-24v1-2_A1_b144_rsids.txt',h=T,stringsAsFactors=F)
test$RsID <- gsub(",(.*?)$","",test$RsID,perl=T)
write.table(test,sprintf('%s/%s_update_name.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
o25m_8v1_2 = {
data <- read.table('~/Illumina_Files/omniexpressexome-25m-8v1-2/HumanOmni2-5-8-v1-2-A-Gene-Annotation-File_Updated.txt',h=T,sep='\t',stringsAsFactors=F)
rownames(data) <- as.vector(data[,'Name'])
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
temp <- data[bimfile[which(bimfile$V1 == 0),2],c('Name','Chr','MapInfo')]
temp <- na.omit(temp)
temp <- temp[-which(temp$Chr == 0),]
write.table(temp[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
o8 = {
data <- read.table('~/Illumina_Files/omniexpressexome-8/InfiniumOmniExpressExome-8v1-4_A1_annotated_Updated.txt',h=T,sep='\t',stringsAsFactors=F)
rownames(data) <- as.vector(data[,'Name'])
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
temp <- data[bimfile[which(bimfile$V1 == 0),2],c('Name','Chr','MapInfo')]
temp <- na.omit(temp)
temp <- temp[-which(temp$Chr == 0),]
write.table(temp[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
test <- read.table('~/Illumina_Files/omniexpressexome-8/InfiniumOmniExpressExome-8v1-4_A1_b144_rsids.txt',h=T,stringsAsFactors=F)
test$RsID <- gsub(",(.*?)$","",test$RsID,perl=T)
test[which(test$RsID == '.'),2] <- test[which(test$RsID == '.'),1]
write.table(test,sprintf('%s/%s_update_name.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
hhap550 = {
data <- read.table('~/Illumina_Files/humanhap550/HumanHap550v3_A-b37_annotation_updated.txt',h=T,sep='\t',stringsAsFactors=F)
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
write.table(data[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(data[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(bimfile[!(bimfile$V2 %in% data$Name),2],sprintf('%s/%s_update_remove.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
h1mduo = {
data <- read.table('~/Illumina_Files/human1mduo/Human1M-Duov3_annotated_Updated.txt',h=T,sep='\t',stringsAsFactors=F)
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
write.table(data[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(data[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(bimfile[!(bimfile$V2 %in% data$Name),2],sprintf('%s/%s_update_remove.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
h670q = {
data <- read.table('~/Illumina_Files/human670quad/Human670-QuadCustom_v1_A-b37_Updated.txt',h=T,sep='\t',stringsAsFactors=F)
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
write.table(data[,c(1,3)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(data[,c(1,2)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(bimfile[!(bimfile$V2 %in% data$Name),2],sprintf('%s/%s_update_remove.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
affysnp6 = {
data <- read.table('~/Affy_Files/humansnp6/AffySNP6_annotation.txt',h=T,stringsAsFactors=F)
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
write.table(data[,c(1,4)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(data[,c(1,3)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(data[,c(1,2)],sprintf('%s/%s_update_name.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(bimfile[!(bimfile$V2 %in% data$ProbeSetID),2],sprintf('%s/%s_update_remove.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
affy500 = {
data <- read.table('~/Affy_Files/human500k/Affy500K_annotation_updated.txt',h=T,sep='\t',stringsAsFactors=F)
bimfile <- read.table(sprintf('%s/%s.bim',currentdir,bimname),h=F,stringsAsFactors=F)
write.table(data[,c(1,4)],sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(data[,c(1,3)],sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(data[,c(1,2)],sprintf('%s/%s_update_name.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(bimfile[!(bimfile$V2 %in% data$ProbeSetID),2],sprintf('%s/%s_update_remove.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
},
nochip = {
temp <- c('','')
write.table(temp,sprintf('%s/%s_update_map.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
write.table(temp,sprintf('%s/%s_update_chr.txt',currentdir,bimname),quote=F,row.names=F,col.names=F)
}
)
}