-
Notifications
You must be signed in to change notification settings - Fork 0
/
Regular100.r
133 lines (106 loc) · 5.1 KB
/
Regular100.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
###non-linear splicing###
###Nov 08 2018###
###for len100###
###new broken sample###
##load packages##
library(dplyr)
library(ggplot2)
library(ggrepel)
library(tidyr)
## read data in ##
# Non-linear count tables exist per read length:
#   len1000: gene_list_nl.csv
#   len100:  sort_by_count_100.txt  (the one loaded below)
#   len500:  sort_by_count_500.txt
# Old broken sample, kept for reference:
# broken<-read.delim2('gene_list_broken.csv',sep = ',', skip = 1)

# Headerless, tab-separated inputs (columns arrive as V1, V2, ...).
nonlinear <- read.delim2("sort_by_count_100.txt", sep = "\t", header = FALSE)
prostate <- read.delim2("prostate.txt", sep = "\t", header = FALSE)
# This one is read with its header row.
all <- read.delim2("combined_gene_list.txt", sep = "\t")

# New broken sample: V2 is split on a space into two pieces and only the
# first piece (Broken.samples) is kept next to V1.
broken <- read.delim2("broken_new_list.txt",
  sep = "\t", header = FALSE, stringsAsFactors = FALSE
) %>%
  separate(V2, into = c("Broken.samples", "de"), sep = " ")
broken <- broken[, 1:2]
# separate() yields character columns; the count must be numeric before it
# is compared against the cut-offs or plotted.
broken$Broken.samples <- as.numeric(broken$Broken.samples)
## rename ##
# rename() takes new_name = old_name. Every table ends up keyed by `gene`.
broken <- rename(broken, gene = V1)
nonlinear <- rename(nonlinear, gene = V1, Nonlinear.samples = V2)

# Keep a copy of the key under the table's own name, so that after the left
# joins a non-NA `prostate` / `all` value marks membership in that list.
prostate <- rename(prostate, gene = V1)
prostate$prostate <- prostate$gene
all <- rename(all, gene = GENE)
all$all <- all$gene
## join two df into one ##
# Full join keeps genes seen in either count table; every NA left in the
# joined frame is then replaced by 0.
dat <- full_join(broken, nonlinear, by = "gene")
dat[is.na(dat)] <- 0

# Attach the annotation lists; genes absent from a list keep NA there.
dat <- dat %>%
  left_join(all, by = "gene") %>%
  left_join(prostate, by = "gene")

# Membership columns are used as text labels later on.
membership_cols <- c("all", "prostate")
dat[membership_cols] <- lapply(dat[membership_cols], as.character)
## define vertical and horizontal line intercept ##
# Exploratory base-graphics scatter used to choose the cut-offs by eye.
plot(dat$Broken.samples, dat$Nonlinear.samples) # x=20,y=40 (old,new)

# A gene is highlighted when it exceeds either cut-off. These are the same
# values the ggplot sections draw as dashed guide lines (x = 20, y = 40).
# Earlier thresholds tried for this test were >20 / >25.
broken_cutoff <- 20
nonlinear_cutoff <- 40
above_cutoff <- dat$Broken.samples > broken_cutoff |
  dat$Nonlinear.samples > nonlinear_cutoff

# Vectorized replacement for the former row-by-row loops, which called
# ifelse() on scalars with assignments as its branches and depended on the
# first scalar assignment being recycled to create each column. This form
# also behaves on a zero-row `dat`, where 1:nrow(dat) would iterate over
# c(1, 0).
dat$plot <- ifelse(above_cutoff, 1, 0)

# Label text: the gene's entry from each annotation list, but only for
# highlighted genes that are actually present in that list; "" otherwise.
dat$labelall <- ifelse(above_cutoff & !is.na(dat$all), dat$all, "")
dat$labelpro <- ifelse(above_cutoff & !is.na(dat$prostate), dat$prostate, "")

# Same 0/1 flag as `plot`; kept as its own column because the plotting code
# reads `labelcolor` for point size and colour.
dat$labelcolor <- dat$plot
###### use this for label ######
###### all: labels placed with ggrepel ######
# Scatter of broken vs. non-linear sample counts. Genes past either cut-off
# (labelcolor == 1) are drawn larger and red; genes carrying a label from the
# `all` list use shape 19 (solid), unlabeled genes use shape 1 (hollow).
# The label aesthetic refers to the column by name rather than `dat$labelall`,
# since `$` inside aes() is discouraged by ggplot2.
# NOTE(review): size/color/shape are full-length vectors passed outside
# aes(), so they rely on `dat` keeping its row order up to this point.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept so that older installations still run - confirm the target
# version before switching.
pt22 <- ggplot(
  dat,
  aes(x = Broken.samples, y = Nonlinear.samples, label = labelall)
) +
  geom_point(
    size = ifelse(dat$labelcolor == 0, 2, 3),
    color = ifelse(dat$labelcolor == 0, "grey", "red"),
    shape = ifelse(dat$labelall == "", 1, 19)
  ) +
  geom_vline(xintercept = 20, linetype = "dashed", size = 0.1) +
  geom_hline(yintercept = 40, linetype = "dashed", size = 0.1) +
  geom_text_repel() +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
# Save this plot explicitly instead of whatever last_plot() happens to be.
ggsave("new_repall_100.pdf", plot = pt22, width = 8, height = 8)
#### prostate ####
######## prostate list: labels placed with ggrepel ########
# Same scatter as the `all` version, but labels and solid/hollow point shape
# come from `labelpro` (genes present in the prostate list and past a
# cut-off). Size and colour still follow `labelcolor`.
# The label aesthetic refers to the column by name rather than `dat$labelpro`,
# since `$` inside aes() is discouraged by ggplot2.
# NOTE(review): size/color/shape are full-length vectors passed outside
# aes(), so they rely on `dat` keeping its row order up to this point.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept so that older installations still run - confirm the target
# version before switching.
pt33 <- ggplot(
  dat,
  aes(x = Broken.samples, y = Nonlinear.samples, label = labelpro)
) +
  geom_point(
    size = ifelse(dat$labelcolor == 0, 2, 3),
    color = ifelse(dat$labelcolor == 0, "grey", "red"),
    shape = ifelse(dat$labelpro == "", 1, 19)
  ) +
  geom_vline(xintercept = 20, linetype = "dashed", size = 0.1) +
  geom_hline(yintercept = 40, linetype = "dashed", size = 0.1) +
  geom_text_repel() +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
# Save this plot explicitly instead of whatever last_plot() happens to be.
ggsave("new_reppro_100.pdf", plot = pt33, width = 8, height = 8)
################# selected genes #########################
# Hand-picked genes to highlight regardless of the count cut-offs.
slgene <- c(
  "ACPP", "MYC", "PTEN", "CDKN1B", "RB1", "FOXA1", "TP53",
  "AR", "TMPRSS2", "ERG", "ELK4", "ETV1", "FAT1", "FOXP1"
)

# A row is selected when its entry in either annotation list is one of the
# hand-picked genes. %in% never returns NA, so genes missing from a list
# simply come out FALSE. Vectorized replacement for the former per-row loop
# of scalar ifelse() calls with assignments as branches.
is_selected <- dat$all %in% slgene | dat$prostate %in% slgene

# 0/1 flag, stored as a factor for the plots below.
dat$colorg <- as.factor(ifelse(is_selected, 1, 0))

# Selected genes are labeled with their gene name; everything else gets "".
# as.character() leaves character input unchanged and stops factor codes
# leaking through ifelse() should `gene` ever be a factor.
dat$labelg <- ifelse(is_selected, as.character(dat$gene), "")
####### selected genes: labels placed with ggrepel ########
# Scatter of broken vs. non-linear sample counts in which only the
# hand-picked genes (colorg == 1) are enlarged, coloured red and labeled.
# `colorg` is a factor; comparing it with 0 matches on the level label "0".
# The label aesthetic refers to the column by name rather than `dat$labelg`,
# since `$` inside aes() is discouraged by ggplot2.
# NOTE(review): size/color are full-length vectors passed outside aes(), so
# they rely on `dat` keeping its row order up to this point.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept so that older installations still run - confirm the target
# version before switching.
pt44 <- ggplot(
  dat,
  aes(x = Broken.samples, y = Nonlinear.samples, label = labelg)
) +
  geom_point(
    size = ifelse(dat$colorg == 0, 2, 5),
    color = ifelse(dat$colorg == 0, "grey", "red")
  ) +
  geom_vline(xintercept = 20, linetype = "dashed", size = 0.1) +
  geom_hline(yintercept = 40, linetype = "dashed", size = 0.1) +
  geom_text_repel() +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
# Save this plot explicitly instead of whatever last_plot() happens to be.
ggsave("new_relselected_100.pdf", plot = pt44, width = 8, height = 8)
## overlap: color, size and shape ##
# Variant of the selected-genes plot that additionally varies the point
# shape by `colorg`, so selected genes differ in colour, size and shape.
# The label aesthetic refers to the column by name rather than `dat$labelg`,
# since `$` inside aes() is discouraged by ggplot2.
# NOTE(review): `colorg` is a factor and is passed to `shape` outside aes();
# the shapes drawn are presumably taken from the factor's integer codes
# rather than from the values 0/1 - confirm this is the intended pair.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept so that older installations still run - confirm the target
# version before switching.
pt46 <- ggplot(
  dat,
  aes(x = Broken.samples, y = Nonlinear.samples, label = labelg)
) +
  geom_point(
    size = ifelse(dat$colorg == 0, 2, 4),
    color = ifelse(dat$colorg == 0, "grey", "red"),
    shape = dat$colorg
  ) +
  geom_vline(xintercept = 20, linetype = "dashed", size = 0.1) +
  geom_hline(yintercept = 40, linetype = "dashed", size = 0.1) +
  geom_text_repel() +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
# Save this plot explicitly instead of whatever last_plot() happens to be.
ggsave("new_relselected_shape_100.pdf", plot = pt46, width = 8, height = 8)