-
Notifications
You must be signed in to change notification settings - Fork 5
/
1.read_data.R
32 lines (32 loc) · 1009 Bytes
/
1.read_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# NOTE: there is deliberately no `rm(list = ls())` here. It only removes the
# visible objects of the global environment (attached packages and options
# stay as they are), so it does not give a clean session, yet it silently
# wipes the caller's workspace. Restart R to get a clean state instead.

### 1. Read the full dataset ----
# One-off, slow preprocessing: combine the expected-count table with the
# phenotype table, drop the TARGET samples and cache the result as
# "gtex_tcga_expr_ph.Rdata". The step runs only while that cache file is
# missing; delete the file to force a rebuild.
if (!file.exists("gtex_tcga_expr_ph.Rdata")) {
  counts <- data.table::fread(
    "TcgaTargetGtex_gene_expected_count.gz",
    data.table = FALSE
  )
  ph <- data.table::fread(
    "TcgaTargetGTEX_phenotype.txt.gz",
    data.table = FALSE
  )

  # In both tables the `sample` column becomes the row names; the first
  # column is then dropped (assumes `sample` is the first column -- TODO
  # confirm against the raw files).
  rownames(counts) <- counts[["sample"]]
  counts <- counts[, -1, drop = FALSE]
  rownames(ph) <- ph[["sample"]]
  ph <- ph[, -1, drop = FALSE]
  expr <- as.matrix(counts)
  message("expr: ", nrow(expr), " x ", ncol(expr),
          "; ph: ", nrow(ph), " x ", ncol(ph))

  # Match sample names: keep only samples present in both tables, in the
  # column order of `expr`. A bare match() would insert all-NA phenotype
  # rows for expression samples that have no phenotype record.
  shared <- intersect(colnames(expr), rownames(ph))
  message(length(shared), " samples are present in both tables")
  expr <- expr[, match(shared, colnames(expr)), drop = FALSE]
  ph <- ph[match(shared, rownames(ph)), , drop = FALSE]
  stopifnot(identical(colnames(expr), rownames(ph)))

  # Drop the TARGET samples (original note: keep the TCGA and GTEx samples).
  # Samples with a missing study label are dropped as well; with a plain
  # `!=` they would yield NA mask entries and corrupt both subsets.
  study <- ph[["_study"]]
  print(table(study, useNA = "ifany"))
  keep <- !is.na(study) & study != "TARGET"
  ph <- ph[keep, , drop = FALSE]
  expr <- expr[, keep, drop = FALSE]
  save(expr, ph, file = "gtex_tcga_expr_ph.Rdata")
}
# Load `expr` and `ph` from the cache saved by the preprocessing step above.
load("gtex_tcga_expr_ph.Rdata")
# The column subsetting below is only valid when the columns of `expr` and
# the rows of `ph` describe the same samples in the same order.
stopifnot(identical(colnames(expr), rownames(ph)))

### 2. Select the pancreas samples ----
table(ph$`_primary_site`)
# `%in%` never returns NA, so samples with a missing primary site are
# excluded instead of turning into NA rows/columns in the saved objects.
keep2 <- ph[["_primary_site"]] %in% "Pancreas"
# drop = FALSE keeps a matrix / data frame even if only one sample matches.
Pancreas_expr <- expr[, keep2, drop = FALSE]
Pancreas_ph <- ph[keep2, , drop = FALSE]
save(Pancreas_ph, Pancreas_expr, file = "Pancreas_expr_ph.Rdata")