-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict.R
89 lines (67 loc) · 2.61 KB
/
predict.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
library(affy)
library(BiocGenerics)
library(ArrayTools)
library(xbioc)
library(tidyr)
library(MuSiC)
#scRNA
pDataPN = read.csv('sampledPNcellType.csv',header = T, row.names = 1)
pDataPN$cellTypeID = as.numeric(pDataPN$cellType)
matrixPN = read.csv('sampledrawMatrix.csv',header = T, row.names = 1)
col = colnames(matrixPN)
row = row.names(pDataPN)
sum(col == row)
set = ExpressionSet(as.matrix(matrixPN))
set = ExpressionSet(as.matrix(PNmarkers))
phenoData <- new("AnnotatedDataFrame",data=pDataPN)
scSet <- ExpressionSet(assayData=as.matrix(matrixPN),
phenoData=phenoData,
annotation="hgu95av2")
#Bulk RNA data PN
matrixNN = read.table('counts_ddsHTSeq_collapsed_NN', header = T)
bulksetNN <- ExpressionSet(assayData=as.matrix(matrixNN))
bulksetNN <- ExpressionSet(assayData=as.matrix(normalized_counts))
bulksetNN <- ExpressionSet(assayData=as.matrix(normalized_countsHi))
bulksetNN <- ExpressionSet(assayData=as.matrix(logCPM2))
bulksetNN <- ExpressionSet(assayData=as.matrix(cpm))
bulksetNN <- ExpressionSet(assayData=as.matrix(n3))
bulksetNN = ExpressionSet(assayData=as.matrix(NNmarkers))
#normalize bulkrna data
library(edgeR)
library(EnrichmentBrowser)
norm.eset = normalize(bulksetNN, data.type = 'rseq')
bulksetNN <- ExpressionSet(assay(norm.eset))
#run music
cell = unique(pDataPN$cellType)
sample_num = unique(pDataPN$sampleID)
Est.prop.GSE50244 = music_prop(bulk.eset = bulksetNN,
sc.eset = scSet, clusters = 'cellType',
samples = 'sampleID',select.ct = cell,
verbose = F)
write.csv(Est.prop.GSE50244$Est.prop.weighted,'onlymarkersNormboth.csv')
write.csv(Est.prop.GSE50244$Weight.gene,'weight.csv')
View(Est.prop.GSE50244$Est.prop.weighted)
data = Est.prop.GSE50244$Est.prop.weighted
write.csv(Est.prop.GSE50244$Est.prop.weighted, 'resup.csv')
rownames = row.names(data)
##read unsampled annotation data to see the propotion
data = as.data.frame(data)
pDataPN_raw = read.csv('PN.celltype.csv',header = T)
colnames = colnames(Est.prop.GSE50244$Est.prop.weighted)
prop = rep(0, length(colnames))
for (i in 1:length(colnames)) {
prop[i] = sum(pDataPN_raw$cellType== colnames[i])
}
prop = prop/length(pDataPN_raw$cellType)
data1 = rbind(data, prop)
write.csv(data1, 'new.csv')
#proportion plot
res = read_csv('res_withCompare.csv')
res = res %>% remove_rownames %>% column_to_rownames(var=colnames(res)[1])
res
res$category <- row.names(res)
res_2 <- melt(res, id.vars = "category")
res_2
(p <- ggplot(res_2, aes(category, value, fill = variable)) +
geom_bar(position = "fill", stat = "identity")
)