-
Notifications
You must be signed in to change notification settings - Fork 0
/
v7_micro_sub_monocle.R
189 lines (154 loc) · 7.05 KB
/
v7_micro_sub_monocle.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# 241012 before pre micro subset monocle
library(Seurat)
library(ggplot2)
library(monocle)
# 加载必要的库
library(devtools)
library(monocle)
library(tidyverse)
library(patchwork)
library(stringr)
library(ggplot2)
library(Seurat)
library(future)
library(rhdf5)
library(dplyr)
library(data.table)
library(Matrix)
library(rjson)
library(monocle3)
library(patchwork)
library(ComplexHeatmap)
library(RColorBrewer)
library(circlize)
library(monocle, lib.loc = "/refdir/Rlib")
# 加载Seurat对象,提取小胶质细胞亚群
# 假设scRNAsub是你已经处理过的Seurat对象
microglia_sub <- readRDS("~/r/danny-scad/rds/micro_sub.rds")
microglia_sub <- microglia_cells
# 将Seurat对象转为CellDataSet对象
data <- as(as.matrix(microglia_sub@assays$RNA$counts), 'sparseMatrix')
pd <- new('AnnotatedDataFrame', data = microglia_sub@meta.data)
fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
fd <- new('AnnotatedDataFrame', data = fData)
mycds <- newCellDataSet(data,
phenoData = pd,
featureData = fd,
expressionFamily = negbinomial.size())
# 估算大小因子和离散度
mycds <- estimateSizeFactors(mycds)
mycds <- estimateDispersions(mycds, cores=40, relative_expr = TRUE)
# 保存和加载CellDataSet对象
saveRDS(mycds, "mycds_microglia.rds")
mycds <- readRDS("mycds_microglia.rds")
# 选择发育差异表达基因(如果有特定的marker基因)
# 也可以选择clusters差异表达基因,或者Seurat选择的高变基因
diff.genes <- read.csv('cell_identify/microglia_diff_genes_wilcox.csv')
diff.genes <- subset(diff.genes, p_val_adj < 0.01)$gene
mycds <- setOrderingFilter(mycds, diff.genes)
# 使用Seurat选择的高变基因
var.genes <- VariableFeatures(microglia_sub)
mycds <- setOrderingFilter(mycds, var.genes)
# 使用Monocle选择的高变基因
disp_table <- dispersionTable(mycds)
disp.genes <- subset(disp_table, mean_expression >= 0.1 & dispersion_empirical >= 1 * dispersion_fit)$gene_id
mycds <- setOrderingFilter(mycds, disp.genes)
# 比较不同基因选择方法的结果
p1 <- plot_ordering_genes(mycds)
p2 <- plot_ordering_genes(mycds)
p3 <- plot_ordering_genes(mycds, genes = disp.genes)
p1 | p2 | p3
# 创建存储拟时序结果的目录
dir.create("./pseudotime/Microglia")
# 降维并进行细胞排序
mycds <- reduceDimension(mycds, max_components = 2, method = 'DDRTree')
mycds <- orderCells(mycds)
# 保存降维和排序结果
saveRDS(mycds, "ordered_reduced_microglia.rds")
# 绘制State轨迹分布图
plot1 <- plot_cell_trajectory(mycds, color_by = "State")
ggsave("pseudotime/Microglia/State.pdf", plot = plot1, width = 6, height = 5)
ggsave("pseudotime/Microglia/State.png", plot = plot1, width = 6, height = 5)
# 绘制Cluster轨迹分布图
plot2 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters")
ggsave("pseudotime/Microglia/Cluster.pdf", plot = plot2, width = 6, height = 5)
ggsave("pseudotime/Microglia/Cluster.png", plot = plot2, width = 6, height = 5)
# 绘制Celltype轨迹分布图(如果有celltype注释)
plot4 <- plot_cell_trajectory(mycds, color_by = "celltype")
ggsave("pseudotime/Microglia/Celltype.pdf", plot = plot4, width = 6, height = 5)
ggsave("pseudotime/Microglia/Celltype.png", plot = plot4, width = 6, height = 5)
# 绘制Pseudotime轨迹图
plot3 <- plot_cell_trajectory(mycds, color_by = "Pseudotime")
ggsave("pseudotime/Microglia/Pseudotime.pdf", plot = plot3, width = 6, height = 5)
ggsave("pseudotime/Microglia/Pseudotime.png", plot = plot3, width = 6, height = 5)
# 合并多个轨迹图
plotc <- plot1 | plot2 | plot3
ggsave("pseudotime/Microglia/Combination.pdf", plot = plotc, width = 10, height = 3.5)
ggsave("pseudotime/Microglia/Combination.png", plot = plotc, width = 10, height = 3.5)
# 保存细胞的拟时序信息
write.csv(pData(mycds), "pseudotime/Microglia/pseudotime.csv")
# 选择感兴趣的基因(可以是小胶质细胞相关基因)
s.genes <- c("Tmem119", "Cx3cr1", "P2ry12", "Itgax", "Apoe")
# 绘制感兴趣基因的图
p1 <- plot_genes_jitter(mycds[s.genes,], grouping = "State", color_by = "State")
p2 <- plot_genes_violin(mycds[s.genes,], grouping = "State", color_by = "State")
p3 <- plot_genes_in_pseudotime(mycds[s.genes,], color_by = "State")
plotc <- p1 | p2 | p3
# 保存基因可视化结果
ggsave("pseudotime/Microglia/plot_genes_jitter.pdf", plot = p1, width = 10, height = 3.5)
ggsave("pseudotime/Microglia/plot_genes_violin.pdf", plot = p2, width = 10, height = 3.5)
ggsave("pseudotime/Microglia/plot_genes_in_pseudotime.pdf", plot = p3, width = 10, height = 3.5)
ggsave("pseudotime/Microglia/genes_visual.png", plot = plotc, width = 8, height = 4.5)
assay <- "RNA"
group.by <- "seurat_clusters"
run_monocle3 <- function(obj,assay='RNA',ex.umap=F,group.by='seurat_clusters',label='out') {
all <- microglia_sub
all$ingroup <- all@meta.data[,group.by]
expression_matrix2 <- all@assays$RNA$counts
# 提取表达矩阵(对于Seurat v5的对象)
expression_matrix <- all@assays[[assay]]$data
identical(expression_matrix,expression_matrix2)
setdiff(expression_matrix,expression_matrix2)
cell_metadata <- all@meta.data
gene_annotation <- data.frame(rownames(all), rownames(all))
rownames(gene_annotation) <- rownames(all)
colnames(gene_annotation) <- c("GeneSymbol", "gene_short_name")
NucSeq_cds <- new_cell_data_set(expression_matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
NucSeq_cds@int_colData$reducedDims$PCA<- all@reductions$pca@cell.embeddings
NucSeq_cds <- reduce_dimension(NucSeq_cds, reduction_method = 'UMAP', preprocess_method = "PCA")
NucSeq_cds$celltype <- NucSeq_cds$ingroup
jpeg('cds.umap.jpg')
p1 <- plot_cells(NucSeq_cds, reduction_method="UMAP", color_cells_by="celltype",show_trajectory_graph=F) + ggtitle('cds.umap')
print(p1)
dev.off()
NucSeq.coembed <- all
##import seurat umap
if(ex.umap){
cds.embed <- NucSeq_cds@int_colData$reducedDims$UMAP
int.embed <- Embeddings(NucSeq.coembed, reduction = "umap")
int.embed <- int.embed[rownames(cds.embed),]
NucSeq_cds@int_colData$reducedDims$UMAP <- int.embed
jpeg('int.umap.jpg')
p1 <- plot_cells(NucSeq_cds, reduction_method="UMAP", color_cells_by="partition",show_trajectory_graph=T) + ggtitle('int.umap')
print(p1)
dev.off()
}
NucSeq_cds <- cluster_cells(NucSeq_cds, reduction_method='UMAP')
print(length(unique(partitions(NucSeq_cds))))
jpeg('partition.umap.jpg')
p1 <- plot_cells(NucSeq_cds, show_trajectory_graph = FALSE,group_label_size = 5,color_cells_by = "partition")
print(p1)
dev.off()
NucSeq_cds <- learn_graph(NucSeq_cds)
jpeg('celltype.umap.jpg')
p1 <- plot_cells(NucSeq_cds, color_cells_by = "cluster",
label_groups_by_cluster = FALSE, label_leaves = TRUE,
label_branch_points = TRUE,group_label_size = 5)
print(p1)
dev.off()
saveRDS(NucSeq_cds,paste0(label,".rds"))
meta <- data.frame(NucSeq_cds@colData@listData)
write.table(meta,paste0(label,"_metadata.xls"),sep='\t',quote=F)
}