SCP_project_code.Rmd

---
title: "MSc Thesis Project"
author: "Ibrahim ElZahaby"
affiliation: "Wageningen University and Research"
chair group: "Systems and Synthetic Biology"
Division: "Systems and Personalized Medicine"
output: html_notebook
---

### ---------- Load required libraries ----------

```{r}
library(readxl)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(VennDiagram)
library(clusterProfiler)
library(enrichplot)
library(DOSE)
library(org.Hs.eg.db)
library(pathfindR)
library(corrplot)
library(visNetwork)
library(igraph)
library(anvis)
library(pals)
library(FactoMineR)
library(factoextra)
library(ggpubr)
library(RColorBrewer)
library(cluster)
library(palmerpenguins)
library(umap)
library(lattice)
# Use a black-and-white ggplot2 theme with an 18 pt base font for all plots
theme_set(theme_bw(18))

# Check if connected to Cytoscape.
# The condition raised by cytoscapePing() is captured and its message compared
# with the expected success text. When no condition is captured the message is
# NULL and a bare `==` would yield logical(0); isTRUE() guarantees that
# cyto_active is always a single TRUE or FALSE.
cyto_message <- testthat::capture_condition(RCy3::cytoscapePing())$message
cyto_active <- isTRUE(cyto_message == "You are connected to Cytoscape!\n")

# Define layout function for the graph: place nodes on a 4 x 9 grid
layout_func <- function(graph) igraph::layout_on_grid(graph, width = 4, height = 9)
```

### ---------- Venn Diagrams ----------

```{r}
# Read Hela proteins from Excel files
# NOTE(review): the identifier column is accessed as `protein` for hc4.xlsx
# but as `Protein` for hc5.xlsx — confirm against the actual sheet headers.
Hela_Set1 <- read_excel("hc4.xlsx")
helalist1 <- Hela_Set1$protein

Hela_Set2 <- read_excel("hc5.xlsx")
helalist2 <- Hela_Set2$Protein

# Create Venn diagram for Hela proteins; the returned object is arranged into
# the combined Venn.png figure at the end of this chunk
hela <- venn.diagram(list("Hela1" = helalist1, "Hela2" = helalist2),
                     fill = c("cyan", "red"), height = 2500, width = 2500,
                     imagetype = "png", cex = 1.2, col = "Red", lty = "blank",
                     lwd = 1, fontface = "bold", fontfamily = "sans",
                     alpha = 0.5, filename = NULL, print.mode = c("raw"))

# Obtain Hela overlapped protein names.
# The two lists defined above are helalist1 and helalist2; the previous call
# referenced helalist4/helalist5, which are never created in this file.
helaoverlap <- intersect(helalist1, helalist2)

# Write Hela overlapped protein names to a text file (one name per line,
# no header, no quoting)
write.table(helaoverlap, file = "Hela_intersected.txt", sep = "",
            quote = FALSE, col.names = FALSE, row.names = FALSE)

# Read PDAC proteins from Excel files
# NOTE(review): the identifier column is accessed as `proteins` for PDAC2.xlsx
# but as `prot` for PDAC3.xlsx — confirm against the actual sheet headers.
PDAC_Set1 <- read_excel("PDAC2.xlsx")
PDAC_list1 <- PDAC_Set1$proteins

PDAC_Set2 <- read_excel("PDAC3.xlsx")
PDAC_list2 <- PDAC_Set2$prot

# Create Venn diagram for PDAC proteins; the returned object is arranged into
# the combined Venn.png figure at the end of this chunk
pdac <- venn.diagram(list("PDAC1" = PDAC_list1, "PDAC2" = PDAC_list2),
                     fill = c("cyan", "red"), height = 2500, width = 2500,
                     imagetype = "png", cex = 1.2, col = "Red", lty = "blank",
                     lwd = 1, fontface = "bold", fontfamily = "sans",
                     alpha = 0.5, filename = NULL, print.mode = c("raw"))

# Obtain PDAC overlapped protein names.
# The two lists defined above are PDAC_list1 and PDAC_list2; the previous call
# referenced PDAC_list3, which is never created in this file.
PDACoverlap <- intersect(PDAC_list1, PDAC_list2)

# Write PDAC overlapped protein names to a text file (one name per line,
# no header, no quoting)
write.table(PDACoverlap, file = "PDAC_intersected.txt", sep = "",
            quote = FALSE, col.names = FALSE, row.names = FALSE)

# Read Monocytes (U937) proteins from Excel files.
# Note that the list numbering (1-5) does not follow the file numbering
# (U937_1, _3, _4, _5, _6).
U1_cells <- read_excel("U937_1.xlsx")
U937_list1 <- U1_cells$proteins

U2_cells <- read_excel("U937_3.xlsx")
U937_list2 <- U2_cells$proteins

U3_cells <- read_excel("U937_4.xlsx")
U937_list3 <- U3_cells$proteins

U4_cells <- read_excel("U937_5.xlsx")
U937_list4 <- U4_cells$proteins

U5_cells <- read_excel("U937_6.xlsx")
U937_list5 <- U5_cells$proteins

# Create Venn diagram for Monocytes (U937) proteins; the returned object is
# arranged into the combined Venn.png figure at the end of this chunk
monocytes <- venn.diagram(list("U1" = U937_list1, "U2" = U937_list2,
                               "U3" = U937_list3, "U4" = U937_list4,
                               "U5" = U937_list5),
                          fill = c("cyan", "red", "green", "blue", "yellow"),
                          height = 500, width = 500, imagetype = "png",
                          cex = 1, col = "Red", lty = "blank", lwd = 0.8,
                          fontface = "bold", fontfamily = "sans", alpha = 0.5,
                          filename = NULL, print.mode = c("raw"))

# Obtain Monocytes (U937) overlapped protein names.
# All five lists shown in the Venn diagram are intersected. The previous code
# indexed the lists by file number (1, 3, 4, 5, 6), which skipped U937_list2
# and referenced U937_list6, a variable that is never created in this file.
monocytesoverlap <- Reduce(intersect, list(U937_list1, U937_list2, U937_list3))
monooverlap <- Reduce(intersect, list(monocytesoverlap, U937_list4, U937_list5))

# Write Monocytes (U937) overlapped protein names to a text file (one name per
# line, no header, no quoting)
write.table(monooverlap, file = "U937_intersected.txt", sep = "",
            quote = FALSE, col.names = FALSE, row.names = FALSE)

# Read Melanoma proteins from Excel files
Melanoma_Set2 <- read_excel("melanoma3.xlsx")
melanoma_list2 <- Melanoma_Set2$proteins

Melanoma_Set1 <- read_excel("melanoma1.xlsx")
melanoma_list1 <- Melanoma_Set1$proteins

# Create Venn diagram for Melanoma proteins; the returned object is arranged
# into the combined Venn.png figure at the end of this chunk
melanoma <- venn.diagram(list("Mela2" = melanoma_list2, "Mela1" = melanoma_list1),
                         fill = c("cyan", "red"), main.cex = 2, height = 2500,
                         width = 2500, imagetype = "png", cex = 1.2,
                         col = "Red", lty = "blank", lwd = 1,
                         fontface = "bold", fontfamily = "sans", alpha = 0.5,
                         filename = NULL, print.mode = c("raw"))

# Obtain Melanoma overlapped protein names.
# The list read from melanoma3.xlsx is stored as melanoma_list2; the previous
# call referenced melanoma_list3, which is never created in this file.
melanomaoverlap <- intersect(melanoma_list2, melanoma_list1)

# Write Melanoma overlapped protein names to a text file (one name per line,
# no header, no quoting)
write.table(melanomaoverlap, file = "melanoma_intersected.txt", sep = "",
            quote = FALSE, col.names = FALSE, row.names = FALSE)

# Write the four Venn diagrams as a labelled 2-column panel to Venn.png.
# Pixel dimensions are inches (w, h) multiplied by the resolution (res).
res <- 300
w <- 8
h <- 8
png(filename = "Venn.png", width = w * res, height = h * res, res = res)
cowplot::plot_grid(
  monocytes, hela, melanoma, pdac,
  ncol = 2,
  labels = LETTERS[1:4],
  label_size = 16
)
dev.off()
```

### ---------- Gene Ontology Enrichment Analysis ----------

```{r}
### Hela GO Enrichment Analysis

# Read the Hela identifier table; its `entrezgene` column supplies the gene
# IDs passed to enrichGO() below
hela_genes <- read_excel("hela_idmap.xlsx")

# Perform Gene Ontology (GO) enrichment analysis for Biological Processes (BP)
hela_go <- enrichGO(gene = hela_genes$entrezgene, ont = "BP",
                    OrgDb = "org.Hs.eg.db",
                    readable = TRUE,
                    pvalueCutoff = 0.05, qvalueCutoff = 0.3)

# Generate dotplot for enriched GO terms
hela_go_dot <- dotplot(hela_go)

# Calculate pairwise term similarities (input for the enrichment map)
hg <- pairwise_termsim(hela_go)

# Generate enrichment map plot (showCategory = 10)
hela_emap <- emapplot(hg, showCategory = 10)

# Set gene ID names using org.Hs.eg.db
hgcenter <- setReadable(hela_go, OrgDb = "org.Hs.eg.db",
                        keyType = "ENTREZID")

# Generate gene-concept network plot (non-circular layout, showCategory = 5).
# NOTE(review): `foldChange` is given the enrichment result object itself
# rather than a per-gene numeric vector — confirm this is intended.
helanet <- cnetplot(hgcenter, foldChange = hela_go, circular = FALSE,
                    colorEdge = TRUE, showCategory = 5)


### PDAC GO Enrichment Analysis

# Read the PDAC identifier table; its `entrezgene` column supplies the gene
# IDs passed to enrichGO() below.
# Note: this assignment reuses the name `pdac`, which held the PDAC Venn
# diagram object in the Venn chunk.
pdac <- read_excel("idmap.xlsx")

# Perform Gene Ontology (GO) enrichment analysis for Biological Processes (BP)
pdacGO <- enrichGO(gene = pdac$entrezgene, ont = "BP",
                   OrgDb = "org.Hs.eg.db",
                   readable = TRUE,
                   pvalueCutoff = 0.05, qvalueCutoff = 0.3)

# Generate dotplot for enriched GO terms
pdac_go_dot <- dotplot(pdacGO)

# Calculate pairwise term similarities (input for the enrichment map)
pd <- pairwise_termsim(pdacGO)

# Generate enrichment map plot (showCategory = 10)
pdac_emap <- emapplot(pd, showCategory = 10)

# Set gene ID names using org.Hs.eg.db.
# The enrichment result is passed directly, as in the Hela, Melanoma and
# Monocytes sections (previously the term-similarity object `pd` was used).
pdcenter <- setReadable(pdacGO, OrgDb = "org.Hs.eg.db",
                        keyType = "ENTREZID")

# Generate gene-concept network plot (non-circular layout, showCategory = 5).
# NOTE(review): `foldChange` is given the enrichment result object itself
# rather than a per-gene numeric vector — confirm this is intended.
pdacnet <- cnetplot(pdcenter, foldChange = pdacGO, circular = FALSE,
                    colorEdge = TRUE, showCategory = 5)


### Melanoma GO Enrichment Analysis

# Read the Melanoma identifier table; its `entrezgene` column supplies the
# gene IDs passed to enrichGO() below
melanoma_genes <- read_excel("melanoma_idmap.xlsx")

# Perform Gene Ontology (GO) enrichment analysis for Biological Processes (BP)
melanoma_go <- enrichGO(gene = melanoma_genes$entrezgene, ont = "BP",
                        OrgDb = "org.Hs.eg.db",
                        readable = TRUE,
                        pvalueCutoff = 0.05, qvalueCutoff = 0.3)

# Generate dotplot for enriched GO terms
melanoma_go_dot <- dotplot(melanoma_go)

# Calculate pairwise term similarities (input for the enrichment map)
mg <- pairwise_termsim(melanoma_go)

# Generate enrichment map plot (showCategory = 10)
mela_emap <- emapplot(mg, showCategory = 10)

# Set gene ID names using org.Hs.eg.db
mgcenter <- setReadable(melanoma_go, OrgDb = "org.Hs.eg.db",
                        keyType = "ENTREZID")

# Generate gene-concept network plot (non-circular layout, showCategory = 5).
# NOTE(review): `foldChange` is given the enrichment result object itself
# rather than a per-gene numeric vector — confirm this is intended.
melanet <- cnetplot(mgcenter, foldChange = melanoma_go, circular = FALSE,
                    colorEdge = TRUE, showCategory = 5)


### Monocytes GO Enrichment Analysis

# Read the Monocyte identifier table; its `entrezgene` column supplies the
# gene IDs passed to enrichGO() below
monocytes_genes <- read_excel("monocytes_idmap.xlsx")

# Perform Gene Ontology (GO) enrichment analysis for Biological Processes (BP)
monocytes_go <- enrichGO(gene = monocytes_genes$entrezgene, ont = "BP",
                         OrgDb = "org.Hs.eg.db",
                         readable = TRUE,
                         pvalueCutoff = 0.05, qvalueCutoff = 0.3)

# Generate dotplot for enriched GO terms
monocytes_go_dot <- dotplot(monocytes_go)

# Calculate pairwise term similarities (input for the enrichment map).
# Note: `mg` is also the name used for the Melanoma term similarities in the
# Melanoma section; it is overwritten here.
mg <- pairwise_termsim(monocytes_go)

# Generate enrichment map plot (showCategory = 10)
mono_emap <- emapplot(mg, showCategory = 10)

# Set gene ID names using org.Hs.eg.db
mkcenter <- setReadable(monocytes_go, OrgDb = "org.Hs.eg.db",
                        keyType = "ENTREZID")

# Generate gene-concept network plot (non-circular layout, showCategory = 5).
# NOTE(review): `foldChange` is given the enrichment result object itself
# rather than a per-gene numeric vector — confirm this is intended.
mononet <- cnetplot(mkcenter, foldChange = monocytes_go, circular = FALSE,
                    colorEdge = TRUE, showCategory = 5)


### Saving the output plots

# All three figures are 2-column panels labelled A-D, ordered
# Monocytes, Hela, Melanoma, PDAC. Pixel size = inches * res.
res <- 300
w <- 16

# GO dot plots -> GO.png (16 x 12 in)
h <- 12
png(filename = "GO.png", width = w * res, height = h * res, res = res)
cowplot::plot_grid(
  monocytes_go_dot, hela_go_dot, melanoma_go_dot, pdac_go_dot,
  ncol = 2, labels = LETTERS[1:4], label_size = 24
)
dev.off()

# Enrichment maps -> emap.png (16 x 12 in)
png(filename = "emap.png", width = w * res, height = h * res, res = res)
cowplot::plot_grid(
  mono_emap, hela_emap, mela_emap, pdac_emap,
  ncol = 2, labels = LETTERS[1:4], label_size = 24
)
dev.off()

# Gene-concept networks -> Cnet.png (16 x 14 in)
h <- 14
png(filename = "Cnet.png", width = w * res, height = h * res, res = res)
cowplot::plot_grid(
  mononet, helanet, melanet, pdacnet,
  ncol = 2, labels = LETTERS[1:4], label_size = 24
)
dev.off()
```

### ---------- Correlation and Network Analysis ----------

```{r}
### Hela_Set4 Protein-Protein Correlation Analysis

# Read set4 of Hela proteins correlation data from text file
h4 <- read.delim2("h4intersect.txt", sep = "\t", stringsAsFactors = FALSE)
hela_cor <- as.data.frame(h4)
# Transpose, then promote the first row (the former first column) to column
# names and drop it from the data
hela_cor <- t(hela_cor)
colnames(hela_cor) <- hela_cor[1, ]
hela_cor <- hela_cor[-1, ]
# Convert the transposed matrix to its natural type. `as.is = TRUE` is stated
# explicitly: type.convert() warns when the caller leaves it unspecified.
hj <- type.convert(hela_cor, as.is = TRUE)
# NOTE(review): this flattened numeric copy is not used by the visible code
hela_cor <- as.numeric(hela_cor)

# Calculate the Spearman correlation matrix between columns of hj, zero out
# correlations with absolute value below 0.75, and clear the diagonal
cor_h4 <- cor(hj, method = "spearman")
cor_h4 <- as.data.frame(cor_h4)
cor_h4[abs(cor_h4) < 0.75] <- 0
diag(cor_h4) <- 0

# Filter and clean correlation network: drop columns, then rows, that have no
# remaining non-zero correlation
cor_net <- cor_h4[, colSums(abs(cor_h4), na.rm = TRUE) > 0]
cor_net <- cor_net[rowSums(abs(cor_net), na.rm = TRUE) > 0, ]

### Hela_Set4 Protein-Protein Network Visualization

# Define custom colors for groups
custom_cols <- c("#4E79A7", "#F28E2B", "#E15759", "#76B7B2")

# Define color palette and color ramp function
# (rev_pal is the edge colour function used below; pal_gwo is not used in the
# visible code)
rev_pal <- function (n) rev(pals::ocean.oxy(n))
pal_gwo <- colorRampPalette(colors = c("darkgreen", "white", "darkorange"))

# Convert adjacency matrix to network
net0_h4 <- adjToNetwork(cor_net, directed = FALSE, self_loops = FALSE,
            node_attrs = "none", group_colors = custom_cols,
            edge_color_func = rev_pal,
            edge_attrs = c("width", "color"), colorblind = FALSE, arrange_co = FALSE,
            group_vec = NULL, size_type = "cytoscape",
            width_type = "partcor", output_as = 'list')

# Generate circular visualization using anvis
A <- anvis(net0_h4,
           igr_plot_opts = list(vertex.frame.color = "black"),
           directed = FALSE, output_type = "igraph",
           vis_edge_factor = 3, cyto_node_space = 2,
           igr_grid = TRUE, vis_radial_labs = TRUE
           )

# Generate visualization using anvis changed in cytoscape
# (uses the grid layout function defined in the setup chunk)
A_h4 <- anvis(net0_h4,
      igr_plot_opts = list(layout = layout_func,
                           vertex.label.cex = 0.6,
                           vertex.label.color = "black",
                           vertex.label.family = "sans"),
      vis_radial_labs = FALSE, cyto_close_session = FALSE)


### Hela_Set5 Protein-Protein Correlation Analysis

# Read set5 of Hela protein correlation data from file
h5 <- read.delim2("h5intersect.txt", sep = "\t", stringsAsFactors = FALSE)
hela5_cor <- as.data.frame(h5)
# Transpose, then promote the first row (the former first column) to column
# names and drop it from the data
hela5_cor <- t(hela5_cor)
colnames(hela5_cor) <- hela5_cor[1, ]
hela5_cor <- hela5_cor[-1, ]
# Convert the transposed matrix to its natural type. `as.is = TRUE` is stated
# explicitly: type.convert() warns when the caller leaves it unspecified.
hj5 <- type.convert(hela5_cor, as.is = TRUE)
# NOTE(review): this flattened numeric copy is not used by the visible code
hela5_cor <- as.numeric(hela5_cor)

# Calculate the Spearman correlation matrix between columns of hj5, zero out
# correlations with absolute value below 0.87, and clear the diagonal
cor_h5 <- cor(hj5, method = "spearman")
cor_h5 <- as.data.frame(cor_h5)
cor_h5[abs(cor_h5) < 0.87] <- 0
diag(cor_h5) <- 0

# Filter and clean correlation network: drop columns, then rows, that have no
# remaining non-zero correlation
cor_net5 <- cor_h5[, colSums(abs(cor_h5), na.rm = TRUE) > 0]
cor_net5 <- cor_net5[rowSums(abs(cor_net5), na.rm = TRUE) > 0, ]

### Hela_Set5 Protein-Protein Network Visualization

# Convert adjacency matrix to network
# (custom_cols and rev_pal are defined in the Hela_Set4 section above)
net0_h5 <- adjToNetwork(cor_net5, directed = FALSE, self_loops = FALSE,
                        node_attrs = "none", group_colors = custom_cols,
                        edge_color_func = rev_pal,
                        edge_attrs = c("width", "color"), colorblind = FALSE, arrange_co = FALSE,
                        group_vec = NULL, size_type = "cytoscape",
                        width_type = "partcor", output_as = 'list')

# Generate circular visualization using anvis
B <- anvis(net0_h5,
           igr_plot_opts = list(vertex.frame.color = "black"),
           directed = FALSE, output_type = "igraph",
           vis_edge_factor = 3, cyto_node_space = 2,
           igr_grid = TRUE, vis_radial_labs = TRUE)

# Generate visualization using anvis changed in cytoscape
# (uses the grid layout function defined in the setup chunk)
A_h5 <- anvis(net0_h5,
              igr_plot_opts = list(layout = layout_func,
                                   vertex.label.cex = 0.6,
                                   vertex.label.color = "black",
                                   vertex.label.family = "sans"),
              vis_radial_labs = FALSE, cyto_close_session = FALSE)


### Melanoma_Set1 Protein-Protein Correlation Analysis

# Read set1 of Melanoma protein correlation data from csv file
mela1 <- read.csv("m1intersect.csv")
mela1_cor <- as.data.frame(mela1)
# Transpose, then promote the first row (the former first column) to column
# names and drop it from the data
mela1_cor <- t(mela1_cor)
colnames(mela1_cor) <- mela1_cor[1, ]
mela1_cor <- mela1_cor[-1, ]
# Convert the transposed matrix to its natural type. `as.is = TRUE` is stated
# explicitly: type.convert() warns when the caller leaves it unspecified.
m1 <- type.convert(mela1_cor, as.is = TRUE)
# NOTE(review): this flattened numeric copy is not used by the visible code
mela1_cor <- as.numeric(mela1_cor)

# Calculate the Spearman correlation matrix between columns of m1, zero out
# correlations with absolute value below 0.45, and clear the diagonal
cor_m1 <- cor(m1, method = "spearman")
cor_m1 <- as.data.frame(cor_m1)
cor_m1[abs(cor_m1) < 0.45] <- 0
diag(cor_m1) <- 0

# Filter and clean correlation network: drop columns, then rows, that have no
# remaining non-zero correlation
cor_net_m1 <- cor_m1[, colSums(abs(cor_m1), na.rm = TRUE) > 0]
cor_net_m1 <- cor_net_m1[rowSums(abs(cor_net_m1), na.rm = TRUE) > 0, ]

### Melanoma_Set1 Protein-Protein Network Visualization

# Convert adjacency matrix to network
# (custom_cols and rev_pal are defined in the Hela_Set4 section above)
net0_m1 <- adjToNetwork(cor_net_m1, directed = FALSE, self_loops = FALSE,
                        node_attrs = "none", group_colors = custom_cols,
                        edge_color_func = rev_pal,
                        edge_attrs = c("width", "color"), colorblind = FALSE, arrange_co = FALSE,
                        group_vec = NULL, size_type = "cytoscape",
                        width_type = "partcor", output_as = 'list')

# Generate circular visualization using anvis
C <- anvis(net0_m1,
           igr_plot_opts = list(vertex.frame.color = "black"),
           directed = FALSE, output_type = "igraph",
           vis_edge_factor = 3, cyto_node_space = 2,
           igr_grid = TRUE, vis_radial_labs = TRUE)

# Generate visualization using anvis changed in cytoscape
# (uses the grid layout function defined in the setup chunk)
C_m1 <- anvis(net0_m1,
              igr_plot_opts = list(layout = layout_func,
                                   vertex.label.cex = 0.6,
                                   vertex.label.color = "black",
                                   vertex.label.family = "sans"),
              vis_radial_labs = FALSE, cyto_close_session = FALSE)
```

### ---------- KEGG Pathway Enrichment Analysis ----------

```{r}
# Load the four identifier tables used by the KEGG analyses below.
# Each table's `entrezgene` column is what enrichKEGG() receives.

# Hela
hela_genes <- read_excel(path = "hela_idmap.xlsx")

# PDAC
pdac <- read_excel(path = "idmap.xlsx")

# Melanoma
melanoma_genes <- read_excel(path = "melanoma_idmap.xlsx")

# Monocytes
monocytes_genes <- read_excel(path = "monocytes_idmap.xlsx")


### Hela KEGG pathway analysis

# Perform KEGG pathway enrichment analysis for Hela cells
hela_kegg <- enrichKEGG(gene = hela_genes$entrezgene,
                        organism = 'hsa',
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.3,
                        keyType = "kegg")

# Generate a dotplot to visualize the enriched pathways for Hela cells
hela_kegg_dot <- dotplot(hela_kegg)

# Calculate pairwise similarity between enriched terms
he <- pairwise_termsim(hela_kegg)

# Generate an enriched map plot to visualize the connectivity between enriched terms
he_map <- emapplot(he, showCategory = 10)

# Convert the gene IDs in the enriched pathways to readable format using the org.Hs.eg.db package
hecenter <- setReadable(hela_kegg, OrgDb = "org.Hs.eg.db",
                        keyType = "ENTREZID")

# Generate a cnetplot to visualize the enriched pathways for Hela cells.
# The label argument is spelled `node_label`; the former `node_lable` was not
# a recognised argument name.
# NOTE(review): `foldChange` is given the enrichment result object itself
# rather than a per-gene numeric vector — confirm this is intended.
hela_kegg_cnet <- cnetplot(hecenter, foldChange = hela_kegg,
                           circular = FALSE, colorEdge = TRUE,
                           node_label = "all", showCategory = 5)


### PDAC KEGG pathway analysis

# Perform KEGG pathway enrichment analysis for PDAC cells
pdac_kegg <- enrichKEGG(gene = pdac$entrezgene,
                        organism = 'hsa',
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.3)

# Generate a dotplot to visualize the enriched pathways for PDAC cells
pdac_kegg_dot <- dotplot(pdac_kegg)

# Calculate pairwise similarity between enriched terms
pdkegg <- pairwise_termsim(pdac_kegg)

# Generate an enriched map plot to visualize the connectivity between enriched terms
pd_map <- emapplot(pdkegg, showCategory = 10)

# Convert the gene IDs in the enriched pathways to readable format using the org.Hs.eg.db package
pkcenter <- setReadable(pdac_kegg, OrgDb = "org.Hs.eg.db",
                        keyType = "ENTREZID")

# Generate a cnetplot to visualize the enriched pathways for PDAC cells.
# The label argument is spelled `node_label`; the former `node_lable` was not
# a recognised argument name.
# NOTE(review): `foldChange` is given the enrichment result object itself
# rather than a per-gene numeric vector — confirm this is intended.
pdac_kegg_cnet <- cnetplot(pkcenter, foldChange = pdac_kegg,
                           circular = FALSE, colorEdge = TRUE,
                           node_label = "all", showCategory = 5)


### Melanoma KEGG pathway analysis

# Perform KEGG pathway enrichment analysis for Melanoma cells
melanoma_kegg <- enrichKEGG(gene = melanoma_genes$entrezgene,
                            organism = 'hsa',
                            pvalueCutoff = 0.05,
                            qvalueCutoff = 0.3)

# Generate a dotplot to visualize the enriched pathways for Melanoma cells
melanoma_kegg_dot <- dotplot(melanoma_kegg)

# Calculate pairwise similarity between enriched terms
mk <- pairwise_termsim(melanoma_kegg)

# Generate an enriched map plot to visualize the connectivity between enriched terms
mela_map <- emapplot(mk, showCategory = 10)

# Convert the gene IDs in the enriched pathways to readable format using the org.Hs.eg.db package
mkcenter <- setReadable(melanoma_kegg, OrgDb = "org.Hs.eg.db",
                        keyType = "ENTREZID")

# Generate a cnetplot to visualize the enriched pathways for Melanoma cells.
# The label argument is spelled `node_label`; the former `node_lable` was not
# a recognised argument name.
# NOTE(review): `foldChange` is given the enrichment result object itself
# rather than a per-gene numeric vector — confirm this is intended.
mela_kegg_cnet <- cnetplot(mkcenter, foldChange = melanoma_kegg,
                           circular = FALSE, colorEdge = TRUE,
                           node_label = "all", showCategory = 5)


### Monocytes KEGG pathway analysis

# Perform KEGG pathway enrichment analysis for Monocytes cells
monocytes_kegg <- enrichKEGG(gene = monocytes_genes$entrezgene,
                             organism = 'hsa',
                             pvalueCutoff = 0.05,
                             qvalueCutoff = 0.3)

# Generate a dotplot to visualize the enriched pathways for Monocytes cells
monocytes_kegg_dot <- dotplot(monocytes_kegg)

# Calculate pairwise similarity between enriched terms
mok <- pairwise_termsim(monocytes_kegg)

# Generate an enriched map plot to visualize the connectivity between enriched terms
mok_emap <- emapplot(mok, showCategory = 10)

# Convert the gene IDs in the enriched pathways to readable format using the org.Hs.eg.db package
mokcenter <- setReadable(monocytes_kegg, OrgDb = "org.Hs.eg.db",
                         keyType = "ENTREZID")

# Generate a cnetplot to visualize the enriched pathways for Monocytes cells.
# The label argument is spelled `node_label`; the former `node_lable` was not
# a recognised argument name.
# NOTE(review): `foldChange` is given the enrichment result object itself
# rather than a per-gene numeric vector — confirm this is intended.
mono_kegg_cnet <- cnetplot(mokcenter, foldChange = monocytes_kegg,
                           circular = FALSE, colorEdge = TRUE,
                           node_label = "all", showCategory = 5)


### Saving the output plots

# All three figures are 2-column panels labelled A-D, ordered
# Monocytes, Hela, Melanoma, PDAC. Pixel size = inches * res.
res <- 300
w <- 16
h <- 12

# KEGG dot plots -> kegg_dot.png (16 x 12 in at 300 dpi)
png(filename = "kegg_dot.png", width = w * res, height = h * res, res = res)
cowplot::plot_grid(
  monocytes_kegg_dot, hela_kegg_dot, melanoma_kegg_dot, pdac_kegg_dot,
  ncol = 2, labels = LETTERS[1:4], label_size = 24
)
dev.off()

# KEGG enrichment maps -> kegg_E-Map.png (16 x 12 in at 300 dpi)
png(filename = "kegg_E-Map.png", width = w * res, height = h * res, res = res)
cowplot::plot_grid(
  mok_emap, he_map, mela_map, pd_map,
  ncol = 2, labels = LETTERS[1:4], label_size = 24
)
dev.off()

# KEGG gene-concept networks -> kegg_Cnet2.png (20 x 15 in at 250 dpi)
res <- 250
w <- 20
h <- 15
png(filename = "kegg_Cnet2.png", width = w * res, height = h * res, res = res)
cowplot::plot_grid(
  mono_kegg_cnet, hela_kegg_cnet, mela_kegg_cnet, pdac_kegg_cnet,
  ncol = 2, labels = LETTERS[1:4], label_size = 24
)
dev.off()
```

### ---------- Dimensionality Reduction Using Principal Component Analysis (PCA) ----------

```{r}
### Principal Component Analysis (PCA) for Hela

# Read Hela data from csv files
h4 <- read.csv("h4intersect.csv")
h5 <- read.csv("h5intersect.csv")

# Merge Hela cell datasets on the shared Protein key, then drop the key column
# and transpose so that every non-key input column becomes one row
h5_h4_merged <- merge(h5, h4, by = "Protein")
h5_h4_merged <- as.data.frame(t(h5_h4_merged[, -1]))

# Set colors for cell types.
# brewer.pal() has a minimum of n = 3; requesting 2 only produced a warning
# and returned the 3-colour palette, so 3 is requested directly (same colours).
nb.cols <- 2
mycolors <- colorRampPalette(brewer.pal(3, "Set1"))(nb.cols)

# Create labels for Hela cell types: one label per non-key column of each
# input, in the same order as the merged columns (h5 first, then h4)
celltype_h5 <- rep("Helacells5", ncol(h5) - 1)
celltype_h4 <- rep("Helacells4", ncol(h4) - 1)
celltype <- c(celltype_h5, celltype_h4)

# Perform PCA analysis on Hela merged dataset.
# The Protein key column was already removed before transposing, so the whole
# frame is analysed; dropping column 1 again would discard a real variable.
pca.merged <- PCA(h5_h4_merged, scale.unit = TRUE, graph = FALSE)

# Visualize eigenvalues
fviz_eig(pca.merged, addlabels = TRUE, ylim = c(0, 20))

# Visualize PCA results with colored individual points and ellipses for Hela cell types
HM_merged <- fviz_pca_ind(pca.merged, col.ind = celltype, addEllipses = TRUE,
                          palette = mycolors)

# Apply title, axis labels, legend settings and theme to the Hela PCA plot
merged_helaPCA <- ggpar(HM_merged,
                        title = "PCA_HelaCellTypes",
                        xlab = "PC1", ylab = "PC2",
                        legend.title = "Cell type", legend.position = "top",
                        ggtheme = theme_minimal())


### Principal Component Analysis (PCA) for Melanoma

# Read Melanoma data from csv files
melanoma1 <- read.csv("m1intersect.csv")
melanoma3 <- read.csv("m3intersect.csv")

# Merge Melanoma cell datasets on the shared proteins key, then drop the key
# column and transpose so that every non-key input column becomes one row
m1_m3_merged <- merge(melanoma1, melanoma3, by = "proteins")
m1_m3_merged <- as.data.frame(t(m1_m3_merged[, -1]))

# Set colors for Melanoma cell types.
# brewer.pal() has a minimum of n = 3; requesting 2 only produced a warning
# and returned the 3-colour palette, so 3 is requested directly (same colours).
nb.cols <- 2
mycolors <- colorRampPalette(brewer.pal(3, "Set1"))(nb.cols)

# Create labels for Melanoma cell types: one label per non-key column of each
# input, in the same order as the merged columns (melanoma1, then melanoma3)
celltype_m1 <- rep("Melanoma1", ncol(melanoma1) - 1)
celltype_m3 <- rep("Melanoma3", ncol(melanoma3) - 1)
celltype <- c(celltype_m1, celltype_m3)

# Perform PCA analysis on Melanoma merged dataset.
# The proteins key column was already removed before transposing, so the whole
# frame is analysed; dropping column 1 again would discard a real variable.
pca.merged <- PCA(m1_m3_merged, scale.unit = TRUE, graph = FALSE)

# Visualize eigenvalues
fviz_eig(pca.merged, addlabels = TRUE, ylim = c(0, 20))

# Visualize PCA results with colored individual points and ellipses for Melanoma cell types
MM_merged <- fviz_pca_ind(pca.merged, col.ind = celltype, addEllipses = TRUE,
                          palette = mycolors)

# Apply title, axis labels, legend settings and theme to the Melanoma PCA plot
merged_melanomaPCA <- ggpar(MM_merged,
                            title = "PCA_MelanomacellTypes",
                            xlab = "PC1", ylab = "PC2",
                            legend.title = "Cell type", legend.position = "top",
                            ggtheme = theme_minimal())


### Principal Component Analysis (PCA) for Monocytes

# Read Monocytes data from csv and excel files.
# The csv tables are the ones merged and analysed below; the Excel tables are
# still loaded so the U*_cells objects remain available.
monocytes1 <- read.csv("mono1intersect.csv")
U1_cells <- read_excel("U937_1.xlsx")

monocytes3 <- read.csv("mono3intersect.csv")
U3_cells <- read_excel("U937_3.xlsx")

monocytes4 <- read.csv("mono4intersect.csv")
U4_cells <- read_excel("U937_4.xlsx")

monocytes5 <- read.csv("mono5intersect.csv")
U5_cells <- read_excel("U937_5.xlsx")

monocytes6 <- read.csv("mono6intersect.csv")
U6_cells <- read_excel("U937_6.xlsx")

# Merge Monocyte cell datasets on the shared proteins key, then drop the key
# column and transpose so that every non-key input column becomes one row
U1_U3_merged <- merge(monocytes1, monocytes3, by = "proteins")
U4_U5_merged <- merge(monocytes4, monocytes5, by = "proteins")
U4_merged <- merge(U1_U3_merged, U4_U5_merged, by = "proteins")
U_merged <- merge(U4_merged, monocytes6, by = "proteins")
U_merged <- as.data.frame(t(U_merged[, -1]))

# Set colors for Monocyte cell types
nb.cols <- 5
mycolors <- colorRampPalette(brewer.pal(5, "Set1"))(nb.cols)

# Create labels for Monocyte cell types.
# Counts are taken from the csv tables that were actually merged, so the label
# vector always has one entry per row of U_merged (previously the counts came
# from the separately loaded Excel tables).
celltype_U1 <- rep("Monocytes1", ncol(monocytes1) - 1)
celltype_U3 <- rep("Monocytes3", ncol(monocytes3) - 1)
celltype_U4 <- rep("Monocytes4", ncol(monocytes4) - 1)
celltype_U5 <- rep("Monocytes5", ncol(monocytes5) - 1)
celltype_U6 <- rep("Monocytes6", ncol(monocytes6) - 1)
celltype <- c(celltype_U1, celltype_U3, celltype_U4, celltype_U5, celltype_U6)

# Perform PCA analysis on Monocyte merged dataset.
# The proteins key column was already removed before transposing, so the whole
# frame is analysed; dropping column 1 again would discard a real variable.
pca.merged <- PCA(U_merged, scale.unit = TRUE, graph = FALSE)

# Visualize eigenvalues
fviz_eig(pca.merged, addlabels = TRUE, ylim = c(0, 20))

# Visualize PCA results with colored individual points and ellipses for Monocyte cell types
MO_merged <- fviz_pca_ind(pca.merged, col.ind = celltype, addEllipses = TRUE,
                          palette = mycolors)

# Apply title, axis labels, legend settings and theme to the Monocytes PCA plot
merged_monocytesPCA <- ggpar(MO_merged,
                             title = "PCA_MonocytescellTypes",
                             xlab = "PC1", ylab = "PC2",
                             legend.title = "Cell type", legend.position = "top",
                             ggtheme = theme_minimal())


### Principal Component Analysis (PCA) for PDAC

# Read PDAC data from csv files
pdac2 <- read.csv("p2intersect.csv")
# p2intersect.csv is reshaped before merging: the values of its `proteins`
# column become the column names of the transposed table ...
col_names <- pdac2$proteins
pdac2 <- as.data.frame(t(pdac2[, -1]))
colnames(pdac2) <- col_names
# ... and the former column names (now row names) become the new `proteins`
# key column used for the merge below
proteins <- row.names(pdac2)
pdac2 <- cbind(proteins, pdac2)
# Reset the row names to default values
row.names(pdac2) <- NULL
# NOTE(review): these workbooks are opened as "PDAC2.xlsx"/"PDAC3.xlsx" in the
# Venn chunk but with lower-case names here — confirm on case-sensitive
# file systems.
PDAC2_cells <- read_excel("pdac2.xlsx")
pdac3 <- read.csv("p3intersect.csv")
PDAC3_cells <- read_excel("pdac3.xlsx")

# Merge PDAC cell datasets on the shared proteins key, then drop the key
# column and transpose so that every non-key input column becomes one row
p2_p3_merged <- merge(pdac2, pdac3, by = "proteins")
p2_p3_merged <- as.data.frame(t(p2_p3_merged[, -1]))

# Set colors for PDAC cell types.
# brewer.pal() has a minimum of n = 3; requesting 2 only produced a warning
# and returned the 3-colour palette, so 3 is requested directly (same colours).
nb.cols <- 2
mycolors <- colorRampPalette(brewer.pal(3, "Set1"))(nb.cols)

# Create labels for PDAC cell types.
# Counts are taken from the tables that were actually merged, so the label
# vector always has one entry per row of p2_p3_merged (previously the counts
# came from the separately loaded Excel tables).
celltype_Pdac2 <- rep("PDAC2", ncol(pdac2) - 1)
celltype_Pdac3 <- rep("PDAC3", ncol(pdac3) - 1)
celltype <- c(celltype_Pdac2, celltype_Pdac3)

# Perform PCA analysis on the PDAC merged dataset.
# The proteins key column was already removed before transposing, so the whole
# frame is analysed; dropping column 1 again would discard a real variable.
pca.merged <- PCA(p2_p3_merged, scale.unit = TRUE, graph = FALSE)

# Visualize eigenvalues
fviz_eig(pca.merged, addlabels = TRUE, ylim = c(0, 20))

# Visualize PCA results with colored individual points and ellipses for PDAC cell types
PD_merged <- fviz_pca_ind(pca.merged, col.ind = celltype, addEllipses = TRUE,
                          palette = mycolors)

# Apply title, axis labels, legend settings and theme to the PDAC PCA plot
merged_pdacPCA <- ggpar(PD_merged,
                        title = "PCA_PDACcellTypes",
                        xlab = "PC1", ylab = "PC2",
                        legend.title = "Cell type", legend.position = "top",
                        ggtheme = theme_minimal())


# Write the four PCA plots as a labelled 2-column panel to ALL_pca.png
# (14 x 10 in at 300 dpi; pixel size = inches * res)

res <- 300
w <- 14
h <- 10
png(filename = "ALL_pca.png", width = w * res, height = h * res, res = res)
cowplot::plot_grid(
  merged_monocytesPCA, merged_helaPCA, merged_melanomaPCA, merged_pdacPCA,
  ncol = 2,
  labels = LETTERS[1:4],
  label_size = 24
)
dev.off()
```

### ---------- Single-Cell Clustering using Uniform Manifold Approximation and Projection (UMAP) ----------

```{r}
### UMAP for Hela Cells

# Read Hela_Set5 data from CSV file
hela5 <- read.csv("h5intersect.csv")

# Keep the Protein column for use as column names, then drop it and transpose
# so that every remaining input column becomes one row
Protein <- hela5$Protein
hela5 <- as.data.frame(t(hela5[, -1]))

# Name the columns after the proteins, then store the row names (the former
# column names) in a leading `Protein` column and reset the row names
colnames(hela5) <- Protein
Protein <- row.names(hela5)
hela5 <- cbind(Protein, hela5)
row.names(hela5) <- NULL

# Perform UMAP analysis on Hela_Set5 data (label column excluded)
# NOTE(review): umap() is called with default settings and no seed is set in
# the visible code — the embedding may differ between runs; confirm whether a
# fixed seed is wanted.
hela5_umap <- umap(hela5[, -1])

# Create a data frame for UMAP coordinates and cell labels
umap_df_hela5 <- data.frame(umap1 = hela5_umap$layout[, 1],
                            umap2 = hela5_umap$layout[, 2],
                            cell = hela5$Protein)

# Calculate pairwise distances between UMAP coordinates.
# Stored as hela5_dist (matching hela4_dist in the Hela_Set4 section) rather
# than in a variable called `dist`, which would shadow the dist() function.
hela5_dist <- dist(hela5_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
helaclust <- hclust(hela5_dist, method = "complete")

# Assign clusters to UMAP coordinates by cutting the hierarchical tree into
# two groups, and add the cluster labels to the UMAP data frame
helaclusters <- cutree(helaclust, k = 2)
umap_df_hela5$hcluster_label <- as.factor(helaclusters)

# Generate a UMAP plot for Hela_Set5 cells
h5_umap <- ggplot(data = umap_df_hela5,
                  aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "Hela5 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# --- Hela_Set5 cluster 1 ---
# Rows of the UMAP table assigned to cluster 1; their cell IDs are printed
hcluster_1 <- umap_df_hela5[which(umap_df_hela5$hcluster_label == "1"), ]
hcluster_1$cell

# Index hela5 by cell ID, pull the rows of this cluster, transpose them, and
# prepend the hela5 column names as the first column. The first row of the
# result (the transposed Protein column) is skipped when writing.
row.names(hela5) <- hela5$Protein
h5_clust_abun1 <- as.data.frame(t(hela5[hcluster_1$cell, ]))
h5_clust_abun1 <- cbind(colnames(hela5), h5_clust_abun1)
hela5_cluster1 <- write_csv(
  h5_clust_abun1[-1, ],
  file = "hela_cluster1.csv",
  col_names = FALSE
)

# --- Hela_Set5 cluster 2 ---
# Rows of the UMAP table assigned to cluster 2; their cell IDs are printed
hcluster_2 <- umap_df_hela5[which(umap_df_hela5$hcluster_label == "2"), ]
hcluster_2$cell

# Same export as for cluster 1, written to hela_cluster2.csv
row.names(hela5) <- hela5$Protein
h5_clust_abun2 <- as.data.frame(t(hela5[hcluster_2$cell, ]))
h5_clust_abun2 <- cbind(colnames(hela5), h5_clust_abun2)
hela5_cluster2 <- write_csv(
  h5_clust_abun2[-1, ],
  file = "hela_cluster2.csv",
  col_names = FALSE
)


# Load the Hela_Set4 table
hela4 <- read.csv("h4intersect.csv")

# Remember the Protein column, drop it, transpose, and use the remembered
# values as column names
Protein <- hela4$Protein
hela4 <- setNames(as.data.frame(t(hela4[, -1])), Protein)

# Move the row names (the former column names) into a leading `Protein`
# column and reset the row names
Protein <- rownames(hela4)
hela4 <- cbind(Protein, hela4)
rownames(hela4) <- NULL

# UMAP embedding of everything except the label column
hela4_umap <- umap(hela4[, -1])

# Embedding coordinates together with the cell labels
umap_df_hela4 <- data.frame(
  umap1 = hela4_umap$layout[, 1],
  umap2 = hela4_umap$layout[, 2],
  cell = hela4$Protein
)

# Distances between embedded points
hela4_dist <- dist(hela4_umap$layout)

# Complete-linkage hierarchical clustering on those distances
helaclust <- hclust(hela4_dist, method = "complete")

# Cut the tree into four clusters
helaclusters <- cutree(helaclust, k = 4)

# Attach the cluster labels as a factor
umap_df_hela4$hcluster_label <- as.factor(helaclusters)

# Scatter plot of the embedding, coloured by cluster
h4_umap <- ggplot(
  data = umap_df_hela4,
  aes(x = umap1, y = umap2, color = hcluster_label)
) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "Hela4 subgroups") +
  scale_y_continuous() +
  scale_x_continuous() +
  theme(legend.position = "bottom")

# Subset the data for Hela_Set4 cluster 1
h4cluster_1 <- subset(umap_df_hela4, hcluster_label=='1')
h4cluster_1$cell

# Save the abundance data for Hela_Set4 cluster 1 to a CSV file
row.names(hela4) <- hela4$Protein
h4_clust_abun1 <- hela4[c(h4cluster_1$cell), ]
h4_clust_abun1 <- t(h4_clust_abun1)
h4_clust_abun1 <- as.data.frame(h4_clust_abun1)
h4_clust_abun1 <- cbind(colnames(hela4), h4_clust_abun1)
hela4_cluster1 <- write_csv(h4_clust_abun1[-1,], 
                            file = "hela4_cluster1.csv", 
                            col_names = F)

# Subset the data for Hela_Set4 cluster 2
h4cluster_2 <- subset(umap_df_hela4, hcluster_label=='2')
h4cluster_2$cell

# Save the abundance data for Hela_Set4 cluster 2 to a CSV file
row.names(hela4) <- hela4$Protein
h4_clust_abun2 <- hela4[c(h4cluster_2$cell), ]
h4_clust_abun2 <- t(h4_clust_abun2)
h4_clust_abun2 <- as.data.frame(h4_clust_abun2)
h4_clust_abun2 <- cbind(colnames(hela4), h4_clust_abun2)
h4_hela4_cluster2 <- write_csv(h4_clust_abun2[-1,], 
                            file = "hela4_cluster2.csv", 
                            col_names = F)

# Subset the data for Hela_Set4 cluster 3
h4cluster_3 <- subset(umap_df_hela4, hcluster_label=='3')
h4cluster_3$cell

# Save the abundance data for Hela_Set4 cluster 3 to a CSV file
row.names(hela4) <- hela4$Protein
h4_clust_abun3 <- hela4[c(h4cluster_3$cell), ]
h4_clust_abun3 <- t(h4_clust_abun3)
h4_clust_abun3 <- as.data.frame(h4_clust_abun3)
h4_clust_abun3 <- cbind(colnames(hela4), h4_clust_abun3)
hela4_cluster3 <- write_csv(h4_clust_abun3[-1,], 
                            file = "hela4_cluster3.csv", 
                            col_names = F)

# Subset the data for Hela_Set4 cluster 4
h4cluster_4 <- subset(umap_df_hela4, hcluster_label=='4')
h4cluster_4$cell

# Save the abundance data for Hela_Set4 cluster 4 to a CSV file
row.names(hela4) <- hela4$Protein
h4_clust_abun4 <- hela4[c(h4cluster_4$cell), ]
h4_clust_abun4 <- t(h4_clust_abun4)
h4_clust_abun4 <- as.data.frame(h4_clust_abun4)
h4_clust_abun4 <- cbind(colnames(hela4), h4_clust_abun4)
hela4_cluster4 <- write_csv(h4_clust_abun4[-1,], 
                            file = "hela4_cluster4.csv", 
                            col_names = F)


### UMAP for PDAC cells

# Read PDAC_Set2 data from CSV file
# NOTE(review): unlike the other data sets in this section, this table is not
# transposed after reading; presumably p2intersect.csv already has one row per
# cell with the cell IDs in `proteins` -- confirm.
pdac2 <- read.csv("p2intersect.csv")

# Perform UMAP analysis on PDAC_Set2 data (ID column excluded)
pdac2_umap <- umap(pdac2[, -1])

# Create a data frame for UMAP coordinates and cell labels
umap_df_pdac2 <- data.frame(umap1 = pdac2_umap$layout[, 1],
                            umap2 = pdac2_umap$layout[, 2],
                            cell = pdac2$proteins)

# Calculate pairwise distances between UMAP coordinates
pdac2_dist <- dist(pdac2_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
pdac2clust <- hclust(pdac2_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
pdac2clusters <- cutree(pdac2clust, k = 3)

# Add cluster labels to the UMAP data frame
umap_df_pdac2$hcluster_label <- as.factor(pdac2clusters)

# Generate a UMAP plot for PDAC_Set2 cells
pd2_umap <- ggplot(data = umap_df_pdac2,
                   aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "PDAC2 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index pdac2 by the IDs in `proteins` so that the rows of a cluster can be
# looked up by name (set once; it does not change per cluster)
row.names(pdac2) <- pdac2$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for PDAC_Set2 cluster 1
pd2cluster_1 <- subset(umap_df_pdac2, hcluster_label == "1")
pd2cluster_1$cell

# Save the abundance data for PDAC_Set2 cluster 1 to a CSV file
# (no header; first column = names of the abundance columns of pdac2)
p2_clust_abun1 <- as.data.frame(t(pdac2[pd2cluster_1$cell, -1, drop = FALSE]))
p2_clust_abun1 <- cbind(proteins = colnames(pdac2)[-1], p2_clust_abun1)
pdac2_cluster1 <- write_csv(p2_clust_abun1,
                            file = "pdac2_cluster1.csv",
                            col_names = FALSE)

# Subset the data for PDAC_Set2 cluster 2
pd2cluster_2 <- subset(umap_df_pdac2, hcluster_label == "2")
pd2cluster_2$cell

# Save the abundance data for PDAC_Set2 cluster 2 to a CSV file
p2_clust_abun2 <- as.data.frame(t(pdac2[pd2cluster_2$cell, -1, drop = FALSE]))
p2_clust_abun2 <- cbind(proteins = colnames(pdac2)[-1], p2_clust_abun2)
pdac2_cluster2 <- write_csv(p2_clust_abun2,
                            file = "pdac2_cluster2.csv",
                            col_names = FALSE)

# Subset the data for PDAC_Set2 cluster 3
pd2cluster_3 <- subset(umap_df_pdac2, hcluster_label == "3")
pd2cluster_3$cell

# Save the abundance data for PDAC_Set2 cluster 3 to a CSV file
p2_clust_abun3 <- as.data.frame(t(pdac2[pd2cluster_3$cell, -1, drop = FALSE]))
p2_clust_abun3 <- cbind(proteins = colnames(pdac2)[-1], p2_clust_abun3)
pdac2_cluster3 <- write_csv(p2_clust_abun3,
                            file = "pdac2_cluster3.csv",
                            col_names = FALSE)


# Read PDAC_Set3 data from CSV file
pdac3 <- read.csv("p3intersect.csv")

# Keep the protein names, then transpose so that the former columns become rows
proteins <- pdac3$proteins
pdac3 <- as.data.frame(t(pdac3[, -1]))

# Use the protein names as column names and move the row names (used below as
# cell IDs) into a leading `proteins` column
colnames(pdac3) <- proteins
proteins <- row.names(pdac3)
pdac3 <- cbind(proteins, pdac3)
row.names(pdac3) <- NULL

# Perform UMAP analysis on PDAC_Set3 data (ID column excluded)
pdac3_umap <- umap(pdac3[, -1])

# Create a data frame for UMAP coordinates and cell labels
umap_df_pdac3 <- data.frame(umap1 = pdac3_umap$layout[, 1],
                            umap2 = pdac3_umap$layout[, 2],
                            cell = pdac3$proteins)

# Calculate pairwise distances between UMAP coordinates
pdac3_dist <- dist(pdac3_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
pdac3clust <- hclust(pdac3_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
pdac3clusters <- cutree(pdac3clust, k = 2)

# Add cluster labels to the UMAP data frame
umap_df_pdac3$hcluster_label <- as.factor(pdac3clusters)

# Generate a UMAP plot for PDAC_Set3 cells
pd3_umap <- ggplot(data = umap_df_pdac3,
                   aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "PDAC3 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index pdac3 by cell ID so that the cells of a cluster can be looked up by
# name (set once; it does not change per cluster)
row.names(pdac3) <- pdac3$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for PDAC_Set3 cluster 1
pd3cluster_1 <- subset(umap_df_pdac3, hcluster_label == "1")
pd3cluster_1$cell

# Save the abundance data for PDAC_Set3 cluster 1 to a CSV file
# (no header; first column = protein name, one further column per cell)
p3_clust_abun1 <- as.data.frame(t(pdac3[pd3cluster_1$cell, -1, drop = FALSE]))
p3_clust_abun1 <- cbind(proteins = colnames(pdac3)[-1], p3_clust_abun1)
pdac3_cluster1 <- write_csv(p3_clust_abun1,
                            file = "pdac3_cluster1.csv",
                            col_names = FALSE)

# Subset the data for PDAC_Set3 cluster 2
pd3cluster_2 <- subset(umap_df_pdac3, hcluster_label == "2")
pd3cluster_2$cell

# Save the abundance data for PDAC_Set3 cluster 2 to a CSV file
p3_clust_abun2 <- as.data.frame(t(pdac3[pd3cluster_2$cell, -1, drop = FALSE]))
p3_clust_abun2 <- cbind(proteins = colnames(pdac3)[-1], p3_clust_abun2)
pdac3_cluster2 <- write_csv(p3_clust_abun2,
                            file = "pdac3_cluster2.csv",
                            col_names = FALSE)


### UMAP for Melanoma cells

# Read Melanoma_Set1 data from CSV file
melanoma1 <- read.csv("m1intersect.csv")

# Keep the protein names, then transpose so that the former columns become rows
proteins <- melanoma1$proteins
melanoma1 <- as.data.frame(t(melanoma1[, -1]))

# Use the protein names as column names and move the row names (used below as
# cell IDs) into a leading `proteins` column
colnames(melanoma1) <- proteins
proteins <- row.names(melanoma1)
melanoma1 <- cbind(proteins, melanoma1)
row.names(melanoma1) <- NULL

# Perform UMAP analysis on Melanoma_Set1 data (ID column excluded)
melanoma1_umap <- umap(melanoma1[, -1])

# Create a data frame for UMAP coordinates and cell labels.
# Only the ID column is stored as `cell` (as in the Hela and PDAC sections);
# passing the whole table here would copy every abundance column into the
# UMAP data frame under `cell.<name>`.
umap_df_melanoma1 <- data.frame(umap1 = melanoma1_umap$layout[, 1],
                                umap2 = melanoma1_umap$layout[, 2],
                                cell = melanoma1$proteins)

# Calculate pairwise distances between UMAP coordinates
melanoma1_dist <- dist(melanoma1_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
melanoma1clust <- hclust(melanoma1_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
melanoma1clusters <- cutree(melanoma1clust, k = 3)

# Add cluster labels to the UMAP data frame
umap_df_melanoma1$hcluster_label <- as.factor(melanoma1clusters)

# Generate a UMAP plot for Melanoma_Set1 cells
m1_umap <- ggplot(data = umap_df_melanoma1,
                  aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "Melanoma1 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index melanoma1 by cell ID so that the cells of a cluster can be looked up
# by name (set once; it does not change per cluster)
row.names(melanoma1) <- melanoma1$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for Melanoma_Set1 cluster 1
me1cluster_1 <- subset(umap_df_melanoma1, hcluster_label == "1")
me1cluster_1$cell

# Save the abundance data for Melanoma_Set1 cluster 1 to a CSV file
# (no header; first column = protein name, one further column per cell)
m1_clust_abun1 <- as.data.frame(
  t(melanoma1[me1cluster_1$cell, -1, drop = FALSE])
)
m1_clust_abun1 <- cbind(proteins = colnames(melanoma1)[-1], m1_clust_abun1)
melanoma1_cluster1 <- write_csv(m1_clust_abun1,
                                file = "melanoma1_cluster1.csv",
                                col_names = FALSE)

# Subset the data for Melanoma_Set1 cluster 2
me1cluster_2 <- subset(umap_df_melanoma1, hcluster_label == "2")
me1cluster_2$cell

# Save the abundance data for Melanoma_Set1 cluster 2 to a CSV file
m1_clust_abun2 <- as.data.frame(
  t(melanoma1[me1cluster_2$cell, -1, drop = FALSE])
)
m1_clust_abun2 <- cbind(proteins = colnames(melanoma1)[-1], m1_clust_abun2)
melanoma1_cluster2 <- write_csv(m1_clust_abun2,
                                file = "melanoma1_cluster2.csv",
                                col_names = FALSE)

# Subset the data for Melanoma_Set1 cluster 3
me1cluster_3 <- subset(umap_df_melanoma1, hcluster_label == "3")
me1cluster_3$cell

# Save the abundance data for Melanoma_Set1 cluster 3 to a CSV file
m1_clust_abun3 <- as.data.frame(
  t(melanoma1[me1cluster_3$cell, -1, drop = FALSE])
)
m1_clust_abun3 <- cbind(proteins = colnames(melanoma1)[-1], m1_clust_abun3)
melanoma1_cluster3 <- write_csv(m1_clust_abun3,
                                file = "melanoma1_cluster3.csv",
                                col_names = FALSE)


# Read Melanoma_Set3 data from CSV file
melanoma3 <- read.csv("m3intersect.csv")

# Keep the protein names, then transpose so that the former columns become rows
proteins <- melanoma3$proteins
melanoma3 <- as.data.frame(t(melanoma3[, -1]))

# Use the protein names as column names and move the row names (used below as
# cell IDs) into a leading `proteins` column
colnames(melanoma3) <- proteins
proteins <- row.names(melanoma3)
melanoma3 <- cbind(proteins, melanoma3)
row.names(melanoma3) <- NULL

# Perform UMAP analysis on Melanoma_Set3 data (ID column excluded)
melanoma3_umap <- umap(melanoma3[, -1])

# Create a data frame for UMAP coordinates and cell labels.
# Only the ID column is stored as `cell` (as in the Hela and PDAC sections);
# passing the whole table here would copy every abundance column into the
# UMAP data frame under `cell.<name>`.
umap_df_melanoma3 <- data.frame(umap1 = melanoma3_umap$layout[, 1],
                                umap2 = melanoma3_umap$layout[, 2],
                                cell = melanoma3$proteins)

# Calculate pairwise distances between UMAP coordinates
melanoma3_dist <- dist(melanoma3_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
melanoma3clust <- hclust(melanoma3_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
melanoma3clusters <- cutree(melanoma3clust, k = 3)

# Add cluster labels to the UMAP data frame
umap_df_melanoma3$hcluster_label <- as.factor(melanoma3clusters)

# Generate a UMAP plot for Melanoma_Set3 cells
m3_umap <- ggplot(data = umap_df_melanoma3,
                  aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "Melanoma3 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index melanoma3 by cell ID so that the cells of a cluster can be looked up
# by name (set once; it does not change per cluster)
row.names(melanoma3) <- melanoma3$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for Melanoma_Set3 cluster 1
me3cluster_1 <- subset(umap_df_melanoma3, hcluster_label == "1")
me3cluster_1$cell

# Save the abundance data for Melanoma_Set3 cluster 1 to a CSV file
# (no header; first column = protein name, one further column per cell)
m3_clust_abun1 <- as.data.frame(
  t(melanoma3[me3cluster_1$cell, -1, drop = FALSE])
)
m3_clust_abun1 <- cbind(proteins = colnames(melanoma3)[-1], m3_clust_abun1)
melanoma3_cluster1 <- write_csv(m3_clust_abun1,
                                file = "melanoma3_cluster1.csv",
                                col_names = FALSE)

# Subset the data for Melanoma_Set3 cluster 2
me3cluster_2 <- subset(umap_df_melanoma3, hcluster_label == "2")
me3cluster_2$cell

# Save the abundance data for Melanoma_Set3 cluster 2 to a CSV file
m3_clust_abun2 <- as.data.frame(
  t(melanoma3[me3cluster_2$cell, -1, drop = FALSE])
)
m3_clust_abun2 <- cbind(proteins = colnames(melanoma3)[-1], m3_clust_abun2)
melanoma3_cluster2 <- write_csv(m3_clust_abun2,
                                file = "melanoma3_cluster2.csv",
                                col_names = FALSE)

# Subset the data for Melanoma_Set3 cluster 3
me3cluster_3 <- subset(umap_df_melanoma3, hcluster_label == "3")
me3cluster_3$cell

# Save the abundance data for Melanoma_Set3 cluster 3 to a CSV file
m3_clust_abun3 <- as.data.frame(
  t(melanoma3[me3cluster_3$cell, -1, drop = FALSE])
)
m3_clust_abun3 <- cbind(proteins = colnames(melanoma3)[-1], m3_clust_abun3)
melanoma3_cluster3 <- write_csv(m3_clust_abun3,
                                file = "melanoma3_cluster3.csv",
                                col_names = FALSE)


### UMAP for Monocytes cells

# Read Monocytes_Set1 data from CSV file
monocytes1 <- read.csv("mono1intersect.csv")

# Keep the protein names, then transpose so that the former columns become rows
proteins <- monocytes1$proteins
monocytes1 <- as.data.frame(t(monocytes1[, -1]))

# Use the protein names as column names and move the row names (used below as
# cell IDs) into a leading `proteins` column
colnames(monocytes1) <- proteins
proteins <- row.names(monocytes1)
monocytes1 <- cbind(proteins, monocytes1)
row.names(monocytes1) <- NULL

# Perform UMAP analysis on Monocytes_Set1 data (ID column excluded)
monocytes1_umap <- umap(monocytes1[, -1])

# Create a data frame for UMAP coordinates and cell labels.
# Only the ID column is stored as `cell` (as in the Hela and PDAC sections);
# passing the whole table here would copy every abundance column into the
# UMAP data frame under `cell.<name>`.
umap_df_monocytes1 <- data.frame(umap1 = monocytes1_umap$layout[, 1],
                                 umap2 = monocytes1_umap$layout[, 2],
                                 cell = monocytes1$proteins)

# Calculate pairwise distances between UMAP coordinates
monocytes1_dist <- dist(monocytes1_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
monocytes1clust <- hclust(monocytes1_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
monocytes1clusters <- cutree(monocytes1clust, k = 2)

# Add cluster labels to the UMAP data frame
umap_df_monocytes1$hcluster_label <- as.factor(monocytes1clusters)

# Generate a UMAP plot for Monocytes_Set1 cells
mo1_umap <- ggplot(data = umap_df_monocytes1,
                   aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "U1 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index monocytes1 by cell ID so that the cells of a cluster can be looked up
# by name (set once; it does not change per cluster)
row.names(monocytes1) <- monocytes1$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for Monocytes_Set1 cluster 1
mo1cluster_1 <- subset(umap_df_monocytes1, hcluster_label == "1")
mo1cluster_1$cell

# Save the abundance data for Monocytes_Set1 cluster 1 to a CSV file
# (no header; first column = protein name, one further column per cell)
clustmo_abun1 <- as.data.frame(
  t(monocytes1[mo1cluster_1$cell, -1, drop = FALSE])
)
clustmo_abun1 <- cbind(proteins = colnames(monocytes1)[-1], clustmo_abun1)
monocytes1_cluster1 <- write_csv(clustmo_abun1,
                                 file = "monocytes1_cluster1.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set1 cluster 2
mo1cluster_2 <- subset(umap_df_monocytes1, hcluster_label == "2")
mo1cluster_2$cell

# Save the abundance data for Monocytes_Set1 cluster 2 to a CSV file
clustmo_abun2 <- as.data.frame(
  t(monocytes1[mo1cluster_2$cell, -1, drop = FALSE])
)
clustmo_abun2 <- cbind(proteins = colnames(monocytes1)[-1], clustmo_abun2)
monocytes1_cluster2 <- write_csv(clustmo_abun2,
                                 file = "monocytes1_cluster2.csv",
                                 col_names = FALSE)


# Read Monocytes_Set3 data from CSV file
monocytes3 <- read.csv("mono3intersect.csv")

# Keep the protein names, then transpose so that the former columns become rows
proteins <- monocytes3$proteins
monocytes3 <- as.data.frame(t(monocytes3[, -1]))

# Use the protein names as column names and move the row names (used below as
# cell IDs) into a leading `proteins` column
colnames(monocytes3) <- proteins
proteins <- row.names(monocytes3)
monocytes3 <- cbind(proteins, monocytes3)
row.names(monocytes3) <- NULL

# Perform UMAP analysis on Monocytes_Set3 data (ID column excluded)
monocytes3_umap <- umap(monocytes3[, -1])

# Create a data frame for UMAP coordinates and cell labels.
# Only the ID column is stored as `cell` (as in the Hela and PDAC sections);
# passing the whole table here would copy every abundance column into the
# UMAP data frame under `cell.<name>`.
umap_df_monocytes3 <- data.frame(umap1 = monocytes3_umap$layout[, 1],
                                 umap2 = monocytes3_umap$layout[, 2],
                                 cell = monocytes3$proteins)

# Calculate pairwise distances between UMAP coordinates
monocytes3_dist <- dist(monocytes3_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
monocytes3clust <- hclust(monocytes3_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
monocytes3clusters <- cutree(monocytes3clust, k = 3)

# Add cluster labels to the UMAP data frame
umap_df_monocytes3$hcluster_label <- as.factor(monocytes3clusters)

# Generate a UMAP plot for Monocytes_Set3 cells
mo3_umap <- ggplot(data = umap_df_monocytes3,
                   aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "U3 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index monocytes3 by cell ID so that the cells of a cluster can be looked up
# by name (set once; it does not change per cluster)
row.names(monocytes3) <- monocytes3$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for Monocytes_Set3 cluster 1
mo3cluster_1 <- subset(umap_df_monocytes3, hcluster_label == "1")
mo3cluster_1$cell

# Save the abundance data for Monocytes_Set3 cluster 1 to a CSV file
# (no header; first column = protein name, one further column per cell)
clustmo3_abun1 <- as.data.frame(
  t(monocytes3[mo3cluster_1$cell, -1, drop = FALSE])
)
clustmo3_abun1 <- cbind(proteins = colnames(monocytes3)[-1], clustmo3_abun1)
monocytes3_cluster1 <- write_csv(clustmo3_abun1,
                                 file = "monocytes3_cluster1.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set3 cluster 2
mo3cluster_2 <- subset(umap_df_monocytes3, hcluster_label == "2")
mo3cluster_2$cell

# Save the abundance data for Monocytes_Set3 cluster 2 to a CSV file
clustmo3_abun2 <- as.data.frame(
  t(monocytes3[mo3cluster_2$cell, -1, drop = FALSE])
)
clustmo3_abun2 <- cbind(proteins = colnames(monocytes3)[-1], clustmo3_abun2)
monocytes3_cluster2 <- write_csv(clustmo3_abun2,
                                 file = "monocytes3_cluster2.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set3 cluster 3
mo3cluster_3 <- subset(umap_df_monocytes3, hcluster_label == "3")
mo3cluster_3$cell

# Save the abundance data for Monocytes_Set3 cluster 3 to a CSV file
clustmo3_abun3 <- as.data.frame(
  t(monocytes3[mo3cluster_3$cell, -1, drop = FALSE])
)
clustmo3_abun3 <- cbind(proteins = colnames(monocytes3)[-1], clustmo3_abun3)
monocytes3_cluster3 <- write_csv(clustmo3_abun3,
                                 file = "monocytes3_cluster3.csv",
                                 col_names = FALSE)


# Read Monocytes_Set4 data from CSV file
monocytes4 <- read.csv("mono4intersect.csv")

# Keep the protein names, then transpose so that the former columns become rows
proteins <- monocytes4$proteins
monocytes4 <- as.data.frame(t(monocytes4[, -1]))

# Use the protein names as column names and move the row names (used below as
# cell IDs) into a leading `proteins` column
colnames(monocytes4) <- proteins
proteins <- row.names(monocytes4)
monocytes4 <- cbind(proteins, monocytes4)
row.names(monocytes4) <- NULL

# Perform UMAP analysis on Monocytes_Set4 data (ID column excluded)
monocytes4_umap <- umap(monocytes4[, -1])

# Create a data frame for UMAP coordinates and cell labels.
# Only the ID column is stored as `cell` (as in the Hela and PDAC sections);
# passing the whole table here would copy every abundance column into the
# UMAP data frame under `cell.<name>`.
umap_df_monocytes4 <- data.frame(umap1 = monocytes4_umap$layout[, 1],
                                 umap2 = monocytes4_umap$layout[, 2],
                                 cell = monocytes4$proteins)

# Calculate pairwise distances between UMAP coordinates
monocytes4_dist <- dist(monocytes4_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
monocytes4clust <- hclust(monocytes4_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
monocytes4clusters <- cutree(monocytes4clust, k = 2)

# Add cluster labels to the UMAP data frame
umap_df_monocytes4$hcluster_label <- as.factor(monocytes4clusters)

# Generate a UMAP plot for Monocytes_Set4 cells
mo4_umap <- ggplot(data = umap_df_monocytes4,
                   aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "U4 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index monocytes4 by cell ID so that the cells of a cluster can be looked up
# by name (set once; it does not change per cluster)
row.names(monocytes4) <- monocytes4$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for Monocytes_Set4 cluster 1
mo4cluster_1 <- subset(umap_df_monocytes4, hcluster_label == "1")
mo4cluster_1$cell

# Save the abundance data for Monocytes_Set4 cluster 1 to a CSV file
# (no header; first column = protein name, one further column per cell)
clustmo4_abun1 <- as.data.frame(
  t(monocytes4[mo4cluster_1$cell, -1, drop = FALSE])
)
clustmo4_abun1 <- cbind(proteins = colnames(monocytes4)[-1], clustmo4_abun1)
monocytes4_cluster1 <- write_csv(clustmo4_abun1,
                                 file = "monocytes4_cluster1.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set4 cluster 2
mo4cluster_2 <- subset(umap_df_monocytes4, hcluster_label == "2")
mo4cluster_2$cell

# Save the abundance data for Monocytes_Set4 cluster 2 to a CSV file
clustmo4_abun2 <- as.data.frame(
  t(monocytes4[mo4cluster_2$cell, -1, drop = FALSE])
)
clustmo4_abun2 <- cbind(proteins = colnames(monocytes4)[-1], clustmo4_abun2)
monocytes4_cluster2 <- write_csv(clustmo4_abun2,
                                 file = "monocytes4_cluster2.csv",
                                 col_names = FALSE)


# Read Monocytes_Set5 data from CSV file
monocytes5 <- read.csv("mono5intersect.csv")

# Keep the protein names, then transpose so that the former columns become rows
proteins <- monocytes5$proteins
monocytes5 <- as.data.frame(t(monocytes5[, -1]))

# Use the protein names as column names and move the row names (used below as
# cell IDs) into a leading `proteins` column
colnames(monocytes5) <- proteins
proteins <- row.names(monocytes5)
monocytes5 <- cbind(proteins, monocytes5)
row.names(monocytes5) <- NULL

# Perform UMAP analysis on Monocytes_Set5 data (ID column excluded)
monocytes5_umap <- umap(monocytes5[, -1])

# Create a data frame for UMAP coordinates and cell labels.
# Only the ID column is stored as `cell` (as in the Hela and PDAC sections);
# passing the whole table here would copy every abundance column into the
# UMAP data frame under `cell.<name>`.
umap_df_monocytes5 <- data.frame(umap1 = monocytes5_umap$layout[, 1],
                                 umap2 = monocytes5_umap$layout[, 2],
                                 cell = monocytes5$proteins)

# Calculate pairwise distances between UMAP coordinates
monocytes5_dist <- dist(monocytes5_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
monocytes5clust <- hclust(monocytes5_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
monocytes5clusters <- cutree(monocytes5clust, k = 4)

# Add cluster labels to the UMAP data frame
umap_df_monocytes5$hcluster_label <- as.factor(monocytes5clusters)

# Generate a UMAP plot for Monocytes_Set5 cells
mo5_umap <- ggplot(data = umap_df_monocytes5,
                   aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "U5 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index monocytes5 by cell ID so that the cells of a cluster can be looked up
# by name (set once; it does not change per cluster)
row.names(monocytes5) <- monocytes5$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for Monocytes_Set5 cluster 1
mo5cluster_1 <- subset(umap_df_monocytes5, hcluster_label == "1")
mo5cluster_1$cell

# Save the abundance data for Monocytes_Set5 cluster 1 to a CSV file
# (no header; first column = protein name, one further column per cell)
clustmo5_abun1 <- as.data.frame(
  t(monocytes5[mo5cluster_1$cell, -1, drop = FALSE])
)
clustmo5_abun1 <- cbind(proteins = colnames(monocytes5)[-1], clustmo5_abun1)
monocytes5_cluster1 <- write_csv(clustmo5_abun1,
                                 file = "monocytes5_cluster1.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set5 cluster 2
mo5cluster_2 <- subset(umap_df_monocytes5, hcluster_label == "2")
mo5cluster_2$cell

# Save the abundance data for Monocytes_Set5 cluster 2 to a CSV file
clustmo5_abun2 <- as.data.frame(
  t(monocytes5[mo5cluster_2$cell, -1, drop = FALSE])
)
clustmo5_abun2 <- cbind(proteins = colnames(monocytes5)[-1], clustmo5_abun2)
monocytes5_cluster2 <- write_csv(clustmo5_abun2,
                                 file = "monocytes5_cluster2.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set5 cluster 3
mo5cluster_3 <- subset(umap_df_monocytes5, hcluster_label == "3")
mo5cluster_3$cell

# Save the abundance data for Monocytes_Set5 cluster 3 to a CSV file
clustmo5_abun3 <- as.data.frame(
  t(monocytes5[mo5cluster_3$cell, -1, drop = FALSE])
)
clustmo5_abun3 <- cbind(proteins = colnames(monocytes5)[-1], clustmo5_abun3)
monocytes5_cluster3 <- write_csv(clustmo5_abun3,
                                 file = "monocytes5_cluster3.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set5 cluster 4
mo5cluster_4 <- subset(umap_df_monocytes5, hcluster_label == "4")
mo5cluster_4$cell

# Save the abundance data for Monocytes_Set5 cluster 4 to a CSV file
clustmo5_abun4 <- as.data.frame(
  t(monocytes5[mo5cluster_4$cell, -1, drop = FALSE])
)
clustmo5_abun4 <- cbind(proteins = colnames(monocytes5)[-1], clustmo5_abun4)
monocytes5_cluster4 <- write_csv(clustmo5_abun4,
                                 file = "monocytes5_cluster4.csv",
                                 col_names = FALSE)


# Read Monocytes_Set6 data from CSV file
monocytes6 <- read.csv("mono6intersect.csv")

# Keep the protein names, then transpose so that the former columns become rows
proteins <- monocytes6$proteins
monocytes6 <- as.data.frame(t(monocytes6[, -1]))

# Use the protein names as column names and move the row names (used below as
# cell IDs) into a leading `proteins` column
colnames(monocytes6) <- proteins
proteins <- row.names(monocytes6)
monocytes6 <- cbind(proteins, monocytes6)
row.names(monocytes6) <- NULL

# Perform UMAP analysis on Monocytes_Set6 data (ID column excluded)
monocytes6_umap <- umap(monocytes6[, -1])

# Create a data frame for UMAP coordinates and cell labels.
# Only the ID column is stored as `cell` (as in the Hela and PDAC sections);
# passing the whole table here would copy every abundance column into the
# UMAP data frame under `cell.<name>`.
umap_df_monocytes6 <- data.frame(umap1 = monocytes6_umap$layout[, 1],
                                 umap2 = monocytes6_umap$layout[, 2],
                                 cell = monocytes6$proteins)

# Calculate pairwise distances between UMAP coordinates
monocytes6_dist <- dist(monocytes6_umap$layout)

# Perform hierarchical clustering on the UMAP coordinates
monocytes6clust <- hclust(monocytes6_dist, method = "complete")

# Assign clusters to UMAP coordinates based on cutting the hierarchical tree
monocytes6clusters <- cutree(monocytes6clust, k = 3)

# Add cluster labels to the UMAP data frame
umap_df_monocytes6$hcluster_label <- as.factor(monocytes6clusters)

# Generate a UMAP plot for Monocytes_Set6 cells
mo6_umap <- ggplot(data = umap_df_monocytes6,
                   aes(x = umap1, y = umap2, color = hcluster_label)) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", colour = "U6 subgroups") +
  scale_y_continuous() + scale_x_continuous() +
  theme(legend.position = "bottom")

# Index monocytes6 by cell ID so that the cells of a cluster can be looked up
# by name (set once; it does not change per cluster)
row.names(monocytes6) <- monocytes6$proteins

# The exports below transpose only the abundance columns. Calling t() on a
# data frame that still contains the character ID column makes R format()
# every numeric column into padded strings with 7 significant digits, and
# those strings would be what ends up in the CSV.

# Subset the data for Monocytes_Set6 cluster 1
mo6cluster_1 <- subset(umap_df_monocytes6, hcluster_label == "1")
mo6cluster_1$cell

# Save the abundance data for Monocytes_Set6 cluster 1 to a CSV file
# (no header; first column = protein name, one further column per cell)
clustmo6_abun1 <- as.data.frame(
  t(monocytes6[mo6cluster_1$cell, -1, drop = FALSE])
)
clustmo6_abun1 <- cbind(proteins = colnames(monocytes6)[-1], clustmo6_abun1)
monocytes6_cluster1 <- write_csv(clustmo6_abun1,
                                 file = "monocytes6_cluster1.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set6 cluster 2
mo6cluster_2 <- subset(umap_df_monocytes6, hcluster_label == "2")
mo6cluster_2$cell

# Save the abundance data for Monocytes_Set6 cluster 2 to a CSV file
clustmo6_abun2 <- as.data.frame(
  t(monocytes6[mo6cluster_2$cell, -1, drop = FALSE])
)
clustmo6_abun2 <- cbind(proteins = colnames(monocytes6)[-1], clustmo6_abun2)
monocytes6_cluster2 <- write_csv(clustmo6_abun2,
                                 file = "monocytes6_cluster2.csv",
                                 col_names = FALSE)

# Subset the data for Monocytes_Set6 cluster 3
mo6cluster_3 <- subset(umap_df_monocytes6, hcluster_label == "3")
mo6cluster_3$cell

# Save the abundance data for Monocytes_Set6 cluster 3 to a CSV file
clustmo6_abun3 <- as.data.frame(
  t(monocytes6[mo6cluster_3$cell, -1, drop = FALSE])
)
clustmo6_abun3 <- cbind(proteins = colnames(monocytes6)[-1], clustmo6_abun3)
monocytes6_cluster3 <- write_csv(clustmo6_abun3,
                                 file = "monocytes6_cluster3.csv",
                                 col_names = FALSE)


### Saving the output plots

# Each figure is written with png(): `w` and `h` are the size in inches and
# `res` the resolution in dpi, so the pixel size is w * res by h * res.
# The grid is print()ed explicitly: a ggplot/cowplot object is only drawn when
# it is printed, and relying on top-level auto-printing leaves the PNG empty
# when this code is run via source() or from inside a function or loop.

# Save UMAP of Hela cells to png file
res <- 300
w <- 9
h <- 6
png("Hela_umap.png", width = w * res, height = h * res, res = res)
print(cowplot::plot_grid(h4_umap, h5_umap, ncol = 2,
                         labels = c("A", "B"),
                         label_size = 16))
dev.off()

# Save UMAP of PDAC cells to png file
res <- 300
w <- 9
h <- 6
png("PDAC_umap.png", width = w * res, height = h * res, res = res)
print(cowplot::plot_grid(pd2_umap, pd3_umap, ncol = 2,
                         labels = c("A", "B"),
                         label_size = 16))
dev.off()

# Save UMAP of Melanoma cells to png file
res <- 300
w <- 9
h <- 6
png("Melanoma_umap.png", width = w * res, height = h * res, res = res)
print(cowplot::plot_grid(m1_umap, m3_umap, ncol = 2,
                         labels = c("A", "B"),
                         label_size = 16))
dev.off()

# Save UMAP of Monocytes cells to png file (five panels, hence the wider page)
res <- 300
w <- 18
h <- 6
png("Monocytes_umap.png", width = w * res, height = h * res, res = res)
print(cowplot::plot_grid(mo1_umap, mo3_umap, mo4_umap, mo5_umap, mo6_umap,
                         ncol = 5, labels = c("A", "B", "C", "D", "E"),
                         label_size = 16))
dev.off()
```