-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexploration.R
70 lines (53 loc) · 1.8 KB
/
exploration.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Exploratory analysis of expression of collagen-related genes shared by all
# transcriptomic datasets
#
# 1) Characteristics of the cohorts and samples
#
# 2) Normality of distribution of the collagen-related gene variables
# (Shapiro-Wilk test) and their information content
# (element frequency, variance and Gini coefficient)
#
# 3) Co-expression analysis of the genes in the cancer tissue. This analysis
# step involves calculation of Euclidean distances between the genes and
# multi-dimensional scaling, as well as a PCA
#
# 4) Assessment of clustering tendency of the collagen expression data set
# by Hopkins statistic and visualizations (heat maps and UMAP)
# tools ------
library(tidyverse)
library(rlang)
library(trafo)
library(stringi)
library(readxl)
library(exda)
library(microViz)
library(clustTools)
library(survival)
library(survminer)
library(ggrepel)
library(ggtext)
library(furrr)
library(soucer)
# Script start marker (helper called with no arguments; presumably logs a
# header for this script -- confirm against the helper package)
insert_head()

# Bind frequently used function names explicitly to the package they should
# come from (presumably to avoid masking between the attached packages)
select <- dplyr::select
reduce <- purrr::reduce
explore <- exda::explore
set_rownames <- trafo::set_rownames

# Project-wide globals and helper functions; `crash = TRUE` is passed so that,
# presumably, a failing script aborts the run -- TODO confirm
source_all(
  c("./tools/globals.R",
    "./tools/functions.R"),
  message = TRUE,
  crash = TRUE
)
# Analysis scripts ---------

insert_msg("Analysis scripts")

## Per-cohort steps: cohort characteristics, general distribution stats,
## co-expression, and clustering tendency analysis. The value returned by
## source_all() is printed.

print(
  source_all(
    c("./exploration scripts/cohorts.R",
      "./exploration scripts/distribution.R",
      "./exploration scripts/coexpression.R",
      "./exploration scripts/clust_tendency.R"),
    message = TRUE,
    crash = TRUE
  )
)

## Characteristics of the pooled GEO cohort

source_all(
  c("./exploration scripts/pooled_geo.R"),
  message = TRUE,
  crash = TRUE
)

# END ------

insert_tail()