-
Notifications
You must be signed in to change notification settings - Fork 0
/
20-global-study.Rmd
103 lines (74 loc) · 5.17 KB
/
20-global-study.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# (PART) 国际研究格局 {-}
# 全球合作网络 {#global}
在这一部分,我们将解析肠道菌群研究的国际合作关系。
```{r top-country-collaboration-sankey, fig.cap="主要国家间的合作关系", fig.asp=2,fig.width=5}
# Commented-out code that writes data/country_collaboration_networks.RDS
# (the file read below). Kept for reference; it is not run when knitting.
# WA <- cocMatrix(data.frame(M), Field = "AU_CO_NR") # co-occurrence matrix
# all_country <- colnames(WA)
# china_part <- all_country[is.part_of_china(all_country)]
# CHINA <- Matrix::rowSums(WA[,china_part])
# WA <- WA[, !all_country %in% c(china_part,"NA")]
# WA <- cbind(WA,CHINA)
# netMatrix <- Matrix::crossprod(WA,WA)
# country_collaboration_netMatrix <- netMatrix[colnames(netMatrix)!= "NA", colnames(netMatrix)!= "NA"]
# saveRDS(country_collaboration_netMatrix,file = "data/country_collaboration_networks.RDS")
library(networkD3)

# Load the cached collaboration matrix and restrict it to the countries that
# appear as factor levels of mostProdCountries$Country.
netMatrix <- readRDS("data/country_collaboration_networks.RDS")
mostProdCountries <- readRDS("data/mostProdCountries.RDS")
top_country <- levels(mostProdCountries$Country)
top_country_adj_matrix <- netMatrix[top_country, top_country]
m <- top_country_adj_matrix
# m[upper.tri(m,diag = TRUE)] <- 0

# Reshape the matrix into one row per (source, target) pair. Only pairs with
# a positive value are kept and self-pairs are dropped. Because the
# upper-triangle masking above is disabled, each pair can appear in both
# directions. A trailing space is appended to every target name so that the
# target copy of a country is a different node from its source copy.
sankey_df <- as.matrix(m) %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column(var = "source") %>%
  pivot_longer(cols = -source, names_to = "target", values_to = "value") %>%
  filter(value > 0, source != target) %>%
  mutate(target = paste0(target, " "))

# Node table listing every entity involved in the flows: each top country
# appears twice, once under its plain name (source side) and once with a
# trailing space (target side). `countries` holds the name without the space.
# The join has no explicit `by`, so it uses whatever columns the two tables
# share.
# NOTE(review): presumably the shared key is `countries` -- confirm against
# country_translations and make it explicit.
nodes <- data.frame(
  name = c(rev(top_country), paste0(rev(top_country), " ")),
  stringsAsFactors = FALSE
) %>%
  mutate(countries = trimws(name)) %>%
  left_join(country_translations)
# nodes <- data.frame(name=c(as.character(sankey_df$source), as.character(sankey_df$target)) %>% unique())

# networkD3 identifies link endpoints by zero-based row index into the node
# table, not by name, so translate the names into indices.
sankey_df$IDsource <- match(sankey_df$source, nodes$name) - 1
sankey_df$IDtarget <- match(sankey_df$target, nodes$name) - 1

# Colour scale, passed to the widget as a JavaScript (d3) expression.
ColourScal <- 'd3.scaleOrdinal() .range(["#FDE725FF","#B4DE2CFF","#6DCD59FF","#35B779FF","#1F9E89FF","#26828EFF","#31688EFF","#3E4A89FF","#482878FF","#440154FF"])'

# Draw the Sankey diagram.
# NOTE(review): NodeID must name a column of `nodes`; "China" can only come
# from country_translations (not visible here) -- confirm that it exists and
# holds the intended node labels.
sankeyNetwork(
  Links = sankey_df,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "China",
  sinksRight = FALSE,
  colourScale = ColourScal,
  nodeWidth = 40,
  fontSize = 13,
  nodePadding = 20,
  iterations = 0
)
```
首先,我们看一下全球肠道菌群研究前 `r length(top_country)` 个国家间合作发表论文的情况。图 \@ref(fig:top-country-collaboration-sankey) 展示了这些国家两两之间合作发表论文的数量。例如,中美合作发表论文 1378 篇,是所有国家组合中最多的;其次是英美合作,有 898 篇;第三是美加合作,有 800 篇;第四是美德合作,有 705 篇;第五是美法合作,有 522 篇。
这前五名中都有**美国**的身影,说明美国科学家在合作研究中占据核心地位。
```{r country-collaboration-network, fig.cap="国家间的合作网络",fig.width=6,fig.asp=1}
# Build an undirected, weighted graph from the top-country collaboration
# matrix. simplify() is applied so that self-loops (the matrix diagonal) and
# duplicate edges do not appear in the network.
adj_mat <- top_country_adj_matrix
# adj_mat <- netMatrix
g <- graph_from_adjacency_matrix(adj_mat, weighted = TRUE, mode = "undirected")
g <- simplify(g)

# Vertex size is 3 * sqrt(100 * eigenvector centrality); edge width is the
# base-10 logarithm of the edge weight.
vertex.attributes(g)$centrality <- eigen_centrality(g)$vector
vertex.attributes(g)$size <- sqrt(vertex.attributes(g)$centrality * 100) * 3
edge.attributes(g)$width <- log10(edge.attributes(g)$weight)
# plot(g)

# Convert to visNetwork's node/edge tables. Node ids and the edge endpoints
# that reference them are title-cased together so they keep matching; other
# columns are left unchanged.
data <- toVisNetworkData(g)
data$nodes <- data$nodes %>%
  mutate(id = str_to_title(id))
data$edges <- data$edges %>%
  mutate(from = str_to_title(from), to = str_to_title(to))

# One colour per node, interpolated from the 7-step "Reds" palette. Nodes are
# sorted by ascending size first, so colours are assigned in palette order
# from the smallest node to the largest.
pal_Reds <- brewer.pal(n = 7, name = "Reds")
Reds <- colorRampPalette(pal_Reds)(nrow(data$nodes))
data$nodes <- data$nodes %>%
  arrange(size) %>%
  mutate(color = Reds)

# Only edges with weight > 100 are drawn, to thin out the network.
visNetwork(
  nodes = data$nodes,
  edges = data$edges %>% filter(weight > 100)
) %>%
  visIgraphLayout(physics = TRUE) %>%
  visNodes(size = "size", color = "color", shadow = TRUE) %>%
  visEdges(width = "width", color = "#B3B3B3") %>%
  visOptions(highlightNearest = TRUE)
```
为了更直观地观察全球的合作网络,我们采用网络分析的方法进一步阐述。点的大小代表*特征向量中心性*,线的宽度表示国家间合作关系的多少(图 \@ref(fig:country-collaboration-network))。这个网络基本上是一个完全连接的网络,即所有的 `r length(top_country)` 个国家与其它所有国家间都有合作关系的存在。为了简化这一网络,我们去掉了合作论文数不超过 100 的边,得到上图。可以很明显地看出,与中国联系最紧密的国家依次是美国、英国、加拿大和澳大利亚。*特征向量中心性*大的节点对相连的节点影响更大,因此可以直观地看到各个国家在整个合作网络中的重要性。
毫无疑问,美国是肠道菌群研究的领跑者。这不仅体现在美国的菌群研究开展得最早、体量最大、领域最广泛,而且体现在各大高校、研究所和社会力量的广泛参与上。除此之外,我们还可以发现,当前肠道菌群研究成果转化最积极的地方也是美国。