-
Notifications
You must be signed in to change notification settings - Fork 0
/
04_PLS.R
378 lines (306 loc) · 14.2 KB
/
04_PLS.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
# ===========================================================================================
# Title: Afrotheria - Principal component analysis (PCA)
# Author: Anne Le Maitre, Nicole D. S. Grunstra
# Date: 2024-07-12
# -------------------------------------------------------------------------------------------
# R version: 4.3.1
# Required extensions: ape, geomorph, Morpho, ggrepel, phytools
# -------------------------------------------------------------------------------------------
# Aims: Perform the two-block partial least squares (2B-PLS) analysis
# of the Procrustes-aligned coordinates of 124 landmarks placed on the bony labyrinth
# against 12 contextual variables for 40 taxa (20 Afrotheria, 20 non-afrotherian),
# as well as cross-validation of the results, and the phylogenetic PLS analyses
# Input:
# - Results of the script "00_Definitions.R"
# - Slid landmark coordinates as a CSV file
# - Phylogenetic tree for the 40 taxa
# Output:
# - 2B-PLS results and related plots
# - Leave-one-out and leave-two-out cross-validation of the PLS loadings
# - Phylogenetic signal in the PLS scores
# - phylogenetic PLS results and related plots
# ===========================================================================================
# ---- Packages ----
library(ape) # for phylogenetic analyses
library(geomorph) # geometric morphometrics
library(Morpho) # geometric morphometrics
library(phytools) # phylogenetic signal
library(ggrepel) # plots
# ---- 1.3 Prepare contextual data ----
# First, run the R script '00_Definitions'
source("00_Definitions.R")
# Center and scale contextual variables
ECOL <- as.matrix(Afrotheria[, -c(1:4)]) # matrix of the 12 contextual variables
rownames(ECOL) <- Afrotheria$BinomialName
ECOL_std <- scale(ECOL) # center and scale
# Check associations among contextual variables
PCA_ecol <- prcomp(ECOL_std)
summary(PCA_ecol)
screeplot(PCA_ecol, xlab = "Dimensions", las = 1) # scree plot
biplot(PCA_ecol, c(1, 2)) # biplot of PC 1 and 2
# ---- 1.4 Prepare landmark data ----
# Here we directly load the slid landmark coordinates
# They are in the original coordinate space
# Load the slid landmark coordinates
slid_data <- read.csv("Grunstra_et_al_Afrotheria_landmarks_slid_not_aligned.csv")
# Conversion to a matrix
slid <- as.matrix(slid_data[, -1])
# Name rows and columns
rownames(slid) <- slid_data$Species
# Conversion to an array
A_slid <- arrayspecs(slid, p = ie$p, k = 3)
# Generalized Procrustes superimposition (no sliding needed)
A.gpa <- procSym(A_slid)
# ---- 1.5 Phylogenetic tree ----
# Load tree
tree <- read.nexus("Grunstra_et_al_Afrotheria_phyl_tree.txt")
# Plot tree
plot(tree, cex = .8)
# ---- 2. Two-block PLS analysis ----
# ---- 2.1 Perform the analysis ----
# 2B-PLS
afro.pls <- pls2B(A.gpa$rotated, ECOL_std) # analysis
# Table of the proportion of covariance explained by each PLS dimension
afro.pls$CoVar
# Scree plot
plot(afro.pls$CoVar[, 2], type = "b", col = "blue", las = 1,
main = "Scree plot", xlab = "Dimension",
ylab = "% of total covariance explained")
# Rename column and row names for future export and analyses
colnames(afro.pls$Xscores) <- paste("PLS", 1:ncol(afro.pls$Xscores), sep = "")
colnames(afro.pls$Yscores) <- paste("PLS", 1:ncol(afro.pls$Yscores), sep = "")
colnames(afro.pls$svd$u) <- paste("PLS", 1:ncol(afro.pls$svd$u), sep = "")
colnames(afro.pls$svd$v) <- paste("PLS", 1:ncol(afro.pls$svd$v), sep = "")
rownames(afro.pls$svd$v) <- colnames(ECOL_std) # contextual variable names
# OPTIONAL: export data
#write.csv(afro.pls$Xscores, file = "Grunstra_et_al_Afrotheria_PLS_ShapeScores.csv")
#write.csv(afro.pls$Yscores, file = "Grunstra_et_al_Afrotheria_PLS_ContextScores.csv")
#write.csv(afro.pls$svd$u, file = "Grunstra_et_al_Afrotheria_PLS_ShapeLoadings.csv")
#write.csv(afro.pls$svd$v, file = "Grunstra_et_al_Afrotheria_PLS_ContextLoadings.csv")
# ---- 2.2 Scatter plot: PLS scores ----
# PLS scores for a given dimension
pls <- 1 # example: PLS 1
PLSscores <- cbind(afro.pls$Xscores[, pls], afro.pls$Yscores[, pls])
colnames(PLSscores) <- c("Xscores", "Yscores")
PLSscores <- data.frame(PLSscores)
PLSscores$Habitat <- ecol
PLSscores$Clade <- afro
# Scatter plot of shape scores vs. context scores
ggplot(data = PLSscores, aes(x = Xscores,
y = Yscores,
shape = Clade, colour = Habitat,
label = Afrotheria$CommonName)) +
geom_text_repel(box.padding = 0.5,
segment.curvature = -0.1,
segment.ncp = 3,
segment.angle = 20,
color = "black",
max.overlaps = Inf) +
geom_point(size = 2.5) +
scale_color_manual(values = couleurs) +
theme_classic() +
xlab("shape scores") +
ylab("context scores")
# Scatter plot of shape scores vs. context scores
PLSscores$Habitat <- factor(pch.ecol2)
PLSscores$Clade <- col.ecol2
ggplot(data = PLSscores, aes(x = Xscores,
y = Yscores,
shape = Habitat, colour = Clade,
label = Afrotheria$CommonName)) +
geom_text_repel(box.padding = 0.5,
segment.curvature = -0.1,
segment.ncp = 3,
segment.angle = 20,
color = "black",
max.overlaps = Inf) +
geom_point(size = 2.5) +
scale_color_manual(values = c("black", "blue")) +
theme_classic() +
xlab("shape scores") +
ylab("context scores")
# ---- 2.3 Bar plot: PLS loadings of contextual variables ----
# Contextual variables
plsi <- 1 # dimension
par(mar = c(6, 4, 4, 2) + 0.1) # set margin sizes
barplot(afro.pls$svd$v[, plsi],
main = paste("PLS dimension", plsi),
col = "lightblue", las = 2,
cex.names = 0.8)
# Back to initial margin values
par(mar = c(5, 4, 4, 2) + 0.1)
# ---- 2.4 3D plots: PLS loadings of Procrustes coordinates ----
# Option 1: use the Morpho function
# Compute coordinates to visualize shape patterns
plsi <- 1 # PLS 1
std <- 2 # 2 standard deviations of the scores
pls.change <- plsCoVar(afro.pls, i = plsi, sdx = std)
# Visualize 3D shape patterns
layout3d(matrix(1:2, ncol = 2, byrow = TRUE)) # 2 plots in the same window
plotLaby(pls.change$x[,, 1], title = paste("-", std, "sd")) # Negative scores
next3d() # Go to the next 3D plot
plotLaby(pls.change$x[,, 2], title = paste("+", std, "sd")) # Positive scores
# Option 2: use the geomorph function
# Do the 2B-PLS with the geomorph function (same results, except sign changes)
afro.pls2 <- two.b.pls(A.gpa$rotated, ECOL_std)
# Use the function plot.pls to compute required parameters
P <- plot(afro.pls2)
preds <- shape.predictor(P$A1,
x = P$plot.args$x,
min = min(P$plot.args$x),
max = max(P$plot.args$x))
# Visualize 3D shape patterns
open3d()
layout3d(matrix(1:2, ncol = 2, byrow = TRUE)) # 2 plots in the same window
plotLaby(preds$min, title = paste("minimum")) # minimum values
next3d() # next plot
plotLaby(preds$max, title = paste("maximum")) # maximum values
# ---- 3. Cross-validation of the PLS scores ----
# ---- 3.1 Leave-one-out cross-validation ----
# Here we perform a cross-validation of the PLS loadings for contextual variables,
# by leaving out one species at a time
# PLS on the whole sample
afro.pls <- pls2B(A.gpa$rotated, ECOL_std)
rownames(afro.pls$svd$v) <- colnames(ECOL_std) # contextual variable names
# Ecological variables
plsi <- 1 # dimension
par(mar = c(7, 4, 4, 2) + 0.1) # set margin sizes
barplot(afro.pls$svd$v[, plsi],
main = paste("PLS dimension", plsi),
col = "lightblue", las = 2,
ylim = c(-0.7, 0.7),
cex.names = 1)
# PLS with leave-one-out
# (specimen removed *after* GPA and standardization of contextual variables)
for (i in 1:nrow(ECOL_std)) {
afro.plsi <- pls2B(A.gpa$rotated[, , -i], ECOL_std[-i, ])
ecoli <- afro.plsi$svd$v
si <- cor(afro.pls$svd$v[, plsi], afro.plsi$svd$v[, plsi])
if (si < 0) {ecoli <- -ecoli}
points(x = 1:nrow(ecoli) * 1.2 - 0.5, y = ecoli[, plsi],
pch = 16, col = "blue")
}
# ---- 3.2 Leave-two-out cross-validation ----
# Here we perform a cross-validation of the PLS loadings for contextual variables,
# by leaving out two species at a time
# PLS on the whole sample
afro.pls <- pls2B(A.gpa$rotated, ECOL_std)
rownames(afro.pls$svd$v) <- colnames(ECOL_std) # contextual variable names
# PLS with leave-two-out
plsi <- 1 # dimension
par(mar = c(7, 4, 4, 2) + 0.1) # set margin sizes
barplot(afro.pls$svd$v[, plsi],
main = paste("PLS dimension", plsi),
col = "lightblue", las = 2,
ylim = c(-0.7, 0.7),
cex.names = 1)
# PLS with leave-two-out
# (specimen removed *after* GPA and standardization of contextual variables)
for (i in 2:nrow(ECOL_std)) {
for (j in 1:(i-1)) {
spi <- c(i,j)
afro.plsi <- pls2B(A.gpa$rotated[, , -spi], ECOL_std[-spi, ])
ecoli <- afro.plsi$svd$v
si <- cor(afro.pls$svd$v[, plsi], afro.plsi$svd$v[, plsi])
if (si < 0) {ecoli <- -ecoli}
points(x = 1:nrow(ecoli) * 1.2 - 0.5, y = ecoli[, plsi],
pch = 16, col = "blue")
}
}
# Back to initial margin values
par(mar = c(5, 4, 4, 2) + 0.1)
# ---- 4. Phylogenetic signal on PLS scores (phylogeny-unadjusted) ----
# Here we compute the phylogenetic signal in the first four dimensions of the 2B-PLS
# Function to generate phylogenetic signal for multiple traits simultaneously,
# using phytools:phylosig (code copied from Liam Revell's blog)
# Choose the first 4 PLS dimensions
plsi <- 1:4
# Shape scores from 2B-PLS (not shown)
K_shape <- t(simplify2array(apply(afro.pls$Xscores[, plsi], 2,
phylosig,
tree = tree,
method = "K",
test = TRUE,
nsim = 1000)))
K_shape
# SW 1: K=0.518 (p=0.043), SW 2: K=0.704 (p=0.004), SW 3: K=0.476 (p=0.023), SW 4: K=0.433 (p=0.052)
# Context scores from same 2B-PLS as above (not shown)
K_context <- t(simplify2array(apply(afro.pls$Yscores[, plsi], 2,
phylosig,
tree=tree,
method="K",
test=TRUE,
nsim=1000)))
K_context
# along PLS 1: K=0.689 (p=0.007), PLS 2: K=0.486 (p=0.016), PLS 3: K=0.481 (p=0.017), PLS 4: K=0.385 (p=0.09)
# ---- 5. Phylogenetic PLS ----
# ---- 5.1 Perform the analysis ----
# Phylogenetic PLS
afro.phyl.pls <- phylo.integration(A.gpa$rotated, ECOL_std, phy = tree)
# Summary results
summary(afro.phyl.pls)
# Proportion of covariance explained by each PLS dimension
afro.phyl.pls$CoVar <- cbind(afro.phyl.pls$svd$d,
100 * afro.phyl.pls$svd$d^2 / sum(afro.phyl.pls$svd$d^2))
dimnames(afro.phyl.pls$CoVar) <- list(1:12, c("singular value", "% total covar."))
# Scree plot
plot(afro.phyl.pls$CoVar[, 2], type = "b", col = "blue", las = 1,
main = "Scree plot", xlab = "Dimension",
ylab = "% of total covariance explained")
# Rename column and row names for future export and analyses
colnames(afro.phyl.pls$XScores) <- paste("PLS", 1:ncol(afro.phyl.pls$XScores), sep = "")
colnames(afro.phyl.pls$YScores) <- paste("PLS", 1:ncol(afro.phyl.pls$YScores), sep = "")
colnames(afro.phyl.pls$svd$u) <- paste("PLS", 1:ncol(afro.phyl.pls$svd$u), sep = "")
colnames(afro.phyl.pls$svd$v) <- paste("PLS", 1:ncol(afro.phyl.pls$svd$v), sep = "")
rownames(afro.phyl.pls$svd$v) <- colnames(ECOL_std) # contextual variable names
# OPTIONAL: export data
#write.csv(afro.phyl.pls$XScores, file = "Grunstra_et_al_Afrotheria_phylPLS_ShapeScores.csv")
#write.csv(afro.phyl.pls$YScores, file = "Grunstra_et_al_Afrotheria_phylPLS_ContextScores.csv")
#write.csv(afro.phyl.pls$svd$u, file = "Grunstra_et_al_Afrotheria_phylPLS_ShapeLoadings.csv")
#write.csv(afro.phyl.pls$svd$v, file = "Grunstra_et_al_Afrotheria_phylPLS_ContextLoadings.csv")
# ---- 5.2 Scatter plot: phylogenetic PLS scores ----
# PLS scores for a given dimension
pls <- 1 # example: PLS 1
PLSscores <- cbind(afro.phyl.pls$XScores[, pls], afro.phyl.pls$YScores[, pls])
colnames(PLSscores) <- c("Xscores", "Yscores")
PLSscores <- data.frame(PLSscores)
PLSscores$Habitat <- ecol
PLSscores$Clade <- afro
# Scatter plot of shape scores vs. context scores
ggplot(data = PLSscores, aes(x = Xscores,
y = Yscores,
shape = Clade, colour = Habitat,
label = Afrotheria$CommonName)) +
geom_text_repel(box.padding = 0.5,
segment.curvature = -0.1,
segment.ncp = 3,
segment.angle = 20,
color = "black",
max.overlaps = Inf) +
geom_point(size = 2.5) +
scale_color_manual(values = couleurs) +
theme_classic() +
xlab("shape scores") +
ylab("context scores")
# ---- 5.3 Bar plot: phylogenetic PLS loadings of contextual variables ----
# Ecological variables
plsi <- 1 # dimension
par(mar = c(6, 4, 4, 2) + 0.1) # set margin sizes
barplot(afro.phyl.pls$right.pls.vectors[, plsi],
main = paste("PLS dimension", plsi),
col = "lightblue", las = 2,
cex.names = 0.8)
# Back to initial margin values
par(mar = c(5, 4, 4, 2) + 0.1)
# ---- 5.4 3D plots: phylogenetic PLS loadings of Procrustes coordinates ----
# Compute shape patterns (PLS 1)
phyl.P <- plot(afro.phyl.pls)
phyl.preds <- shape.predictor(phyl.P$A1,
x = phyl.P$plot.args$x,
min = min(phyl.P$plot.args$x),
max = max(phyl.P$plot.args$x))
# Visualize 3D shape patterns
layout3d(matrix(1:2, ncol = 2, byrow = TRUE)) # 2 plots in the same window
plotLaby(phyl.preds$min, title = paste("minimum")) # minimum values
next3d() # next plot
plotLaby(phyl.preds$max, title = paste("maximum")) # maximum values