forked from michaelgreenacre/CODAinPractice
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CAGEOscript.R
115 lines (96 loc) · 4.89 KB
/
CAGEOscript.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
### -------------------------------------------------------------------------------
### Install easyCODA package from CRAN in usual way or latest version from R-Forge:
### (Note: version 0.32 of easyCODA was used here)
install.packages("easyCODA", repos="http://R-Forge.R-project.org")
### -------------------------------------------
### Aar Massif data set is in data object 'Aar'
### in the package 'compositions'
library(compositions)
data(Aar)
### columns 3-12 are the oxide data
aar <- Aar[,3:12]
### close the data set
aar <- aar / rowSums(aar)
rownames(aar) <- 1:nrow(aar)
### ---------------------------------------------------------------
### compute the (unweighted) logratio variance three different ways
library(easyCODA)
### (1) as the sum of all pairwise logratio variances divided by 100 (=J squared)
### option weight=FALSE implies equal weights, each logratio gets weight (1/10)x(1/10)
### notice that the weights are always passed on in the logratio object
aar.LR <- LR(aar, weight=FALSE)
LR.VAR(aar.LR)
# [1] 0.1522912
### (2) as the sum of all CLR variances divided by 10 (=J)
aar.CLR <- CLR(aar, weight=FALSE)
LR.VAR(aar.CLR)
# [1] 0.1522912
### (3) as the average of all squared elements of the double-centred log-transformed data matrix
aarlog <- log(aar)
aarlog.DC <- sweep(aarlog, 1, rowMeans(aarlog))
aarlog.DC <- sweep(aarlog.DC, 2, colMeans(aarlog.DC))
sum(aarlog.DC^2) /(nrow(aar)*ncol(aar))
# [1] 0.1522912
### ---------------------------------------------------------------
### Stepwise selection of ratios, including the three amalgamations
### Define the amalgamations and add them to the set of 10 parts
mafic <- rowSums(aar[,c(5,10,4)])
felsic <- rowSums(aar[,c(7,1,3,8)])
carb_apat <- rowSums(aar[,c(6,9)])
aar.amalg <- cbind(aar, mafic, felsic, carb_apat)
### Perform the stepwise analysis
aar.step <- STEP(aar.amalg, aar, weight=FALSE)
### Table 1 of article
cbind(aar.step$ratios, round(100*aar.step$R2max,1), round(aar.step$pro.cor,3))
### -------------------------------------------------------------------------
### LRA of original data and PCA of the 9 selected LRs
### (abbreviation LR = pairwise logratio, in this case including SLR balance)
par(mar=c(4.2,4,3,3), font.lab=2, cex.lab=1.2, mgp=c(2.7,0.7,0), las=1, mfrow=c(1,2), cex.axis=0.8)
### LRA (logratio analysis, = PCA of the CLRs) (Fig 2(a) of article)
aar.lra <- LRA(aar, weight=FALSE)
PLOT.LRA(aar.lra, map="contribution")
### PCA of the selected logratios (Fig 2(b) of article)
rownames(aar.step$logratios) <- 1:nrow(aar)
aar.ratios.pca <- PCA(aar.step$logratios, weight=FALSE)
PLOT.PCA(aar.ratios.pca, map="contribution", axes.inv=c(1,-1), rescale=2)
### --------------------------------------------------------
### Procrustes correlation of full-space geometry of samples
### (the element $rowpcoord means row principal coordinates)
protest(aar.ratios.pca$rowpcoord, aar.lra$rowpcoord)$t0
# [1] 0.993197
### Procrustes correlation of two configurations of samples in two dimensions
protest(aar.ratios.pca$rowpcoord[,1:2], aar.lra$rowpcoord[,1:2])$t0
# [1] 0.9971076
### ----------------------------------------------------------------------------
### Ratio selection after SLR balance of mafic/felsic is forced in at first step
### First check the top 10 ratios at the first step
foo.step <- STEP(aar.amalg, aar, weight=FALSE, nsteps=1, top=10)
cbind(foo.step$ratios.top, round(100*foo.step$R2.top,1))
# row col
# MgO/Na2O 5 7 69.1
# Na2O/mafic 7 11 69.0
# MnO/felsic 4 12 68.9
# mafic/felsic 11 12 68.8
# Al2O3/mafic 3 11 68.6
# .....
### (mafic/felsic is 4th in the list, with an explained variance of 68.8%)
### The remaining ratios
aar.step2 <- STEP(aar.amalg, aar, weight=FALSE, previous=log(aar.amalg[,"mafic"]/aar.amalg[,"felsic"]), previous.wt=0.3*0.4)
cbind(aar.step2$ratios, round(100*aar.step2$R2max,1))
### PCA of the new selection of logratios (Fig 3 of article)
### First add the mafic/felsic logratio at the start with the others
aar.ratios2 <- cbind(log(aar.amalg[,"mafic"]/aar.amalg[,"felsic"]), aar.step2$logratios)
colnames(aar.ratios2)[1] <- "mafic/felsic"
rownames(aar.ratios2) <- 1:nrow(aar)
aar.ratios2.pca <- PCA(aar.ratios2, weight=FALSE)
par(mar=c(4.2,4,3,3), font.lab=2, cex.lab=1.2, mgp=c(2.7,0.7,0), las=1, mfrow=c(1,1), cex.axis=0.8)
PLOT.PCA(aar.ratios2.pca, map="contribution", axes.inv=c(1,-1), rescale=2)
### ----------------------------------------------------------
### Procrustes correlation of full-space geometry of samples
### (for the new set of ratios, which is sub-"optimal" because
### of forcing mafic/felsic in at step 1)
protest(aar.ratios2.pca$rowpcoord, aar.lra$rowpcoord)$t0
# [1] 0.9824111
### Procrustes correlation of two configurations of samples in two dimensions
protest(aar.ratios2.pca$rowpcoord[,1:2], aar.lra$rowpcoord[,1:2])$t0
# [1] 0.9817307