Skip to content
Mike Cheung edited this page Oct 26, 2022 · 2 revisions

Reading correlation matrices to R

Mike Cheung October 26, 2022

Example 1

  • We may arrange the study characteristics and correlation coefficients as columns, so that each row represents one study. Boer16.xlsx contains a sample dataset (see help(Boer16) in the metaSEM package for the details of this study).
library(metaSEM)
library(readxl)

## Load the first worksheet of the Excel workbook
my.df <- read_excel(path = "./Boer16.xlsx", sheet = 1)

## Preview the first six rows
head(my.df, n = 6L)
## # A tibble: 6 × 14
##   Study  `Sample size`   LMX   TFL `TFL-LMX` `JS-LMX` `OC-LMX` `LE-LMX` `JS-TFL`
##   <chr>          <dbl> <dbl> <dbl>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 Atwat…           286  NA    0.9         NA       NA       NA    NA          NA
## 2 Avoli…           115  NA    0.93        NA       NA       NA    NA          NA
## 3 Awaml…           304  NA    0.94        NA       NA       NA    NA          NA
## 4 Balku…            56  NA    0.92        NA       NA       NA    NA          NA
## 5 Barli…            20  NA    0.97        NA       NA       NA    NA          NA
## 6 Bauer…            67   0.9 NA           NA       NA       NA     0.33       NA
## # … with 5 more variables: `OC-TFL` <dbl>, `LE-TFL` <dbl>, `OC-JS` <dbl>,
## #   `LE-JS` <dbl>, `LE-OC` <dbl>
## List the column names of the data
colnames(my.df)
##  [1] "Study"       "Sample size" "LMX"         "TFL"         "TFL-LMX"    
##  [6] "JS-LMX"      "OC-LMX"      "LE-LMX"      "JS-TFL"      "OC-TFL"     
## [11] "LE-TFL"      "OC-JS"       "LE-JS"       "LE-OC"
## Variables, in the order used to label the correlation matrices
my.var <- c("LMX", "TFL", "JS", "OC", "LE")

## Build the column names of the correlations: paste every pair of
## variable names as "row-column", then let vechs() keep the
## below-diagonal labels
my.cor <- vechs(outer(my.var, my.var, FUN = paste, sep = "-"))
my.cor
##  [1] "TFL-LMX" "JS-LMX"  "OC-LMX"  "LE-LMX"  "JS-TFL"  "OC-TFL"  "LE-TFL" 
##  [8] "OC-JS"   "LE-JS"   "LE-OC"
## Split the data by rows: one single-row data frame per study.
## seq_len() is used instead of 1:nrow() so that an empty data set gives
## an empty list rather than the bogus grouping c(1, 0).
my.list <- split(my.df, seq_len(nrow(my.df)))

## Convert each study's row of correlations into a labelled symmetric matrix
my.list <- lapply(my.list, function(x) {
  ## The off-diagonal elements come from the correlation columns, in the
  ## order given by my.cor; diag = FALSE puts 1 on the diagonal
  mat <- vec2symMat(unlist(x[my.cor]), diag = FALSE)
  dimnames(mat) <- list(my.var, my.var)
  ## Assign NA in the diagonals when there is no correlation at all.
  ## Such a row has its diagonal as the only non-missing entry. Counting
  ## non-missing entries (rather than testing whether the row mean is 1)
  ## keeps a variable whose observed correlations all equal 1.
  index <- rowSums(!is.na(mat)) == 1
  diag(mat)[index] <- NA
  mat
})

## Strip leading and trailing white space from the study labels, then
## use the cleaned labels to name the correlation matrices
my.df[["Study"]] <- trimws(my.df[["Study"]], which = "both")
my.list <- setNames(my.list, my.df[["Study"]])

## Bundle the matrices, the sample sizes and the LMX and TFL columns
## (stored as RelLMX and RelTFL) into a single list
my.Boer16 <- list(
  data = my.list,
  n = my.df[["Sample size"]],
  RelLMX = my.df[["LMX"]],
  RelTFL = my.df[["TFL"]]
)

Example 2

  • We may also store each correlation matrix in its own worksheet, i.e., one worksheet per study. Hunter83.xlsx contains a sample dataset (see help(Hunter83) in the metaSEM package for the details of this study).
## Read the worksheets named "1" to "14" into a list with one data frame
## per sheet. Iterating over a self-named character vector lets lapply()
## return the list already named "1", ..., "14", so the list is not grown
## element by element and no loop variable is left in the workspace.
my.df <- lapply(
  setNames(nm = as.character(1:14)),
  function(sheet.name) read_excel("./Hunter83.xlsx", sheet = sheet.name)
)

## Names of the variables
my.var <- c("Ability", "Job_knowledge", "Work_sample", "Supervisor")

## Worksheet "0" holds the study names and sample sizes
my.study <- read_excel(path = "./Hunter83.xlsx", sheet = "0")

## Convert each worksheet into a labelled symmetric correlation matrix
my.df <- lapply(my.df, function(x) {
  ## Flatten the sheet into a matrix with one column per variable; the
  ## column count follows my.var instead of being hard-coded as 4.
  ## NOTE(review): assumes each sheet holds exactly
  ## length(my.var) x length(my.var) values -- confirm against the workbook
  mat <- matrix(unlist(x), ncol = length(my.var))
  ## Keep only the below-diagonal elements and rebuild a symmetric matrix
  ## from them; diag = FALSE puts 1 on the diagonal
  mat <- vec2symMat(vechs(mat), diag = FALSE)
  dimnames(mat) <- list(my.var, my.var)
  ## Assign NA in the diagonals when there is no correlation at all.
  ## Such a row has its diagonal as the only non-missing entry. Counting
  ## non-missing entries (rather than testing whether the row mean is 1)
  ## keeps a variable whose observed correlations all equal 1.
  index <- rowSums(!is.na(mat)) == 1
  diag(mat)[index] <- NA
  mat
})
          
## Label each correlation matrix with its study name
my.df <- setNames(my.df, my.study[["Study"]])

## Bundle the correlation matrices and the sample sizes into one list
my.Hunter83 <- list(
  data = my.df,
  n = my.study[["n"]]
)

## Report the R session details
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_SG.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_SG.UTF-8        LC_COLLATE=en_SG.UTF-8    
##  [5] LC_MONETARY=en_SG.UTF-8    LC_MESSAGES=en_SG.UTF-8   
##  [7] LC_PAPER=en_SG.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_SG.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] readxl_1.4.0    metaSEM_1.2.6.1 OpenMx_2.20.7  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.9         compiler_4.2.1     cellranger_1.1.0   pillar_1.7.0      
##  [5] tools_4.2.1        digest_0.6.29      evaluate_0.15      lifecycle_1.0.1   
##  [9] tibble_3.1.7       lattice_0.20-45    pkgconfig_2.0.3    rlang_1.0.4       
## [13] Matrix_1.5-1       cli_3.3.0          rstudioapi_0.13    yaml_2.3.5        
## [17] parallel_4.2.1     mvtnorm_1.1-3      pbivnorm_0.6.0     xfun_0.31         
## [21] fastmap_1.1.0      stringr_1.4.0      knitr_1.39         vctrs_0.4.1       
## [25] stats4_4.2.1       grid_4.2.1         glue_1.6.2         ellipse_0.4.3     
## [29] fansi_1.0.3        rmarkdown_2.14     lavaan_0.6-11      magrittr_2.0.3    
## [33] htmltools_0.5.2    ellipsis_0.3.2     MASS_7.3-58        mnormt_2.0.2      
## [37] utf8_1.2.2         stringi_1.7.8      RcppParallel_5.1.5 tmvnsim_1.0-2     
## [41] crayon_1.5.1
Clone this wiki locally