Skip to content

fredlandfors/FIA3-SCD_protf

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

2 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

QC document

Fredrik Landfors 12/2/2020

Source libraries and functions:

source("./src/check_packages.R")
check_packages(
  cran_packages = c("ggplot2", "naniar", "UpSetR"),
  bioc_packages = c("ropls")
)
## [1] "Passed dependency check."
library(ggplot2)
source("./src/plot_pca.R")
source("./src/geom_cwise_boxplot.R")
source("./src/geom_rwise_boxplot.R")

Load data:

load("~/projekt_data/2019-11-19_SCD-FIA3_data/rData/markdown.RData")

QC: Proximity extension assay

Get data:

qc <- list()
preanalytic_factors <- c(
  "Patient_code",
  "FIAnum_FIA3",
  "Set_FIA3",
  "Sample_date",
  "case_freeze_thawed",
  "Fast_sample",
  "Age", 
  "Sex"
)
randomization_factors <- c(
  "Glc_0h",
  "Chol_tot",
  "BMI",
  "Smoker_2fct",
  "Sbt_VIP",
  "Case_control"
)
qc$inf <- merge(
  x = preproc$raw_fia$data_out[c(preanalytic_factors, randomization_factors)],
  y = preproc$raw_inf$data_out,
  by = "Patient_code"
)
qc$org <- merge(
  x = preproc$raw_fia$data_out[c(preanalytic_factors, randomization_factors)],
  y = preproc$raw_org$data_out,
  by = "Patient_code"
)
qc$org_proteins <- names(preproc$raw_org$data_out)[
  names(preproc$raw_org$data_out) %in% preproc$raw_org$varmeta_data$name1
]
qc$inf_proteins <- names(preproc$raw_inf$data_out)[
  names(preproc$raw_inf$data_out) %in% preproc$raw_inf$varmeta_data$name1
]

Olink ORGAN DAMAGE (v.3311)

A. Missing values

naniar::vis_miss(qc$org)

B. Lower limit of detection (LOD)

Values with > 5 % of values below LOD were excluded from further analysis. This reduced the count of proteins by 39 from 92 to 53.

length(qc$org_proteins)
## [1] 53
length(preproc$raw_org$varmeta_data$name1)
## [1] 92
length(preproc$raw_org$varmeta_data$name1) - length(qc$org_proteins)
## [1] 39

C. Batch effects

PCA colored for plate ID

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Plate_ID_2"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

PLS model

# ropls::opls(
#   x = qc$org[, qc$org_proteins],
#   y = qc$org[, "Plate_ID_2"]
# )

PLS model: force two predictive components because first model did not pass permutation test

ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "Plate_ID_2"],
  predI = 2
)
## PLS-DA
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.326   0.0197 -0.0147 0.397   2   0  0.7 0.35

D. Preanalytic effects

Freeze-thaw cycles

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["case_freeze_thawed"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls1 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "case_freeze_thawed"]
)
## PLS-DA
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.325    0.158   0.121 0.265   2   0 0.05 0.05

ropls::plot(pls1, typeVc = "xy-weight")

Fasting

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Fast_sample"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls2 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "Fast_sample"],
  predI = 2
)
## PLS-DA
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total    0.303   0.0648 -0.0282 0.311   2   0 0.05 0.8

Sample date

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Sample_date"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls3 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = sapply(
    qc$org[, "Sample_date"] ,
      function(x) {
        round(((as.numeric(x) - 6159) / 365.25 + 1986.86242), 0)
      }
    )
)
## PLS
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.321    0.146  0.0664  3.44   2   0 0.05 0.05

ropls::plot(pls3, typeVc = "xy-weight")

Age at sampling

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Age"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls4 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "Age"]
)
## PLS
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.389    0.286   0.172  5.93   3   0 0.05 0.05

ropls::plot(pls4, typeVc = "xy-weight")

Sex

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Sex"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls4 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "Sex"]
)
## PLS-DA
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.383    0.478   0.361 0.293   3   0 0.05 0.05

ropls::plot(pls4, typeVc = "xy-weight")

E. Olink QC flag

Looking at QC-flagged samples in plates 1-5 (coloured blue):

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-1", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-2", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-3", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-4", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-5", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

F. Plate randomization

Checks if case/ctrl pairs were analyzed on the same plates

table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-1", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   45
table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-2", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   45
table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-3", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   45
table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-4", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   45
table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-5", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   44

Check covariate randomization across plates

# BMI
anova(lm(formula = BMI ~ Plate_ID_2, data = qc$org))
## Analysis of Variance Table
## 
## Response: BMI
##             Df Sum Sq Mean Sq F value Pr(>F)
## Plate_ID_2   4   30.0  7.5077  0.4049 0.8051
## Residuals  437 8102.4 18.5411
# Total cholesterol
anova(lm(formula = Chol_tot ~ Plate_ID_2, data = qc$org))
## Analysis of Variance Table
## 
## Response: Chol_tot
##             Df Sum Sq Mean Sq F value  Pr(>F)  
## Plate_ID_2   4  13.83  3.4587   2.272 0.06077 .
## Residuals  425 646.98  1.5223                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Systolic blood pressure
anova(lm(formula = Sbt_VIP ~ Plate_ID_2, data = qc$org))
## Analysis of Variance Table
## 
## Response: Sbt_VIP
##             Df Sum Sq Mean Sq F value   Pr(>F)   
## Plate_ID_2   4   6529 1632.37  4.3887 0.001729 **
## Residuals  435 161797  371.95                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm(formula = Sbt_VIP ~ Plate_ID_2, data = qc$org))
## 
## Call:
## lm(formula = Sbt_VIP ~ Plate_ID_2, data = qc$org)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.552 -13.753  -2.045  11.955  54.986 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            138.7528     2.0443  67.873   <2e-16 ***
## Plate_ID_2FIA3_2-ORG-2   4.7989     2.9077   1.650   0.0996 .  
## Plate_ID_2FIA3_2-ORG-3  -0.7303     2.8911  -0.253   0.8007    
## Plate_ID_2FIA3_2-ORG-4  -6.7079     2.8911  -2.320   0.0208 *  
## Plate_ID_2FIA3_2-ORG-5  -3.7384     2.9162  -1.282   0.2005    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.29 on 435 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.03879,    Adjusted R-squared:  0.02995 
## F-statistic: 4.389 on 4 and 435 DF,  p-value: 0.001729
# Glucose
anova(lm(formula = Glc_0h ~ Plate_ID_2, data = qc$org))
## Analysis of Variance Table
## 
## Response: Glc_0h
##             Df  Sum Sq Mean Sq F value Pr(>F)
## Plate_ID_2   4   24.69  6.1726   1.456 0.2149
## Residuals  400 1695.75  4.2394
# Smoking status
table(qc$org[c("Smoker_2fct", "Plate_ID_2")])
##            Plate_ID_2
## Smoker_2fct FIA3_2-ORG-1 FIA3_2-ORG-2 FIA3_2-ORG-3 FIA3_2-ORG-4 FIA3_2-ORG-5
##         no            58           63           65           55           58
##         yes           24           22           24           33           29
chisq.test(table(qc$org[c("Smoker_2fct", "Plate_ID_2")]))
## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$org[c("Smoker_2fct", "Plate_ID_2")])
## X-squared = 3.7896, df = 4, p-value = 0.4352

Check pre-analytic factor randomization across plates

# Sample date
anova(lm(formula = as.numeric(Sample_date) ~ Plate_ID_2, data = qc$org))
## Analysis of Variance Table
## 
## Response: as.numeric(Sample_date)
##             Df    Sum Sq  Mean Sq F value    Pr(>F)    
## Plate_ID_2   4  88654663 22163666  13.415 2.499e-10 ***
## Residuals  443 731879195  1652098                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm(formula = as.numeric(Sample_date) ~ Plate_ID_2, data = qc$org))
## 
## Call:
## lm(formula = as.numeric(Sample_date) ~ Plate_ID_2, data = qc$org)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3313.2  -716.7    20.5   734.2  4828.2 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              8296.8      135.5  61.237  < 2e-16 ***
## Plate_ID_2FIA3_2-ORG-2    705.2      191.6   3.680 0.000262 ***
## Plate_ID_2FIA3_2-ORG-3   1237.5      191.6   6.459 2.78e-10 ***
## Plate_ID_2FIA3_2-ORG-4   1175.5      191.6   6.135 1.89e-09 ***
## Plate_ID_2FIA3_2-ORG-5    861.8      192.7   4.473 9.83e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1285 on 443 degrees of freedom
## Multiple R-squared:  0.108,  Adjusted R-squared:  0.09999 
## F-statistic: 13.42 on 4 and 443 DF,  p-value: 2.499e-10
# Fasting
table(qc$org[c("Fast_sample", "Plate_ID_2")])
##            Plate_ID_2
## Fast_sample FIA3_2-ORG-1 FIA3_2-ORG-2 FIA3_2-ORG-3 FIA3_2-ORG-4 FIA3_2-ORG-5
##        0-4h            5            8            3            6            2
##        4-6h           26            6           15            8           27
##        6-8h            1            2            3            0            2
##        >8h            58           74           69           76           57
chisq.test(table(qc$org[c("Fast_sample", "Plate_ID_2")]))
## Warning in chisq.test(table(qc$org[c("Fast_sample", "Plate_ID_2")])): Chi-
## squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$org[c("Fast_sample", "Plate_ID_2")])
## X-squared = 36.23, df = 12, p-value = 0.0002975
table(table(subset(qc$org, Fast_sample == "0-4h", select = "Set_FIA3")) == 2)
## 
## FALSE  TRUE 
##    16     4
table(table(subset(qc$org, Fast_sample == "4-6h", select = "Set_FIA3")) == 2)
## 
## FALSE  TRUE 
##    32    25
table(table(subset(qc$org, Fast_sample == "6-8h", select = "Set_FIA3")) == 2)
## 
## FALSE 
##     8
table(table(subset(qc$org, Fast_sample == ">8h", select = "Set_FIA3")) == 2)
## 
## FALSE  TRUE 
##    46   144
# Freeze/thaw status
table(qc$org[c("case_freeze_thawed", "Plate_ID_2")])
##                                              Plate_ID_2
## case_freeze_thawed                            FIA3_2-ORG-1 FIA3_2-ORG-2
##   NO                                                    62           58
##   ONCE                                                  18           20
##   Thawed due to broken freezer but still cold            6            8
##   TWICE                                                  4            4
##                                              Plate_ID_2
## case_freeze_thawed                            FIA3_2-ORG-3 FIA3_2-ORG-4
##   NO                                                    74           78
##   ONCE                                                  12            4
##   Thawed due to broken freezer but still cold            4            8
##   TWICE                                                  0            0
##                                              Plate_ID_2
## case_freeze_thawed                            FIA3_2-ORG-5
##   NO                                                    82
##   ONCE                                                   2
##   Thawed due to broken freezer but still cold            4
##   TWICE                                                  0
chisq.test(table(qc$org[c("case_freeze_thawed", "Plate_ID_2")]))
## Warning in chisq.test(table(qc$org[c("case_freeze_thawed", "Plate_ID_2")])):
## Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$org[c("case_freeze_thawed", "Plate_ID_2")])
## X-squared = 44.039, df = 12, p-value = 1.504e-05
table(table(subset(qc$org, case_freeze_thawed == "NO", select = "Set_FIA3")) == 2)
## 
## TRUE 
##  177
table(table(subset(qc$org, case_freeze_thawed == "ONCE", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   28
table(table(subset(qc$org, case_freeze_thawed == "Thawed due to broken freezer but still cold", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   15
table(table(subset(qc$org, case_freeze_thawed == "TWICE", select = "Set_FIA3")) == 2)
## 
## TRUE 
##    4

Case control matching was perfect for freeze/thaw cycles, but not for fasting time.

Check fasting time overlap using a UpSet plot.

qc_split <- split(
  qc$org[c(preanalytic_factors, randomization_factors)],
  f = qc$org$Case_control
)
qc_overlap <- merge(
  x = qc_split$Ctrl[c("Set_FIA3", "Fast_sample")],
  y = qc_split$Case[c("Set_FIA3", "Fast_sample")],
  by = "Set_FIA3"
)
names(qc_overlap) <- c("Set_FIA3", "ctrl_", "case_")
qc_overlap2 <- data.frame(
  qc_overlap["Set_FIA3"],
  model.matrix(~ ctrl_ - 1, data = qc_overlap["ctrl_"]),
  model.matrix(~ case_ - 1, data = qc_overlap["case_"])
)
UpSetR::upset(
  qc_overlap2,
  #nsets = 8, 
  sets = c("ctrl_0.4h",
           "case_0.4h",
           "ctrl_4.6h",
           "case_4.6h",
           "ctrl_6.8h",
           "case_6.8h",
           "ctrl_.8h",
           "case_.8h"),
  keep.order = TRUE
)

Conclusion: 12 pairs where fasted subjects were matched with recently fed subjects.

Olink INFLAMMATION (v.3021)

A. Missing values

naniar::vis_miss(qc$inf)

B. Lower limit of detection (LOD)

Values with > 5 % of values below LOD were excluded from further analysis. This reduced the count of proteins by 45 from 92 to 47.

length(qc$inf_proteins)
## [1] 47
length(preproc$raw_inf$varmeta_data$name1)
## [1] 92
length(preproc$raw_inf$varmeta_data$name1) - length(qc$inf_proteins)
## [1] 45

C. Batch effects

PCA colored for plate ID

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Plate_ID_1"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

PLS model

# ropls::opls(
#   x = qc$inf[, qc$inf_proteins],
#   y = qc$inf[, "Plate_ID_1"]
# )

PLS model: force two predictive components

ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "Plate_ID_1"],
  predI = 2
)
## PLS-DA
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total    0.236   0.0231 -0.0475 0.397   2   0  0.6   1

D. Preanalytic effects

Freeze-thaw cycles

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["case_freeze_thawed"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls1 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "case_freeze_thawed"]
)
## PLS-DA
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.277   0.0838  0.0168 0.282   2   0 0.05 0.05

ropls::plot(pls1, typeVc = "xy-weight")

Fasting

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Fast_sample"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls2 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "Fast_sample"],
  predI = 2
)
## PLS-DA
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total    0.202   0.0499 -0.0505 0.309   2   0 0.05 0.9

Sample date

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Sample_date"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls3 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = sapply(
    qc$inf[, "Sample_date"] ,
      function(x) {
        round(((as.numeric(x) - 6159) / 365.25 + 1986.86242), 0)
      }
    )
)
## PLS
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.204   0.0467  0.0184  3.63   1   0  0.4 0.05

## Warning: Single component model: only 'overview' and 'permutation' (in case of
## single response (O)PLS(-DA)) plots available
ropls::plot(pls3, typeVc = "xy-weight")
## Warning: Single component model: only 'overview' and 'permutation' (in case of
## single response (O)PLS(-DA)) plots available

Age at sampling

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Age"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls4 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "Age"]
)
## PLS
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.303    0.191   0.125  6.31   2   0 0.05 0.05

ropls::plot(pls4, typeVc = "xy-weight")

Sex

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Sex"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)
## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls4 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "Sex"]
)
## PLS-DA
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.265    0.202   0.101 0.362   2   0 0.05 0.05

ropls::plot(pls4, typeVc = "xy-weight")

E. Olink QC flag

Looking at QC-flagged samples in plates 1-5 (coloured blue):

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-1", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-2", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-3", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-4", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF5", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

F. Plate randomization

Checks if case/ctrl pairs were analyzed on the same plates

table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-1", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   45
table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-2", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   45
table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-3", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   45
table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-4", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   45
table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF5", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   44

Check covariate randomization across plates

# BMI
anova(lm(formula = BMI ~ Plate_ID_1, data = qc$inf))
## Analysis of Variance Table
## 
## Response: BMI
##             Df Sum Sq Mean Sq F value Pr(>F)
## Plate_ID_1   4   30.0  7.5077  0.4049 0.8051
## Residuals  437 8102.4 18.5411
# Total cholesterol
anova(lm(formula = Chol_tot ~ Plate_ID_1, data = qc$inf))
## Analysis of Variance Table
## 
## Response: Chol_tot
##             Df Sum Sq Mean Sq F value  Pr(>F)  
## Plate_ID_1   4  13.83  3.4587   2.272 0.06077 .
## Residuals  425 646.98  1.5223                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Systolic blood pressure
anova(lm(formula = Sbt_VIP ~ Plate_ID_1, data = qc$inf))
## Analysis of Variance Table
## 
## Response: Sbt_VIP
##             Df Sum Sq Mean Sq F value   Pr(>F)   
## Plate_ID_1   4   6529 1632.37  4.3887 0.001729 **
## Residuals  435 161797  371.95                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm(formula = Sbt_VIP ~ Plate_ID_1, data = qc$inf))
## 
## Call:
## lm(formula = Sbt_VIP ~ Plate_ID_1, data = qc$inf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.552 -13.753  -2.045  11.955  54.986 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            138.7528     2.0443  67.873   <2e-16 ***
## Plate_ID_1FIA3_2-INF-2   4.7989     2.9077   1.650   0.0996 .  
## Plate_ID_1FIA3_2-INF-3  -0.7303     2.8911  -0.253   0.8007    
## Plate_ID_1FIA3_2-INF-4  -6.7079     2.8911  -2.320   0.0208 *  
## Plate_ID_1FIA3_2-INF5   -3.7384     2.9162  -1.282   0.2005    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.29 on 435 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.03879,    Adjusted R-squared:  0.02995 
## F-statistic: 4.389 on 4 and 435 DF,  p-value: 0.001729
# Glucose
anova(lm(formula = Glc_0h ~ Plate_ID_1, data = qc$inf))
## Analysis of Variance Table
## 
## Response: Glc_0h
##             Df  Sum Sq Mean Sq F value Pr(>F)
## Plate_ID_1   4   24.69  6.1726   1.456 0.2149
## Residuals  400 1695.75  4.2394
# Smoking status
table(qc$inf[c("Smoker_2fct", "Plate_ID_1")])
##            Plate_ID_1
## Smoker_2fct FIA3_2-INF-1 FIA3_2-INF-2 FIA3_2-INF-3 FIA3_2-INF-4 FIA3_2-INF5
##         no            58           63           65           55          58
##         yes           24           22           24           33          29
chisq.test(table(qc$inf[c("Smoker_2fct", "Plate_ID_1")]))
## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$inf[c("Smoker_2fct", "Plate_ID_1")])
## X-squared = 3.7896, df = 4, p-value = 0.4352

Check pre-analytic factor randomization across plates

# Sample date
anova(lm(formula = as.numeric(Sample_date) ~ Plate_ID_1, data = qc$inf))
## Analysis of Variance Table
## 
## Response: as.numeric(Sample_date)
##             Df    Sum Sq  Mean Sq F value    Pr(>F)    
## Plate_ID_1   4  88654663 22163666  13.415 2.499e-10 ***
## Residuals  443 731879195  1652098                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm(formula = as.numeric(Sample_date) ~ Plate_ID_1, data = qc$inf))
## 
## Call:
## lm(formula = as.numeric(Sample_date) ~ Plate_ID_1, data = qc$inf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3313.2  -716.7    20.5   734.2  4828.2 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              8296.8      135.5  61.237  < 2e-16 ***
## Plate_ID_1FIA3_2-INF-2    705.2      191.6   3.680 0.000262 ***
## Plate_ID_1FIA3_2-INF-3   1237.5      191.6   6.459 2.78e-10 ***
## Plate_ID_1FIA3_2-INF-4   1175.5      191.6   6.135 1.89e-09 ***
## Plate_ID_1FIA3_2-INF5     861.8      192.7   4.473 9.83e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1285 on 443 degrees of freedom
## Multiple R-squared:  0.108,  Adjusted R-squared:  0.09999 
## F-statistic: 13.42 on 4 and 443 DF,  p-value: 2.499e-10
# Fasting
table(qc$inf[c("Fast_sample", "Plate_ID_1")])
##            Plate_ID_1
## Fast_sample FIA3_2-INF-1 FIA3_2-INF-2 FIA3_2-INF-3 FIA3_2-INF-4 FIA3_2-INF5
##        0-4h            5            8            3            6           2
##        4-6h           26            6           15            8          27
##        6-8h            1            2            3            0           2
##        >8h            58           74           69           76          57
chisq.test(table(qc$inf[c("Fast_sample", "Plate_ID_1")]))
## Warning in chisq.test(table(qc$inf[c("Fast_sample", "Plate_ID_1")])): Chi-
## squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$inf[c("Fast_sample", "Plate_ID_1")])
## X-squared = 36.23, df = 12, p-value = 0.0002975
table(table(subset(qc$inf, Fast_sample == "0-4h", select = "Set_FIA3")) == 2)
## 
## FALSE  TRUE 
##    16     4
table(table(subset(qc$inf, Fast_sample == "4-6h", select = "Set_FIA3")) == 2)
## 
## FALSE  TRUE 
##    32    25
table(table(subset(qc$inf, Fast_sample == "6-8h", select = "Set_FIA3")) == 2)
## 
## FALSE 
##     8
table(table(subset(qc$inf, Fast_sample == ">8h", select = "Set_FIA3")) == 2)
## 
## FALSE  TRUE 
##    46   144
# Freeze/thaw status
table(qc$inf[c("case_freeze_thawed", "Plate_ID_1")])
##                                              Plate_ID_1
## case_freeze_thawed                            FIA3_2-INF-1 FIA3_2-INF-2
##   NO                                                    62           58
##   ONCE                                                  18           20
##   Thawed due to broken freezer but still cold            6            8
##   TWICE                                                  4            4
##                                              Plate_ID_1
## case_freeze_thawed                            FIA3_2-INF-3 FIA3_2-INF-4
##   NO                                                    74           78
##   ONCE                                                  12            4
##   Thawed due to broken freezer but still cold            4            8
##   TWICE                                                  0            0
##                                              Plate_ID_1
## case_freeze_thawed                            FIA3_2-INF5
##   NO                                                   82
##   ONCE                                                  2
##   Thawed due to broken freezer but still cold           4
##   TWICE                                                 0
chisq.test(table(qc$inf[c("case_freeze_thawed", "Plate_ID_1")]))
## Warning in chisq.test(table(qc$inf[c("case_freeze_thawed", "Plate_ID_1")])):
## Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$inf[c("case_freeze_thawed", "Plate_ID_1")])
## X-squared = 44.039, df = 12, p-value = 1.504e-05
table(table(subset(qc$inf, case_freeze_thawed == "NO", select = "Set_FIA3")) == 2)
## 
## TRUE 
##  177
table(table(subset(qc$inf, case_freeze_thawed == "ONCE", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   28
table(table(subset(qc$inf, case_freeze_thawed == "Thawed due to broken freezer but still cold", select = "Set_FIA3")) == 2)
## 
## TRUE 
##   15
table(table(subset(qc$inf, case_freeze_thawed == "TWICE", select = "Set_FIA3")) == 2)
## 
## TRUE 
##    4

Case control matching was perfect for freeze/thaw cycles, but not for fasting time.

Check fasting time overlap using a UpSet plot.

qc_split <- split(
  qc$inf[c(preanalytic_factors, randomization_factors)],
  f = qc$inf$Case_control
)
qc_overlap <- merge(
  x = qc_split$Ctrl[c("Set_FIA3", "Fast_sample")],
  y = qc_split$Case[c("Set_FIA3", "Fast_sample")],
  by = "Set_FIA3"
)
names(qc_overlap) <- c("Set_FIA3", "ctrl_", "case_")
qc_overlap2 <- data.frame(
  qc_overlap["Set_FIA3"],
  model.matrix(~ ctrl_ - 1, data = qc_overlap["ctrl_"]),
  model.matrix(~ case_ - 1, data = qc_overlap["case_"])
)
UpSetR::upset(
  qc_overlap2,
  #nsets = 8, 
  sets = c("ctrl_0.4h",
           "case_0.4h",
           "ctrl_4.6h",
           "case_4.6h",
           "ctrl_6.8h",
           "case_6.8h",
           "ctrl_.8h",
           "case_.8h"),
  keep.order = TRUE
)

Conclusion: 12 pairs where fasted subjects were matched with recently fed subjects.

About

No description, website, or topics provided.

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages