QC document

Fredrik Landfors 12/2/2020

QC: Proximity extension assay
- Olink ORGAN DAMAGE (v.3311)
- Olink INFLAMMATION (v.3021)

Source libraries and functions:

source("./src/check_packages.R")
check_packages(
  cran_packages = c("ggplot2", "naniar", "UpSetR"),
  bioc_packages = c("ropls")
)

## [1] "Passed dependency check."

library(ggplot2)
source("./src/plot_pca.R")
source("./src/geom_cwise_boxplot.R")
source("./src/geom_rwise_boxplot.R")

Load data:

load("~/projekt_data/2019-11-19_SCD-FIA3_data/rData/markdown.RData")

QC: Proximity extension assay

Get data:

qc <- list()
preanalytic_factors <- c(
  "Patient_code",
  "FIAnum_FIA3",
  "Set_FIA3",
  "Sample_date",
  "case_freeze_thawed",
  "Fast_sample",
  "Age", 
  "Sex"
)
randomization_factors <- c(
  "Glc_0h",
  "Chol_tot",
  "BMI",
  "Smoker_2fct",
  "Sbt_VIP",
  "Case_control"
)
qc$inf <- merge(
  x = preproc$raw_fia$data_out[c(preanalytic_factors, randomization_factors)],
  y = preproc$raw_inf$data_out,
  by = "Patient_code"
)
qc$org <- merge(
  x = preproc$raw_fia$data_out[c(preanalytic_factors, randomization_factors)],
  y = preproc$raw_org$data_out,
  by = "Patient_code"
)
qc$org_proteins <- names(preproc$raw_org$data_out)[
  names(preproc$raw_org$data_out) %in% preproc$raw_org$varmeta_data$name1
]
qc$inf_proteins <- names(preproc$raw_inf$data_out)[
  names(preproc$raw_inf$data_out) %in% preproc$raw_inf$varmeta_data$name1
]

Olink ORGAN DAMAGE (v.3311)

A. Missing values

naniar::vis_miss(qc$org)

B. Lower limit of detection (LOD)

Values with > 5 % of values below LOD were excluded from further analysis. This reduced the count of proteins by 39 from 92 to 53.

length(qc$org_proteins)

## [1] 53

length(preproc$raw_org$varmeta_data$name1)

## [1] 92

length(preproc$raw_org$varmeta_data$name1) - length(qc$org_proteins)

## [1] 39

C. Batch effects

PCA colored for plate ID

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Plate_ID_2"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

PLS model

# ropls::opls(
#   x = qc$org[, qc$org_proteins],
#   y = qc$org[, "Plate_ID_2"]
# )

PLS model: force two predictive components because first model did not pass permutation test

ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "Plate_ID_2"],
  predI = 2
)

## PLS-DA
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.326   0.0197 -0.0147 0.397   2   0  0.7 0.35

D. Preanalytic effects

Freeze-thaw cycles

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["case_freeze_thawed"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls1 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "case_freeze_thawed"]
)

## PLS-DA
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.325    0.158   0.121 0.265   2   0 0.05 0.05

ropls::plot(pls1, typeVc = "xy-weight")

Fasting

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Fast_sample"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls2 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "Fast_sample"],
  predI = 2
)

## PLS-DA
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total    0.303   0.0648 -0.0282 0.311   2   0 0.05 0.8

Sample date

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Sample_date"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls3 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = sapply(
    qc$org[, "Sample_date"] ,
      function(x) {
        round(((as.numeric(x) - 6159) / 365.25 + 1986.86242), 0)
      }
    )
)

## PLS
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.321    0.146  0.0664  3.44   2   0 0.05 0.05

ropls::plot(pls3, typeVc = "xy-weight")

Age at sampling

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Age"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls4 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "Age"]
)

## PLS
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.389    0.286   0.172  5.93   3   0 0.05 0.05

ropls::plot(pls4, typeVc = "xy-weight")

Sex

plot_pca(
      x = qc$org[, qc$org_proteins],
      c = qc$org["Sex"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls4 <- ropls::opls(
  x = qc$org[, qc$org_proteins],
  y = qc$org[, "Sex"]
)

## PLS-DA
## 448 samples x 53 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.383    0.478   0.361 0.293   3   0 0.05 0.05

ropls::plot(pls4, typeVc = "xy-weight")

E. Olink QC flag

Looking at QC-flagged samples in plates 1-5 (coloured blue):

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-1", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-2", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-3", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-4", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

geom_rwise_boxplot(
      x = subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-5", select = qc$org_proteins),
      fill1 = qc$org$QC_Warning_2
)

F. Plate randomization

Checks if case/ctrl pairs were analyzed on the same plates

table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-1", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   45

table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-2", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   45

table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-3", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   45

table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-4", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   45

table(table(subset(qc$org, Plate_ID_2 == "FIA3_2-ORG-5", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   44

Check covariate randomization across plates

# BMI
anova(lm(formula = BMI ~ Plate_ID_2, data = qc$org))

## Analysis of Variance Table
## 
## Response: BMI
##             Df Sum Sq Mean Sq F value Pr(>F)
## Plate_ID_2   4   30.0  7.5077  0.4049 0.8051
## Residuals  437 8102.4 18.5411

# Total cholesterol
anova(lm(formula = Chol_tot ~ Plate_ID_2, data = qc$org))

## Analysis of Variance Table
## 
## Response: Chol_tot
##             Df Sum Sq Mean Sq F value  Pr(>F)  
## Plate_ID_2   4  13.83  3.4587   2.272 0.06077 .
## Residuals  425 646.98  1.5223                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Systolic blood pressure
anova(lm(formula = Sbt_VIP ~ Plate_ID_2, data = qc$org))

## Analysis of Variance Table
## 
## Response: Sbt_VIP
##             Df Sum Sq Mean Sq F value   Pr(>F)   
## Plate_ID_2   4   6529 1632.37  4.3887 0.001729 **
## Residuals  435 161797  371.95                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(lm(formula = Sbt_VIP ~ Plate_ID_2, data = qc$org))

## 
## Call:
## lm(formula = Sbt_VIP ~ Plate_ID_2, data = qc$org)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.552 -13.753  -2.045  11.955  54.986 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            138.7528     2.0443  67.873   <2e-16 ***
## Plate_ID_2FIA3_2-ORG-2   4.7989     2.9077   1.650   0.0996 .  
## Plate_ID_2FIA3_2-ORG-3  -0.7303     2.8911  -0.253   0.8007    
## Plate_ID_2FIA3_2-ORG-4  -6.7079     2.8911  -2.320   0.0208 *  
## Plate_ID_2FIA3_2-ORG-5  -3.7384     2.9162  -1.282   0.2005    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.29 on 435 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.03879,    Adjusted R-squared:  0.02995 
## F-statistic: 4.389 on 4 and 435 DF,  p-value: 0.001729

# Glucose
anova(lm(formula = Glc_0h ~ Plate_ID_2, data = qc$org))

## Analysis of Variance Table
## 
## Response: Glc_0h
##             Df  Sum Sq Mean Sq F value Pr(>F)
## Plate_ID_2   4   24.69  6.1726   1.456 0.2149
## Residuals  400 1695.75  4.2394

# Smoking status
table(qc$org[c("Smoker_2fct", "Plate_ID_2")])

##            Plate_ID_2
## Smoker_2fct FIA3_2-ORG-1 FIA3_2-ORG-2 FIA3_2-ORG-3 FIA3_2-ORG-4 FIA3_2-ORG-5
##         no            58           63           65           55           58
##         yes           24           22           24           33           29

chisq.test(table(qc$org[c("Smoker_2fct", "Plate_ID_2")]))

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$org[c("Smoker_2fct", "Plate_ID_2")])
## X-squared = 3.7896, df = 4, p-value = 0.4352

Check pre-analytic factor randomization across plates

# Sample date
anova(lm(formula = as.numeric(Sample_date) ~ Plate_ID_2, data = qc$org))

## Analysis of Variance Table
## 
## Response: as.numeric(Sample_date)
##             Df    Sum Sq  Mean Sq F value    Pr(>F)    
## Plate_ID_2   4  88654663 22163666  13.415 2.499e-10 ***
## Residuals  443 731879195  1652098                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(lm(formula = as.numeric(Sample_date) ~ Plate_ID_2, data = qc$org))

## 
## Call:
## lm(formula = as.numeric(Sample_date) ~ Plate_ID_2, data = qc$org)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3313.2  -716.7    20.5   734.2  4828.2 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              8296.8      135.5  61.237  < 2e-16 ***
## Plate_ID_2FIA3_2-ORG-2    705.2      191.6   3.680 0.000262 ***
## Plate_ID_2FIA3_2-ORG-3   1237.5      191.6   6.459 2.78e-10 ***
## Plate_ID_2FIA3_2-ORG-4   1175.5      191.6   6.135 1.89e-09 ***
## Plate_ID_2FIA3_2-ORG-5    861.8      192.7   4.473 9.83e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1285 on 443 degrees of freedom
## Multiple R-squared:  0.108,  Adjusted R-squared:  0.09999 
## F-statistic: 13.42 on 4 and 443 DF,  p-value: 2.499e-10

# Fasting
table(qc$org[c("Fast_sample", "Plate_ID_2")])

##            Plate_ID_2
## Fast_sample FIA3_2-ORG-1 FIA3_2-ORG-2 FIA3_2-ORG-3 FIA3_2-ORG-4 FIA3_2-ORG-5
##        0-4h            5            8            3            6            2
##        4-6h           26            6           15            8           27
##        6-8h            1            2            3            0            2
##        >8h            58           74           69           76           57

chisq.test(table(qc$org[c("Fast_sample", "Plate_ID_2")]))

## Warning in chisq.test(table(qc$org[c("Fast_sample", "Plate_ID_2")])): Chi-
## squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$org[c("Fast_sample", "Plate_ID_2")])
## X-squared = 36.23, df = 12, p-value = 0.0002975

table(table(subset(qc$org, Fast_sample == "0-4h", select = "Set_FIA3")) == 2)

## 
## FALSE  TRUE 
##    16     4

table(table(subset(qc$org, Fast_sample == "4-6h", select = "Set_FIA3")) == 2)

## 
## FALSE  TRUE 
##    32    25

table(table(subset(qc$org, Fast_sample == "6-8h", select = "Set_FIA3")) == 2)

## 
## FALSE 
##     8

table(table(subset(qc$org, Fast_sample == ">8h", select = "Set_FIA3")) == 2)

## 
## FALSE  TRUE 
##    46   144

# Freeze/thaw status
table(qc$org[c("case_freeze_thawed", "Plate_ID_2")])

##                                              Plate_ID_2
## case_freeze_thawed                            FIA3_2-ORG-1 FIA3_2-ORG-2
##   NO                                                    62           58
##   ONCE                                                  18           20
##   Thawed due to broken freezer but still cold            6            8
##   TWICE                                                  4            4
##                                              Plate_ID_2
## case_freeze_thawed                            FIA3_2-ORG-3 FIA3_2-ORG-4
##   NO                                                    74           78
##   ONCE                                                  12            4
##   Thawed due to broken freezer but still cold            4            8
##   TWICE                                                  0            0
##                                              Plate_ID_2
## case_freeze_thawed                            FIA3_2-ORG-5
##   NO                                                    82
##   ONCE                                                   2
##   Thawed due to broken freezer but still cold            4
##   TWICE                                                  0

chisq.test(table(qc$org[c("case_freeze_thawed", "Plate_ID_2")]))

## Warning in chisq.test(table(qc$org[c("case_freeze_thawed", "Plate_ID_2")])):
## Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$org[c("case_freeze_thawed", "Plate_ID_2")])
## X-squared = 44.039, df = 12, p-value = 1.504e-05

table(table(subset(qc$org, case_freeze_thawed == "NO", select = "Set_FIA3")) == 2)

## 
## TRUE 
##  177

table(table(subset(qc$org, case_freeze_thawed == "ONCE", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   28

table(table(subset(qc$org, case_freeze_thawed == "Thawed due to broken freezer but still cold", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   15

table(table(subset(qc$org, case_freeze_thawed == "TWICE", select = "Set_FIA3")) == 2)

## 
## TRUE 
##    4

Case control matching was perfect for freeze/thaw cycles, but not for fasting time.

Check fasting time overlap using a UpSet plot.

qc_split <- split(
  qc$org[c(preanalytic_factors, randomization_factors)],
  f = qc$org$Case_control
)
qc_overlap <- merge(
  x = qc_split$Ctrl[c("Set_FIA3", "Fast_sample")],
  y = qc_split$Case[c("Set_FIA3", "Fast_sample")],
  by = "Set_FIA3"
)
names(qc_overlap) <- c("Set_FIA3", "ctrl_", "case_")
qc_overlap2 <- data.frame(
  qc_overlap["Set_FIA3"],
  model.matrix(~ ctrl_ - 1, data = qc_overlap["ctrl_"]),
  model.matrix(~ case_ - 1, data = qc_overlap["case_"])
)
UpSetR::upset(
  qc_overlap2,
  #nsets = 8, 
  sets = c("ctrl_0.4h",
           "case_0.4h",
           "ctrl_4.6h",
           "case_4.6h",
           "ctrl_6.8h",
           "case_6.8h",
           "ctrl_.8h",
           "case_.8h"),
  keep.order = TRUE
)

Conclusion: 12 pairs where fasted subjects were matched with recently fed subjects.

Olink INFLAMMATION (v.3021)

A. Missing values

naniar::vis_miss(qc$inf)

B. Lower limit of detection (LOD)

Values with > 5 % of values below LOD were excluded from further analysis. This reduced the count of proteins by 45 from 92 to 47.

length(qc$inf_proteins)

## [1] 47

length(preproc$raw_inf$varmeta_data$name1)

## [1] 92

length(preproc$raw_inf$varmeta_data$name1) - length(qc$inf_proteins)

## [1] 45

C. Batch effects

PCA colored for plate ID

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Plate_ID_1"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

PLS model

# ropls::opls(
#   x = qc$inf[, qc$inf_proteins],
#   y = qc$inf[, "Plate_ID_1"]
# )

PLS model: force two predictive components

ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "Plate_ID_1"],
  predI = 2
)

## PLS-DA
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total    0.236   0.0231 -0.0475 0.397   2   0  0.6   1

D. Preanalytic effects

Freeze-thaw cycles

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["case_freeze_thawed"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls1 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "case_freeze_thawed"]
)

## PLS-DA
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.277   0.0838  0.0168 0.282   2   0 0.05 0.05

ropls::plot(pls1, typeVc = "xy-weight")

Fasting

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Fast_sample"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls2 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "Fast_sample"],
  predI = 2
)

## PLS-DA
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total    0.202   0.0499 -0.0505 0.309   2   0 0.05 0.9

Sample date

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Sample_date"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls3 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = sapply(
    qc$inf[, "Sample_date"] ,
      function(x) {
        round(((as.numeric(x) - 6159) / 365.25 + 1986.86242), 0)
      }
    )
)

## PLS
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.204   0.0467  0.0184  3.63   1   0  0.4 0.05

## Warning: Single component model: only 'overview' and 'permutation' (in case of
## single response (O)PLS(-DA)) plots available

ropls::plot(pls3, typeVc = "xy-weight")

## Warning: Single component model: only 'overview' and 'permutation' (in case of
## single response (O)PLS(-DA)) plots available

Age at sampling

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Age"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls4 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "Age"]
)

## PLS
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.303    0.191   0.125  6.31   2   0 0.05 0.05

ropls::plot(pls4, typeVc = "xy-weight")

Sex

plot_pca(
      x = qc$inf[, qc$inf_proteins],
      c = qc$inf["Sex"],
      opt_ellipse = "auto",
      return_fit = FALSE,
      n_pc = 7
)

## $pc1vs2_plot

## 
## $scores_plot

## 
## $var_expl

## 
## $loadings_plot

pls4 <- ropls::opls(
  x = qc$inf[, qc$inf_proteins],
  y = qc$inf[, "Sex"]
)

## PLS-DA
## 448 samples x 47 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.265    0.202   0.101 0.362   2   0 0.05 0.05

ropls::plot(pls4, typeVc = "xy-weight")

E. Olink QC flag

Looking at QC-flagged samples in plates 1-5 (coloured blue):

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-1", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-2", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-3", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-4", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

geom_rwise_boxplot(
      x = subset(qc$inf, Plate_ID_1 == "FIA3_2-INF5", select = qc$inf_proteins),
      fill1 = qc$inf$QC_Warning_1
)

F. Plate randomization

Checks if case/ctrl pairs were analyzed on the same plates

table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-1", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   45

table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-2", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   45

table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-3", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   45

table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF-4", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   45

table(table(subset(qc$inf, Plate_ID_1 == "FIA3_2-INF5", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   44

Check covariate randomization across plates

# BMI
anova(lm(formula = BMI ~ Plate_ID_1, data = qc$inf))

## Analysis of Variance Table
## 
## Response: BMI
##             Df Sum Sq Mean Sq F value Pr(>F)
## Plate_ID_1   4   30.0  7.5077  0.4049 0.8051
## Residuals  437 8102.4 18.5411

# Total cholesterol
anova(lm(formula = Chol_tot ~ Plate_ID_1, data = qc$inf))

## Analysis of Variance Table
## 
## Response: Chol_tot
##             Df Sum Sq Mean Sq F value  Pr(>F)  
## Plate_ID_1   4  13.83  3.4587   2.272 0.06077 .
## Residuals  425 646.98  1.5223                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Systolic blood pressure
anova(lm(formula = Sbt_VIP ~ Plate_ID_1, data = qc$inf))

## Analysis of Variance Table
## 
## Response: Sbt_VIP
##             Df Sum Sq Mean Sq F value   Pr(>F)   
## Plate_ID_1   4   6529 1632.37  4.3887 0.001729 **
## Residuals  435 161797  371.95                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(lm(formula = Sbt_VIP ~ Plate_ID_1, data = qc$inf))

## 
## Call:
## lm(formula = Sbt_VIP ~ Plate_ID_1, data = qc$inf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.552 -13.753  -2.045  11.955  54.986 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            138.7528     2.0443  67.873   <2e-16 ***
## Plate_ID_1FIA3_2-INF-2   4.7989     2.9077   1.650   0.0996 .  
## Plate_ID_1FIA3_2-INF-3  -0.7303     2.8911  -0.253   0.8007    
## Plate_ID_1FIA3_2-INF-4  -6.7079     2.8911  -2.320   0.0208 *  
## Plate_ID_1FIA3_2-INF5   -3.7384     2.9162  -1.282   0.2005    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.29 on 435 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.03879,    Adjusted R-squared:  0.02995 
## F-statistic: 4.389 on 4 and 435 DF,  p-value: 0.001729

# Glucose
anova(lm(formula = Glc_0h ~ Plate_ID_1, data = qc$inf))

## Analysis of Variance Table
## 
## Response: Glc_0h
##             Df  Sum Sq Mean Sq F value Pr(>F)
## Plate_ID_1   4   24.69  6.1726   1.456 0.2149
## Residuals  400 1695.75  4.2394

# Smoking status
table(qc$inf[c("Smoker_2fct", "Plate_ID_1")])

##            Plate_ID_1
## Smoker_2fct FIA3_2-INF-1 FIA3_2-INF-2 FIA3_2-INF-3 FIA3_2-INF-4 FIA3_2-INF5
##         no            58           63           65           55          58
##         yes           24           22           24           33          29

chisq.test(table(qc$inf[c("Smoker_2fct", "Plate_ID_1")]))

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$inf[c("Smoker_2fct", "Plate_ID_1")])
## X-squared = 3.7896, df = 4, p-value = 0.4352

Check pre-analytic factor randomization across plates

# Sample date
anova(lm(formula = as.numeric(Sample_date) ~ Plate_ID_1, data = qc$inf))

## Analysis of Variance Table
## 
## Response: as.numeric(Sample_date)
##             Df    Sum Sq  Mean Sq F value    Pr(>F)    
## Plate_ID_1   4  88654663 22163666  13.415 2.499e-10 ***
## Residuals  443 731879195  1652098                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(lm(formula = as.numeric(Sample_date) ~ Plate_ID_1, data = qc$inf))

## 
## Call:
## lm(formula = as.numeric(Sample_date) ~ Plate_ID_1, data = qc$inf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3313.2  -716.7    20.5   734.2  4828.2 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              8296.8      135.5  61.237  < 2e-16 ***
## Plate_ID_1FIA3_2-INF-2    705.2      191.6   3.680 0.000262 ***
## Plate_ID_1FIA3_2-INF-3   1237.5      191.6   6.459 2.78e-10 ***
## Plate_ID_1FIA3_2-INF-4   1175.5      191.6   6.135 1.89e-09 ***
## Plate_ID_1FIA3_2-INF5     861.8      192.7   4.473 9.83e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1285 on 443 degrees of freedom
## Multiple R-squared:  0.108,  Adjusted R-squared:  0.09999 
## F-statistic: 13.42 on 4 and 443 DF,  p-value: 2.499e-10

# Fasting
table(qc$inf[c("Fast_sample", "Plate_ID_1")])

##            Plate_ID_1
## Fast_sample FIA3_2-INF-1 FIA3_2-INF-2 FIA3_2-INF-3 FIA3_2-INF-4 FIA3_2-INF5
##        0-4h            5            8            3            6           2
##        4-6h           26            6           15            8          27
##        6-8h            1            2            3            0           2
##        >8h            58           74           69           76          57

chisq.test(table(qc$inf[c("Fast_sample", "Plate_ID_1")]))

## Warning in chisq.test(table(qc$inf[c("Fast_sample", "Plate_ID_1")])): Chi-
## squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$inf[c("Fast_sample", "Plate_ID_1")])
## X-squared = 36.23, df = 12, p-value = 0.0002975

table(table(subset(qc$inf, Fast_sample == "0-4h", select = "Set_FIA3")) == 2)

## 
## FALSE  TRUE 
##    16     4

table(table(subset(qc$inf, Fast_sample == "4-6h", select = "Set_FIA3")) == 2)

## 
## FALSE  TRUE 
##    32    25

table(table(subset(qc$inf, Fast_sample == "6-8h", select = "Set_FIA3")) == 2)

## 
## FALSE 
##     8

table(table(subset(qc$inf, Fast_sample == ">8h", select = "Set_FIA3")) == 2)

## 
## FALSE  TRUE 
##    46   144

# Freeze/thaw status
table(qc$inf[c("case_freeze_thawed", "Plate_ID_1")])

##                                              Plate_ID_1
## case_freeze_thawed                            FIA3_2-INF-1 FIA3_2-INF-2
##   NO                                                    62           58
##   ONCE                                                  18           20
##   Thawed due to broken freezer but still cold            6            8
##   TWICE                                                  4            4
##                                              Plate_ID_1
## case_freeze_thawed                            FIA3_2-INF-3 FIA3_2-INF-4
##   NO                                                    74           78
##   ONCE                                                  12            4
##   Thawed due to broken freezer but still cold            4            8
##   TWICE                                                  0            0
##                                              Plate_ID_1
## case_freeze_thawed                            FIA3_2-INF5
##   NO                                                   82
##   ONCE                                                  2
##   Thawed due to broken freezer but still cold           4
##   TWICE                                                 0

chisq.test(table(qc$inf[c("case_freeze_thawed", "Plate_ID_1")]))

## Warning in chisq.test(table(qc$inf[c("case_freeze_thawed", "Plate_ID_1")])):
## Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(qc$inf[c("case_freeze_thawed", "Plate_ID_1")])
## X-squared = 44.039, df = 12, p-value = 1.504e-05

table(table(subset(qc$inf, case_freeze_thawed == "NO", select = "Set_FIA3")) == 2)

## 
## TRUE 
##  177

table(table(subset(qc$inf, case_freeze_thawed == "ONCE", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   28

table(table(subset(qc$inf, case_freeze_thawed == "Thawed due to broken freezer but still cold", select = "Set_FIA3")) == 2)

## 
## TRUE 
##   15

table(table(subset(qc$inf, case_freeze_thawed == "TWICE", select = "Set_FIA3")) == 2)

## 
## TRUE 
##    4

Case control matching was perfect for freeze/thaw cycles, but not for fasting time.

Check fasting time overlap using a UpSet plot.

qc_split <- split(
  qc$inf[c(preanalytic_factors, randomization_factors)],
  f = qc$inf$Case_control
)
qc_overlap <- merge(
  x = qc_split$Ctrl[c("Set_FIA3", "Fast_sample")],
  y = qc_split$Case[c("Set_FIA3", "Fast_sample")],
  by = "Set_FIA3"
)
names(qc_overlap) <- c("Set_FIA3", "ctrl_", "case_")
qc_overlap2 <- data.frame(
  qc_overlap["Set_FIA3"],
  model.matrix(~ ctrl_ - 1, data = qc_overlap["ctrl_"]),
  model.matrix(~ case_ - 1, data = qc_overlap["case_"])
)
UpSetR::upset(
  qc_overlap2,
  #nsets = 8, 
  sets = c("ctrl_0.4h",
           "case_0.4h",
           "ctrl_4.6h",
           "case_4.6h",
           "ctrl_6.8h",
           "case_6.8h",
           "ctrl_.8h",
           "case_.8h"),
  keep.order = TRUE
)

Conclusion: 12 pairs where fasted subjects were matched with recently fed subjects.

Name		Name	Last commit message	Last commit date
Latest commit History 2 Commits
2-QC_doc_files/figure-gfm		2-QC_doc_files/figure-gfm
out		out
src		src
.gitattributes		.gitattributes
1-preproc.R		1-preproc.R
2-QC_doc.Rmd		2-QC_doc.Rmd
3-table1.R		3-table1.R
4-tableS1.R		4-tableS1.R
5-tableS2.R		5-tableS2.R
6-figure12.R		6-figure12.R
7-table2.R		7-table2.R
LICENSE		LICENSE
README.md		README.md
data_paths.csv		data_paths.csv

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

QC document

QC: Proximity extension assay

Olink ORGAN DAMAGE (v.3311)

A. Missing values

B. Lower limit of detection (LOD)

C. Batch effects

D. Preanalytic effects

Freeze-thaw cycles

Fasting

Sample date

Age at sampling

Sex

E. Olink QC flag

F. Plate randomization

Olink INFLAMMATION (v.3021)

A. Missing values

B. Lower limit of detection (LOD)

C. Batch effects

D. Preanalytic effects

Freeze-thaw cycles

Fasting

Sample date

Age at sampling

Sex

E. Olink QC flag

F. Plate randomization

About

Releases

Packages

Languages

License

fredlandfors/FIA3-SCD_protf

Folders and files

Latest commit

History

Repository files navigation

QC document

QC: Proximity extension assay

Olink ORGAN DAMAGE (v.3311)

A. Missing values

B. Lower limit of detection (LOD)

C. Batch effects

D. Preanalytic effects

Freeze-thaw cycles

Fasting

Sample date

Age at sampling

Sex

E. Olink QC flag

F. Plate randomization

Olink INFLAMMATION (v.3021)

A. Missing values

B. Lower limit of detection (LOD)

C. Batch effects

D. Preanalytic effects

Freeze-thaw cycles

Fasting

Sample date

Age at sampling

Sex

E. Olink QC flag

F. Plate randomization

About

Resources

License

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages