diff --git a/R/PseudovaluesSP_A.R b/R/PseudovaluesSP_A.R new file mode 100644 index 00000000..e1fd3f89 --- /dev/null +++ b/R/PseudovaluesSP_A.R @@ -0,0 +1,95 @@ +# simplified for debugging +PseudovaluesSP_A <- function(dataset, FOM) { + + NL <- dataset$ratings$NL + LL <- dataset$ratings$LL + maxNL <- dim(NL)[4] + maxLL <- dim(LL)[4] + I <- dim(NL)[1] + J <- dim(NL)[2] + K <- dim(NL)[3] + K2 <- dim(LL)[3] + K1 <- K - K2 + + t <- dataset$descriptions$truthTableStr + + jkFomValues <- array(dim = c(I, J/I, K)) + jkPseudoValues <- array(dim = c(I, J/I, K)) + + fomArray <- FOM_SP(dataset, FOM) + + lastCase <- 0 + caseTransitions <- array(dim = J) + for (i in 1:I) { + for (j in (J/I*(i-1)+1):(J/I*i)) { + j1 <- j-J/I*(i-1) + perCase_ij <- dataset$lesions$perCase + lID_ij <- dataset$lesions$IDs + lW_ij <- dataset$lesions$weights + nl_ij <- NL[i, j, 1:K, 1:maxNL]; dim(nl_ij) <- c(K, maxNL) + ll_ij <- LL[i, j, 1:K2, 1:maxLL]; dim(ll_ij) <- c(K2, maxLL) + + for (k in 1:K) { + if (k <= K1) { + nl_ij_jk <- nl_ij[-k, ];dim(nl_ij_jk) <- c(K - 1, maxNL) + ll_ij_jk <- ll_ij;dim(ll_ij_jk) <- c(K2, maxLL) + lID_ij_jk <- lID_ij;dim(lID_ij_jk) <- c(K2, maxLL) + lW_ij_jk <- lW_ij;dim(lW_ij_jk) <- c(K2, maxLL) + jkFomValues[i, j1, k] <- + MyFom_ij_SP(nl_ij_jk, ll_ij_jk, perCase_ij, + lID_ij_jk, lW_ij_jk, maxNL, maxLL, + K1 - 1, K2, FOM) + + jkPseudoValues[i, j1, k] <- + fomArray[i, j1] * K - jkFomValues[i, j1, k] * (K - 1) + } else { # (k > K1) + nl_ij_jk <- nl_ij[-k, ];dim(nl_ij_jk) <- c(K - 1, maxNL) + ll_ij_jk <- ll_ij[-(k - K1), ];dim(ll_ij_jk) <- c(K2 - 1, maxLL) + lV_ij_jk <- perCase_ij[-(k - K1)] + lW_ij_jk <- lW_ij[-(k - K1), ];dim(lW_ij_jk) <- c(K2 - 1, maxLL) + lID_ij_jk <- lID_ij[-(k - K1), ];dim(lID_ij_jk) <- c(K2 - 1, maxLL) + jkFomValues[i, j1, k] <- + MyFom_ij_SP(nl_ij_jk, ll_ij_jk, lV_ij_jk, + lID_ij_jk, lW_ij_jk, maxNL, maxLL, + K1, K2 - 1, FOM) + jkPseudoValues[i, j1, k] <- + fomArray[i, j1] * K - jkFomValues[i, j1, k] * (K - 1) + } + } + # center the pseudovalues + if (FOM %in% c("MaxNLF", "ExpTrnsfmSp", "HrSp")) { + # FOMs defined over NORMAL cases + jkPseudoValues[i, j1, ] <- jkPseudoValues[i, j1, ] + + (fomArray[i, j1] - mean(jkPseudoValues[i, j1, ])) + } else if (FOM %in% c("MaxLLF", "HrSe")) { + # FOMs defined over ABNORMAL cases + jkPseudoValues[i, j1, ] <- + jkPseudoValues[i, j1, ] + + (fomArray[i, j1] - mean(jkPseudoValues[i, j1, ])) + } else { + # FOMs defined over ALL cases + jkPseudoValues[i, j1, ] <- + jkPseudoValues[i, j1, ] + (fomArray[i, j1] - mean(jkPseudoValues[i, j1, ])) + } + caseTransitions[j] <- lastCase + lastCase <- (lastCase + K) %% K + } + } + + # there should not be any NAs in each of the following arrays + # any(is.na(jkPseudoValues)) + # [1] FALSE + # any(is.na(jkFomValues)) + # [1] FALSE + # any(is.na(fomArray)) + # [1] FALSE + + caseTransitions <- c(caseTransitions, K) + return(list( + jkPseudoValues = jkPseudoValues, + jkFomValues = jkFomValues, + caseTransitions = caseTransitions + )) +} + + diff --git a/R/StSP.R b/R/StSP.R index e90bbf61..e809558e 100644 --- a/R/StSP.R +++ b/R/StSP.R @@ -578,7 +578,7 @@ convert2dataset <- function(NL, LL, LL_IL, -FOM_SP <- function(dataset, FOM = "wAFROC") { # dpc +FOM_SP <- function(dataset, FOM) { # dataType <- dataset$descriptions$type # if (dataType == "ROC" && FOM != "Wilcoxon") { @@ -665,9 +665,9 @@ FOM_SP <- function(dataset, FOM = "wAFROC") { # dpc modalityID <- dataset$descriptions$modalityID # readers in other treatments can have same names as there is not need to - # distinguish between them - by definition readers with same names in + # distinguish between them as, readers with same names in # different treatments are different readers - readerID <- dataset$descriptions$readerID[1:(J/2)] + readerID <- dataset$descriptions$readerID[1:(J/I)] rownames(fomArray) <- paste("trt", sep = "", modalityID) colnames(fomArray) <- paste("rdr", sep = "", readerID) return(as.data.frame(fomArray)) @@ -677,7 +677,7 @@ FOM_SP <- function(dataset, FOM = "wAFROC") { # dpc -PseudovaluesSP_A <- function(dataset, FOM) { +PseudovaluesSP_A1 <- function(dataset, FOM) { dataType <- dataset$descriptions$type NL <- dataset$ratings$NL @@ -808,7 +808,7 @@ PseudovaluesSP_A <- function(dataset, FOM) { jkPseudoValues[i, j1, kIndxAll] <- fomArray[i, j1] * K_ij - jkFomValues[i, j1, kIndxAll] * (K_ij - 1) } else stop("overwriting Pseudovalues") - } else { + } else { # (k > K1_ij) nlij_jk <- nl_ij[-k, ];dim(nlij_jk) <- c(K_ij - 1, maxNL) llij_jk <- ll_ij[-(k - K1_ij), ];dim(llij_jk) <- c(K2_ij - 1, maxLL) lV_j_jk <- perCase_ij[-(k - K1_ij)] diff --git a/R/StSP_A.R b/R/StSP_A.R index d93e98c8..c9fc2951 100644 --- a/R/StSP_A.R +++ b/R/StSP_A.R @@ -1,44 +1,44 @@ # Implement SP_A formulae in Hillis 2014 for balance design # total number of readers, split equally between all treatments -OR_SP_A <- function(dataset, FOM, alpha, analysisOption) +OR_SP_A <- function(dataset, FOM, alpha, analysisOption) { - + I <- dim(dataset$ratings$NL)[1] - + # J is the number of readers in each treatment; as clarified in Hillis - # 2023 paper; - J <- dim(dataset$ratings$NL)[2]/I - + # 2023 paper; + J <- dim(dataset$ratings$NL)[2]/I + # no need to have different reader names in different treatments modalityID <- dataset$descriptions$modalityID readerID <- dataset$descriptions$readerID[1:J/I] - - ############################################################################## + + ############################################################################## # get figures of merit etc for each modality i and reader j theta_ij <- as.matrix(FOM_SP(dataset, FOM)) cat("Figure of merit matrix\n") print(theta_ij) cat("\n") - + # average FOM over readers in each treatment theta_i_dot <- array(dim = I) for (i in 1:I) { theta_i_dot[i] <- mean(theta_ij[i,]) } - + # average FOM over readers and treatments theta_dot_dot <- mean(theta_ij) - - ############################################################################## + + ############################################################################## # get mean squares - # msT denotes MS(T), in Hillis 2014 p 331 + # msT denotes MS(T), in Hillis 2014 p 332 msT <- 0 for (i in 1:I) { msT <- msT + (theta_i_dot[i] - theta_dot_dot)^2 } msT <- msT * J/(I-1) cat("msT =", msT, "\n") - + # msR_i denotes MS(R) for modality i, in Hillis 2014 p 341 msR_i <- rep(0, I) for (i in 1:I) { @@ -47,7 +47,7 @@ OR_SP_A <- function(dataset, FOM, alpha, analysisOption) } msR_i[i] <- msR_i[i] / (J-1) } - + # msTR denotes MS(TR), in Hillis 2014 p 331 msTR <- 0 for (i in 1:I) { @@ -56,12 +56,12 @@ OR_SP_A <- function(dataset, FOM, alpha, analysisOption) } } msTR <- msTR/(I-1)/(J-1) - - # R(T) denotes reader nested within treatment + + # In Hillis 2014, R(T) denotes reader nested within treatment # msR__T__ denotes MS[R(T)] # double underscore denotes left/right parenthesis # Hillis 2014, definition of MS[R(T)], last line on page 344 - + # note added after reading Hillis 2023 # This matches ??? msR__T__ <- 0 @@ -72,128 +72,129 @@ OR_SP_A <- function(dataset, FOM, alpha, analysisOption) } msR__T__ <- msR__T__ / I / (J - 1) cat("MS(R(T)) =", msR__T__, "\n") - - ############################################################################## + + ############################################################################## # get pseudo values ret1 <- PseudovaluesSP_A(dataset, FOM) - - - ############################################################################## + + + ############################################################################## # get variance covariance matrix, averaged over all treatments - ret <- FOMijk2ORVarComp(ret1$jkFomValues, varInflFactor = TRUE) + ret <- FOMijk2ORVarCompSP(ret1$jkFomValues, varInflFactor = TRUE) Var <- ret$Var Cov2 <- ret$Cov2 - Cov3 <- ret$Cov3 + Cov3 <- ret$Cov3 # setting Cov3 <- 0 makes no difference to values # even though Cov3 = -3.469447e-18 cat("Cov2 =", Cov2, ", Cov3 =", Cov3, "\n") - - ############################################################################## + + ############################################################################## # Cov2 for individual treatments Cov2_i <- rep(0, I) for (i in 1:I) { ret_i <- FOMjk2ORVarComp(ret1$jkFomValues[i,,], varInflFactor = TRUE) Cov2_i[i] <- ret_i$Cov2 } - - ############################################################################## + + ############################################################################## # Overall F-test # F_OR statistic, last equation on page 344 - den <- msR__T__ + J * max(Cov2-Cov3,0) + den <- msR__T__ + J*max(Cov2-Cov3,0) F_OR <- msT/den cat("F_OR =", F_OR, "\n") - + # implement Eqn. 27 df2 <- (msR__T__ + J * max(Cov2-Cov3,0))^2 /((msR__T__^2)/(J-1)/I) cat("ddf_H = df2 = ", df2, "\n") - - ############################################################################## + + ############################################################################## # p-value pValue <- 1 - pf(F_OR, I - 1, df2) cat("pValue = ", pValue, "\n") - - ############################################################################## - # confidence intervals for difference FOM between first two treatments + + ############################################################################## + # confidence intervals for difference FOM between first two treatments # compare first two treatments only - - trtMeanDiffs <- theta_i_dot[1] - theta_i_dot[2] - + + stop("need to fix") + trtMeanDiffs <- theta_i_dot[1] - theta_i_dot[2] + # as on page 346, first para # el_1 = 1; el_2 = - 1 ## Note to myself ... - ## Hillis does not state a constraint on the l_i, namely they + ## Hillis does not state a constraint on the l_i, namely they ## should sum to zero; otherwise one could use l_1 = 1_2 = 1/2 and compute the CI ## on the average FOM using the very same method, which does not match his result sq_root_V_hat <- sqrt((I / J) * (msR__T__ + J * max(Cov2-Cov3,0))) - + # following matches Hillis 2023 Eqn. 6 for balanced design - CI <- sort(c(trtMeanDiffs - qt(alpha/2, df2) * sq_root_V_hat, + CI <- sort(c(trtMeanDiffs - qt(alpha/2, df2) * sq_root_V_hat, trtMeanDiffs + qt(alpha/2, df2) * sq_root_V_hat)) cat("CI_lower = ", CI[1], ", CI_upper = ", CI[2], "\n") - + # last equation in box on page 346, using alternate method ... df2_i <- array(dim = I) for (i in 1:I) { df2_i[i] <- (msR_i[i] + J * max(Cov2_i[i], 0))^2/((msR_i[i])^2/(J-1)) } - - ############################################################################## + + ############################################################################## # confidence interval for reader averaged FOM for each treatment # as on page 346, first para # alternative method, using only data from each modality - + ciAvgRdrEachTrt <- array(dim = c(I,2)) - + # these are the formulae for V_hat_i and df2_i in the text area in the middle of page 346 # skipping the formulae that take both modalities into account sqrt_V_hat_i <- array(dim = I) for (i in 1:I) { sqrt_V_hat_i[i] <- sqrt((1 / J) * (msR_i[i] + J * max(Cov2_i[i],0))) - ciAvgRdrEachTrt[i,] <- sort(c(theta_i_dot[i] - qt(alpha/2, df2) * sqrt_V_hat_i[i], + ciAvgRdrEachTrt[i,] <- sort(c(theta_i_dot[i] - qt(alpha/2, df2) * sqrt_V_hat_i[i], theta_i_dot[i] + qt(alpha/2, df2) * sqrt_V_hat_i[i])) } - + stop("add code here") # return(list( # FOMs = FOMs, # ANOVA = ANOVA, # RRRC = RRRC # )) - -} + +} # unbalanced dataset -OR_SP_A_UNB <- function(dataset, FOM, alpha, analysisOption) +OR_SP_A_UNB <- function(dataset, FOM, alpha, analysisOption) { - + I <- dim(dataset$ratings$NL)[1] - + # J is the number of readers in each treatment; as clarified in Hillis - # 2023 paper; - J <- dim(dataset$ratings$NL)[2]/I - + # 2023 paper; + J <- dim(dataset$ratings$NL)[2]/I + # no need to have different reader names in different treatments modalityID <- dataset$descriptions$modalityID readerID <- dataset$descriptions$readerID[1:J/I] - - ############################################################################## + + ############################################################################## # get figures of merit etc for each modality i and reader j theta_ij <- as.matrix(FOM_SP(dataset, FOM)) cat("Figure of merit matrix\n") print(theta_ij) cat("\n") - + # average FOM over readers in each treatment theta_i_dot <- array(dim = I) for (i in 1:I) { theta_i_dot[i] <- mean(theta_ij[i,]) } - + # average FOM over readers and treatments theta_dot_dot <- mean(theta_ij) - - ############################################################################## + + ############################################################################## # get mean squares # msT denotes MS(T), in Hillis 2014 p 331 msT <- 0 @@ -202,7 +203,7 @@ OR_SP_A_UNB <- function(dataset, FOM, alpha, analysisOption) } msT <- msT * J/(I-1) cat("msT =", msT, "\n") - + # msR_i denotes MS(R) for modality i, in Hillis 2014 p 341 msR_i <- rep(0, I) for (i in 1:I) { @@ -211,7 +212,7 @@ OR_SP_A_UNB <- function(dataset, FOM, alpha, analysisOption) } msR_i[i] <- msR_i[i] / (J-1) } - + # msTR denotes MS(TR), in Hillis 2014 p 331 msTR <- 0 for (i in 1:I) { @@ -220,12 +221,12 @@ OR_SP_A_UNB <- function(dataset, FOM, alpha, analysisOption) } } msTR <- msTR/(I-1)/(J-1) - + # R(T) denotes reader nested within treatment # msR__T__ denotes MS[R(T)] # double underscore denotes left/right parenthesis # Hillis 2014, definition of MS[R(T)], last line on page 344 - + # note added after reading Hillis 2023 # This matches ??? msR__T__ <- 0 @@ -236,92 +237,92 @@ OR_SP_A_UNB <- function(dataset, FOM, alpha, analysisOption) } msR__T__ <- msR__T__ / I / (J - 1) cat("MS(R(T)) =", msR__T__, "\n") - - ############################################################################## + + ############################################################################## # get pseudo values ret1 <- PseudovaluesSP_A(dataset, FOM) - - - ############################################################################## + + + ############################################################################## # get variance covariance matrix, averaged over all treatments ret <- FOMijk2ORVarComp(ret1$jkFomValues, varInflFactor = TRUE) Var <- ret$Var Cov2 <- ret$Cov2 - Cov3 <- ret$Cov3 + Cov3 <- ret$Cov3 # setting Cov3 <- 0 makes no difference to values # even though Cov3 = -3.469447e-18 cat("Cov2 =", Cov2, ", Cov3 =", Cov3, "\n") - - ############################################################################## + + ############################################################################## # Cov2 for individual treatments Cov2_i <- rep(0, I) for (i in 1:I) { ret_i <- FOMjk2ORVarComp(ret1$jkFomValues[i,,], varInflFactor = TRUE) Cov2_i[i] <- ret_i$Cov2 } - - ############################################################################## + + ############################################################################## # Overall F-test # F_OR statistic, last equation on page 344 den <- msR__T__ + J * max(Cov2-Cov3,0) F_OR <- msT/den cat("F_OR =", F_OR, "\n") - + # implement Eqn. 27 df2 <- (msR__T__ + J * max(Cov2-Cov3,0))^2 /((msR__T__^2)/(J-1)/I) cat("ddf_H = df2 = ", df2, "\n") - - ############################################################################## + + ############################################################################## # p-value pValue <- 1 - pf(F_OR, I - 1, df2) cat("pValue = ", pValue, "\n") - - ############################################################################## - # confidence intervals for difference FOM between first two treatments + + ############################################################################## + # confidence intervals for difference FOM between first two treatments # compare first two treatments only - - trtMeanDiffs <- theta_i_dot[1] - theta_i_dot[2] - + + trtMeanDiffs <- theta_i_dot[1] - theta_i_dot[2] + # as on page 346, first para # el_1 = 1; el_2 = - 1 ## Note to myself ... - ## Hillis does not state a constraint on the l_i, namely they + ## Hillis does not state a constraint on the l_i, namely they ## should sum to zero; otherwise one could use l_1 = 1_2 = 1/2 and compute the CI ## on the average FOM using the very same method, which does not match his result sq_root_V_hat <- sqrt((I / J) * (msR__T__ + J * max(Cov2-Cov3,0))) - + # following matches Hillis 2023 Eqn. 6 for balanced design - CI <- sort(c(trtMeanDiffs - qt(alpha/2, df2) * sq_root_V_hat, + CI <- sort(c(trtMeanDiffs - qt(alpha/2, df2) * sq_root_V_hat, trtMeanDiffs + qt(alpha/2, df2) * sq_root_V_hat)) cat("CI_lower = ", CI[1], ", CI_upper = ", CI[2], "\n") - + # last equation in box on page 346, using alternate method ... df2_i <- array(dim = I) for (i in 1:I) { df2_i[i] <- (msR_i[i] + J * max(Cov2_i[i], 0))^2/((msR_i[i])^2/(J-1)) } - - ############################################################################## + + ############################################################################## # confidence interval for reader averaged FOM for each treatment # as on page 346, first para # alternative method, using only data from each modality - + ciAvgRdrEachTrt <- array(dim = c(I,2)) - + # these are the formulae for V_hat_i and df2_i in the text area in the middle of page 346 # skipping the formulae that take both modalities into account sqrt_V_hat_i <- array(dim = I) for (i in 1:I) { sqrt_V_hat_i[i] <- sqrt((1 / J) * (msR_i[i] + J * max(Cov2_i[i],0))) - ciAvgRdrEachTrt[i,] <- sort(c(theta_i_dot[i] - qt(alpha/2, df2) * sqrt_V_hat_i[i], + ciAvgRdrEachTrt[i,] <- sort(c(theta_i_dot[i] - qt(alpha/2, df2) * sqrt_V_hat_i[i], theta_i_dot[i] + qt(alpha/2, df2) * sqrt_V_hat_i[i])) } - + stop("add code here") # return(list( # FOMs = FOMs, # ANOVA = ANOVA, # RRRC = RRRC # )) - -} + +} diff --git a/inst/extdata/toyFiles/ROC/rocSpA1.xlsx b/inst/extdata/toyFiles/ROC/rocSpA1.xlsx new file mode 100644 index 00000000..6f0d7f13 Binary files /dev/null and b/inst/extdata/toyFiles/ROC/rocSpA1.xlsx differ