Skip to content

Commit

Permalink
Implementing diagnostic for exposure rate changes due to outcome
Browse files Browse the repository at this point in the history
  • Loading branch information
schuemie committed Oct 7, 2024
1 parent fd71ffd commit 65ba03f
Show file tree
Hide file tree
Showing 8 changed files with 411 additions and 35 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ S3method(print,SccsModel)
S3method(print,summary.SccsData)
S3method(print,summary.SccsIntervalData)
export(computeEventDependentObservationP)
export(computeExposureChangeP)
export(computeMdrr)
export(computePreExposureGainP)
export(computeTimeStability)
Expand Down Expand Up @@ -58,6 +59,7 @@ export(plotCalendarTimeSpans)
export(plotEventObservationDependence)
export(plotEventToCalendarTime)
export(plotExposureCentered)
export(plotOutcomeCentered)
export(plotSeasonality)
export(runSccsAnalyses)
export(saveExposuresOutcomeList)
Expand Down
190 changes: 190 additions & 0 deletions R/Diagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,193 @@ computeEventDependentObservationP <- function(sccsModel) {
p <- 0.5 * pchisq(2 * llr, df = 0, lower.tail = FALSE) + 0.5 * pchisq(2 * llr, df = 1, lower.tail = FALSE)
return(p)
}


computeExposureDaysToEvent <- function(studyPopulation, sccsData, exposureEraId) {
# This number of days before exposure start are assumed to be dealt with and are removed from
# both numerator and denominator:
preExposureDays <- 30

cases <- studyPopulation$cases |>
select("caseId", "startDay", "endDay")

# Keep only exposures that overlap with the observation periods of the study population (also
# truncate exposures to the observation period):
exposures <- sccsData$eras |>
filter(.data$eraId == exposureEraId & .data$eraType == "rx") |>
inner_join(cases,
by = join_by("caseId", "eraEndDay" >= "startDay", "eraStartDay" < "endDay"),
copy = TRUE) |>
collect() |>
mutate(eraStartDay = pmax(eraStartDay, startDay),
eraEndDay = pmin(eraEndDay, endDay))

if (nrow(exposures) == 0) {
warning("No exposures found with era ID ", exposureEraId)
return(NULL)
}
firstOutcomes <- studyPopulation$outcomes |>
group_by(.data$caseId) |>
filter(row_number(.data$outcomeDay) == 1)

# Merge overlapping exposures if needed:
exposures <- exposures |>
arrange(caseId, eraStartDay) |>
group_by(caseId) |>
mutate(newGroup = cumsum(lag(eraEndDay, default = first(eraEndDay)) < eraStartDay)) |>
group_by(caseId, newGroup) |>
summarise(
eraStartDay = min(eraStartDay),
eraEndDay = max(eraEndDay),
.groups = 'drop'
) |>
select(caseId, eraStartDay, eraEndDay)

# Ensure at least <preExposureDays> before each exposure start, by moving end day back:
truncatedExposures <- exposures |>
arrange(caseId, eraStartDay) |>
group_by(caseId) |>
mutate(
eraEndDay = ifelse(lead(eraStartDay, default = Inf) - eraEndDay < preExposureDays, lead(eraStartDay) - preExposureDays, eraEndDay)
) |>
filter(eraEndDay > eraStartDay) |>
select(caseId, eraStartDay, eraEndDay)

exposureDeltas <- truncatedExposures |>
inner_join(firstOutcomes, by = join_by("caseId")) |>
mutate(deltaExposureStart = .data$eraStartDay - .data$outcomeDay,
deltaExposureEnd = .data$eraEndDay - .data$outcomeDay) |>
select("caseId", "deltaExposureStart", "deltaExposureEnd")

# Remove pre-exposure time from observation periods:
joined <- studyPopulation$cases |>
select(caseId, startDay, endDay) |>
left_join(truncatedExposures, by = "caseId") |>
arrange(caseId, eraStartDay)
truncatedObservationPeriods <- joined |>
group_by(caseId) |>
mutate(
periodStart = lag(eraStartDay, default = first(startDay)),
periodEnd = pmin(eraStartDay - 30, endDay),
lastPeriodStart = eraStartDay,
lastPeriodEnd = endDay
) |>
filter(periodStart <= periodEnd) |>
select(caseId, start = periodStart, end = periodEnd) |>
bind_rows(
joined |>
group_by(caseId) |>
slice(n()) |>
transmute(caseId, start = if_else(is.na(eraStartDay), startDay, eraStartDay), end = endDay) |>
filter(start <= end)
) |>
arrange(caseId, start) |>
ungroup()

observationPeriodDeltas <- truncatedObservationPeriods |>
inner_join(firstOutcomes, by = join_by("caseId")) |>
mutate(deltaStart = .data$start - .data$outcomeDay,
deltaEnd = .data$end - .data$outcomeDay) |>
select("caseId", "deltaStart", "deltaEnd")

return(list(exposureDeltas = exposureDeltas, observationPeriodDeltas = observationPeriodDeltas))
}

#' Compute p for whether exposure probability changed following the outcome
#'
#' @param exposureEraId The exposure to create the era data for. If not specified it is
#' assumed to be the one exposure for which the data was loaded from
#' the database.
#' @template StudyPopulation
#' @template SccsData
#' @param bounds Bounds for the null of no change in the exposure rate.
#'
#' @return
#' The p-value
#'
#' @export
computeExposureChangeP <- function(sccsData, studyPopulation, exposureEraId = NULL, bounds = log(c(0.5, 2))) {
errorMessages <- checkmate::makeAssertCollection()
checkmate::assertClass(sccsData, "SccsData", add = errorMessages)
checkmate::assertList(studyPopulation, min.len = 1, add = errorMessages)
checkmate::assertInt(exposureEraId, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

if (is.null(exposureEraId)) {
exposureEraId <- attr(sccsData, "metaData")$exposureIds
if (length(exposureEraId) != 1) {
stop("No exposure ID specified, but multiple exposures found")
}
}
data <- computeExposureDaysToEvent(studyPopulation = studyPopulation,
sccsData = sccsData,
exposureEraId = exposureEraId)
if (is.null(data)) {
return(NA)
}
periods <- dplyr::tibble(status = c(0,1),
start = c(-30, 0),
end = c(-1, 30))

exposure <- periods |>
cross_join(data$exposureDeltas) |>
mutate(daysExposure = pmax(0, pmin(end, deltaExposureEnd) - pmax(start, deltaExposureStart))) |>
group_by(caseId, status) |>
summarise(daysExposure = sum(daysExposure), .groups = "drop") |>
select(caseId, status, daysExposure)

observation <- periods |>
cross_join(data$observationPeriodDeltas) |>
mutate(daysObserved = pmax(0, pmin(end, deltaEnd) - pmax(start, deltaStart))) |>
group_by(caseId, status) |>
summarise(daysObserved = sum(daysObserved), .groups = "drop") |>
select(caseId, status, daysObserved)


casesWithExposure <- exposure |>
distinct(caseId) |>
pull()

poissonData <- observation |>
filter(caseId %in% casesWithExposure & daysObserved > 0) |>
left_join(exposure, by = join_by(caseId, status)) |>
mutate(
rowId = row_number(),
covariateId = 1
) |>
select(
"rowId",
stratumId = "caseId",
"covariateId",
covariateValue = "status",
time = "daysObserved",
y = "daysExposure"
)

cyclopsData <- Cyclops::convertToCyclopsData(outcomes = poissonData,
covariates = poissonData,
addIntercept = FALSE,
modelType = "cpr",
quiet = TRUE)
fit <- Cyclops::fitCyclopsModel(cyclopsData)
fit$log_likelihood
logRr <- coef(fit)
if (logRr >= bounds[1] && logRr <= bounds[2]) {
llNull <- fit$log_likelihood
} else {
if (logRr < bounds[1]) {
nullLogRr <- bounds[1]
} else {
nullLogRr <- bounds[2]
}
llNull <- Cyclops::getCyclopsProfileLogLikelihood(
object = fit,
parm = 1,
x = nullLogRr,
includePenalty = TRUE
)$value
}
llr <- fit$log_likelihood - llNull
p <- pchisq(2 * llr, df = 1, lower.tail = FALSE)
return(p)
}
Loading

0 comments on commit 65ba03f

Please sign in to comment.