Skip to content

Commit

Permalink
Better truncation handling (#44)
Browse files Browse the repository at this point in the history
* implement xc

* integral over delay

* add lots of catches and debug modes

* discover xc can be negative intentionally

* test with many chains

* relax truncation of mu

* midpoint approx

* get rid of the mid point and instead do the truncation outside the integral

* rejig where we truncate

* update approx

* reformulate stan code

* keep hacking

* try dropping xc support

* extend xc boundaries

* add a test for the weibull sampling recovery for R code

* enhance stan to R cross testing

* refactor

* update getting started

* update news

* package checking and turn off more exact approach

* update check
  • Loading branch information
seabbs authored Sep 6, 2024
1 parent 74c2c8c commit e648b4f
Show file tree
Hide file tree
Showing 14 changed files with 465 additions and 244 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check-cmdstan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
# We can't use TRUE here as pendatic check return lots of false
# positives related to our use of functions.
stopifnot(
length(message) > 11 &&
length(message) != 0 && length(message) < 12 &&
any(message == "Stan program is syntactically correct")
)
shell: Rscript {0}
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ export(rpcens)
export(rprimarycensoreddist)
importFrom(stats,dunif)
importFrom(stats,runif)
importFrom(stats,setNames)
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ interface changes in `0.3.0` for the Stan code.
* Added support for `swindow = 0` to `rprimarycensoreddist` to allow for non-secondary event censored distributions.
* Adapted `rprimarycensoreddist` so that truncation is based on the primary censored distribution before secondary events are censored. This better matches the generative process.
* Added a new Stan interface tool to enable finding which files functions are implemented in the Stan code.
* Updated the approach to truncation to be outside the primary censored distribution integral.
* Improved tests that compare random sampling and probability mass/density functions between R and Stan.
* Improved cross testing between R and Stan implementations of the primary censored distributions.
* Worked on improving the stability of the `primary_censored_dist_lpmf` when used for NUTS based fitting (i.e. in Stan).

## Documentation

Expand Down
63 changes: 53 additions & 10 deletions R/dprimarycensoreddist.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,29 @@
#' \deqn{
#' f_{\text{cens}}(d) = F_{\text{cens}}(d + \text{swindow}) - F_{\text{cens}}(d)
#' }
#' where \eqn{F_{\text{cens}}} is the primary event censored CDF. For the
#' explanation and mathematical details of the CDF, refer to the documentation
#' of [pprimarycensoreddist()].
#' where \eqn{F_{\text{cens}}} is the primary event censored CDF.
#'
#' The function first computes the CDFs for all unique points (including both
#' \eqn{d} and \eqn{d + \text{swindow}}) using [pprimarycensoreddist()]. It then
#' creates a lookup table for these CDFs to efficiently calculate the PMF for
#' each input value. For non-positive delays, the function returns 0.
#'
#' If a finite maximum delay \eqn{D} is specified, the PMF is normalized to
#' ensure it sums to 1 over the range \[0, D\]. This normalization can be
#' expressed as:
#' \deqn{
#' f_{\text{cens,norm}}(d) = \frac{f_{\text{cens}}(d)}{\sum_{i=0}^{D-1}
#' f_{\text{cens}}(i)}
#' }
#' where \eqn{f_{\text{cens,norm}}(d)} is the normalized PMF and
#' \eqn{f_{\text{cens}}(d)} is the unnormalized PMF. For the explanation and
#' mathematical details of the CDF, refer to the documentation of
#' [pprimarycensoreddist()].
#'
#' @family primarycensoreddist
#'
#' @importFrom stats setNames
#'
#' @examples
#' # Example: Weibull distribution with uniform primary events
#' dprimarycensoreddist(c(0.1, 0.5, 1), pweibull, shape = 1.5, scale = 2.0)
Expand All @@ -65,19 +82,45 @@ dprimarycensoreddist <- function(
)
}

# Compute CDFs for all unique points
unique_points <- sort(unique(c(x, x + swindow)))
unique_points <- unique_points[unique_points > 0]
if (length(unique_points) == 0) {
return(rep(0, length(x)))
}

cdfs <- pprimarycensoreddist(
unique_points, pdist, pwindow, Inf, dprimary, dprimary_args, ...
)

# Create a lookup table for CDFs
cdf_lookup <- stats::setNames(cdfs, as.character(unique_points))

result <- vapply(x, function(d) {
if (d < 0) {
return(0) # Return log(0) for non-positive delays
return(0) # Return 0 for negative delays
} else if (d == 0) {
# Special case for d = 0
cdf_upper <- cdf_lookup[as.character(swindow)]
return(cdf_upper)
} else {
pprimarycensoreddist(
d + swindow, pdist, pwindow, D, dprimary, dprimary_args, ...
) -
pprimarycensoreddist(
d, pdist, pwindow, D, dprimary, dprimary_args, ...
)
cdf_upper <- cdf_lookup[as.character(d + swindow)]
cdf_lower <- cdf_lookup[as.character(d)]
return(cdf_upper - cdf_lower)
}
}, numeric(1))

if (is.finite(D)) {
if (max(unique_points) == D) {
cdf_D <- max(cdfs)
} else {
cdf_D <- pprimarycensoreddist(
D, pdist, pwindow, Inf, dprimary, dprimary_args, ...
)
}
result <- result / cdf_D
}

if (log) {
return(log(result))
} else {
Expand Down
45 changes: 23 additions & 22 deletions R/pprimarycensoreddist.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
#'
#' @details
#' The primary event censored CDF is computed by integrating the product of
#' the primary event distribution function (CDF) and the delay distribution
#' the delay distribution function (CDF) and the primary event distribution
#' function (PDF) over the primary event window. The integration is adjusted
#' for truncation if a finite maximum delay (D) is specified.
#'
Expand All @@ -50,15 +50,16 @@
#' F_{\text{cens}}(q) = \int_{0}^{pwindow} F(q - p) \cdot f_{\text{primary}}(p)
#' \, dp
#' }
#' where \eqn{F} is the CDF of the primary event distribution,
#' where \eqn{F} is the CDF of the delay distribution,
#' \eqn{f_{\text{primary}}} is the PDF of the primary event times, and
#' \eqn{pwindow} is the primary event window.
#'
#' If the maximum delay \eqn{D} is finite, the CDF is normalized by \eqn{F(D)}:
#' If the maximum delay \eqn{D} is finite, the CDF is normalized by dividing
#' by \eqn{F_{\text{cens}}(D)}:
#' \deqn{
#' F_{\text{cens}}(q) = \int_{0}^{pwindow} \frac{F(q - p)}{F(D - p)} \cdot
#' f_{\text{primary}}(p) \, dp
#' F_{\text{cens,norm}}(q) = \frac{F_{\text{cens}}(q)}{F_{\text{cens}}(D)}
#' }
#' where \eqn{F_{\text{cens,norm}}(q)} is the normalized CDF.
#'
#' @family primarycensoreddist
#'
Expand All @@ -82,28 +83,28 @@ pprimarycensoreddist <- function(
if (d <= 0) {
return(0) # Return 0 for non-positive delays
} else {
if (is.infinite(D)) {
integrand <- function(p) {
d_adj <- d - p
pdist(d_adj, ...) *
do.call(
dprimary, c(list(x = p, min = 0, max = pwindow), dprimary_args)
)
}
} else {
integrand <- function(p) {
d_adj <- d - p
D_adj <- D - p
pdist(d_adj, ...) / pdist(D_adj, ...) *
do.call(
dprimary, c(list(x = p, min = 0, max = pwindow), dprimary_args)
)
}
integrand <- function(p) {
d_adj <- d - p
pdist(d_adj, ...) *
do.call(
dprimary, c(list(x = p, min = 0, max = pwindow), dprimary_args)
)
}

stats::integrate(integrand, lower = 0, upper = pwindow)$value
}
}, numeric(1))

if (!is.infinite(D)) {
# Compute normalization factor for finite D
normalizer <- if (max(q) == D) {
result[length(result)]
} else {
pprimarycensoreddist(D, pdist, pwindow, Inf, dprimary, dprimary_args, ...)
}
result <- result / normalizer
}

return(result)
}

Expand Down
Loading

0 comments on commit e648b4f

Please sign in to comment.