Update fastcpd to 0.4.0
* Add the transition from vanilla PELT to SeN by using the `vanilla_percentage` parameter.
doccstat committed Aug 31, 2023
1 parent 477ebc0 commit 84de3c2
Showing 4 changed files with 67 additions and 4 deletions.
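For context, a minimal sketch of how the new argument is meant to be passed to `fastcpd()`. It reuses the names from the test added in this commit (`data`, `p`, `n`, and the Huber cost functions defined there); the comment on what the fraction does is a reading of the condition changed in R/fastcpd.R below, not separately documented behavior.

# Sketch based on the test added in this commit; `data`, `p`, `n`, huber_loss,
# huber_loss_gradient and huber_loss_hessian are defined in that test.
result <- fastcpd(
  formula = y ~ . - 1,
  data = data,
  beta = (p + 1) * log(n) / 2,
  cost = huber_loss,
  cost_gradient = huber_loss_gradient,
  cost_hessian = huber_loss_hessian,
  # Reading of this commit: segments ending at t <= vanilla_percentage * n are
  # costed by full optimization (vanilla PELT); later ones by SeN updates.
  vanilla_percentage = 0.1
)
result@cp_set  # estimated change point locations, c(401, 726) in the test below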
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -2,7 +2,7 @@ Package: fastcpd
 Type: Package
 Title: Fast Change Point Detection Based on Dynamic Programming with Pruning
     with Sequential Gradient Descent
-Version: 0.3.3
+Version: 0.4.0
 Authors@R: c(
     person("Xingchi", "Li", email = "anthony.li@stat.tamu.edu",
       role = c("aut", "cre", "cph"),
5 changes: 5 additions & 0 deletions NEWS.md
@@ -1,3 +1,8 @@
+# fastcpd 0.4.0
+
+* Add the transition from vanilla PELT to SeN by using the `vanilla_percentage`
+  parameter.
+
 # fastcpd 0.3.3
 
 * Merge the implementation of vanilla PELT and SeN.
6 changes: 3 additions & 3 deletions R/fastcpd.R
@@ -446,10 +446,10 @@ fastcpd_builtin <- function(
     }
 
     data_segment <- data[(tau + 1):t, , drop = FALSE]
-    if (vanilla_percentage == 1) {
+    if (t <= vanilla_percentage * n) {
      cost_optim_result <- cost_optim(family, p, data_segment, cost, 0, TRUE)
-      # fastcpd_parameters$theta_hat[, i] <- cost_optim_result$par
-      # fastcpd_parameters$theta_sum[, i] <- fastcpd_parameters$theta_sum[, i] + cost_optim_result$par
+      fastcpd_parameters$theta_hat[, i] <- cost_optim_result$par
+      fastcpd_parameters$theta_sum[, i] <- fastcpd_parameters$theta_sum[, i] + cost_optim_result$par
      cval[i] <- cost_optim_result$value
    } else {
      fastcpd_parameters <- update_fastcpd_parameters(
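The substantive change in the hunk above: the branch that previously ran only when `vanilla_percentage == 1` now fires whenever the current index `t` falls within the first `vanilla_percentage * n` observations, so the vanilla PELT cost optimization handles the early portion of the data and the SeN updates take over afterwards, with `theta_hat` and `theta_sum` now filled in during the vanilla phase so SeN can continue from them. A self-contained sketch of that dispatch rule, with made-up values for `n` and the time indices:

# Illustration only; mirrors the condition introduced in fastcpd_builtin().
use_vanilla_cost <- function(t, n, vanilla_percentage) {
  # TRUE  -> cost the segment by full optimization (vanilla PELT branch)
  # FALSE -> update theta_hat / theta_sum sequentially (SeN branch)
  t <= vanilla_percentage * n
}

n <- 1200  # made-up sample size
vapply(
  c(1, 120, 121, 1200), use_vanilla_cost, logical(1),
  n = n, vanilla_percentage = 0.1
)
# TRUE TRUE FALSE FALSE: the first 10% of time indices use vanilla PELT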
58 changes: 58 additions & 0 deletions tests/testthat/test-fastcpd.R
@@ -337,6 +337,64 @@ test_that("huber regression", {
   expect_equal(huber_regression_result@cp_set, c(401, 726))
 })
 
+test_that("huber regression with 0.1 vanilla", {
+  set.seed(1)
+  n <- 400 + 300 + 500
+  p <- 5
+  x <- mvtnorm::rmvnorm(n, mean = rep(0, p), sigma = diag(p))
+  theta <- rbind(
+    mvtnorm::rmvnorm(1, mean = rep(0, p - 3), sigma = diag(p - 3)),
+    mvtnorm::rmvnorm(1, mean = rep(5, p - 3), sigma = diag(p - 3)),
+    mvtnorm::rmvnorm(1, mean = rep(9, p - 3), sigma = diag(p - 3))
+  )
+  theta <- cbind(theta, matrix(0, 3, 3))
+  theta <- theta[rep(seq_len(3), c(400, 300, 500)), ]
+  y_true <- rowSums(x * theta)
+  factor <- c(
+    2 * stats::rbinom(400, size = 1, prob = 0.95) - 1,
+    2 * stats::rbinom(300, size = 1, prob = 0.95) - 1,
+    2 * stats::rbinom(500, size = 1, prob = 0.95) - 1
+  )
+  y <- factor * y_true + stats::rnorm(n)
+  data <- cbind.data.frame(y, x)
+  huber_threshold <- 1
+  huber_loss <- function(data, theta) {
+    residual <- data[, 1] - data[, -1, drop = FALSE] %*% theta
+    indicator <- abs(residual) <= huber_threshold
+    sum(
+      residual^2 / 2 * indicator +
+        huber_threshold * (abs(residual) - huber_threshold / 2) * (1 - indicator)
+    )
+  }
+  huber_loss_gradient <- function(data, theta) {
+    residual <- c(data[nrow(data), 1] - data[nrow(data), -1] %*% theta)
+    if (abs(residual) <= huber_threshold) {
+      -residual * data[nrow(data), -1]
+    } else {
+      -huber_threshold * sign(residual) * data[nrow(data), -1]
+    }
+  }
+  huber_loss_hessian <- function(data, theta) {
+    residual <- c(data[nrow(data), 1] - data[nrow(data), -1] %*% theta)
+    if (abs(residual) <= huber_threshold) {
+      outer(data[nrow(data), -1], data[nrow(data), -1])
+    } else {
+      0.01 * diag(length(theta))
+    }
+  }
+  huber_regression_result <- fastcpd(
+    formula = y ~ . - 1,
+    data = data,
+    beta = (p + 1) * log(n) / 2,
+    cost = huber_loss,
+    cost_gradient = huber_loss_gradient,
+    cost_hessian = huber_loss_hessian,
+    vanilla_percentage = 0.1
+  )
+
+  expect_equal(huber_regression_result@cp_set, c(401, 726))
+})
+
 test_that("confidence interval experiment", {
   set.seed(1)
   kDimension <- 1
