Update fastcpd to 0.4.0
* Add the transition from vanilla PELT to SeN by using the `vanilla_percentage` parameter.
doccstat committed Aug 31, 2023
1 parent 477ebc0 commit 84de3c2
Showing 4 changed files with 67 additions and 4 deletions.
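For context, a minimal sketch of how the new argument is meant to be passed to `fastcpd()`. It reuses the names from the test added in this commit (`data`, `p`, `n`, and the Huber cost functions defined there); the comment on what the fraction does is a reading of the condition changed in R/fastcpd.R below, not separately documented behavior.

# Sketch based on the test added in this commit; `data`, `p`, `n`, huber_loss,
# huber_loss_gradient and huber_loss_hessian are defined in that test.
result <- fastcpd(
  formula = y ~ . - 1,
  data = data,
  beta = (p + 1) * log(n) / 2,
  cost = huber_loss,
  cost_gradient = huber_loss_gradient,
  cost_hessian = huber_loss_hessian,
  # Reading of this commit: segments ending at t <= vanilla_percentage * n are
  # costed by full optimization (vanilla PELT); later ones by SeN updates.
  vanilla_percentage = 0.1
)
result@cp_set  # estimated change point locations, c(401, 726) in the test below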
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -2,7 +2,7 @@ Package: fastcpd
 Type: Package
 Title: Fast Change Point Detection Based on Dynamic Programming with Pruning
     with Sequential Gradient Descent
-Version: 0.3.3
+Version: 0.4.0
 Authors@R: c(
     person("Xingchi", "Li", email = "anthony.li@stat.tamu.edu",
       role = c("aut", "cre", "cph"),
5 changes: 5 additions & 0 deletions NEWS.md
@@ -1,3 +1,8 @@
+# fastcpd 0.4.0
+
+* Add the transition from vanilla PELT to SeN by using the `vanilla_percentage`
+  parameter.
+
 # fastcpd 0.3.3
 
 * Merge the implementation of vanilla PELT and SeN.
6 changes: 3 additions & 3 deletions R/fastcpd.R
@@ -446,10 +446,10 @@ fastcpd_builtin <- function(
     }
 
     data_segment <- data[(tau + 1):t, , drop = FALSE]
-    if (vanilla_percentage == 1) {
+    if (t <= vanilla_percentage * n) {
      cost_optim_result <- cost_optim(family, p, data_segment, cost, 0, TRUE)
-      # fastcpd_parameters$theta_hat[, i] <- cost_optim_result$par
-      # fastcpd_parameters$theta_sum[, i] <- fastcpd_parameters$theta_sum[, i] + cost_optim_result$par
+      fastcpd_parameters$theta_hat[, i] <- cost_optim_result$par
+      fastcpd_parameters$theta_sum[, i] <- fastcpd_parameters$theta_sum[, i] + cost_optim_result$par
      cval[i] <- cost_optim_result$value
    } else {
      fastcpd_parameters <- update_fastcpd_parameters(
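The substantive change in the hunk above: the branch that previously ran only when `vanilla_percentage == 1` now fires whenever the current index `t` falls within the first `vanilla_percentage * n` observations, so the vanilla PELT cost optimization handles the early portion of the data and the SeN updates take over afterwards, with `theta_hat` and `theta_sum` now filled in during the vanilla phase so SeN can continue from them. A self-contained sketch of that dispatch rule, with made-up values for `n` and the time indices:

# Illustration only; mirrors the condition introduced in fastcpd_builtin().
use_vanilla_cost <- function(t, n, vanilla_percentage) {
  # TRUE  -> cost the segment by full optimization (vanilla PELT branch)
  # FALSE -> update theta_hat / theta_sum sequentially (SeN branch)
  t <= vanilla_percentage * n
}

n <- 1200  # made-up sample size
vapply(
  c(1, 120, 121, 1200), use_vanilla_cost, logical(1),
  n = n, vanilla_percentage = 0.1
)
# TRUE TRUE FALSE FALSE: the first 10% of time indices use vanilla PELT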
58 changes: 58 additions & 0 deletions tests/testthat/test-fastcpd.R
@@ -337,6 +337,64 @@ test_that("huber regression", {
   expect_equal(huber_regression_result@cp_set, c(401, 726))
 })
 
+test_that("huber regression with 0.1 vanilla", {
+  set.seed(1)
+  n <- 400 + 300 + 500
+  p <- 5
+  x <- mvtnorm::rmvnorm(n, mean = rep(0, p), sigma = diag(p))
+  theta <- rbind(
+    mvtnorm::rmvnorm(1, mean = rep(0, p - 3), sigma = diag(p - 3)),
+    mvtnorm::rmvnorm(1, mean = rep(5, p - 3), sigma = diag(p - 3)),
+    mvtnorm::rmvnorm(1, mean = rep(9, p - 3), sigma = diag(p - 3))
+  )
+  theta <- cbind(theta, matrix(0, 3, 3))
+  theta <- theta[rep(seq_len(3), c(400, 300, 500)), ]
+  y_true <- rowSums(x * theta)
+  factor <- c(
+    2 * stats::rbinom(400, size = 1, prob = 0.95) - 1,
+    2 * stats::rbinom(300, size = 1, prob = 0.95) - 1,
+    2 * stats::rbinom(500, size = 1, prob = 0.95) - 1
+  )
+  y <- factor * y_true + stats::rnorm(n)
+  data <- cbind.data.frame(y, x)
+  huber_threshold <- 1
+  huber_loss <- function(data, theta) {
+    residual <- data[, 1] - data[, -1, drop = FALSE] %*% theta
+    indicator <- abs(residual) <= huber_threshold
+    sum(
+      residual^2 / 2 * indicator +
+        huber_threshold * (abs(residual) - huber_threshold / 2) * (1 - indicator)
+    )
+  }
+  huber_loss_gradient <- function(data, theta) {
+    residual <- c(data[nrow(data), 1] - data[nrow(data), -1] %*% theta)
+    if (abs(residual) <= huber_threshold) {
+      -residual * data[nrow(data), -1]
+    } else {
+      -huber_threshold * sign(residual) * data[nrow(data), -1]
+    }
+  }
+  huber_loss_hessian <- function(data, theta) {
+    residual <- c(data[nrow(data), 1] - data[nrow(data), -1] %*% theta)
+    if (abs(residual) <= huber_threshold) {
+      outer(data[nrow(data), -1], data[nrow(data), -1])
+    } else {
+      0.01 * diag(length(theta))
+    }
+  }
+  huber_regression_result <- fastcpd(
+    formula = y ~ . - 1,
+    data = data,
+    beta = (p + 1) * log(n) / 2,
+    cost = huber_loss,
+    cost_gradient = huber_loss_gradient,
+    cost_hessian = huber_loss_hessian,
+    vanilla_percentage = 0.1
+  )
+
+  expect_equal(huber_regression_result@cp_set, c(401, 726))
+})
+
 test_that("confidence interval experiment", {
   set.seed(1)
   kDimension <- 1
