diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 47798fb1..9beaf36a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,6 +13,7 @@ repos:
           - shiny
           - lubridate
           - DescTools
+          - lmtest
       # codemeta must be above use-tidy-description when both are used
       # -   id: codemeta-description-updated
       - id: use-tidy-description
diff --git a/NAMESPACE b/NAMESPACE
index 081263e9..5933373f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,7 @@ export(basic_kmplot)
 export(basic_kmplot2)
 export(bootstrap_HR)
 export(bucher)
+export(calculate_weights_legend)
 export(center_ipd)
 export(check_weights)
 export(dummize_ipd)
diff --git a/R/data.R b/R/data.R
new file mode 100644
index 00000000..0990ef58
--- /dev/null
+++ b/R/data.R
@@ -0,0 +1,220 @@
+# unanchored datasets ------
+
+#' Patient data from single arm study
+#' @format A data frame with 500 rows and 8 columns:
+#'   \describe{
+#'     \item{USUBJID}{Unique subject identifiers for patients.}
+#'     \item{ARM}{Assigned treatment arm.}
+#'     \item{AGE}{Age in years at baseline.}
+#'     \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.}
+#'     \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.}
+#'     \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.}
+#'     \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.}
+#'     \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.}
+#'
+#'   }
+#' @keywords dataset
+#' @family unanchored datasets
+"adsl_sat"
+
+#' Survival data from single arm trial
+#' @format A data frame with 500 rows and 10 columns:
+#'   \describe{
+#'     \item{USUBJID}{Unique subject identifiers for patients.}
+#'     \item{ARM}{Assigned treatment arm, `"A"`.}
+#'     \item{AVAL}{Analysis value, which in this dataset is overall survival time in days.}
+#'     \item{AVALU}{Unit of `AVAL`.}
+#'     \item{PARAMCD}{Parameter code of `AVAL`, `"OS"`.}
+#'     \item{PARAM}{Parameter name of `AVAL`, `"Overall Survival"`.}
+#'     \item{CNSR}{Censoring indicator `0`/`1`.}
+#'     \item{TIME}{Survival time in days.}
+#'     \item{EVENT}{Event indicator `0`/`1`.}
+#'   }
+#' @family unanchored datasets
+#' @keywords dataset
+"adtte_sat"
+
+
+#' Pseudo individual patient survival data from published study
+#' @format A data frame with 300 rows and 3 columns:
+#'   \describe{
+#'     \item{Time}{Survival time in days.}
+#'     \item{Event}{Event indicator `0`/`1`.}
+#'     \item{ARM}{Assigned treatment arm, `"B"`.}
+#'   }
+#' @family unanchored datasets
+#' @keywords dataset
+"pseudo_ipd_sat"
+
+
+#' Centered patient data from single arm trial
+#' @format A data frame with 500 rows and 14 columns:
+#'   \describe{
+#'     \item{USUBJID}{Unique subject identifiers for patients.}
+#'     \item{ARM}{Assigned treatment arm.}
+#'     \item{AGE}{Age in years at baseline.}
+#'     \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.}
+#'     \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.}
+#'     \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.}
+#'     \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.}
+#'     \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.}
+#'     \item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data [agd].}
+#'     \item{AGE_MEDIAN_CENTERED}{`AGE` greater/less than `AGE_MEDIAN` in [agd] coded as `1`/`0` and then centered at
+#'      0.5.}
+#'     \item{AGE_SQUARED_CENTERED}{`AGE` squared and centered with respect to the `AGE` in [agd]. The squared age in the
+#'       aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.}
+#'     \item{SEX_MALE_CENTERED}{`SEX_MALE` centered by the proportion of male patients in [agd]}
+#'     \item{ECOG0_CENTERED}{`ECOG0` centered by the proportion of `ECOG0` in [agd]}
+#'     \item{SMOKE_CENTERED}{`SMOKE` centered by the proportion of `SMOKE` in [agd]}
+#'     \item{N_PR_THER_MEDIAN_CENTERED}{`N_PR_THER` centered by the median in [agd].}
+#'   }
+#' @family unanchored datasets
+#' @keywords dataset
+"centered_ipd_sat"
+
+#' Binary outcome data from single arm trial
+#' @format A data frame with 500 rows and 5 columns:
+#'   \describe{
+#'     \item{USUBJID}{Unique subject identifiers for patients.}
+#'     \item{ARM}{Assigned treatment arm.}
+#'     \item{AVAL}{Analysis value, in this dataset an indicator of response.}
+#'     \item{PARAM}{Parameter type of `AVAL`.}
+#'     \item{RESPONSE}{Indicator of response.}
+#'   }
+#' @family unanchored datasets
+#' @keywords dataset
+"adrs_sat"
+
+#' Weighted object for single arm trial data
+#' @format A `maicplus_estimate_weights` object created by [estimate_weights()] containing
+#'   \describe{
+#'     \item{data}{patient level data with weights}
+#'     \item{centered_colnames}{Columns used in MAIC}
+#'     \item{nr_missing}{Number of observations with missing data}
+#'     \item{ess}{Effective sample size}
+#'     \item{opt}{Information from `optim` from weight calculation}
+#'     \item{boot}{Parameters and bootstrap sample weights, `NULL` in this object}
+#'   }
+#' @family unanchored datasets
+#' @keywords dataset
+"weighted_sat"
+
+# aggregate data ------
+
+#' Aggregate effect modifier data from published study
+#'
+#' This data is formatted to be used in [process_agd()], whose output is passed to [center_ipd()].
+#'
+#' @format A data frame with 3 rows and 10 columns:
+#'   \describe{
+#'     \item{STUDY}{The study name, Study_XXXX}
+#'     \item{ARM}{Study arm name or total}
+#'     \item{N}{Number of observations in study arm}
+#'     \item{AGE_MEAN}{Mean age in study arm}
+#'     \item{AGE_MEDIAN}{Median age in study arm}
+#'     \item{AGE_SD}{Standard deviation of age in study arm}
+#'     \item{SEX_MALE_COUNT}{Number of male patients}
+#'     \item{ECOG0_COUNT}{Number of patients with ECOG score = 0}
+#'     \item{SMOKE_COUNT}{Number of smokers}
+#'     \item{N_PR_THER_MEDIAN}{Median number of prior therapies}
+#'   }
+#' @family unanchored datasets
+#' @family anchored datasets
+#' @keywords dataset
+"agd"
+
+
+# anchored datasets -------
+
+#' Patient data from two arm trial
+#' @format A data frame with 1000 rows and 8 columns:
+#'   \describe{
+#'     \item{USUBJID}{Unique subject identifiers for patients.}
+#'     \item{ARM}{Assigned treatment arm.}
+#'     \item{AGE}{Age in years at baseline.}
+#'     \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.}
+#'     \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.}
+#'     \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.}
+#'     \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.}
+#'     \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.}
+#'   }
+#' @family anchored datasets
+#' @keywords dataset
+"adsl_twt"
+
+
+#' Survival data from two arm trial
+#' @format A data frame with 1000 rows and 10 columns:
+#'   \describe{
+#'     \item{USUBJID}{Unique subject identifiers for patients.}
+#'     \item{ARM}{Assigned treatment arm, `"A"`, `"C"`.}
+#'     \item{AVAL}{Analysis value, which in this dataset is overall survival time in days.}
+#'     \item{AVALU}{Unit of `AVAL`.}
+#'     \item{PARAMCD}{Parameter code of `AVAL`, `"OS"`.}
+#'     \item{PARAM}{Parameter name of `AVAL`, `"Overall Survival"`.}
+#'     \item{CNSR}{Censoring indicator `0`/`1`.}
+#'     \item{TIME}{Survival time in days.}
+#'     \item{EVENT}{Event indicator `0`/`1`.}
+#'   }
+#' @family anchored datasets
+#' @keywords dataset
+"adtte_twt"
+
+#' Binary outcome data from two arm trial
+#' @format A data frame with 1000 rows and 5 columns:
+#'   \describe{
+#'     \item{USUBJID}{Unique subject identifiers for patients.}
+#'     \item{ARM}{Assigned treatment arm, `"A"`, `"C"`.}
+#'     \item{AVAL}{Analysis value, in this dataset an indicator of response.}
+#'     \item{PARAM}{Parameter type of `AVAL`.}
+#'     \item{RESPONSE}{Indicator of response.}
+#'   }
+"adrs_twt"
+
+#' Pseudo individual patient survival data from published two arm study
+#' @format A data frame with 800 rows and 3 columns:
+#'   \describe{
+#'     \item{Time}{Survival time in days.}
+#'     \item{Event}{Event indicator `0`/`1`.}
+#'     \item{ARM}{Assigned treatment arm, `"B"`, `"C"`.}
+#'   }
+#' @family anchored datasets
+#' @keywords dataset
+"pseudo_ipd_twt"
+
+
+#' Centered patient data from two arm trial
+#' @format A data frame with 1000 rows and 14 columns:
+#'   \describe{
+#'     \item{USUBJID}{Unique subject identifiers for patients.}
+#'     \item{ARM}{Assigned treatment arm.}
+#'     \item{AGE}{Age in years at baseline.}
+#'     \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.}
+#'     \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.}
+#'     \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.}
+#'     \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.}
+#'     \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.}
+#'     \item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data [agd].}
+#'     \item{AGE_MEDIAN_CENTERED}{`AGE` greater/less than `AGE_MEDIAN` in [agd] coded as `1`/`0` and then centered at
+#'      0.5.}
+#'     \item{AGE_SQUARED_CENTERED}{`AGE` squared and centered with respect to the `AGE` in [agd]. The squared age in the
+#'       aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.}
+#'     \item{SEX_MALE_CENTERED}{`SEX_MALE` centered by the proportion of male patients in [agd]}
+#'     \item{ECOG0_CENTERED}{`ECOG0` centered by the proportion of `ECOG0` in [agd]}
+#'     \item{SMOKE_CENTERED}{`SMOKE` centered by the proportion of `SMOKE` in [agd]}
+#'     \item{N_PR_THER_MEDIAN_CENTERED}{`N_PR_THER` centered by the median in [agd].}
+#'   }
+#' @keywords dataset
+#' @family anchored datasets
+"centered_ipd_twt"
+
+
+if (FALSE) {
+  # Developer helper (never run on load): print a roxygen `@format` skeleton with one empty \item per column.
+  make_roxygen_data <- function(df) {
+    cat("#' @format A data frame with", nrow(df), "rows and", ncol(df), "columns:\n")
+    cat("#'   \\describe{\n")
+    cat(sprintf("#'     \\item{%s}{}\n", colnames(df)), sep = "")
+    cat("#'   }")
+  }
+}
diff --git a/R/matching.R b/R/matching.R
index a85f1822..fcf24f9d 100644
--- a/R/matching.R
+++ b/R/matching.R
@@ -37,14 +37,19 @@
 #' }
 #'
 #' @examples
-#' load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE))
-#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
-#' ipd_centered <- center_ipd(ipd = ipd, agd = agd)
-#'
-#' centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
-#' centered_colnames <- paste0(centered_colnames, "_CENTERED")
+#' data(agd)
+#' data(adsl_sat)
+#' ipd_centered <- center_ipd(ipd = adsl_sat, agd = process_agd(agd))
+#' centered_colnames <- grep("_CENTERED", colnames(ipd_centered), value = TRUE)
+#' centered_colnames
 #' weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames)
-#'
+#' \donttest{
+#' # To later estimate bootstrap confidence intervals, we calculate the weights
+#' # for the bootstrap samples:
+#' weighted_data_boot <- estimate_weights(
+#'   data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 500
+#' )
+#' }
 #' @export
 
 estimate_weights <- function(data,
@@ -199,10 +204,9 @@ optimise_weights <- function(matrix,
 #'
 #' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count
 #' @examples
-#' \dontrun{
-#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
-#' calculate_weights_legend(weighted_data)
-#' }
+#' data("weighted_sat")
+#' calculate_weights_legend(weighted_sat)
+#' @export
 #' @keywords internal
 
 calculate_weights_legend <- function(weighted_data) {
@@ -362,11 +366,11 @@ plot_weights_ggplot <- function(weighted_data, bin_col, vline_col,
 #' @param bins (`ggplot` only) number of bin parameter to use
 #'
 #' @examples
-#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
-#' plot(weighted_data)
+#' plot(weighted_sat)
 #'
-#' library(ggplot2)
-#' plot(weighted_data, ggplot = TRUE)
+#' if (requireNamespace("ggplot2")) {
+#'   plot(weighted_sat, ggplot = TRUE)
+#' }
 #' @describeIn estimate_weights Plot method for estimate_weights objects
 #' @export
 
@@ -397,9 +401,7 @@ plot.maicplus_estimate_weights <- function(x, ggplot = FALSE,
 #' aggregated data following the same naming convention
 #'
 #' @examples
-#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
-#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
-#' check_weights(weighted_data, agd)
+#' check_weights(weighted_sat, process_agd(agd))
 #'
 #' @import DescTools
 #'
diff --git a/R/process_data.R b/R/process_data.R
index b11dfade..d125835b 100644
--- a/R/process_data.R
+++ b/R/process_data.R
@@ -114,8 +114,8 @@ process_agd <- function(raw_agd) {
 #' @param dummize_ref_level vector of reference level of the variables to binarize
 #'
 #' @examples
-#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
-#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+#' data(adsl_twt)
+#' dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Male"))
 #'
 #' @return ipd with dummized columns
 #' @export
@@ -152,8 +152,8 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) {
 #' suffix is no longer accepted.
 #' @examples
 #' # load in IPD
-#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
-#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+#' data(adsl_sat)
+#' adsl <- dummize_ipd(adsl_sat, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
 #'
 #' # Reading aggregate data by Excel
 #' target_pop <- read.csv(
@@ -162,7 +162,7 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) {
 #' agd <- process_agd(target_pop)
 #'
 #' # Alternatively, you can specify aggregate data manually in data frame
-#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
+#' data(agd)
-#' ipd_centered <- center_ipd(ipd = adsl, agd = agd)
+#' ipd_centered <- center_ipd(ipd = adsl, agd = process_agd(agd))
 #'
 #' @return centered ipd using aggregate level data averages
diff --git a/data-raw/dummy_anchored.R b/data-raw/dummy_anchored.R
new file mode 100644
index 00000000..19fd99ee
--- /dev/null
+++ b/data-raw/dummy_anchored.R
@@ -0,0 +1,80 @@
+#### create anchored example datasets (arm C is synthesized from the arm A extdata) ####
+
+devtools::load_all() # loads maicplus so system.file() and the package functions used below resolve
+library(flexsurv)
+set.seed(2024) # fixes the sample()/runif()/rbinom() draws in this script
+
+# create adsl_twt: arm A rows as read, plus a copy relabelled as arm C
+adsl <- read.csv(system.file("extdata", "adsl.csv",
+  package = "maicplus",
+  mustWork = TRUE
+))
+adsl$X <- NULL # NOTE(review): presumably the row-index column from the CSV export -- confirm
+adsl$USUBJID <- paste0("xx", adsl$USUBJID)
+adsl2 <- adsl
+adsl2$ARM <- "C"
+adsl2$USUBJID <- sample(size = nrow(adsl2), paste0("yy", adsl2$USUBJID), replace = FALSE)
+adsl2 <- adsl2[order(adsl2$USUBJID), ] # IDs were shuffled above, so sorting permutes covariates across arm C IDs
+
+adsl_twt <- rbind(adsl, adsl2)
+
+# create adtte_twt: arm A as read (plus TIME/EVENT), arm C simulated from a model fitted to shortened arm A times
+adtte <- read.csv(system.file("extdata", "adtte.csv",
+  package = "maicplus",
+  mustWork = TRUE
+))
+adtte$TIME <- adtte$AVAL
+adtte$EVENT <- 1 - adtte$CNSR # event indicator is the complement of the censoring flag
+adtte$USUBJID <- paste0("xx", adtte$USUBJID)
+
+adtte2 <- adtte
+adtte2$ARM <- "C"
+adtte2$TIME <- adtte2$TIME * runif(nrow(adtte2), 0.15, 0.3) # scale each time by a random factor in [0.15, 0.3]
+fit_C <- flexsurv::flexsurvspline(formula = Surv(TIME, EVENT) ~ 1, data = adtte2, k = 3)
+tmp <- simulate(fit_C, nsim = 1, seed = 1234, newdata = adtte2, censtime = max(adtte$TIME))
+adtte2$TIME <- tmp$time_1 # replace with the simulated draw; censtime was set to the largest arm A time
+adtte2$EVENT <- tmp$event_1
+adtte2$USUBJID <- paste0("yy", adtte2$USUBJID) # IDs already carry "xx", so arm C IDs read "yyxx..."
+
+adtte_twt <- rbind(adtte, adtte2)
+adtte_twt$EVNT <- NULL # drop EVNT; the EVENT column derived from CNSR above is kept
+
+### Binary
+adrs_twt1 <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE))
+adrs_twt1$USUBJID <- paste0("xx", adrs_twt1$USUBJID)
+adrs_twt1$RESPONSE <- adrs_twt1$AVAL
+
+adrs_twt2 <- adrs_twt1 # copy arm A so arm C IDs become "yyxx..." like adsl2/adtte2 (joinable on USUBJID)
+adrs_twt2$ARM <- "C"
+adrs_twt2$AVAL <- adrs_twt2$RESPONSE <- rbinom(nrow(adrs_twt2), size = 1, prob = 0.68) # simulated arm C responses
+adrs_twt2$USUBJID <- paste0("yy", adrs_twt2$USUBJID)
+
+adrs_twt <- rbind(adrs_twt1, adrs_twt2)
+
+# agd is written by data-raw/dummy_unanchored.R; make sure it is up-to-date before running this!
+data("agd")
+
+# create pseudo_ipd_twt: arm B from the extdata CSV, arm C simulated from the arm C model fitted above
+pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv",
+  package = "maicplus",
+  mustWork = TRUE
+))
+pseudo_ipd$ARM <- "B"
+pseudo_ipd2 <- adtte2[, c("TIME", "EVENT", "ARM")]
+names(pseudo_ipd2) <- c("Time", "Event", "ARM") # same column names as pseudo_ipd so the two can be rbind-ed
+tmp <- simulate(fit_C, nsim = 1, seed = 4321, newdata = adtte2, censtime = max(pseudo_ipd$Time))
+pseudo_ipd2$Time <- tmp$time_1 # overwrite with a separate draw (seed 4321); censtime is the largest arm B time
+pseudo_ipd2$Event <- tmp$event_1
+
+pseudo_ipd_twt <- rbind(pseudo_ipd, pseudo_ipd2)
+
+# create centered adsl_twt (adsl_twt itself is overwritten with the dummized version before it is saved)
+agd <- process_agd(agd)
+adsl_twt <- dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+centered_ipd_twt <- center_ipd(ipd = adsl_twt, agd = agd)
+
+
+### Output: write the objects to data/ as exported (non-internal) package datasets
+usethis::use_data(adsl_twt, adtte_twt, pseudo_ipd_twt, centered_ipd_twt, adrs_twt,
+  internal = FALSE, overwrite = TRUE
+)
diff --git a/data-raw/dummy_unanchored.R b/data-raw/dummy_unanchored.R
new file mode 100644
index 00000000..198ee4a2
--- /dev/null
+++ b/data-raw/dummy_unanchored.R
@@ -0,0 +1,53 @@
+#### create datasets for unanchored case ####
+## adsl_sat, adtte_sat, adrs_sat, agd (AgD of effect modifiers), pseudo_ipd_sat (AgD, tte data), weighted_sat
+
+devtools::load_all() # loads maicplus so system.file() and the package functions used below resolve
+# Read in relevant ADaM data and derive the TIME/EVENT analysis variables
+adsl_sat <- read.csv(system.file("extdata", "adsl.csv",
+  package = "maicplus",
+  mustWork = TRUE
+))
+adsl_sat$X <- NULL # NOTE(review): presumably the row-index column from the CSV export -- confirm
+adtte_sat <- read.csv(system.file("extdata", "adtte.csv",
+  package = "maicplus",
+  mustWork = TRUE
+))
+adtte_sat$TIME <- adtte_sat$AVAL
+adtte_sat$EVENT <- 1 - adtte_sat$CNSR # event indicator is the complement of the censoring flag
+adtte_sat <- adtte_sat[adtte_sat$ARM == "A", , drop = FALSE]
+adtte_sat$EVNT <- NULL # drop EVNT; the EVENT column derived from CNSR above is kept
+
+adsl_sat <- adsl_sat[adsl_sat$USUBJID %in% adtte_sat$USUBJID, , drop = FALSE] # keep subjects present in adtte_sat
+
+
+### AgD
+# Baseline aggregate data for the comparator population (saved as-is; process_agd() is applied separately below)
+agd <- read.csv(system.file("extdata", "aggregate_data_example_1.csv",
+  package = "maicplus", mustWork = TRUE
+))
+# for time-to-event endpoints, pseudo IPD from digitized KM
+pseudo_ipd_sat <- read.csv(system.file("extdata", "psuedo_IPD.csv",
+  package = "maicplus",
+  mustWork = TRUE
+))
+pseudo_ipd_sat$ARM <- "B"
+
+### Centered IPD (adsl_sat is overwritten with the dummized version, which is what gets saved)
+agd_sat <- process_agd(agd)
+adsl_sat <- dummize_ipd(adsl_sat, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+centered_ipd_sat <- center_ipd(ipd = adsl_sat, agd = agd_sat)
+
+### Binary
+adrs_sat <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE))
+adrs_sat$RESPONSE <- adrs_sat$AVAL
+
+## Example weights object (weighted_sat)
+ipd_centered <- center_ipd(ipd = adsl_sat, agd = agd_sat) # NOTE(review): same call as centered_ipd_sat above
+centered_colnames <- paste0(c("AGE", "AGE_MEDIAN", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE"), "_CENTERED")
+weighted_sat <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames)
+
+
+### Output: write the objects to data/ as exported (non-internal) package datasets
+usethis::use_data(adsl_sat, adtte_sat, agd, pseudo_ipd_sat, centered_ipd_sat, adrs_sat, weighted_sat,
+  internal = FALSE, overwrite = TRUE
+)
diff --git a/data/adrs_sat.rda b/data/adrs_sat.rda
new file mode 100644
index 00000000..7a7b5e1b
Binary files /dev/null and b/data/adrs_sat.rda differ
diff --git a/data/adrs_twt.rda b/data/adrs_twt.rda
new file mode 100644
index 00000000..b202158b
Binary files /dev/null and b/data/adrs_twt.rda differ
diff --git a/data/adsl_sat.rda b/data/adsl_sat.rda
new file mode 100644
index 00000000..a770c8b0
Binary files /dev/null and b/data/adsl_sat.rda differ
diff --git a/data/adsl_twt.rda b/data/adsl_twt.rda
new file mode 100644
index 00000000..7d9c10c4
Binary files /dev/null and b/data/adsl_twt.rda differ
diff --git a/data/adtte_sat.rda b/data/adtte_sat.rda
new file mode 100644
index 00000000..3a64dcc1
Binary files /dev/null and b/data/adtte_sat.rda differ
diff --git a/data/adtte_twt.rda b/data/adtte_twt.rda
new file mode 100644
index 00000000..35da640e
Binary files /dev/null and b/data/adtte_twt.rda differ
diff --git a/data/agd.rda b/data/agd.rda
new file mode 100644
index 00000000..394384be
Binary files /dev/null and b/data/agd.rda differ
diff --git a/data/centered_ipd_sat.rda b/data/centered_ipd_sat.rda
new file mode 100644
index 00000000..897a60e1
Binary files /dev/null and b/data/centered_ipd_sat.rda differ
diff --git a/data/centered_ipd_twt.rda b/data/centered_ipd_twt.rda
new file mode 100644
index 00000000..8d04e22e
Binary files /dev/null and b/data/centered_ipd_twt.rda differ
diff --git a/data/pseudo_ipd_sat.rda b/data/pseudo_ipd_sat.rda
new file mode 100644
index 00000000..4d9f29cb
Binary files /dev/null and b/data/pseudo_ipd_sat.rda differ
diff --git a/data/pseudo_ipd_twt.rda b/data/pseudo_ipd_twt.rda
new file mode 100644
index 00000000..758778bf
Binary files /dev/null and b/data/pseudo_ipd_twt.rda differ
diff --git a/data/weighted_sat.rda b/data/weighted_sat.rda
new file mode 100644
index 00000000..4938e68a
Binary files /dev/null and b/data/weighted_sat.rda differ
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 801bb23e..9f5dac06 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -31,6 +31,7 @@ Kaplan
 MAIC
 multivariable
 optim
+Paramater
 pharma
 Phillippo
 phillippo
@@ -44,6 +45,7 @@ sd
 Signorovitch
 signorovitch
 Texp
+THER
 TSD
 TTE
 tte
@@ -54,3 +56,4 @@ unscaled
 unstacked
 USUBJID
 vcovHC
+XXXX
diff --git a/inst/examples/kmplot_anchored_ex.R b/inst/examples/kmplot_anchored_ex.R
index 514d2e24..3112eb56 100644
--- a/inst/examples/kmplot_anchored_ex.R
+++ b/inst/examples/kmplot_anchored_ex.R
@@ -2,40 +2,19 @@
 
 ### IPD
 # Read in relevant ADaM data and rename variables of interest
-adsl <- read.csv(system.file("extdata", "adsl.csv",
-  package = "maicplus",
-  mustWork = TRUE
-))
-adtte <- read.csv(system.file("extdata", "adtte.csv",
-  package = "maicplus",
-  mustWork = TRUE
-))
-adtte$TIME <- adtte$AVAL
-adtte$EVENT <- adtte$EVNT
-adtte2 <- adtte
-adtte2$ARM <- "C"
-adtte2$TIME <- adtte2$TIME + 7
-adtte <- rbind(adtte, adtte2)
+adsl_twt
+adtte_twt
 
 ### AgD
 # Baseline aggregate data for the comparator population
-target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv",
-  package = "maicplus", mustWork = TRUE
-))
+agd
+
 # for time-to-event endpoints, pseudo IPD from digitalized KM
-pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv",
-  package = "maicplus",
-  mustWork = TRUE
-))
-pseudo_ipd$ARM <- "B"
-pseudo_ipd2 <- pseudo_ipd
-pseudo_ipd2$ARM <- "C"
-pseudo_ipd2$Time <- pseudo_ipd2$Time + 5
-pseudo_ipd <- rbind(pseudo_ipd, pseudo_ipd2)
+pseudo_ipd_twt
 
 #### prepare data
-target_pop <- process_agd(target_pop)
-adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+target_pop <- process_agd(agd)
+adsl <- dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
 use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
 
 #### derive weights
@@ -49,9 +28,9 @@ match_res <- estimate_weights(
 # plot by trial
 kmplot(
   weights_object = match_res,
-  tte_ipd = adtte,
+  tte_ipd = adtte_twt,
   trt_var_ipd = "ARM",
-  tte_pseudo_ipd = pseudo_ipd,
+  tte_pseudo_ipd = pseudo_ipd_twt,
   trt_var_agd = "ARM",
   endpoint_name = "Overall Survival",
   trt_ipd = "A",
@@ -71,9 +50,9 @@ kmplot(
 # plot by arm
 kmplot(
   weights_object = match_res,
-  tte_ipd = adtte,
+  tte_ipd = adtte_twt,
   trt_var_ipd = "ARM",
-  tte_pseudo_ipd = pseudo_ipd,
+  tte_pseudo_ipd = pseudo_ipd_twt,
   trt_var_agd = "ARM",
   endpoint_name = "Overall Survival",
   trt_ipd = "A",
@@ -92,9 +71,9 @@ kmplot(
 # plot all
 kmplot(
   weights_object = match_res,
-  tte_ipd = adtte,
+  tte_ipd = adtte_twt,
   trt_var_ipd = "ARM",
-  tte_pseudo_ipd = pseudo_ipd,
+  tte_pseudo_ipd = pseudo_ipd_twt,
   trt_var_agd = "ARM",
   endpoint_name = "Overall Survival",
   trt_ipd = "A",
diff --git a/inst/examples/maic_unanchored_binary_ex.R b/inst/examples/maic_unanchored_binary_ex.R
index 2baeccce..edcdb995 100644
--- a/inst/examples/maic_unanchored_binary_ex.R
+++ b/inst/examples/maic_unanchored_binary_ex.R
@@ -1,19 +1,18 @@
-# load in prognostic IPD data and AgD
-load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE))
-load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
-ipd_centered <- center_ipd(ipd = ipd, agd = agd)
+# load in centered prognostic IPD data
 
-# estimate weights
-centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
-centered_colnames <- paste0(centered_colnames, "_CENTERED")
+centered_ipd_sat
+centered_colnames <- grep("_CENTERED$", colnames(centered_ipd_sat), value = TRUE)
+weighted_data <- estimate_weights(data = centered_ipd_sat, centered_colnames = centered_colnames)
+weighted_data2 <- estimate_weights(
+  data = centered_ipd_sat, centered_colnames = centered_colnames,
+  n_boot_iteration = 500
+)
 
-weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames)
-weighted_data2 <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 400)
+# binary IPD
+adrs_sat
 
-# get dummy binary IPD
-adrs <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE))
-adrs$RESPONSE <- adrs$AVAL
 
+# get dummy binary IPD
 pseudo_adrs <- get_pseudo_ipd_binary(
   binary_agd = data.frame(
     ARM = rep("B", 2),
@@ -26,7 +25,7 @@ pseudo_adrs <- get_pseudo_ipd_binary(
 # unanchored binary MAIC, with CI based on sandwich estimator
 maic_unanchored(
   weights_object = weighted_data,
-  ipd = adrs,
+  ipd = adrs_sat,
   pseudo_ipd = pseudo_adrs,
   trt_ipd = "A",
   trt_agd = "B",
@@ -42,7 +41,7 @@ maic_unanchored(
 # unanchored binary MAIC, with bootstrapped CI
 maic_unanchored(
   weights_object = weighted_data2,
-  ipd = adrs,
+  ipd = adrs_sat,
   pseudo_ipd = pseudo_adrs,
   trt_ipd = "A",
   trt_agd = "B",
diff --git a/inst/extdata/aggregate_data_example_1.csv b/inst/extdata/aggregate_data_example_1.csv
index 32d705b2..a1128627 100644
--- a/inst/extdata/aggregate_data_example_1.csv
+++ b/inst/extdata/aggregate_data_example_1.csv
@@ -1,4 +1,4 @@
-STUDY,ARM,N,AGE_MEAN,AGE_MEDIAN,AGE_SD,SEX_MALE_COUNT,ECOG0_COUNT,SMOKE_COUNT
-Study_XXXX,Total,300,51,49,3.25,147,105,58
-Study_XXXX,Intervention,,,,,,,
-Study_XXXX,Comparator,,,,,,,
+STUDY,ARM,N,AGE_MEAN,AGE_MEDIAN,AGE_SD,SEX_MALE_COUNT,ECOG0_COUNT,SMOKE_COUNT,N_PR_THER_MEDIAN
+Study_XXXX,Total,300,51,49,3.25,147,105,58,2
+Study_XXXX,Intervention,,,,,,,,
+Study_XXXX,Comparator,,,,,,,,
diff --git a/man/adrs_sat.Rd b/man/adrs_sat.Rd
new file mode 100644
index 00000000..e015731f
--- /dev/null
+++ b/man/adrs_sat.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{adrs_sat}
+\alias{adrs_sat}
+\title{Binary outcome data from single arm trial}
+\format{
+A data frame with 500 rows and 5 columns:
+\describe{
+\item{USUBJID}{Unique subject identifiers for patients.}
+\item{ARM}{Assigned treatment arm.}
+\item{AVAL}{Analysis value, in this dataset an indicator of response.}
+\item{PARAM}{Parameter type of \code{AVAL}.}
+\item{RESPONSE}{Indicator of response.}
+}
+}
+\usage{
+adrs_sat
+}
+\description{
+Binary outcome data from single arm trial
+}
+\seealso{
+Other unanchored datasets: 
+\code{\link{adsl_sat}},
+\code{\link{adtte_sat}},
+\code{\link{agd}},
+\code{\link{centered_ipd_sat}},
+\code{\link{pseudo_ipd_sat}},
+\code{\link{weighted_sat}}
+}
+\concept{unanchored datasets}
+\keyword{dataset}
diff --git a/man/adrs_twt.Rd b/man/adrs_twt.Rd
new file mode 100644
index 00000000..11105141
--- /dev/null
+++ b/man/adrs_twt.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{adrs_twt}
+\alias{adrs_twt}
+\title{Binary outcome data from two arm trial}
+\format{
+A data frame with 1000 rows and 5 columns:
+\describe{
+\item{USUBJID}{Unique subject identifiers for patients.}
+\item{ARM}{Assigned treatment arm, \code{"A"}, \code{"C"}.}
+\item{AVAL}{Analysis value, in this dataset an indicator of response.}
+\item{PARAM}{Parameter type of \code{AVAL}.}
+\item{RESPONSE}{Indicator of response.}
+}
+}
+\usage{
+adrs_twt
+}
+\description{
+Binary outcome data from two arm trial
+}
+\keyword{datasets}
diff --git a/man/adsl_sat.Rd b/man/adsl_sat.Rd
new file mode 100644
index 00000000..bf6c48db
--- /dev/null
+++ b/man/adsl_sat.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{adsl_sat}
+\alias{adsl_sat}
+\title{Patient data from single arm study}
+\format{
+A data frame with 500 rows and 8 columns:
+\describe{
+\item{USUBJID}{Unique subject identifiers for patients.}
+\item{ARM}{Assigned treatment arm.}
+\item{AGE}{Age in years at baseline.}
+\item{SEX}{Sex of patient recorded as character \code{"Male"}/\code{"Female"}.}
+\item{SMOKE}{Smoking status at baseline as integer \code{1}/\code{0}.}
+\item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer \code{1}/\code{0}.}
+\item{N_PR_THER}{Number of prior therapies received as integer \verb{1, 2, 3, 4}.}
+\item{SEX_MALE}{Indicator of \code{SEX == "Male"} as numeric \code{1}/\code{0}.}
+
+}
+}
+\usage{
+adsl_sat
+}
+\description{
+Patient data from single arm study
+}
+\seealso{
+Other unanchored datasets: 
+\code{\link{adrs_sat}},
+\code{\link{adtte_sat}},
+\code{\link{agd}},
+\code{\link{centered_ipd_sat}},
+\code{\link{pseudo_ipd_sat}},
+\code{\link{weighted_sat}}
+}
+\concept{unanchored datasets}
+\keyword{dataset}
diff --git a/man/adsl_twt.Rd b/man/adsl_twt.Rd
new file mode 100644
index 00000000..6ef07138
--- /dev/null
+++ b/man/adsl_twt.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{adsl_twt}
+\alias{adsl_twt}
+\title{Patient data from two arm trial}
+\format{
+A data frame with 1000 rows and 8 columns:
+\describe{
+\item{USUBJID}{Unique subject identifiers for patients.}
+\item{ARM}{Assigned treatment arm.}
+\item{AGE}{Age in years at baseline.}
+\item{SEX}{Sex of patient recorded as character \code{"Male"}/\code{"Female"}.}
+\item{SMOKE}{Smoking status at baseline as integer \code{1}/\code{0}.}
+\item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer \code{1}/\code{0}.}
+\item{N_PR_THER}{Number of prior therapies received as integer \verb{1, 2, 3, 4}.}
+\item{SEX_MALE}{Indicator of \code{SEX == "Male"} as numeric \code{1}/\code{0}.}
+}
+}
+\usage{
+adsl_twt
+}
+\description{
+Patient data from two arm trial
+}
+\seealso{
+Other anchored datasets: 
+\code{\link{adtte_twt}},
+\code{\link{agd}},
+\code{\link{centered_ipd_twt}},
+\code{\link{pseudo_ipd_twt}}
+}
+\concept{anchored datasets}
+\keyword{dataset}
diff --git a/man/adtte_sat.Rd b/man/adtte_sat.Rd
new file mode 100644
index 00000000..6d21be80
--- /dev/null
+++ b/man/adtte_sat.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{adtte_sat}
+\alias{adtte_sat}
+\title{Survival data from single arm trial}
+\format{
+A data frame with 500 rows and 10 columns:
+\describe{
+\item{USUBJID}{Unique subject identifiers for patients.}
+\item{ARM}{Assigned treatment arm, \code{"A"}.}
+\item{AVAL}{Analysis value, which in this dataset is overall survival time in days.}
+\item{AVALU}{Unit of \code{AVAL}.}
+\item{PARAMCD}{Parameter code of \code{AVAL}, \code{"OS"}.}
+\item{PARAM}{Parameter name of \code{AVAL}, \code{"Overall Survival"}.}
+\item{CNSR}{Censoring indicator \code{0}/\code{1}.}
+\item{TIME}{Survival time in days.}
+\item{EVENT}{Event indicator \code{0}/\code{1}.}
+}
+}
+\usage{
+adtte_sat
+}
+\description{
+Survival data from single arm trial
+}
+\seealso{
+Other unanchored datasets: 
+\code{\link{adrs_sat}},
+\code{\link{adsl_sat}},
+\code{\link{agd}},
+\code{\link{centered_ipd_sat}},
+\code{\link{pseudo_ipd_sat}},
+\code{\link{weighted_sat}}
+}
+\concept{unanchored datasets}
+\keyword{dataset}
diff --git a/man/adtte_twt.Rd b/man/adtte_twt.Rd
new file mode 100644
index 00000000..5e6d2618
--- /dev/null
+++ b/man/adtte_twt.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{adtte_twt}
+\alias{adtte_twt}
+\title{Survival data from two arm trial}
+\format{
+A data frame with 1000 rows and 10 columns:
+\describe{
+\item{USUBJID}{Unique subject identifiers for patients.}
+\item{ARM}{Assigned treatment arm, \code{"A"}, \code{"C"}.}
+\item{AVAL}{Analysis value, which in this dataset is overall survival time in days.}
+\item{AVALU}{Unit of \code{AVAL}.}
+\item{PARAMCD}{Parameter code of \code{AVAL}, \code{"OS"}.}
+\item{PARAM}{Parameter name of \code{AVAL}, \code{"Overall Survival"}.}
+\item{CNSR}{Censoring indicator \code{0}/\code{1}.}
+\item{TIME}{Survival time in days.}
+\item{EVENT}{Event indicator \code{0}/\code{1}.}
+}
+}
+\usage{
+adtte_twt
+}
+\description{
+Survival data from two arm trial
+}
+\seealso{
+Other anchored datasets: 
+\code{\link{adsl_twt}},
+\code{\link{agd}},
+\code{\link{centered_ipd_twt}},
+\code{\link{pseudo_ipd_twt}}
+}
+\concept{anchored datasets}
+\keyword{dataset}
diff --git a/man/agd.Rd b/man/agd.Rd
new file mode 100644
index 00000000..31b097e0
--- /dev/null
+++ b/man/agd.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{agd}
+\alias{agd}
+\title{Aggregate effect modifier data from published study}
+\format{
+A data frame with 3 rows and 9 columns:
+\describe{
+\item{STUDY}{The study name, Study_XXXX}
+\item{ARM}{Study arm name or total}
+\item{N}{Number of observations in study arm}
+\item{AGE_MEAN}{Mean age in study arm}
+\item{AGE_MEDIAN}{Median age in study arm}
+\item{AGE_SD}{Standard deviation of age in study arm}
+\item{SEX_MALE_COUNT}{Number of male patients}
+\item{ECOG0_COUNT}{Number of patients with ECOG score = 0}
+\item{SMOKE_COUNT}{Number of smokers}
+\item{N_PR_THER_MEDIAN}{Median number of prior therapies}
+}
+}
+\usage{
+agd
+}
+\description{
+This data is formatted to be used in \code{\link[=center_ipd]{center_ipd()}}.
+}
+\seealso{
+Other unanchored datasets: 
+\code{\link{adrs_sat}},
+\code{\link{adsl_sat}},
+\code{\link{adtte_sat}},
+\code{\link{centered_ipd_sat}},
+\code{\link{pseudo_ipd_sat}},
+\code{\link{weighted_sat}}
+
+Other anchored datasets: 
+\code{\link{adsl_twt}},
+\code{\link{adtte_twt}},
+\code{\link{centered_ipd_twt}},
+\code{\link{pseudo_ipd_twt}}
+}
+\concept{anchored datasets}
+\concept{unanchored datasets}
+\keyword{dataset}
diff --git a/man/calculate_weights_legend.Rd b/man/calculate_weights_legend.Rd
index d9078b88..617c206c 100644
--- a/man/calculate_weights_legend.Rd
+++ b/man/calculate_weights_legend.Rd
@@ -16,9 +16,7 @@ list of ESS, ESS reduction, median value of scaled and unscaled weights, and mis
 Calculates ESS reduction and median weights which is used to create legend for weights plot
 }
 \examples{
-\dontrun{
-load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
-calculate_weights_legend(weighted_data)
-}
+data("weighted_sat")
+calculate_weights_legend(weighted_sat)
 }
 \keyword{internal}
diff --git a/man/center_ipd.Rd b/man/center_ipd.Rd
index beba52d8..57d6ad7f 100644
--- a/man/center_ipd.Rd
+++ b/man/center_ipd.Rd
@@ -27,8 +27,8 @@ IPD and aggregate data variable names should match.
 }
 \examples{
 # load in IPD
-adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
-adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+data(adsl_sat)
+adsl <- dummize_ipd(adsl_sat, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
 
 # Reading aggregate data by Excel
 target_pop <- read.csv(
@@ -37,7 +37,7 @@ target_pop <- read.csv(
 agd <- process_agd(target_pop)
 
 # Alternatively, you can specify aggregate data manually in data frame
-load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
+data(agd)
 ipd_centered <- center_ipd(ipd = adsl, agd = agd)
 
 }
diff --git a/man/centered_ipd_sat.Rd b/man/centered_ipd_sat.Rd
new file mode 100644
index 00000000..671e44d4
--- /dev/null
+++ b/man/centered_ipd_sat.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{centered_ipd_sat}
+\alias{centered_ipd_sat}
+\title{Centered patient data from single arm trial}
+\format{
+A data frame with 500 rows and 14 columns:
+\describe{
+\item{USUBJID}{Unique subject identifiers for patients.}
+\item{ARM}{Assigned treatment arm.}
+\item{AGE}{Age in years at baseline.}
+\item{SEX}{Sex of patient recorded as character \code{"Male"}/\code{"Female"}.}
+\item{SMOKE}{Smoking status at baseline as integer \code{1}/\code{0}.}
+\item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer \code{1}/\code{0}.}
+\item{N_PR_THER}{Number of prior therapies received as integer \verb{1, 2, 3, 4}.}
+\item{SEX_MALE}{Indicator of \code{SEX == "Male"} as numeric \code{1}/\code{0}.}
+\item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data \link{agd}.}
+\item{AGE_MEDIAN_CENTERED}{\code{AGE} greater/less than \code{AGE_MEDIAN} in \link{agd} coded as \code{1}/\code{0} and then centered at
+0.5.}
+\item{AGE_SQUARED_CENTERED}{\code{AGE} squared and centered with respect to the \code{AGE} in \link{agd}. The squared age in the
+aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.}
+\item{SEX_MALE_CENTERED}{\code{SEX_MALE} centered by the proportion of male patients in \link{agd}}
+\item{ECOG0_CENTERED}{\code{ECOG0} centered by the proportion of \code{ECOG0} in \link{agd}}
+\item{SMOKE_CENTERED}{\code{SMOKE} centered by the proportion of \code{SMOKE} in \link{agd}}
+\item{N_PR_THER_MEDIAN_CENTERED}{\code{N_PR_THER} centered by the median in \link{agd}.}
+}
+}
+\usage{
+centered_ipd_sat
+}
+\description{
+Centered patient data from single arm trial
+}
+\seealso{
+Other unanchored datasets: 
+\code{\link{adrs_sat}},
+\code{\link{adsl_sat}},
+\code{\link{adtte_sat}},
+\code{\link{agd}},
+\code{\link{pseudo_ipd_sat}},
+\code{\link{weighted_sat}}
+}
+\concept{unanchored datasets}
+\keyword{dataset}
diff --git a/man/centered_ipd_twt.Rd b/man/centered_ipd_twt.Rd
new file mode 100644
index 00000000..a34fa20c
--- /dev/null
+++ b/man/centered_ipd_twt.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{centered_ipd_twt}
+\alias{centered_ipd_twt}
+\title{Centered patient data from two arm trial}
+\format{
+A data frame with 1000 rows and 14 columns:
+\describe{
+\item{USUBJID}{Unique subject identifiers for patients.}
+\item{ARM}{Assigned treatment arm.}
+\item{AGE}{Age in years at baseline.}
+\item{SEX}{Sex of patient recorded as character \code{"Male"}/\code{"Female"}.}
+\item{SMOKE}{Smoking status at baseline as integer \code{1}/\code{0}.}
+\item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer \code{1}/\code{0}.}
+\item{N_PR_THER}{Number of prior therapies received as integer \verb{1, 2, 3, 4}.}
+\item{SEX_MALE}{Indicator of \code{SEX == "Male"} as numeric \code{1}/\code{0}.}
+\item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data \link{agd}.}
+\item{AGE_MEDIAN_CENTERED}{\code{AGE} greater/less than \code{AGE_MEDIAN} in \link{agd} coded as \code{1}/\code{0} and then centered at
+0.5.}
+\item{AGE_SQUARED_CENTERED}{\code{AGE} squared and centered with respect to the \code{AGE} in \link{agd}. The squared age in the
+aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.}
+\item{SEX_MALE_CENTERED}{\code{SEX_MALE} centered by the proportion of male patients in \link{agd}}
+\item{ECOG0_CENTERED}{\code{ECOG0} centered by the proportion of \code{ECOG0} in \link{agd}}
+\item{SMOKE_CENTERED}{\code{SMOKE} centered by the proportion of \code{SMOKE} in \link{agd}}
+\item{N_PR_THER_MEDIAN_CENTERED}{\code{N_PR_THER} centered by the median in \link{agd}.}
+}
+}
+\usage{
+centered_ipd_twt
+}
+\description{
+Centered patient data from two arm trial
+}
+\seealso{
+Other anchored datasets: 
+\code{\link{adsl_twt}},
+\code{\link{adtte_twt}},
+\code{\link{agd}},
+\code{\link{pseudo_ipd_twt}}
+}
+\concept{anchored datasets}
+\keyword{dataset}
diff --git a/man/check_weights.Rd b/man/check_weights.Rd
index bb25ff46..992a41b2 100644
--- a/man/check_weights.Rd
+++ b/man/check_weights.Rd
@@ -48,8 +48,6 @@ before and after adjustment.
 
 }}
 \examples{
-load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
-load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
-check_weights(weighted_data, agd)
+check_weights(weighted_sat, process_agd(agd))
 
 }
diff --git a/man/dummize_ipd.Rd b/man/dummize_ipd.Rd
index b9471b16..f44fea9d 100644
--- a/man/dummize_ipd.Rd
+++ b/man/dummize_ipd.Rd
@@ -22,7 +22,7 @@ This would be especially useful if the variable has more than two factors.
 Note that the original variable is kept after a variable is dummized.
 }
 \examples{
-adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
-adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+data(adsl_twt)
+dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Male"))
 
 }
diff --git a/man/estimate_weights.Rd b/man/estimate_weights.Rd
index 23011211..73535f68 100644
--- a/man/estimate_weights.Rd
+++ b/man/estimate_weights.Rd
@@ -93,17 +93,22 @@ for \code{ggplot} is to plot unscaled and scaled weights on a same plot.
 
 }}
 \examples{
-load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE))
-load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
-ipd_centered <- center_ipd(ipd = ipd, agd = agd)
-
-centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
-centered_colnames <- paste0(centered_colnames, "_CENTERED")
+data(agd)
+data(adsl_sat)
+ipd_centered <- center_ipd(ipd = adsl_sat, agd = process_agd(agd))
+centered_colnames <- grep("_CENTERED", colnames(ipd_centered), value = TRUE)
+centered_colnames
 weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames)
+\donttest{
+# To later estimate bootstrap confidence intervals, we calculate the weights
+# for the bootstrap samples:
+weighted_data_boot <- estimate_weights(
+  data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 500
+)
+}
+plot(weighted_sat)
 
-load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
-plot(weighted_data)
-
-library(ggplot2)
-plot(weighted_data, ggplot = TRUE)
+if (requireNamespace("ggplot2")) {
+  plot(weighted_sat, ggplot = TRUE)
+}
 }
diff --git a/man/kmplot.Rd b/man/kmplot.Rd
index d81ef433..744c6524 100644
--- a/man/kmplot.Rd
+++ b/man/kmplot.Rd
@@ -65,40 +65,19 @@ It is wrapper function of \code{basic_kmplot}. The argument setting is similar t
 
 ### IPD
 # Read in relevant ADaM data and rename variables of interest
-adsl <- read.csv(system.file("extdata", "adsl.csv",
-  package = "maicplus",
-  mustWork = TRUE
-))
-adtte <- read.csv(system.file("extdata", "adtte.csv",
-  package = "maicplus",
-  mustWork = TRUE
-))
-adtte$TIME <- adtte$AVAL
-adtte$EVENT <- adtte$EVNT
-adtte2 <- adtte
-adtte2$ARM <- "C"
-adtte2$TIME <- adtte2$TIME + 7
-adtte <- rbind(adtte, adtte2)
+adsl_twt
+adtte_twt
 
 ### AgD
 # Baseline aggregate data for the comparator population
-target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv",
-  package = "maicplus", mustWork = TRUE
-))
+agd
+
 # for time-to-event endpoints, pseudo IPD from digitalized KM
-pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv",
-  package = "maicplus",
-  mustWork = TRUE
-))
-pseudo_ipd$ARM <- "B"
-pseudo_ipd2 <- pseudo_ipd
-pseudo_ipd2$ARM <- "C"
-pseudo_ipd2$Time <- pseudo_ipd2$Time + 5
-pseudo_ipd <- rbind(pseudo_ipd, pseudo_ipd2)
+pseudo_ipd_twt
 
 #### prepare data
-target_pop <- process_agd(target_pop)
-adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+target_pop <- process_agd(agd)
+adsl <- dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
 use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
 
 #### derive weights
@@ -112,9 +91,9 @@ match_res <- estimate_weights(
 # plot by trial
 kmplot(
   weights_object = match_res,
-  tte_ipd = adtte,
+  tte_ipd = adtte_twt,
   trt_var_ipd = "ARM",
-  tte_pseudo_ipd = pseudo_ipd,
+  tte_pseudo_ipd = pseudo_ipd_twt,
   trt_var_agd = "ARM",
   endpoint_name = "Overall Survival",
   trt_ipd = "A",
@@ -134,9 +113,9 @@ kmplot(
 # plot by arm
 kmplot(
   weights_object = match_res,
-  tte_ipd = adtte,
+  tte_ipd = adtte_twt,
   trt_var_ipd = "ARM",
-  tte_pseudo_ipd = pseudo_ipd,
+  tte_pseudo_ipd = pseudo_ipd_twt,
   trt_var_agd = "ARM",
   endpoint_name = "Overall Survival",
   trt_ipd = "A",
@@ -155,9 +134,9 @@ kmplot(
 # plot all
 kmplot(
   weights_object = match_res,
-  tte_ipd = adtte,
+  tte_ipd = adtte_twt,
   trt_var_ipd = "ARM",
-  tte_pseudo_ipd = pseudo_ipd,
+  tte_pseudo_ipd = pseudo_ipd_twt,
   trt_var_agd = "ARM",
   endpoint_name = "Overall Survival",
   trt_ipd = "A",
diff --git a/man/pseudo_ipd_sat.Rd b/man/pseudo_ipd_sat.Rd
new file mode 100644
index 00000000..d226548e
--- /dev/null
+++ b/man/pseudo_ipd_sat.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{pseudo_ipd_sat}
+\alias{pseudo_ipd_sat}
+\title{Pseudo individual patient survival data from published study}
+\format{
+A data frame with 300 rows and 3 columns:
+\describe{
+\item{Time}{Survival time in days.}
+\item{Event}{Event indicator \code{0}/\code{1}.}
+\item{ARM}{Assigned treatment arm, \code{"B"}.}
+}
+}
+\usage{
+pseudo_ipd_sat
+}
+\description{
+Pseudo individual patient survival data from published study
+}
+\seealso{
+Other unanchored datasets: 
+\code{\link{adrs_sat}},
+\code{\link{adsl_sat}},
+\code{\link{adtte_sat}},
+\code{\link{agd}},
+\code{\link{centered_ipd_sat}},
+\code{\link{weighted_sat}}
+}
+\concept{unanchored datasets}
+\keyword{dataset}
diff --git a/man/pseudo_ipd_twt.Rd b/man/pseudo_ipd_twt.Rd
new file mode 100644
index 00000000..15a2be3e
--- /dev/null
+++ b/man/pseudo_ipd_twt.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{pseudo_ipd_twt}
+\alias{pseudo_ipd_twt}
+\title{Pseudo individual patient survival data from published two arm study}
+\format{
+A data frame with 800 rows and 3 columns:
+\describe{
+\item{Time}{Survival time in days.}
+\item{Event}{Event indicator \code{0}/\code{1}.}
+\item{ARM}{Assigned treatment arm, \code{"B"}, \code{"C"}.}
+}
+}
+\usage{
+pseudo_ipd_twt
+}
+\description{
+Pseudo individual patient survival data from published two arm study
+}
+\seealso{
+Other anchored datasets: 
+\code{\link{adsl_twt}},
+\code{\link{adtte_twt}},
+\code{\link{agd}},
+\code{\link{centered_ipd_twt}}
+}
+\concept{anchored datasets}
+\keyword{dataset}
diff --git a/man/weighted_sat.Rd b/man/weighted_sat.Rd
new file mode 100644
index 00000000..2e244022
--- /dev/null
+++ b/man/weighted_sat.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{weighted_sat}
+\alias{weighted_sat}
+\title{Weighted object for single arm trial data}
+\format{
+A \code{maicplus_estimate_weights} object created by \code{\link[=estimate_weights]{estimate_weights()}} containing
+\describe{
+\item{data}{patient level data with weights}
+\item{centered_colnames}{Columns used in MAIC}
+\item{nr_missing}{Number of observations with missing data}
+\item{ess}{Effective sample size}
+\item{opt}{Information from \code{optim} from weight calculation}
+\item{boot}{Parameters and bootstrap sample weights, \code{NULL} in this object}
+}
+}
+\usage{
+weighted_sat
+}
+\description{
+Weighted object for single arm trial data
+}
+\seealso{
+Other unanchored datasets: 
+\code{\link{adrs_sat}},
+\code{\link{adsl_sat}},
+\code{\link{adtte_sat}},
+\code{\link{agd}},
+\code{\link{centered_ipd_sat}},
+\code{\link{pseudo_ipd_sat}}
+}
+\concept{unanchored datasets}
+\keyword{dataset}
diff --git a/tests/testthat/test-maic_anchored.R b/tests/testthat/test-maic_anchored.R
index 7aa2848e..97935991 100644
--- a/tests/testthat/test-maic_anchored.R
+++ b/tests/testthat/test-maic_anchored.R
@@ -55,9 +55,14 @@ test_that("maic_anchored works for TTE using robust SE", {
   use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
 
   #### derive weights
+  cols <- c(
+    "AGE_CENTERED", "AGE_MEDIAN_CENTERED", "AGE_SQUARED_CENTERED",
+    "SEX_MALE_CENTERED", "ECOG0_CENTERED", "SMOKE_CENTERED"
+  )
+  # cols <- grep("_CENTERED$", names(use_adsl))
   match_res <- estimate_weights(
     data = use_adsl,
-    centered_colnames = grep("_CENTERED$", names(use_adsl)),
+    centered_colnames = cols,
     start_val = 0,
     method = "BFGS"
   )
@@ -164,9 +169,14 @@ test_that("maic_anchored works for TTE using bootstrap SE", {
   use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
 
   #### derive weights
+  cols <- c(
+    "AGE_CENTERED", "AGE_MEDIAN_CENTERED", "AGE_SQUARED_CENTERED",
+    "SEX_MALE_CENTERED", "ECOG0_CENTERED", "SMOKE_CENTERED"
+  )
+  # cols <- grep("_CENTERED$", names(use_adsl))
   match_res_boot <- estimate_weights(
     data = use_adsl,
-    centered_colnames = grep("_CENTERED$", names(use_adsl)),
+    centered_colnames = cols,
     start_val = 0,
     method = "BFGS",
     n_boot_iteration = 5,
diff --git a/tests/testthat/test-maic_unanchored.R b/tests/testthat/test-maic_unanchored.R
index 470b3b0f..81949c84 100644
--- a/tests/testthat/test-maic_unanchored.R
+++ b/tests/testthat/test-maic_unanchored.R
@@ -120,16 +120,21 @@ test_that("test time to event case", {
   use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
 
   #### derive weights
+  cols <- c(
+    "AGE_CENTERED", "AGE_MEDIAN_CENTERED", "AGE_SQUARED_CENTERED",
+    "SEX_MALE_CENTERED", "ECOG0_CENTERED", "SMOKE_CENTERED"
+  )
+  # cols <-  grep("_CENTERED$", names(use_adsl))
   match_res <- estimate_weights(
     data = use_adsl,
-    centered_colnames = grep("_CENTERED$", names(use_adsl)),
+    centered_colnames = cols,
     start_val = 0,
     method = "BFGS"
   )
 
   match_res_boot <- estimate_weights(
     data = use_adsl,
-    centered_colnames = grep("_CENTERED$", names(use_adsl)),
+    centered_colnames = cols,
     start_val = 0,
     method = "BFGS",
     n_boot_iteration = 500,