-
Notifications
You must be signed in to change notification settings - Fork 1
/
weVAR.R
272 lines (267 loc) · 11.4 KB
/
weVAR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#' Automatically Fit VAR(1) Models Based on Input Data
#'
#' @author Jonathan Park
#'
#' This function takes in a `data.frame`, fits individual models for each subject indicated by the ID variable
#' and stores the fitted models as a list.
#' Note that if `ini.*` arguments are not provided, initial values are set to null matrices or null vectors.
#'
#' @return Returns a list of fitted VAR(1) models for each subject.
#'
#' @details I encapsulated `weVAR` into a temporary package so I can render the documentation and do some tests.
#'
#' I made the following changes to the code.
#' 1. I changed the argument `data` to `dataframe` to match [dynr::dynr.data()].
#' 1. I changed the argument `ID` to `id` to match [dynr::dynr.data()].
#' 1. I changed the argument `p.val` to `alpha`.
#' 1. I removed the default `NULL` value on arguments that require explicit values (`dataframe`, `nv`, and `time`).
#' 1. I made all initial condition arguments begin with `ini.`.
#' 1. I set a default value for the argument `dir` to remove it from the argument handling section.
#' 1. I cleaned up the argument handling section.
#' 1. I changed `result$estimation.result` to `results$estimation.result`.
#' 1. I gave the output a class of `weVAR` for subsequent use of the output.
#' 1. I used [styler::style_pkg()] to style the code for added readability.
#'
#' Note that I mainly based the way I documented the arguments from functions in the `dynr` package used within this function. If you are amenable to the way I documented this function, I will proceed to documenting the rest.
#'
#' @references Add references to [weVAR()] here.
#'
#' @param dataframe a data frame object of data that contain a column of subject ID numbers (i.e., an ID variable), a column indicating subject-specific measurement occasions (i.e., a TIME variable), at least one column of observed values, and any number of covariates. The TIME variable should contain subject-specific sequences of (subsets of) consecutively equally spaced numbers (e.g, 1, 2, 3, ...). That is, the program assumes that the input data.frame is equally spaced with potential missingness. If the measurement occasions for a subject are a subset of an arithmetic sequence but are not consecutive, NAs will be inserted automatically to create an equally spaced data set before estimation. Missing values in the observed variables shoud be indicated by NA.
#' @param varnames the names of the variables to be modeled in the VAR
#' @param time a character string of the name of the TIME variable in the data.
#' @param id a character string of the name of the ID variable in the data.
#' @param dir path for output files.
#' @param alpha significance level for testing of transition matrix coefficients.
#' @param ini.mu a vector of the starting or fixed values of the initial state vector.
#' @param ini.cov a positive definite matrix of the starting or fixed values of the initial error covariance structure.
#' @param ini.beta the matrix of starting/fixed values for the transition matrix in the specified linear dynamic model.
#' @param ini.loadings matrix of starting or fixed values for factor loadings.
#' @param use.mi if `use.mi = TRUE`, use [dynr::dynr.mi()] to address missing data in covariates.
#' @param aux names of the auxiliary variables used in the imputation model
#' if `use.mi = TRUE`.
#' @param MLVAR if `MLVAR = TRUE`, time-series for multiple subjects will be chained and a group-level model will be fitted to the entire group.
#' @param avgnet if `avgnet = TRUE`, parameter estimates will be averaged across the sample and distributional characteristics will be returned.
#' @param verbose if `verbose = FALSE` dynr will not give specifics regarding estimation
#' @import dynr
#' @export
weVAR <- function(dataframe,
varnames = NULL,
id = NULL,
time,
dir = getwd(),
alpha = 0.05,
ini.mu = NULL,
ini.cov = NULL,
ini.beta = NULL,
ini.loadings = NULL,
use.mi = FALSE,
aux = NULL,
MLVAR = FALSE,
verbose = FALSE) {
# User Argument Handling
nv = length(varnames)
if (is.null(id)) {
message(
"We are assuming you are only fitting to N = 1 subject.\n",
"If this is a mistake FIX IT."
)
}
if (is.null(ini.mu)) {
ini.mu <- rep(0, nv)
}
if (is.null(ini.cov)) {
ini.cov <- diag(1, nv)
}
if (is.null(ini.beta)) {
ini.beta <- diag(0.00, nv)
}
if (is.null(ini.loadings)) {
ini.loadings <- diag(1, nv)
}
# Preparing Measurement
meas <- prep.measurement(
values.load = ini.loadings,
params.load = matrix(rep("fixed", (nv^2)), ncol = nv), # Flag for customization
state.names = paste0("X", 1:nv),
obs.names = varnames
)
# Matrix Dynamics
# By using 'letters' we are limited to 26 variables
dynm <- prep.matrixDynamics(
values.dyn = ini.beta,
params.dyn = matrix(levels(interaction(letters[1:nv], 1:nv, sep = "_")),
nv, nv,
byrow = TRUE
),
isContinuousTime = FALSE
)
# Prepping Initial Conditions
initial <- prep.initial(
values.inistate = ini.mu,
params.inistate = rep("fixed", nv),
values.inicov = ini.cov,
params.inicov = matrix("fixed", nv, nv)
)
# Prepping Noise
# Creating a covariance matrix to be estimated
covs <- paste0("C_", 1:nv)
m1 <- matrix(levels(interaction(covs[1:nv], 1:nv, sep = "_")), nv, nv, byrow = TRUE)
m1[lower.tri(m1)] <- t(m1)[lower.tri(m1)]
diag(m1) <- paste0("VAR_", 1:nv)
mdcov <- prep.noise(
values.latent = diag(1, nv),
params.latent = m1, # Flag; change later post-testing for GVAR
values.observed = diag(rep(0, nv)),
params.observed = matrix(("fixed"), nv, nv)
)
# Creating Subsetted Dataset
individualMods <- list()
for (k in unique(dataframe[, id])) {
pseudo.dat <- dataframe[dataframe[, id] == k, ]
rawdata <- dynr.data(pseudo.dat, id = id, time = time, observed = varnames)
# Model Fitting
model <- dynr.model(
dynamics = dynm, measurement = meas,
noise = mdcov, initial = initial, data = rawdata,
outfile = paste0(dir, "ModSubj", k, ".c")
)
##
# Plot Formula Here
##
# Setting upper/lower bounds for parameter estimates.
# I've found that this helps speed things up.
model$ub[1:nv^2] <- 1.00
model$lb[1:nv^2] <- -1.00
# Cooking the dynr model
if (use.mi == TRUE) {
results <- dynr.mi(model,
which.aux = aux,
which.lag = varnames, lag = 1,
which.lead = NULL, lead = 0,
m = 5, iter = 10,
imp.obs = FALSE, imp.exo = TRUE,
diag = TRUE, Rhat = 1.1,
conf.level = 0.95,
verbose = FALSE, seed = 1472
)
obj <- results$estimation.result
} else {
results <- dynr.cook(model, verbose = verbose)
obj <- summary(results)$Coefficients
}
# Significance Testing of Transition Matrix Coefficients
effects <- matrix(obj[1:nv^2, 1], nv, nv, byrow = FALSE)
sigs <- matrix(as.numeric(obj[1:nv^2, 6] < alpha), nv, nv)
OG.betas <- effects * sigs
# Significance Testing of Residual Covariance Matrix
sig.cov <- matrix(NA, nv, nv)
sig.cov[lower.tri(sig.cov, diag = TRUE)] <- as.numeric(obj[(nv^2 + 1):nrow(obj), 6] < alpha)
sig.cov[upper.tri(sig.cov)] <- t(sig.cov)[upper.tri(sig.cov)]
res.cov <- matrix(NA, nv, nv)
res.cov[lower.tri(res.cov, diag = TRUE)] <- obj[(nv^2 + 1):nrow(obj), 1]
res.cov[upper.tri(res.cov)] <- t(res.cov)[upper.tri(res.cov)]
OG.cov <- sig.cov * res.cov
PDC <- matrix(NA, nv, nv)
PCC <- matrix(NA, nv, nv)
OG.kappa <- solve(OG.cov)
# Calculating the PDC/PCCs
# See Wild, Eichler, et al (2010)
for (p in 1:nv) {
for (q in 1:nv) {
PCC[p, q] <- -((OG.kappa[p, q]) / (sqrt(OG.kappa[p, p] * OG.kappa[q, q])))
PDC[p, q] <- (OG.betas[p, q]) / (sqrt(OG.betas[p, p] * OG.kappa[q, q] + (OG.betas[p, q]^2)))
}
}
individualMods[[k]] <- list(
Betas = OG.betas,
ResidCov = OG.cov,
PCC = PCC,
PDC = PDC,
Res = results
)
}
##
group.info = NULL
if(MLVAR == TRUE){
message('Fitting an ML VAR to the Chained Time-Series!')
# 1. Create Chained Time-Series
# Take each individual, i, and chain them with the i+1 subject
# Add a blank space between i and i+1
# Knowns
# 1. Dataset
# 2. ID Variable
chain.temp = NULL
for(i in unique(dataframe[,id])){
temp1 = dataframe[dataframe[,id]==i,varnames]
if(i != unique(dataframe[,id])[length(unique(dataframe[,id]))]){
temp1[nrow(temp1)+1,] = rep(NA, length(varnames))
}
chain.temp = rbind(chain.temp, temp1)
}
chain.temp$TIMES = 1:nrow(chain.temp)
chain.temp$ID = rep(1, nrow(chain.temp))
names(chain.temp) = c(varnames, time, id)
# 2. Fit the Fuckin' Model
rawdata <- dynr.data(chain.temp, id = id, time = time, observed = varnames)
model <- dynr.model(
dynamics = dynm, measurement = meas,
noise = mdcov, initial = initial, data = rawdata,
outfile = paste0(dir, "GroupMod", ".c")
)
model$ub[1:nv^2] <- 1.00
model$lb[1:nv^2] <- -1.00
# Cooking the dynr model
if (use.mi == TRUE) {
results <- dynr.mi(model,
which.aux = aux,
which.lag = varnames, lag = 1,
which.lead = NULL, lead = 0,
m = 5, iter = 10,
imp.obs = FALSE, imp.exo = TRUE,
diag = TRUE, Rhat = 1.1,
conf.level = 0.95,
verbose = FALSE, seed = 1472
)
obj <- results$estimation.result
} else {
results <- dynr.cook(model)
obj <- summary(results)$Coefficients
}
# Significance Testing of Transition Matrix Coefficients
effects <- matrix(obj[1:nv^2, 1], nv, nv, byrow = FALSE)
sigs <- matrix(as.numeric(obj[1:nv^2, 6] < alpha), nv, nv)
OG.betas <- effects * sigs
# Significance Testing of Residual Covariance Matrix
sig.cov <- matrix(NA, nv, nv)
sig.cov[lower.tri(sig.cov, diag = TRUE)] <- as.numeric(obj[(nv^2 + 1):nrow(obj), 6] < alpha)
sig.cov[upper.tri(sig.cov)] <- t(sig.cov)[upper.tri(sig.cov)]
res.cov <- matrix(NA, nv, nv)
res.cov[lower.tri(res.cov, diag = TRUE)] <- obj[(nv^2 + 1):nrow(obj), 1]
res.cov[upper.tri(res.cov)] <- t(res.cov)[upper.tri(res.cov)]
OG.cov <- sig.cov * res.cov
PDC <- matrix(NA, nv, nv)
PCC <- matrix(NA, nv, nv)
OG.kappa <- solve(OG.cov)
# Calculating the PDC/PCCs
# See Wild, Eichler, et al (2010)
for (p in 1:nv) {
for (q in 1:nv) {
PCC[p, q] <- -((OG.kappa[p, q]) / (sqrt(OG.kappa[p, p] * OG.kappa[q, q])))
PDC[p, q] <- (OG.betas[p, q]) / (sqrt(OG.betas[p, p] * OG.kappa[q, q] + (OG.betas[p, q]^2)))
}
}
GroupMod = list(
Betas = OG.betas,
ResidCov = OG.cov,
PCC = PCC,
PDC = PDC,
Res = results
)
}else{GroupMod = NULL}
individualMods = list(individualMods, GroupMod)
class(individualMods) <- c(
"weVAR",
class(individualMods)
)
return(individualMods)
}