-
Notifications
You must be signed in to change notification settings - Fork 0
/
model_fit_pe.R
182 lines (149 loc) · 6.12 KB
/
model_fit_pe.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#' Model fitting strategy
#' Fit model using data from 1997-2006
#' Predict on data till 2030
#' Pacakges required
library(bmstdr)
library(raster)
library(spdep)
library(sf)
library(tidyr)
library(dplyr)
# make the variables in the data in appropriate form
pe_ts$mean_temp_high <- as.numeric(pe_ts$mean_temp_high)
pe_ts$mean_temp_low <- as.numeric(pe_ts$mean_temp_low)
pe_ts$mean_pcp_high <- as.numeric(pe_ts$mean_pcp_high)
pe_ts$mean_pcp_low <- as.numeric(pe_ts$mean_pcp_low)
#' Add total productivity column
tot_prod <- pe_ts %>%
select(starts_with("production")) %>%
apply(1, mean)
#' Full dataframe
pe_ts_full <- cbind(pe_ts, tot_prod)
ns = length(unique(pe_ts_full$GeoUID)) #No of regions
nt = length(unique(pe_ts_full$Date)) #No of time points
pe_ts_full <- pe_ts_full %>%
mutate(t = rep(seq(1:nt), ns)) %>%
mutate(s = rep(seq(1,ns), each = nt))
#' Get the training data for fitting model
pe_train <- pe_ts_full %>%
filter(Date < "2006-01-01") %>%
select(GeoUID, Date, mean_temp_high, mean_temp_low, mean_pcp_high, mean_pcp_low, t, s, tot_prod)
#' Which region has missing value for tot_prod??
unique(pe_train$GeoUID[is.na(pe_train$tot_prod)])
#' remove that region from pe_train
pe_train <- pe_train %>%
filter(GeoUID != unique(pe_train$GeoUID[is.na(pe_train$tot_prod)]))
#' Make the dataframe corresponding to high emission
#' with NA values for tot_prod for years post 2006
#' --> This is needed for predictions
pe_model <- pe_ts_full %>%
filter(Date < "2030-01-01") %>%
select(GeoUID, Date, mean_temp_high, mean_pcp_high, mean_temp_low, mean_pcp_low, t, s, tot_prod) %>%
mutate(tot_prod_na = if_else(Date >= "2006-01-01", NA, tot_prod))
pe_model <- pe_model %>%
filter(GeoUID != 1101050)
#' Constructing Neigbourhood matrix for PEI
#' 1. Load Polygons Data Frame (type "sf")
#' 2. Convert "sf" to Spatialpolydf
#' 3. Convert polygons to the neighbour's list (poly2nb)
#' (either using "rook's case" or "queen")
#' 4. Convert neighbour's list to neighbourhood matrix (nb2mat)
#' 5. Plot the links b/w regions
raw_geom_data_pe <- as_Spatial(tar_read(raw_geom_data_pe))
#switch off spherical geometry
sf_use_s2(FALSE)
## For Rook's case
#CreatNeigbhour's list
wr_pe <- poly2nb(raw_geom_data_pe, row.names=seq(1:length(raw_geom_data_pe$GeoUID)), queen=FALSE)
wr_pe
w.list <- nb2listw(wr_pe, style = "B")
#Neighbours matrix
w_pe <- nb2mat(wr_pe, style='B', zero.policy = TRUE)
dim(w_pe)
#Remove the row corresponding to missing region
row.rm =unique(pe_train$s[pe_train$GeoUID == 1101050])
w_pe = w_pe [-row.rm, -row.rm]
#' Set MCMC run parameters
#' Below specification produces 3,000 samples from posterior distribution
Ncar <- 40000
burn.in.car <- 10000
thin <- 10
# set the rows to use model validation
vs = sample(nrow(pe_train), 0.1*nrow(pe_train))
# Specify the linear model
f.pe <- tot_prod ~ mean_temp_high + mean_pcp_high
#########################
## For model selection ##
#########################
## Model 1
M.lin.pe <- Bcartime(formula=f.pe, data=pe_train, scol= "s", tcol= "t",
W=w_pe, model="linear", family="gaussian", package="CARBayesST",
validrows = vs,
N=Ncar, burn.in=burn.in.car, thin=thin)
#'Extract the summaries needed!
#' 1: Model choice criteria
#' 2. Model validation statistics
mchoice1 = M.lin.pe$mchoice
mvalid1 = M.lin.pe$stats
## Model 2
M.av.pe <- Bcartime(formula=f.pe, data=pe_train, scol= "s", tcol= "t",
W=w_pe, model="anova", family="gaussian", package="CARBayesST",
validrows = vs,
N=Ncar, burn.in=burn.in.car, thin=thin)
#'Extract the summaries needed!
#' 1: Model choice criteria
#' 2. Model validation statistics
mchoice2 = M.av.pe$mchoice
mvalid2 = M.av.pe$stats
## Model 3
M.ar1.pe <- Bcartime(formula=f.pe, data=pe_train, scol= "s", tcol= "t",
W=w_pe, model="ar", AR = 1, family="gaussian", package="CARBayesST",
validrows = vs,
N=Ncar, burn.in=burn.in.car, thin=thin)
#'Extract the summaries needed!
#' 1: Model choice criteria
#' 2. Model validation statistics
mchoice3 = M.ar1.pe$mchoice
mvalid3 = M.ar1.pe$stats
## Model 4
M.ar2.pe <- Bcartime(formula=f.pe, data=pe_train, scol= "s", tcol= "t",
W=w_pe, model="ar", AR = 2, family="gaussian", package="CARBayesST",
validrows = vs,
N=Ncar, burn.in=burn.in.car, thin=thin)
#'Extract the summaries needed!
#' 1: Model choice criteria
#' 2. Model validation statistics
mchoice4 = M.ar2.pe$mchoice
mvalid4 = M.ar2.pe$stats
#' Model comparison table
a <- rbind(mchoice1, mchoice2, mchoice3, mchoice4)
a <- a[, -(5:6)]
a <- a[, c(2, 1, 4, 3)]
b.rmse <- c(mvalid1$rmse, mvalid2$rmse, mvalid3$rmse, mvalid4$rmse)
a <- cbind(a, b.rmse)
rownames(a) <- c("Linear", "ANOVA", "AR(1)", "AR(2)")
colnames(a) <- c("pDIC", "DIC", "pWAIC", "WAIC", "RMSE")
table.pei <- a
###########################
## For Model Predictions ##
###########################
f.pe_high <- tot_prod_na ~ mean_temp_high + mean_pcp_high
## Final model with predictions for high emission scenario
M.final.pe_high <- Bcartime(formula=f.pe_high, data=pe_model, scol= "s", tcol= "t",
W=w_pe, model="ar", AR = 2, family="gaussian", package="CARBayesST",
N=Ncar, burn.in=burn.in.car, thin=thin)
#' Final model with predictions for low emission scenario
#' tot_prod_na is the productivity till 2005
#' tot_prod is th productivity till 2021
f.pe_low <- tot_prod_na ~ mean_temp_low + mean_pcp_low
M.final.pe_low <- Bcartime(formula=f.pe_low, data= pe_model, scol= "s", tcol= "t",
W=w_pe, model="ar", AR = 2, family="gaussian", package="CARBayesST",
N=Ncar, burn.in=burn.in.car, thin=thin)
# Get the predictions
tot_prod_preds_high <- apply(M.final.pe_high$fit$samples$Y, 2, mean)
tot_prod_preds_low <- apply(M.final.pe_low$fit$samples$Y, 2, mean)
# Add the predictions with the pe_model dataframe
pe_pred_h <- c(pe_train$tot_prod, tot_prod_preds_high)
pe_pred_l <- c(pe_train$tot_prod, tot_prod_preds_low)
pe_model_pred <- cbind(pe_model, pe_pred_h, pe_pred_l) %>%
mutate(diff = pe_pred_h - pe_pred_l)