-
Notifications
You must be signed in to change notification settings - Fork 4
/
bayes_03_wild_model.R
333 lines (291 loc) · 12.6 KB
/
bayes_03_wild_model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# Author: Kelvin Gorospe
# Calculate greenhouse gas footprint for wild caught fisheries
rm(list = ls())
library(tidyverse)
library(rstan)
library(bayesplot) # for mcmc_areas_ridges
library(shinystan)
library(brms)
library(tidybayes)
# Mac
datadir <- "/Volumes/jgephart/BFA Environment 2/Data"
outdir <- "/Volumes/jgephart/BFA Environment 2/Outputs"
# GHG data
wild_dat <- read.csv(file.path(datadir, "fisheries_fuel_use.csv")) %>% as_tibble()
# Apply gear, species, and consumption weighting
wild_dat_new_weights <- wild_dat %>%
filter(species_group != "Finfish") %>%
# Remove mixed gear and nei observations
filter(!str_detect(pattern = " nei", species)) %>%
filter(gear != "Other, Mixed, or Unknown") %>%
# Remove observations with 0 gear, species, or consumption weighting
filter(gear_weighting > 0 & species_weighting > 0 & consumption_weighting > 0) %>%
# Re-weight gear within species
group_by(species) %>%
mutate(gear_weights_new = gear_weighting/sum(gear_weighting)) %>%
ungroup() %>%
# Re-weight species and consumption within species_groups
group_by(species_group) %>%
mutate(species_weights_new = species_weighting/sum(species_weighting),
consumption_weights_new = consumption_weighting/sum(consumption_weighting)) %>%
ungroup() %>%
# Calculate overall weights and re-weight
mutate(prod_of_weights = gear_weights_new * species_weights_new * consumption_weights_new) %>%
group_by(species_group) %>%
mutate(overall_weights = prod_of_weights/sum(prod_of_weights)) %>%
ungroup() %>%
# ADD sci and taxa-level indices
mutate(clean_sci_name = as.factor(species),
sci = as.numeric(clean_sci_name),
taxa = as.factor(species_group),
tx = as.numeric(taxa)) %>%
# LAST FORMATING STEP - always arrange by clean_sci_name
arrange(species)
# OPTION: Apply EDIBLE PORTIONS adjustment
wild_edible <- read.csv(file.path(datadir, "capture_edible_CFs.csv"))
wild_dat_new_weights <- wild_dat_new_weights %>% # Join and apply edible portions weightings
left_join(wild_edible, by = c("taxa" = "full_taxa_name")) %>%
mutate(ghg = ghg * 1/(edible_mean/100))
# Get priors on taxa-level GHG
source("Functions.R")
priors_csv <- clean_wild_priors("Priors - Capture.csv") %>%
select(taxa = Group.name, Mean.GHG) %>%
filter(taxa %in% unique(wild_dat_new_weights$taxa)) %>%
right_join(wild_dat_new_weights %>% select(taxa) %>% unique(), by = "taxa") %>%
arrange(taxa)
# Format priors for STAN
# can't pass NAs into STAN - drop NAs but keep track of vector positions
prior_vec_index <- which(is.na(priors_csv$Mean.GHG)==FALSE)
priors <- priors_csv$Mean.GHG[prior_vec_index]
#################################################################
# Set data, indices, and weights for STAN
# DATA
ghg <- wild_dat_new_weights$ghg
# INDICES
N = nrow(wild_dat_new_weights)
N_SCI <- length(unique(wild_dat_new_weights$species))
n_to_sci <- wild_dat_new_weights$sci
n_to_tx <- wild_dat_new_weights$tx
N_TX <- length(unique(wild_dat_new_weights$tx))
sci_to_tx <- wild_dat_new_weights %>%
select(sci, tx) %>%
unique() %>%
pull(tx)
# WEIGHTS:
# Get sci-level weightings for generating taxa-level quantities:
# Overall_weights column was calculated per row, before normalizing within a taxa group
# To get sci-level weightings, sum within species
# IMPORTANT arrange by clean_sci_name so that the order matches data
sci_w <- wild_dat_new_weights %>%
group_by(clean_sci_name, taxa) %>%
summarise(sci_weights = sum(overall_weights)) %>%
ungroup() %>%
arrange(clean_sci_name) %>%
pull(sci_weights)
# Need the following for generating ragged array in STAN - indexing vector of sci_mu's based on their taxa identity (each slice will have a different length)
where_tx <- order(sci_to_tx) # i.e., give the positions in sci_to_tx in order of their taxa level (where in sci_to_tx is taxa level 1, followed by taxa level 2, etc)
# How many sci are in each taxa - need this to declare length of each sci_mu vector
n_sci_in_tx <- wild_dat_new_weights %>%
select(sci, tx) %>%
unique() %>%
group_by(tx) %>%
mutate(n_sci_in_tx = n()) %>%
ungroup() %>%
arrange(tx) %>%
select(tx, n_sci_in_tx) %>%
unique() %>%
pull(n_sci_in_tx)
slice_where_tx <- cumsum(n_sci_in_tx) # These are the breaks in where_tx corresponding to each taxa level - need this to split up where_tx
slice_where_tx <- c(0, slice_where_tx)
# Set data for stan:
# REMINDER RE: PRIORS vs NO PRIORS - make sure STAN code below for defining/applying priors is allowed to run or commented out as needed
# NO PRIORS
# stan_data <- list(N = N,
# N_SCI = N_SCI,
# n_to_sci = n_to_sci,
# N_TX = N_TX,
# sci_to_tx = sci_to_tx,
# ghg = ghg,
# sci_w = sci_w,
# where_tx = where_tx,
# n_sci_in_tx = n_sci_in_tx,
# slice_where_tx = slice_where_tx)
# WITH PRIORS
# REMINDER RE: PRIORS vs NO PRIORS - make sure STAN code below for defining/applying priors is allowed to run or commented out as needed
stan_data <- list(N = N,
N_SCI = N_SCI,
n_to_sci = n_to_sci,
N_TX = N_TX,
sci_to_tx = sci_to_tx,
ghg = ghg,
sci_w = sci_w,
where_tx = where_tx,
n_sci_in_tx = n_sci_in_tx,
slice_where_tx = slice_where_tx,
priors = priors,
prior_vec_index = prior_vec_index)
# NORMAL DISTRIBUTION model - fed and non-fed
stan_no_na <- 'data {
// indices
int<lower=0> N; // number of observations
int N_TX; // number of taxa groups
int N_SCI; // number of scientific names
int n_to_sci[N]; // sciname index
int sci_to_tx[N_SCI]; // taxa-group indices
// data
vector<lower=0>[N] ghg; // data
// PRIORS
vector[8] priors;
int prior_vec_index[8];
// indices for slicing vectors for calculating weighted means
vector<lower=0>[N_SCI] sci_w; // sci-level production weights
int where_tx[N_SCI]; // order sci_to_tx by taxa-level
int n_sci_in_tx[N_TX]; // number of sci in each taxa-level, ordered by taxa-level
int slice_where_tx[N_TX + 1]; // breaks in where_tx by taxa-level
}
parameters {
// On farm model
vector<lower=0>[N_TX] tx_mu_ghg;
vector<lower=0>[N_SCI] sci_mu_ghg;
real<lower=0> tx_sigma_ghg;
real<lower=0> sci_sigma_ghg;
}
model {
// PRIORS
tx_mu_ghg[prior_vec_index] ~ normal(priors, 1000);
// example priors for dirichlet model for feed proportions
// sci_phi defined as sci_phi[n_to_sci][K]
// sci_phi[2][1] ~ normal(0.13, 5); // mean for Oncorhynhchus mykiss soy feed
// weak priors on sigma
tx_sigma_ghg ~ cauchy(0, 10000);
sci_sigma_ghg ~ cauchy(0, 10000);
// normal model for sci and taxa-level on-farm footrpint
ghg ~ normal(sci_mu_ghg[n_to_sci], sci_sigma_ghg);
sci_mu_ghg ~ normal(tx_mu_ghg[sci_to_tx], tx_sigma_ghg);
}
generated quantities {
// Declare vectors for weightings
vector[N_TX] tx_ghg_w;
vector[N_SCI] sci_ghg_w;
// Apply weightings
sci_ghg_w = sci_mu_ghg .* sci_w; // WEIGHTED sci-level
for (n_tx in 1:N_TX) {
vector[n_sci_in_tx[n_tx]] sci_w_vec; // declare vector of sci_ghg in taxa-level n_tx
sci_w_vec = sci_ghg_w[where_tx[slice_where_tx[n_tx]+1:slice_where_tx[n_tx+1]]]; // get all the sci_mu in taxa-level n_tx
tx_ghg_w[n_tx] = sum(sci_w_vec); // sum sci_w_vec to get WEIGHTED tx-level outputs
}
}'
no_na_mod <- stan_model(model_code = stan_no_na)
# Note: For Windows, apparently OK to ignore this warning message:
# Warning message:
# In system(paste(CXX, ARGS), ignore.stdout = TRUE, ignore.stderr = TRUE) :
# 'C:/rtools40/usr/mingw_/bin/g++' not found
# Fit model:
# Set seed while testing
fit_no_na <- sampling(object = no_na_mod,
data = stan_data,
cores = 4,
seed = "11729",
iter = 2500,
control = list(adapt_delta = 0.99, max_treedepth = 15))
#fit_no_na <- sampling(object = no_na_mod, data = stan_data, cores = 4, iter = 5000, control = list(adapt_delta = 0.99))
summary(fit_no_na)$summary
######################################################################################################
# STEP 3: OUTPUT RESULTS
###########################################################
# RESTARTING POINT
rm(list=ls()[!(ls() %in% c("datadir", "outdir", "impact", "set_allocation",
"wild_dat_new_weights", "fit_no_na"))])
save.image(file = file.path(outdir, paste(Sys.Date(), "_full-model-posterior_Wild-capture-ghg.RData", sep = "")))
###########################################################
# SET THEME
x <- seq(0, 1, length.out = 16)
base_color <- "#364F6B"
show_col(seq_gradient_pal(base_color, "white")(x)) # Get hexadecimals for other colors
interval_palette <- c("#9EA8B7", "#6A7A90", "#364F6B") # Order: light to dark
sci_plot_theme <- theme(title = element_text(size = 18),
axis.title.x = element_text(size = 16),
axis.text=element_text(size=10, color = "black"))
tx_plot_theme <- list(theme(title = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.text=element_text(size=20, color = "black"),
legend.position = "none"),
scale_color_manual(values = interval_palette))
units_for_plot = "kg CO2-eq per tonne"
# Key for naming taxa levels
# Get full taxa group names back
tx_index_key <- wild_dat_new_weights %>%
group_by(clean_sci_name) %>%
mutate(n_obs = n()) %>%
ungroup() %>%
select(taxa, tx) %>%
unique() %>%
arrange(taxa)
# Key for naming sci levels
# Get full taxa group names back
sci_index_key <- wild_dat_new_weights %>%
group_by(clean_sci_name) %>%
mutate(n_obs = n()) %>%
ungroup() %>%
select(clean_sci_name, sci, taxa, tx, n_obs) %>%
unique() %>%
arrange(taxa)
# Use tidybayes + ggdist for finer control of aes mapping (instead of bayesplots)
get_variables(fit_no_na)
######################################################################################################
# PLOT final outputs
# WEIGHTED taxa-level GHG
fit_no_na %>%
spread_draws(tx_ghg_w[tx]) %>%
median_qi(tx_ghg_w, .width = c(0.95, 0.8, 0.5)) %>%
left_join(tx_index_key, by = "tx") %>% # Join with index key to get sci and taxa names
ggplot(aes(y = taxa, x = tx_ghg_w)) +
geom_interval(aes(xmin = .lower, xmax = .upper)) +
theme_classic() +
#coord_cartesian(xlim = c(0, 12500)) +
tx_plot_theme +
labs(x = units_for_plot, y = "", title = "")
ggsave(filename = file.path(outdir, "plot_WILD-GHG-TAXA-LEVEL-WEIGHTED.png"), width = 11, height = 8.5)
# Same but as CSV output
fit_no_na %>%
spread_draws(tx_ghg_w[tx]) %>%
median_qi(tx_ghg_w, .width = c(0.95, 0.8, 0.5)) %>%
left_join(tx_index_key, by = "tx") %>% # Join with index key to get sci and taxa names
rename(total_stressor = tx_ghg_w) %>%
write.csv(file = file.path(outdir, "summary_WILD-GHG-TAXA-LEVEL-WEIGHTED.csv"), row.names = FALSE)
# UNWEIGHTED taxa-level GHG
fit_no_na %>%
spread_draws(tx_mu_ghg[tx]) %>%
median_qi(tx_mu_ghg, .width = c(0.95, 0.8, 0.5)) %>%
left_join(tx_index_key, by = "tx") %>% # Join with index key to get sci and taxa names
# SET plants and bivalves to 0
ggplot(aes(y = taxa, x = tx_mu_ghg)) +
geom_interval(aes(xmin = .lower, xmax = .upper)) +
theme_classic() +
tx_plot_theme +
labs(x = units_for_plot, y = "", title = "")
ggsave(filename = file.path(outdir, "plot_WILD-GHG-TAXA-LEVEL-UNWEIGHTED.png"), width = 11, height = 8.5)
# WEIGHTED sci-level GHG
fit_no_na %>%
spread_draws(sci_ghg_w[sci]) %>%
median_qi(sci_ghg_w, .width = c(0.95, 0.8, 0.5)) %>%
left_join(sci_index_key, by = "sci") %>% # Join with index key to get sci and taxa names
ggplot(aes(y = paste(taxa, clean_sci_name, sep = ""), x = sci_ghg_w)) +
geom_interval(aes(xmin = .lower, xmax = .upper)) +
scale_color_brewer() +
theme_classic() +
sci_plot_theme +
labs(x = units_for_plot, y = "", title = "")
ggsave(filename = file.path(outdir, "plot_WILD-GHG-SCI-LEVEL-WEIGHTED.png"), width = 11, height = 8.5)
# UNWEIGHTED taxa-level GHG
fit_no_na %>%
spread_draws(sci_mu_ghg[sci]) %>%
median_qi(sci_mu_ghg, .width = c(0.95, 0.8, 0.5)) %>%
left_join(sci_index_key, by = "sci") %>% # Join with index key to get sci and taxa names
ggplot(aes(y = paste(taxa, clean_sci_name, sep = ""), x = sci_mu_ghg)) +
geom_interval(aes(xmin = .lower, xmax = .upper)) +
scale_color_brewer() +
theme_classic() +
sci_plot_theme +
labs(x = units_for_plot, y = "", title = "")
ggsave(filename = file.path(outdir, "plot_WILD-GHG-SCI-LEVEL-UNWEIGHTED.png"), width = 11, height = 8.5)