forked from jagephart/FishPrint
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbayes_01_process_data_for_analysis.R
159 lines (127 loc) · 7.97 KB
/
bayes_01_process_data_for_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# Author: Kelvin Gorospe
# Clean compiled LCA data needed for all models
rm(list = ls())
# Libraries for processing and analyses
library(tidyverse)
library(rstan)
library(data.table)
library(countrycode) # part of clean.lca
library(bayesplot) # for mcmc_areas_ridges
library(shinystan)
library(brms)
library(tidybayes)
# Mac
datadir <- "/Volumes/jgephart/BFA Environment 2/Data"
outdir <- "/Volumes/jgephart/BFA Environment 2/Outputs"
source("Functions.R")
# Full raw dataset used in published analysis
# lca_dat <- read.csv(file.path(datadir, "LCI_compiled_FINAL.csv"))
# Clean and replicate LCA data
# lca_dat_clean <- clean.lca(LCA_data = lca_dat, replicate_dat = TRUE)
# Read in cleaned, aggregated, and replicated dataset:
# DATA DOCUMENTATION: 80% of the data points from the original analysis are identical in the aggregated file, the remaining 20% were aggregated as they come from unpublished studies
# ORIGINAL RAW FILE CAN BE REQUESTED: CONTACT THE CORRESPONDING AUTHOR FOR INFORMATION
lca_dat_clean <- read.csv(file.path(datadir, "LCI_compiled_for_SI.csv"))
# Rebuild FAO fish production from zip file
#fishstat_dat <- rebuild_fish("/Volumes/jgephart/FishStatR/Data/Production-Global/ZippedFiles/GlobalProduction_2019.1.0.zip")
fishstat_dat <- rebuild_fish("/Volumes/jgephart/FishStatR/Data/Production-Global/ZippedFiles/GlobalProduction_2020.1.0.zip")
# DATA DOCUMENTATION: Zip file can be downloaded from FAO FishStat: http://www.fao.org/fishery/static/Data/GlobalProduction_2020.1.0.zip
# File can also be provided by corresponding author upon request
# Classify species into taxa groupings
# Use ISSCAAP grouping (in FAO data) to help with classification
lca_dat_clean_groups <- add_taxa_group(lca_dat_clean, fishstat_dat) %>%
group_by(clean_sci_name, taxa_group_name, taxa) %>%
mutate(n_in_sci = n()) %>%
ungroup()
# Check after add_taxa_group, there should be no taxa == "unassigned"
sort(unique(lca_dat_clean_groups$taxa))
# Output clean data with groups (later will read this back in to join with model predictions)
write.csv(lca_dat_clean_groups, file.path(outdir, "lca_clean_with_groups.csv"), row.names = FALSE)
# Output taxa groupings and sample sizes:
n_and_composition <- lca_dat_clean_groups %>%
select(taxa_group_name, Sample_size_n_farms, Country, iso3c, Source, clean_sci_name) %>%
distinct() %>%
group_by(taxa_group_name, clean_sci_name, Country, iso3c) %>%
summarize(n_farms = sum(Sample_size_n_farms),
n_studies = n()) %>% # each row is a distinct study
ungroup() # number of studies/sources and farms per taxa
write.csv(n_and_composition, file.path(outdir, "taxa_group_n_and_composition.csv"))
#________________________________________________________________________________________________________________________________________________________________#
# Calculate production weightings for each taxa group
#________________________________________________________________________________________________________________________________________________________________#
prod_weightings <- fishstat_dat %>%
filter(year > 2012) %>%
filter(unit == "t") %>%
filter(source_name_en %in% c("Aquaculture production (marine)", "Aquaculture production (brackishwater)", "Aquaculture production (freshwater)")) %>%
group_by(isscaap_group, species_scientific_name) %>%
summarise(species_prod = sum(quantity, na.rm = TRUE))
# Check for species names in lca data that does not match fao data
unique(lca_dat_clean_groups$clean_sci_name[!(lca_dat_clean_groups$clean_sci_name %in% prod_weightings$species_scientific_name)])
# Correct species names to merge with fao production data
lca_dat_clean_groups <- lca_dat_clean_groups %>%
mutate(fao_species = case_when(
(clean_sci_name == "Litopenaeus vannamei") ~ "Penaeus vannamei",
(clean_sci_name == "Scophthalmidae") ~ "",
(clean_sci_name == "Cynoscion spp") ~ "Sciaenidae", # Replace with the family, but this may underestimate production
(clean_sci_name == "Laminaria digitata") ~ "Macrocystis pyrifera", # Or use Saccharina latissima to weight evenly with other brown seaweeds
(clean_sci_name == "Gracilaria chilensis") ~ "Gracilaria spp", # Use as representative for red algae in general
(clean_sci_name == "Mixed Hypophthalmichthys molitrix and H. nobilis") ~ "Hypophthalmichthys molitrix",
(clean_sci_name == "Ctenopharyngodon idella") ~ "Ctenopharyngodon idellus",
(clean_sci_name == "Anoplopoma fimbria") ~ "",
(clean_sci_name == "Macrobrachium amazonicum") ~ "Macrobrachium spp"
))
lca_dat_clean_groups$fao_species[is.na(lca_dat_clean_groups$fao_species)] <- lca_dat_clean_groups$clean_sci_name[is.na(lca_dat_clean_groups$fao_species)]
prod_weightings <- prod_weightings %>%
right_join(lca_dat_clean_groups, by = c("species_scientific_name" = "fao_species", "isscaap_group")) %>%
select(isscaap_group, taxa_group_name, taxa, clean_sci_name, species_prod) %>%
distinct()
# Add small amount to na's
prod_weightings$species_prod[is.na(prod_weightings$species_prod)] <- 10
# Calculate proportions
prod_weightings <- prod_weightings %>%
group_by(taxa_group_name, taxa) %>%
# For taxa levels higher than species, use production volumes equal to the average in the taxa group
mutate(species_prod_reweight = ifelse(str_detect(clean_sci_name, pattern = "spp")|length(str_split(string = clean_sci_name, pattern = "\\s")[[1]]) == 1,
mean(species_prod, na.rm = TRUE), species_prod),
prod_weighting = species_prod_reweight/sum(species_prod_reweight, na.rm = TRUE))
# Check that species sum to 1
prod_weightings %>% group_by(taxa_group_name, taxa) %>% summarise(total_weight = sum(prod_weighting))
write.csv(prod_weightings, file.path(outdir, "aqua_prod_weightings.csv"), row.names = FALSE)
#________________________________________________________________________________________________________________________________________________________________#
# Calculate the weighted averages for the feed components
#________________________________________________________________________________________________________________________________________________________________#
feed_fp <- read.csv(file.path(datadir, "feed_impact_factors.csv"))
feed_fp$iso3c <- countrycode(feed_fp$Country.Region, origin = "country.name", destination = "iso3c")
feed_fp$iso3c[is.na(feed_fp$iso3c)] <- "Other"
feed_fp <- feed_fp %>%
mutate(iso3c = ifelse(Country.Region == "Europe" & Input %in% c("Chicken by-product meal", "Chicken by-product oil"),
"EUR", iso3c))
# DATA DOCUMENTATION: These data come from FAO and can be downloaded from FAOSTAT
# http://www.fao.org/faostat/en/#data/TP
# Query terms for downloading: SELECT All countries, Export quantity, All items
# File can also be provided by corresponding author upon request
faostat <- read.csv(file.path(datadir, "FAOSTAT_data_12-9-2020.csv"))
faostat$iso3c <- countrycode(faostat$Area, origin = "country.name", destination = "iso3c")
# Calculate soy weights
weighted_soy <- calc_soy_weights(faostat, feed_fp)
# Calculate crop weights
weighted_crop <- calc_crop_weights(faostat, feed_fp)
# Weights for animal by products - only using chicken
weighted_livestock <- calc_chicken_weights(faostat, feed_fp)
# Fishery products
# # UN Comtrade data for 2015
# fmfo_trade <- read.csv(file.path(datadir, "FMFO_trade.csv"))
# Use landings weightings instead of trade weightings
fmfo_prod <- read.csv(file.path(datadir, "fish_weightings.csv"))
# Calculate fishery weights
weighted_fishery <- calc_fishery_weights(fmfo_prod, feed_fp)
# Calculate fishery byproduct weights
weighted_fishbyproduct <- calc_byproduct_weights(fmfo_prod, feed_fp)
# Combine fishery and byproduct weights
weighted_fish <- combine_fish_weights(weighted_fishery, weighted_fishbyproduct)
weighted_fish$Input.type <- "Fishery"
# Combine data frames
weighted_fp <- rbind(weighted_soy, weighted_crop)
weighted_fp <- rbind(weighted_fp, weighted_livestock)
weighted_fp <- rbind(weighted_fp, weighted_fish)
write.csv(weighted_fp, file.path(outdir, "weighted_feed_fp.csv"), row.names = FALSE)