From 553746be7c412e600375311cf86b66067d927bef Mon Sep 17 00:00:00 2001 From: MAmbrose-IDM Date: Sun, 3 Nov 2024 23:13:48 +0100 Subject: [PATCH] add functions to process data and create new plots of simulation inputs and results (cherry picked from commit dde1c4ac8e0ba7a8c041fc2acdb4eb34cbab7c50) --- .../data_processing/1_DHS_data_extraction.R | 116 +++++----- ...map_intervention_coverage_input_example.R} | 0 ...ap_intervention_coverage_input_functions.R | 110 +++++++++ .../plot_sim_output_functions.R | 216 +++++++++++++++++- .../process_sim_output_functions.R | 70 +++++- 5 files changed, 446 insertions(+), 66 deletions(-) rename r_utilities/data_processing/{plot_map_intervention_coverage_input.R => plot_map_intervention_coverage_input_example.R} (100%) create mode 100644 r_utilities/data_processing/plot_map_intervention_coverage_input_functions.R diff --git a/r_utilities/data_processing/1_DHS_data_extraction.R b/r_utilities/data_processing/1_DHS_data_extraction.R index 6c34a00..71c9900 100644 --- a/r_utilities/data_processing/1_DHS_data_extraction.R +++ b/r_utilities/data_processing/1_DHS_data_extraction.R @@ -924,35 +924,35 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3 # plots of cluster-level DHS results #=========================================================================================================## if(!dir.exists(paste0(hbhi_dir,'/estimates_from_DHS/plots'))) dir.create(paste0(hbhi_dir,'/estimates_from_DHS/plots')) - # if(plot_separate_pdfs){ - # for(yy in 1:length(years)){ - # pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'cluster_observations_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE) - # par(mfrow=c(1,1)) - # for(i_var in 1:length(variables)){ - # var = variables[i_var] - # cluster_obs = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'cluster_outputs_', years[yy], '.csv'))[,-1] - # cluster_obs$latitude[which(cluster_obs$latitude == 0)] = NA - # cluster_obs$longitude[which(cluster_obs$longitude == 0)] = NA - # if(paste0(var,'_num_total') %in% colnames(cluster_obs)){ - # max_survey_size = max(cluster_obs[[paste0(var,'_num_total')]], na.rm=TRUE) - # layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE)) - # plot(admin_shape, main=var) - # points(cluster_obs$longitude, cluster_obs$latitude, col=colors_range_0_to_1[1+round(cluster_obs[[paste0(var,'_rate')]]*100)], pch=20, cex=cluster_obs[[paste0(var,'_num_total')]]/round(max_survey_size/5))#, xlim=c(min(cluster_obs$longitude), max(cluster_obs$longitude)), ylim=c(min(cluster_obs$latitude), max(cluster_obs$latitude))) - # # legend - colorbar - # legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1)) - # plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var) - # text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5)) - # rasterImage(legend_image, 0, 0, 1,1) - # # legend - survey size - # plot(rep(0,5), seq(1, max_survey_size, length.out=5), cex=seq(1,max_survey_size, length.out=5)/round(max_survey_size/5), pch=20, axes=FALSE, xlab='', ylab='sample size'); axis(2) - # } - # } - # dev.off() - # } - # } - # - # - # + if(plot_separate_pdfs){ + for(yy in 1:length(years)){ + pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'cluster_observations_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE) + par(mfrow=c(1,1)) + for(i_var in 1:length(variables)){ + var = variables[i_var] + cluster_obs = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'cluster_outputs_', years[yy], '.csv'))[,-1] + cluster_obs$latitude[which(cluster_obs$latitude == 0)] = NA + cluster_obs$longitude[which(cluster_obs$longitude == 0)] = NA + if(paste0(var,'_num_total') %in% colnames(cluster_obs)){ + max_survey_size = max(cluster_obs[[paste0(var,'_num_total')]], na.rm=TRUE) + layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE)) + plot(admin_shape, main=var) + points(cluster_obs$longitude, cluster_obs$latitude, col=colors_range_0_to_1[1+round(cluster_obs[[paste0(var,'_rate')]]*100)], pch=20, cex=cluster_obs[[paste0(var,'_num_total')]]/round(max_survey_size/5))#, xlim=c(min(cluster_obs$longitude), max(cluster_obs$longitude)), ylim=c(min(cluster_obs$latitude), max(cluster_obs$latitude))) + # legend - colorbar + legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1)) + plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var) + text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5)) + rasterImage(legend_image, 0, 0, 1,1) + # legend - survey size + plot(rep(0,5), seq(1, max_survey_size, length.out=5), cex=seq(1,max_survey_size, length.out=5)/round(max_survey_size/5), pch=20, axes=FALSE, xlab='', ylab='sample size'); axis(2) + } + } + dev.off() + } + } + + + ##=========================================================================================================## # plots of cluster-level DHS results - separated by interventions, each plot panel showing across years ##=========================================================================================================## @@ -963,7 +963,7 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3 variables2 = variables[variables %in% c('mic', 'rdt', 'itn_all', 'itn_u5', 'iptp','cm','received_treatment','sought_treatment','blood_test', 'art')] } else variables2 = variables nyears = length(years) - + if(separate_plots_for_each_var){ for(i_var in 1:length(variables2)){ var = variables2[i_var] @@ -1029,37 +1029,37 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3 dev.off() } } - # - # - # + # + # + # # ####=========================================================================================================#### # # map of LGA-level DHS results, allowing for aggregation to admin1 level when sample sizes too small # ####=========================================================================================================#### - # if(plot_separate_pdfs){ - # for(yy in 1:length(years)){ - # pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE) - # for(i_var in 1:length(variables)){ - # var = variables[i_var] - # admin_sums0 = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.csv'))[,-1] - # reorder_admins = match(sapply(admin_shape$NOMDEP, match_lga_names), sapply(admin_sums0$NOMDEP, match_lga_names)) - # admin_sums = admin_sums0[reorder_admins,] - # if(all(sapply(admin_shape$NOMDEP, match_lga_names) == sapply(admin_sums$NOMDEP, match_lga_names))){ - # if(paste0(var,'_num_total') %in% colnames(admin_sums)){ - # layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE)) - # admin_colors = colors_range_0_to_1[1+round(admin_sums[[paste0(var,'_rate')]]*100)] - # plot(admin_shape, main=var, col=admin_colors) - # - # # legend - colorbar - # legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1)) - # plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var) - # text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5)) - # rasterImage(legend_image, 0, 0, 1,1) - # } - # } else warning('during plot generation, order of districts in shapefile and data frame did not match, skipping plotting.') - # } - # dev.off() - # } - # } + if(plot_separate_pdfs){ + for(yy in 1:length(years)){ + pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE) + for(i_var in 1:length(variables)){ + var = variables[i_var] + admin_sums0 = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.csv'))[,-1] + reorder_admins = match(sapply(admin_shape$NOMDEP, match_lga_names), sapply(admin_sums0$NOMDEP, match_lga_names)) + admin_sums = admin_sums0[reorder_admins,] + if(all(sapply(admin_shape$NOMDEP, match_lga_names) == sapply(admin_sums$NOMDEP, match_lga_names))){ + if(paste0(var,'_num_total') %in% colnames(admin_sums)){ + layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE)) + admin_colors = colors_range_0_to_1[1+round(admin_sums[[paste0(var,'_rate')]]*100)] + plot(admin_shape, main=var, col=admin_colors) + + # legend - colorbar + legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1)) + plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var) + text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5)) + rasterImage(legend_image, 0, 0, 1,1) + } + } else warning('during plot generation, order of districts in shapefile and data frame did not match, skipping plotting.') + } + dev.off() + } + } diff --git a/r_utilities/data_processing/plot_map_intervention_coverage_input.R b/r_utilities/data_processing/plot_map_intervention_coverage_input_example.R similarity index 100% rename from r_utilities/data_processing/plot_map_intervention_coverage_input.R rename to r_utilities/data_processing/plot_map_intervention_coverage_input_example.R diff --git a/r_utilities/data_processing/plot_map_intervention_coverage_input_functions.R b/r_utilities/data_processing/plot_map_intervention_coverage_input_functions.R new file mode 100644 index 0000000..7554c30 --- /dev/null +++ b/r_utilities/data_processing/plot_map_intervention_coverage_input_functions.R @@ -0,0 +1,110 @@ +# plot_map_intervention_coverage_input_functions.R + + +library(raster) +library(ggplot2) +library(gridExtra) +library(grid) +library(RColorBrewer) +library(tidyverse) +library(sf) +library(reshape2) +library(pals) +library(prettyGraphs) + + + +########################################################## +# functions for ggplot version +########################################################## + +# function to combine multiple plots for same intervention that share a legend +grid_arrange_shared_legend_plotlist =function(..., + plotlist=NULL, + ncol = length(list(...)), + nrow = NULL, + position = c("bottom", "right")) { + + plots <- c(list(...), plotlist) + + if (is.null(nrow)) nrow = ceiling(length(plots)/ncol) + + position <- match.arg(position) + g <- ggplotGrob(plots[[1]] + theme(legend.position = position) + guides(fill = guide_legend(reverse=T)))$grobs + legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]] + lheight <- sum(legend$height) + lwidth <- sum(legend$width) + gl <- lapply(plots, function(x) x + theme(legend.position="none")) + gl <- c(gl, ncol = ncol, nrow = nrow) + + combined <- switch(position, + "bottom" = arrangeGrob(do.call(arrangeGrob, gl), + legend, + ncol = 1, + heights = unit.c(unit(1, "npc") - lheight, lheight)), + "right" = arrangeGrob(do.call(arrangeGrob, gl), + legend, + ncol = 2, + widths = unit.c(unit(1, "npc") - lwidth, lwidth))) + + grid.newpage() + grid.draw(combined) + + # return gtable invisibly + invisible(combined) +} + +change_legend_size <- function(myPlot, pointSize = 0.5, textSize = 3, spaceLegend = 0.1) { + myPlot + + guides(shape = guide_legend(override.aes = list(size = pointSize)), + color = guide_legend(override.aes = list(size = pointSize))) + + theme(legend.title = element_text(size = textSize), + legend.text = element_text(size = textSize), + legend.key.size = unit(spaceLegend, "lines")) +} + +# function to create and save maps of intervention coverages +create_coverage_input_maps = function(inter_input, inter_years, output_filename, colorscale, min_value, max_value, num_colors){ + plot_list = list() + for(yy in 1:length(inter_years)){ + inter_input_cur = inter_input[inter_input$year == inter_years[yy],] + inter_input_cur$output_value = inter_input_cur[[coverage_colname]] + admin_cur = admin_shapefile %>% + left_join(inter_input_cur, by=c('NOMDEP' = 'admin_name')) %>% + mutate(binned_values=cut(output_value, + breaks=round(seq((min_value), (max_value), length.out = (num_colors+1)),2))) + plot_list[[yy]] = ggplot(admin_cur) + + geom_sf(aes(fill=binned_values), size=0.1) + + scale_fill_manual(values=setNames(colorscale, levels(admin_cur$binned_values)), drop=FALSE, name='coverage', na.value='grey96') + + ggtitle(inter_years[yy]) + + guides(fill = guide_legend(reverse=T)) + + theme_void() + + theme(plot.title = element_text(hjust = 0.5)) + plot_list[[yy]] = change_legend_size(plot_list[[yy]], pointSize=10, textSize=10, spaceLegend=1) + } + + gg = grid_arrange_shared_legend_plotlist(plotlist=plot_list, ncol=length(plot_list), position='right') + ggsave(output_filename, gg, width = (length(inter_years)+1)*1.8, height=2.3, units='in', dpi=800) +} + + + +# function to create and save maps of intervention coverages +create_maps_state_groups = function(admin_shapefile_filepath, shapefile_admin_colname, admin_group_df, column_name, output_filename, colorscale){ + admin_shapefile = shapefile(admin_shapefile_filepath) + # standardize shapefile names + admin_shapefile$NOMDEP = standardize_admin_names_in_vector(target_names=archetype_info$LGA, origin_names=admin_shapefile$NOMDEP) + + admin_group_df$output_value = admin_group_df[[column_name]] + admin_cur = admin_shapefile %>% + left_join(inter_input_cur, by=c('NOMDEP' = 'admin_name')) + gg = ggplot(admin_cur) + + geom_sf(aes(fill=binned_values), size=0.1) + + scale_fill_manual(values=setNames(colorscale, levels(admin_cur$output_value)), drop=FALSE, name='group', na.value='grey96') + + ggtitle(inter_years[yy]) + + guides(fill = guide_legend(reverse=T)) + + theme_void() + + theme(plot.title = element_text(hjust = 0.5)) + + ggsave(output_filename, gg, width = (2)*1.8, height=2.3, units='in', dpi=800) +} diff --git a/r_utilities/plots_results_analyses/plot_sim_output_functions.R b/r_utilities/plots_results_analyses/plot_sim_output_functions.R index 413bd23..6bb6d80 100644 --- a/r_utilities/plots_results_analyses/plot_sim_output_functions.R +++ b/r_utilities/plots_results_analyses/plot_sim_output_functions.R @@ -23,7 +23,7 @@ save_plots = TRUE #################################################################################### -# barplots for burden relative to BAU at the national level +# barplots for burden relative to BAU: percent reduction #################################################################################### plot_relative_burden_barplots = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins, @@ -148,7 +148,7 @@ plot_relative_burden_barplots_by_state = function(sim_future_output_dir, pop_fil barplot_start_year, barplot_end_year, pyr, chw_cov, scenario_names, experiment_names, scenario_palette, LLIN2y_flag=FALSE, overwrite_files=FALSE, show_error_bar=TRUE, align_seeds=TRUE, - burden_metric_subset=c(), include_to_present=TRUE){ + burden_metric_subset=c(), include_to_present=TRUE, file_suffix=''){ admin_pop = read.csv(pop_filepath) @@ -213,7 +213,7 @@ plot_relative_burden_barplots_by_state = function(sim_future_output_dir, pop_fil rel_burden_agg$code = rel_burden_agg$State gg = ggplot(rel_burden_agg) + geom_bar(aes(x=scenario, y=mean_rel, fill=scenario), stat='identity') + - scale_y_continuous(labels=percent_format()) + #,limits=c(standard_min_y, standard_max_y)) + # turn into percent reduction + scale_y_continuous(labels=percent_format(), n.breaks=4) + #,limits=c(standard_min_y, standard_max_y)) + # turn into percent reduction ylab('Percent reduction in burden \n ((Baseline - Plan) / Baseline) * 100') + geom_hline(yintercept=0, color='black') + ggtitle(gsub('\\(births\\)', '', burden_metric_name)) + @@ -228,13 +228,131 @@ plot_relative_burden_barplots_by_state = function(sim_future_output_dir, pop_fil gg = gg + geom_errorbar(aes(x=scenario, ymin=min_rel, ymax=max_rel), width=0.4, colour="black", alpha=0.9, size=1) } - ggsave(paste0(sim_future_output_dir, '/_plots/','barplot_percent_reduction_', burden_metric_name,'_stateGrid.png'), gg, dpi=600, width=18, height=12, units='in') + ggsave(paste0(sim_future_output_dir, '/_plots/','barplot_percent_reduction_', burden_metric_name,'_stateGrid',file_suffix,'.png'), gg, dpi=600, width=18*.6, height=12*.6, units='in') # , width=18, height=12, units='in' } } +#################################################################################### +# barplots for burden reduction relative to BAU: difference +#################################################################################### + +plot_difference_burden_barplots = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins, + barplot_start_year, barplot_end_year, + pyr, chw_cov, + scenario_names, experiment_names, scenario_palette, LLIN2y_flag=FALSE, overwrite_files=FALSE, separate_plots_flag=FALSE, show_error_bar=TRUE, align_seeds=TRUE, + include_to_present=TRUE, burden_metric_subset=c()){ + admin_pop = read.csv(pop_filepath) + + # burden metrics + burden_metrics = c('PfPR', 'PfPR', 'incidence', 'incidence', 'directMortality', 'directMortality', 'allMortality', 'allMortality', 'mLBW_deaths', 'MiP_stillbirths') + burden_colnames = c('average_PfPR_U5', 'average_PfPR_all', 'incidence_U5', 'incidence_all', 'direct_death_rate_mean_U5', 'direct_death_rate_mean_all', 'all_death_rate_mean_U5', 'all_death_rate_mean_all', 'annual_num_mLBW', 'annual_num_mStill') + burden_metric_names = c('PfPR (U5)', 'PfPR (all ages)', 'incidence (U5)', 'incidence (all ages)', 'direct mortality (U5)', 'direct mortality (all ages)', 'mortality (U5)', 'mortality (all ages)', 'mLBW mortality (births)', 'stillbirths (births)') + # allow subsetting of which burden metrics plotted (based on burden_metric_subset argument) + if((length(burden_metric_subset)>=1)){ + burden_metrics_subset_indices = which(burden_metrics %in% burden_metric_subset) + burden_colnames = burden_colnames[burden_metrics_subset_indices] + burden_metric_names = burden_metric_names[burden_metrics_subset_indices] + } + + # first comparison name is to-present (skip it), second is BAU (use as reference), comparison scenarios start at the third index + if(include_to_present){ + reference_experiment_name = experiment_names[2] + comparison_start_index = 3 + } else{ + reference_experiment_name = experiment_names[1] + comparison_start_index = 2 + } + # iterate through comparison scenarios, calculating the burden reduction of all metrics relative to BAU (seedwise comparisons, so one output for each run). Combine all scenario reductions into a dataframe (each scenario set in separate rows) + difference_burden_all_df = data.frame() + for(ss in comparison_start_index:length(scenario_names)){ + comparison_experiment_name = experiment_names[ss] + comparison_scenario_name = scenario_names[ss] + difference_burden_df = get_difference_burden(sim_output_filepath=sim_future_output_dir, reference_experiment_name=reference_experiment_name, comparison_experiment_name=comparison_experiment_name, comparison_scenario_name=comparison_scenario_name, + start_year=barplot_start_year, end_year=barplot_end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files, align_seeds=align_seeds) + # only save relevant columns for plotting + difference_burden_df = difference_burden_df[,which(colnames(difference_burden_df) %in% c('scenario', 'Run_Number', burden_colnames))] + if(nrow(difference_burden_all_df) == 0){ + difference_burden_all_df = difference_burden_df + }else{ + difference_burden_all_df = rbind(difference_burden_all_df, difference_burden_df) + } + } + + # get factors in the correct order (rather than alphabetical) + difference_burden_all_df$scenario = factor(difference_burden_all_df$scenario, levels=scenario_names[comparison_start_index:length(scenario_names)]) + + # get minimum and maximum reductions - these will be used if they are smaller / greater than the current min/max + standard_min_x = 0 + standard_max_x = 0.1 + cur_min = min(difference_burden_all_df[,2:(1+length(burden_colnames))]) + cur_max = max(difference_burden_all_df[,2:(1+length(burden_colnames))]) + if(cur_min < standard_min_x) standard_min_x = cur_min + if(cur_max > standard_max_x) standard_max_x = cur_max + + gg_list = list() + for(bb in 1:length(burden_colnames)){ + current_burden_name = burden_colnames[bb] + burden_metric_name = burden_metric_names[bb] + select_col_names = c(current_burden_name, 'scenario') + # get mean, min, and max among all runs for this burden metric + rel_burden_agg = as.data.frame(difference_burden_all_df) %>% dplyr::select(match(select_col_names, names(.))) %>% + dplyr::group_by(scenario) %>% + dplyr::summarise(mean_rel = mean(get(current_burden_name)), + max_rel = max(get(current_burden_name)), + min_rel = min(get(current_burden_name))) + + gg_list[[bb]] = ggplot(rel_burden_agg) + + geom_bar(aes(x=scenario, y=mean_rel, fill=scenario), stat='identity') + + scale_y_continuous(labels=percent_format(), limits=c(standard_min_x, standard_max_x)) + # turn into percent reduction + ylab('Burden averted') + + geom_hline(yintercept=0, color='black') + + ggtitle(gsub('\\(births\\)', '', burden_metric_name)) + + scale_fill_manual(values = scenario_palette) + + theme_classic()+ + theme(legend.position = "top", legend.box='horizontal', legend.title = element_blank(), text = element_text(size = text_size), legend.text=element_text(size = text_size), + axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(),axis.line.x=element_blank(), + plot.margin=unit(c(0,1,1,0), 'cm')) + if(show_error_bar){ + gg_list[[bb]] = gg_list[[bb]] + + geom_errorbar(aes(x=scenario, ymin=min_rel, ymax=max_rel), width=0.4, colour="black", alpha=0.9, size=1) + } + if(separate_plots_flag){ + separate_plot = gg_list[[bb]] + + ylab('Burden averted \n ((Baseline - Plan)') + + theme(legend.position='none', plot.title = element_blank(), text=element_text(size =separate_plot_text_size)) + ggsave(paste0(sim_future_output_dir, '/_plots/','barplot_burden_averted_', burden_metric_name,'_',district_subset,'.png'), separate_plot, dpi=600, width=4, height=3, units='in') + } + } + # for each burden type, + # get mean, min, and max among all runs for each burden metric, each saved as a separate column + # create barplot for each burden type (using columns of dataframe, separate bar for each scenario) + + gg_list = append(list(ggpubr::as_ggplot(ggpubr::get_legend(gg_list[[1]]))), gg_list) + # remove legend from main plots + for(bb in 2:(length(burden_colnames)+1)){ + gg_list[[bb]] = gg_list[[bb]] + theme(legend.position = "none") + theme(text = element_text(size = text_size)) + } + + if(save_plots){ + gg_saved = grid.arrange(grobs = gg_list[-1], layout_matrix = matrix(c(1:(length(burden_colnames))), nrow=2, byrow=FALSE)) + ggsave(paste0(sim_future_output_dir, '/_plots/barplot_burden_averted_', pyr, '_', chw_cov, 'CHW_',district_subset,'.png'), gg_saved, dpi=600, width=14, height=7, units='in') + } + + # ----- combine all burden plots ----- # + # gg = grid.arrange(grobs = gg_list, layout_matrix = matrix(c(1,1,2:(length(burden_colnames)+1)), ncol=2, byrow=TRUE)) + gg = grid.arrange(grobs = gg_list, layout_matrix = rbind(matrix(rep(1, length(burden_colnames)/2), nrow=1), matrix(2:(length(burden_colnames)+1), nrow=2, byrow=FALSE))) + + return(gg) +} + + + + + + #################################################################################################################################### # barplot of the impact a specific intervention has in relevant admins # (percent reduction when intervention is included versus matched simulation without the intervention) @@ -375,6 +493,90 @@ plot_barplot_impact_specific_intervention = function(sim_future_output_dir, pop_ +#################################################################################################################################### +# barplot of the impact a specific intervention has in relevant admins +# (burden reduction when intervention is included versus matched simulation without the intervention) +#################################################################################################################################### + + +table_difference_impact_specific_intervention = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins, + barplot_start_year, barplot_end_year, + pyr, chw_cov, + experiment_names_without, experiment_names_with, intervention_name='PMC', age_group = 'U1', LLIN2y_flag=FALSE, overwrite_files=FALSE, align_seeds=TRUE, + burden_metric_subset=c()){ + admin_pop = read.csv(pop_filepath) + comparison_scenario_name = intervention_name + + # iterate through the matched pairs of experiments without / with the intervention + rel_burden_agg = data.frame() + if(length(experiment_names_without) == length(experiment_names_with)){ + for(ii in 1:length(experiment_names_without)){ + # first experiment is without interventions, second experiment is with intervention + reference_experiment_name = experiment_names_without[ii] + # calculating the burden reduction of all metrics relative to no intervention (seedwise comparisons, so one output for each run). + comparison_experiment_name = experiment_names_with[ii] + + # set which burden metrics are relevant and get relative burden between simulations + burden_metrics = c('PfPR', 'incidence', 'directMortality', 'allMortality') + if(age_group=='U1'){ + warning('Have not yet added support for burden reduction for U1... need to add relevant functions') + # burden_colnames = c('average_PfPR_U1', 'incidence_U1', 'direct_death_rate_mean_U1', 'all_death_rate_mean_U1') + # burden_metric_names = c('PfPR (U1)', 'incidence (U1)', 'direct mortality (U1)', 'mortality (U1)') + # relative_burden_df = get_relative_U1_burden(sim_output_filepath=sim_future_output_dir, reference_experiment_name=reference_experiment_name, comparison_experiment_name=comparison_experiment_name, comparison_scenario_name=comparison_scenario_name, start_year=barplot_start_year, end_year=barplot_end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files, align_seeds=align_seeds) + } else if (age_group=='U5'){ + burden_colnames = c('average_PfPR_U5', 'incidence_U5', 'direct_death_rate_mean_U5', 'all_death_rate_mean_U5') + burden_metric_names = c('PfPR (U5)', 'incidence (U5)', 'direct mortality (U5)', 'mortality (U5)') + relative_burden_df = get_difference_burden(sim_output_filepath=sim_future_output_dir, reference_experiment_name=reference_experiment_name, comparison_experiment_name=comparison_experiment_name, comparison_scenario_name=comparison_scenario_name, start_year=barplot_start_year, end_year=barplot_end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files, align_seeds=align_seeds) + }else{ + burden_metrics = c(burden_metrics, 'mLBW_deaths', 'MiP_stillbirths') + burden_colnames = c('average_PfPR_all', 'incidence_all', 'direct_death_rate_mean_all', 'all_death_rate_mean_all', 'annual_num_mLBW', 'annual_num_mStill') + burden_metric_names = c('PfPR (all ages)', 'incidence (all ages)', 'direct mortality (all ages)', 'mortality (all ages)', 'mLBW mortality (births)', 'stillbirths (births)') + relative_burden_df = get_difference_burden(sim_output_filepath=sim_future_output_dir, reference_experiment_name=reference_experiment_name, comparison_experiment_name=comparison_experiment_name, comparison_scenario_name=comparison_scenario_name, start_year=barplot_start_year, end_year=barplot_end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files, align_seeds=align_seeds) + } + + + # allow subsetting of which burden metrics plotted (based on burden_metric_subset argument) + if((length(burden_metric_subset)>=1)){ + burden_metrics_subset_indices = which(burden_metrics %in% burden_metric_subset) + burden_colnames = burden_colnames[burden_metrics_subset_indices] + burden_metric_names = burden_metric_names[burden_metrics_subset_indices] + } + + # only save relevant columns for plotting + relative_burden_df = relative_burden_df[,which(colnames(relative_burden_df) %in% c('scenario', 'Run_Number', burden_colnames))] + + for(bb in 1:length(burden_colnames)){ + current_burden_name = burden_colnames[bb] + burden_metric_name = burden_metric_names[bb] + select_col_names = c(current_burden_name, 'scenario') + # get mean, min, and max among all runs for this burden metric + rel_burden_agg_bb = as.data.frame(relative_burden_df) %>% dplyr::select(match(select_col_names, names(.))) %>% + dplyr::group_by(scenario) %>% + dplyr::summarise(mean_rel = mean(get(current_burden_name)), + max_rel = max(get(current_burden_name)), + min_rel = min(get(current_burden_name))) + rel_burden_agg_bb$burden_metric = burden_metric_name + rel_burden_agg_bb$scenario_name = experiment_names_with[ii] + if(nrow(rel_burden_agg)<1){ + rel_burden_agg = rel_burden_agg_bb + } else{ + rel_burden_agg = merge(rel_burden_agg, rel_burden_agg_bb, all=TRUE) + } + } + } + } + + rel_burden_agg$burden_metric = gsub('\\(births\\)', '', rel_burden_agg$burden_metric) + rel_burden_agg$burden_metric = factor(rel_burden_agg$burden_metric, levels=gsub('\\(births\\)', '', burden_metric_names)) + rel_burden_agg$scenario_name = factor(rel_burden_agg$scenario_name, levels=experiment_names_with) + + return(rel_burden_agg) +} + + + + + plot_barplot_impact_two_specific_interventions = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins, @@ -746,7 +948,7 @@ plot_simulation_output_burden_all = function(sim_future_output_dir, pop_filepath plot_simulation_output_burden_by_state = function(sim_future_output_dir, pop_filepath, grid_layout_state_locations, min_year, max_year, sim_end_years, relative_year=NA, scenario_filepaths, scenario_names, experiment_names, scenario_palette, LLIN2y_flag=FALSE, overwrite_files=FALSE, - extend_past_timeseries_year=NA, scenario_linetypes=NA){ + extend_past_timeseries_year=NA, scenario_linetypes=NA,filename_suffix=''){ if (!is.na(relative_year)){ if(relative_year% summarise_all(mean) + comparison_df = comparison_df %>% summarise_all(mean) + } + difference_burden_df = data.frame('Run_Number' = reference_df$Run_Number) + # iterate through burden indicators, calculating relative burden and adding to dataframe + burden_indicators = colnames(reference_df)[-which(colnames(reference_df) == 'Run_Number')] + for(bb in 1:length(burden_indicators)){ + difference_burden_cur = (reference_df[[burden_indicators[bb]]] - comparison_df[[burden_indicators[bb]]]) + difference_burden_df[[burden_indicators[bb]]] = difference_burden_cur + } + difference_burden_df$scenario = comparison_scenario_name + return(difference_burden_df) +} + + + +# get relative burden by state for grid plot +get_difference_burden_by_state = function(sim_output_filepath, reference_experiment_name, comparison_experiment_name, comparison_scenario_name, start_year, end_year, admin_pop, district_subset='allDistricts', cur_admins='all', + LLIN2y_flag=FALSE, overwrite_files=FALSE, align_seeds=TRUE ){ + #' @description get relative change in U5 and all-age burden when comparing between two simulations in specified years and in specified districts (for all malaria metrics, separate values for each seed) + #' @return data frame where each row is a seed and each column is the relative change of different burden metrics, calculated as (reference-comparison) / reference: + + reference_df = get_cumulative_burden_by_state(sim_output_filepath=sim_output_filepath, experiment_name=reference_experiment_name, start_year=start_year, end_year=end_year, admin_pop=admin_pop, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files) + comparison_df = get_cumulative_burden_by_state(sim_output_filepath=sim_output_filepath, experiment_name=comparison_experiment_name, start_year=start_year, end_year=end_year, admin_pop=admin_pop, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files) + + if(align_seeds){ # compare one run seed against the matching run seed in the other experiment + # align seeds and states into same order + reference_df = reference_df[order(reference_df$State, reference_df$Run_Number),] + comparison_df = comparison_df[order(comparison_df$State, comparison_df$Run_Number),] + } else{ # compare the averages across all seeds from one experiment against the average from the other experiment + reference_df = reference_df %>% group_by(State) %>% summarise_all(mean) + comparison_df = comparison_df %>% group_by(State) %>% summarise_all(mean) + # align states into same order + reference_df = reference_df[order(reference_df$State),] + comparison_df = comparison_df[order(comparison_df$State),] + } + difference_burden_df = data.frame('State'=reference_df$State, 'Run_Number'=reference_df$Run_Number) + # iterate through burden indicators, calculating relative burden and adding to dataframe + burden_indicators = colnames(reference_df)[-which(colnames(reference_df) %in% c('State', 'Run_Number'))] + for(bb in 1:length(burden_indicators)){ + difference_burden_cur = (reference_df[[burden_indicators[bb]]] - comparison_df[[burden_indicators[bb]]]) + difference_burden_df[[burden_indicators[bb]]] = difference_burden_cur + } + difference_burden_df$scenario = comparison_scenario_name + return(difference_burden_df) +} + + + + + #################################################################################### # timeseries of simulation burden and/or interventions @@ -721,8 +789,8 @@ get_burden_timeseries_exp = function(exp_filepath, exp_name, district_subset, cu } cur_sim_output_agg$scenario = exp_name + write.csv(cur_sim_output_agg, output_filename, row.names=FALSE) } - write.csv(cur_sim_output_agg, output_filename, row.names=FALSE) return(cur_sim_output_agg) }