From 553746be7c412e600375311cf86b66067d927bef Mon Sep 17 00:00:00 2001
From: MAmbrose-IDM <monique.ambrose@gatesfoundation.org>
Date: Sun, 3 Nov 2024 23:13:48 +0100
Subject: [PATCH] add functions to process data and create new plots of
 simulation inputs and results

(cherry picked from commit dde1c4ac8e0ba7a8c041fc2acdb4eb34cbab7c50)
---
 .../data_processing/1_DHS_data_extraction.R   | 116 +++++-----
 ...map_intervention_coverage_input_example.R} |   0
 ...ap_intervention_coverage_input_functions.R | 110 +++++++++
 .../plot_sim_output_functions.R               | 216 +++++++++++++++++-
 .../process_sim_output_functions.R            |  70 +++++-
 5 files changed, 446 insertions(+), 66 deletions(-)
 rename r_utilities/data_processing/{plot_map_intervention_coverage_input.R => plot_map_intervention_coverage_input_example.R} (100%)
 create mode 100644 r_utilities/data_processing/plot_map_intervention_coverage_input_functions.R

diff --git a/r_utilities/data_processing/1_DHS_data_extraction.R b/r_utilities/data_processing/1_DHS_data_extraction.R
index 6c34a00..71c9900 100644
--- a/r_utilities/data_processing/1_DHS_data_extraction.R
+++ b/r_utilities/data_processing/1_DHS_data_extraction.R
@@ -924,35 +924,35 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3
   # plots of cluster-level DHS results
   #=========================================================================================================##
   if(!dir.exists(paste0(hbhi_dir,'/estimates_from_DHS/plots'))) dir.create(paste0(hbhi_dir,'/estimates_from_DHS/plots'))
-  # if(plot_separate_pdfs){
-  #   for(yy in 1:length(years)){
-  #     pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'cluster_observations_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE)
-  #     par(mfrow=c(1,1))
-  #     for(i_var in 1:length(variables)){
-  #       var = variables[i_var]
-  #       cluster_obs = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'cluster_outputs_', years[yy], '.csv'))[,-1]
-  #       cluster_obs$latitude[which(cluster_obs$latitude == 0)] = NA
-  #       cluster_obs$longitude[which(cluster_obs$longitude == 0)] = NA
-  #       if(paste0(var,'_num_total') %in% colnames(cluster_obs)){
-  #         max_survey_size = max(cluster_obs[[paste0(var,'_num_total')]], na.rm=TRUE)
-  #         layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE))
-  #         plot(admin_shape, main=var)
-  #         points(cluster_obs$longitude, cluster_obs$latitude, col=colors_range_0_to_1[1+round(cluster_obs[[paste0(var,'_rate')]]*100)], pch=20, cex=cluster_obs[[paste0(var,'_num_total')]]/round(max_survey_size/5))#, xlim=c(min(cluster_obs$longitude), max(cluster_obs$longitude)), ylim=c(min(cluster_obs$latitude), max(cluster_obs$latitude)))
-  #         # legend - colorbar
-  #         legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1))
-  #         plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var)
-  #         text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5))
-  #         rasterImage(legend_image, 0, 0, 1,1)
-  #         # legend - survey size
-  #         plot(rep(0,5), seq(1, max_survey_size, length.out=5), cex=seq(1,max_survey_size, length.out=5)/round(max_survey_size/5), pch=20, axes=FALSE, xlab='', ylab='sample size'); axis(2)
-  #       }
-  #     }
-  #     dev.off()
-  #   }
-  # }
-  # 
-  # 
-  # 
+  if(plot_separate_pdfs){
+    for(yy in 1:length(years)){
+      pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'cluster_observations_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE)
+      par(mfrow=c(1,1))
+      for(i_var in 1:length(variables)){
+        var = variables[i_var]
+        cluster_obs = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'cluster_outputs_', years[yy], '.csv'))[,-1]
+        cluster_obs$latitude[which(cluster_obs$latitude == 0)] = NA
+        cluster_obs$longitude[which(cluster_obs$longitude == 0)] = NA
+        if(paste0(var,'_num_total') %in% colnames(cluster_obs)){
+          max_survey_size = max(cluster_obs[[paste0(var,'_num_total')]], na.rm=TRUE)
+          layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE))
+          plot(admin_shape, main=var)
+          points(cluster_obs$longitude, cluster_obs$latitude, col=colors_range_0_to_1[1+round(cluster_obs[[paste0(var,'_rate')]]*100)], pch=20, cex=cluster_obs[[paste0(var,'_num_total')]]/round(max_survey_size/5))#, xlim=c(min(cluster_obs$longitude), max(cluster_obs$longitude)), ylim=c(min(cluster_obs$latitude), max(cluster_obs$latitude)))
+          # legend - colorbar
+          legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1))
+          plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var)
+          text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5))
+          rasterImage(legend_image, 0, 0, 1,1)
+          # legend - survey size
+          plot(rep(0,5), seq(1, max_survey_size, length.out=5), cex=seq(1,max_survey_size, length.out=5)/round(max_survey_size/5), pch=20, axes=FALSE, xlab='', ylab='sample size'); axis(2)
+        }
+      }
+      dev.off()
+    }
+  }
+
+
+
   ##=========================================================================================================##
   # plots of cluster-level DHS results - separated by interventions, each plot panel showing across years
   ##=========================================================================================================##
@@ -963,7 +963,7 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3
       variables2 = variables[variables %in% c('mic', 'rdt', 'itn_all', 'itn_u5', 'iptp','cm','received_treatment','sought_treatment','blood_test', 'art')]
     } else variables2 = variables
     nyears = length(years)
-    
+
     if(separate_plots_for_each_var){
       for(i_var in 1:length(variables2)){
         var = variables2[i_var]
@@ -1029,37 +1029,37 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3
       dev.off()
     }
   }
-  # 
-  # 
-  # 
+  #
+  #
+  #
   # ####=========================================================================================================####
   # # map of LGA-level DHS results, allowing for aggregation to admin1 level when sample sizes too small
   # ####=========================================================================================================####
-  # if(plot_separate_pdfs){
-  #   for(yy in 1:length(years)){
-  #     pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE)
-  #     for(i_var in 1:length(variables)){
-  #       var = variables[i_var]
-  #       admin_sums0 = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.csv'))[,-1]
-  #       reorder_admins = match(sapply(admin_shape$NOMDEP, match_lga_names), sapply(admin_sums0$NOMDEP, match_lga_names))
-  #       admin_sums = admin_sums0[reorder_admins,]
-  #       if(all(sapply(admin_shape$NOMDEP, match_lga_names) == sapply(admin_sums$NOMDEP, match_lga_names))){
-  #         if(paste0(var,'_num_total') %in% colnames(admin_sums)){
-  #           layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE))
-  #           admin_colors = colors_range_0_to_1[1+round(admin_sums[[paste0(var,'_rate')]]*100)]
-  #           plot(admin_shape, main=var, col=admin_colors)
-  #           
-  #           # legend - colorbar
-  #           legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1))
-  #           plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var)
-  #           text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5))
-  #           rasterImage(legend_image, 0, 0, 1,1)
-  #         }
-  #       } else warning('during plot generation, order of districts in shapefile and data frame did not match, skipping plotting.')
-  #     }
-  #     dev.off()
-  #   }
-  # }
+  if(plot_separate_pdfs){
+    for(yy in 1:length(years)){
+      pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE)
+      for(i_var in 1:length(variables)){
+        var = variables[i_var]
+        admin_sums0 = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.csv'))[,-1]
+        reorder_admins = match(sapply(admin_shape$NOMDEP, match_lga_names), sapply(admin_sums0$NOMDEP, match_lga_names))
+        admin_sums = admin_sums0[reorder_admins,]
+        if(all(sapply(admin_shape$NOMDEP, match_lga_names) == sapply(admin_sums$NOMDEP, match_lga_names))){
+          if(paste0(var,'_num_total') %in% colnames(admin_sums)){
+            layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE))
+            admin_colors = colors_range_0_to_1[1+round(admin_sums[[paste0(var,'_rate')]]*100)]
+            plot(admin_shape, main=var, col=admin_colors)
+
+            # legend - colorbar
+            legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1))
+            plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var)
+            text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5))
+            rasterImage(legend_image, 0, 0, 1,1)
+          }
+        } else warning('during plot generation, order of districts in shapefile and data frame did not match, skipping plotting.')
+      }
+      dev.off()
+    }
+  }
 
 
 
diff --git a/r_utilities/data_processing/plot_map_intervention_coverage_input.R b/r_utilities/data_processing/plot_map_intervention_coverage_input_example.R
similarity index 100%
rename from r_utilities/data_processing/plot_map_intervention_coverage_input.R
rename to r_utilities/data_processing/plot_map_intervention_coverage_input_example.R
diff --git a/r_utilities/data_processing/plot_map_intervention_coverage_input_functions.R b/r_utilities/data_processing/plot_map_intervention_coverage_input_functions.R
new file mode 100644
index 0000000..7554c30
--- /dev/null
+++ b/r_utilities/data_processing/plot_map_intervention_coverage_input_functions.R
@@ -0,0 +1,110 @@
+# plot_map_intervention_coverage_input_functions.R
+
+
+library(raster)
+library(ggplot2)
+library(gridExtra)
+library(grid)
+library(RColorBrewer)
+library(tidyverse)
+library(sf)
+library(reshape2)
+library(pals)
+library(prettyGraphs)
+
+
+
+##########################################################
+# functions for ggplot version
+##########################################################
+
+# function to combine multiple plots for same intervention that share a legend
+grid_arrange_shared_legend_plotlist =function(..., 
+                                              plotlist=NULL,
+                                              ncol = length(list(...)),
+                                              nrow = NULL,
+                                              position = c("bottom", "right")) {
+  
+  plots <- c(list(...), plotlist)
+  
+  if (is.null(nrow)) nrow = ceiling(length(plots)/ncol)
+  
+  position <- match.arg(position)
+  g <- ggplotGrob(plots[[1]] + theme(legend.position = position) + guides(fill = guide_legend(reverse=T)))$grobs
+  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
+  lheight <- sum(legend$height)
+  lwidth <- sum(legend$width)
+  gl <- lapply(plots, function(x) x + theme(legend.position="none"))
+  gl <- c(gl, ncol = ncol, nrow = nrow)
+  
+  combined <- switch(position,
+                     "bottom" = arrangeGrob(do.call(arrangeGrob, gl),
+                                            legend,
+                                            ncol = 1,
+                                            heights = unit.c(unit(1, "npc") - lheight, lheight)),
+                     "right" = arrangeGrob(do.call(arrangeGrob, gl),
+                                           legend,
+                                           ncol = 2,
+                                           widths = unit.c(unit(1, "npc") - lwidth, lwidth)))
+  
+  grid.newpage()
+  grid.draw(combined)
+  
+  # return gtable invisibly
+  invisible(combined)
+}
+
+change_legend_size <- function(myPlot, pointSize = 0.5, textSize = 3, spaceLegend = 0.1) {
+  myPlot +
+    guides(shape = guide_legend(override.aes = list(size = pointSize)),
+           color = guide_legend(override.aes = list(size = pointSize))) +
+    theme(legend.title = element_text(size = textSize), 
+          legend.text  = element_text(size = textSize),
+          legend.key.size = unit(spaceLegend, "lines"))
+}
+
+# function to create and save maps of intervention coverages
+create_coverage_input_maps = function(inter_input, inter_years, output_filename, colorscale, min_value, max_value, num_colors){
+  plot_list = list()
+  for(yy in 1:length(inter_years)){
+    inter_input_cur = inter_input[inter_input$year == inter_years[yy],]
+    inter_input_cur$output_value = inter_input_cur[[coverage_colname]]
+    admin_cur = admin_shapefile %>%
+      left_join(inter_input_cur, by=c('NOMDEP' = 'admin_name')) %>%
+      mutate(binned_values=cut(output_value,
+                               breaks=round(seq((min_value), (max_value), length.out = (num_colors+1)),2)))
+    plot_list[[yy]] = ggplot(admin_cur) +
+      geom_sf(aes(fill=binned_values), size=0.1) +
+      scale_fill_manual(values=setNames(colorscale, levels(admin_cur$binned_values)), drop=FALSE, name='coverage', na.value='grey96') + 
+      ggtitle(inter_years[yy]) +
+      guides(fill = guide_legend(reverse=T)) +
+      theme_void() +
+      theme(plot.title = element_text(hjust = 0.5)) 
+    plot_list[[yy]] = change_legend_size(plot_list[[yy]], pointSize=10, textSize=10, spaceLegend=1)
+  }
+  
+  gg = grid_arrange_shared_legend_plotlist(plotlist=plot_list, ncol=length(plot_list), position='right')
+  ggsave(output_filename, gg, width = (length(inter_years)+1)*1.8, height=2.3, units='in', dpi=800)
+}
+
+
+
+# function to create and save maps of intervention coverages
+create_maps_state_groups = function(admin_shapefile_filepath, shapefile_admin_colname, admin_group_df, column_name, output_filename, colorscale){
+  admin_shapefile = shapefile(admin_shapefile_filepath)
+  # standardize shapefile names
+  admin_shapefile$NOMDEP = standardize_admin_names_in_vector(target_names=archetype_info$LGA, origin_names=admin_shapefile$NOMDEP)
+  
+  admin_group_df$output_value = admin_group_df[[column_name]]
+  admin_cur = admin_shapefile %>%
+    left_join(inter_input_cur, by=c('NOMDEP' = 'admin_name'))
+  gg = ggplot(admin_cur) +
+    geom_sf(aes(fill=binned_values), size=0.1) +
+    scale_fill_manual(values=setNames(colorscale, levels(admin_cur$output_value)), drop=FALSE, name='group', na.value='grey96') + 
+    ggtitle(inter_years[yy]) +
+    guides(fill = guide_legend(reverse=T)) +
+    theme_void() +
+    theme(plot.title = element_text(hjust = 0.5)) 
+
+  ggsave(output_filename, gg, width = (2)*1.8, height=2.3, units='in', dpi=800)
+}
diff --git a/r_utilities/plots_results_analyses/plot_sim_output_functions.R b/r_utilities/plots_results_analyses/plot_sim_output_functions.R
index 413bd23..6bb6d80 100644
--- a/r_utilities/plots_results_analyses/plot_sim_output_functions.R
+++ b/r_utilities/plots_results_analyses/plot_sim_output_functions.R
@@ -23,7 +23,7 @@ save_plots = TRUE
 
 
 ####################################################################################
-# barplots for burden relative to BAU at the national level
+# barplots for burden relative to BAU: percent reduction
 ####################################################################################
 
 plot_relative_burden_barplots = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins, 
@@ -148,7 +148,7 @@ plot_relative_burden_barplots_by_state = function(sim_future_output_dir, pop_fil
                                                  barplot_start_year, barplot_end_year, 
                                                  pyr, chw_cov,
                                                  scenario_names, experiment_names, scenario_palette, LLIN2y_flag=FALSE, overwrite_files=FALSE, show_error_bar=TRUE, align_seeds=TRUE,
-                                                 burden_metric_subset=c(), include_to_present=TRUE){
+                                                 burden_metric_subset=c(), include_to_present=TRUE, file_suffix=''){
   
   admin_pop = read.csv(pop_filepath)
   
@@ -213,7 +213,7 @@ plot_relative_burden_barplots_by_state = function(sim_future_output_dir, pop_fil
     rel_burden_agg$code = rel_burden_agg$State
     gg = ggplot(rel_burden_agg) + 
       geom_bar(aes(x=scenario, y=mean_rel, fill=scenario), stat='identity') +
-      scale_y_continuous(labels=percent_format()) + #,limits=c(standard_min_y, standard_max_y)) +   # turn into percent reduction
+      scale_y_continuous(labels=percent_format(), n.breaks=4) + #,limits=c(standard_min_y, standard_max_y)) +   # turn into percent reduction
       ylab('Percent reduction in burden \n ((Baseline - Plan) / Baseline) * 100') + 
       geom_hline(yintercept=0, color='black') +
       ggtitle(gsub('\\(births\\)', '', burden_metric_name)) +
@@ -228,13 +228,131 @@ plot_relative_burden_barplots_by_state = function(sim_future_output_dir, pop_fil
       gg = gg +
         geom_errorbar(aes(x=scenario, ymin=min_rel, ymax=max_rel), width=0.4, colour="black", alpha=0.9, size=1) 
     }
-    ggsave(paste0(sim_future_output_dir, '/_plots/','barplot_percent_reduction_', burden_metric_name,'_stateGrid.png'), gg, dpi=600, width=18, height=12, units='in')
+    ggsave(paste0(sim_future_output_dir, '/_plots/','barplot_percent_reduction_', burden_metric_name,'_stateGrid',file_suffix,'.png'), gg, dpi=600, width=18*.6, height=12*.6, units='in')  # , width=18, height=12, units='in'
   }
 }
 
 
 
 
+####################################################################################
+# barplots for burden reduction relative to BAU: difference
+####################################################################################
+
+plot_difference_burden_barplots = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins, 
+                                         barplot_start_year, barplot_end_year, 
+                                         pyr, chw_cov,
+                                         scenario_names, experiment_names, scenario_palette, LLIN2y_flag=FALSE, overwrite_files=FALSE, separate_plots_flag=FALSE, show_error_bar=TRUE, align_seeds=TRUE,
+                                         include_to_present=TRUE, burden_metric_subset=c()){
+  admin_pop = read.csv(pop_filepath)
+  
+  # burden metrics
+  burden_metrics = c('PfPR', 'PfPR', 'incidence', 'incidence', 'directMortality', 'directMortality', 'allMortality', 'allMortality', 'mLBW_deaths', 'MiP_stillbirths')
+  burden_colnames = c('average_PfPR_U5', 'average_PfPR_all', 'incidence_U5', 'incidence_all', 'direct_death_rate_mean_U5', 'direct_death_rate_mean_all', 'all_death_rate_mean_U5', 'all_death_rate_mean_all', 'annual_num_mLBW', 'annual_num_mStill')
+  burden_metric_names = c('PfPR (U5)', 'PfPR (all ages)', 'incidence (U5)', 'incidence (all ages)', 'direct mortality (U5)', 'direct mortality (all ages)', 'mortality (U5)', 'mortality (all ages)', 'mLBW mortality (births)', 'stillbirths (births)')
+  # allow subsetting of which burden metrics plotted (based on burden_metric_subset argument)
+  if((length(burden_metric_subset)>=1)){
+    burden_metrics_subset_indices = which(burden_metrics %in% burden_metric_subset)
+    burden_colnames = burden_colnames[burden_metrics_subset_indices]
+    burden_metric_names = burden_metric_names[burden_metrics_subset_indices]
+  }
+  
+  # first comparison name is to-present (skip it), second is BAU (use as reference), comparison scenarios start at the third index
+  if(include_to_present){
+    reference_experiment_name = experiment_names[2]
+    comparison_start_index = 3
+  } else{
+    reference_experiment_name = experiment_names[1]
+    comparison_start_index = 2  
+  }
+  # iterate through comparison scenarios, calculating the burden reduction of all metrics relative to BAU (seedwise comparisons, so one output for each run). Combine all scenario reductions into a dataframe (each scenario set in separate rows)
+  difference_burden_all_df = data.frame()
+  for(ss in comparison_start_index:length(scenario_names)){
+    comparison_experiment_name = experiment_names[ss]
+    comparison_scenario_name = scenario_names[ss]
+    difference_burden_df = get_difference_burden(sim_output_filepath=sim_future_output_dir, reference_experiment_name=reference_experiment_name, comparison_experiment_name=comparison_experiment_name, comparison_scenario_name=comparison_scenario_name, 
+                                             start_year=barplot_start_year, end_year=barplot_end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files, align_seeds=align_seeds)
+    # only save relevant columns for plotting
+    difference_burden_df = difference_burden_df[,which(colnames(difference_burden_df) %in% c('scenario', 'Run_Number', burden_colnames))]
+    if(nrow(difference_burden_all_df) == 0){
+      difference_burden_all_df = difference_burden_df
+    }else{
+      difference_burden_all_df = rbind(difference_burden_all_df, difference_burden_df)
+    }
+  }
+  
+  # get factors in the correct order (rather than alphabetical)
+  difference_burden_all_df$scenario = factor(difference_burden_all_df$scenario, levels=scenario_names[comparison_start_index:length(scenario_names)])
+  
+  # get minimum and maximum reductions - these will be used if they are smaller / greater than the current min/max
+  standard_min_x = 0
+  standard_max_x = 0.1
+  cur_min = min(difference_burden_all_df[,2:(1+length(burden_colnames))])
+  cur_max = max(difference_burden_all_df[,2:(1+length(burden_colnames))])
+  if(cur_min < standard_min_x) standard_min_x = cur_min
+  if(cur_max > standard_max_x) standard_max_x = cur_max
+  
+  gg_list = list()
+  for(bb in 1:length(burden_colnames)){
+    current_burden_name = burden_colnames[bb]
+    burden_metric_name = burden_metric_names[bb]
+    select_col_names = c(current_burden_name, 'scenario')
+    # get mean, min, and max among all runs for this burden metric
+    rel_burden_agg = as.data.frame(difference_burden_all_df) %>% dplyr::select(match(select_col_names, names(.))) %>%
+      dplyr::group_by(scenario) %>%
+      dplyr::summarise(mean_rel = mean(get(current_burden_name)),
+                       max_rel = max(get(current_burden_name)),
+                       min_rel = min(get(current_burden_name)))
+    
+    gg_list[[bb]] = ggplot(rel_burden_agg) + 
+      geom_bar(aes(x=scenario, y=mean_rel, fill=scenario), stat='identity') +
+      scale_y_continuous(labels=percent_format(), limits=c(standard_min_x, standard_max_x)) +   # turn into percent reduction
+      ylab('Burden averted') + 
+      geom_hline(yintercept=0, color='black') +
+      ggtitle(gsub('\\(births\\)', '', burden_metric_name)) +
+      scale_fill_manual(values = scenario_palette) + 
+      theme_classic()+ 
+      theme(legend.position = "top", legend.box='horizontal', legend.title = element_blank(), text = element_text(size = text_size), legend.text=element_text(size = text_size), 
+            axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(),axis.line.x=element_blank(),
+            plot.margin=unit(c(0,1,1,0), 'cm'))
+    if(show_error_bar){
+      gg_list[[bb]] = gg_list[[bb]] +
+        geom_errorbar(aes(x=scenario, ymin=min_rel, ymax=max_rel), width=0.4, colour="black", alpha=0.9, size=1) 
+    }
+    if(separate_plots_flag){
+      separate_plot = gg_list[[bb]] + 
+        ylab('Burden averted \n ((Baseline - Plan)') + 
+        theme(legend.position='none', plot.title = element_blank(), text=element_text(size =separate_plot_text_size))
+      ggsave(paste0(sim_future_output_dir, '/_plots/','barplot_burden_averted_', burden_metric_name,'_',district_subset,'.png'), separate_plot, dpi=600, width=4, height=3, units='in')
+    }
+  }
+  # for each burden type, 
+  # get mean, min, and max among all runs for each burden metric, each saved as a separate column
+  # create barplot for each burden type (using columns of dataframe, separate bar for each scenario)
+  
+  gg_list = append(list(ggpubr::as_ggplot(ggpubr::get_legend(gg_list[[1]]))), gg_list)
+  # remove legend from main plots
+  for(bb in 2:(length(burden_colnames)+1)){
+    gg_list[[bb]] = gg_list[[bb]] + theme(legend.position = "none")  + theme(text = element_text(size = text_size))   
+  }
+  
+  if(save_plots){
+    gg_saved = grid.arrange(grobs = gg_list[-1], layout_matrix = matrix(c(1:(length(burden_colnames))), nrow=2, byrow=FALSE))
+    ggsave(paste0(sim_future_output_dir, '/_plots/barplot_burden_averted_', pyr, '_', chw_cov, 'CHW_',district_subset,'.png'), gg_saved, dpi=600, width=14, height=7, units='in')
+  }
+  
+  # ----- combine all burden plots ----- #
+  # gg = grid.arrange(grobs = gg_list, layout_matrix = matrix(c(1,1,2:(length(burden_colnames)+1)), ncol=2, byrow=TRUE))
+  gg = grid.arrange(grobs = gg_list, layout_matrix = rbind(matrix(rep(1, length(burden_colnames)/2), nrow=1), matrix(2:(length(burden_colnames)+1), nrow=2, byrow=FALSE)))
+  
+  return(gg)
+}
+
+
+
+
+
+
 ####################################################################################################################################
 # barplot of the impact a specific intervention has in relevant admins 
 #  (percent reduction when intervention is included versus matched simulation without the intervention)
@@ -375,6 +493,90 @@ plot_barplot_impact_specific_intervention = function(sim_future_output_dir, pop_
 
 
 
+####################################################################################################################################
+# barplot of the impact a specific intervention has in relevant admins 
+#  (burden reduction when intervention is included versus matched simulation without the intervention)
+####################################################################################################################################
+
+
+table_difference_impact_specific_intervention = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins, 
+                                                     barplot_start_year, barplot_end_year, 
+                                                     pyr, chw_cov,
+                                                     experiment_names_without, experiment_names_with, intervention_name='PMC', age_group = 'U1', LLIN2y_flag=FALSE, overwrite_files=FALSE, align_seeds=TRUE,
+                                                     burden_metric_subset=c()){
+  admin_pop = read.csv(pop_filepath)
+  comparison_scenario_name = intervention_name
+  
+  # iterate through the matched pairs of experiments without / with the intervention
+  rel_burden_agg = data.frame()
+  if(length(experiment_names_without) == length(experiment_names_with)){
+    for(ii in 1:length(experiment_names_without)){
+      # first experiment is without interventions, second experiment is with intervention
+      reference_experiment_name = experiment_names_without[ii]
+      # calculating the burden reduction of all metrics relative to no intervention (seedwise comparisons, so one output for each run). 
+      comparison_experiment_name = experiment_names_with[ii]  
+      
+      # set which burden metrics are relevant and get relative burden between simulations
+      burden_metrics = c('PfPR', 'incidence', 'directMortality', 'allMortality')
+      if(age_group=='U1'){
+        warning('Have not yet added support for burden reduction for U1... need to add relevant functions')
+        # burden_colnames = c('average_PfPR_U1', 'incidence_U1', 'direct_death_rate_mean_U1', 'all_death_rate_mean_U1')
+        # burden_metric_names = c('PfPR (U1)', 'incidence (U1)', 'direct mortality (U1)', 'mortality (U1)')
+        # relative_burden_df = get_relative_U1_burden(sim_output_filepath=sim_future_output_dir, reference_experiment_name=reference_experiment_name, comparison_experiment_name=comparison_experiment_name, comparison_scenario_name=comparison_scenario_name, start_year=barplot_start_year, end_year=barplot_end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files, align_seeds=align_seeds)
+      } else if (age_group=='U5'){
+        burden_colnames = c('average_PfPR_U5', 'incidence_U5', 'direct_death_rate_mean_U5', 'all_death_rate_mean_U5')
+        burden_metric_names = c('PfPR (U5)', 'incidence (U5)', 'direct mortality (U5)', 'mortality (U5)')
+        relative_burden_df = get_difference_burden(sim_output_filepath=sim_future_output_dir, reference_experiment_name=reference_experiment_name, comparison_experiment_name=comparison_experiment_name, comparison_scenario_name=comparison_scenario_name, start_year=barplot_start_year, end_year=barplot_end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files, align_seeds=align_seeds)
+      }else{
+        burden_metrics = c(burden_metrics, 'mLBW_deaths', 'MiP_stillbirths')
+        burden_colnames = c('average_PfPR_all', 'incidence_all', 'direct_death_rate_mean_all', 'all_death_rate_mean_all', 'annual_num_mLBW', 'annual_num_mStill')
+        burden_metric_names = c('PfPR (all ages)', 'incidence (all ages)', 'direct mortality (all ages)', 'mortality (all ages)', 'mLBW mortality (births)', 'stillbirths (births)')
+        relative_burden_df = get_difference_burden(sim_output_filepath=sim_future_output_dir, reference_experiment_name=reference_experiment_name, comparison_experiment_name=comparison_experiment_name, comparison_scenario_name=comparison_scenario_name, start_year=barplot_start_year, end_year=barplot_end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files, align_seeds=align_seeds)
+      }
+      
+      
+      # allow subsetting of which burden metrics plotted (based on burden_metric_subset argument)
+      if((length(burden_metric_subset)>=1)){
+        burden_metrics_subset_indices = which(burden_metrics %in% burden_metric_subset)
+        burden_colnames = burden_colnames[burden_metrics_subset_indices]
+        burden_metric_names = burden_metric_names[burden_metrics_subset_indices]
+      }
+      
+      # only save relevant columns for plotting
+      relative_burden_df = relative_burden_df[,which(colnames(relative_burden_df) %in% c('scenario', 'Run_Number', burden_colnames))]
+      
+      for(bb in 1:length(burden_colnames)){
+        current_burden_name = burden_colnames[bb]
+        burden_metric_name = burden_metric_names[bb]
+        select_col_names = c(current_burden_name, 'scenario')
+        # get mean, min, and max among all runs for this burden metric
+        rel_burden_agg_bb = as.data.frame(relative_burden_df) %>% dplyr::select(match(select_col_names, names(.))) %>%
+          dplyr::group_by(scenario) %>%
+          dplyr::summarise(mean_rel = mean(get(current_burden_name)),
+                           max_rel = max(get(current_burden_name)),
+                           min_rel = min(get(current_burden_name)))
+        rel_burden_agg_bb$burden_metric = burden_metric_name
+        rel_burden_agg_bb$scenario_name = experiment_names_with[ii]
+        if(nrow(rel_burden_agg)<1){
+          rel_burden_agg = rel_burden_agg_bb
+        } else{
+          rel_burden_agg = merge(rel_burden_agg, rel_burden_agg_bb, all=TRUE)
+        }
+      }
+    }
+  }
+  
+  rel_burden_agg$burden_metric = gsub('\\(births\\)', '', rel_burden_agg$burden_metric)
+  rel_burden_agg$burden_metric = factor(rel_burden_agg$burden_metric, levels=gsub('\\(births\\)', '', burden_metric_names))
+  rel_burden_agg$scenario_name = factor(rel_burden_agg$scenario_name, levels=experiment_names_with)
+  
+  return(rel_burden_agg)
+}
+
+
+
+
+
 
 
 plot_barplot_impact_two_specific_interventions = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins,
@@ -746,7 +948,7 @@ plot_simulation_output_burden_all = function(sim_future_output_dir, pop_filepath
 plot_simulation_output_burden_by_state = function(sim_future_output_dir, pop_filepath, grid_layout_state_locations,
                                              min_year, max_year, sim_end_years, relative_year=NA,
                                              scenario_filepaths, scenario_names, experiment_names, scenario_palette, LLIN2y_flag=FALSE, overwrite_files=FALSE, 
-                                             extend_past_timeseries_year=NA, scenario_linetypes=NA){
+                                             extend_past_timeseries_year=NA, scenario_linetypes=NA,filename_suffix=''){
   if (!is.na(relative_year)){ if(relative_year<min_year){
     warning('specified minimum year must be <= relative year. Setting min_year to relative_year.')
     min_year = relative_year
@@ -896,7 +1098,7 @@ plot_simulation_output_burden_by_state = function(sim_future_output_dir, pop_fil
       theme(legend.position = "top", legend.box='horizontal', legend.title = element_blank(), legend.text=element_text(size = text_size)) +  # legend.position = "none"
       facet_geo(~code, grid = grid_layout_state_locations, label="name")#, scales='free') 
 
-      ggsave(paste0(sim_future_output_dir, '/_plots/Timeseries_burden',relative_string,'_state_grid_',burden_metric,'.png'), gg, dpi=600, width=12, height=10, units='in')
+      ggsave(paste0(sim_future_output_dir, '/_plots/Timeseries_burden',relative_string,'_state_grid_',burden_metric,filename_suffix,'.png'), gg, dpi=600, width=12*0.7, height=10*0.7, units='in')
   }
   return(gg)
 }
@@ -1645,7 +1847,7 @@ plot_burden_maps = function(sim_future_output_dir, pop_filepath, district_subset
 
 
 
-# plot map with the reduction in burden relative to teh first scenario
+# plot map with the reduction in burden relative to the first scenario
 plot_burden_relative_reduction_maps = function(sim_future_output_dir, pop_filepath, district_subset, cur_admins,
                             barplot_start_year, barplot_end_year,
                             pyr, chw_cov,
diff --git a/r_utilities/plots_results_analyses/process_sim_output_functions.R b/r_utilities/plots_results_analyses/process_sim_output_functions.R
index 3ae68a2..43fd760 100644
--- a/r_utilities/plots_results_analyses/process_sim_output_functions.R
+++ b/r_utilities/plots_results_analyses/process_sim_output_functions.R
@@ -621,6 +621,74 @@ get_relative_U1_burden = function(sim_output_filepath, reference_experiment_name
 
 
 
+####################################################################################
+# absolute difference in simulation burden between two experiments over specified time interval
+####################################################################################
+
+get_difference_burden = function(sim_output_filepath, reference_experiment_name, comparison_experiment_name, comparison_scenario_name, start_year, end_year, admin_pop, district_subset='allDistricts', cur_admins='all', 
+                               LLIN2y_flag=FALSE, overwrite_files=FALSE, align_seeds=TRUE ){
+  #'  @description get relative change in U5 and all-age burden when comparing between two simulations in specified years and in specified districts (for all malaria metrics, separate values for each seed)
+  #'  @return data frame where each row is a seed and each column is the relative change of different burden metrics, calculated as (reference-comparison) / reference:
+  
+  reference_df = get_cumulative_burden(sim_output_filepath=sim_output_filepath, experiment_name=reference_experiment_name, start_year=start_year, end_year=end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files)
+  comparison_df = get_cumulative_burden(sim_output_filepath=sim_output_filepath, experiment_name=comparison_experiment_name, start_year=start_year, end_year=end_year, admin_pop=admin_pop, district_subset=district_subset, cur_admins=cur_admins, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files)
+  
+  if(align_seeds){  # compare one run seed against the matching run seed in the other experiment
+    # align seeds
+    reference_df = reference_df[order(reference_df$Run_Number),]
+    comparison_df = comparison_df[order(comparison_df$Run_Number),]
+  } else{  # compare the averages across all seeds from one experiment against the average from the other experiment
+    reference_df = reference_df %>% summarise_all(mean)
+    comparison_df = comparison_df %>% summarise_all(mean)
+  }
+  difference_burden_df = data.frame('Run_Number' = reference_df$Run_Number)
+  # iterate through burden indicators, calculating relative burden and adding to dataframe
+  burden_indicators = colnames(reference_df)[-which(colnames(reference_df) == 'Run_Number')]
+  for(bb in 1:length(burden_indicators)){
+    difference_burden_cur = (reference_df[[burden_indicators[bb]]] - comparison_df[[burden_indicators[bb]]])
+    difference_burden_df[[burden_indicators[bb]]] = difference_burden_cur
+  }
+  difference_burden_df$scenario = comparison_scenario_name
+  return(difference_burden_df)
+}
+
+
+
+# get relative burden by state for grid plot
+get_difference_burden_by_state = function(sim_output_filepath, reference_experiment_name, comparison_experiment_name, comparison_scenario_name, start_year, end_year, admin_pop, district_subset='allDistricts', cur_admins='all', 
+                                        LLIN2y_flag=FALSE, overwrite_files=FALSE, align_seeds=TRUE ){
+  #'  @description get relative change in U5 and all-age burden when comparing between two simulations in specified years and in specified districts (for all malaria metrics, separate values for each seed)
+  #'  @return data frame where each row is a seed and each column is the relative change of different burden metrics, calculated as (reference-comparison) / reference:
+  
+  reference_df = get_cumulative_burden_by_state(sim_output_filepath=sim_output_filepath, experiment_name=reference_experiment_name, start_year=start_year, end_year=end_year, admin_pop=admin_pop, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files)
+  comparison_df = get_cumulative_burden_by_state(sim_output_filepath=sim_output_filepath, experiment_name=comparison_experiment_name, start_year=start_year, end_year=end_year, admin_pop=admin_pop, LLIN2y_flag=LLIN2y_flag, overwrite_files=overwrite_files)
+  
+  if(align_seeds){  # compare one run seed against the matching run seed in the other experiment
+    # align seeds and states into same order
+    reference_df = reference_df[order(reference_df$State, reference_df$Run_Number),]
+    comparison_df = comparison_df[order(comparison_df$State, comparison_df$Run_Number),]
+  } else{  # compare the averages across all seeds from one experiment against the average from the other experiment
+    reference_df = reference_df %>% group_by(State) %>% summarise_all(mean)
+    comparison_df = comparison_df %>% group_by(State) %>% summarise_all(mean)
+    # align states into same order
+    reference_df = reference_df[order(reference_df$State),]
+    comparison_df = comparison_df[order(comparison_df$State),]
+  }
+  difference_burden_df = data.frame('State'=reference_df$State, 'Run_Number'=reference_df$Run_Number)
+  # iterate through burden indicators, calculating relative burden and adding to dataframe
+  burden_indicators = colnames(reference_df)[-which(colnames(reference_df) %in% c('State', 'Run_Number'))]
+  for(bb in 1:length(burden_indicators)){
+    difference_burden_cur = (reference_df[[burden_indicators[bb]]] - comparison_df[[burden_indicators[bb]]])
+    difference_burden_df[[burden_indicators[bb]]] = difference_burden_cur
+  }
+  difference_burden_df$scenario = comparison_scenario_name
+  return(difference_burden_df)
+}
+
+
+
+
+
 
 ####################################################################################
 # timeseries of simulation burden and/or interventions
@@ -721,8 +789,8 @@ get_burden_timeseries_exp = function(exp_filepath, exp_name, district_subset, cu
     }
     
     cur_sim_output_agg$scenario = exp_name
+    write.csv(cur_sim_output_agg, output_filename, row.names=FALSE)
   }
-  write.csv(cur_sim_output_agg, output_filename, row.names=FALSE)
   return(cur_sim_output_agg)
 }