Commit

add functions to process data and create new plots of simulation inputs and results

(cherry picked from commit dde1c4a)
MAmbrose-IDM committed Nov 3, 2024
1 parent 8c9d0c2 commit 553746b
Showing 5 changed files with 446 additions and 66 deletions.
116 changes: 58 additions & 58 deletions r_utilities/data_processing/1_DHS_data_extraction.R
@@ -924,35 +924,35 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3
# plots of cluster-level DHS results
#=========================================================================================================##
if(!dir.exists(paste0(hbhi_dir,'/estimates_from_DHS/plots'))) dir.create(paste0(hbhi_dir,'/estimates_from_DHS/plots'))
# if(plot_separate_pdfs){
# for(yy in 1:length(years)){
# pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'cluster_observations_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE)
# par(mfrow=c(1,1))
# for(i_var in 1:length(variables)){
# var = variables[i_var]
# cluster_obs = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'cluster_outputs_', years[yy], '.csv'))[,-1]
# cluster_obs$latitude[which(cluster_obs$latitude == 0)] = NA
# cluster_obs$longitude[which(cluster_obs$longitude == 0)] = NA
# if(paste0(var,'_num_total') %in% colnames(cluster_obs)){
# max_survey_size = max(cluster_obs[[paste0(var,'_num_total')]], na.rm=TRUE)
# layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE))
# plot(admin_shape, main=var)
# points(cluster_obs$longitude, cluster_obs$latitude, col=colors_range_0_to_1[1+round(cluster_obs[[paste0(var,'_rate')]]*100)], pch=20, cex=cluster_obs[[paste0(var,'_num_total')]]/round(max_survey_size/5))#, xlim=c(min(cluster_obs$longitude), max(cluster_obs$longitude)), ylim=c(min(cluster_obs$latitude), max(cluster_obs$latitude)))
# # legend - colorbar
# legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1))
# plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var)
# text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5))
# rasterImage(legend_image, 0, 0, 1,1)
# # legend - survey size
# plot(rep(0,5), seq(1, max_survey_size, length.out=5), cex=seq(1,max_survey_size, length.out=5)/round(max_survey_size/5), pch=20, axes=FALSE, xlab='', ylab='sample size'); axis(2)
# }
# }
# dev.off()
# }
# }
#
#
#
if(plot_separate_pdfs){
for(yy in 1:length(years)){
pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'cluster_observations_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE)
par(mfrow=c(1,1))
for(i_var in 1:length(variables)){
var = variables[i_var]
cluster_obs = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'cluster_outputs_', years[yy], '.csv'))[,-1]
cluster_obs$latitude[which(cluster_obs$latitude == 0)] = NA
cluster_obs$longitude[which(cluster_obs$longitude == 0)] = NA
if(paste0(var,'_num_total') %in% colnames(cluster_obs)){
max_survey_size = max(cluster_obs[[paste0(var,'_num_total')]], na.rm=TRUE)
layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE))
plot(admin_shape, main=var)
points(cluster_obs$longitude, cluster_obs$latitude, col=colors_range_0_to_1[1+round(cluster_obs[[paste0(var,'_rate')]]*100)], pch=20, cex=cluster_obs[[paste0(var,'_num_total')]]/round(max_survey_size/5))#, xlim=c(min(cluster_obs$longitude), max(cluster_obs$longitude)), ylim=c(min(cluster_obs$latitude), max(cluster_obs$latitude)))
# legend - colorbar
legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1))
plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var)
text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5))
rasterImage(legend_image, 0, 0, 1,1)
# legend - survey size
plot(rep(0,5), seq(1, max_survey_size, length.out=5), cex=seq(1,max_survey_size, length.out=5)/round(max_survey_size/5), pch=20, axes=FALSE, xlab='', ylab='sample size'); axis(2)
}
}
dev.off()
}
}



##=========================================================================================================##
# plots of cluster-level DHS results - separated by interventions, each plot panel showing across years
##=========================================================================================================##
@@ -963,7 +963,7 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3
variables2 = variables[variables %in% c('mic', 'rdt', 'itn_all', 'itn_u5', 'iptp','cm','received_treatment','sought_treatment','blood_test', 'art')]
} else variables2 = variables
nyears = length(years)

if(separate_plots_for_each_var){
for(i_var in 1:length(variables2)){
var = variables2[i_var]
@@ -1029,37 +1029,37 @@ plot_extracted_DHS_data = function(hbhi_dir, years, admin_shape, min_num_total=3
dev.off()
}
}
#
#
#
#
#
#
# ####=========================================================================================================####
# # map of LGA-level DHS results, allowing for aggregation to admin1 level when sample sizes too small
# ####=========================================================================================================####
# if(plot_separate_pdfs){
# for(yy in 1:length(years)){
# pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE)
# for(i_var in 1:length(variables)){
# var = variables[i_var]
# admin_sums0 = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.csv'))[,-1]
# reorder_admins = match(sapply(admin_shape$NOMDEP, match_lga_names), sapply(admin_sums0$NOMDEP, match_lga_names))
# admin_sums = admin_sums0[reorder_admins,]
# if(all(sapply(admin_shape$NOMDEP, match_lga_names) == sapply(admin_sums$NOMDEP, match_lga_names))){
# if(paste0(var,'_num_total') %in% colnames(admin_sums)){
# layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE))
# admin_colors = colors_range_0_to_1[1+round(admin_sums[[paste0(var,'_rate')]]*100)]
# plot(admin_shape, main=var, col=admin_colors)
#
# # legend - colorbar
# legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1))
# plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var)
# text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5))
# rasterImage(legend_image, 0, 0, 1,1)
# }
# } else warning('during plot generation, order of districts in shapefile and data frame did not match, skipping plotting.')
# }
# dev.off()
# }
# }
if(plot_separate_pdfs){
for(yy in 1:length(years)){
pdf(paste0(hbhi_dir, '/estimates_from_DHS/plots/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.pdf'), width=7, height=5, useDingbats = FALSE)
for(i_var in 1:length(variables)){
var = variables[i_var]
admin_sums0 = read.csv(paste0(hbhi_dir, '/estimates_from_DHS/DHS_',vacc_string, 'admin_minN', min_num_total,'_', years[yy], '.csv'))[,-1]
reorder_admins = match(sapply(admin_shape$NOMDEP, match_lga_names), sapply(admin_sums0$NOMDEP, match_lga_names))
admin_sums = admin_sums0[reorder_admins,]
if(all(sapply(admin_shape$NOMDEP, match_lga_names) == sapply(admin_sums$NOMDEP, match_lga_names))){
if(paste0(var,'_num_total') %in% colnames(admin_sums)){
layout(matrix(c(1,1,1,2, 1,1,1,3),nrow=2, byrow=TRUE))
admin_colors = colors_range_0_to_1[1+round(admin_sums[[paste0(var,'_rate')]]*100)]
plot(admin_shape, main=var, col=admin_colors)

# legend - colorbar
legend_image = as.raster(matrix(rev(colors_range_0_to_1[1+round(seq(0,1,length.out=20)*100)]), ncol=1))
plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = var)
text(x=1.5, y = seq(0,1,length.out=5), labels = seq(0,1,length.out=5))
rasterImage(legend_image, 0, 0, 1,1)
}
} else warning('during plot generation, order of districts in shapefile and data frame did not match, skipping plotting.')
}
dev.off()
}
}



@@ -0,0 +1,110 @@
# plot_map_intervention_coverage_input_functions.R


library(raster)
library(ggplot2)
library(gridExtra)
library(grid)
library(RColorBrewer)
library(tidyverse)
library(sf)
library(reshape2)
library(pals)
library(prettyGraphs)



##########################################################
# functions for ggplot version
##########################################################

# function to combine multiple plots for same intervention that share a legend
grid_arrange_shared_legend_plotlist =function(...,
plotlist=NULL,
ncol = length(list(...)),
nrow = NULL,
position = c("bottom", "right")) {

plots <- c(list(...), plotlist)

if (is.null(nrow)) nrow = ceiling(length(plots)/ncol)

position <- match.arg(position)
g <- ggplotGrob(plots[[1]] + theme(legend.position = position) + guides(fill = guide_legend(reverse=T)))$grobs
legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
lheight <- sum(legend$height)
lwidth <- sum(legend$width)
gl <- lapply(plots, function(x) x + theme(legend.position="none"))
gl <- c(gl, ncol = ncol, nrow = nrow)

combined <- switch(position,
"bottom" = arrangeGrob(do.call(arrangeGrob, gl),
legend,
ncol = 1,
heights = unit.c(unit(1, "npc") - lheight, lheight)),
"right" = arrangeGrob(do.call(arrangeGrob, gl),
legend,
ncol = 2,
widths = unit.c(unit(1, "npc") - lwidth, lwidth)))

grid.newpage()
grid.draw(combined)

# return gtable invisibly
invisible(combined)
}
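
# Example usage (a minimal sketch; `p1` and `p2` are hypothetical plots, not part of this file):
# draw two ggplots side by side with a single shared legend on the right.
# p1 = ggplot(mtcars, aes(wt, mpg, color = factor(cyl))) + geom_point()
# p2 = ggplot(mtcars, aes(hp, mpg, color = factor(cyl))) + geom_point()
# grid_arrange_shared_legend_plotlist(plotlist = list(p1, p2), ncol = 2, position = 'right')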

change_legend_size <- function(myPlot, pointSize = 0.5, textSize = 3, spaceLegend = 0.1) {
myPlot +
guides(shape = guide_legend(override.aes = list(size = pointSize)),
color = guide_legend(override.aes = list(size = pointSize))) +
theme(legend.title = element_text(size = textSize),
legend.text = element_text(size = textSize),
legend.key.size = unit(spaceLegend, "lines"))
}
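
# Example usage (sketch): shrink the legend keys and text of a hypothetical ggplot object `p`.
# p_small_legend = change_legend_size(p, pointSize = 1, textSize = 8, spaceLegend = 0.5)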

# function to create and save maps of intervention coverages
create_coverage_input_maps = function(inter_input, inter_years, output_filename, colorscale, min_value, max_value, num_colors){
plot_list = list()
for(yy in 1:length(inter_years)){
inter_input_cur = inter_input[inter_input$year == inter_years[yy],]
inter_input_cur$output_value = inter_input_cur[[coverage_colname]]
admin_cur = admin_shapefile %>%
left_join(inter_input_cur, by=c('NOMDEP' = 'admin_name')) %>%
mutate(binned_values=cut(output_value,
breaks=round(seq((min_value), (max_value), length.out = (num_colors+1)),2)))
plot_list[[yy]] = ggplot(admin_cur) +
geom_sf(aes(fill=binned_values), size=0.1) +
scale_fill_manual(values=setNames(colorscale, levels(admin_cur$binned_values)), drop=FALSE, name='coverage', na.value='grey96') +
ggtitle(inter_years[yy]) +
guides(fill = guide_legend(reverse=T)) +
theme_void() +
theme(plot.title = element_text(hjust = 0.5))
plot_list[[yy]] = change_legend_size(plot_list[[yy]], pointSize=10, textSize=10, spaceLegend=1)
}

gg = grid_arrange_shared_legend_plotlist(plotlist=plot_list, ncol=length(plot_list), position='right')
ggsave(output_filename, gg, width = (length(inter_years)+1)*1.8, height=2.3, units='in', dpi=800)
}
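
# Example usage (a minimal sketch; all objects below are hypothetical and not part of this file).
# Note that the function also expects `admin_shapefile` (an sf object with a NOMDEP column) and
# `coverage_colname` (the name of the coverage column in `inter_input`) to already exist in the
# calling environment -- they are not passed as arguments.
# admin_shapefile = sf::st_read('admin_boundaries.shp')
# coverage_colname = 'coverage'
# itn_input = data.frame(admin_name = rep(admin_shapefile$NOMDEP, 2),
#                        year = rep(c(2020, 2021), each = nrow(admin_shapefile)),
#                        coverage = runif(2 * nrow(admin_shapefile)))
# create_coverage_input_maps(inter_input = itn_input, inter_years = c(2020, 2021),
#                            output_filename = 'itn_coverage_maps.png',
#                            colorscale = RColorBrewer::brewer.pal(5, 'YlGnBu'),
#                            min_value = 0, max_value = 1, num_colors = 5)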



# function to create and save a map of admin units colored by group assignment
create_maps_state_groups = function(admin_shapefile_filepath, shapefile_admin_colname, admin_group_df, column_name, output_filename, colorscale){
  # read the shapefile as an sf object so it can be joined and plotted with geom_sf
  admin_shapefile = st_read(admin_shapefile_filepath)
  # standardize shapefile admin names (archetype_info comes from the calling environment)
  admin_shapefile$NOMDEP = standardize_admin_names_in_vector(target_names=archetype_info$LGA, origin_names=admin_shapefile[[shapefile_admin_colname]])

  admin_group_df$output_value = admin_group_df[[column_name]]
  admin_cur = admin_shapefile %>%
    left_join(admin_group_df, by=c('NOMDEP' = 'admin_name'))
  gg = ggplot(admin_cur) +
    geom_sf(aes(fill=output_value), size=0.1) +
    scale_fill_manual(values=setNames(colorscale, levels(factor(admin_cur$output_value))), drop=FALSE, name='group', na.value='grey96') +
    ggtitle(column_name) +
    guides(fill = guide_legend(reverse=T)) +
    theme_void() +
    theme(plot.title = element_text(hjust = 0.5))

  ggsave(output_filename, gg, width = (2)*1.8, height=2.3, units='in', dpi=800)
}
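
# Example usage (a minimal sketch; the file path, admin names, and column names below are
# hypothetical). The function also relies on `archetype_info` and
# `standardize_admin_names_in_vector()` being available from elsewhere in the project.
# group_df = data.frame(admin_name = c('admin_A', 'admin_B'),
#                       seasonality_group = c('group1', 'group2'))
# create_maps_state_groups(admin_shapefile_filepath = 'admin_boundaries.shp',
#                          shapefile_admin_colname = 'NOMDEP',
#                          admin_group_df = group_df,
#                          column_name = 'seasonality_group',
#                          output_filename = 'admin_groups_map.png',
#                          colorscale = c('#1b9e77', '#d95f02'))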
