From 0c34f17d819ca345fa5e55c0c1112e005682be4c Mon Sep 17 00:00:00 2001 From: Ryan Hafen Date: Thu, 21 Apr 2022 14:20:20 -0700 Subject: [PATCH] Initial working visualizations --- .Rbuildignore | 5 + DESCRIPTION | 23 ++- LICENSE | 2 + LICENSE.md | 21 +++ NAMESPACE | 34 +++++ R/calc.R | 64 ++++++-- R/geo_vis.R | 73 --------- R/misc.R | 26 +++- R/preprocess.R | 4 + R/vis_geo.R | 318 ++++++++++++++++++++++++++++++++++++++++ R/vis_geo_scatter.R | 34 +++++ R/vis_scatter.R | 122 +++++++++++++++ man/calc_rates.Rd | 20 ++- man/geo_scatter_vis.Rd | 20 +++ man/geo_vis_all.Rd | 31 ++++ man/preprocess_geo.Rd | 5 + man/preprocess_ipums.Rd | 5 + man/scatter_vis_all.Rd | 14 ++ man/view_var_descs.Rd | 7 +- scripts/data.R | 14 ++ scripts/explore.R | 47 ++++++ 21 files changed, 788 insertions(+), 101 deletions(-) create mode 100644 .Rbuildignore create mode 100644 LICENSE create mode 100644 LICENSE.md delete mode 100644 R/geo_vis.R create mode 100644 R/vis_geo.R create mode 100644 R/vis_geo_scatter.R create mode 100644 R/vis_scatter.R create mode 100644 man/geo_scatter_vis.Rd create mode 100644 man/geo_vis_all.Rd create mode 100644 man/scatter_vis_all.Rd create mode 100644 scripts/data.R create mode 100644 scripts/explore.R diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..18b7e63 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,5 @@ +^ext-data$ +^_ignore$ +^scripts$ +^docs$ +^LICENSE\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index d17318e..c8a9c9d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,26 @@ -Package: ipumsvis -Title: What the Package Does (One Line, Title Case) +Package: idhs +Title: Tools to process and visualize IPUMS DHS data Version: 0.0.0.9000 Authors@R: - person("First", "Last", , "first.last@example.com", role = c("aut", "cre"), + person("Ryan", "Hafen", , "rhafen@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "YOUR-ORCID-ID")) -Description: What the package does (one paragraph). -License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a - license +Description: Tools to process and visualize IPUMS DHS data. +License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.2 Imports: + DescTools, dplyr, DT, + forcats, + htmltools, ipumsr, - mapboxer + jsonlite, + mapboxer, + plotly, + rmapshaper, + safer, + sf, + tidyr, + viridisLite diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ca22524 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2022 +COPYRIGHT HOLDER: ipumsvis authors diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..162ccef --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2022 ipumsvis authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/NAMESPACE b/NAMESPACE index 8bb5858..610d240 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,18 +1,52 @@ # Generated by roxygen2: do not edit by hand +S3method(print,idhs_map_vis) export(calc_rates) +export(geo_scatter_vis) export(preprocess_geo) export(preprocess_ipums) +export(scatter_vis_all) export(view_var_descs) importFrom(DT,datatable) +importFrom(DescTools,BinomCI) importFrom(dplyr,"%>%") importFrom(dplyr,.data) +importFrom(dplyr,all_of) +importFrom(dplyr,bind_rows) importFrom(dplyr,filter) importFrom(dplyr,group_by) +importFrom(dplyr,mutate) +importFrom(dplyr,n) +importFrom(dplyr,recode) importFrom(dplyr,rename_all) +importFrom(dplyr,select) +importFrom(dplyr,summarise) importFrom(dplyr,tibble) +importFrom(dplyr,ungroup) +importFrom(forcats,fct_reorder) +importFrom(htmltools,HTML) +importFrom(htmltools,browsable) +importFrom(htmltools,tagList) +importFrom(htmltools,tags) importFrom(ipumsr,as_factor) importFrom(ipumsr,lbl_clean) importFrom(ipumsr,read_ipums_ddi) importFrom(ipumsr,read_ipums_micro) +importFrom(ipumsr,read_ipums_sf) +importFrom(ipumsr,zap_labels) +importFrom(jsonlite,toJSON) +importFrom(mapboxer,add_fill_layer) +importFrom(mapboxer,add_line_layer) +importFrom(mapboxer,add_tooltips) +importFrom(mapboxer,as_mapbox_source) +importFrom(mapboxer,mapboxer) +importFrom(plotly,add_trace) +importFrom(plotly,layout) +importFrom(plotly,plot_ly) +importFrom(plotly,subplot) +importFrom(rmapshaper,ms_simplify) +importFrom(sf,st_bbox) +importFrom(sf,st_sfc) +importFrom(tidyr,nest) importFrom(utils,download.file) +importFrom(viridisLite,viridis) diff --git a/R/calc.R b/R/calc.R index 66b5f7e..548b277 100644 --- a/R/calc.R +++ b/R/calc.R @@ -1,16 +1,33 @@ +#' Calculate rates from IPUMS DHS data +#' @param x TODO +#' @param geo_dir TODO +#' @param num_var TODO +#' @param num_cond TODO +#' @param denom_var TODO +#' @param denom_cond TODO #' Calculate rates from ipums data and associated geography #' @examples #' \dontrun{ #' calc_rates(dd, geo_dir = "ext-data/idhs/geo") #' } -#' @importFrom %>% dplyr group_by ungroup rename_all filter summarise n +#' @importFrom dplyr %>% group_by ungroup rename_all filter summarise n #' select all_of mutate #' @importFrom ipumsr read_ipums_sf as_factor lbl_clean zap_labels #' @export -calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { +calc_rates <- function( + x, geo_dir, num_var, num_cond, denom_var = NULL, denom_cond = NULL +) { check_ipums_data(x, "x") x$country2 <- ipumsr::as_factor(ipumsr::lbl_clean(x$country)) + if (is.null(denom_var)) { + x[["___denom___"]] <- 1 + denom_var <- "___denom___" + } + if (is.null(denom_cond)) { + denom_cond <- unique(x[[denom_var]]) + } + if (!dir.exists(geo_dir)) stop("directory '", geo_dir, "' doesn't exist") @@ -33,8 +50,8 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { # (could add an option later to do latest start and end date) ftbl <- ftbl %>% dplyr::group_by(.data$country) %>% - dplyr::filter(end_yr == max(end_yr)) %>% - dplyr::filter(start_yr == max(start_yr)) %>% + dplyr::filter(.data$end_yr == max(.data$end_yr)) %>% + dplyr::filter(.data$start_yr == max(.data$start_yr)) %>% dplyr::ungroup() ff <- ftbl$f @@ -49,6 +66,9 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { cur_var <- gsub(".*(geo_.*).zip", "\\1", f) cur_cntry <- geodf$cntry_name[1] message(cur_cntry, ": ", cur_var) + if (cur_cntry == "South Africa") { + geodf <- fix_south_africa_geo(geodf) + } if (!cur_var %in% xnms) { message(" not found...") @@ -66,7 +86,7 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { n = dplyr::n(), nna = length(which(is.na(.data[[num_var]]))) ) %>% - dplyr::filter(.data$n == nna) + dplyr::filter(.data$n == .data$nna) tmp <- tmp %>% dplyr::filter(!.data$year %in% tmp2$year, ) %>% dplyr::select(all_of(c(cur_var, vars))) %>% @@ -100,9 +120,9 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { dplyr::group_by(.data$year, .data$dhscode, .data$region) %>% dplyr::summarise( n = sum(.data$perweight), - n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)), - nsti = sum((.data[[num_var]] == 1) * .data$perweight), - nsti2 = length(which(.data[[num_var]] == 1)), + n2 = dplyr::n(), + nsti = sum((.data[[num_var]] %in% num_cond) * .data$perweight), + nsti2 = length(which(.data[[num_var]] %in% num_cond)), pct = 100 * .data$nsti / .data$n, .groups = "drop") %>% dplyr::mutate(country = cur_cntry, geo_var = cur_var) @@ -112,9 +132,9 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { dplyr::group_by(.data$dhscode, .data$region) %>% dplyr::summarise( n = sum(.data$perweight), - n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)), - nsti = sum((.data[[num_var]] == 1) * .data$perweight), - nsti2 = length(which(.data[[num_var]] == 1)), + n2 = dplyr::n(), + nsti = sum((.data[[num_var]] %in% num_cond) * .data$perweight), + nsti2 = length(which(.data[[num_var]] %in% num_cond)), pct = 100 * .data$nsti / .data$n, .groups = "drop") %>% dplyr::mutate(country = cur_cntry, geo_var = cur_var) @@ -125,8 +145,8 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { dplyr::summarise( n = sum(.data$perweight), n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)), - nsti = sum((.data[[num_var]] == 1) * .data$perweight), - nsti2 = length(which(.data[[num_var]] == 1)), + nsti = sum((.data[[num_var]] %in% num_cond) * .data$perweight), + nsti2 = length(which(.data[[num_var]] %in% num_cond)), pct = 100 * .data$nsti / .data$n) %>% dplyr::mutate(country = cur_cntry, geo_var = cur_var) @@ -135,8 +155,8 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { dplyr::summarise( n = sum(.data$perweight), n2 = length(which(.data[[num_var]] %in% num_cond)), - nsti = sum((.data[[num_var]] == 1) * .data$perweight), - nsti2 = length(which(.data[[num_var]] == 1)), + nsti = sum((.data[[num_var]] %in% num_cond) * .data$perweight), + nsti2 = length(which(.data[[num_var]] %in% num_cond)), pct = 100 * .data$nsti / .data$n) %>% dplyr::mutate(country = cur_cntry, geo_var = cur_var) @@ -153,3 +173,17 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { class(res) <- c("list", "ipums-rates") res } + +#' @importFrom sf st_sfc +fix_south_africa_geo <- function(a) { + idx <- which(a$admin_name == "Western Cape") + if (length(idx) == 1) { + bb <- a$geometry[[idx]] + bb <- bb[-c(1:8)] + a$geometry[[idx]] <- sf::st_multipolygon(bb) + attr(a$geometry, "bbox") <- NULL + a$geometry <- sf::st_sfc(a$geometry) + } + a +} +# ipumsr::read_ipums_sf("ext-data/idhs/geo/geo_za1998_2016.zip") diff --git a/R/geo_vis.R b/R/geo_vis.R deleted file mode 100644 index 7632dc8..0000000 --- a/R/geo_vis.R +++ /dev/null @@ -1,73 +0,0 @@ - -#' @importFrom dplyr bind_rows recode -#' @importFrom mapboxer as_mapbox_source mapboxer add_fill_layer -#' add_line_layer as_mapbox_source add_tooltips basemaps -geo_vis_all <- function( - rates, geo_dir, bins = NULL, width = NULL, height = NULL -) { - world <- ipumsr::read_ipums_sf( - file.path(geo_dir, "IPUMSI_world_release2020.zip"), - verbose = FALSE - ) %>% - dplyr::rename_all(tolower) %>% - # DRC is the only one coded differently here vs. in IPUMS data - # setdiff(sort(unique(x$country2)), world$cntry_name) - dplyr::mutate(country = dplyr::recode(.data$cntry_name, - "Congo, DRC" = "Congo Democratic Republic" - )) - - world2 <- dplyr::filter(world, .data$country %in% names(rates)) - - alldat <- dplyr::bind_rows(lapply(rates, function(x) x$yrregstats)) %>% - dplyr::group_by(.data$country, .data$region, .data$dhscode) %>% - dplyr::filter(year == max(.data$year) - ) - - allgeo <- lapply(rates, function(x) x$geodf) %>% - dplyr::bind_rows() %>% - dplyr::rename(country = "cntry_name") - - if (is.null(bins)) { - bins <- pretty(alldat$pct, n = 8) - # classInt::classIntervals(alldat$pct, n = 10) - } - cols <- viridisLite::viridis(length(bins) - 1) %>% - substr(1, 7) - - leglbl <- paste0(bins[-length(bins)], "-", bins[-1], "%") - - geodat <- ipumsr::ipums_shape_inner_join(alldat, allgeo, - by = c("country", "dhscode")) %>% - dplyr::mutate( - fill_color = cols[cut(.data$pct, bins, labels = FALSE)], - pct = round(.data$pct, 1) - ) - - # setdiff(names(sti_rates), geodat$country) - - # legend: https://docs.mapbox.com/help/tutorials/choropleth-studio-gl-pt-2/ - mapboxer::as_mapbox_source(geodat) %>% - mapboxer::mapboxer( - style = mapboxer::basemaps$Mapbox$light_v10, - bounds = sf::st_bbox(geodat), - fitBoundsOptions = list(padding = 20), - width = width, - height = height - ) %>% - mapboxer::add_fill_layer( - id = "map", - fill_color = c("get", "fill_color"), - fill_opacity = 0.6 - ) %>% - mapboxer::add_line_layer( - source = mapboxer::as_mapbox_source(world2), - line_color = "black", - line_width = 1 - ) %>% - mapboxer::add_tooltips( - "map", paste0( - "Country: {{country}}
", - "Region: {{region}}
", - "Year: {{year}}
", - "Pct STI: {{pct}}%")) -} \ No newline at end of file diff --git a/R/misc.R b/R/misc.R index fe18433..e5fd0fb 100644 --- a/R/misc.R +++ b/R/misc.R @@ -1,11 +1,13 @@ #' View html table of IPUMS extract variable descriptions +#' @param x TODO +#' @param include_geo TODO #' @importFrom DT datatable #' @importFrom dplyr filter tibble .data #' @export -view_var_descs <- function(dd, include_geo = FALSE) { +view_var_descs <- function(x, include_geo = FALSE) { check_ipums_data(x, "x") - descs <- sapply(dd, function(x) attr(x, "var_desc")) + descs <- sapply(x, function(x) attr(x, "var_desc")) descs <- dplyr::tibble( name = names(descs), desc = unname(descs) @@ -16,3 +18,23 @@ view_var_descs <- function(dd, include_geo = FALSE) { DT::datatable(descs, options = list(paging = FALSE)) } + +get_tkn <- function() { + if (Sys.getenv("IDHS_DEV_MODE") == "true") { + Sys.getenv("MAPBOX_API_KEY") + } else { + safer::decrypt_string("XLNDMUY6HdUjQ34+47BLmKf42KyqrWRmvL0QwGolHdOUAXEQqtp+SGd3/z1/4GweGP5vk0P7TLkJdI/e3iqekM5cgDq2riRySW1Vo1Tca3AtngJ0rVildwtkjwi9GMl75iOmrLDkGSI=", "idhs") + } +} + +dev_mode <- function(bool = FALSE) { + if (bool) { + Sys.setenv(IDHS_DEV_MODE = "true") + } else { + Sys.setenv(IDHS_DEV_MODE = "") + } +} + + +# ‘DescTools’ ‘forcats’ ‘htmltools’ ‘jsonlite’ ‘plotly’ ‘rmapshaper’ +# ‘sf’ ‘tidyr’ ‘viridisLite’ diff --git a/R/preprocess.R b/R/preprocess.R index b63650e..78dafed 100644 --- a/R/preprocess.R +++ b/R/preprocess.R @@ -1,4 +1,6 @@ #' Preprocess IPUMS extract +#' @param input_xml TODO +#' @param output_file TODO #' @importFrom ipumsr read_ipums_ddi read_ipums_micro as_factor lbl_clean #' @importFrom dplyr rename_all #' @examples @@ -26,6 +28,8 @@ preprocess_ipums <- function(input_xml, output_file) { # https://www.idhsdata.org/idhs/gis.shtml #' Preprocess (download) shapfiles associated with an IPUMS extract +#' @param x TODO +#' @param output_dir TODO #' @examples #' \dontrun{ #' dd <- preprocess_ipums( diff --git a/R/vis_geo.R b/R/vis_geo.R new file mode 100644 index 0000000..b441328 --- /dev/null +++ b/R/vis_geo.R @@ -0,0 +1,318 @@ +#' Create Mapbox visualization of rate data +#' @param rates TODO +#' @param geo_dir TODO +#' @param title TODO +#' @param bins TODO +#' @param width TODO +#' @param height TODO +#' @importFrom dplyr bind_rows recode +#' @importFrom mapboxer as_mapbox_source mapboxer add_fill_layer +#' add_line_layer add_tooltips +#' @importFrom jsonlite toJSON +#' @importFrom rmapshaper ms_simplify +#' @importFrom sf st_bbox +#' @importFrom viridisLite viridis +geo_vis_all <- function( + rates, geo_dir, title = "", bins = NULL, + width = "100vw", height = "100vh" +) { + world <- ipumsr::read_ipums_sf( + file.path(geo_dir, "IPUMSI_world_release2020.zip"), + verbose = FALSE + ) %>% + dplyr::rename_all(tolower) %>% + # DRC is the only one coded differently here vs. in IPUMS data + # setdiff(sort(unique(x$country2)), world$cntry_name) + dplyr::mutate(country = dplyr::recode(.data$cntry_name, + "Congo, DRC" = "Congo Democratic Republic" + )) + + world2 <- dplyr::filter(world, .data$country %in% names(rates)) + + alldat <- dplyr::bind_rows(lapply(rates, function(x) x$yrregstats)) %>% + dplyr::group_by(.data$country, .data$region, .data$dhscode) %>% + dplyr::filter(.data$year == max(.data$year) + ) + + allgeo <- lapply(rates, function(x) x$geodf) %>% + dplyr::bind_rows() %>% + dplyr::rename(country = "cntry_name") + + if (is.null(bins)) { + bins <- pretty(alldat$pct, n = 8) + # classInt::classIntervals(alldat$pct, n = 10) + } + cols <- viridisLite::viridis(length(bins) - 1) %>% + substr(1, 7) + + leglbl <- paste0(bins[-length(bins)], "-", bins[-1], "%") + + geodat <- ipumsr::ipums_shape_inner_join(alldat, allgeo, + by = c("country", "dhscode")) %>% + dplyr::mutate( + fill_color = cols[cut(.data$pct, bins, labels = FALSE)], + pct = round(.data$pct, 1), + country_region = paste(.data$country, .data$region, sep = "___") + ) + + # setdiff(names(sti_rates), geodat$country) + geodat2 <- rmapshaper::ms_simplify(geodat, keep = 0.01, keep_shapes = FALSE) + world3 <- rmapshaper::ms_simplify(world2, keep = 0.01, keep_shapes = FALSE) + + bboxes <- lapply( + c(lapply(world3$geometry, identity), list(world3$geometry) + ), function(x) { + tmp <- as.list(sf::st_bbox(x)) + list( + bottomleft = c(tmp$xmin, tmp$ymin), + topright = c(tmp$xmax, tmp$ymax) + ) + }) + names(bboxes) <- c(world3$country, "all") + bboxstr <- paste0("var bboxes = ", + jsonlite::toJSON(bboxes, auto_unbox = TRUE, pretty = TRUE)) + + # legend: https://docs.mapbox.com/help/tutorials/choropleth-studio-gl-pt-2/ + p <- mapboxer::as_mapbox_source(geodat2) %>% + mapboxer::mapboxer( + style = mapboxer::basemaps$Mapbox$light_v10, + bounds = sf::st_bbox(geodat), + fitBoundsOptions = list(padding = 20), + width = "100%", + height = "100%", + token = get_tkn() + ) %>% + mapboxer::add_fill_layer( + id = "map", + fill_color = c("get", "fill_color"), + fill_opacity = 0.6 + ) %>% + mapboxer::add_line_layer( + id = "map-lines", + line_color = "black", + line_width = 1, + line_opacity = 0.1 + ) %>% + mapboxer::add_line_layer( + id = "countries", + source = mapboxer::as_mapbox_source(world3), + line_color = "black", + line_width = 1 + ) %>% + mapboxer::add_tooltips( + "map", paste0( + "Country: {{country}}
", + "Region: {{region}}
", + "Year: {{year}}
", + "Pct STI: {{pct}}%")) + + build_map(p, bins, leglbl, cols, title, bboxstr, height, width) +} + +#' @importFrom htmltools tags tagList HTML browsable +build_map <- function(p, bins, leglbl, cols, title, bboxstr, height, width) { + tags <- htmltools::tags + + res <- htmltools::tagList( + tags$script(htmltools::HTML(bboxstr)), + tags$script(htmltools::HTML(" + function throttle(callback, interval) { + let enableCall = true; + + return function(...args) { + if (!enableCall) return; + + enableCall = false; + callback.apply(this, args); + setTimeout(() => enableCall = true, interval); + } + } + + var hoveredId = null; + + function resize() { + console.log('resize...') + var keys = Object.keys(window.mapboxer._widget); + keys.forEach(function(key) { + var curmap = window.mapboxer._widget[key].map; + curmap.resize(); + curmap.fitBounds(window.mapboxer_bounds[key]); + }); + }; + + var doresize; + window.onresize = function() { + clearTimeout(doresize); + doresize = setTimeout(resize, 100); + }; + + window.onload = function() { + var scatter = document.getElementById('scatterplot'); + var bnds = {}; + var keys = Object.keys(window.mapboxer._widget); + keys.forEach(function(key) { + var curmap = window.mapboxer._widget[key].map; + bnds[key] = curmap.getBounds(); + }); + window.mapboxer_bounds = bnds; + + var curmap = window.mapboxer._widget[keys[0]].map + + let onHover = (e) => { + if ( + e.features.length > 0 && + e.features[0].properties.country_region !== hoveredId + ) { + Plotly.restyle(scatter, {'line': {'color': 'darkgray'}}, []) + hoveredId = e.features[0].properties.country_region; + curmap.setPaintProperty( + 'map-lines', + 'line-opacity', + ['match', ['get', 'country_region'], hoveredId, 1, 0.1] + ); + curmap.setPaintProperty( + 'map-lines', + 'line-width', + ['match', ['get', 'country_region'], hoveredId, 3, 1] + ); + var traceidx = scatter.data.findIndex(obj => { + return obj.name === hoveredId; + }); + if (traceidx > -1) { + // console.log('restyling hover...'); + // Plotly.addTraces(scatterplot, [{x: [2010], y: [20], mode: 'markers', marker: {symbol: 6, size: 12}, xaxis: 'x', yaxis: 'y'}]) + Plotly.restyle(scatter, {'line': {'color': 'blue'}}, [traceidx]) + } + } + }; + + let onUnhover = (e) => { + curmap.setPaintProperty( + 'map-lines', + 'line-opacity', + 0.1 + ); + curmap.setPaintProperty( + 'map-lines', + 'line-width', + 1 + ); + + // console.log('restyling unhover...'); + Plotly.restyle(scatter, {'line': {'color': 'darkgray'}}, []) + hoveredId = null; + }; + + // curmap.on('mousemove', 'map', throttle(onHover, 50)); + // curmap.on('mouseleave', 'map', throttle(onUnhover, 50)); + curmap.on('mousemove', 'map', onHover); + curmap.on('mouseleave', 'map', onUnhover); + + scatterplot.on('plotly_hover', function(d) { + // console.log('restyling plotly hover...') + Plotly.restyle(scatterplot, {'line': {'color': 'blue'}}, [d.points[0].curveNumber]) + var hoveredId = d.points[0].data.name; + var country = hoveredId.split('___')[0]; + if (bboxes && bboxes[country]) { + curmap.fitBounds([bboxes[country].bottomleft, bboxes[country].topright]); + } + curmap.setPaintProperty( + 'map-lines', + 'line-opacity', + ['match', ['get', 'country_region'], hoveredId, 1, 0.1] + ); + curmap.setPaintProperty( + 'map-lines', + 'line-width', + ['match', ['get', 'country_region'], hoveredId, 4, 1] + ); + }); + + scatterplot.on('plotly_unhover', function(d) { + if (bboxes && bboxes.all) { + curmap.fitBounds([bboxes.all.bottomleft, bboxes.all.topright]); + } + curmap.setPaintProperty( + 'map-lines', + 'line-opacity', + 0.1 + ); + curmap.setPaintProperty( + 'map-lines', + 'line-width', + 1 + ); + // console.log('restyling plotly unhover...') + Plotly.restyle(scatterplot, {'line': {'color': 'darkgray'}}, [d.points[0].curveNumber]); + }); + } + ")), + tags$head(tags$style(" + body { + margin: 0px; + } + .geo-container { + position: relative; + width: fit-content; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif + } + .title { + position: absolute; + font-size: 16px; + top: 0; + left: 0; + } + .legend-container { + position: absolute; + display: flex; + flex-direction: column; + top: 10px; + right: 10px; + background: rgba(255, 255, 255, 0.7); + } + .legend-entry { + display: flex; + flex-direction: row; + margin-bottom: 2px; + } + .legend-color-box { + width: 17px; + height: 17px; + margin-right: 10px; + } + .legend-item-text { + font-size: 14px; + } + ")), + tags$div( + class = "geo-container", + style = paste0("width: ", width, "; height: ", height, ";"), + p, + tags$div(class = "title", title), + tags$div(class = "legend-container", + lapply(seq_along(leglbl), function(ii) { + tags$div(class = "legend-entry", + tags$div( + class = "legend-color-box", + style = paste0("background: ", cols[ii], ";") + ), + tags$div( + class = "legend-item-text", + leglbl[ii] + ) + ) + }) + ) + ) + ) + + class(res) <- c("idhs_map_vis", class(res)) + + res +} + +#' @export +print.idhs_map_vis <- function(x, ...) { + class(x) <- setdiff(class(x), "idhs_map_vis") + print(htmltools::browsable(x)) +} diff --git a/R/vis_geo_scatter.R b/R/vis_geo_scatter.R new file mode 100644 index 0000000..e28a604 --- /dev/null +++ b/R/vis_geo_scatter.R @@ -0,0 +1,34 @@ +#' Create linked scatter and geo visualization +#' @param rates TODO +#' @param title TODO +#' @param geo_dir TODO +#' @param bins TODO +#' @importFrom htmltools browsable +#' @export +geo_scatter_vis <- function(rates, title, geo_dir, bins = NULL) { + p1 <- scatter_vis_all(rates) + + p2 <- geo_vis_all( + rates = rates, + title = title, + geo_dir = geo_dir, + bins = bins, + width = "40vw" + ) + + tags <- htmltools::tags + res <- htmltools::tagList( + tags$head(tags$style(" + .window-container { + display: flex; + flex-direction: row; + } + ")), + tags$div(class = "window-container", + tags$div(style = "width: 60vw; height: 100vh;", p1), + p2 + ) + ) + + htmltools::browsable(res) +} diff --git a/R/vis_scatter.R b/R/vis_scatter.R new file mode 100644 index 0000000..21f2588 --- /dev/null +++ b/R/vis_scatter.R @@ -0,0 +1,122 @@ +#' @importFrom DescTools BinomCI +get_ci <- function(x, n) { + # default 0.95 Wilson + dplyr::as_tibble(DescTools::BinomCI(x, n)) %>% + dplyr::mutate_all(function(x) x * 100) %>% + dplyr::rename( + percent = .data$est, lower = .data$lwr.ci, upper = .data$upr.ci) +} + +#' Create scatterplot visualization for all geographies +#' @param rates TODO +#' @importFrom plotly plot_ly add_trace layout subplot +#' @importFrom forcats fct_reorder +#' @importFrom tidyr nest +#' @export +scatter_vis_all <- function(rates) { + alldat <- dplyr::bind_rows( + lapply(rates, function(x) x$yrregstats)) %>% + dplyr::mutate( + country = forcats::fct_reorder(.data$country, .data$pct, mean) + ) + alldat <- dplyr::bind_cols(alldat, get_ci(alldat$nsti, alldat$n)) + + ctrydat <- dplyr::bind_rows( + lapply(rates, function(x) x$yrstats)) %>% + dplyr::mutate( + country = factor(.data$country, levels = levels(alldat$country)), + region = "") + ctrydat <- dplyr::bind_cols(ctrydat, get_ci(ctrydat$nsti, ctrydat$n)) + + pdat <- dplyr::left_join( + tidyr::nest(alldat, rdat = -c("country")), + tidyr::nest(ctrydat, cdat = -c("country")), + by = "country" + ) %>% + dplyr::arrange(.data$country) + + maxrng <- max(alldat$pct) + yrrng <- range(alldat$year) + + figs <- lapply(seq_len(nrow(pdat)), function(ii) { + curr <- tidyr::nest(pdat$rdat[[ii]], data = -c("region")) + curc <- pdat$cdat[[ii]] + cur_cntry <- as.character(pdat$country[ii]) + + fig <- plotly::plot_ly() + for (jj in seq_len(nrow(curr))) { + tmp <- curr$data[[jj]] + cur_reg <- as.character(curr$region[jj]) + fig <- fig %>% + # plotly::add_ribbons( + # x = tmp$year, + # ymin = tmp$lower, + # ymax = tmp$upper, + # color = I("darkgray"), + # line = list(color = "transparent"), + # alpha = 0.4, + # hoverinfo = "text", + # showlegend = FALSE, + # visible = "legendonly", + # name = paste(cur_cntry, cur_reg, "CI", sep = "___") + # ) %>% + plotly::add_trace( + x = tmp$year, + y = tmp$pct, + color = I("darkgray"), + line = list(width = 3), + size = 4, + alpha = 0.6, + name = paste(cur_cntry, cur_reg, sep = "___"), + type = "scatter", + mode = "lines+markers", + hoverinfo = "text", + text = paste0(round(tmp$pct, 1), "%
Region: ", cur_reg), + showlegend = FALSE, + name = paste(cur_cntry, cur_reg, sep = "___") + ) %>% + plotly::layout( + yaxis = list( + range = list(0, maxrng), + zeroline = FALSE + ), + # TODO: determine fixed tickvals dynamically + xaxis = list( + range = list(yrrng[1] - 1, yrrng[2] + 1), + tickmode = "array", + ticktext = c( + "'00", + "'05", + "'10", + "'15" + ), + tickvals = c( + 2000, + 2005, + 2010, + 2015 + ), + categoryorder = "array", + categoryarray = c( + "'00", + "'05", + "'10", + "'15" + ), + tickangle = 0, + zeroline = FALSE + ) + ) + } + fig + }) + + res <- plotly::subplot(figs, nrows = 2, shareY = TRUE, shareX = TRUE, + margin = 0.004) + + res$elementId <- "scatterplot" + + res$sizingPolicy$defaultHeight <- "100%" + + res +} diff --git a/man/calc_rates.Rd b/man/calc_rates.Rd index 2684f43..6fb605f 100644 --- a/man/calc_rates.Rd +++ b/man/calc_rates.Rd @@ -2,12 +2,26 @@ % Please edit documentation in R/calc.R \name{calc_rates} \alias{calc_rates} -\title{Calculate rates from ipums data and associated geography} +\title{Calculate rates from IPUMS DHS data} \usage{ -calc_rates(x, geo_dir) +calc_rates(x, geo_dir, num_var, num_cond, denom_var = NULL, denom_cond = NULL) +} +\arguments{ +\item{x}{TODO} + +\item{geo_dir}{TODO} + +\item{num_var}{TODO} + +\item{num_cond}{TODO} + +\item{denom_var}{TODO} + +\item{denom_cond}{TODO +Calculate rates from ipums data and associated geography} } \description{ -Calculate rates from ipums data and associated geography +Calculate rates from IPUMS DHS data } \examples{ \dontrun{ diff --git a/man/geo_scatter_vis.Rd b/man/geo_scatter_vis.Rd new file mode 100644 index 0000000..bc7ed82 --- /dev/null +++ b/man/geo_scatter_vis.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vis_geo_scatter.R +\name{geo_scatter_vis} +\alias{geo_scatter_vis} +\title{Create linked scatter and geo visualization} +\usage{ +geo_scatter_vis(rates, title, geo_dir, bins = NULL) +} +\arguments{ +\item{rates}{TODO} + +\item{title}{TODO} + +\item{geo_dir}{TODO} + +\item{bins}{TODO} +} +\description{ +Create linked scatter and geo visualization +} diff --git a/man/geo_vis_all.Rd b/man/geo_vis_all.Rd new file mode 100644 index 0000000..2b0f614 --- /dev/null +++ b/man/geo_vis_all.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vis_geo.R +\name{geo_vis_all} +\alias{geo_vis_all} +\title{Create Mapbox visualization of rate data} +\usage{ +geo_vis_all( + rates, + geo_dir, + title = "", + bins = NULL, + width = "100vw", + height = "100vh" +) +} +\arguments{ +\item{rates}{TODO} + +\item{geo_dir}{TODO} + +\item{title}{TODO} + +\item{bins}{TODO} + +\item{width}{TODO} + +\item{height}{TODO} +} +\description{ +Create Mapbox visualization of rate data +} diff --git a/man/preprocess_geo.Rd b/man/preprocess_geo.Rd index 7921b9d..92819ca 100644 --- a/man/preprocess_geo.Rd +++ b/man/preprocess_geo.Rd @@ -6,6 +6,11 @@ \usage{ preprocess_geo(x, output_dir) } +\arguments{ +\item{x}{TODO} + +\item{output_dir}{TODO} +} \description{ Preprocess (download) shapfiles associated with an IPUMS extract } diff --git a/man/preprocess_ipums.Rd b/man/preprocess_ipums.Rd index e2726fe..f2bc1fb 100644 --- a/man/preprocess_ipums.Rd +++ b/man/preprocess_ipums.Rd @@ -6,6 +6,11 @@ \usage{ preprocess_ipums(input_xml, output_file) } +\arguments{ +\item{input_xml}{TODO} + +\item{output_file}{TODO} +} \description{ Preprocess IPUMS extract } diff --git a/man/scatter_vis_all.Rd b/man/scatter_vis_all.Rd new file mode 100644 index 0000000..19d5992 --- /dev/null +++ b/man/scatter_vis_all.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vis_scatter.R +\name{scatter_vis_all} +\alias{scatter_vis_all} +\title{Create scatterplot visualization for all geographies} +\usage{ +scatter_vis_all(rates) +} +\arguments{ +\item{rates}{TODO} +} +\description{ +Create scatterplot visualization for all geographies +} diff --git a/man/view_var_descs.Rd b/man/view_var_descs.Rd index f7960a5..410ccc3 100644 --- a/man/view_var_descs.Rd +++ b/man/view_var_descs.Rd @@ -4,7 +4,12 @@ \alias{view_var_descs} \title{View html table of IPUMS extract variable descriptions} \usage{ -view_var_descs(dd, include_geo = FALSE) +view_var_descs(x, include_geo = FALSE) +} +\arguments{ +\item{x}{TODO} + +\item{include_geo}{TODO} } \description{ View html table of IPUMS extract variable descriptions diff --git a/scripts/data.R b/scripts/data.R new file mode 100644 index 0000000..915a3dd --- /dev/null +++ b/scripts/data.R @@ -0,0 +1,14 @@ +# ---------- preprocess the data (only run once) --------- # + +dd <- preprocess_ipums( + input_xml = "ext-data/idhs/extract2/idhs_00002.xml", + output_file = "ext-data/idhs/extract2/dd.rds" +) + +preprocess_geo(dd, output_dir = "ext-data/idhs/geo/") + +ipumsr::ipums_conditions() +# view extract information +ipumsr::ipums_view(attr(dd, "ipums_ddi")) + + diff --git a/scripts/explore.R b/scripts/explore.R new file mode 100644 index 0000000..2e5a97b --- /dev/null +++ b/scripts/explore.R @@ -0,0 +1,47 @@ +library() +# load_all() + +# NOTE: before running this script run the code in data.R + +# --------------- read in preprocessed data -------------- # + +dd <- readRDS("ext-data/idhs/extract2/dd.rds") + +# html page showing variable names and their descriptions +view_var_descs(dd) +view_var_descs(dd, include_geo = TRUE) + +# ---------- get regional yearly STI prevalence ---------- # + +# numerator is stianyr (had STI in last 12 months) = "Yes" +attributes(dd$stianyr)[c("labels", "label")] +# denominator is all responses except sexactiv4wk = "never had intercourse" +attributes(dd$sexactiv4wk)[c("labels", "label")] + +sti_rates <- calc_rates(dd, + geo_dir = "ext-data/idhs/geo/", + num_var = "stianyr", + num_cond = 1, + denom_var = "sexactiv4wk", + denom_cond = c(1:9) +) + +# -------------------- visualizations -------------------- # + +# individual scatter +scatter_vis_all(sti_rates) + +# individual geo +geo_vis_all(sti_rates, + title = "STI rates", + geo_dir = "ext-data/idhs/geo/", + bins = c(0, 2, 4, 6, 10, 15, 35) +) + +# joint +geo_scatter_vis( + sti_rates, + title = "STI rates", + geo_dir = "ext-data/idhs/geo/", + bins = c(0, 2, 4, 6, 10, 15, 35) +)