diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..18b7e63
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,5 @@
+^ext-data$
+^_ignore$
+^scripts$
+^docs$
+^LICENSE\.md$
diff --git a/DESCRIPTION b/DESCRIPTION
index d17318e..c8a9c9d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,17 +1,26 @@
-Package: ipumsvis
-Title: What the Package Does (One Line, Title Case)
+Package: idhs
+Title: Tools to process and visualize IPUMS DHS data
Version: 0.0.0.9000
Authors@R:
- person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
+ person("Ryan", "Hafen", , "rhafen@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "YOUR-ORCID-ID"))
-Description: What the package does (one paragraph).
-License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
- license
+Description: Tools to process and visualize IPUMS DHS data.
+License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
Imports:
+ DescTools,
dplyr,
DT,
+ forcats,
+ htmltools,
ipumsr,
- mapboxer
+ jsonlite,
+ mapboxer,
+ plotly,
+ rmapshaper,
+ safer,
+ sf,
+ tidyr,
+ viridisLite
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..ca22524
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2022
+COPYRIGHT HOLDER: ipumsvis authors
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..162ccef
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+# MIT License
+
+Copyright (c) 2022 ipumsvis authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/NAMESPACE b/NAMESPACE
index 8bb5858..610d240 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,18 +1,52 @@
# Generated by roxygen2: do not edit by hand
+S3method(print,idhs_map_vis)
export(calc_rates)
+export(geo_scatter_vis)
export(preprocess_geo)
export(preprocess_ipums)
+export(scatter_vis_all)
export(view_var_descs)
importFrom(DT,datatable)
+importFrom(DescTools,BinomCI)
importFrom(dplyr,"%>%")
importFrom(dplyr,.data)
+importFrom(dplyr,all_of)
+importFrom(dplyr,bind_rows)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
+importFrom(dplyr,mutate)
+importFrom(dplyr,n)
+importFrom(dplyr,recode)
importFrom(dplyr,rename_all)
+importFrom(dplyr,select)
+importFrom(dplyr,summarise)
importFrom(dplyr,tibble)
+importFrom(dplyr,ungroup)
+importFrom(forcats,fct_reorder)
+importFrom(htmltools,HTML)
+importFrom(htmltools,browsable)
+importFrom(htmltools,tagList)
+importFrom(htmltools,tags)
importFrom(ipumsr,as_factor)
importFrom(ipumsr,lbl_clean)
importFrom(ipumsr,read_ipums_ddi)
importFrom(ipumsr,read_ipums_micro)
+importFrom(ipumsr,read_ipums_sf)
+importFrom(ipumsr,zap_labels)
+importFrom(jsonlite,toJSON)
+importFrom(mapboxer,add_fill_layer)
+importFrom(mapboxer,add_line_layer)
+importFrom(mapboxer,add_tooltips)
+importFrom(mapboxer,as_mapbox_source)
+importFrom(mapboxer,mapboxer)
+importFrom(plotly,add_trace)
+importFrom(plotly,layout)
+importFrom(plotly,plot_ly)
+importFrom(plotly,subplot)
+importFrom(rmapshaper,ms_simplify)
+importFrom(sf,st_bbox)
+importFrom(sf,st_sfc)
+importFrom(tidyr,nest)
importFrom(utils,download.file)
+importFrom(viridisLite,viridis)
diff --git a/R/calc.R b/R/calc.R
index 66b5f7e..548b277 100644
--- a/R/calc.R
+++ b/R/calc.R
@@ -1,16 +1,33 @@
+#' Calculate rates from IPUMS DHS data
+#' @param x TODO
+#' @param geo_dir TODO
+#' @param num_var TODO
+#' @param num_cond TODO
+#' @param denom_var TODO
+#' @param denom_cond TODO
#' Calculate rates from ipums data and associated geography
#' @examples
#' \dontrun{
#' calc_rates(dd, geo_dir = "ext-data/idhs/geo")
#' }
-#' @importFrom %>% dplyr group_by ungroup rename_all filter summarise n
+#' @importFrom dplyr %>% group_by ungroup rename_all filter summarise n
#' select all_of mutate
#' @importFrom ipumsr read_ipums_sf as_factor lbl_clean zap_labels
#' @export
-calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
+calc_rates <- function(
+ x, geo_dir, num_var, num_cond, denom_var = NULL, denom_cond = NULL
+) {
check_ipums_data(x, "x")
x$country2 <- ipumsr::as_factor(ipumsr::lbl_clean(x$country))
+ if (is.null(denom_var)) {
+ x[["___denom___"]] <- 1
+ denom_var <- "___denom___"
+ }
+ if (is.null(denom_cond)) {
+ denom_cond <- unique(x[[denom_var]])
+ }
+
if (!dir.exists(geo_dir))
stop("directory '", geo_dir, "' doesn't exist")
@@ -33,8 +50,8 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
# (could add an option later to do latest start and end date)
ftbl <- ftbl %>%
dplyr::group_by(.data$country) %>%
- dplyr::filter(end_yr == max(end_yr)) %>%
- dplyr::filter(start_yr == max(start_yr)) %>%
+ dplyr::filter(.data$end_yr == max(.data$end_yr)) %>%
+ dplyr::filter(.data$start_yr == max(.data$start_yr)) %>%
dplyr::ungroup()
ff <- ftbl$f
@@ -49,6 +66,9 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
cur_var <- gsub(".*(geo_.*).zip", "\\1", f)
cur_cntry <- geodf$cntry_name[1]
message(cur_cntry, ": ", cur_var)
+ if (cur_cntry == "South Africa") {
+ geodf <- fix_south_africa_geo(geodf)
+ }
if (!cur_var %in% xnms) {
message(" not found...")
@@ -66,7 +86,7 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
n = dplyr::n(),
nna = length(which(is.na(.data[[num_var]])))
) %>%
- dplyr::filter(.data$n == nna)
+ dplyr::filter(.data$n == .data$nna)
tmp <- tmp %>%
dplyr::filter(!.data$year %in% tmp2$year, ) %>%
dplyr::select(all_of(c(cur_var, vars))) %>%
@@ -100,9 +120,9 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
dplyr::group_by(.data$year, .data$dhscode, .data$region) %>%
dplyr::summarise(
n = sum(.data$perweight),
- n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)),
- nsti = sum((.data[[num_var]] == 1) * .data$perweight),
- nsti2 = length(which(.data[[num_var]] == 1)),
+ n2 = dplyr::n(),
+ nsti = sum((.data[[num_var]] %in% num_cond) * .data$perweight),
+ nsti2 = length(which(.data[[num_var]] %in% num_cond)),
pct = 100 * .data$nsti / .data$n,
.groups = "drop") %>%
dplyr::mutate(country = cur_cntry, geo_var = cur_var)
@@ -112,9 +132,9 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
dplyr::group_by(.data$dhscode, .data$region) %>%
dplyr::summarise(
n = sum(.data$perweight),
- n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)),
- nsti = sum((.data[[num_var]] == 1) * .data$perweight),
- nsti2 = length(which(.data[[num_var]] == 1)),
+ n2 = dplyr::n(),
+ nsti = sum((.data[[num_var]] %in% num_cond) * .data$perweight),
+ nsti2 = length(which(.data[[num_var]] %in% num_cond)),
pct = 100 * .data$nsti / .data$n,
.groups = "drop") %>%
dplyr::mutate(country = cur_cntry, geo_var = cur_var)
@@ -125,8 +145,8 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
dplyr::summarise(
n = sum(.data$perweight),
n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)),
- nsti = sum((.data[[num_var]] == 1) * .data$perweight),
- nsti2 = length(which(.data[[num_var]] == 1)),
+ nsti = sum((.data[[num_var]] %in% num_cond) * .data$perweight),
+ nsti2 = length(which(.data[[num_var]] %in% num_cond)),
pct = 100 * .data$nsti / .data$n) %>%
dplyr::mutate(country = cur_cntry, geo_var = cur_var)
@@ -135,8 +155,8 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
dplyr::summarise(
n = sum(.data$perweight),
n2 = length(which(.data[[num_var]] %in% num_cond)),
- nsti = sum((.data[[num_var]] == 1) * .data$perweight),
- nsti2 = length(which(.data[[num_var]] == 1)),
+ nsti = sum((.data[[num_var]] %in% num_cond) * .data$perweight),
+ nsti2 = length(which(.data[[num_var]] %in% num_cond)),
pct = 100 * .data$nsti / .data$n) %>%
dplyr::mutate(country = cur_cntry, geo_var = cur_var)
@@ -153,3 +173,17 @@ calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) {
class(res) <- c("list", "ipums-rates")
res
}
+
+#' @importFrom sf st_sfc
+fix_south_africa_geo <- function(a) {
+ idx <- which(a$admin_name == "Western Cape")
+ if (length(idx) == 1) {
+ bb <- a$geometry[[idx]]
+ bb <- bb[-c(1:8)]
+ a$geometry[[idx]] <- sf::st_multipolygon(bb)
+ attr(a$geometry, "bbox") <- NULL
+ a$geometry <- sf::st_sfc(a$geometry)
+ }
+ a
+}
+# ipumsr::read_ipums_sf("ext-data/idhs/geo/geo_za1998_2016.zip")
diff --git a/R/geo_vis.R b/R/geo_vis.R
deleted file mode 100644
index 7632dc8..0000000
--- a/R/geo_vis.R
+++ /dev/null
@@ -1,73 +0,0 @@
-
-#' @importFrom dplyr bind_rows recode
-#' @importFrom mapboxer as_mapbox_source mapboxer add_fill_layer
-#' add_line_layer as_mapbox_source add_tooltips basemaps
-geo_vis_all <- function(
- rates, geo_dir, bins = NULL, width = NULL, height = NULL
-) {
- world <- ipumsr::read_ipums_sf(
- file.path(geo_dir, "IPUMSI_world_release2020.zip"),
- verbose = FALSE
- ) %>%
- dplyr::rename_all(tolower) %>%
- # DRC is the only one coded differently here vs. in IPUMS data
- # setdiff(sort(unique(x$country2)), world$cntry_name)
- dplyr::mutate(country = dplyr::recode(.data$cntry_name,
- "Congo, DRC" = "Congo Democratic Republic"
- ))
-
- world2 <- dplyr::filter(world, .data$country %in% names(rates))
-
- alldat <- dplyr::bind_rows(lapply(rates, function(x) x$yrregstats)) %>%
- dplyr::group_by(.data$country, .data$region, .data$dhscode) %>%
- dplyr::filter(year == max(.data$year)
- )
-
- allgeo <- lapply(rates, function(x) x$geodf) %>%
- dplyr::bind_rows() %>%
- dplyr::rename(country = "cntry_name")
-
- if (is.null(bins)) {
- bins <- pretty(alldat$pct, n = 8)
- # classInt::classIntervals(alldat$pct, n = 10)
- }
- cols <- viridisLite::viridis(length(bins) - 1) %>%
- substr(1, 7)
-
- leglbl <- paste0(bins[-length(bins)], "-", bins[-1], "%")
-
- geodat <- ipumsr::ipums_shape_inner_join(alldat, allgeo,
- by = c("country", "dhscode")) %>%
- dplyr::mutate(
- fill_color = cols[cut(.data$pct, bins, labels = FALSE)],
- pct = round(.data$pct, 1)
- )
-
- # setdiff(names(sti_rates), geodat$country)
-
- # legend: https://docs.mapbox.com/help/tutorials/choropleth-studio-gl-pt-2/
- mapboxer::as_mapbox_source(geodat) %>%
- mapboxer::mapboxer(
- style = mapboxer::basemaps$Mapbox$light_v10,
- bounds = sf::st_bbox(geodat),
- fitBoundsOptions = list(padding = 20),
- width = width,
- height = height
- ) %>%
- mapboxer::add_fill_layer(
- id = "map",
- fill_color = c("get", "fill_color"),
- fill_opacity = 0.6
- ) %>%
- mapboxer::add_line_layer(
- source = mapboxer::as_mapbox_source(world2),
- line_color = "black",
- line_width = 1
- ) %>%
- mapboxer::add_tooltips(
- "map", paste0(
- "Country: {{country}}
",
- "Region: {{region}}
",
- "Year: {{year}}
",
- "Pct STI: {{pct}}%"))
-}
\ No newline at end of file
diff --git a/R/misc.R b/R/misc.R
index fe18433..e5fd0fb 100644
--- a/R/misc.R
+++ b/R/misc.R
@@ -1,11 +1,13 @@
#' View html table of IPUMS extract variable descriptions
+#' @param x TODO
+#' @param include_geo TODO
#' @importFrom DT datatable
#' @importFrom dplyr filter tibble .data
#' @export
-view_var_descs <- function(dd, include_geo = FALSE) {
+view_var_descs <- function(x, include_geo = FALSE) {
check_ipums_data(x, "x")
- descs <- sapply(dd, function(x) attr(x, "var_desc"))
+ descs <- sapply(x, function(x) attr(x, "var_desc"))
descs <- dplyr::tibble(
name = names(descs),
desc = unname(descs)
@@ -16,3 +18,23 @@ view_var_descs <- function(dd, include_geo = FALSE) {
DT::datatable(descs, options = list(paging = FALSE))
}
+
+get_tkn <- function() {
+ if (Sys.getenv("IDHS_DEV_MODE") == "true") {
+ Sys.getenv("MAPBOX_API_KEY")
+ } else {
+ safer::decrypt_string("XLNDMUY6HdUjQ34+47BLmKf42KyqrWRmvL0QwGolHdOUAXEQqtp+SGd3/z1/4GweGP5vk0P7TLkJdI/e3iqekM5cgDq2riRySW1Vo1Tca3AtngJ0rVildwtkjwi9GMl75iOmrLDkGSI=", "idhs")
+ }
+}
+
+dev_mode <- function(bool = FALSE) {
+ if (bool) {
+ Sys.setenv(IDHS_DEV_MODE = "true")
+ } else {
+ Sys.setenv(IDHS_DEV_MODE = "")
+ }
+}
+
+
+# ‘DescTools’ ‘forcats’ ‘htmltools’ ‘jsonlite’ ‘plotly’ ‘rmapshaper’
+# ‘sf’ ‘tidyr’ ‘viridisLite’
diff --git a/R/preprocess.R b/R/preprocess.R
index b63650e..78dafed 100644
--- a/R/preprocess.R
+++ b/R/preprocess.R
@@ -1,4 +1,6 @@
#' Preprocess IPUMS extract
+#' @param input_xml TODO
+#' @param output_file TODO
#' @importFrom ipumsr read_ipums_ddi read_ipums_micro as_factor lbl_clean
#' @importFrom dplyr rename_all
#' @examples
@@ -26,6 +28,8 @@ preprocess_ipums <- function(input_xml, output_file) {
# https://www.idhsdata.org/idhs/gis.shtml
#' Preprocess (download) shapfiles associated with an IPUMS extract
+#' @param x TODO
+#' @param output_dir TODO
#' @examples
#' \dontrun{
#' dd <- preprocess_ipums(
diff --git a/R/vis_geo.R b/R/vis_geo.R
new file mode 100644
index 0000000..b441328
--- /dev/null
+++ b/R/vis_geo.R
@@ -0,0 +1,318 @@
+#' Create Mapbox visualization of rate data
+#' @param rates TODO
+#' @param geo_dir TODO
+#' @param title TODO
+#' @param bins TODO
+#' @param width TODO
+#' @param height TODO
+#' @importFrom dplyr bind_rows recode
+#' @importFrom mapboxer as_mapbox_source mapboxer add_fill_layer
+#' add_line_layer add_tooltips
+#' @importFrom jsonlite toJSON
+#' @importFrom rmapshaper ms_simplify
+#' @importFrom sf st_bbox
+#' @importFrom viridisLite viridis
+geo_vis_all <- function(
+ rates, geo_dir, title = "", bins = NULL,
+ width = "100vw", height = "100vh"
+) {
+ world <- ipumsr::read_ipums_sf(
+ file.path(geo_dir, "IPUMSI_world_release2020.zip"),
+ verbose = FALSE
+ ) %>%
+ dplyr::rename_all(tolower) %>%
+ # DRC is the only one coded differently here vs. in IPUMS data
+ # setdiff(sort(unique(x$country2)), world$cntry_name)
+ dplyr::mutate(country = dplyr::recode(.data$cntry_name,
+ "Congo, DRC" = "Congo Democratic Republic"
+ ))
+
+ world2 <- dplyr::filter(world, .data$country %in% names(rates))
+
+ alldat <- dplyr::bind_rows(lapply(rates, function(x) x$yrregstats)) %>%
+ dplyr::group_by(.data$country, .data$region, .data$dhscode) %>%
+ dplyr::filter(.data$year == max(.data$year)
+ )
+
+ allgeo <- lapply(rates, function(x) x$geodf) %>%
+ dplyr::bind_rows() %>%
+ dplyr::rename(country = "cntry_name")
+
+ if (is.null(bins)) {
+ bins <- pretty(alldat$pct, n = 8)
+ # classInt::classIntervals(alldat$pct, n = 10)
+ }
+ cols <- viridisLite::viridis(length(bins) - 1) %>%
+ substr(1, 7)
+
+ leglbl <- paste0(bins[-length(bins)], "-", bins[-1], "%")
+
+ geodat <- ipumsr::ipums_shape_inner_join(alldat, allgeo,
+ by = c("country", "dhscode")) %>%
+ dplyr::mutate(
+ fill_color = cols[cut(.data$pct, bins, labels = FALSE)],
+ pct = round(.data$pct, 1),
+ country_region = paste(.data$country, .data$region, sep = "___")
+ )
+
+ # setdiff(names(sti_rates), geodat$country)
+ geodat2 <- rmapshaper::ms_simplify(geodat, keep = 0.01, keep_shapes = FALSE)
+ world3 <- rmapshaper::ms_simplify(world2, keep = 0.01, keep_shapes = FALSE)
+
+ bboxes <- lapply(
+ c(lapply(world3$geometry, identity), list(world3$geometry)
+ ), function(x) {
+ tmp <- as.list(sf::st_bbox(x))
+ list(
+ bottomleft = c(tmp$xmin, tmp$ymin),
+ topright = c(tmp$xmax, tmp$ymax)
+ )
+ })
+ names(bboxes) <- c(world3$country, "all")
+ bboxstr <- paste0("var bboxes = ",
+ jsonlite::toJSON(bboxes, auto_unbox = TRUE, pretty = TRUE))
+
+ # legend: https://docs.mapbox.com/help/tutorials/choropleth-studio-gl-pt-2/
+ p <- mapboxer::as_mapbox_source(geodat2) %>%
+ mapboxer::mapboxer(
+ style = mapboxer::basemaps$Mapbox$light_v10,
+ bounds = sf::st_bbox(geodat),
+ fitBoundsOptions = list(padding = 20),
+ width = "100%",
+ height = "100%",
+ token = get_tkn()
+ ) %>%
+ mapboxer::add_fill_layer(
+ id = "map",
+ fill_color = c("get", "fill_color"),
+ fill_opacity = 0.6
+ ) %>%
+ mapboxer::add_line_layer(
+ id = "map-lines",
+ line_color = "black",
+ line_width = 1,
+ line_opacity = 0.1
+ ) %>%
+ mapboxer::add_line_layer(
+ id = "countries",
+ source = mapboxer::as_mapbox_source(world3),
+ line_color = "black",
+ line_width = 1
+ ) %>%
+ mapboxer::add_tooltips(
+ "map", paste0(
+ "Country: {{country}}
",
+ "Region: {{region}}
",
+ "Year: {{year}}
",
+ "Pct STI: {{pct}}%"))
+
+ build_map(p, bins, leglbl, cols, title, bboxstr, height, width)
+}
+
+#' @importFrom htmltools tags tagList HTML browsable
+build_map <- function(p, bins, leglbl, cols, title, bboxstr, height, width) {
+ tags <- htmltools::tags
+
+ res <- htmltools::tagList(
+ tags$script(htmltools::HTML(bboxstr)),
+ tags$script(htmltools::HTML("
+ function throttle(callback, interval) {
+ let enableCall = true;
+
+ return function(...args) {
+ if (!enableCall) return;
+
+ enableCall = false;
+ callback.apply(this, args);
+ setTimeout(() => enableCall = true, interval);
+ }
+ }
+
+ var hoveredId = null;
+
+ function resize() {
+ console.log('resize...')
+ var keys = Object.keys(window.mapboxer._widget);
+ keys.forEach(function(key) {
+ var curmap = window.mapboxer._widget[key].map;
+ curmap.resize();
+ curmap.fitBounds(window.mapboxer_bounds[key]);
+ });
+ };
+
+ var doresize;
+ window.onresize = function() {
+ clearTimeout(doresize);
+ doresize = setTimeout(resize, 100);
+ };
+
+ window.onload = function() {
+ var scatter = document.getElementById('scatterplot');
+ var bnds = {};
+ var keys = Object.keys(window.mapboxer._widget);
+ keys.forEach(function(key) {
+ var curmap = window.mapboxer._widget[key].map;
+ bnds[key] = curmap.getBounds();
+ });
+ window.mapboxer_bounds = bnds;
+
+ var curmap = window.mapboxer._widget[keys[0]].map
+
+ let onHover = (e) => {
+ if (
+ e.features.length > 0 &&
+ e.features[0].properties.country_region !== hoveredId
+ ) {
+ Plotly.restyle(scatter, {'line': {'color': 'darkgray'}}, [])
+ hoveredId = e.features[0].properties.country_region;
+ curmap.setPaintProperty(
+ 'map-lines',
+ 'line-opacity',
+ ['match', ['get', 'country_region'], hoveredId, 1, 0.1]
+ );
+ curmap.setPaintProperty(
+ 'map-lines',
+ 'line-width',
+ ['match', ['get', 'country_region'], hoveredId, 3, 1]
+ );
+ var traceidx = scatter.data.findIndex(obj => {
+ return obj.name === hoveredId;
+ });
+ if (traceidx > -1) {
+ // console.log('restyling hover...');
+ // Plotly.addTraces(scatterplot, [{x: [2010], y: [20], mode: 'markers', marker: {symbol: 6, size: 12}, xaxis: 'x', yaxis: 'y'}])
+ Plotly.restyle(scatter, {'line': {'color': 'blue'}}, [traceidx])
+ }
+ }
+ };
+
+ let onUnhover = (e) => {
+ curmap.setPaintProperty(
+ 'map-lines',
+ 'line-opacity',
+ 0.1
+ );
+ curmap.setPaintProperty(
+ 'map-lines',
+ 'line-width',
+ 1
+ );
+
+ // console.log('restyling unhover...');
+ Plotly.restyle(scatter, {'line': {'color': 'darkgray'}}, [])
+ hoveredId = null;
+ };
+
+ // curmap.on('mousemove', 'map', throttle(onHover, 50));
+ // curmap.on('mouseleave', 'map', throttle(onUnhover, 50));
+ curmap.on('mousemove', 'map', onHover);
+ curmap.on('mouseleave', 'map', onUnhover);
+
+ scatterplot.on('plotly_hover', function(d) {
+ // console.log('restyling plotly hover...')
+ Plotly.restyle(scatterplot, {'line': {'color': 'blue'}}, [d.points[0].curveNumber])
+ var hoveredId = d.points[0].data.name;
+ var country = hoveredId.split('___')[0];
+ if (bboxes && bboxes[country]) {
+ curmap.fitBounds([bboxes[country].bottomleft, bboxes[country].topright]);
+ }
+ curmap.setPaintProperty(
+ 'map-lines',
+ 'line-opacity',
+ ['match', ['get', 'country_region'], hoveredId, 1, 0.1]
+ );
+ curmap.setPaintProperty(
+ 'map-lines',
+ 'line-width',
+ ['match', ['get', 'country_region'], hoveredId, 4, 1]
+ );
+ });
+
+ scatterplot.on('plotly_unhover', function(d) {
+ if (bboxes && bboxes.all) {
+ curmap.fitBounds([bboxes.all.bottomleft, bboxes.all.topright]);
+ }
+ curmap.setPaintProperty(
+ 'map-lines',
+ 'line-opacity',
+ 0.1
+ );
+ curmap.setPaintProperty(
+ 'map-lines',
+ 'line-width',
+ 1
+ );
+ // console.log('restyling plotly unhover...')
+ Plotly.restyle(scatterplot, {'line': {'color': 'darkgray'}}, [d.points[0].curveNumber]);
+ });
+ }
+ ")),
+ tags$head(tags$style("
+ body {
+ margin: 0px;
+ }
+ .geo-container {
+ position: relative;
+ width: fit-content;
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif
+ }
+ .title {
+ position: absolute;
+ font-size: 16px;
+ top: 0;
+ left: 0;
+ }
+ .legend-container {
+ position: absolute;
+ display: flex;
+ flex-direction: column;
+ top: 10px;
+ right: 10px;
+ background: rgba(255, 255, 255, 0.7);
+ }
+ .legend-entry {
+ display: flex;
+ flex-direction: row;
+ margin-bottom: 2px;
+ }
+ .legend-color-box {
+ width: 17px;
+ height: 17px;
+ margin-right: 10px;
+ }
+ .legend-item-text {
+ font-size: 14px;
+ }
+ ")),
+ tags$div(
+ class = "geo-container",
+ style = paste0("width: ", width, "; height: ", height, ";"),
+ p,
+ tags$div(class = "title", title),
+ tags$div(class = "legend-container",
+ lapply(seq_along(leglbl), function(ii) {
+ tags$div(class = "legend-entry",
+ tags$div(
+ class = "legend-color-box",
+ style = paste0("background: ", cols[ii], ";")
+ ),
+ tags$div(
+ class = "legend-item-text",
+ leglbl[ii]
+ )
+ )
+ })
+ )
+ )
+ )
+
+ class(res) <- c("idhs_map_vis", class(res))
+
+ res
+}
+
+#' @export
+print.idhs_map_vis <- function(x, ...) {
+ class(x) <- setdiff(class(x), "idhs_map_vis")
+ print(htmltools::browsable(x))
+}
diff --git a/R/vis_geo_scatter.R b/R/vis_geo_scatter.R
new file mode 100644
index 0000000..e28a604
--- /dev/null
+++ b/R/vis_geo_scatter.R
@@ -0,0 +1,34 @@
+#' Create linked scatter and geo visualization
+#' @param rates TODO
+#' @param title TODO
+#' @param geo_dir TODO
+#' @param bins TODO
+#' @importFrom htmltools browsable
+#' @export
+geo_scatter_vis <- function(rates, title, geo_dir, bins = NULL) {
+ p1 <- scatter_vis_all(rates)
+
+ p2 <- geo_vis_all(
+ rates = rates,
+ title = title,
+ geo_dir = geo_dir,
+ bins = bins,
+ width = "40vw"
+ )
+
+ tags <- htmltools::tags
+ res <- htmltools::tagList(
+ tags$head(tags$style("
+ .window-container {
+ display: flex;
+ flex-direction: row;
+ }
+ ")),
+ tags$div(class = "window-container",
+ tags$div(style = "width: 60vw; height: 100vh;", p1),
+ p2
+ )
+ )
+
+ htmltools::browsable(res)
+}
diff --git a/R/vis_scatter.R b/R/vis_scatter.R
new file mode 100644
index 0000000..21f2588
--- /dev/null
+++ b/R/vis_scatter.R
@@ -0,0 +1,122 @@
+#' @importFrom DescTools BinomCI
+get_ci <- function(x, n) {
+ # default 0.95 Wilson
+ dplyr::as_tibble(DescTools::BinomCI(x, n)) %>%
+ dplyr::mutate_all(function(x) x * 100) %>%
+ dplyr::rename(
+ percent = .data$est, lower = .data$lwr.ci, upper = .data$upr.ci)
+}
+
+#' Create scatterplot visualization for all geographies
+#' @param rates TODO
+#' @importFrom plotly plot_ly add_trace layout subplot
+#' @importFrom forcats fct_reorder
+#' @importFrom tidyr nest
+#' @export
+scatter_vis_all <- function(rates) {
+ alldat <- dplyr::bind_rows(
+ lapply(rates, function(x) x$yrregstats)) %>%
+ dplyr::mutate(
+ country = forcats::fct_reorder(.data$country, .data$pct, mean)
+ )
+ alldat <- dplyr::bind_cols(alldat, get_ci(alldat$nsti, alldat$n))
+
+ ctrydat <- dplyr::bind_rows(
+ lapply(rates, function(x) x$yrstats)) %>%
+ dplyr::mutate(
+ country = factor(.data$country, levels = levels(alldat$country)),
+ region = "")
+ ctrydat <- dplyr::bind_cols(ctrydat, get_ci(ctrydat$nsti, ctrydat$n))
+
+ pdat <- dplyr::left_join(
+ tidyr::nest(alldat, rdat = -c("country")),
+ tidyr::nest(ctrydat, cdat = -c("country")),
+ by = "country"
+ ) %>%
+ dplyr::arrange(.data$country)
+
+ maxrng <- max(alldat$pct)
+ yrrng <- range(alldat$year)
+
+ figs <- lapply(seq_len(nrow(pdat)), function(ii) {
+ curr <- tidyr::nest(pdat$rdat[[ii]], data = -c("region"))
+ curc <- pdat$cdat[[ii]]
+ cur_cntry <- as.character(pdat$country[ii])
+
+ fig <- plotly::plot_ly()
+ for (jj in seq_len(nrow(curr))) {
+ tmp <- curr$data[[jj]]
+ cur_reg <- as.character(curr$region[jj])
+ fig <- fig %>%
+ # plotly::add_ribbons(
+ # x = tmp$year,
+ # ymin = tmp$lower,
+ # ymax = tmp$upper,
+ # color = I("darkgray"),
+ # line = list(color = "transparent"),
+ # alpha = 0.4,
+ # hoverinfo = "text",
+ # showlegend = FALSE,
+ # visible = "legendonly",
+ # name = paste(cur_cntry, cur_reg, "CI", sep = "___")
+ # ) %>%
+ plotly::add_trace(
+ x = tmp$year,
+ y = tmp$pct,
+ color = I("darkgray"),
+ line = list(width = 3),
+ size = 4,
+ alpha = 0.6,
+ name = paste(cur_cntry, cur_reg, sep = "___"),
+ type = "scatter",
+ mode = "lines+markers",
+ hoverinfo = "text",
+ text = paste0(round(tmp$pct, 1), "%
Region: ", cur_reg),
+ showlegend = FALSE,
+ name = paste(cur_cntry, cur_reg, sep = "___")
+ ) %>%
+ plotly::layout(
+ yaxis = list(
+ range = list(0, maxrng),
+ zeroline = FALSE
+ ),
+ # TODO: determine fixed tickvals dynamically
+ xaxis = list(
+ range = list(yrrng[1] - 1, yrrng[2] + 1),
+ tickmode = "array",
+ ticktext = c(
+ "'00",
+ "'05",
+ "'10",
+ "'15"
+ ),
+ tickvals = c(
+ 2000,
+ 2005,
+ 2010,
+ 2015
+ ),
+ categoryorder = "array",
+ categoryarray = c(
+ "'00",
+ "'05",
+ "'10",
+ "'15"
+ ),
+ tickangle = 0,
+ zeroline = FALSE
+ )
+ )
+ }
+ fig
+ })
+
+ res <- plotly::subplot(figs, nrows = 2, shareY = TRUE, shareX = TRUE,
+ margin = 0.004)
+
+ res$elementId <- "scatterplot"
+
+ res$sizingPolicy$defaultHeight <- "100%"
+
+ res
+}
diff --git a/man/calc_rates.Rd b/man/calc_rates.Rd
index 2684f43..6fb605f 100644
--- a/man/calc_rates.Rd
+++ b/man/calc_rates.Rd
@@ -2,12 +2,26 @@
% Please edit documentation in R/calc.R
\name{calc_rates}
\alias{calc_rates}
-\title{Calculate rates from ipums data and associated geography}
+\title{Calculate rates from IPUMS DHS data}
\usage{
-calc_rates(x, geo_dir)
+calc_rates(x, geo_dir, num_var, num_cond, denom_var = NULL, denom_cond = NULL)
+}
+\arguments{
+\item{x}{TODO}
+
+\item{geo_dir}{TODO}
+
+\item{num_var}{TODO}
+
+\item{num_cond}{TODO}
+
+\item{denom_var}{TODO}
+
+\item{denom_cond}{TODO
+Calculate rates from ipums data and associated geography}
}
\description{
-Calculate rates from ipums data and associated geography
+Calculate rates from IPUMS DHS data
}
\examples{
\dontrun{
diff --git a/man/geo_scatter_vis.Rd b/man/geo_scatter_vis.Rd
new file mode 100644
index 0000000..bc7ed82
--- /dev/null
+++ b/man/geo_scatter_vis.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/vis_geo_scatter.R
+\name{geo_scatter_vis}
+\alias{geo_scatter_vis}
+\title{Create linked scatter and geo visualization}
+\usage{
+geo_scatter_vis(rates, title, geo_dir, bins = NULL)
+}
+\arguments{
+\item{rates}{TODO}
+
+\item{title}{TODO}
+
+\item{geo_dir}{TODO}
+
+\item{bins}{TODO}
+}
+\description{
+Create linked scatter and geo visualization
+}
diff --git a/man/geo_vis_all.Rd b/man/geo_vis_all.Rd
new file mode 100644
index 0000000..2b0f614
--- /dev/null
+++ b/man/geo_vis_all.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/vis_geo.R
+\name{geo_vis_all}
+\alias{geo_vis_all}
+\title{Create Mapbox visualization of rate data}
+\usage{
+geo_vis_all(
+ rates,
+ geo_dir,
+ title = "",
+ bins = NULL,
+ width = "100vw",
+ height = "100vh"
+)
+}
+\arguments{
+\item{rates}{TODO}
+
+\item{geo_dir}{TODO}
+
+\item{title}{TODO}
+
+\item{bins}{TODO}
+
+\item{width}{TODO}
+
+\item{height}{TODO}
+}
+\description{
+Create Mapbox visualization of rate data
+}
diff --git a/man/preprocess_geo.Rd b/man/preprocess_geo.Rd
index 7921b9d..92819ca 100644
--- a/man/preprocess_geo.Rd
+++ b/man/preprocess_geo.Rd
@@ -6,6 +6,11 @@
\usage{
preprocess_geo(x, output_dir)
}
+\arguments{
+\item{x}{TODO}
+
+\item{output_dir}{TODO}
+}
\description{
Preprocess (download) shapfiles associated with an IPUMS extract
}
diff --git a/man/preprocess_ipums.Rd b/man/preprocess_ipums.Rd
index e2726fe..f2bc1fb 100644
--- a/man/preprocess_ipums.Rd
+++ b/man/preprocess_ipums.Rd
@@ -6,6 +6,11 @@
\usage{
preprocess_ipums(input_xml, output_file)
}
+\arguments{
+\item{input_xml}{TODO}
+
+\item{output_file}{TODO}
+}
\description{
Preprocess IPUMS extract
}
diff --git a/man/scatter_vis_all.Rd b/man/scatter_vis_all.Rd
new file mode 100644
index 0000000..19d5992
--- /dev/null
+++ b/man/scatter_vis_all.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/vis_scatter.R
+\name{scatter_vis_all}
+\alias{scatter_vis_all}
+\title{Create scatterplot visualization for all geographies}
+\usage{
+scatter_vis_all(rates)
+}
+\arguments{
+\item{rates}{TODO}
+}
+\description{
+Create scatterplot visualization for all geographies
+}
diff --git a/man/view_var_descs.Rd b/man/view_var_descs.Rd
index f7960a5..410ccc3 100644
--- a/man/view_var_descs.Rd
+++ b/man/view_var_descs.Rd
@@ -4,7 +4,12 @@
\alias{view_var_descs}
\title{View html table of IPUMS extract variable descriptions}
\usage{
-view_var_descs(dd, include_geo = FALSE)
+view_var_descs(x, include_geo = FALSE)
+}
+\arguments{
+\item{x}{TODO}
+
+\item{include_geo}{TODO}
}
\description{
View html table of IPUMS extract variable descriptions
diff --git a/scripts/data.R b/scripts/data.R
new file mode 100644
index 0000000..915a3dd
--- /dev/null
+++ b/scripts/data.R
@@ -0,0 +1,14 @@
+# ---------- preprocess the data (only run once) --------- #
+
+dd <- preprocess_ipums(
+ input_xml = "ext-data/idhs/extract2/idhs_00002.xml",
+ output_file = "ext-data/idhs/extract2/dd.rds"
+)
+
+preprocess_geo(dd, output_dir = "ext-data/idhs/geo/")
+
+ipumsr::ipums_conditions()
+# view extract information
+ipumsr::ipums_view(attr(dd, "ipums_ddi"))
+
+
diff --git a/scripts/explore.R b/scripts/explore.R
new file mode 100644
index 0000000..2e5a97b
--- /dev/null
+++ b/scripts/explore.R
@@ -0,0 +1,47 @@
+library()
+# load_all()
+
+# NOTE: before running this script run the code in data.R
+
+# --------------- read in preprocessed data -------------- #
+
+dd <- readRDS("ext-data/idhs/extract2/dd.rds")
+
+# html page showing variable names and their descriptions
+view_var_descs(dd)
+view_var_descs(dd, include_geo = TRUE)
+
+# ---------- get regional yearly STI prevalence ---------- #
+
+# numerator is stianyr (had STI in last 12 months) = "Yes"
+attributes(dd$stianyr)[c("labels", "label")]
+# denominator is all responses except sexactiv4wk = "never had intercourse"
+attributes(dd$sexactiv4wk)[c("labels", "label")]
+
+sti_rates <- calc_rates(dd,
+ geo_dir = "ext-data/idhs/geo/",
+ num_var = "stianyr",
+ num_cond = 1,
+ denom_var = "sexactiv4wk",
+ denom_cond = c(1:9)
+)
+
+# -------------------- visualizations -------------------- #
+
+# individual scatter
+scatter_vis_all(sti_rates)
+
+# individual geo
+geo_vis_all(sti_rates,
+ title = "STI rates",
+ geo_dir = "ext-data/idhs/geo/",
+ bins = c(0, 2, 4, 6, 10, 15, 35)
+)
+
+# joint
+geo_scatter_vis(
+ sti_rates,
+ title = "STI rates",
+ geo_dir = "ext-data/idhs/geo/",
+ bins = c(0, 2, 4, 6, 10, 15, 35)
+)