-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
436 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.DS_Store | ||
_ignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
Package: ipumsvis | ||
Title: What the Package Does (One Line, Title Case) | ||
Version: 0.0.0.9000 | ||
Authors@R: | ||
person("First", "Last", , "first.last@example.com", role = c("aut", "cre"), | ||
comment = c(ORCID = "YOUR-ORCID-ID")) | ||
Description: What the package does (one paragraph). | ||
License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a | ||
license | ||
Encoding: UTF-8 | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.1.2 | ||
Imports: | ||
dplyr, | ||
DT, | ||
ipumsr, | ||
mapboxer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(calc_rates) | ||
export(preprocess_geo) | ||
export(preprocess_ipums) | ||
export(view_var_descs) | ||
importFrom(DT,datatable) | ||
importFrom(dplyr,"%>%") | ||
importFrom(dplyr,.data) | ||
importFrom(dplyr,filter) | ||
importFrom(dplyr,group_by) | ||
importFrom(dplyr,rename_all) | ||
importFrom(dplyr,tibble) | ||
importFrom(ipumsr,as_factor) | ||
importFrom(ipumsr,lbl_clean) | ||
importFrom(ipumsr,read_ipums_ddi) | ||
importFrom(ipumsr,read_ipums_micro) | ||
importFrom(utils,download.file) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
#' Calculate rates from ipums data and associated geography | ||
#' @examples | ||
#' \dontrun{ | ||
#' calc_rates(dd, geo_dir = "ext-data/idhs/geo") | ||
#' } | ||
#' @importFrom %>% dplyr group_by ungroup rename_all filter summarise n | ||
#' select all_of mutate | ||
#' @importFrom ipumsr read_ipums_sf as_factor lbl_clean zap_labels | ||
#' @export | ||
calc_rates <- function(x, geo_dir, num_var, denom_var, num_cond, denom_cond) { | ||
check_ipums_data(x, "x") | ||
x$country2 <- ipumsr::as_factor(ipumsr::lbl_clean(x$country)) | ||
|
||
if (!dir.exists(geo_dir)) | ||
stop("directory '", geo_dir, "' doesn't exist") | ||
|
||
ff <- list.files(geo_dir, pattern = "geo_", full.names = TRUE) | ||
if (length(ff) == 0) { | ||
stop("no files starting with 'geo_' were found in the directory '", | ||
geo_dir, "'") | ||
} | ||
|
||
ftbl <- dplyr::tibble( | ||
f = ff, | ||
country = substr(basename(ff), 5, 6), | ||
start_yr = as.integer(substr(basename(ff), 7, 10)), | ||
end_yr = as.integer(substr(basename(ff), 12, 15)) | ||
) | ||
|
||
# there are duplicate shapefiles for many countries | ||
# for now, choose those that have the latest data | ||
# and also go back the farthest | ||
# (could add an option later to do latest start and end date) | ||
ftbl <- ftbl %>% | ||
dplyr::group_by(.data$country) %>% | ||
dplyr::filter(end_yr == max(end_yr)) %>% | ||
dplyr::filter(start_yr == max(start_yr)) %>% | ||
dplyr::ungroup() | ||
ff <- ftbl$f | ||
|
||
xnms <- names(x) | ||
|
||
res <- list() | ||
for (f in ff) { | ||
geodf <- ipumsr::read_ipums_sf(f, verbose = FALSE) %>% | ||
dplyr::rename_all(tolower) | ||
names(geodf)[3] <- "dhscode" | ||
geodf$dhscode <- as.integer(geodf$dhscode) | ||
cur_var <- gsub(".*(geo_.*).zip", "\\1", f) | ||
cur_cntry <- geodf$cntry_name[1] | ||
message(cur_cntry, ": ", cur_var) | ||
|
||
if (!cur_var %in% xnms) { | ||
message(" not found...") | ||
next | ||
} | ||
|
||
vars <- c("year", "perweight", num_var, denom_var) | ||
# first get just the country | ||
tmp <- x %>% | ||
dplyr::filter(.data$country2 == cur_cntry) | ||
# then get rid of years where n == nna for numerator variable | ||
tmp2 <- tmp %>% | ||
dplyr::group_by(.data$year) %>% | ||
dplyr::summarise( | ||
n = dplyr::n(), | ||
nna = length(which(is.na(.data[[num_var]]))) | ||
) %>% | ||
dplyr::filter(.data$n == nna) | ||
tmp <- tmp %>% | ||
dplyr::filter(!.data$year %in% tmp2$year, ) %>% | ||
dplyr::select(all_of(c(cur_var, vars))) %>% | ||
dplyr::rename_all(function(x) | ||
ifelse(x %in% vars, x, "dhscode")) %>% | ||
dplyr::mutate( | ||
region = ipumsr::as_factor(ipumsr::lbl_clean(.data$dhscode)), | ||
dhscode = ipumsr::zap_labels(.data$dhscode) | ||
) | ||
|
||
if (nrow(tmp) == 0) { | ||
message(" no data for '", num_var, "'...") | ||
next | ||
} | ||
if (all(is.na(tmp[[denom_var]]))) { | ||
message(" denominator variable '", denom_var, "' is all NA...") | ||
next | ||
} | ||
|
||
if ( | ||
length(which(is.na(tmp$year))) != 0 || | ||
# length(which(is.na(tmp$sexactiv4wk))) != 0 || | ||
length(which(is.na(tmp[[num_var]]))) != 0 | ||
) { | ||
# browser() | ||
stop("something isn't right...") | ||
} | ||
|
||
yrregstats <- tmp %>% | ||
dplyr::filter(.data[[denom_var]] %in% denom_cond) %>% | ||
dplyr::group_by(.data$year, .data$dhscode, .data$region) %>% | ||
dplyr::summarise( | ||
n = sum(.data$perweight), | ||
n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)), | ||
nsti = sum((.data[[num_var]] == 1) * .data$perweight), | ||
nsti2 = length(which(.data[[num_var]] == 1)), | ||
pct = 100 * .data$nsti / .data$n, | ||
.groups = "drop") %>% | ||
dplyr::mutate(country = cur_cntry, geo_var = cur_var) | ||
|
||
regstats <- tmp %>% | ||
dplyr::filter(.data[[denom_var]] %in% denom_cond) %>% | ||
dplyr::group_by(.data$dhscode, .data$region) %>% | ||
dplyr::summarise( | ||
n = sum(.data$perweight), | ||
n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)), | ||
nsti = sum((.data[[num_var]] == 1) * .data$perweight), | ||
nsti2 = length(which(.data[[num_var]] == 1)), | ||
pct = 100 * .data$nsti / .data$n, | ||
.groups = "drop") %>% | ||
dplyr::mutate(country = cur_cntry, geo_var = cur_var) | ||
|
||
yrstats <- tmp %>% | ||
dplyr::filter(.data[[denom_var]] %in% denom_cond) %>% | ||
dplyr::group_by(.data$year) %>% | ||
dplyr::summarise( | ||
n = sum(.data$perweight), | ||
n2 = dplyr::n(), # length(which(.data[[num_var]] %in% num_cond)), | ||
nsti = sum((.data[[num_var]] == 1) * .data$perweight), | ||
nsti2 = length(which(.data[[num_var]] == 1)), | ||
pct = 100 * .data$nsti / .data$n) %>% | ||
dplyr::mutate(country = cur_cntry, geo_var = cur_var) | ||
|
||
allstats <- tmp %>% | ||
dplyr::filter(.data[[denom_var]] %in% denom_cond) %>% | ||
dplyr::summarise( | ||
n = sum(.data$perweight), | ||
n2 = length(which(.data[[num_var]] %in% num_cond)), | ||
nsti = sum((.data[[num_var]] == 1) * .data$perweight), | ||
nsti2 = length(which(.data[[num_var]] == 1)), | ||
pct = 100 * .data$nsti / .data$n) %>% | ||
dplyr::mutate(country = cur_cntry, geo_var = cur_var) | ||
|
||
res[[cur_cntry]] <- list( | ||
country = cur_cntry, | ||
geo_var = cur_var, | ||
allstats = allstats, | ||
yrstats = yrstats, | ||
regstats = regstats, | ||
yrregstats = yrregstats, | ||
geodf = geodf | ||
) | ||
} | ||
class(res) <- c("list", "ipums-rates") | ||
res | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
|
||
#' @importFrom dplyr bind_rows recode | ||
#' @importFrom mapboxer as_mapbox_source mapboxer add_fill_layer | ||
#' add_line_layer as_mapbox_source add_tooltips basemaps | ||
geo_vis_all <- function( | ||
rates, geo_dir, bins = NULL, width = NULL, height = NULL | ||
) { | ||
world <- ipumsr::read_ipums_sf( | ||
file.path(geo_dir, "IPUMSI_world_release2020.zip"), | ||
verbose = FALSE | ||
) %>% | ||
dplyr::rename_all(tolower) %>% | ||
# DRC is the only one coded differently here vs. in IPUMS data | ||
# setdiff(sort(unique(x$country2)), world$cntry_name) | ||
dplyr::mutate(country = dplyr::recode(.data$cntry_name, | ||
"Congo, DRC" = "Congo Democratic Republic" | ||
)) | ||
|
||
world2 <- dplyr::filter(world, .data$country %in% names(rates)) | ||
|
||
alldat <- dplyr::bind_rows(lapply(rates, function(x) x$yrregstats)) %>% | ||
dplyr::group_by(.data$country, .data$region, .data$dhscode) %>% | ||
dplyr::filter(year == max(.data$year) | ||
) | ||
|
||
allgeo <- lapply(rates, function(x) x$geodf) %>% | ||
dplyr::bind_rows() %>% | ||
dplyr::rename(country = "cntry_name") | ||
|
||
if (is.null(bins)) { | ||
bins <- pretty(alldat$pct, n = 8) | ||
# classInt::classIntervals(alldat$pct, n = 10) | ||
} | ||
cols <- viridisLite::viridis(length(bins) - 1) %>% | ||
substr(1, 7) | ||
|
||
leglbl <- paste0(bins[-length(bins)], "-", bins[-1], "%") | ||
|
||
geodat <- ipumsr::ipums_shape_inner_join(alldat, allgeo, | ||
by = c("country", "dhscode")) %>% | ||
dplyr::mutate( | ||
fill_color = cols[cut(.data$pct, bins, labels = FALSE)], | ||
pct = round(.data$pct, 1) | ||
) | ||
|
||
# setdiff(names(sti_rates), geodat$country) | ||
|
||
# legend: https://docs.mapbox.com/help/tutorials/choropleth-studio-gl-pt-2/ | ||
mapboxer::as_mapbox_source(geodat) %>% | ||
mapboxer::mapboxer( | ||
style = mapboxer::basemaps$Mapbox$light_v10, | ||
bounds = sf::st_bbox(geodat), | ||
fitBoundsOptions = list(padding = 20), | ||
width = width, | ||
height = height | ||
) %>% | ||
mapboxer::add_fill_layer( | ||
id = "map", | ||
fill_color = c("get", "fill_color"), | ||
fill_opacity = 0.6 | ||
) %>% | ||
mapboxer::add_line_layer( | ||
source = mapboxer::as_mapbox_source(world2), | ||
line_color = "black", | ||
line_width = 1 | ||
) %>% | ||
mapboxer::add_tooltips( | ||
"map", paste0( | ||
"Country: {{country}}<br>", | ||
"Region: {{region}}<br>", | ||
"Year: {{year}}<br>", | ||
"Pct STI: {{pct}}%")) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#' View html table of IPUMS extract variable descriptions | ||
#' @importFrom DT datatable | ||
#' @importFrom dplyr filter tibble .data | ||
#' @export | ||
view_var_descs <- function(dd, include_geo = FALSE) { | ||
check_ipums_data(x, "x") | ||
|
||
descs <- sapply(dd, function(x) attr(x, "var_desc")) | ||
descs <- dplyr::tibble( | ||
name = names(descs), | ||
desc = unname(descs) | ||
) | ||
if (!include_geo) { | ||
descs <- dplyr::filter(descs, !grepl("^geo_", .data$name)) | ||
} | ||
|
||
DT::datatable(descs, options = list(paging = FALSE)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#' Preprocess IPUMS extract | ||
#' @importFrom ipumsr read_ipums_ddi read_ipums_micro as_factor lbl_clean | ||
#' @importFrom dplyr rename_all | ||
#' @examples | ||
#' \dontrun{ | ||
#' preprocess_ipums( | ||
#' input_xml = "ext-data/idhs/extract2/idhs_00002.xml", | ||
#' output_file = "ext-data/idhs/extract2/dd.rds" | ||
#' ) | ||
#' } | ||
#' @export | ||
preprocess_ipums <- function(input_xml, output_file) { | ||
message("reading xml") | ||
ddi <- ipumsr::read_ipums_ddi(input_xml) | ||
message("reading microdata") | ||
dd <- ipumsr::read_ipums_micro(ddi) | ||
dd <- dplyr::rename_all(dd, tolower) | ||
attr(dd, "ipums_ddi") <- ddi | ||
|
||
message("saving output") | ||
saveRDS(dd, file = output_file) | ||
|
||
invisible(dd) | ||
} | ||
|
||
# https://www.idhsdata.org/idhs/gis.shtml | ||
|
||
#' Preprocess (download) shapfiles associated with an IPUMS extract | ||
#' @examples | ||
#' \dontrun{ | ||
#' dd <- preprocess_ipums( | ||
#' input_xml = "ext-data/idhs/extract2/idhs_00002.xml", | ||
#' output_file = "ext-data/idhs/extract2/dd.rds" | ||
#' ) | ||
#' preprocess_geo(dd, output_dir = "ext-data/idhs/geo") | ||
#' } | ||
#' @export | ||
#' @importFrom utils download.file | ||
preprocess_geo <- function(x, output_dir) { | ||
check_ipums_data(x, "x") | ||
|
||
if (!dir.exists(output_dir)) | ||
dir.create(output_dir, recursive = TRUE) | ||
|
||
prefix <- "https://www.idhsdata.org/idhs/resources/gis/" | ||
|
||
nms <- names(x) | ||
geonms <- sort(nms[grepl("geo_.*_.*", nms)]) | ||
|
||
if (length(geonms) == 0) { | ||
message("no shapefiles found associated with this data") | ||
return(invisible(FALSE)) | ||
} | ||
|
||
for (nm in geonms) { | ||
message("downloading shapefiles for ", nm) | ||
tryres <- try(utils::download.file( | ||
paste0(prefix, nm, ".zip"), | ||
file.path(output_dir, paste0(nm, ".zip")), | ||
quiet = TRUE | ||
), silent = TRUE) | ||
if (inherits(tryres, "try-error")) { | ||
message(" could not find shapefiles associated with this country") | ||
} | ||
} | ||
|
||
message("downloading shapefiles for world countries") | ||
tryres <- try(utils::download.file( | ||
"https://international.ipums.org/international/resources/gis/IPUMSI_world_release2020.zip", | ||
file.path(output_dir, "IPUMSI_world_release2020.zip"), | ||
quiet = TRUE | ||
)) | ||
if (inherits(tryres, "try-error")) { | ||
message(" could not find world shapefiles") | ||
} | ||
|
||
invisible(TRUE) | ||
} | ||
|
||
# https://spatialdata.dhsprogram.com/population-estimates/ | ||
# https://dhs-sites.s3.amazonaws.com/SDR/boundaries/production/data/Population_Estimates_ALL.zip |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
|
||
check_ipums_data <- function(x, nm = "x") { | ||
if (!"ipums_ddi" %in% names(attributes(x))) { | ||
stop("Input '", nm, "' must come from preprocess_ipums()") | ||
} | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.