-
Notifications
You must be signed in to change notification settings - Fork 2
/
ACS_AllYears_Journey2Work_purrr.r
228 lines (181 loc) · 9.41 KB
/
ACS_AllYears_Journey2Work_purrr.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# Initial upload to GITHUB on 2/16/2022
########################################################################
# ACS_AllYears_Journey2Work_purrr.r
# Use the purrr function map_dfr to iterate tidycensus call for 1-year, 5-year ACS data.
# Data on Means of Transportation to Work Variables, All ACS years
# Nation and US States and counties and places (added 1/28/22)
#
# American Community Survey, 1-year, 2005-2019 data
# ACS 5-year data: 2005-2009, 2010-2014, 2015-2019
# ACS 5-year data: 2006-2010, 2010-2014, 2015-2019 (this works for B08006....)
## ACS 5-year data: 2006-2010, 2011-2015, 2016-2020 @@ should work after 3/17/22.
#
# Data is meant to be "stacked" with one row/record for each geography/year combination
# -- January 18, 2022 --
## -- January 28, 2022 -- (Add County and Place!!!)
## -- February 7, 2022 -- "skip-a-year" is working. And new sets of
## nonoverlapping 5-yrs when 2016-2020 is released 3/17/22
########################################################################
# These R packages were installed in previous sessions, so no need to re-install
# install.packages("tidyverse")
# install.packages("tidycensus")
# install.packages("janitor")
# install.packages("purrr")
# Load relevant libraries into R-session.
library(purrr)
library(tidyverse)
library(tidycensus)
library(janitor)
library(plyr)
library(dplyr)
library(xlsx)
# Census API Key was installed in previous sessions, so no need to re-install
# census_api_key("fortycharacterkeysentbyCensusBureau",install=TRUE)
Sys.getenv("CENSUS_API_KEY")
# Set a list of variables to extract in each iteration of get_acs
# Total Population is problematic for 2005-09 since GQ pop wasn't collected in 2005....
# selvars2 was a previous "best of" variables for introductory analyses....
selvars2 <- c(# TotalPop_ = "B06001_001", # Total Population
Med_HHInc_ = "B19013_001", # Median Household Income
Agg_HHInc_ = "B19025_001", # Aggregate Household Income
HHldPop_ = "B11002_001", # Population in Households
Househlds_ = "B25003_001", # Total Households
Owner_OccDU_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_ = "B25077_001")
selvars <- c(total_pop_ = "B06001_001", # Total Population
hh_pop_ = "B11002_001", # Population in Households
households_ = "B25003_001", # Total Households
agg_commtime_ = "B08013_001", # Aggregate Travel Time to Work
agg_vehicle_ = "B08015_001", # Aggregate Vehicles Used in Journey-to-Work
total_worker_ = "B08006_001",
drove_alone_ = "B08006_003",
carpool_2_ = "B08006_005",
carpool_3_ = "B08006_006",
carpool_4p_ = "B08006_007",
transit_ = "B08006_008",
bicycle_ = "B08006_014",
walked_ = "B08006_015",
other3_ = "B08006_016", # taxicab, motorcycle, other means
at_home_ = "B08006_017",
bus_ = "B08006_009",
subway_ = "B08006_010",
railroad_ = "B08006_011",
lightrail_ = "B08006_012",
ferry_ = "B08006_013")
###################################################################
# Iterate on ACS 1-year data, using the purrr function "map_dfr"
###################################################################
test1 <- get_acs(survey="acs1",geography="place",state="CA",variables=selvars,
year=2019,output='wide')
# This doesn't work for 2005 since the ACS 2005 B08006 table
# had extra records for Carpools: carpool 4, carpool 5-6, and carpool 7+
# years <- 2005:2019
years <- 2006:2019
# Testing skip-a-year to see what how we'll handle 2020-X data.....
years <- c(2009:2017,2019)
names(years) <- years
# United States, 2006-2019, single year ACS
zeta1 <- map_dfr(years, ~{ get_acs(survey = "acs1",
geography = "us", # state = "CA",
variables = selvars, output='wide',
year = .x)}, .id = "year")
# Sort the Results by GEOID and then by year
zeta2 <- zeta1[order(zeta1$GEOID,zeta1$year),]
# All US States, 2006-2019, single year ACS
zeta3 <- map_dfr(years, ~{ get_acs(survey = "acs1",
geography = "state", # state = "CA",
variables = selvars, output='wide',
year = .x)}, .id = "year")
# Sort the Results by GEOID and then by year
zeta4 <- zeta3[order(zeta3$GEOID,zeta3$year),]
# Large US Counties, 2006-2019, single year ACS (Population 65K+)
zeta5 <- map_dfr(years, ~{ get_acs(survey = "acs1",
geography = "county", # state = "CA",
variables = selvars, output='wide',
year = .x)}, .id = "year")
# Sort the Results by GEOID and then by year
zeta6 <- zeta5[order(zeta5$GEOID,zeta5$year),]
# Large US Places, 2006-2019, single year ACS (Population 65K+)
zeta7 <- map_dfr(years, ~{ get_acs(survey = "acs1",
geography = "place", # state = "CA",
variables = selvars, output='wide',
year = .x)}, .id = "year")
# Sort the Results by GEOID and then by year
zeta8 <- zeta7[order(zeta7$GEOID,zeta7$year),]
########################################################################
## Output the ACS single year databases....
########################################################################
# note that my working directory (wd) is for a Mac computer.
setwd("~/Desktop/tidycensus_work/output")
# Export the data frames to CSV files, for importing to Excel
write.csv(zeta2,"ACS_AllYears_US_meansoftransportation_2006_2019.csv")
write.csv(zeta4,"ACS_AllYears_states_meansoftransportation_2006_2019.csv")
write.csv(zeta6,"ACS_AllYears_counties_meansoftransportation_2006_2019.csv")
write.csv(zeta8,"ACS_AllYears_places_meansoftransportation_2006_2019.csv")
# just write them in a master XLSX workbook.....
# Grrrr... this doesn't work... java heap space error, whatever that is.....
write.xlsx2(zeta2, "ACS_AllYears_journey_to_work_master_2006_2019.xlsx",
sheetName="nation", append=FALSE)
write.xlsx2(zeta4, "ACS_AllYears_journey_to_work_master_2006_2019.xlsx",
sheetName="states", append=TRUE)
write.xlsx2(zeta6, "ACS_AllYears_journey_to_work_master_2006_2019.xlsx",
sheetName="counties", append=TRUE)
write.xlsx2(zeta8, "ACS_AllYears_journey_to_work_master_2006_2019.xlsx",
sheetName="places", append=TRUE)
####################################################################################
###################################################################
# Iterate on ACS 5-year data, using the purrr function "map_dfr"
###################################################################
# this doesn't work....
test5 <- get_acs(survey="acs5",year=2009,
geography="us", variables="B08006001", output="wide")
# with years 2010, 2014 and 2019, this is a one-year overlap between
# the first two periods.... 2009 --- bombed for some reason.....
# The new 2016-2020 ACS 5-year data is scheduled for release 3/17/22.....
# Data for 2006-2010, 2011-2015, 2016-2020:
# years5 <- c(2010,2015,2020)
years5 <- c(2010,2014,2019)
names(years5) <- years5
# This code is choking on selvars (means of transportation to work)
# getting an "unknown variable 'B08006_001E'" error??? :(
# re-ran... it works on 2006-2010; 2010-2014; 2015-2019..... weird!!
# National, ACS 5-years (2006-2010, 2010-2014, 2015-2019)
zeta9 <- map_dfr(years5, ~{ get_acs(survey = "acs5",
geography = "us", # state = "CA",
variables = selvars, output='wide',
year = .x)}, .id = "year")
# Sort the Results by GEOID and then by year
zeta10 <- zeta9[order(zeta9$GEOID,zeta9$year),]
# State, ACS 5-years (2006-2010, 2010-2014, 2015-2019)
zeta11 <- map_dfr(years5, ~{ get_acs(survey = "acs5",
geography = "state", # state = "CA",
variables = selvars, output='wide',
year = .x)}, .id = "year")
# Sort the Results by GEOID and then by year
zeta12 <- zeta11[order(zeta11$GEOID,zeta11$year),]
# County, ACS 5-years (2006-2010, 2010-2014, 2015-2019)
zeta13 <- map_dfr(years5, ~{ get_acs(survey = "acs5",
geography = "county", # state = "CA",
variables = selvars, output='wide',
year = .x)}, .id = "year")
# Sort the Results by GEOID and then by year
zeta14 <- zeta13[order(zeta13$GEOID,zeta13$year),]
# Place, ACS 5-years (2006-2010, 2010-2014, 2015-2019)
zeta15 <- map_dfr(years5, ~{ get_acs(survey = "acs5",
geography = "place", # state = "CA",
variables = selvars, output='wide',
year = .x)}, .id = "year")
# Sort the Results by GEOID and then by year
zeta16 <- zeta15[order(zeta15$GEOID,zeta15$year),]
# This last dataframe, zeta16, has 88,636 records... pretty big!
########################################################################
## Output the ACS five year databases....
########################################################################
setwd("~/Desktop/tidycensus_work/output")
# Export the data frames to CSV files, for importing to Excel
write.csv(zeta10,"ACS_5Years_US_meansoftransportation_2006_2014_2019.csv")
write.csv(zeta12,"ACS_5Years_states_meansoftransportation_2006_2014_2019.csv")
write.csv(zeta14,"ACS_5Years_counties_meansoftransportation_2006_2014_2019.csv")
write.csv(zeta16,"ACS_5Years_places_meansoftransportation_2006_2014_2019.csv")
########################################################################