-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathacquire_data.R
69 lines (55 loc) · 1.8 KB
/
acquire_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# This is the script used to acquire the various datasets that will be used
# for this project
# Get the Data data manually
library(rKenyaCensus)
library(tidyverse)
# Give the census tables used in this project descriptive names
# Tables with the V1 prefix
pop_county <- V1_T2.2 # Population and gender by county
households <- V1_T2.3 # Household size and population
# Tables with the V4 prefix
goods_online <- V4_T2.34 # Searched and bought goods online
assets_owned <- V4_T2.36 # Assets
births_county <- V4_T2.40 # Births last 5 years by county
# Clean the datasets
library(janitor)
library(tools)
# Drop the first row, which holds the Kenya-wide sum total, then standardise
# the column names
households <- clean_names(slice(households, -1))
# Drop the first row, which holds the total counts for the sexes, then
# standardise the column names
pop_county <- slice(pop_county, -1) %>%
  clean_names()
# Drop the first three rows (the rural, urban and whole-of-Kenya figures),
# convert the county names to title case and standardise the column names.
# County is rewritten before clean_names() renames the columns.
births_county <- births_county %>%
  slice(-(1:3)) %>%
  mutate(County = toTitleCase(tolower(County))) %>%
  clean_names()
# Keep only the county-level rows (AdminArea == "County"); this drops every
# other administrative level, e.g. the sub-county rows.
# Then convert the county names to title case, remove the SubCounty column and
# standardise the column names.
# ungroup() is called without arguments so that all grouping is removed before
# County is modified: it also works when the table is not grouped, whereas
# ungroup(County) errors on an ungrouped data frame and would leave any other
# grouping variable in place.
assets_owned <- assets_owned %>%
  ungroup() %>%
  filter(AdminArea == "County") %>%
  mutate(County = toTitleCase(tolower(County))) %>%
  select(-SubCounty) %>%
  clean_names()
# Keep only the county-level rows (AdminArea == "County"); this drops every
# other administrative level, e.g. the sub-county rows.
# Then convert the county names to title case, remove the SubCounty column and
# standardise the column names.
# ungroup() is called without arguments so that all grouping is removed before
# County is modified: it also works when the table is not grouped, whereas
# ungroup(County) errors on an ungrouped data frame and would leave any other
# grouping variable in place.
goods_online <- goods_online %>%
  ungroup() %>%
  filter(AdminArea == "County") %>%
  mutate(County = toTitleCase(tolower(County))) %>%
  select(-SubCounty) %>%
  clean_names()
# Write the cleaned datasets into csv files under data/
# write_csv() does not create missing directories, so make sure data/ exists
# first; nothing happens when the directory is already there.
if (!dir.exists("data")) {
  dir.create("data", recursive = TRUE)
}
write_csv(households, "data/households.csv")
write_csv(pop_county, "data/pop_county.csv")
write_csv(births_county, "data/births_county.csv")
write_csv(assets_owned, "data/assets_owned.csv")
write_csv(goods_online, "data/goods_online.csv")