-
Notifications
You must be signed in to change notification settings - Fork 7
/
bonus_tidy_airbnb.R
66 lines (52 loc) · 1.92 KB
/
bonus_tidy_airbnb.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
##########################################################################
# Jose Cajide - @jrcajide
# Master Data Science: Tidy time series
##########################################################################
# Packages this script depends on; any that are not yet installed locally
# are installed before the library() calls below.
list.of.packages <- c("tibbletime", "tidyverse", "ggmap")
new.packages <- setdiff(list.of.packages, installed.packages()[, "Package"])
if (length(new.packages) > 0) {
  install.packages(new.packages)
}
library(tibbletime)
library(tidyverse)
library(ggmap)
# Locate every CSV file under data/airbnb/. `pattern` is a regular
# expression (not a shell glob), so match a literal ".csv" suffix.
# The wrapping parentheses echo the matched paths at the console.
(files <- list.files("data/airbnb/", pattern = "\\.csv$", full.names = TRUE))

# Fail early with a clear message instead of an obscure missing-column
# error further down when the data directory is empty or absent.
if (length(files) == 0) {
  stop("No CSV files found in data/airbnb/", call. = FALSE)
}

# Read each file into its own tibble, then stack them row-wise.
airbnb <- lapply(files, read_csv)
airbnb <- bind_rows(airbnb)
# Turn the combined data into a time-aware tibble indexed by last_modified,
# order it chronologically, and keep only the columns used below.
airbnb <- as_tbl_time(airbnb, index = last_modified) %>%
  arrange(last_modified) %>%
  select(
    last_modified,
    price,
    overall_satisfaction,
    latitude,
    longitude
  )

# Quick overview of the retained columns.
summary(airbnb)
# Collapse the time index into 1-year periods and report the median price
# per period, ignoring missing prices.
airbnb %>%
  collapse_by(period = "1 year") %>%
  group_by(last_modified) %>%
  summarise(median_price = median(price, na.rm = TRUE))
# Clean up
# Collapse the index into 2-hour periods with clean = TRUE and show the
# first few median prices. Missing prices are ignored, as in the yearly
# summary, so one NA does not turn a whole period's median into NA.
airbnb %>%
  collapse_by(period = "2 hour", clean = TRUE) %>%
  group_by(last_modified) %>%
  summarise(median_price = median(price, na.rm = TRUE)) %>%
  head()
# Start
# Same 2-hour collapse, but side = "start" labels each period by its
# beginning. Missing prices are ignored, as in the yearly summary, so one
# NA does not turn a whole period's median into NA.
airbnb %>%
  collapse_by(period = "2 hour", clean = TRUE, side = "start") %>%
  group_by(last_modified) %>%
  summarise(median_price = median(price, na.rm = TRUE)) %>%
  head()
# 2-hour collapse labelled by period start, with the first period pinned to
# an explicit start_date. Missing prices are ignored, as in the yearly
# summary, so one NA does not turn a whole period's median into NA.
airbnb %>%
  collapse_by(
    period = "2 hour",
    clean = TRUE,
    side = "start",
    start_date = "2014-08-01 15:00:00"
  ) %>%
  group_by(last_modified) %>%
  summarise(median_price = median(price, na.rm = TRUE)) %>%
  head()
# Viz: ggmap
# Plot the listings on a map, coloured by log10(price).
airbnb_plot <- airbnb %>%
  # Keep only rows with no missing values
  drop_na() %>%
  as_tbl_time(index = last_modified) %>%
  # Collapse the index into hourly periods, labelled by period start
  collapse_by(period = "hour", clean = TRUE, side = "start") %>%
  # Discard outliers: keep prices between the 5th and 95th percentiles
  filter(
    price >= quantile(price, 0.05),
    price <= quantile(price, 0.95)
  ) %>%
  # Log scale for the colour mapping
  mutate(price = log10(price)) %>%
  # Blank base map; the points are added as a separate layer below
  qmplot(longitude, latitude, data = ., geom = "blank") +
  geom_point(aes(color = price), alpha = 0.2, size = 0.3) +
  scale_color_continuous(low = "red", high = "blue")

# Render the map.
airbnb_plot