> str(mvt)  # inspect the data frame: row count, column names, types and first values
'data.frame': 191641 obs. of 3 variables:
$ Date : chr "12/31/12 23:15" "12/31/12 22:00" "12/31/12 22:00" "12/31/12 22:00" ...
$ Latitude : num 41.8 41.9 42 41.8 41.8 ...
$ Longitude: num -87.6 -87.7 -87.8 -87.7 -87.6 ...
>
> mvt$Date <- strptime(mvt$Date, format="%m/%d/%y %H:%M")  # parse the month/day/year hour:minute text into date-time values
> range(mvt$Date)  # NOTE(review): the range printed below is still in the raw text format and looks lexicographic -- presumably captured before the conversion; re-run to confirm
[1] "1/1/01 0:01" "9/9/12 9:50"
>
> # Derive calendar features from the parsed timestamps. The assignment
> # order determines the column order printed by head() below.
> mvt[["Year"]] <- format(mvt$Date, format = "%Y")
> mvt[["Month"]] <- months(mvt$Date)
> mvt[["Day"]] <- weekdays(mvt$Date)
> mvt[["Hour"]] <- mvt$Date$hour
>
> head(mvt, n = 6L)
Date Latitude Longitude Year Month Day Hour
1 2012-12-31 23:15:00 41.75628 -87.62164 2012 December Monday 23
2 2012-12-31 22:00:00 41.89879 -87.66130 2012 December Monday 22
3 2012-12-31 22:00:00 41.96919 -87.76767 2012 December Monday 22
4 2012-12-31 22:00:00 41.76933 -87.65773 2012 December Monday 22
5 2012-12-31 21:30:00 41.83757 -87.62176 2012 December Monday 21
6 2012-12-31 20:30:00 41.92856 -87.75400 2012 December Monday 20
> # Thefts per year: tabulate, name the columns, and turn the factor of
> # year labels back into numbers so the x axis is continuous.
> mvtByYear <- setNames(as.data.frame(table(mvt$Year)), c("Year", "Count"))
> mvtByYear$Year <- as.numeric(levels(mvtByYear$Year))[mvtByYear$Year]
>
> ggplot(data = mvtByYear, mapping = aes(x = Year, y = Count)) +
+   geom_line(mapping = aes(group = 1),
+             color = "blue", alpha = 0.5, size = 2) +
+   scale_x_continuous(breaks = 2001:2012) +
+   ggtitle("Motor Vehicle Thefts by Year (2001 - 2012)") +
+   theme(axis.title.x = element_blank())
> # Thefts per calendar month, pooled over all years.
> mvtByMonth <- as.data.frame(table(mvt$Month))
> colnames(mvtByMonth) <- c('Month', 'Count')
> # table() sorts the month names alphabetically; re-level them in calendar
> # order using R's built-in month.name constant (January ... December).
> mvtByMonth$Month <- factor(mvtByMonth$Month, levels = month.name)
>
> # Continuation lines carry the "+" prompt, like every other plot in this
> # session, because they belong to the ggplot() expression started here.
> ggplot(mvtByMonth, aes(x = Month, y = Count)) +
+   geom_line(aes(group = 1), size = 2, alpha = 0.5, color = 'red') +
+   theme(axis.title.x = element_blank(),
+         axis.text.x = element_text(angle = 45, hjust = 1)) +
+   ggtitle('Motor Vehicle Thefts by Month (2001 - 2012)')
> # Thefts per weekday. Levels are given explicitly so the bars run Monday
> # through Sunday rather than alphabetically.
> mvtByDay <- setNames(as.data.frame(table(mvt$Day)), c("Day", "Count"))
> mvtByDay$Day <- factor(
+   mvtByDay$Day,
+   levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
+              "Saturday", "Sunday")
+ )
>
> ggplot(data = mvtByDay, mapping = aes(x = Day, y = Count)) +
+   geom_bar(mapping = aes(fill = Day), stat = "identity") +
+   ggtitle("Motor Vehicle Thefts by Day of the Week (2001 - 2012)") +
+   theme(axis.title.x = element_blank())
> # Thefts per hour of day; the hour labels are converted from factor back
> # to numbers so they plot on a continuous axis.
> mvtByHour <- setNames(as.data.frame(table(mvt$Hour)), c("Hour", "Count"))
> mvtByHour$Hour <- as.numeric(levels(mvtByHour$Hour))[mvtByHour$Hour]
>
> ggplot(data = mvtByHour, mapping = aes(x = Hour, y = Count)) +
+   geom_line(mapping = aes(group = 1),
+             color = "darkgreen", alpha = 0.5, size = 2) +
+   ggtitle("Motor Vehicle Thefts by Hour (2001 - 2012)")
> # Thefts per (month, year) cell, drawn as a heat map.
> mvtByMonthYear <- as.data.frame(table(mvt$Month, mvt$Year))
> colnames(mvtByMonthYear) <- c('Month', 'Year', 'Count')
> # Year labels come back as a factor; make them numeric for the y axis.
> mvtByMonthYear$Year <- as.numeric(as.character(mvtByMonthYear$Year))
> # Put the months in calendar order with the built-in month.name constant
> # instead of a hand-typed list of the twelve names.
> mvtByMonthYear$Month <- factor(mvtByMonthYear$Month, levels = month.name)
>
> ggplot(mvtByMonthYear, aes(x = Month, y = Year)) +
+   geom_tile(aes(fill = Count)) +
+   theme(axis.title.x = element_blank(),
+         axis.text.x = element_text(angle = 45, hjust = 1)) +
+   scale_fill_gradient(low = 'white', high = 'red') +
+   scale_y_continuous(breaks = 2001:2012) +
+   ggtitle('Motor Vehicle Thefts by Month and Year (2001 - 2012)')
> # Thefts per (weekday, hour) cell, drawn as a heat map with the weekdays
> # ordered Monday through Sunday and the hours on a numeric axis.
> mvtByHourDay <- setNames(as.data.frame(table(mvt$Day, mvt$Hour)),
+                          c("Day", "Hour", "Count"))
> mvtByHourDay$Day <- factor(
+   mvtByHourDay$Day,
+   levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
+              "Saturday", "Sunday")
+ )
> mvtByHourDay$Hour <- as.numeric(levels(mvtByHourDay$Hour))[mvtByHourDay$Hour]
>
> ggplot(data = mvtByHourDay, mapping = aes(x = Hour, y = Day)) +
+   geom_tile(mapping = aes(fill = Count)) +
+   scale_fill_gradient(name = "Motor Vehicle Thefts",
+                       high = "red", low = "white") +
+   ggtitle("Motor Vehicle Thefts by Hour and Day of the Week (2001 - 2012)") +
+   theme(axis.title.y = element_blank()) +
+   scale_x_continuous(breaks = seq(from = 0, to = 23, by = 2))
> # Install the mapping packages only when they are missing, so re-running
> # this session does not download and reinstall them every time.
> if (!requireNamespace('maps', quietly = TRUE)) install.packages('maps')
> if (!requireNamespace('ggmap', quietly = TRUE)) install.packages('ggmap')
> library(maps)
> library(ggmap)
> # Fetch a base map for the location "chicago" and draw it on its own.
> # NOTE(review): newer ggmap releases appear to need a registered API key
> # (register_google) to look up a place name -- confirm for the installed
> # version before relying on this call.
> chicago <- get_map(location = "chicago", zoom = 11)
> ggmap(chicago)
> # Overlay every theft on the base map, then only the first 1000 rows.
> ggmap(chicago) +
+   geom_point(mapping = aes(x = Longitude, y = Latitude), data = mvt)
> ggmap(chicago) +
+   geom_point(mapping = aes(x = Longitude, y = Latitude),
+              data = mvt[seq_len(1000), ])
> # Bin the thefts onto a grid: round both coordinates to 2 decimal places
> # and count the thefts in every (longitude, latitude) combination.
> mvtByLongLat <- setNames(
+   as.data.frame(table(round(mvt$Longitude, digits = 2),
+                       round(mvt$Latitude, digits = 2))),
+   c("Longitude", "Latitude", "Count")
+ )
> head(mvtByLongLat, n = 6L)
Longitude Latitude Count
1 -87.93 41.64 0
2 -87.92 41.64 0
3 -87.91 41.64 0
4 -87.9 41.64 0
5 -87.89 41.64 0
6 -87.88 41.64 0
> # as.data.frame(table(...)) stores the grid coordinates as factors (note
> # the level label "-87.9" in the head() output above). The map needs
> # numeric positions on its continuous axes, so convert both columns back
> # to numbers before plotting, as is done for the other tabulated columns.
> mvtByLongLat$Longitude <- as.numeric(as.character(mvtByLongLat$Longitude))
> mvtByLongLat$Latitude <- as.numeric(as.character(mvtByLongLat$Latitude))
> # First attempt: one point per grid cell, colour and size by theft count
> # (cells with a count of zero are still included at this stage).
> ggmap(chicago) +
+   geom_point(data = mvtByLongLat,
+              aes(x = Longitude, y = Latitude, color = Count, size = Count))
> head(mvtByLongLat)
Longitude Latitude Count
1 -87.93 41.64 0
2 -87.92 41.64 0
3 -87.91 41.64 0
4 -87.90 41.64 0
5 -87.89 41.64 0
6 -87.88 41.64 0
> # Keep only the grid cells that contain at least one theft; the original
> # row names are preserved, as the head() output below shows.
> mvtByLongLat <- mvtByLongLat[which(mvtByLongLat$Count != 0), ]
> head(mvtByLongLat, n = 6L)
Longitude Latitude Count
33 -87.61 41.64 3
40 -87.54 41.64 1
74 -87.62 41.65 54
75 -87.61 41.65 60
76 -87.60 41.65 28
77 -87.59 41.65 1
>
> # Bubble map of the remaining grid cells: both the colour and the point
> # size encode the theft count of the cell.
> ggmap(chicago) +
+   geom_point(mapping = aes(x = Longitude, y = Latitude,
+                            color = Count, size = Count),
+              data = mvtByLongLat) +
+   scale_color_gradient(high = "red", low = "white") +
+   labs(x = "Longitude", y = "Latitude") +
+   ggtitle("Map of Motor Vehicle Thefts (2001 - 2012)")
> # Tile-based versions of the map, refined step by step.
> # 1. Plain tiles on every grid cell in mvtByLongLat.
> ggmap(chicago) +
+   geom_tile(mapping = aes(x = Longitude, y = Latitude),
+             data = mvtByLongLat)
> # 2. Tile opacity follows the theft count.
> ggmap(chicago) +
+   geom_tile(mapping = aes(x = Longitude, y = Latitude, alpha = Count),
+             data = mvtByLongLat)
> # 3. Same as 2, with a fixed red fill.
> ggmap(chicago) +
+   geom_tile(mapping = aes(x = Longitude, y = Latitude, alpha = Count),
+             data = mvtByLongLat,
+             fill = "red")
> # 4. Final version with axis labels and a title.
> ggmap(chicago) +
+   geom_tile(mapping = aes(x = Longitude, y = Latitude, alpha = Count),
+             data = mvtByLongLat,
+             fill = "red") +
+   labs(x = "Longitude", y = "Latitude") +
+   ggtitle("Map of Motor Vehicle Thefts (2001 - 2012)")