-
Notifications
You must be signed in to change notification settings - Fork 1
/
DV_project.Rmd
495 lines (393 loc) · 21.3 KB
/
DV_project.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
---
title: "Group U_Final project"
author: "shiyun li, xia shan, junjie ma, peiyuan huo"
date: "4/25/2022"
output:
html_document: default
pdf_document: default
---
# Data Preparing:
- The dataset is from LA opendate, containing crimes happened during 2020-2022, updated weekly by LAPD
- Delete the cases that are still under investigation, and cleaned the text
- Integragate crime category that happened more than 500 times in the past
```{r, echo=FALSE, warning=FALSE, message = FALSE}
library(readr)
library(DT)
library(dplyr)
crime=read.csv("/Users/kimberlyshan/Desktop/Crime_Data_from_2020_to_Present.csv")
```
```{r, echo=FALSE,warning=FALSE}
#Clean version of the total dataset
col=c("DR_NO","Date.Rptd","Date.occ","TIME.OCC","AREA","AREA.NAME","Rpt.Dist.No","Crm.Cd.Desc","Mocodes",
"Vict.Age","Vict.Sex","Vict.Descent","Premis.Desc","Weapon.Desc","Status.Desc","LOCATION","LAT","LON")
clean_total=crime[,colnames(crime) %in% col]
clean_total$TIME.OCC <- as.character(clean_total$TIME.OCC)
proper_case <- function(x) {
return (gsub("\\b([A-Z])([A-Z]+)", "\\U\\1\\L\\2" , x, perl=TRUE))
}
clean_total <- clean_total %>% mutate(Crm.Cd.Desc = proper_case(Crm.Cd.Desc),
AREA=proper_case(AREA),
AREA.NAME=proper_case(AREA.NAME),
LOCATION=proper_case(LOCATION),
Weapon.Desc = proper_case(Weapon.Desc),
Premis.Desc=proper_case(Premis.Desc),
Status.Desc=proper_case(Status.Desc))
```
```{r, echo=FALSE,warning=FALSE}
#Delete the cases that are still under investigation
sum_ctg=clean_total %>%
group_by(Status.Desc)%>%
summarise(length(Status.Desc))
total1=subset(clean_total,Status.Desc=="Adult Arrest"|Status.Desc=="Adult Other" |Status.Desc=="Juv Arrest"|Status.Desc=="Juv Other")
```
```{r, echo=FALSE,warning=FALSE}
#Integragate crime category
sum_crime=total1 %>%
group_by(Crm.Cd.Desc)%>%
summarise(length(Crm.Cd.Desc))
#select the cirmes that happen more than 500 times
clean_total=subset(total1,Crm.Cd.Desc=="Intimate Partner - Simple Assault"|Crm.Cd.Desc=="Battery - Simple Assault"|Crm.Cd.Desc=="Assault With Deadly Weapon, Aggravated Assault
"|Crm.Cd.Desc==" Vandalism - Felony ($400 & Over, All Church Vandalisms) "|Crm.Cd.Desc=="Intimate Partner - Aggravated Assault"|Crm.Cd.Desc=="Criminal Threats - No Weapon Displayed"|Crm.Cd.Desc=="Robbery"|Crm.Cd.Desc=="Vehicle-Stolen"|Crm.Cd.Desc=="Burglary"|Crm.Cd.Desc=="Violation Of Restraining Order "|Crm.Cd.Desc=="Brandish Weapon "|Crm.Cd.Desc==" Vandalism - Misdeameanor ($399 Or Under)"|Crm.Cd.Desc=="Violation Of Court Order "|Crm.Cd.Desc==" Theft Plain - Petty ($950 & Under)"|Crm.Cd.Desc==" Letters, Lewd - Telephone Calls, Lewd"|Crm.Cd.Desc=="Child Abuse (Physical) - Simple Assault "|Crm.Cd.Desc=="Theft-Grand ($950.01 & Over)Excpt,Guns,Fowl,Livestk,Prod "|Crm.Cd.Desc==" Trespassing"|Crm.Cd.Desc=="Other Miscellaneous Crime "|Crm.Cd.Desc=="Contempt Of Court "|Crm.Cd.Desc==" Shoplifting - Petty Theft ($950 & Under)"|Crm.Cd.Desc==" Attempted Robbery"|Crm.Cd.Desc==" Battery Police (Simple)
"|Crm.Cd.Desc=="Battery With Sexual Contact "|Crm.Cd.Desc=="Rape, Forcible "|Crm.Cd.Desc=="
Other Assault "|Crm.Cd.Desc==" Burglary From Vehicle"|Crm.Cd.Desc=="Embezzlement, Grand Theft ($950.01 & Over) "|Crm.Cd.Desc==" Theft Of Identity")
```
#Use datatable to show the first 100 cases
```{r, echo=FALSE,warning=FALSE}
sample=clean_total[1:100, ]
datatable(sample, options = list(pageLength = 5,scrollX='400px'))
```
# Visualize the crime
## Preprocessing
```{r, warning=FALSE, message = FALSE}
library(tidyverse)
library("lubridate")
clean_total <- clean_total %>%
mutate(Date = as.Date(Date.Rptd, "%m/%d/%Y %H:%M:%S")) %>%
mutate(Location = str_squish(LOCATION))
names(clean_total)[names(clean_total) == 'Crm.Cd.Desc'] <- "Category"
clean_total$Year <- format(clean_total$Date, format="%Y")
```
# Map
## Crime over space
To visualize the data, we created an interactive map using the booklet to help us visualize the distribution of crime incidents in Los Angeles. The map contains 43,831 total crime data. The pop-up provides information on the category, date, address, victim's age, victim's gender, longitude and latitude. The purpose of this interactive map is to help us display more detailed information based on location.
```{r, echo=FALSE,warning=FALSE}
library(leaflet)
clean_total <- clean_total[clean_total$LON != 0, ] # display the first 10,000 rows
clean_total$popup <- paste("<br>", "<b>Category: </b>", clean_total$Category,
"<br>", "<b>Date: </b>", clean_total$Date,
"<br>", "<b>Address: </b>", clean_total$Location,
"<br>", "<b>Vict Age: </b>", clean_total$Vict.Age,
"<br>", "<b>Vict Sex: </b>", clean_total$Vict.Sex,
"<br>", "<b>Longitude: </b>", clean_total$LON,
"<br>", "<b>Latitude: </b>", clean_total$LAT)
leaflet(clean_total, width = "100%") %>% addTiles() %>%
addTiles(group = "OSM (default)") %>%
addProviderTiles(provider = "Esri.WorldStreetMap",group = "World StreetMap") %>%
addProviderTiles(provider = "Esri.WorldImagery",group = "World Imagery") %>%
# addProviderTiles(provider = "NASAGIBS.ViirsEarthAtNight2012",group = "Nighttime Imagery") %>%
addMarkers(lng = ~LON, lat = ~LAT, popup = clean_total$popup, clusterOptions = markerClusterOptions()) %>%
addLayersControl(
baseGroups = c("OSM (default)","World StreetMap", "World Imagery"),
options = layersControlOptions(collapsed = FALSE)
)
```
# Crime over time
### Overall trend
The crime graph over time shows us the general trend in the number of daily crimes in Los Angeles. Based on the map, we can see that as of July 2021, Los Angeles has an annual daily crime count of around 60. In the first few months of the pandemic, crime numbers fell sharply compared to the end of 2021. Specifically, the daily crime rate dropped by nearly 78%.
```{r, echo=FALSE,warning=FALSE, message = FALSE}
library(dplyr)
df_crime_daily <- clean_total %>%
mutate(Date = as.Date(Date, "%m/%d/%Y")) %>%
group_by(Date) %>%
summarize(count = n()) %>%
arrange(Date)
library(ggplot2)
library(scales)
plot <- ggplot(df_crime_daily, aes(x = Date, y = count)) +
geom_line(color = "#F2CA27", size = 0.1) +
geom_smooth(color = "#1A1A1A") +
# fte_theme() +
scale_x_date(breaks = date_breaks("1 year"), labels = date_format("%Y")) +
labs(x = "Date of Crime", y = "Number of Crimes", title = "Daily Crimes in Los Angeles from 2020 – Present")
plot
```
### In category
Here, I break down the crime categories into battery, burglary, intimate partner, robbery, and criminal threat. As the plot shows, we conclude that the number of burglaries and robberies has decreased significantly due to the pandemic, but the number of intimate partner crimes has increased dramatically.
```{r, echo=FALSE,warning=FALSE,message = FALSE}
library(tidytext)
library(janeaustenr)
library(dplyr)
df_crime_cat <- clean_total %>%
dplyr::group_by(Date,Category)%>%
dplyr::summarise(Count = n())
library(plotly)
df_crime_cat %>%
ungroup() %>%
plot_ly(x=~Date,y = ~Count,
color = ~ Category,
type = "scatter",
mode = "lines+ markers",
opacity = 0.8
)
```
# Aggregte data
```{r, echo=FALSE,warning=FALSE, message = FALSE}
df=read.csv("/Users/kimberlyshan/Desktop/clean_total.csv")
```
### Summarize the data by crime category, and calculate the percentage of each crime.
```{r, echo=FALSE,warning=FALSE, message = FALSE}
library(DT)
library(stringr)
Sys.setlocale("LC_TIME", "C")
df_category <- sort(table(df$Crm.Cd.Desc),decreasing = TRUE)
df_category <- data.frame(df_category)
colnames(df_category) <- c("Category", "Frequency")
df_category$Percentage <- df_category$Frequency / sum(df_category$Frequency)
datatable(df_category, options = list(scrollX='400px'))
```
These are the top six categories of crime. And assaults are classified into two types: simple assault and aggravated assault. A simple assault is defined as any purposeful conduct that puts another person in reasonable fear of being battered. Aggravated assault includes more serious crimes, such as assault with the intent to cause significant bodily harm or assault with a lethal weapon, such as a firearm.
### Create a bar plot based on the crime category.
```{r, echo=FALSE,warning=FALSE}
library(ggplot2)
library(ggrepel)
bp <- ggplot(df_category, aes(x=Category, y=Frequency, fill=Category)) +
geom_bar(stat="identity") +
theme(axis.text.x=element_blank()) +
geom_text_repel(data=df_category, aes(label=Category), size=2)
bp
```
We can clearly observe that simple assault from intimate partner has the highest frequency number of nearly 15,000 and counts for 33% of the whole crime types. And its aggravated assault has the third highest frequency. Intimate partner violence is defined as abuse or aggressiveness in a romantic relationship. And battery has the second highest number of over 12,000 and counts for 27%. In criminal law, this is a physical act that results in damaging or offensive contact with another person without that person's agreement.
And we also create a pie chart to show percentage of each categories more straightforward.
```{r, echo=FALSE,warning=FALSE}
bp <- ggplot(df_category, aes(x="", y=Percentage, fill=Category)) +
geom_bar(stat="identity") +
coord_polar("y")
bp
```
Through these statistics, we can conclude that assault from intimate partners has the highest frequency in Los Angeles during 2020 to 2022. Many risk factors can lead to intimate partner violence, such as people who have a low self-esteem, low education or income, and unstable emotions. For relationship factors, unhealthy family relationships, strong dominance and control by the partner, etc. And people who are in the communities with easy access to drugs and alcohol are more likely to be involved in this crime.
## Temporal Trends
### Robbery Over Time
We wanted to understand a trend in robbery in LA over the two years, as it also represents a high percentage of the overall crime types.
```{r, echo=FALSE,warning=FALSE, message = FALSE}
df_robbery <- df[df$Crm.Cd.Desc == "Robbery",]
df_robbery_daily <- df_robbery %>%
mutate(Date = as.Date(Date.Rptd, "%m/%d/%Y %H:%M:%S")) %>%
group_by(Date) %>%
summarize(count = n()) %>%
arrange(Date)
library(ggplot2)
library(scales)
plot <- ggplot(df_robbery_daily, aes(x = Date, y = count)) +
geom_line(color = "#F2CA27", size = 0.1) +
geom_smooth(color = "#1A1A1A") +
# fte_theme() +
scale_x_date(breaks = date_breaks("1 year"), labels = date_format("%Y")) +
labs(x = "Date of Robbery", y = "Number of Robberys", title = "Daily Robberys From 2020 to 2022")
plot
```
This graph shows a downward trend of daily robbery from 2020 to present, from roughly 7 cases down to 4 cases per day. And the numbers start to decrease obviously starting from 2022.
### Robbery Time Heatmap
We want to know the most frequent days in a week of robbery.So we aggregate counts of robbery by Day-of-Week and Time to create a heat map.
```{r, echo=FALSE,warning=FALSE, message = FALSE}
library(lubridate)
get_hour <- function(x) {
return (as.numeric((str_split(str_split(x, " ", simplify = T)[,2], ":", simplify = T)[,1])))
}
get_weekdays <- function(x){
return (weekdays(as.Date(x, format = "%m/%d/%Y %H:%M:%S")))
}
df_robbery_time <- df_robbery %>%
mutate(Hour = sapply(Date.Rptd, get_hour)) %>%
mutate(DayOfWeek = sapply(Date.Rptd, get_weekdays)) %>%
group_by(DayOfWeek, Hour) %>%
summarize(count = n())
# df_theft_time %>% head(10)
datatable(df_robbery_time, options = list(scrollX='400px'))
```
From the table, we can see that Friday, Monday, and Saturdays are the top three frequent days in a week of robbery. And Monday has the highest counts of 675 over the time.
Create a time heatmap.
```{r, echo=FALSE,warning=FALSE}
plot <- ggplot(df_robbery_time, aes(x = Hour, y = DayOfWeek, fill = count)) +
geom_tile() +
# fte_theme() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.6), legend.title = element_blank(), legend.position="top", legend.direction="horizontal", legend.key.width=unit(2, "cm"), legend.key.height=unit(0.25, "cm"), legend.margin=unit(-0.5,"cm"), panel.margin=element_blank()) +
labs(x = "Hour of Roberry (Local Time)", y = "Day of Week of Roberry", title = "Number of Roberry from 2020 to 2022, by Time of Roberry") +
scale_fill_gradient(low = "white", high = "#27AE60", labels = comma)
plot
```
From the graph, between 11:50 to 12:50AM, robbery happens the most on Mondays, more than 660 cases. Comparing to Mondays, Fridays have the least numbers of rubbery during that time.
### Burglary Over Time
Then we want to know if there is any difference or common between robbery and burglary, so we create a chart of crimes (Burglary) over time.
```{r, echo=FALSE,warning=FALSE, message = FALSE}
df_burglary <- df[df$Crm.Cd.Desc == "Burglary",]
df_burglary_daily <- df_burglary %>%
mutate(Date = as.Date(Date.Rptd, "%m/%d/%Y %H:%M:%S")) %>%
group_by(Date) %>%
summarize(count = n()) %>%
arrange(Date)
library(ggplot2)
library(scales)
plot <- ggplot(df_burglary_daily, aes(x = Date, y = count)) +
geom_line(color = "#F2CA27", size = 0.1) +
geom_smooth(color = "#1A1A1A") +
# fte_theme() +
scale_x_date(breaks = date_breaks("1 year"), labels = date_format("%Y")) +
labs(x = "Date of Burglary", y = "Number of Burglarys", title = "Daily Burglarys From 2020 to 2022")
plot
```
Comparing to robbery, the trend line of burglary is relatively smooth with an average number of 4 per day, and it begins to slightly decrease in 2022.
We also create a time heatmap of burglary.
```{r, echo=FALSE,warning=FALSE, message = FALSE}
df_burglary_time <- df_burglary %>%
mutate(Hour = sapply(Date.Rptd, get_hour)) %>%
mutate(DayOfWeek = sapply(Date.Rptd, get_weekdays)) %>%
group_by(DayOfWeek, Hour) %>%
summarize(count = n())
plot <- ggplot(df_burglary_time, aes(x = Hour, y = DayOfWeek, fill = count)) +
geom_tile() +
# fte_theme() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.6), legend.title = element_blank(), legend.position="top", legend.direction="horizontal", legend.key.width=unit(2, "cm"), legend.key.height=unit(0.25, "cm"), legend.margin=unit(-0.5,"cm"), panel.margin=element_blank()) +
labs(x = "Hour of Burglary (Local Time)", y = "Day of Week of Burglary", title = "Number of Burglary from 2020 to 2022, by Time of Burglary") +
scale_fill_gradient(low = "white", high = "#27AE60", labels = comma)
plot
```
From the map, we can see Mondays still are the most frequent happening days between 11:50 to 12:50AM, but now Saturdays have the least numbers of burglary. Burglary is when someone illegally enters a building in order to conduct a crime while inside; robbery is when someone steals anything of value directly from another person using force or terror. So we assume the reason that lead to Monday being the highest frequency of robberies is people are out for lunch, many people are on the streets, which give thieves more opportunities. As for burglary, most people are not home during the weekdays, so thieves will likely take advantage and break into the houses, and vice versa.
# Correlation analysis
```{r, echo=FALSE,warning=FALSE, message = FALSE}
library(magrittr)
library(dplyr)
library(DT)
library(ggplot2)
library(gridExtra)
library(grid)
```
```{r, echo=FALSE,warning=FALSE}
df=read.csv("/Users/kimberlyshan/Desktop/clean_total.csv")
# Convert date formate
df$date <- substr(df$Date.Rptd,1,10)
betterDates <- as.Date(df$date, "%m/%d/%y")
df$DayOfWeek <- weekdays(betterDates)
```
### Crime By Category
```{r, echo=FALSE,warning=FALSE}
df_arrest <- df %>% filter(grepl("Arrest", Status.Desc))
df_top_crimes <- df_arrest %>%
group_by(Crm.Cd.Desc) %>%
summarize(count = n()) %>%
arrange(desc(count))
datatable(df_top_crimes, options = list(pageLength = 10,scrollX='400px'))
```
```{r, echo=FALSE,warning=FALSE}
# Define a function that will conver Time.Occ to two-digit time indicator
# For instance, 1245 will return 12, 25 will return 0, and 345 will return 3
ToTime <- function(x){
if ((floor(log10(x)) + 1)<=2){
result <- "0"
} else if (floor(log10(x)) + 1==3){
result <- substr(x, 1, 1)
} else {
result <- substr(x, 1, 2)
}
return (result)
}
```
### By Category
```{r, echo=FALSE,warning=FALSE, message = FALSE}
df_arrest_time_crime <- df_arrest %>%
filter(Crm.Cd.Desc %in% df_top_crimes$Crm.Cd.Desc[2:19]) %>%
mutate(Hour = sapply(TIME.OCC, ToTime)) %>%
group_by(Crm.Cd.Desc, DayOfWeek, Hour) %>%
summarize(count = n())
dow_format <- c("Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday")
hour_format <- c(paste(c(12,1:11),"AM"), paste(c(12,1:11),"PM"))
df_arrest_time_crime$DayOfWeek <- factor(df_arrest_time_crime$DayOfWeek, level = rev(dow_format))
df_arrest_time_crime$Hour <- factor(df_arrest_time_crime$Hour, level = 0:23, label = hour_format)
datatable(df_arrest_time_crime, options = list(pageLength = 10, scrollX='400px'))
```
```{r, echo=FALSE,warning=FALSE}
plot <- ggplot(df_arrest_time_crime, aes(x = Hour, y = DayOfWeek, fill = count)) +
geom_tile() +
# fte_theme() +
theme(axis.text=element_text(size=8), axis.text.x = element_text(angle = 90, vjust = 0.6, size = 8)) +
labs(x = "Hour of Arrest (Local Time)", y = "Day of Week of Arrest", title = "Number of Police Arrests in Los Angeles from 2020 – Present, by Category and Time of Arrest") +
scale_fill_gradient(low = "white", high = "#2980B9") +
facet_wrap(~ Crm.Cd.Desc, nrow = 3)
plot
```
## By Category (Normalized)
```{r, echo=FALSE,warning=FALSE}
df_arrest_time_crime <- df_arrest_time_crime %>%
group_by(Crm.Cd.Desc) %>%
mutate(norm = count/sum(count))
datatable(df_arrest_time_crime, options = list(pageLength = 10,scrollX='400px'))
plot <- ggplot(df_arrest_time_crime, aes(x = Hour, y = DayOfWeek, fill = norm)) +
geom_tile() +
# fte_theme() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.6, size = 8)) +
labs(x = "Hour of Arrest (Local Time)", y = "Day of Week of Arrest", title = "Police Arrests in Los Angeles from 2020 by Time of Arrest, Normalized by Type of Crime") +
scale_fill_gradient(low = "white", high = "#2980B9") +
facet_wrap(~ Crm.Cd.Desc, nrow = 3)
plot
```
## By Police District
```{r, echo=FALSE,warning=FALSE, message = FALSE}
df_arrest_time_district <- df_arrest %>%
mutate(Hour = sapply(TIME.OCC, ToTime)) %>%
group_by(AREA.NAME, DayOfWeek, Hour) %>%
summarize(count = n()) %>%
group_by(AREA.NAME) %>%
mutate(norm = count/sum(count))
df_arrest_time_district$DayOfWeek <- factor(df_arrest_time_district$DayOfWeek, level = rev(dow_format))
df_arrest_time_district$Hour <- factor(df_arrest_time_district$Hour, level = 0:23, label = hour_format)
datatable(df_arrest_time_district, options = list(pageLength = 10,scrollX='400px'))
```
```{r,echo=FALSE, warning=FALSE}
plot <- ggplot(df_arrest_time_district, aes(x = Hour, y = DayOfWeek, fill = norm)) +
geom_tile() +
theme(axis.text=element_text(size=5),axis.text.x = element_text(angle = 90, vjust = 0.6, size =4)) +
labs(x = "Hour of Arrest (Local Time)", y = "Day of Week of Arrest", title = "Police Arrests in Los Angeles from 2020 by Time of Arrest, Normalized by Station") +
scale_fill_gradient(low = "white", high = "#8E44AD") +
facet_wrap(~ AREA.NAME, nrow=5,shrink=FALSE)
plot
```
### By Month
```{r, echo=FALSE,warning=FALSE, message = FALSE}
df_arrest_time_month <- df_arrest %>%
mutate(Month = format(as.Date(date, "%m/%d/%Y"), "%B"), Hour = sapply(TIME.OCC, ToTime)) %>%
group_by(Month, DayOfWeek, Hour) %>%
summarize(count = n()) %>%
group_by(Month) %>%
mutate(norm = count/sum(count))
df_arrest_time_month$DayOfWeek <- factor(df_arrest_time_month$DayOfWeek, level = rev(dow_format))
df_arrest_time_month$Hour <- factor(df_arrest_time_month$Hour, level = 0:23, label = hour_format)
# Set order of month facets by chronological order instead of alphabetical
df_arrest_time_month$Month <- factor(df_arrest_time_month$Month,
level = c("January","February","March","April","May","June","July","August","September","October","November","December"))
plot <- ggplot(df_arrest_time_month, aes(x = Hour, y = DayOfWeek, fill = norm)) +
geom_tile() +
# fte_theme() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.6, size = 4)) +
labs(x = "Hour of Arrest (Local Time)", y = "Day of Week of Arrest", title = "Police Arrests in Los Angeles from 2020 by Time of Arrest, Normalized by Month") +
scale_fill_gradient(low = "white", high = "#E74C3C") +
facet_wrap(~ Month, nrow = 4)
plot
```
### By Year
```{r, echo=FALSE,warning=FALSE, message = FALSE}
df_arrest_time_year <- df_arrest %>%
mutate(Year = format(as.Date(date(), "%m/%d/%Y"), "%Y"), Hour = sapply(TIME.OCC, ToTime)) %>%
group_by(Year, DayOfWeek, Hour) %>%
summarize(count = n()) %>%
group_by(Year) %>%
mutate(norm = count/sum(count))
df_arrest_time_year$DayOfWeek <- factor(df_arrest_time_year$DayOfWeek, level = rev(dow_format))
df_arrest_time_year$Hour <- factor(df_arrest_time_year$Hour, level = 0:23, label = hour_format)
plot <- ggplot(df_arrest_time_year, aes(x = Hour, y = DayOfWeek, fill = norm)) +
geom_tile() +
# fte_theme() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.6, size = 4)) +
labs(x = "Hour of Arrest (Local Time)", y = "Day of Week of Arrest", title = "Police Arrests Los Angeles from 2020 by Time of Arrest, Normalized by Year") +
scale_fill_gradient(low = "white", high = "#E67E22") +
facet_wrap(~ Year, nrow = 6)
plot
```