-
Notifications
You must be signed in to change notification settings - Fork 0
/
ump_join.R
70 lines (56 loc) · 2.15 KB
/
ump_join.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Load the tidyverse: it supplies the dplyr verbs and the %>% pipe used by
# this script. The script neither clears the caller's workspace nor changes
# the working directory; the input files are read relative to the directory
# the script is run from.
library(tidyverse)
# Load the three input files (paths are relative to the working directory)
# The game log is read without a header row, so read.csv names its columns
# V1, V2, ... by position
gamelog_df <- read.csv(file = "gl2023.txt", header = FALSE)
# Statcast data; supplies game_pk, game_date, the team columns and the pitch
# columns used further down
statcast_df <- read.csv(file = "statcast_data.csv")
# Team-name lookup with columns gl_name and statcast_name
team_xwalk <- read.csv(file = "team_xwalk.csv")
# Reduce the game log to the five fields needed for the umpire join
gamelog_df <- gamelog_df %>%
  # Pick the positional columns and give them readable names in one step
  select(
    game_date = V1,
    game_num = V2,
    away_team = V4,
    home_team = V7,
    home_ump_name = V79
  ) %>%
  mutate(
    # Rebuild the date as text: characters 1-4, 5-6 and 7-8 joined by "-"
    game_date = paste(
      substr(game_date, 1, 4),
      substr(game_date, 5, 6),
      substr(game_date, 7, 8),
      sep = "-"
    ),
    # A game number of 0 is recoded to 1; every other value is kept
    game_num = case_when(
      game_num == 0 ~ 1,
      .default = game_num
    )
  )
# Swap the game-log team codes for the names found in team_xwalk
# Stage 1: away side. Look up the code in gl_name, drop the old code, and let
# statcast_name take over the away_team column name
ump_xwalk <- gamelog_df %>%
  left_join(team_xwalk, by = c("away_team" = "gl_name")) %>%
  select(-away_team) %>%
  rename(away_team = statcast_name)
# Stage 2: home side, same lookup applied to home_team
ump_xwalk <- ump_xwalk %>%
  left_join(team_xwalk, by = c("home_team" = "gl_name")) %>%
  select(-home_team) %>%
  rename(home_team = statcast_name)
# Number the games within each date / away team / home team combination
statcast_game_num_xwalk <- statcast_df %>%
  # One row per game: keep the identifying columns and drop repeats
  select(game_date, away_team, home_team, game_pk) %>%
  distinct() %>%
  group_by(game_date, away_team, home_team) %>%
  # Sort by the grouping columns first, then by game_pk inside each group
  arrange(game_pk, .by_group = TRUE) %>%
  # Position within the group (1, 2, ...) becomes the game number
  mutate(game_num = row_number()) %>%
  ungroup()
# Crosswalk for ump names: attach home_ump_name to each game_pk
# The join keys are written out explicitly so the match does not depend on
# whichever column names the two tables happen to share, and so dplyr does not
# have to guess (and announce) the keys at run time
ump_statcast_xwalk <- statcast_game_num_xwalk %>%
  left_join(
    ump_xwalk,
    by = c("game_date", "away_team", "home_team", "game_num")
  ) %>%
  # Only the game identifier and the umpire name are needed downstream
  select(game_pk, home_ump_name)
# Join umps onto statcast
# The crosswalk must hold at most one row per game_pk; a repeated game_pk
# would make the join below duplicate rows of statcast_df
if (anyDuplicated(ump_statcast_xwalk$game_pk) > 0) {
  stop("ump_statcast_xwalk has more than one row per game_pk", call. = FALSE)
}
# Explicit key: match on game_pk only, whatever other names the tables share
statcast_with_ump <- statcast_df %>%
  left_join(ump_statcast_xwalk, by = "game_pk")
# Calculate strike dummy
# Horizontal limits of the zone: +/- (17 / 2) / 12
# NOTE(review): presumably half of a 17-inch plate width converted to feet,
# which assumes plate_x is measured in feet - confirm
left_plate_edge <- -(17/2)/12
right_plate_edge <- (17/2)/12
# strike_dummy is 1 when the pitch location lies inside the rectangle bounded
# by the plate edges (plate_x) and by sz_bot / sz_top (plate_z), 0 when it
# lies outside, and NA when missing values make that undecidable. if_else()
# returns NA for an NA condition, so pitches with missing location or zone
# data are not counted as being outside the zone
statcast_with_ump <- statcast_with_ump %>%
  mutate(
    strike_dummy = if_else(
      plate_x >= left_plate_edge & plate_x <= right_plate_edge &
        plate_z >= sz_bot & plate_z <= sz_top,
      1,
      0
    )
  )
# Narrow view of the joined data for inspection: pitch description columns,
# release and movement measurements, location and zone limits, the recorded
# outcome columns, and the computed strike_dummy
test <- select(
  statcast_with_ump,
  pitch_type, game_date,
  release_speed, release_pos_x, release_pos_z,
  pfx_x, pfx_z,
  plate_x, plate_z, sz_top, sz_bot,
  description, type, strike_dummy
)