-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtymek.R
173 lines (129 loc) · 5.27 KB
/
tymek.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Hey, welcome to my kitchen
# this is where we do some tricks
getwd()
source("mega_wazny_plik.R")
source("theme.R")
library(dplyr)
library(ggplot2)
library(scales)
library(tidyquant)
library(ggthemes)
library(tidyr)
# Load the data sets: each name below is read from data/<name>.csv and stored
# as a data frame under that same name in the current environment.
options(stringsAsFactors = FALSE)
data_set_names <- c(
  "drivers", "driver_standings", "races", "results", "seasons",
  "sprint_results", "status", "lap_times", "earnings_2021", "total_earnings"
)
for (el in data_set_names) {
  path <- paste0("data/", el, ".csv")
  assign(el, read.csv(path))
}
# Keep only drivers that appear in the standings table, add a full-name
# column and reduce the table to the columns used further down
# (`number` is exposed as `driver.number`).
drivers <- drivers %>%
  filter(driverId %in% driver_standings$driverId) %>%
  mutate(driver.name = paste(forename, surname)) %>%
  select(
    driverId, code,
    driver.number = number,
    driver.name, dob, nationality, url
  )
# Keep only the result columns used below and convert the time / position
# columns to numbers. Entries that do not parse as numbers (the raw files
# contain "\N" placeholders) become NA and those rows are dropped.
parse_numeric <- function(x) {
  # Go through as.character() so that a factor column would be parsed by its
  # labels rather than by its level codes. The "NAs introduced by coercion"
  # warning is expected for the placeholders, so it is silenced here only.
  suppressWarnings(as.numeric(as.character(x)))
}
results <- results %>%
  select(resultId, raceId, driverId, milliseconds, position) %>%
  mutate(
    milliseconds = parse_numeric(milliseconds),
    position = parse_numeric(position)
  ) %>%
  filter(!is.na(milliseconds), !is.na(position))
# First exploratory analysis:
# for the selected drivers, compare their average speeds in every season.
# SEASONAL AVERAGE SPEEDS
# NOTE(review): `our_drivers` is not defined in this file; presumably it comes
# from one of the sourced scripts.
df <- merge(results, drivers) %>%
  filter(driver.name %in% our_drivers) %>%
  merge(races, by = "raceId") %>%
  mutate(date = as.Date(date))
# Alternative that was tried: bucket the dates by month with
# lubridate::floor_date(as.Date(date), "month").
str(df$date)
# One row per driver and race date: mean finishing time in seconds and
# mean finishing position.
df <- df %>%
  group_by(driver.name, date) %>%
  summarize(
    avg_time = mean(milliseconds / 1000),
    avg_position = mean(position)
  )
# Moving-average plot (window of 50 observations) of each driver's average
# race time over time, saved to ostateczne/test.png.
# NOTE(review): `driver_colors` and `theme_form()` are not defined in this
# file; presumably they come from the sourced scripts.
# NOTE(review): the title and caption read as if copied from another chart,
# and the y label looks mis-encoded - confirm the intended texts.
test <- df %>%
  ggplot() +
  # The mapping is added with `+` like every other plot component; piping
  # aes() into geom_ma() only worked because `mapping` happens to be its
  # first argument.
  aes(y = avg_time, x = date, color = driver.name) +
  # `se` is not a geom_ma() parameter, so it is no longer passed.
  geom_ma(size = 1.5, linetype = 1, n = 50) +
  labs(
    title = "Circuits domination",
    y = "œredni czas w sekundach",
    x = "rok",
    caption = "Races won / total driven races"
  ) +
  theme_form() +
  scale_color_manual(values = driver_colors) +
  # xlim(2005, 2022) +
  scale_y_reverse() +
  theme(plot.caption = element_text(size = 20, hjust = 1))
ggsave("ostateczne/test.png", test, bg = "transparent")
head(df)
# Comparison of the drivers' average finishing positions:
# 12-observation moving average per driver from 1998 onwards, with the y axis
# reversed so that position 1 is at the top. Saved to ostateczne/srednia.png.
source("theme.R")
srednia_poz <- df %>%
  filter(date >= as.Date("01/01/98", "%d/%m/%y")) %>%
  # filter(driver.name != "Michael Schumacher") %>%
  ggplot(aes(y = avg_position, x = date, color = driver.name)) +
  geom_ma(n = 12, linetype = 1, size = 1.5) +
  labs(
    title = "Average positions",
    subtitle = "at Grand Prix competitions over the years",
    y = "Average position",
    x = "Year",
    color = "Driver"
  ) +
  # maximum and minimum values
  scale_color_manual(values = driver_colors) +
  scale_y_reverse(breaks = c(9, 7, 5, 3, 1)) +
  coord_x_date(
    xlim = c(
      as.Date("01/01/98", "%d/%m/%y"),
      as.Date("31/07/22", "%d/%m/%y")
    ),
    ylim = c(9, 1)
  ) +
  theme_form()
srednia_poz
ggsave("ostateczne/srednia.png", srednia_poz, bg = "transparent")
# EARNINGS
head(earnings_2021) # this data is from https://www.statista.com/statistics/1255926/formula-one-salaries/
head(total_earnings) # this data is from https://www.scmp.com/magazines/style/celebrity/article/3182964/10-richest-f1-drivers-all-time-net-worths-ranked-lewis
# and for Verstappen https://www.spotrac.com/formula1/oracle-red-bull-racing/max-verstappen-47373/cash-earnings/
# Attach driver details to the earnings table (rows without a matching driver
# name are dropped by merge) and order the names by earnings so the bars are
# sorted in the chart.
df_prep <- total_earnings %>%
  merge(drivers, by.x = "name", by.y = "driver.name") %>%
  mutate(name = reorder(name, earnings))
# Horizontal bar chart of net worth per driver, saved to
# ostateczne/piniondz.png.
piniondz <- df_prep %>%
  ggplot() +
  aes(x = earnings, y = name, fill = name) +
  # Varying the bar width per driver was tried and abandoned (it looked odd);
  # both branches of the old ifelse() were 0.9, so a constant width is used.
  geom_col(width = 0.9) +
  labs(
    title = "Net worths of 10 richest F1 drivers",
    subtitle = "for year 2022",
    y = "",
    x = "Net worths (in mln $)"
  ) +
  scale_fill_manual(values = driver_colors, na.value = "#333333") +
  theme_form() +
  theme(panel.grid.major.y = element_blank())
piniondz
ggsave("ostateczne/piniondz.png", piniondz, bg = "transparent")
# Overtaking: lap-by-lap data for the 2021 season.
# `races` is narrowed to 2021 and its `name` column becomes `circuit`.
races <- races %>%
  filter(year == 2021) %>%
  rename(circuit = name)
# `drivers` was already filtered, given a `driver.name` column and had
# `number` renamed to `driver.number` earlier in this script. Repeating that
# pipeline here would fail because `forename`, `surname` and `number` no
# longer exist in the table, so it is not repeated.
gp <- races %>% select(raceId, round, circuit)
dr <- drivers %>% select(driverId, driver.name, code, driver.number)
# Lap times with race and driver details attached. Laps from races outside
# 2021 have no match in `gp`, so their `round` / `circuit` are NA.
lp <- lap_times %>%
  left_join(gp, by = "raceId") %>%
  left_join(dr, by = "driverId") %>%
  select(-raceId, -driverId)
# Quick look at one driver's laps in the races that did match.
lp %>% filter(driver.name == "Robert Kubica", !is.na(circuit))
# Lap-by-lap positions at one Grand Prix: three highlighted drivers get their
# own colour, everyone else is drawn in light grey.
# NOTE(review): `Hamilton`, `Verstappen` and `Vettel` are not defined in this
# file; presumably they are colour strings from a sourced script - confirm.
lp %>%
  filter(circuit == "Abu Dhabi Grand Prix") %>% # pick the Grand Prix
  mutate(colours = case_when( # add the colours
    driver.name == "Lewis Hamilton" ~ Hamilton,
    driver.name == "Max Verstappen" ~ Verstappen,
    driver.name == "Sebastian Vettel" ~ Vettel,
    TRUE ~ "#f0f0f0"
  )) %>%
  # The stray unnamed `alpha` argument was removed from aes(): it named no
  # column and resolved to the alpha() function.
  ggplot(aes(x = lap, y = position, group = driver.name, color = colours)) +
  geom_line(size = 2) +
  # `colours` already holds literal colour values; without the identity scale
  # ggplot would treat them as categories and pick its own palette.
  scale_color_identity() +
  labs(
    title = "Pozycje zawodników na Abu Dhabi Grand Prix 2021",
    y = "Pozycja",
    x = "Okr¹¿enie"
  )