-
Notifications
You must be signed in to change notification settings - Fork 1
/
unit one review.R
69 lines (50 loc) · 1.66 KB
/
unit one review.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Core Skills for Unit One

# Import a dataset ----
# The data for this review comes from the nycflights13 package. Install it
# only when it is missing, so re-running the script does not download and
# reinstall the package every time.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}

# Attach the data package (it supplies the `flights` table used below).
library(nycflights13)

# Attach the packages used for data wrangling and plotting.
library(dplyr)
library(ggplot2)
# Write a filter ----
# Keep only Flag Day (June 14). Conditions passed to a single filter() call
# are combined with AND, so this keeps the same rows as two chained filters.
flights %>%
  filter(month == 6, day == 14)
# There are multiple right answers here...

# Use select to make a table manageable ----
# Keep just the origin and destination columns.
flights %>%
  select(origin, dest)

# Write a summary function ----
# Mean departure delay per carrier; na.rm = TRUE skips missing delays.
# The result column is named explicitly so it is not labelled with the
# full text of the expression.
flights %>%
  group_by(carrier) %>%
  summarize(mean_dep_delay = mean(dep_delay, na.rm = TRUE))
# Calculate a derived column ----
# lagged_delay is the arrival delay minus the departure delay of each flight
# (a row-wise difference). A handful of columns is kept alongside it.
explain <- flights %>%
  mutate(lagged_delay = arr_delay - dep_delay) %>%
  select(time_hour, lagged_delay, carrier, arr_delay, dep_delay)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(explain)
}

# Plot that ----
# The plot expression has to end here: a trailing `+` would make R try to
# add the cor.test() result below to the plot as if it were a layer.
ggplot(explain, aes(time_hour, lagged_delay, colour = dep_delay)) +
  geom_point()

# Check to see if two things are related ----
cor.test(flights$arr_delay, flights$dep_delay)
# Turns out they are, and the correlation is significant.
# Produce an appropriate discrete plot ----
# Box plot of dep_delay for each carrier, coloured by origin. The plot is
# stored in `z` so it can be re-drawn with different coordinate systems.
z <- ggplot(
  data = flights,
  mapping = aes(x = carrier, y = dep_delay, colour = origin)
) +
  geom_boxplot()

# Easy flips and such: polar coordinates with one panel per origin ...
z +
  coord_polar() +
  facet_grid(. ~ origin)

# ... and the same box plot with the axes swapped.
z +
  coord_flip()

# Produce an appropriate continuous plot ----
# Frequency polygon of dep_time, one panel per origin.
ggplot(data = flights, mapping = aes(x = dep_time)) +
  geom_freqpoly() +
  facet_grid(. ~ origin)
# Demonstrate control of advanced plot aesthetics (color, labels) ----
# Fixed appearance settings (dotted lines, line and point sizes) are passed
# to the geoms directly. Inside aes() they would be treated as data to map:
# the lines would not be drawn dotted and extra legends would appear.
# Colour is mapped once in the top-level aes() so both layers share it.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
# lines; `size` is kept here so older installs still work -- confirm.
# NOTE(review): the "Greens" palette has a limited number of shades --
# confirm it covers every carrier that remains after the filter.
flights %>%
  filter(hour > 20) %>%
  ggplot(aes(time_hour, dep_delay, group = carrier, colour = carrier)) +
  geom_line(linetype = "dotted", size = 2) +
  geom_point(size = 3) +
  scale_colour_brewer(palette = "Greens") +
  facet_grid(~origin) +
  ggtitle("We Only Come Out At Night") +
  xlab("when")
# Export ----
# Write the `explain` table to a CSV file (path is relative to the working
# directory), leaving out the row-name column.
write.csv(x = explain, file = "explaining_delays.csv", row.names = FALSE)