-
Notifications
You must be signed in to change notification settings - Fork 5
/
ingest_formula1_from_s3_to_snowflake.sql
163 lines (152 loc) Β· 4.43 KB
/
ingest_formula1_from_s3_to_snowflake.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/*
This is our setup script to create a new database for the Formula1 data in Snowflake.
We are copying data from a public s3 bucket into Snowflake by defining our csv format and snowflake stage.
The original data is from https://www.kaggle.com/datasets/rohanrao/formula-1-world-championship-1950-2020
*/
-- create and define our formula1 database
create or replace database formula1;
use database formula1;
create or replace schema raw;
use schema raw;
-- define our file format for reading in the csvs
create or replace file format csvformat
type = csv
field_delimiter =','
field_optionally_enclosed_by = '"',
skip_header=1;
--
create or replace stage formula1_stage
file_format = csvformat
url = 's3://formula1-dbt-cloud-python-demo/formula1-kaggle-data/';
-- load in the 8 tables we need for our demo
-- we are first creating the table then copying our data in from s3
create or replace table formula1.raw.circuits (
CIRCUITID NUMBER(38,0),
CIRCUITREF VARCHAR(16777216),
NAME VARCHAR(16777216),
LOCATION VARCHAR(16777216),
COUNTRY VARCHAR(16777216),
LAT FLOAT,
LNG FLOAT,
ALT NUMBER(38,0),
URL VARCHAR(16777216),
PRIMARY KEY (CIRCUITID)
);
-- copy our data from public s3 bucket into our tables
copy into circuits
from @formula1_stage/circuits.csv
on_error='continue';
create or replace table formula1.raw.constructors (
CONSTRUCTORID NUMBER(38,0),
CONSTRUCTORREF VARCHAR(16777216),
NAME VARCHAR(16777216),
NATIONALITY VARCHAR(16777216),
URL VARCHAR(16777216),
PRIMARY KEY (CONSTRUCTORID)
);
copy into constructors
from @formula1_stage/constructors.csv
on_error='continue';
create or replace table formula1.raw.drivers (
DRIVERID NUMBER(38,0),
DRIVERREF VARCHAR(16777216),
NUMBER VARCHAR(16777216),
CODE VARCHAR(16777216),
FORENAME VARCHAR(16777216),
SURNAME VARCHAR(16777216),
DOB DATE,
NATIONALITY VARCHAR(16777216),
URL VARCHAR(16777216),
PRIMARY KEY (DRIVERID)
);
copy into drivers
from @formula1_stage/drivers.csv
on_error='continue';
create or replace table formula1.raw.races (
RACEID NUMBER(38,0),
YEAR NUMBER(38,0),
ROUND NUMBER(38,0),
CIRCUITID NUMBER(38,0),
NAME VARCHAR(16777216),
DATE DATE,
TIME VARCHAR(16777216),
URL VARCHAR(16777216),
FP1_DATE VARCHAR(16777216),
FP1_TIME VARCHAR(16777216),
FP2_DATE VARCHAR(16777216),
FP2_TIME VARCHAR(16777216),
FP3_DATE VARCHAR(16777216),
FP3_TIME VARCHAR(16777216),
QUALI_DATE VARCHAR(16777216),
QUALI_TIME VARCHAR(16777216),
SPRINT_DATE VARCHAR(16777216),
SPRINT_TIME VARCHAR(16777216),
PRIMARY KEY (RACEID),
FOREIGN KEY (CIRCUITID) REFERENCES formula1.raw.circuits(CIRCUITID)
);
copy into races
from @formula1_stage/races.csv
on_error='continue';
create or replace table formula1.raw.lap_times (
RACEID NUMBER(38,0),
DRIVERID NUMBER(38,0),
LAP NUMBER(38,0),
POSITION FLOAT,
TIME VARCHAR(16777216),
MILLISECONDS NUMBER(38,0),
FOREIGN KEY (RACEID) REFERENCES formula1.raw.races(RACEID),
FOREIGN KEY (DRIVERID) REFERENCES formula1.raw.drivers(DRIVERID)
);
copy into lap_times
from @formula1_stage/lap_times.csv
on_error='continue';
create or replace table formula1.raw.pit_stops (
RACEID NUMBER(38,0),
DRIVERID NUMBER(38,0),
STOP NUMBER(38,0),
LAP NUMBER(38,0),
TIME VARCHAR(16777216),
DURATION VARCHAR(16777216),
MILLISECONDS NUMBER(38,0),
FOREIGN KEY (RACEID) REFERENCES formula1.raw.races(RACEID),
FOREIGN KEY (DRIVERID) REFERENCES formula1.raw.drivers(DRIVERID)
);
copy into pit_stops
from @formula1_stage/pit_stops.csv
on_error='continue';
create or replace table formula1.raw.status (
STATUSID NUMBER(38,0),
STATUS VARCHAR(16777216),
PRIMARY KEY (STATUSID)
);
copy into status
from @formula1_stage/status.csv
on_error='continue';
create or replace table formula1.raw.results (
RESULTID NUMBER(38,0),
RACEID NUMBER(38,0),
DRIVERID NUMBER(38,0),
CONSTRUCTORID NUMBER(38,0),
NUMBER NUMBER(38,0),
GRID NUMBER(38,0),
POSITION FLOAT,
POSITIONTEXT VARCHAR(16777216),
POSITIONORDER NUMBER(38,0),
POINTS NUMBER(38,0),
LAPS NUMBER(38,0),
TIME VARCHAR(16777216),
MILLISECONDS NUMBER(38,0),
FASTESTLAP NUMBER(38,0),
RANK NUMBER(38,0),
FASTESTLAPTIME VARCHAR(16777216),
FASTESTLAPSPEED FLOAT,
STATUSID NUMBER(38,0),
PRIMARY KEY (RESULTID),
FOREIGN KEY (RACEID) REFERENCES formula1.raw.races(RACEID),
FOREIGN KEY (DRIVERID) REFERENCES formula1.raw.drivers(DRIVERID),
FOREIGN KEY (CONSTRUCTORID) REFERENCES formula1.raw.constructors(CONSTRUCTORID),
FOREIGN KEY (STATUSID) REFERENCES formula1.raw.status(STATUSID)
);
copy into results
from @formula1_stage/results.csv
on_error='continue';