-
Notifications
You must be signed in to change notification settings - Fork 0
/
Create Date Table Fabric.py
101 lines (79 loc) · 3.22 KB
/
Create Date Table Fabric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import time
import pandas as pd
import holidays
import pyodbc
from datetime import datetime
def create_date_table(start_date, end_date):
date_range = pd.date_range(start=start_date, end=end_date)
date_table = pd.DataFrame(date_range, columns=['Date'])
date_table['Year'] = date_table['Date'].dt.year
date_table['Month'] = date_table['Date'].dt.month
date_table['Day'] = date_table['Date'].dt.day
date_table['Weekday'] = date_table['Date'].dt.day_name()
us_holidays = holidays.US(years=date_table['Year'].unique())
uk_holidays = holidays.UK(years=date_table['Year'].unique())
date_table['US_Holiday'] = date_table['Date'].apply(lambda x: x in us_holidays)
date_table['UK_Holiday'] = date_table['Date'].apply(lambda x: x in uk_holidays)
date_table['US_Holiday_Name'] = date_table['Date'].apply(lambda x: us_holidays.get(x))
date_table['UK_Holiday_Name'] = date_table['Date'].apply(lambda x: uk_holidays.get(x))
return date_table
# Change this to ensure correct date range
start_date = '1920-01-01'
end_date = '2050-12-31'
date_table = create_date_table(start_date, end_date)
connection_string = (
'DRIVER={ODBC Driver 17 for SQL Server};'
'SERVER=xaufnozi4obebkcwim6j5xepvy-5ex6ixwc55su3oz3rqtrfvwoki.datawarehouse.fabric.microsoft.com;'
'PORT=1433;'
'DATABASE=dim_Date;'
'Authentication=ActiveDirectoryInteractive;'
'TrustServerCertificate=yes;'
)
try:
# This connection allows link to Fabric
conn = pyodbc.connect(connection_string)
cursor = conn.cursor()
# optins here are to drop and recreate the dable if there is laready one with this name
cursor.execute('''
IF OBJECT_ID('dim_date_table', 'U') IS NOT NULL
DROP TABLE dim_date_table;
CREATE TABLE dim_date_table (
Date DATE,
Year INT,
Month INT,
Day INT,
Weekday VARCHAR(50),
US_Holiday BIT,
UK_Holiday BIT,
US_Holiday_Name VARCHAR(100),
UK_Holiday_Name VARCHAR(100)
);
''')
conn.commit()
print("Table created successfully.")
insert_query = '''
INSERT INTO dim_date_table (Date, Year, Month, Day, Weekday, US_Holiday, UK_Holiday, US_Holiday_Name, UK_Holiday_Name)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
'''
# this makes it faster
cursor.fast_executemany = True
#
data_to_insert = [
(row['Date'], row['Year'], row['Month'], row['Day'], row['Weekday'],
row['US_Holiday'], row['UK_Holiday'], row['US_Holiday_Name'], row['UK_Holiday_Name'])
for index, row in date_table.iterrows()
]
# Insert in chunks and log progress to make sure long waits are logged.
chunk_size = 500
start_time = time.time()
for i in range(0, len(data_to_insert), chunk_size):
cursor.executemany(insert_query, data_to_insert[i:i+chunk_size])
conn.commit()
print(f"Inserted rows {i} to {i + chunk_size} in {(time.time() - start_time):.2f} seconds")
start_time = time.time()
print(f"Successfully inserted {len(date_table)} rows into the dim_date_table.")
except Exception as e:
print(f"An error occurred: {e}")
finally:
if conn:
conn.close()