-
Notifications
You must be signed in to change notification settings - Fork 2
/
transform.py
30 lines (26 loc) · 1.46 KB
/
transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import mysql.connector
import json
import pandas as pd
def _process_table(df, student_info):
    """Return a normalised copy of one table, with ``Roll_No`` attached if possible.

    The caller's DataFrame is never modified: all work happens on a copy.

    Args:
        df: The table to normalise.
        student_info: Lookup table with ``Name``, ``Contact_No`` (both already
            cast to ``str``) and ``Roll_No`` columns.

    Returns:
        A new DataFrame that has a leading ``Sr_No`` column (added when
        missing), string-typed name/contact columns, and -- when the table had
        no ``Roll_No`` but has both a name and a contact column -- the
        ``Name``, ``Contact_No`` and ``Roll_No`` columns left-joined in from
        ``student_info``.
    """
    # Work on a copy: ``insert`` and column assignment below mutate in place,
    # which would otherwise leak changes back into the caller's data.
    table = df.copy()

    if 'Sr_No' not in table.columns:
        table.insert(0, 'Sr_No', range(1, len(table) + 1))

    # Some tables identify the student via "Pick_Up_*" columns instead of the
    # Name / Contact_No pair used by the student lookup table.
    name_column = 'Pick_Up_Student' if 'Pick_Up_Student' in table.columns else 'Name'
    contact_column = (
        'Pick_Up_Student_Number'
        if 'Pick_Up_Student_Number' in table.columns
        else 'Contact_No'
    )

    # Cast the join keys to str so they compare equal to the str-typed keys
    # in ``student_info`` regardless of how each table was loaded.
    for key_column in (name_column, contact_column):
        if key_column in table.columns:
            table[key_column] = table[key_column].astype(str)

    can_join = {name_column, contact_column}.issubset(table.columns)
    if 'Roll_No' not in table.columns and can_join:
        # Left join keeps every row of the table; unmatched rows get a
        # missing Roll_No.
        # NOTE(review): if student_info holds several rows with the same
        # (Name, Contact_No) pair, matching rows are repeated -- confirm the
        # lookup table is unique on that pair.
        table = table.merge(
            student_info[['Name', 'Contact_No', 'Roll_No']],
            left_on=[name_column, contact_column],
            right_on=['Name', 'Contact_No'],
            how='left',
        )
    return table


def transform(sports_data, hostel_data, mess_data, admin_data, access_data):
    """Normalise every table from the five sources and tag rows with ``Roll_No``.

    Each argument is a mapping of table name to DataFrame. The student lookup
    table is read from ``admin_data['student_data']``. Input DataFrames are
    left untouched; the returned frames are independent copies.

    Args:
        sports_data: Mapping of table name to DataFrame.
        hostel_data: Mapping of table name to DataFrame.
        mess_data: Mapping of table name to DataFrame.
        admin_data: Mapping of table name to DataFrame; must contain the key
            ``'student_data'`` with ``Name``, ``Contact_No`` and ``Roll_No``
            columns.
        access_data: Mapping of table name to DataFrame.

    Returns:
        A list of processed DataFrames, in source order (sports, hostel, mess,
        admin, access) and, within a source, in the mapping's iteration order.

    Raises:
        KeyError: If ``admin_data`` has no ``'student_data'`` entry, or that
            table lacks a ``Name`` or ``Contact_No`` column.
    """
    student_info_df = admin_data['student_data'].copy()
    student_info_df['Name'] = student_info_df['Name'].astype(str)
    student_info_df['Contact_No'] = student_info_df['Contact_No'].astype(str)

    integrated_data_list = []
    for data_dict in (sports_data, hostel_data, mess_data, admin_data, access_data):
        for df in data_dict.values():
            integrated_data_list.append(_process_table(df, student_info_df))

    print("Data Integration Complete")
    return integrated_data_list