Skip to content

Commit

Permalink
Create DataProcessor.py
Browse files Browse the repository at this point in the history
  • Loading branch information
KOSASIH authored Jul 5, 2024
1 parent 06f3424 commit e4093bd
Showing 1 changed file with 33 additions and 0 deletions.
33 changes: 33 additions & 0 deletions .ai/models/DataProcessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

class DataProcessor:
    """Load a CSV file into a DataFrame and clean, transform and plot it.

    The CSV must contain the numeric columns named in ``FEATURE_COLUMNS``.
    Each public method works on ``self.data`` and returns it.
    """

    # Numeric columns that are scaled, decomposed, combined and plotted.
    FEATURE_COLUMNS = ('feature1', 'feature2', 'feature3')

    def __init__(self, data_path):
        """Read the CSV at *data_path* into ``self.data``.

        Args:
            data_path: Anything ``pandas.read_csv`` accepts as its first
                argument; also kept on ``self.data_path``.
        """
        self.data_path = data_path
        self.data = pd.read_csv(data_path)

    def preprocess_data(self):
        """Drop incomplete rows, normalise text, then scale and PCA the features.

        Steps, in order:

        1. Rows containing any missing value are removed.
        2. Text columns are lower-cased and stripped of punctuation (the
           numeric feature columns are left untouched so decimal points and
           minus signs survive).
        3. The feature columns are standardised with ``StandardScaler``.
        4. The standardised features are replaced by their PCA scores. Only
           the components needed to explain 95% of the variance are kept;
           the remaining feature columns are filled with zeros so the frame
           always keeps all three feature columns.

        Returns:
            ``self.data`` after the transformations.

        Raises:
            KeyError: If a column in ``FEATURE_COLUMNS`` is missing.
        """
        features = list(self.FEATURE_COLUMNS)
        self.data.dropna(inplace=True)
        self._normalize_text_columns()

        scaled = StandardScaler().fit_transform(self.data[features])
        # A float n_components keeps a variable number of components, so the
        # result can be narrower than the three columns it is written back to.
        scores = PCA(n_components=0.95).fit_transform(scaled)
        self.data[features] = self._pad_components(scores, len(features))
        return self.data

    def _normalize_text_columns(self):
        """Lower-case and strip punctuation from every non-numeric column."""
        for column in self.data.columns:
            if column in self.FEATURE_COLUMNS:
                continue
            if pd.api.types.is_numeric_dtype(self.data[column]):
                continue
            lowered = self.data[column].astype(str).str.lower()
            # regex=True is explicit: pandas 2.0 changed the default to False,
            # which would treat the pattern as a literal string.
            self.data[column] = lowered.str.replace(
                r'[^\w\s]', '', regex=True
            )

    @staticmethod
    def _pad_components(scores, width):
        """Return *scores* right-padded with zero columns up to *width*."""
        padded = np.zeros((scores.shape[0], width))
        padded[:, :scores.shape[1]] = scores
        return padded

    def feature_engineering(self):
        """Add the product and ratio features derived from the base features.

        ``new_feature1`` is ``feature1 * feature2``; ``new_feature2`` is
        ``feature2 / feature3``. Rows whose ``feature3`` is zero get ``NaN``
        in ``new_feature2`` instead of an infinite value.

        Returns:
            ``self.data`` with the two new columns added.
        """
        frame = self.data
        frame['new_feature1'] = frame['feature1'] * frame['feature2']
        # Mask zero denominators so the ratio is NaN rather than +/-inf.
        denominator = frame['feature3'].where(frame['feature3'] != 0)
        frame['new_feature2'] = frame['feature2'] / denominator
        return self.data

    def data_visualization(self):
        """Show a scatter plot of ``feature1`` against ``feature2``.

        Returns:
            ``self.data``, unchanged.
        """
        # Imported here so matplotlib is only needed when plotting.
        import matplotlib.pyplot as plt

        plt.scatter(self.data['feature1'], self.data['feature2'])
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.title('Feature 1 vs Feature 2')
        plt.show()
        return self.data

0 comments on commit e4093bd

Please sign in to comment.