Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE ADD] StandardScaler MinMaxScaler OrdinalEncoder Algorithms added #51

Merged
merged 5 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
42 changes: 42 additions & 0 deletions Pre-Processing/Algorithms/Min_Max_Scaler/min_max_scaler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pandas as pd

# Custom min-max scaler for pandas DataFrames
class MinMaxScaling:
    """Linearly rescale every column of a DataFrame into a target range.

    Each column is mapped so that the minimum seen during ``fit`` lands on
    ``feature_range[0]`` and the maximum on ``feature_range[1]``.

    Attributes:
        min: Lower bound of the output range.
        max: Upper bound of the output range.
        data_min_: Per-column minimum learned by ``fit`` (``None`` until fitted).
        data_max_: Per-column maximum learned by ``fit`` (``None`` until fitted).
    """

    def __init__(self, feature_range=(0, 1)):
        """Store the desired output range; defaults to ``(0, 1)``."""
        self.min = feature_range[0]
        self.max = feature_range[1]
        self.data_min_ = None
        self.data_max_ = None

    def fit(self, data):
        """Learn the per-column minimum and maximum of *data*.

        Args:
            data: A ``pandas.DataFrame`` (subclasses are accepted).

        Raises:
            TypeError: If *data* is not a ``pandas.DataFrame``.
        """
        if not isinstance(data, pd.DataFrame):
            # Raise a real exception object; raising a bare string is itself
            # an error in Python 3 and hides the intended message.
            raise TypeError(
                f"parameter should be a Pandas.DataFrame; {type(data)} found"
            )
        self.data_min_ = data.min()
        self.data_max_ = data.max()

    def transform(self, data):
        """Scale *data* into the configured range using the fitted statistics.

        Columns that were constant during ``fit`` (max == min) are mapped to
        the lower bound of the range instead of producing NaN.

        Args:
            data: The ``pandas.DataFrame`` to scale.

        Returns:
            A new ``pandas.DataFrame`` with the scaled values.

        Raises:
            ValueError: If the scaler has not been fitted yet.
        """
        if self.data_max_ is None or self.data_min_ is None:
            raise ValueError(
                "Call MinMaxScaling.fit() first or call "
                "MinMaxScaling.fit_transform() as the required params not found"
            )
        data_range = self.data_max_ - self.data_min_
        # A zero range would divide by zero; use 1 so the column becomes 0
        # before being shifted to the lower bound.
        data_range = data_range.where(data_range != 0, 1)
        data_scaled = (data - self.data_min_) / data_range
        return data_scaled * (self.max - self.min) + self.min

    def fit_transform(self, data):
        """Fit on *data* and return the scaled result in one step."""
        self.fit(data)
        return self.transform(data)

    def get_params(self):
        """Return the fitted statistics as ``{"Min": ..., "Max": ...}``.

        Raises:
            ValueError: If the scaler has not been fitted yet.
        """
        if self.data_max_ is None or self.data_min_ is None:
            raise ValueError("Params not found! Call MinMaxScaling.fit() first")
        return {"Min": self.data_min_, "Max": self.data_max_}

Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Example script: fit MinMaxScaling on a small DataFrame and print the result.
import os
import sys
# Make the project package importable before the local import below.
# NOTE(review): realpath() resolves the relative name against the current
# working directory, so `current` is the directory the script is launched
# from, not necessarily the directory containing this file — confirm the
# intended launch directory.
current = os.path.dirname(os.path.realpath("min_max_scaler.py"))
# `parent` is computed but not used anywhere in the visible script.
parent = os.path.dirname(current)
sys.path.append(current)

import pandas as pd

# Must come after the sys.path change above.
from Min_Max_Scaler.min_max_scaler import MinMaxScaling

# Example DataFrame: three numeric columns on different scales
data = {
'A': [1, 2, 3, 4, 5],
'B': [10, 20, 30, 40, 50],
'C': [100, 200, 300, 400, 500]
}

df = pd.DataFrame(data)

# Initialize the scaler with its default feature range
scaler = MinMaxScaling()

# Fit the scaler to the data and transform the data
scaled_df = scaler.fit_transform(df)

# Show the input and the scaled output
print("Original DataFrame:")
print(df)
print("\nScaled DataFrame:")
print(scaled_df)
Empty file.
Binary file not shown.
Binary file not shown.
30 changes: 30 additions & 0 deletions Pre-Processing/Algorithms/Ordinal_Encoder/ordinal_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd

class OrdinalEncoding:
    """Encode the categories of each DataFrame column as ordinal integers.

    For every column, the distinct non-missing values are sorted and numbered
    from 0 in that sorted order.

    Attributes:
        category_mapping: ``{column: {category: integer}}`` learned by ``fit``.
    """

    def __init__(self):
        """Create an encoder with no learned mappings."""
        self.category_mapping = {}

    def fit(self, data):
        """Learn a category-to-integer mapping for every column of *data*.

        Missing values are ignored when building the mapping, so they do not
        break sorting and stay missing after ``transform``.

        Args:
            data: A ``pandas.DataFrame`` (subclasses are accepted).

        Raises:
            TypeError: If *data* is not a ``pandas.DataFrame``.
        """
        if not isinstance(data, pd.DataFrame):
            # Raise a real exception object; raising a bare string is itself
            # an error in Python 3 and hides the intended message.
            raise TypeError(
                f"Type of data should be Pandas.DataFrame; {type(data)} found"
            )
        for column in data.columns:
            unique_categories = sorted(set(data[column].dropna()))
            self.category_mapping[column] = {
                category: idx for idx, category in enumerate(unique_categories)
            }

    def transform(self, data):
        """Replace each value in *data* with its learned ordinal integer.

        Values that were not seen during ``fit`` (including missing values)
        become NaN in the output.

        Args:
            data: The ``pandas.DataFrame`` to encode.

        Returns:
            A copy of *data* with every column mapped to ordinal integers.

        Raises:
            ValueError: If the encoder has not been fitted yet.
            KeyError: If *data* has a column that was not present during fit.
        """
        if not self.category_mapping:
            raise ValueError(
                "Categorical mapping not found. Call OrdinalEncoding.fit() "
                "method or call OrdinalEncoding.fit_transform() method"
            )

        unknown_columns = [
            column for column in data.columns
            if column not in self.category_mapping
        ]
        if unknown_columns:
            raise KeyError(
                f"Columns not seen during fit: {unknown_columns}"
            )

        data_transformed = data.copy()
        for column in data.columns:
            data_transformed[column] = data[column].map(
                self.category_mapping[column]
            )
        return data_transformed

    def fit_transform(self, data):
        """Fit the encoder on *data* and return the encoded result."""
        self.fit(data)
        return self.transform(data)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Example script: fit OrdinalEncoding on a small DataFrame and print the result.
import os
import sys
# Make the project package importable before the local import below.
# NOTE(review): realpath() resolves the relative name against the current
# working directory, so `current` is the directory the script is launched
# from, not necessarily the directory containing this file — confirm the
# intended launch directory.
current = os.path.dirname(os.path.realpath("ordinal_encoder.py"))
# `parent` is computed but not used anywhere in the visible script.
parent = os.path.dirname(current)
sys.path.append(current)

import pandas as pd

# Must come after the sys.path change above.
from Ordinal_Encoder.ordinal_encoder import OrdinalEncoding

# Example usage: three string-valued categorical columns
data = {
'Category1': ['low', 'medium', 'high', 'medium', 'low', 'high', 'medium'],
'Category2': ['A', 'B', 'A', 'B', 'A', 'B', 'A'],
'Category3': ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X']
}
df = pd.DataFrame(data)

# Learn the mappings and encode the same frame in one step
encoder = OrdinalEncoding()
encoded_df = encoder.fit_transform(df)

# Show the input and the encoded output
print("Original DataFrame:")
print(df)
print("\nEncoded DataFrame:")
print(encoded_df)
Empty file.
42 changes: 42 additions & 0 deletions Pre-Processing/Algorithms/Standard_Scaler/standard_scaler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pandas as pd
import numpy as np

# Custom standard scaler (zero mean, unit variance) class
class StandardScaling:
    """Standardize columns to zero mean and unit variance.

    The mean and population standard deviation (``ddof=0``) are computed
    column-wise along axis 0 during ``fit``.

    Attributes:
        data_mean_: Per-column mean learned by ``fit`` (``None`` until fitted).
        data_std_: Per-column standard deviation learned by ``fit``
            (``None`` until fitted).
    """

    def __init__(self):
        """Create an unfitted scaler."""
        self.data_mean_ = None
        self.data_std_ = None

    def fit(self, data):
        """Learn the per-column mean and standard deviation of *data*.

        Args:
            data: A ``pandas.DataFrame`` or ``numpy.ndarray`` (subclasses are
                accepted).

        Raises:
            TypeError: If *data* is neither a DataFrame nor an ndarray.
        """
        if not isinstance(data, (pd.DataFrame, np.ndarray)):
            # Raise a real exception object; raising a bare string is itself
            # an error in Python 3 and hides the intended message.
            raise TypeError(
                "parameter should be a Pandas.DataFrame or Numpy.ndarray; "
                f"{type(data)} found"
            )
        values = data.to_numpy() if isinstance(data, pd.DataFrame) else data

        self.data_mean_ = np.mean(values, axis=0)
        self.data_std_ = np.std(values, axis=0)

    def transform(self, data):
        """Standardize *data* using the fitted mean and standard deviation.

        Columns whose fitted standard deviation is 0 are only centered (the
        divisor is treated as 1), so they become 0 instead of NaN.

        Args:
            data: The DataFrame or ndarray to scale.

        Returns:
            The scaled data, in the same container type as *data*.

        Raises:
            ValueError: If the scaler has not been fitted yet.
        """
        if self.data_mean_ is None or self.data_std_ is None:
            raise ValueError(
                "Call StandardScaling.fit() first or call "
                "StandardScaling.fit_transform() as the required params not found"
            )
        # Keep data_std_ untouched for get_params(); only the divisor is
        # adjusted to avoid dividing by zero.
        safe_std = np.where(self.data_std_ == 0, 1.0, self.data_std_)
        return (data - self.data_mean_) / safe_std

    def fit_transform(self, data):
        """Fit on *data* and return the standardized result in one step."""
        self.fit(data)
        return self.transform(data)

    def get_params(self):
        """Return the fitted statistics.

        Returns:
            ``{"Mean": ..., "Standard Deviation": ...}``.

        Raises:
            ValueError: If the scaler has not been fitted yet.
        """
        if self.data_mean_ is None or self.data_std_ is None:
            raise ValueError("Params not found! Call StandardScaling.fit() first")
        return {"Mean": self.data_mean_, "Standard Deviation": self.data_std_}

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Example script: fit StandardScaling on a small DataFrame and print the result.
import os
import sys
# Make the project package importable before the local import below.
# NOTE(review): realpath() resolves the relative name against the current
# working directory, so `current` is the directory the script is launched
# from, not necessarily the directory containing this file — confirm the
# intended launch directory.
current = os.path.dirname(os.path.realpath("standard_scaler.py"))
# `parent` is computed but not used anywhere in the visible script.
parent = os.path.dirname(current)
sys.path.append(current)

import pandas as pd

# Must come after the sys.path change above.
from Standard_Scaler.standard_scaler import StandardScaling

# Example DataFrame: three numeric columns on different scales
data = {
'A': [1, 2, 3, 4, 5],
'B': [10, 20, 30, 40, 50],
'C': [100, 200, 300, 400, 500]
}

df = pd.DataFrame(data)

# Initialize the standard scaler
scaler = StandardScaling()

# Fit the scaler to the data and transform the data
scaled_df = scaler.fit_transform(df)

# Show the input, the scaled output, and the fitted statistics
print("Original DataFrame:")
print(df)
print("\nScaled DataFrame:")
print(scaled_df)
print("\nAssociated Parameters:")
print(scaler.get_params())
55 changes: 55 additions & 0 deletions Pre-Processing/Documentation/Min_Max_Scaler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# MinMaxScaler

A custom implementation of a MinMaxScaler class for scaling numerical data in a pandas DataFrame. The class scales the features to a specified range, typically between 0 and 1.

## Features

- **fit**: Calculate the minimum and maximum values of the data.
- **transform**: Scale the data to the specified feature range.
- **fit_transform**: Fit the scaler and transform the data in one step.
- **get_params**: Retrieve the minimum and maximum values calculated during fitting.

## Methods

1. `__init__(self, feature_range=(0, 1))`
- Initializes the MinMaxScaling class.
- Parameters:
- feature_range (tuple): Desired range of transformed data. Default is (0, 1).
2. `fit(self, data)`
- Calculates the minimum and maximum values of the data.
- Parameters:
- data (pandas.DataFrame): The data to fit.
3. `transform(self, data)`
- Transforms the data to the specified feature range.
- Parameters:
- data (pandas.DataFrame): The data to transform.
- Returns:
- pandas.DataFrame: The scaled data.
4. `fit_transform(self, data)`
- Fits the scaler to the data and transforms the data in one step.
- Parameters:
- data (pandas.DataFrame): The data to fit and transform.
- Returns:
- pandas.DataFrame: The scaled data.
5. `get_params(self)`
- Retrieves the minimum and maximum values calculated during fitting.
- Returns:
- dict: Dictionary containing the minimum and maximum values.

## Error Handling

- Raises a TypeError if the input data is not a pandas DataFrame in the fit method.
- Raises an error if transform is called before fit or fit_transform.
- Raises an error in get_params if called before fit.

## Use Case

![Use Case](images/use_case.png)

## Output

![Output](images/output.png)

## Installation

No special installation is required. Just ensure you have `pandas` installed in your Python environment.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
52 changes: 52 additions & 0 deletions Pre-Processing/Documentation/Ordinal_Encoder/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# OrdinalEncoder

A custom implementation of an OrdinalEncoder class for encoding categorical data into ordinal integers using a pandas DataFrame. For each column, the class sorts the unique categories and maps each one to an integer according to its position in that sorted order (not the order of appearance).

## Features

- **fit**: Learn the mapping of categories to ordinal integers for each column.
- **transform**: Transform the categorical data to ordinal integers based on the learned mapping.
- **fit_transform**: Fit the encoder and transform the data in one step.

## Methods

1. `__init__(self)`
- Initializes the OrdinalEncoding class.
- No parameters are required.
2. `fit(self, data)`
- Learns the mapping of categories to ordinal integers for each column.
- Parameters:
- data (pandas.DataFrame): The data to fit.
- Raises:
- TypeError: If the input data is not a pandas DataFrame.
3. `transform(self, data)`
- Transforms the categorical data to ordinal integers based on the learned mapping.
- Parameters:
- data (pandas.DataFrame): The data to transform.
- Returns:
- pandas.DataFrame: The transformed data.
- Raises:
- Error: If transform is called before fit or fit_transform.
4. `fit_transform(self, data)`
- Fits the encoder to the data and transforms the data in one step.
- Parameters:
- data (pandas.DataFrame): The data to fit and transform.
- Returns:
- pandas.DataFrame: The transformed data.

## Error Handling

- Raises a TypeError if the input data is not a pandas DataFrame in the fit method.
- Raises an error if transform is called before fit or fit_transform.

## Use Case

![Use Case](images/use_case.png)

## Output

![Output](images/output.png)

## Installation

No special installation is required. Just ensure you have `pandas` installed in your Python environment.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
60 changes: 60 additions & 0 deletions Pre-Processing/Documentation/Standard_Scaler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# StandardScaler

A custom implementation of a StandardScaler class for scaling numerical data in a pandas DataFrame or NumPy array. The class scales the features to have zero mean and unit variance.

## Features

- **fit**: Calculate the mean and standard deviation of the data.
- **transform**: Scale the data to have zero mean and unit variance.
- **fit_transform**: Fit the scaler and transform the data in one step.
- **get_params**: Retrieve the mean and standard deviation calculated during fitting.

## Methods

1. `__init__(self)`
- Initializes the StandardScaling class.
- No parameters are required.
2. `fit(self, data)`
- Calculates the mean and standard deviation of the data.
- Parameters:
- data (pandas.DataFrame or numpy.ndarray): The data to fit.
- Raises:
- TypeError: If the input data is not a pandas DataFrame or NumPy array.
3. `transform(self, data)`
- Transforms the data to have zero mean and unit variance.
- Parameters:
- data (pandas.DataFrame or numpy.ndarray): The data to transform.
- Returns:
- pandas.DataFrame or numpy.ndarray: The scaled data, in the same container type as the input.
- Raises:
- Error: If transform is called before fit or fit_transform.
4. `fit_transform(self, data)`
- Fits the scaler to the data and transforms the data in one step.
- Parameters:
- data (pandas.DataFrame or numpy.ndarray): The data to fit and transform.
- Returns:
- pandas.DataFrame or numpy.ndarray: The scaled data, in the same container type as the input.
5. `get_params(self)`
- Retrieves the mean and standard deviation calculated during fitting.
- Returns:
- dict: Dictionary containing the mean and standard deviation.
- Raises:
- Error: If get_params is called before fit.

## Error Handling

- Raises a TypeError if the input data is not a pandas DataFrame or NumPy array in the fit method.
- Raises an error if transform is called before fit or fit_transform.
- Raises an error in get_params if called before fit.

## Use Case

![Use Case](images/use_case.png)

## Output

![Output](images/output.png)

## Installation

No special installation is required. Just ensure you have `pandas` and `numpy` installed in your Python environment.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading