Avdhesh-Varshney · Avdhesh-Varshney · Jun 22, 2024 · Jun 21, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/Pre-Processing/Algorithms/Min_Max_Scaler/__init__.py b/Pre-Processing/Algorithms/Min_Max_Scaler/__init__.py
diff --git a/Pre-Processing/Algorithms/Min_Max_Scaler/min_max_scaler.py b/Pre-Processing/Algorithms/Min_Max_Scaler/min_max_scaler.py
@@ -0,0 +1,42 @@
+import pandas as pd
+
+# Custom MinMaxScaler class
+class MinMaxScaling:
+    """Scale every column of a DataFrame linearly into a target range.
+
+    ``fit`` records the per-column minimum and maximum; ``transform`` maps
+    the fitted minimum to ``feature_range[0]`` and the fitted maximum to
+    ``feature_range[1]``.
+
+    Attributes:
+        min: Lower bound of the output range (``feature_range[0]``).
+        max: Upper bound of the output range (``feature_range[1]``).
+        data_min_: Per-column minima from the last ``fit``; ``None`` before.
+        data_max_: Per-column maxima from the last ``fit``; ``None`` before.
+    """
+
+    def __init__(self, feature_range=(0, 1)):
+        """Store the target range; no data is inspected here.
+
+        Args:
+            feature_range: ``(low, high)`` pair for the scaled output.
+                Defaults to ``(0, 1)``.
+        """
+        self.min = feature_range[0]
+        self.max = feature_range[1]
+        self.data_min_ = None
+        self.data_max_ = None
+
+    def fit(self, data):
+        """Record the per-column minimum and maximum of ``data``.
+
+        Args:
+            data: ``pandas.DataFrame`` to learn the statistics from.
+
+        Returns:
+            This scaler, so calls can be chained.
+
+        Raises:
+            TypeError: If ``data`` is not a ``pandas.DataFrame``.
+        """
+        if not isinstance(data, pd.DataFrame):
+            # A real exception object is required: raising a plain string
+            # fails with "exceptions must derive from BaseException" and
+            # hides the explanation below.
+            raise TypeError(
+                f"parameter should be a Pandas.DataFrame; {type(data)} found"
+            )
+        self.data_min_ = data.min()
+        self.data_max_ = data.max()
+        return self
+
+    def transform(self, data):
+        """Scale ``data`` using the statistics recorded by ``fit``.
+
+        Args:
+            data: ``pandas.DataFrame`` with the same columns that were fitted.
+
+        Returns:
+            The scaled data. A column whose fitted minimum equals its fitted
+            maximum is mapped to the lower bound of the feature range.
+
+        Raises:
+            RuntimeError: If the scaler has not been fitted yet.
+        """
+        if self.data_max_ is None or self.data_min_ is None:
+            raise RuntimeError(
+                "Call MinMaxScaling.fit() first or call "
+                "MinMaxScaling.fit_transform() as the required params not found"
+            )
+        data_range = self.data_max_ - self.data_min_
+        # A constant column has zero range; dividing by 1 instead keeps the
+        # result finite (0 before rescaling) rather than producing NaN.
+        data_range = data_range.where(data_range != 0, 1)
+        data_scaled = (data - self.data_min_) / data_range
+        return data_scaled * (self.max - self.min) + self.min
+
+    def fit_transform(self, data):
+        """Fit on ``data`` and return it scaled in a single call."""
+        self.fit(data)
+        return self.transform(data)
+
+    def get_params(self):
+        """Return the fitted statistics.
+
+        Returns:
+            Dict with keys ``"Min"`` and ``"Max"`` holding the per-column
+            minima and maxima recorded by ``fit``.
+
+        Raises:
+            RuntimeError: If the scaler has not been fitted yet.
+        """
+        if self.data_max_ is None or self.data_min_ is None:
+            raise RuntimeError("Params not found! Call MinMaxScaling.fit() first")
+        return {"Min": self.data_min_,
+                "Max": self.data_max_}
+
diff --git a/Pre-Processing/Algorithms/Min_Max_Scaler/testing/min_max_scaler_test.py b/Pre-Processing/Algorithms/Min_Max_Scaler/testing/min_max_scaler_test.py
@@ -0,0 +1,30 @@
+# Demo script: scale a small DataFrame with MinMaxScaling and print the result.
+import os
+import sys
+
+# Make the `Min_Max_Scaler` package importable no matter which directory the
+# script is launched from. This file lives in
+# <Algorithms>/Min_Max_Scaler/testing/, so the directory that must be on
+# sys.path is two levels above this file's own directory. Resolving from
+# __file__ (not from a bare file name) avoids depending on the current
+# working directory.
+testing_dir = os.path.dirname(os.path.realpath(__file__))
+package_dir = os.path.dirname(testing_dir)
+algorithms_dir = os.path.dirname(package_dir)
+sys.path.append(algorithms_dir)
+
+import pandas as pd
+
+from Min_Max_Scaler.min_max_scaler import MinMaxScaling
+
+# Example DataFrame
+data = {
+    'A': [1, 2, 3, 4, 5],
+    'B': [10, 20, 30, 40, 50],
+    'C': [100, 200, 300, 400, 500]
+}
+
+df = pd.DataFrame(data)
+
+# Initialize the custom min-max scaler with the default (0, 1) range
+scaler = MinMaxScaling()
+
+# Fit the scaler to the data and transform the data
+scaled_df = scaler.fit_transform(df)
+
+print("Original DataFrame:")
+print(df)
+print("\nScaled DataFrame:")
+print(scaled_df)
diff --git a/Pre-Processing/Algorithms/Ordinal_Encoder/__init__.py b/Pre-Processing/Algorithms/Ordinal_Encoder/__init__.py
diff --git a/Pre-Processing/Algorithms/Ordinal_Encoder/__pycache__/__init__.cpython-312.pyc b/Pre-Processing/Algorithms/Ordinal_Encoder/__pycache__/__init__.cpython-312.pyc
diff --git a/Pre-Processing/Algorithms/Ordinal_Encoder/__pycache__/ordinal_encoder.cpython-312.pyc b/Pre-Processing/Algorithms/Ordinal_Encoder/__pycache__/ordinal_encoder.cpython-312.pyc
diff --git a/Pre-Processing/Algorithms/Ordinal_Encoder/ordinal_encoder.py b/Pre-Processing/Algorithms/Ordinal_Encoder/ordinal_encoder.py
@@ -0,0 +1,30 @@
+import pandas as pd
+
+class OrdinalEncoding:
+    """Encode the categories of each DataFrame column as ordinal integers.
+
+    For every column, the unique values are sorted and numbered from 0, so
+    the codes follow the sorted order of the values, not their order of
+    appearance.
+
+    Attributes:
+        category_mapping: ``{column: {category: code}}`` learned by ``fit``;
+            empty until ``fit`` has been called.
+    """
+
+    def __init__(self):
+        """Create an encoder with no learned mapping."""
+        self.category_mapping = {}
+
+    def fit(self, data):
+        """Learn the category-to-integer mapping of every column.
+
+        Args:
+            data: ``pandas.DataFrame`` whose columns hold the categories.
+                The values of a column must be sortable against each other.
+
+        Returns:
+            This encoder, so calls can be chained.
+
+        Raises:
+            TypeError: If ``data`` is not a ``pandas.DataFrame``.
+        """
+        if not isinstance(data, pd.DataFrame):
+            # A real exception object is required: raising a plain string
+            # fails with "exceptions must derive from BaseException" and
+            # hides the explanation below.
+            raise TypeError(
+                f"Type of data should be Pandas.DataFrame; {type(data)} found"
+            )
+        # Build a fresh mapping so columns from an earlier fit do not linger.
+        self.category_mapping = {
+            column: {
+                category: idx
+                for idx, category in enumerate(sorted(set(data[column])))
+            }
+            for column in data.columns
+        }
+        return self
+
+    def transform(self, data):
+        """Replace every category with its learned integer code.
+
+        Args:
+            data: ``pandas.DataFrame`` whose columns were all present in
+                the data passed to ``fit``.
+
+        Returns:
+            A copy of ``data`` with each value replaced by its code. Values
+            that were not seen during ``fit`` become NaN.
+
+        Raises:
+            RuntimeError: If the encoder has no learned mapping.
+            KeyError: If ``data`` has a column that was not fitted.
+        """
+        if not self.category_mapping:
+            raise RuntimeError(
+                "Categorical mapping not found. Call OrdinalEncoding.fit() "
+                "method or call OrdinalEncoding.fit_transform() method"
+            )
+        unseen = [col for col in data.columns if col not in self.category_mapping]
+        if unseen:
+            raise KeyError(f"Columns not seen during fit: {unseen}")
+
+        data_transformed = data.copy()
+        for column in data.columns:
+            data_transformed[column] = data[column].map(self.category_mapping[column])
+        return data_transformed
+
+    def fit_transform(self, data):
+        """Fit the encoder on ``data`` and return it encoded in one step."""
+        self.fit(data)
+        return self.transform(data)
diff --git a/Pre-Processing/Algorithms/Ordinal_Encoder/testing/ordinal_encoder_test.py b/Pre-Processing/Algorithms/Ordinal_Encoder/testing/ordinal_encoder_test.py
@@ -0,0 +1,26 @@
+# Demo script: encode a small categorical DataFrame with OrdinalEncoding and
+# print the result.
+import os
+import sys
+
+# Make the `Ordinal_Encoder` package importable no matter which directory the
+# script is launched from. This file lives in
+# <Algorithms>/Ordinal_Encoder/testing/, so the directory that must be on
+# sys.path is two levels above this file's own directory. Resolving from
+# __file__ (not from a bare file name) avoids depending on the current
+# working directory.
+testing_dir = os.path.dirname(os.path.realpath(__file__))
+package_dir = os.path.dirname(testing_dir)
+algorithms_dir = os.path.dirname(package_dir)
+sys.path.append(algorithms_dir)
+
+import pandas as pd
+
+from Ordinal_Encoder.ordinal_encoder import OrdinalEncoding
+
+# Example usage
+data = {
+    'Category1': ['low', 'medium', 'high', 'medium', 'low', 'high', 'medium'],
+    'Category2': ['A', 'B', 'A', 'B', 'A', 'B', 'A'],
+    'Category3': ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X']
+}
+df = pd.DataFrame(data)
+
+# Learn the per-column mappings and encode the same data in one step
+encoder = OrdinalEncoding()
+encoded_df = encoder.fit_transform(df)
+
+print("Original DataFrame:")
+print(df)
+print("\nEncoded DataFrame:")
+print(encoded_df)
diff --git a/Pre-Processing/Algorithms/Standard_Scaler/__init__.py b/Pre-Processing/Algorithms/Standard_Scaler/__init__.py
diff --git a/Pre-Processing/Algorithms/Standard_Scaler/standard_scaler.py b/Pre-Processing/Algorithms/Standard_Scaler/standard_scaler.py
@@ -0,0 +1,42 @@
+import pandas as pd
+import numpy as np
+
+# Custom StandardScaler class
+class StandardScaling:
+    """Standardize features by removing the mean and dividing by the std.
+
+    The standard deviation is the population one (``ddof=0``), computed per
+    column along axis 0.
+
+    Attributes:
+        data_mean_: Per-column means from the last ``fit``; ``None`` before.
+        data_std_: Per-column standard deviations from the last ``fit``;
+            ``None`` before.
+    """
+
+    def __init__(self):
+        """Create an unfitted scaler."""
+        self.data_mean_ = None
+        self.data_std_ = None
+
+    def fit(self, data):
+        """Record the per-column mean and standard deviation of ``data``.
+
+        Args:
+            data: ``pandas.DataFrame`` or ``numpy.ndarray`` to learn from.
+
+        Returns:
+            This scaler, so calls can be chained.
+
+        Raises:
+            TypeError: If ``data`` is neither a DataFrame nor an ndarray.
+        """
+        if not isinstance(data, (pd.DataFrame, np.ndarray)):
+            # A real exception object is required: raising a plain string
+            # fails with "exceptions must derive from BaseException" and
+            # hides the explanation below.
+            raise TypeError(
+                "parameter should be a Pandas.DataFrame or Numpy.ndarray; "
+                f"{type(data)} found"
+            )
+        values = data.to_numpy() if isinstance(data, pd.DataFrame) else data
+
+        self.data_mean_ = np.mean(values, axis=0)
+        self.data_std_ = np.std(values, axis=0)
+        return self
+
+    def transform(self, data):
+        """Standardize ``data`` using the statistics recorded by ``fit``.
+
+        Args:
+            data: ``pandas.DataFrame`` or ``numpy.ndarray`` with the same
+                columns that were fitted.
+
+        Returns:
+            The scaled data in the same container type as ``data``. A
+            column whose fitted standard deviation is 0 is only centered.
+
+        Raises:
+            RuntimeError: If the scaler has not been fitted yet.
+        """
+        if self.data_mean_ is None or self.data_std_ is None:
+            raise RuntimeError(
+                "Call StandardScaling.fit() first or call "
+                "StandardScaling.fit_transform() as the required params not found"
+            )
+        # A constant column has zero std; dividing by 1 instead keeps the
+        # result finite (0 after centering) rather than producing NaN or inf.
+        divisor = np.where(self.data_std_ == 0, 1.0, self.data_std_)
+        return (data - self.data_mean_) / divisor
+
+    def fit_transform(self, data):
+        """Fit on ``data`` and return it standardized in a single call."""
+        self.fit(data)
+        return self.transform(data)
+
+    def get_params(self):
+        """Return the fitted statistics.
+
+        Returns:
+            Dict with keys ``"Mean"`` and ``"Standard Deviation"`` holding
+            the per-column values recorded by ``fit``.
+
+        Raises:
+            RuntimeError: If the scaler has not been fitted yet.
+        """
+        if self.data_mean_ is None or self.data_std_ is None:
+            raise RuntimeError("Params not found! Call StandardScaling.fit() first")
+        return {"Mean": self.data_mean_,
+                "Standard Deviation": self.data_std_}
+
diff --git a/Pre-Processing/Algorithms/Standard_Scaler/testing/standard_scaler_test.py b/Pre-Processing/Algorithms/Standard_Scaler/testing/standard_scaler_test.py
@@ -0,0 +1,32 @@
+# Demo script: standardize a small DataFrame with StandardScaling and print
+# the result together with the fitted parameters.
+import os
+import sys
+
+# Make the `Standard_Scaler` package importable no matter which directory the
+# script is launched from. This file lives in
+# <Algorithms>/Standard_Scaler/testing/, so the directory that must be on
+# sys.path is two levels above this file's own directory. Resolving from
+# __file__ (not from a bare file name) avoids depending on the current
+# working directory.
+testing_dir = os.path.dirname(os.path.realpath(__file__))
+package_dir = os.path.dirname(testing_dir)
+algorithms_dir = os.path.dirname(package_dir)
+sys.path.append(algorithms_dir)
+
+import pandas as pd
+
+from Standard_Scaler.standard_scaler import StandardScaling
+
+# Example DataFrame
+data = {
+    'A': [1, 2, 3, 4, 5],
+    'B': [10, 20, 30, 40, 50],
+    'C': [100, 200, 300, 400, 500]
+}
+
+df = pd.DataFrame(data)
+
+# Initialize the custom standard scaler
+scaler = StandardScaling()
+
+# Fit the scaler to the data and transform the data
+scaled_df = scaler.fit_transform(df)
+
+print("Original DataFrame:")
+print(df)
+print("\nScaled DataFrame:")
+print(scaled_df)
+print("\nAssociated Parameters:")
+print(scaler.get_params())
diff --git a/Pre-Processing/Documentation/Min_Max_Scaler/README.md b/Pre-Processing/Documentation/Min_Max_Scaler/README.md
@@ -0,0 +1,55 @@
+# MinMaxScaler
+
+A custom implementation of a MinMaxScaler class for scaling numerical data in a pandas DataFrame. The class scales the features to a specified range, typically between 0 and 1.
+
+## Features
+
+- **fit**: Calculate the minimum and maximum values of the data.
+- **transform**: Scale the data to the specified feature range.
+- **fit_transform**: Fit the scaler and transform the data in one step.
+- **get_params**: Retrieve the minimum and maximum values calculated during fitting.
+
+## Methods
+
+1. `__init__(self, feature_range=(0, 1))`
+    - Initializes the MinMaxScaling class.
+    - Parameters:
+        - feature_range (tuple): Desired range of transformed data. Default is (0, 1).
+2. `fit(self, data)`
+    - Calculates the minimum and maximum values of the data.
+    - Parameters:
+        - data (pandas.DataFrame): The data to fit.
+3. `transform(self, data)`
+    - Transforms the data to the specified feature range.
+    - Parameters:
+        - data (pandas.DataFrame): The data to transform.
+    - Returns:
+        - pandas.DataFrame: The scaled data.
+4. `fit_transform(self, data)`
+    - Fits the scaler to the data and transforms the data in one step.
+    - Parameters:
+        - data (pandas.DataFrame): The data to fit and transform.
+    - Returns:
+        - pandas.DataFrame: The scaled data.
+5. `get_params(self)`
+    - Retrieves the minimum and maximum values calculated during fitting.
+    - Returns:
+        - dict: Dictionary containing the minimum and maximum values.
+
+## Error Handling
+
+- Raises a TypeError if the input data is not a pandas DataFrame in the fit method.
+- Raises an error if transform is called before fit or fit_transform.
+- Raises an error in get_params if called before fit.
+
+## Use Case
+
+![Use Case](images/use_case.png)
+
+## Output
+
+![Output](images/output.png)
+
+## Installation
+
+No special installation is required. Just ensure you have `pandas` installed in your Python environment.
diff --git a/Pre-Processing/Documentation/Min_Max_Scaler/images/output.png b/Pre-Processing/Documentation/Min_Max_Scaler/images/output.png
diff --git a/Pre-Processing/Documentation/Min_Max_Scaler/images/use_case.png b/Pre-Processing/Documentation/Min_Max_Scaler/images/use_case.png
diff --git a/Pre-Processing/Documentation/Ordinal_Encoder/README.md b/Pre-Processing/Documentation/Ordinal_Encoder/README.md
@@ -0,0 +1,52 @@
+# OrdinalEncoder
+
+A custom implementation of an OrdinalEncoder class for encoding categorical data into ordinal integers using a pandas DataFrame. The class maps each unique category in a column to an integer based on the sorted order of that column's unique values.
+
+## Features
+
+- **fit**: Learn the mapping of categories to ordinal integers for each column.
+- **transform**: Transform the categorical data to ordinal integers based on the learned mapping.
+- **fit_transform**: Fit the encoder and transform the data in one step.
+
+## Methods
+
+1. `__init__(self)`
+    - Initializes the OrdinalEncoding class.
+    - No parameters are required.
+2. `fit(self, data)`
+    - Learns the mapping of categories to ordinal integers for each column.
+    - Parameters:
+        - data (pandas.DataFrame): The data to fit.
+    - Raises:
+        - TypeError: If the input data is not a pandas DataFrame.
+3. `transform(self, data)`
+    - Transforms the categorical data to ordinal integers based on the learned mapping.
+    - Parameters:
+        - data (pandas.DataFrame): The data to transform.
+    - Returns:
+        - pandas.DataFrame: The transformed data.
+    - Raises:
+        - Error: If transform is called before fit or fit_transform.
+4. `fit_transform(self, data)`
+    - Fits the encoder to the data and transforms the data in one step.
+    - Parameters:
+        - data (pandas.DataFrame): The data to fit and transform.
+    - Returns:
+        - pandas.DataFrame: The transformed data.
+
+## Error Handling
+
+- Raises a TypeError if the input data is not a pandas DataFrame in the fit method.
+- Raises an error if transform is called before fit or fit_transform.
+
+## Use Case
+
+![Use Case](images/use_case.png)
+
+## Output
+
+![Output](images/output.png)
+
+## Installation
+
+No special installation is required. Just ensure you have `pandas` installed in your Python environment.
diff --git a/Pre-Processing/Documentation/Ordinal_Encoder/images/output.png b/Pre-Processing/Documentation/Ordinal_Encoder/images/output.png
diff --git a/Pre-Processing/Documentation/Ordinal_Encoder/images/use_case.png b/Pre-Processing/Documentation/Ordinal_Encoder/images/use_case.png
diff --git a/Pre-Processing/Documentation/Standard_Scaler/README.md b/Pre-Processing/Documentation/Standard_Scaler/README.md
@@ -0,0 +1,60 @@
+# StandardScaler
+
+A custom implementation of a StandardScaler class for scaling numerical data in a pandas DataFrame or NumPy array. The class scales the features to have zero mean and unit variance.
+
+## Features
+
+- **fit**: Calculate the mean and standard deviation of the data.
+- **transform**: Scale the data to have zero mean and unit variance.
+- **fit_transform**: Fit the scaler and transform the data in one step.
+- **get_params**: Retrieve the mean and standard deviation calculated during fitting.
+
+## Methods
+
+1. `__init__(self)`
+    - Initializes the StandardScaling class.
+    - No parameters are required.
+2. `fit(self, data)`
+    - Calculates the mean and standard deviation of the data.
+    - Parameters:
+        - data (pandas.DataFrame or numpy.ndarray): The data to fit.
+    - Raises:
+        - TypeError: If the input data is not a pandas DataFrame or NumPy array.
+3. `transform(self, data)`
+    - Transforms the data to have zero mean and unit variance.
+    - Parameters:
+        - data (pandas.DataFrame or numpy.ndarray): The data to transform.
+    - Returns:
+        - pandas.DataFrame or numpy.ndarray: The scaled data, in the same container type as the input.
+    - Raises:
+        - Error: If transform is called before fit or fit_transform.
+4. `fit_transform(self, data)`
+    - Fits the scaler to the data and transforms the data in one step.
+    - Parameters:
+        - data (pandas.DataFrame or numpy.ndarray): The data to fit and transform.
+    - Returns:
+        - pandas.DataFrame or numpy.ndarray: The scaled data, in the same container type as the input.
+5. `get_params(self)`
+    - Retrieves the mean and standard deviation calculated during fitting.
+    - Returns:
+        - dict: Dictionary containing the mean and standard deviation.
+    - Raises:
+        - Error: If get_params is called before fit.
+
+## Error Handling
+
+- Raises a TypeError if the input data is not a pandas DataFrame or NumPy array in the fit method.
+- Raises an error if transform is called before fit or fit_transform.
+- Raises an error in get_params if called before fit.
+
+## Use Case
+
+![Use Case](images/use_case.png)
+
+## Output
+
+![Output](images/output.png)
+
+## Installation
+
+No special installation is required. Just ensure you have `pandas` and `numpy` installed in your Python environment.
diff --git a/Pre-Processing/Documentation/Standard_Scaler/images/output.png b/Pre-Processing/Documentation/Standard_Scaler/images/output.png
diff --git a/Pre-Processing/Documentation/Standard_Scaler/images/use_case.png b/Pre-Processing/Documentation/Standard_Scaler/images/use_case.png