diff --git a/README.md b/README.md
index d5dc351..93cc5e0 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,6 @@
 # Deep learning utilities library
 
-`dlordinal` is an open-source Python toolkit focused on deep learning with ordinal methodologies. It is compatible with
-[scikit-learn](https://scikit-learn.org).
-
-The library includes various modules such as loss functions, models, layers, metrics, and an estimator.
+`dlordinal` is an open-source Python toolkit focused on deep learning with ordinal methodologies.
 
 | Overview  |                                                                                                                                            |
 |-----------|------------------------------------------------------------------------------------------------------------------------------------------|
diff --git a/build_tools/run_tutorials.sh b/build_tools/run_tutorials.sh
index 693e5dd..2a3a80e 100644
--- a/build_tools/run_tutorials.sh
+++ b/build_tools/run_tutorials.sh
@@ -6,7 +6,8 @@ set -euxo pipefail
 CMD="jupyter nbconvert --to notebook --inplace --execute --ExecutePreprocessor.timeout=600"
 
 excluded=(
   "tutorials/datasets_tutorial.ipynb"
+  "tutorials/dlordinal_with_skorch_tutorial.ipynb"
 )
 
 shopt -s lastpipe
diff --git a/dlordinal/estimator/__init__.py b/dlordinal/estimator/__init__.py
deleted file mode 100644
index 6145fc9..0000000
--- a/dlordinal/estimator/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .pytorch_estimator import PytorchEstimator
-
-__all__ = [
-    "PytorchEstimator",
-]
diff --git a/dlordinal/estimator/pytorch_estimator.py b/dlordinal/estimator/pytorch_estimator.py
deleted file mode 100644
index 4f9ee1f..0000000
--- a/dlordinal/estimator/pytorch_estimator.py
+++ /dev/null
@@ -1,188 +0,0 @@
-from typing import Optional, Union
-
-import numpy as np
-import torch
-import torch.nn.functional as F
-from sklearn.base import BaseEstimator
-from torch.utils.data import DataLoader
-
-
-class PytorchEstimator(BaseEstimator):
-    """
-    Wrapper around a Pytorch ``nn.Module`` implementing
-    the default estimator interface defined by ``scikit-learn``.
-
-    Parameters
-    ----------
-    model : torch.nn.Module
-        A Pytorch model.
-    loss_fn : torch.nn.Module
-        A Pytorch loss function.
-    optimizer : torch.optim.Optimizer
-        A Pytorch optimizer.
-    device : torch.device
-        A Pytorch device.
-    max_iter : int
-        The maximum number of iterations.
-    **kwargs : dict
-        Additional keyword arguments.
-    """
-
-    def __init__(
-        self,
-        model: torch.nn.Module,
-        loss_fn: torch.nn.Module,
-        optimizer: torch.optim.Optimizer,
-        device: torch.device,
-        max_iter: int,
-        **kwargs,
-    ):
-        self.kwargs = kwargs
-        self.model = model
-        self.loss_fn = loss_fn
-        self.optimizer = optimizer
-        self.device = device
-        self.max_iter = max_iter
-
-    def fit(
-        self,
-        X: Union[DataLoader, torch.Tensor],
-        y: Optional[Union[torch.Tensor, None]] = None,
-    ):
-        """
-        fit() is a method that fits the model to the training data.
-
-        Parameters
-        ----------
-        X : Union[DataLoader, torch.Tensor]
-            The training data.
-        y : Optional[Union[torch.Tensor, None]], default=None
-            The training labels, only used if X is a ``torch.Tensor``.
- """ - - # Check if X is a DataLoader - if isinstance(X, DataLoader): - if y is None: - print("Training ...") - self.model.train() - - # Iterate over epochs - for epoch in range(self.max_iter): - print(f"Epoch {epoch+1}/{self.max_iter}") - - # Iterate over batches - for _, (X_batch, y_batch) in enumerate(X): - self._fit(X_batch, y_batch) - - else: - raise ValueError("If X is a DataLoader, y must be None") - - # Check if X is a torch Tensor - elif isinstance(X, torch.Tensor): - if y is None: - raise ValueError("If X is a torch Tensor, y must not be None") - - # Check if y is a torch Tensor - elif isinstance(y, torch.Tensor): - print("Training ...") - self.model.train() - - # Iterate over epochs - for epoch in range(self.max_iter): - print(f"Epoch {epoch+1}/{self.max_iter}") - self._fit(X, y) - - else: - raise ValueError("y must be a torch.Tensor") - - else: - raise ValueError("X must be a DataLoader or a torch Tensor") - - return self - - def _fit(self, X, y): - """ - _fit() is a private method that performs a forward pass, computes the loss - and performs backpropagation. - - Parameters - ---------- - X : torch.Tensor - The training data. - y : torch.Tensor - The training labels. - """ - X, y = X.to(self.device), y.to(self.device) - - # Forward pass - pred = self.model(X) - loss = self.loss_fn(pred, y) - - # Backpropagation - self.optimizer.zero_grad() - loss.backward() - self.optimizer.step() - - def predict_proba(self, X: Union[DataLoader, torch.Tensor]): - """ - predict_proba() is a method that predicts the probability of each class. - - Parameters - ---------- - X : Union[DataLoader, torch.Tensor] - The data to predict. - """ - if X is None: - raise ValueError("X must be a DataLoader or a torch Tensor") - - # check if X is a DataLoader - if isinstance(X, DataLoader): - print("Predicting ...") - self.model.eval() - predictions = [] - - # Iterate over batches - for _, (X_batch, _) in enumerate(X): - predictions_batch = self._predict_proba(X_batch) - predictions.append(predictions_batch) - - # Concatenate predictions - predictions = np.concatenate(predictions) - return predictions - - # check if X is a torch Tensor - elif isinstance(X, torch.Tensor): - print("Predicting ...") - self.model.eval() - return self._predict_proba(X) - - else: - raise ValueError("X must be a DataLoader or a torch Tensor") - - def _predict_proba(self, X): - """ - _predict_proba() is a private method that predicts the probability - of each class. - - Parameters - ---------- - X : torch.Tensor - The data to predict. - """ - with torch.no_grad(): - X = X.to(self.device) - pred = self.model(X) - probabilities = F.softmax(pred, dim=1) - return probabilities.cpu().numpy() - - def predict(self, X: Union[DataLoader, torch.Tensor]): - """ - predict() is a method that predicts the class of each sample. - - Parameters - ---------- - X : Union[DataLoader, torch.Tensor] - The data to predict. 
- """ - pred = self.predict_proba(X) - return np.argmax(pred, axis=1) diff --git a/dlordinal/estimator/tests/__init__.py b/dlordinal/estimator/tests/__init__.py deleted file mode 100644 index a9a2c5b..0000000 --- a/dlordinal/estimator/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = [] diff --git a/dlordinal/estimator/tests/test_estimator.py b/dlordinal/estimator/tests/test_estimator.py deleted file mode 100644 index f73d7ad..0000000 --- a/dlordinal/estimator/tests/test_estimator.py +++ /dev/null @@ -1,220 +0,0 @@ -import numpy as np -import torch -from torch import cuda -from torch.utils.data import DataLoader, TensorDataset -from torchvision import models - -from dlordinal.estimator import PytorchEstimator - - -def test_pytorch_estimator_creation(): - # Model - model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1) - model.fc = torch.nn.Linear(model.fc.in_features, 6) - - # Loss function - loss_fn = torch.nn.CrossEntropyLoss() - - # Optimizer - optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) - - device = "cuda" if cuda.is_available() else "cpu" - model = model.to(device) - - max_iter = 5 - - estimator = PytorchEstimator(model, loss_fn, optimizer, device, max_iter) - - assert isinstance(estimator, PytorchEstimator) - - -def create_example_dataloader(batch_size, num_samples, input_size): - X = torch.randn(num_samples, input_size) - y = torch.randint(0, 2, (num_samples,)) - dataset = TensorDataset(X, y) - dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True) - return dataloader - - -def calculate_loss(model, loss_fn, dataloader): - model.eval() - total_loss = 0.0 - - with torch.no_grad(): - for X, y in dataloader: - y_pred = model(X) - loss = loss_fn(y_pred, y) - total_loss += loss.item() - - return total_loss / len(dataloader.dataset) - - -def test_pytorch_estimator_fit_Dataloader(): - input_size = 10 - num_classes = 3 - batch_size = 16 - - # Create an example DataLoader for training - train_dataloader = create_example_dataloader( - batch_size, num_samples=100, input_size=input_size - ) - - # Create an example DataLoader for prediction - test_dataloader = create_example_dataloader( - batch_size, num_samples=50, input_size=input_size - ) - - model = torch.nn.Linear(input_size, num_classes) - loss_fn = torch.nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - device = torch.device("cpu") - max_iter = 5 - - estimator = PytorchEstimator(model, loss_fn, optimizer, device, max_iter) - - # Verifies the training flow - # initial_loss = calculate_loss(model, loss_fn, test_dataloader) - estimator.fit(train_dataloader) - final_loss = calculate_loss(model, loss_fn, test_dataloader) - - assert not np.isnan(final_loss) - assert not np.isinf(final_loss) - - -def test_pytorch_estimator_fit_Tensor(): - input_size = 10 - num_classes = 3 - - # Create an example of training data - X = torch.randn(100, input_size) - - # Create an example of training labels - y = torch.randint(0, num_classes, (100,)) - print("shape of y") - print(y.shape) - - model = torch.nn.Linear(input_size, num_classes) - loss_fn = torch.nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - device = torch.device("cpu") - max_iter = 5 - - estimator = PytorchEstimator(model, loss_fn, optimizer, device, max_iter) - - # Verifies the training flow - estimator.fit(X=X, y=y) - - -def test_pytorch_estimator_predict(): - input_size = 10 - num_classes = 3 - batch_size = 16 - - # Create an example DataLoader for training - train_dataloader = 
create_example_dataloader( - batch_size, num_samples=100, input_size=input_size - ) - - # Create an example DataLoader for prediction - test_dataloader = create_example_dataloader( - batch_size, num_samples=50, input_size=input_size - ) - - model = torch.nn.Linear(input_size, num_classes) - loss_fn = torch.nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - device = torch.device("cpu") - max_iter = 5 - - estimator = PytorchEstimator(model, loss_fn, optimizer, device, max_iter) - - # Verifies the training flow - estimator.fit(train_dataloader) - - # Verifies the prediction flow - predictions = estimator.predict(test_dataloader) - - # Check that the predictions have the correct size - assert predictions.shape == (50,) - assert len(predictions) == 50 - - # Check that the predictions are values in the range [0, num_classes) - assert np.all(predictions >= 0) and np.all(predictions < num_classes) - - -def test_pytorch_estimator_predict_proba_dataloader(): - input_size = 10 - num_classes = 5 - batch_size = 16 - - # Create an example DataLoader for training - train_dataloader = create_example_dataloader( - batch_size, num_samples=100, input_size=input_size - ) - - # Create an example DataLoader for prediction - test_dataloader = create_example_dataloader( - batch_size, num_samples=50, input_size=input_size - ) - - model = torch.nn.Linear(input_size, num_classes) - loss_fn = torch.nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - device = torch.device("cpu") - max_iter = 5 - - estimator = PytorchEstimator(model, loss_fn, optimizer, device, max_iter) - - # Verifies the training flow - estimator.fit(train_dataloader) - - # Verifies the prediction flow - probabilities = estimator.predict_proba(test_dataloader) - - # Check that the probabilities have the correct shape - assert probabilities.shape == (50, 5) - - # Verify that the sum of the probabilities for each example is close to 1. - assert np.allclose(np.sum(probabilities, axis=1), np.ones(50), atol=1e-5) - - # Verify that the probabilities are in the range [0, 1] - assert np.all(probabilities >= 0) and np.all(probabilities <= 1) - - -def test_pytorch_estimator_predict_proba_tensor(): - input_size = 10 - num_classes = 3 - - # Create an example of training data - X = torch.randn(100, input_size) - - # Create an example of training labels - y = torch.randint(0, num_classes, (100,)) - print("shape of y") - print(y.shape) - - model = torch.nn.Linear(input_size, num_classes) - loss_fn = torch.nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - device = torch.device("cpu") - max_iter = 5 - - estimator = PytorchEstimator(model, loss_fn, optimizer, device, max_iter) - - # Verifies the training flow - estimator.fit(X=X, y=y) - - # minimum, maximum, (number of samples, number of features) - y_test = torch.randint(0, num_classes, (50, 10)) - y_test = y_test.float() - - probabilities = estimator.predict_proba(y_test) - - # Check that the probabilities have the correct shape - assert probabilities.shape == (50, 3) - - # Check that the sum of the probabilities for each example is close to 1. 
- assert np.allclose(np.sum(probabilities, axis=1), np.ones(50), atol=1e-5) - - # Check that the probabilities are in the range [0, 1] - assert np.all(probabilities >= 0) and np.all(probabilities <= 1) diff --git a/dlordinal/estimator/tests/test_types.py b/dlordinal/estimator/tests/test_types.py deleted file mode 100644 index 4c691f8..0000000 --- a/dlordinal/estimator/tests/test_types.py +++ /dev/null @@ -1,80 +0,0 @@ -import pytest -import torch -from torch.utils.data import DataLoader, TensorDataset - -from dlordinal.estimator import PytorchEstimator - - -@pytest.fixture -def setup_estimator(): - # Model - model = torch.nn.Linear(10, 5) - loss_fn = torch.nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - device = torch.device("cpu") - max_iter = 5 - - estimator = PytorchEstimator(model, loss_fn, optimizer, device, max_iter) - - return estimator - - -class DummyDataLoader(DataLoader): - def __init__(self): - super().__init__(dataset=None) - - -def create_example_dataloader(batch_size, num_samples, input_size): - X = torch.randn(num_samples, input_size) - y = torch.randint(0, 2, (num_samples,)) - dataset = TensorDataset(X, y) - dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True) - return dataloader - - -def test_fit_X_dataloader_y_none(setup_estimator): - dummy_dataloader = DummyDataLoader() - - y = torch.tensor([1, 0, 1, 0]) - - with pytest.raises(ValueError, match="If X is a DataLoader, y must be None"): - setup_estimator.fit(X=dummy_dataloader, y=y) - - -def test_fit_X_tensor_y_none(setup_estimator): - X = torch.tensor([1, 0, 1, 0]) - - with pytest.raises(ValueError, match="If X is a torch Tensor, y must not be None"): - setup_estimator.fit(X=X, y=None) - - -def test_fit_X_tensor_y_not_tensor(setup_estimator): - X = torch.tensor([1, 0, 1, 0]) - - with pytest.raises(ValueError, match="y must be a torch.Tensor"): - setup_estimator.fit(X=X, y=1) - - -def test_fit_X_not_dataloader_or_tensor(setup_estimator): - X = 1 - - with pytest.raises(ValueError, match="X must be a DataLoader or a torch Tensor"): - setup_estimator.fit(X=X, y=None) - - -def test_predict_proba_X_none(setup_estimator): - train_dataloader = create_example_dataloader(16, num_samples=100, input_size=10) - - estimator = setup_estimator.fit(X=train_dataloader, y=None) - - with pytest.raises(ValueError, match="X must be a DataLoader or a torch Tensor"): - estimator.predict_proba(X=None) - - -def test_predict_proba_X_not_dataloader_or_tensor(setup_estimator): - train_dataloader = create_example_dataloader(16, num_samples=100, input_size=10) - - estimator = setup_estimator.fit(X=train_dataloader, y=None) - - with pytest.raises(ValueError, match="X must be a DataLoader or a torch Tensor"): - estimator.predict_proba(X=1) diff --git a/docs/api.rst b/docs/api.rst index 862123c..a5cc246 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -16,5 +16,4 @@ This is the API for the **dlordinal** package. losses metrics models - sklearn_integration soft_labelling diff --git a/docs/sklearn_integration.rst b/docs/sklearn_integration.rst deleted file mode 100644 index 2127761..0000000 --- a/docs/sklearn_integration.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. _sklearn_integration: - -Integration with ``scikit-learn`` -================================= - -.. 
automodule:: dlordinal.estimator
-    :members:
diff --git a/tutorials/dlordinal_with_skorch_tutorial.ipynb b/tutorials/dlordinal_with_skorch_tutorial.ipynb
new file mode 100644
index 0000000..339672c
--- /dev/null
+++ b/tutorials/dlordinal_with_skorch_tutorial.ipynb
@@ -0,0 +1,329 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Importing libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from dlordinal.losses import TriangularCrossEntropyLoss\n",
+    "from dlordinal.datasets import FGNet\n",
+    "from torch import cuda, nn\n",
+    "from torch.optim import Adam\n",
+    "from torchvision import models\n",
+    "from torchvision.datasets import ImageFolder\n",
+    "from torchvision.transforms import Compose, ToTensor\n",
+    "from skorch import NeuralNetClassifier"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading and preprocessing the FGNet dataset\n",
+    "\n",
+    "First, we present the configuration parameters for the experimentation and the number of workers for the `DataLoader`, which defines the number of subprocesses used to load the images."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimiser_params = {\n",
+    "    'lr': 1e-3,\n",
+    "    'bs': 400,\n",
+    "    'epochs': 5,\n",
+    "    's': 2,\n",
+    "    'c': 0.2,\n",
+    "    'beta': 0.5\n",
+    "}\n",
+    "\n",
+    "workers = 3"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we use the `FGNet` class to download and preprocess the images, and load the train and test partitions with `torchvision`'s `ImageFolder`. A validation partition comprising 15% of the training data can be created with scikit-learn's `StratifiedShuffleSplit`, as sketched after the next cell. The batching of the images is handled later by the `DataLoader`s that `skorch` creates internally."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Files already downloaded and verified\n",
+      "Files already processed and verified\n",
+      "Files already split and verified\n",
+      "Using cuda device\n"
+     ]
+    }
+   ],
+   "source": [
+    "fgnet = FGNet(root=\"./datasets/fgnet\", download=True, process_data=True)\n",
+    "\n",
+    "train_data = ImageFolder(\n",
+    "    root=\"./datasets/fgnet/FGNET/train\", transform=Compose([ToTensor()])\n",
+    ")\n",
+    "test_data = ImageFolder(\n",
+    "    root=\"./datasets/fgnet/FGNET/test\", transform=Compose([ToTensor()])\n",
+    ")\n",
+    "\n",
+    "num_classes = len(train_data.classes)\n",
+    "classes = train_data.classes\n",
+    "targets = train_data.targets\n",
+    "\n",
+    "# Get CUDA device\n",
+    "device = \"cuda\" if cuda.is_available() else \"cpu\"\n",
+    "print(f\"Using {device} device\")"
+   ]
+  },
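+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next cell is a minimal sketch of that optional validation split, assuming the standard `scikit-learn` and `torch.utils.data` APIs: `StratifiedShuffleSplit` yields stratified train/validation indices, and `Subset` wraps them as datasets. The `train_subset` and `val_subset` names are illustrative, and the rest of this tutorial trains on the full training partition, so these subsets are not used below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sketch: hold out 15% of the training data, stratified by class\n",
+    "from sklearn.model_selection import StratifiedShuffleSplit\n",
+    "from torch.utils.data import Subset\n",
+    "\n",
+    "sss = StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=0)\n",
+    "train_idx, val_idx = next(sss.split(np.zeros(len(targets)), targets))\n",
+    "train_subset = Subset(train_data, train_idx)\n",
+    "val_subset = Subset(train_data, val_idx)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Estimator\n",
+    "\n",
+    "We are setting up a deep learning model using `PyTorch` and `skorch`. First, we define the model architecture using ResNet18, a pre-trained convolutional neural network, and customize its fully connected layer to match the number of classes in our classification task. Then we specify the loss function, in this case the Triangular Cross-Entropy Loss [1] provided by `dlordinal`. Finally, we configure the `skorch` estimator, which serves as a bridge between PyTorch and scikit-learn, allowing us to train and evaluate our model seamlessly. 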
We provide the model, loss function, and optimiser details such as the learning rate and number of epochs to the estimator. Additionally, we specify parameters for data loading and processing, like batch size and the number of workers, to optimise training performance.\n", + "\n", + "[1]: Víctor Manuel Vargas, Pedro Antonio Gutiérrez, Javier Barbero-Gómez, and César Hervás-Martínez (2023). *Soft Labelling Based on Triangular Distributions for Ordinal Classification.* Information Fusion, 93, 258--267. doi.org/10.1016/j.inffus.2023.01.003" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# Model\n", + "model = models.resnet18(weights=\"IMAGENET1K_V1\")\n", + "model.fc = nn.Linear(model.fc.in_features, num_classes)\n", + "model = model.to(device)\n", + "\n", + "# Loss function\n", + "loss_fn = TriangularCrossEntropyLoss(num_classes=num_classes).to(device)\n", + "\n", + "# Skorch estimator\n", + "estimator = NeuralNetClassifier(\n", + " module=model,\n", + " criterion=loss_fn,\n", + " optimizer=Adam,\n", + " lr=optimiser_params[\"lr\"],\n", + " max_epochs=optimiser_params[\"epochs\"],\n", + " train_split=None,\n", + " callbacks=[],\n", + " device=device,\n", + " verbose=0,\n", + " iterator_train__batch_size=optimiser_params[\"bs\"],\n", + " iterator_train__shuffle=True,\n", + " iterator_train__num_workers=workers - 1,\n", + " iterator_train__pin_memory=True,\n", + " iterator_valid__batch_size=optimiser_params[\"bs\"],\n", + " iterator_valid__shuffle=False,\n", + " iterator_valid__num_workers=workers - 1,\n", + " iterator_valid__pin_memory=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[initialized](\n", + " module_=ResNet(\n", + " (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n", + " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace=True)\n", + " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", + " (layer1): Sequential(\n", + " (0): BasicBlock(\n", + " (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace=True)\n", + " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (1): BasicBlock(\n", + " (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace=True)\n", + " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (layer2): Sequential(\n", + " (0): BasicBlock(\n", + " (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace=True)\n", + " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " 
(downsample): Sequential(\n",
+       "          (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+       "          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "      )\n",
+       "      (1): BasicBlock(\n",
+       "        (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "      )\n",
+       "    )\n",
+       "    (layer3): Sequential(\n",
+       "      (0): BasicBlock(\n",
+       "        (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
+       "        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        (downsample): Sequential(\n",
+       "          (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+       "          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "      )\n",
+       "      (1): BasicBlock(\n",
+       "        (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "      )\n",
+       "    )\n",
+       "    (layer4): Sequential(\n",
+       "      (0): BasicBlock(\n",
+       "        (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
+       "        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        (downsample): Sequential(\n",
+       "          (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+       "          (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "      )\n",
+       "      (1): BasicBlock(\n",
+       "        (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "      )\n",
+       "    )\n",
+       "    (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
+       "    (fc): Linear(in_features=512, out_features=6, bias=True)\n",
+       "  ),\n",
+       ")"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Convert the targets list to a numpy array, as expected by skorch\n",
+    "targets = np.array(targets)\n",
+    "estimator.fit(X=train_data, y=targets)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
"Train probabilities = train_probs=array([[ 3.7356095 , 2.280471 , 0.27906656, -6.5274134 , -0.1423619 ,\n", + " -0.89688575],\n", + " [ 6.7312865 , 4.2798862 , -2.754112 , -7.015324 , -0.58337 ,\n", + " -1.614481 ],\n", + " [ 1.5178611 , 0.3035042 , -1.0939833 , -1.1187612 , 0.3635073 ,\n", + " -1.4568175 ],\n", + " ...,\n", + " [-8.981531 , -2.1939955 , -1.2311378 , -1.599317 , 1.7429321 ,\n", + " 8.956122 ],\n", + " [-7.570979 , -2.4199474 , -0.9986418 , 2.073321 , 1.8904057 ,\n", + " 3.514359 ],\n", + " [-4.612633 , -2.3110492 , 1.4501587 , 1.0073776 , 0.30610457,\n", + " 1.1957583 ]], dtype=float32)\n", + "\n", + "Test probabilities = test_probs=array([[-0.7816221 , -0.87308043, -1.196569 , -2.6637518 , 1.0128176 ,\n", + " 2.083984 ],\n", + " [-0.04164401, 1.2640952 , 1.5022627 , -2.0729616 , -0.1945019 ,\n", + " -1.6816527 ],\n", + " [ 7.281721 , 2.9113057 , -3.4834485 , -7.3575487 , 1.2093832 ,\n", + " -1.4325407 ],\n", + " ...,\n", + " [-9.944385 , -3.6944542 , -0.42169666, 0.5072165 , 2.8238878 ,\n", + " 7.273284 ],\n", + " [-5.4416714 , -3.3233507 , -2.3007298 , 0.98697877, 2.2850878 ,\n", + " 4.3965707 ],\n", + " [-4.6799173 , -1.7463862 , -0.5284957 , -1.7213606 , 0.89393014,\n", + " 4.456833 ]], dtype=float32)\n" + ] + } + ], + "source": [ + "train_probs = estimator.predict_proba(train_data)\n", + "print(f\"Train probabilities = {train_probs=}\\n\")\n", + "\n", + "test_probs = estimator.predict_proba(test_data)\n", + "print(f\"Test probabilities = {test_probs=}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Torch", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "385611db6ca4af2663855b1744f455946eef985f7b33eb977c97667790417df3" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}