diff --git a/lightorch/deprecated.py b/lightorch/deprecated.py
index f64f5df..3c8228a 100644
--- a/lightorch/deprecated.py
+++ b/lightorch/deprecated.py
@@ -8,7 +8,7 @@ from tqdm import tqdm
 
 # REASON: Optimization performance against other libraries (lightning)
-
+# requires tensorboard installed
 
 
 def create_config(name_run: str):
     os.makedirs(f"./{name_run}/models", exist_ok=True)
diff --git a/lightorch/nn/criterions.py b/lightorch/nn/criterions.py
index f6890c7..60b7a2c 100644
--- a/lightorch/nn/criterions.py
+++ b/lightorch/nn/criterions.py
@@ -1,6 +1,5 @@
 from torch import nn, Tensor
-from typing import Sequence, Dict, Tuple, Optional, Callable, Sequence, List
-import torch
+from typing import Sequence, Dict, Tuple, List, Union
 from . import functional as F
 from itertools import chain
 
@@ -15,8 +14,8 @@ def _merge_dicts(dicts: Sequence[Dict[str, float]]) -> Dict[str, float]:
 class LighTorchLoss(nn.Module):
     def __init__(
         self,
-        labels: Sequence[str] | str,
-        factors: Dict[str, float] | Sequence[Dict[str, float]],
+        labels: Union[Sequence[str], str],
+        factors: Union[Dict[str, float], Sequence[Dict[str, float]]],
     ) -> None:
         super().__init__()
         self.labels = labels
@@ -66,7 +65,7 @@ def forward(self, **kwargs) -> Tensor:
 class CrossEntropyLoss(nn.CrossEntropyLoss):
     def __init__(
         self,
-        weight: Tensor | None = None,
+        weight: Union[Tensor, None] = None,
         size_average=None,
         ignore_index: int = -100,
         reduce=None,
diff --git a/lightorch/nn/dnn.py b/lightorch/nn/dnn.py
index 363a892..bc90e55 100644
--- a/lightorch/nn/dnn.py
+++ b/lightorch/nn/dnn.py
@@ -1,6 +1,6 @@
 # Base Deep Neural Network
 from torch import Tensor, nn
-from typing import Sequence
+from typing import Sequence, Union
 
 
 def _SingularLayer(
@@ -22,7 +22,7 @@ def __init__(
         self,
         in_features: int,
         layers: Sequence[int],
-        activations: Sequence[nn.Module | None],
+        activations: Sequence[Union[nn.Module, None]],
     ):
         super().__init__()
         assert len(layers) == len(
diff --git a/lightorch/nn/fourier.py b/lightorch/nn/fourier.py
index 8aef4f4..7ed8c38 100644
--- a/lightorch/nn/fourier.py
+++ b/lightorch/nn/fourier.py
@@ -1,11 +1,11 @@
-from torch import nn, Tensor
-from . import functional as F
 from torch.fft import fftn, ifftn
+from torch import nn, Tensor
+from torch.nn import init
+from . import functional as F
 import torch
-from torch.nn import init
 from math import sqrt
 import torch.nn.functional as f
-from typing import Tuple, Sequence
+from typing import Tuple, Sequence, Union
 from itertools import chain
 
 
@@ -15,8 +14,8 @@ def __init__(
         n: int,
         in_channels: int,
         out_channels: int,
-        kernel_size: Tuple[int, ...] | int,
-        padding: Tuple[int, ...] | int = None,
+        kernel_size: Union[Tuple[int, ...], int],
+        padding: Union[Tuple[int, ...], int] = None,
         bias: bool = True,
         eps: float = 1e-5,
         pre_fft: bool = True,
@@ -66,7 +65,7 @@ def __init__(
 
         self._init_parameters()
 
-    def get_padding(self, padding: Tuple[int, ...] | int) -> Sequence[int]:
+    def get_padding(self, padding: Union[Tuple[int, ...], int]) -> Sequence[int]:
         if isinstance(padding, tuple):
             assert (
                 len(padding) == self.n
@@ -96,7 +95,7 @@ def __init__(
         n: int,
         in_channels: int,
         out_channels: int,
-        kernel_size: Tuple[int] | int,
+        kernel_size: Union[Tuple[int], int],
         padding: Tuple[int],
         bias: bool = True,
         eps: float = 0.00001,
@@ -125,8 +124,8 @@ def __init__(
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size: Tuple[int] | int,
-        padding: Tuple[int] | int = None,
+        kernel_size: Union[Tuple[int], int],
+        padding: Union[Tuple[int], int] = None,
         bias: bool = True,
         eps: float = 0.00001,
         pre_fft: bool = True,
@@ -170,8 +169,8 @@ def __init__(
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size: Tuple[int] | int,
-        padding: Tuple[int] | int = None,
+        kernel_size: Union[Tuple[int], int],
+        padding: Union[Tuple[int], int] = None,
         bias: bool = True,
         eps: float = 0.00001,
         pre_fft: bool = True,
@@ -216,8 +215,8 @@ def __init__(
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size: Tuple[int] | int,
-        padding: Tuple[int] | int = None,
+        kernel_size: Union[Tuple[int], int],
+        padding: Union[Tuple[int], int] = None,
         bias: bool = True,
         eps: float = 0.00001,
         pre_fft: bool = True,
@@ -263,8 +262,8 @@ def __init__(
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size: Tuple[int] | int,
-        padding: Tuple[int] | int = None,
+        kernel_size: Union[Tuple[int], int],
+        padding: Union[Tuple[int], int] = None,
         bias: bool = True,
         eps: float = 0.00001,
         pre_fft: bool = True,
@@ -308,8 +307,8 @@ def __init__(
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size: Tuple[int] | int,
-        padding: Tuple[int] | int = None,
+        kernel_size: Union[Tuple[int], int],
+        padding: Union[Tuple[int], int] = None,
         bias: bool = True,
         eps: float = 0.00001,
         pre_fft: bool = True,
@@ -353,8 +352,8 @@ def __init__(
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size: Tuple[int] | int,
-        padding: Tuple[int] | int = None,
+        kernel_size: Union[Tuple[int], int],
+        padding: Union[Tuple[int], int] = None,
         bias: bool = True,
         eps: float = 0.00001,
         pre_fft: bool = True,
diff --git a/lightorch/nn/functional.py b/lightorch/nn/functional.py
index ac14de4..3c63249 100644
--- a/lightorch/nn/functional.py
+++ b/lightorch/nn/functional.py
@@ -5,9 +5,10 @@ from lightning.pytorch import LightningModule
 from einops import rearrange
 from torch.fft import fftn
+from .utils import FeatureExtractor
 
 
-def _fourierconvNd(n: int, x: Tensor, weight: Tensor, bias: Tensor | None) -> Tensor:
+def _fourierconvNd(n: int, x: Tensor, weight: Tensor, bias: Union[Tensor, None]) -> Tensor:
     # To fourier space
     weight = fftn(weight, dim=[-i for i in range(1, n + 1)])
 
@@ -22,7 +23,7 @@ def _fourierconvNd(n: int, x: Tensor, weight: Tensor, bias: Tensor | None) -> Tensor:
 
 
 def _fourierdeconvNd(
-    n: int, x: Tensor, weight: Tensor, bias: Tensor | None, eps: float = 1e-5
+    n: int, x: Tensor, weight: Tensor, bias: Union[Tensor, None], eps: float = 1e-5
 ) -> Tensor:
     # To fourier space
     weight = fftn(weight, dim=[-i for i in range(1, n + 1)])
@@ -37,7 +38,7 @@ def _fourierdeconvNd(
     return x
 
 
-def fourierconv3d(x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None):
+def fourierconv3d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None]):
     """
     x (Tensor): batch size, channels, height, width
     weight (Tensor): out channels, *kernel_size
@@ -74,7 +75,7 @@ def fourierconv3d(x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None):
     return out
 
 
-def fourierconv2d(x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None):
+def fourierconv2d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None]):
     """
     x (Tensor): batch size, channels, height, width
     weight (Tensor): out channels, *kernel_size
@@ -107,7 +108,7 @@ def fourierconv2d(x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None):
     return out
 
 
-def fourierconv1d(x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None):
+def fourierconv1d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None]):
     """
     x (Tensor): batch size, channels, sequence length
     weight (Tensor): out channels, kernel_size
@@ -130,7 +131,7 @@ def fourierconv1d(x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None):
 
 
 def fourierdeconv3d(
-    x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None, eps: float = 1e-5
+    x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None], eps: float = 1e-5
 ):
     """
     x (Tensor): batch size, channels, height, width
@@ -169,7 +170,7 @@ def fourierdeconv3d(
 
 
 def fourierdeconv2d(
-    x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None, eps: float = 1e-5
+    x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None], eps: float = 1e-5
 ):
     """
     x (Tensor): batch size, channels, height, width
@@ -204,7 +205,7 @@ def fourierdeconv2d(
 
 
 def fourierdeconv1d(
-    x: Tensor, one: Tensor, weight: Tensor, bias: Tensor | None, eps: float = 1e-5
+    x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None], eps: float = 1e-5
 ):
     """
     x (Tensor): batch size, channels, sequence length
@@ -236,7 +237,7 @@ def _partialconvnd(
     padding,
     dilation,
     update_mask: bool = True,
-) -> Tuple[Tensor, Tensor] | Tensor:
+) -> Union[Tuple[Tensor, Tensor], Tensor]:
 
     with torch.no_grad():
         sum_m: Tensor = conv(
@@ -357,7 +358,7 @@ def style_loss(
     input: Tensor,
     target: Tensor,
     F_p: Tensor,
-    feature_extractor: nn.Module | LightningModule = None,
+    feature_extractor: FeatureExtractor = None,
 ) -> Tensor:
     if feature_extractor is not None:
         phi_input: Tensor = feature_extractor(input)
@@ -376,7 +377,7 @@ def perceptual_loss(
     input: Tensor,
     target: Tensor,
     N_phi_p: Tensor,
-    feature_extractor: nn.Module | LightningModule = None,
+    feature_extractor: FeatureExtractor = None,
 ) -> Tensor:
     if feature_extractor is not None:
         phi_input: Tensor = feature_extractor(input)
diff --git a/lightorch/training/adversarial.py b/lightorch/training/adversarial.py
index 9178c80..c145808 100644
--- a/lightorch/training/adversarial.py
+++ b/lightorch/training/adversarial.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict
+from typing import Any, Dict, Union
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LRScheduler
 import torch
@@ -7,7 +7,7 @@ import torchvision
 
 
 class Module(Module_):
-    def __init__(self, *, optimizer: str | Optimizer, scheduler: str | LRScheduler = None, triggers: Dict[str, Dict[str, float]] = None, optimizer_kwargs: Dict[str, Any] = None, scheduler_kwargs: Dict[str, Any] = None, gradient_clip_algorithm: str = None, gradient_clip_val: float = None) -> None:
+    def __init__(self, *, optimizer: Union[str, Optimizer], scheduler: Union[str, LRScheduler] = None, triggers: Dict[str, Dict[str, float]] = None, optimizer_kwargs: Dict[str, Any] = None, scheduler_kwargs: Dict[str, Any] = None, gradient_clip_algorithm: str = None, gradient_clip_val: float = None) -> None:
         super().__init__(optimizer=optimizer, scheduler=scheduler, triggers=triggers, optimizer_kwargs=optimizer_kwargs, scheduler_kwargs=scheduler_kwargs, gradient_clip_algorithm=gradient_clip_algorithm, gradient_clip_val=gradient_clip_val)
         self.automatic_optimization = False
diff --git a/lightorch/training/cli.py b/lightorch/training/cli.py
index f139bf6..5af9742 100644
--- a/lightorch/training/cli.py
+++ b/lightorch/training/cli.py
@@ -1,11 +1,12 @@
 from lightning.pytorch.cli import LightningCLI
+from typing import Union
 import torch
 
 
 def trainer(
     matmul_precision: str = "high",
     deterministic: bool = True,
-    seed: bool | int = 123,
+    seed: Union[bool, int] = 123,
 ):
     torch.set_float32_matmul_precision(matmul_precision)
diff --git a/tests/test_nn.py b/tests/test_nn.py
index d4936d2..8fdfb97 100644
--- a/tests/test_nn.py
+++ b/tests/test_nn.py
@@ -193,4 +193,18 @@ def test_kan() -> None:
     # Placeholder for future implementation
     raise NotImplementedError("KAN test not implemented")
 
-# transformers, attention, mlp, etc.
\ No newline at end of file
+def test_trans() -> None:
+    # Placeholder for future implementation
+    raise NotImplementedError("Transformer test not implemented")
+
+def test_att() -> None:
+    # Placeholder for future implementation
+    raise NotImplementedError("Attention test not implemented")
+
+def test_ffn() -> None:
+    # Placeholder for future implementation
+    raise NotImplementedError("FFN test not implemented")
+
+def test_pos_embed() -> None:
+    # Placeholder for future implementation
+    raise NotImplementedError("Positional Encoding and Embedding test not implemented")
\ No newline at end of file
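
Note (not part of the patch above): the recurring rewrite from "X | Y" to Union[X, Y] exists because PEP 604 union syntax is only evaluable at runtime on Python 3.10+; a signature annotated with "Tensor | None" typically raises a TypeError the moment the module is imported on 3.8/3.9. The sketch below is a hypothetical, minimal illustration of the annotation style these changes move to; conv_stub and its parameters are invented for the example and do not exist in lightorch.

    # Hypothetical sketch: Union/Optional annotations that also work on Python < 3.10.
    from typing import Optional, Tuple, Union

    from torch import Tensor


    def conv_stub(
        kernel_size: Union[Tuple[int, ...], int],
        padding: Union[Tuple[int, ...], int, None] = None,
        bias: Optional[Tensor] = None,  # Optional[Tensor] is equivalent to Union[Tensor, None]
    ) -> None:
        """Placeholder mirroring the signature style used across lightorch.nn after this change."""
        ...

An alternative that avoids touching every signature is "from __future__ import annotations", which defers annotation evaluation on Python 3.7+, though it does not help code that resolves annotations at runtime (for example via typing.get_type_hints).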