From 58925d51221dc08b93b5c532b8d7febd8c618b19 Mon Sep 17 00:00:00 2001
From: Jorgedavyd
Date: Sun, 9 Jun 2024 11:01:28 -0400
Subject: [PATCH] setting up test automation with ffn, attention, transformers, embedding, etc.

---
 README.md                            |  1 -
 lightorch/nn/kan.py                  | 24 -----------
 lightorch/nn/transformer/__init__.py |  1 +
 lightorch/nn/transformer/ffn.py      | 59 ++----------------------------
 tests/test_nn.py                     | 37 +++++++++++++----
 5 files changed, 35 insertions(+), 87 deletions(-)
 delete mode 100644 lightorch/nn/kan.py

diff --git a/README.md b/README.md
index 6a267bc..d9e9901 100644
--- a/README.md
+++ b/README.md
@@ -98,7 +98,6 @@ exec: `python3 -m training -c config.yaml`
 - Multi-Objective and Single-Objective optimization and Hyperparameter tuning with optuna.
 
 ## Modules
-- KAN: Kolmogorov-Arnold Networks
 - Fourier Convolution.
 - Fourier Deconvolution.
 - Partial Convolution. (Optimized implementation)
diff --git a/lightorch/nn/kan.py b/lightorch/nn/kan.py
deleted file mode 100644
index e2ba49d..0000000
--- a/lightorch/nn/kan.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from torch import nn, Tensor
-
-
-# Revise
-class KAN(nn.Module):
-    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
-        super().__init__()
-        self.in_features = in_features
-        self.out_features = out_features
-        for i in range(1, in_features):
-            for o in range(1, out_features):
-                setattr(
-                    self, f"phi{o}_{i}", nn.Linear(1, 1, bias)
-                )  # Add other learnable function
-
-    def next_step(self, x: Tensor, i: int) -> Tensor:
-        out = 0
-        for o in range(self.out_features):
-            out += getattr(self, f"phi{o}_{i}")(x)
-        return out
-
-    def forward(self, x: Tensor) -> Tensor:
-        out = Tensor([self.next_step(x, i) for i in range(self.in_features)])
-        return out
diff --git a/lightorch/nn/transformer/__init__.py b/lightorch/nn/transformer/__init__.py
index 460266a..83619f3 100644
--- a/lightorch/nn/transformer/__init__.py
+++ b/lightorch/nn/transformer/__init__.py
@@ -2,3 +2,4 @@
 from .embedding import *
 from .positional import *
 from .ffn import *
+from .transformer import *
\ No newline at end of file
diff --git a/lightorch/nn/transformer/ffn.py b/lightorch/nn/transformer/ffn.py
index 525271a..841bfed 100644
--- a/lightorch/nn/transformer/ffn.py
+++ b/lightorch/nn/transformer/ffn.py
@@ -11,8 +11,8 @@ def __init__(
         activation: Callable[[Tensor], Tensor],
     ) -> None:
         super().__init__()
-        self.w1 = nn.Linear(in_features, in_features * k_multiplier, False)
-        self.w2 = nn.Linear(in_features * k_multiplier, out_features, False)
+        self.w1 = nn.Linear(in_features, in_features * k_multiplier, bias=False)
+        self.w2 = nn.Linear(in_features * k_multiplier, out_features, bias=False)
         self.activation = activation
 
     def forward(self, x: Tensor) -> Tensor:
@@ -23,41 +23,26 @@ class FFN_ReLU(_DefaultFFN):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.ReLU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class FFN_Sigmoid(_DefaultFFN):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.Sigmoid())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class FFN_Swish(_DefaultFFN):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.SiLU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class FFN_GELU(_DefaultFFN):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.GELU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class FFN_Bilinear(_DefaultFFN):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.Identity())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class _GLU(nn.Module):
     def __init__(
@@ -67,8 +52,8 @@ def __init__(
         activation: Callable[[Tensor], Tensor],
     ) -> None:
         super().__init__()
-        self.w1 = nn.Linear(in_features, out_features, True)
-        self.w2 = nn.Linear(in_features, out_features, True)
+        self.w1 = nn.Linear(in_features, out_features, bias=True)
+        self.w2 = nn.Linear(in_features, out_features, bias=True)
         self.activation = activation
 
     def forward(self, x: Tensor) -> Tensor:
@@ -79,41 +64,26 @@ class BiGLU(_GLU):
     def __init__(self, in_features: int, out_features: int) -> None:
         super().__init__(in_features, out_features, nn.Identity())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward()
-
 
 class GLU(_GLU):
     def __init__(self, in_features: int, out_features: int) -> None:
         super().__init__(in_features, out_features, nn.Sigmoid())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class ReGLU(_GLU):
     def __init__(self, in_features: int, out_features: int) -> None:
         super().__init__(in_features, out_features, nn.ReLU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class GEGLU(_GLU):
     def __init__(self, in_features: int, out_features: int) -> None:
         super().__init__(in_features, out_features, nn.GELU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class SiGLU(_GLU):
     def __init__(self, in_features: int, out_features: int) -> None:
         super().__init__(in_features, out_features, nn.SiLU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class _GLU_variants(nn.Module):
     def __init__(
@@ -137,41 +107,21 @@ class FFN_SwiGLU(_GLU_variants):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.SiLU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class FFN_ReGLU(_GLU_variants):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.ReLU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class FFN_GEGLU(_GLU_variants):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.GELU())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
-
-class FFN_Bilinear(_GLU_variants):
-    def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
-        super().__init__(in_features, k_multiplier, out_features, nn.Identity())
-
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 class FFN_GLU(_GLU_variants):
     def __init__(self, in_features: int, k_multiplier: int, out_features: int) -> None:
         super().__init__(in_features, k_multiplier, out_features, nn.Sigmoid())
 
-    def forward(self, x: Tensor) -> Tensor:
-        return super().forward(x)
-
 
 __all__ = [
     "FFN_ReLU",
@@ -187,6 +137,5 @@ def forward(self, x: Tensor) -> Tensor:
     "FFN_SwiGLU",
     "FFN_ReGLU",
     "FFN_GEGLU",
-    "FFN_Bilinear",
     "FFN_GLU",
 ]
diff --git a/tests/test_nn.py b/tests/test_nn.py
index 8fdfb97..4d88983 100644
--- a/tests/test_nn.py
+++ b/tests/test_nn.py
@@ -3,6 +3,7 @@
 import random
 from lightorch.nn import *
 from .utils import *
+import pytest
 
 random.seed(42)
 torch.manual_seed(42)
@@ -189,10 +190,6 @@ def test_monte_carlo() -> None:
     assert output.shape == (32, 1), "MonteCarloFC failed"
 
 
-def test_kan() -> None:
-    # Placeholder for future implementation
-    raise NotImplementedError("KAN test not implemented")
-
 def test_trans() -> None:
     # Placeholder for future implementation
     raise NotImplementedError("Transformer test not implemented")
@@ -201,9 +198,35 @@ def test_att() -> None:
     # Placeholder for future implementation
     raise NotImplementedError("Attention test not implemented")
 
-def test_ffn() -> None:
-    # Placeholder for future implementation
-    raise NotImplementedError("FFN test not implemented")
+
+models_with_params = [
+    (FFN_ReLU, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+    (FFN_Bilinear, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+    (FFN_Sigmoid, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+    (FFN_Swish, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+    (FFN_GELU, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+    (BiGLU, {"in_features": 64, "out_features": 128}),
+    (GLU, {"in_features": 64, "out_features": 128}),
+    (ReGLU, {"in_features": 64, "out_features": 128}),
+    (GEGLU, {"in_features": 64, "out_features": 128}),
+    (SiGLU, {"in_features": 64, "out_features": 128}),
+    (FFN_SwiGLU, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+    (FFN_ReGLU, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+    (FFN_GEGLU, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+    (FFN_GLU, {"in_features": 64, "k_multiplier": 2, "out_features": 128}),
+]
+
+@pytest.mark.parametrize("model_class, params", models_with_params)
+def test_ffn(model_class, params) -> None:
+    model = model_class(**params)
+
+    in_features = params['in_features']
+    x = torch.randn(32, in_features)
+
+    output = model(x)
+
+    out_features = params['out_features']
+    assert output.shape == (32, out_features)
 
 def test_pos_embed() -> None:
     # Placeholder for future implementation
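Note (not part of the patch): the snippet below is a minimal standalone sketch of what the new parametrized test asserts, shown for one case only. It assumes lightorch is installed so that lightorch.nn exposes FFN_SwiGLU (as tests/test_nn.py does via `from lightorch.nn import *`) and reuses the shapes from the hunk above.

    # Standalone sketch of the shape check performed by the parametrized test_ffn.
    import torch
    from lightorch.nn import FFN_SwiGLU  # one of the fourteen parametrized modules

    model = FFN_SwiGLU(in_features=64, k_multiplier=2, out_features=128)
    x = torch.randn(32, 64)              # batch of 32 vectors, matching the test input
    assert model(x).shape == (32, 128)   # same assertion as in the new test

Running `pytest tests/test_nn.py -k test_ffn` after applying the patch would exercise all fourteen parametrized cases.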