diff --git a/lightorch/__pycache__/__init__.cpython-310.pyc b/lightorch/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..7c29c23 Binary files /dev/null and b/lightorch/__pycache__/__init__.cpython-310.pyc differ diff --git a/lightorch/__pycache__/_version.cpython-310.pyc b/lightorch/__pycache__/_version.cpython-310.pyc new file mode 100644 index 0000000..ab0e93a Binary files /dev/null and b/lightorch/__pycache__/_version.cpython-310.pyc differ diff --git a/lightorch/__pycache__/hparams.cpython-310.pyc b/lightorch/__pycache__/hparams.cpython-310.pyc new file mode 100644 index 0000000..4b063eb Binary files /dev/null and b/lightorch/__pycache__/hparams.cpython-310.pyc differ diff --git a/lightorch/antique.py b/lightorch/antique.py index 3c8228a..8b78880 100644 --- a/lightorch/antique.py +++ b/lightorch/antique.py @@ -10,6 +10,7 @@ # REASON: Optimization performance against other libraries (lightning) # requires tensorboard installed + def create_config(name_run: str): os.makedirs(f"./{name_run}/models", exist_ok=True) return { diff --git a/lightorch/hparams.py b/lightorch/hparams.py index 06bbb67..ea248c6 100644 --- a/lightorch/hparams.py +++ b/lightorch/hparams.py @@ -43,20 +43,22 @@ def objective(trial: optuna.trial.Trial): trainer.fit(model, datamodule=dataset) if isinstance(valid_metrics, str): - if valid_metrics == 'hp_metric': + if valid_metrics == "hp_metric": return trainer.callback_metrics[valid_metrics].item() - return trainer.callback_metrics[f'Training/{valid_metrics}'].item() - + return trainer.callback_metrics[f"Training/{valid_metrics}"].item() + else: out = [] for valid_metric in valid_metrics: - if valid_metric == 'hp_metric': + if valid_metric == "hp_metric": out.append(trainer.callback_metrics[valid_metric].item()) else: - out.append(trainer.callback_metrics[f'Training/{valid_metric}'].item()) - + out.append( + trainer.callback_metrics[f"Training/{valid_metric}"].item() + ) + return out - + if "precision" in kwargs: torch.set_float32_matmul_precision(precision) else: diff --git a/lightorch/nn/__pycache__/__init__.cpython-310.pyc b/lightorch/nn/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..56f4f6a Binary files /dev/null and b/lightorch/nn/__pycache__/__init__.cpython-310.pyc differ diff --git a/lightorch/nn/__pycache__/complex.cpython-310.pyc b/lightorch/nn/__pycache__/complex.cpython-310.pyc new file mode 100644 index 0000000..b653fe5 Binary files /dev/null and b/lightorch/nn/__pycache__/complex.cpython-310.pyc differ diff --git a/lightorch/nn/__pycache__/criterions.cpython-310.pyc b/lightorch/nn/__pycache__/criterions.cpython-310.pyc new file mode 100644 index 0000000..da2d69e Binary files /dev/null and b/lightorch/nn/__pycache__/criterions.cpython-310.pyc differ diff --git a/lightorch/nn/__pycache__/dnn.cpython-310.pyc b/lightorch/nn/__pycache__/dnn.cpython-310.pyc new file mode 100644 index 0000000..cb23813 Binary files /dev/null and b/lightorch/nn/__pycache__/dnn.cpython-310.pyc differ diff --git a/lightorch/nn/__pycache__/fourier.cpython-310.pyc b/lightorch/nn/__pycache__/fourier.cpython-310.pyc new file mode 100644 index 0000000..0dc6074 Binary files /dev/null and b/lightorch/nn/__pycache__/fourier.cpython-310.pyc differ diff --git a/lightorch/nn/__pycache__/functional.cpython-310.pyc b/lightorch/nn/__pycache__/functional.cpython-310.pyc new file mode 100644 index 0000000..5eb4bcd Binary files /dev/null and b/lightorch/nn/__pycache__/functional.cpython-310.pyc differ diff --git 
a/lightorch/nn/__pycache__/monte_carlo.cpython-310.pyc b/lightorch/nn/__pycache__/monte_carlo.cpython-310.pyc new file mode 100644 index 0000000..285e938 Binary files /dev/null and b/lightorch/nn/__pycache__/monte_carlo.cpython-310.pyc differ diff --git a/lightorch/nn/__pycache__/normalization.cpython-310.pyc b/lightorch/nn/__pycache__/normalization.cpython-310.pyc new file mode 100644 index 0000000..bcdfcda Binary files /dev/null and b/lightorch/nn/__pycache__/normalization.cpython-310.pyc differ diff --git a/lightorch/nn/__pycache__/partial.cpython-310.pyc b/lightorch/nn/__pycache__/partial.cpython-310.pyc new file mode 100644 index 0000000..317b109 Binary files /dev/null and b/lightorch/nn/__pycache__/partial.cpython-310.pyc differ diff --git a/lightorch/nn/__pycache__/utils.cpython-310.pyc b/lightorch/nn/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000..163e784 Binary files /dev/null and b/lightorch/nn/__pycache__/utils.cpython-310.pyc differ diff --git a/lightorch/nn/criterions.py b/lightorch/nn/criterions.py index 3dd4af0..5141a29 100644 --- a/lightorch/nn/criterions.py +++ b/lightorch/nn/criterions.py @@ -26,7 +26,9 @@ def __init__( class Loss(LighTorchLoss): def __init__(self, *loss) -> None: - assert (len(set(map(type, loss))) == len(loss)), 'Not valid input classes, each should be different.' + assert len(set(map(type, loss))) == len( + loss + ), "Not valid input classes, each should be different." super().__init__( list(set([*chain.from_iterable([i.labels for i in loss])])), _merge_dicts([i.factors for i in loss]), diff --git a/lightorch/nn/dnn.py b/lightorch/nn/dnn.py index 5dd95e5..bc90e55 100644 --- a/lightorch/nn/dnn.py +++ b/lightorch/nn/dnn.py @@ -43,4 +43,4 @@ def forward(self, input: Tensor) -> Tensor: return self.dnn(input) -__all__ = ["DeepNeuralNetwork"] \ No newline at end of file +__all__ = ["DeepNeuralNetwork"] diff --git a/lightorch/nn/fourier.py b/lightorch/nn/fourier.py index 8acf995..13691a4 100644 --- a/lightorch/nn/fourier.py +++ b/lightorch/nn/fourier.py @@ -15,8 +15,8 @@ def __init__( n: int, in_channels: int, out_channels: int, - kernel_size: Union[Tuple[int],int], - padding: Union[Tuple[int],int] = None, + kernel_size: Union[Tuple[int], int], + padding: Union[Tuple[int], int] = None, bias: bool = True, eps: float = 1e-5, pre_fft: bool = True, @@ -66,7 +66,7 @@ def __init__( self._init_parameters() - def get_padding(self, padding: Union[Tuple[int],int]) -> Sequence[int]: + def get_padding(self, padding: Union[Tuple[int], int]) -> Sequence[int]: if isinstance(padding, tuple): assert ( len(padding) == self.n @@ -96,7 +96,7 @@ def __init__( n: int, in_channels: int, out_channels: int, - kernel_size: Union[Tuple[int],int], + kernel_size: Union[Tuple[int], int], padding: Tuple[int], bias: bool = True, eps: float = 0.00001, @@ -125,8 +125,8 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[Tuple[int],int], - padding: Union[Tuple[int],int] = None, + kernel_size: Union[Tuple[int], int], + padding: Union[Tuple[int], int] = None, bias: bool = True, eps: float = 0.00001, pre_fft: bool = True, @@ -170,8 +170,8 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[Tuple[int],int], - padding: Union[Tuple[int],int] = None, + kernel_size: Union[Tuple[int], int], + padding: Union[Tuple[int], int] = None, bias: bool = True, eps: float = 0.00001, pre_fft: bool = True, @@ -216,8 +216,8 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[Tuple[int],int], - padding: 
Union[Tuple[int],int] = None, + kernel_size: Union[Tuple[int], int], + padding: Union[Tuple[int], int] = None, bias: bool = True, eps: float = 0.00001, pre_fft: bool = True, @@ -263,8 +263,8 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[Tuple[int],int], - padding: Union[Tuple[int],int] = None, + kernel_size: Union[Tuple[int], int], + padding: Union[Tuple[int], int] = None, bias: bool = True, eps: float = 0.00001, pre_fft: bool = True, @@ -308,8 +308,8 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[Tuple[int],int], - padding: Union[Tuple[int],int] = None, + kernel_size: Union[Tuple[int], int], + padding: Union[Tuple[int], int] = None, bias: bool = True, eps: float = 0.00001, pre_fft: bool = True, @@ -353,8 +353,8 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[Tuple[int],int], - padding: Union[Tuple[int],int] = None, + kernel_size: Union[Tuple[int], int], + padding: Union[Tuple[int], int] = None, bias: bool = True, eps: float = 0.00001, pre_fft: bool = True, diff --git a/lightorch/nn/functional.py b/lightorch/nn/functional.py index 62f94e7..5356830 100644 --- a/lightorch/nn/functional.py +++ b/lightorch/nn/functional.py @@ -7,7 +7,9 @@ from .utils import FeatureExtractor2D -def _fourierconvNd(n: int, x: Tensor, weight: Tensor, bias: Union[Tensor,None]) -> Tensor: +def _fourierconvNd( + n: int, x: Tensor, weight: Tensor, bias: Union[Tensor, None] +) -> Tensor: # To fourier space weight = fftn(weight, dim=[-i for i in range(1, n + 1)]) @@ -22,7 +24,7 @@ def _fourierconvNd(n: int, x: Tensor, weight: Tensor, bias: Union[Tensor,None]) def _fourierdeconvNd( - n: int, x: Tensor, weight: Tensor, bias: Union[Tensor,None], eps: float = 1e-5 + n: int, x: Tensor, weight: Tensor, bias: Union[Tensor, None], eps: float = 1e-5 ) -> Tensor: # To fourier space weight = fftn(weight, dim=[-i for i in range(1, n + 1)]) @@ -37,7 +39,7 @@ def _fourierdeconvNd( return x -def fourierconv3d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,None]): +def fourierconv3d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None]): """ x (Tensor): batch size, channels, height, width weight (Tensor): out channels, *kernel_size @@ -74,7 +76,7 @@ def fourierconv3d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,Non return out -def fourierconv2d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,None]): +def fourierconv2d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None]): """ x (Tensor): batch size, channels, height, width weight (Tensor): out channels, *kernel_size @@ -107,7 +109,7 @@ def fourierconv2d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,Non return out -def fourierconv1d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,None]): +def fourierconv1d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None]): """ x (Tensor): batch size, channels, sequence length weight (Tensor): out channels, kernel_size @@ -130,7 +132,7 @@ def fourierconv1d(x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,Non def fourierdeconv3d( - x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,None], eps: float = 1e-5 + x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None], eps: float = 1e-5 ): """ x (Tensor): batch size, channels, height, width @@ -169,7 +171,7 @@ def fourierdeconv3d( def fourierdeconv2d( - x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,None], eps: float = 1e-5 + x: Tensor, one: Tensor, weight: Tensor, 
bias: Union[Tensor, None], eps: float = 1e-5 ): """ x (Tensor): batch size, channels, height, width @@ -204,7 +206,7 @@ def fourierdeconv2d( def fourierdeconv1d( - x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor,None], eps: float = 1e-5 + x: Tensor, one: Tensor, weight: Tensor, bias: Union[Tensor, None], eps: float = 1e-5 ): """ x (Tensor): batch size, channels, sequence length diff --git a/lightorch/nn/sequential/__pycache__/__init__.cpython-310.pyc b/lightorch/nn/sequential/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..4222682 Binary files /dev/null and b/lightorch/nn/sequential/__pycache__/__init__.cpython-310.pyc differ diff --git a/lightorch/nn/sequential/__pycache__/residual.cpython-310.pyc b/lightorch/nn/sequential/__pycache__/residual.cpython-310.pyc new file mode 100644 index 0000000..7698bc0 Binary files /dev/null and b/lightorch/nn/sequential/__pycache__/residual.cpython-310.pyc differ diff --git a/lightorch/nn/sequential/residual.py b/lightorch/nn/sequential/residual.py index 15a56ef..f2d645d 100644 --- a/lightorch/nn/sequential/residual.py +++ b/lightorch/nn/sequential/residual.py @@ -2,16 +2,21 @@ from typing import Union, Tuple, Callable, Any, Union from ..functional import residual_connection + class _Residual(nn.Module): - def __init__(self, module: Union[nn.Module, Callable[[int, int], nn.Module]], n_layers: int): + def __init__( + self, module: Union[nn.Module, Callable[[int, int], nn.Module]], n_layers: int + ): super().__init__() self.model = nn.ModuleList([module for _ in range(n_layers)]) + def forward(self, x: Tensor) -> Tensor: for layer in self.model: x, _ = residual_connection(x, lambda x: layer(x)) - + return x + class LSTM(_Residual): def __init__( self, @@ -21,25 +26,29 @@ def __init__( res_layers: int, bias: bool = True, batch_first: bool = True, - dropout: float = 0., + dropout: float = 0.0, bidirectional: bool = False, proj_size: int = 0, device: Union[Any, None] = None, dtype: Union[Any, None] = None, ) -> None: - super().__init__(nn.LSTM( - input_size, - hidden_size, - lstm_layers, - bias, - batch_first, - dropout, - bidirectional, - proj_size, - device, - dtype - ), res_layers) - + super().__init__( + nn.LSTM( + input_size, + hidden_size, + lstm_layers, + bias, + batch_first, + dropout, + bidirectional, + proj_size, + device, + dtype, + ), + res_layers, + ) + + class GRU(_Residual): def __init__( self, @@ -49,21 +58,25 @@ def __init__( res_layers: int, bias: bool = True, batch_first: bool = True, - dropout: float = 0., + dropout: float = 0.0, bidirectional: bool = False, device: Union[Any, None] = None, dtype: Union[Any, None] = None, ) -> None: - super().__init__(nn.GRU( - input_size, - hidden_size, - gru_layers, - bias, - batch_first, - dropout, - bidirectional, - device, - dtype, - ), res_layers) - -__all__ = ['LSTM', 'GRU'] \ No newline at end of file + super().__init__( + nn.GRU( + input_size, + hidden_size, + gru_layers, + bias, + batch_first, + dropout, + bidirectional, + device, + dtype, + ), + res_layers, + ) + + +__all__ = ["LSTM", "GRU"] diff --git a/lightorch/nn/transformer/__init__.py b/lightorch/nn/transformer/__init__.py index 83619f3..ffb612d 100644 --- a/lightorch/nn/transformer/__init__.py +++ b/lightorch/nn/transformer/__init__.py @@ -2,4 +2,4 @@ from .embedding import * from .positional import * from .ffn import * -from .transformer import * \ No newline at end of file +from .transformer import * diff --git a/lightorch/nn/transformer/__pycache__/__init__.cpython-310.pyc 
b/lightorch/nn/transformer/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..490bab5 Binary files /dev/null and b/lightorch/nn/transformer/__pycache__/__init__.cpython-310.pyc differ diff --git a/lightorch/nn/transformer/__pycache__/attention.cpython-310.pyc b/lightorch/nn/transformer/__pycache__/attention.cpython-310.pyc new file mode 100644 index 0000000..18d2696 Binary files /dev/null and b/lightorch/nn/transformer/__pycache__/attention.cpython-310.pyc differ diff --git a/lightorch/nn/transformer/__pycache__/embedding.cpython-310.pyc b/lightorch/nn/transformer/__pycache__/embedding.cpython-310.pyc new file mode 100644 index 0000000..17c12f5 Binary files /dev/null and b/lightorch/nn/transformer/__pycache__/embedding.cpython-310.pyc differ diff --git a/lightorch/nn/transformer/__pycache__/ffn.cpython-310.pyc b/lightorch/nn/transformer/__pycache__/ffn.cpython-310.pyc new file mode 100644 index 0000000..0abc956 Binary files /dev/null and b/lightorch/nn/transformer/__pycache__/ffn.cpython-310.pyc differ diff --git a/lightorch/nn/transformer/__pycache__/positional.cpython-310.pyc b/lightorch/nn/transformer/__pycache__/positional.cpython-310.pyc new file mode 100644 index 0000000..92cd049 Binary files /dev/null and b/lightorch/nn/transformer/__pycache__/positional.cpython-310.pyc differ diff --git a/lightorch/nn/transformer/__pycache__/transformer.cpython-310.pyc b/lightorch/nn/transformer/__pycache__/transformer.cpython-310.pyc new file mode 100644 index 0000000..4375a90 Binary files /dev/null and b/lightorch/nn/transformer/__pycache__/transformer.cpython-310.pyc differ diff --git a/lightorch/nn/transformer/attention.py b/lightorch/nn/transformer/attention.py index 150a016..2eafe94 100644 --- a/lightorch/nn/transformer/attention.py +++ b/lightorch/nn/transformer/attention.py @@ -3,6 +3,7 @@ import torch from einops import rearrange from math import sqrt + """ Types: - GroupedQueryAttention @@ -19,21 +20,24 @@ class _AttentionBase(nn.Module): def __init__( - self, sequence_length: int, sequence_attention: bool = False, scale_factor: float = 1., flash_attention: bool = True, is_causal: bool = False, attn_mask: Tensor = None + self, + sequence_length: int, + sequence_attention: bool = False, + scale_factor: float = 1.0, + flash_attention: bool = True, + is_causal: bool = False, + attn_mask: Tensor = None, ) -> None: super().__init__() self.attn_mask = attn_mask if is_causal: - assert (attn_mask is None), 'You defined both attn_mask and is_causal' + assert attn_mask is None, "You defined both attn_mask and is_causal" self.attn_mask = torch.ones(sequence_length, sequence_length).tril() self.seq = sequence_attention self.scale_factor = scale_factor self.flash = flash_attention - - def normal_attention( - self, q: Tensor, k: Tensor, v: Tensor - ) -> Tensor: + def normal_attention(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor: if self.flash: with torch.backends.cuda.sdp_kernel( enable_flash=True, enable_math=False, enable_mem_efficient=False @@ -44,17 +48,15 @@ def normal_attention( return out energy = q @ k.transpose(-1, -2) - + if self.attn_mask is not None: energy.masked_fill_(self.attn_mask, -torch.inf) energy *= self.scale_factor - return F.softmax(energy, dim = -1) @ v + return F.softmax(energy, dim=-1) @ v - def seq_attention( - self, q: Tensor, k: Tensor, v: Tensor - ) -> Tensor: + def seq_attention(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor: if self.flash: with torch.backends.cuda.sdp_kernel( enable_flash=True, enable_math=False, enable_mem_efficient=False @@ 
-75,7 +77,7 @@ def seq_attention( energy *= self.scale_factor - return (F.softmax(energy, dim = -1) @ v.transpose(-1, -2)).transpose(-1, -2) + return (F.softmax(energy, dim=-1) @ v.transpose(-1, -2)).transpose(-1, -2) def attention(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor: if self.seq: @@ -125,10 +127,17 @@ def __init__( ) -> None: if scale_factor is None: if sequence_attention: - scale_factor: float = 1/sqrt(sequence_length) + scale_factor: float = 1 / sqrt(sequence_length) else: - scale_factor: float = 1/sqrt(embed_dim) - super(GroupedQueryAttention, self).__init__(sequence_length, sequence_attention, scale_factor, flash_attention, is_causal, attn_mask) + scale_factor: float = 1 / sqrt(embed_dim) + super(GroupedQueryAttention, self).__init__( + sequence_length, + sequence_attention, + scale_factor, + flash_attention, + is_causal, + attn_mask, + ) # Defining the hidden spaces if vdim is None: vdim = embed_dim @@ -159,17 +168,20 @@ def __init__( def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor: # Reshaping for broadcasting q = rearrange( - self.Wq(q), "b s (g q d) -> b g q s d", q=self.group_query_amount, g=self.n_groups + self.Wq(q), + "b s (g q d) -> b g q s d", + q=self.group_query_amount, + g=self.n_groups, ) # -> (B, S, vdim) -> (B, n_groups, n_queries, S, query_dim) - k = rearrange( - self.Wk(k), "b s (g d) -> b g s d", g=self.n_groups - ).unsqueeze(2) # -> (B, n_groups, S, query_dim) - v = rearrange( - self.Wv(v), "b s (g d) -> b g s d", g=self.n_groups - ).unsqueeze(2) # -> (B, n_groups, S, query_dim) - + k = rearrange(self.Wk(k), "b s (g d) -> b g s d", g=self.n_groups).unsqueeze( + 2 + ) # -> (B, n_groups, S, query_dim) + v = rearrange(self.Wv(v), "b s (g d) -> b g s d", g=self.n_groups).unsqueeze( + 2 + ) # -> (B, n_groups, S, query_dim) + print(q.shape, k.shape, v.shape) - + out = rearrange(self.attention(q, k, v), "b g q s d -> b s (g q d)") return self.fc(out) @@ -177,11 +189,11 @@ def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor: class MultiHeadAttention(_AttentionBase): def __init__( - self, - embed_dim: int, + self, + embed_dim: int, sequence_length: int, - n_heads: int, - kdim: int = None, + n_heads: int, + kdim: int = None, vdim: int = None, is_causal: bool = False, attn_mask: bool = None, @@ -191,10 +203,17 @@ def __init__( ) -> None: if scale_factor is None: if sequence_attention: - scale_factor: float = 1/sqrt(sequence_length) + scale_factor: float = 1 / sqrt(sequence_length) else: - scale_factor: float = 1/sqrt(embed_dim) - super(MultiHeadAttention, self).__init__(sequence_length, sequence_attention, scale_factor, flash_attention, is_causal, attn_mask) + scale_factor: float = 1 / sqrt(embed_dim) + super(MultiHeadAttention, self).__init__( + sequence_length, + sequence_attention, + scale_factor, + flash_attention, + is_causal, + attn_mask, + ) # Defining hidden spaces if vdim is None: vdim = embed_dim @@ -250,10 +269,17 @@ def __init__( ) -> None: if scale_factor is None: if sequence_attention: - scale_factor: float = 1/sqrt(sequence_length) + scale_factor: float = 1 / sqrt(sequence_length) else: - scale_factor: float = 1/sqrt(embed_dim) - super(MultiQueryAttention, self).__init__(sequence_length, sequence_attention, scale_factor, flash_attention, is_causal, attn_mask) + scale_factor: float = 1 / sqrt(embed_dim) + super(MultiQueryAttention, self).__init__( + sequence_length, + sequence_attention, + scale_factor, + flash_attention, + is_causal, + attn_mask, + ) if vdim is None: vdim = embed_dim if kdim is None: diff --git 
a/lightorch/nn/transformer/positional.py b/lightorch/nn/transformer/positional.py index 34aa5be..98fbc6e 100644 --- a/lightorch/nn/transformer/positional.py +++ b/lightorch/nn/transformer/positional.py @@ -42,13 +42,15 @@ def __init__( # embedding size must be even assert d_model % 2 == 0, "d_model must be div by 2" self.theta_numerator = torch.arange(0, d_model, 2).float() - self.theta_j = 1.0 / (theta ** (self.theta_numerator / d_model)) # (Dim / 2) + self.theta_j = 1.0 / (theta ** (self.theta_numerator / d_model)) # (Dim / 2) # creates absolute position based on seq_len self.m_i = torch.arange(seq_len) # creates (m_i,theta_j) matrix function_inputs = torch.outer(self.m_i, self.theta_j).float() # translated into polar - self.freqs_complex = torch.polar(torch.ones_like(function_inputs), function_inputs) + self.freqs_complex = torch.polar( + torch.ones_like(function_inputs), function_inputs + ) def forward(self, x) -> Tensor: x_complex = torch.view_as_complex(x.float().reshape(*x.shape[:-1], -1, 2)) @@ -85,7 +87,7 @@ def __init__( def forward(self, x_n: Tensor) -> Tensor: out = x_n.clone() for _ in range(1, self.degree + 1): - (x_n, ) = torch.gradient( + (x_n,) = torch.gradient( x_n, spacing=(self.delta_t,), dim=-1, edge_order=self.edge_order ) out += x_n diff --git a/lightorch/nn/transformer/transformer.py b/lightorch/nn/transformer/transformer.py index 4959f2c..a6777b4 100644 --- a/lightorch/nn/transformer/transformer.py +++ b/lightorch/nn/transformer/transformer.py @@ -2,6 +2,7 @@ from typing import Optional, List, Sequence, Tuple from ..functional import residual_connection from .attention import SelfAttention, CrossAttention + """ # Base transformer: SelfAttention: SelfAttention module from attention (both work for decoder and encoder like architectures) @@ -12,7 +13,14 @@ class _Transformer(nn.Module): - def __init__(self, self_attention: SelfAttention, cross_attention: CrossAttention, ffn: nn.Module, postnorm: nn.Module, prenorm: nn.Module) -> None: + def __init__( + self, + self_attention: SelfAttention, + cross_attention: CrossAttention, + ffn: nn.Module, + postnorm: nn.Module, + prenorm: nn.Module, + ) -> None: super().__init__() self._self_attention = self_attention self._cross_attention = cross_attention @@ -58,7 +66,7 @@ def forward(self, x: Tensor, cross: Optional[Tensor] = None) -> Tensor: x = self.ffn(x) return x - + class Transformer(nn.Module): def __init__( @@ -109,7 +117,13 @@ def forward(self, x: Tensor) -> Tensor: class CrossTransformer(nn.Module): - def __init__(self, cell_1: TransformerCell, cell_2: TransformerCell, n_layers: int, fc: nn.Module) -> None: + def __init__( + self, + cell_1: TransformerCell, + cell_2: TransformerCell, + n_layers: int, + fc: nn.Module, + ) -> None: super().__init__() self.cell_1 = nn.ModuleList([cell_1 for _ in range(n_layers)]) self.cell_2 = nn.ModuleList([cell_2 for _ in range(n_layers)]) @@ -134,11 +148,11 @@ def _single_forward( return out0, out1 - def forward(self, head_1: Sequence, head_2: Sequence) -> Tuple[Tuple[Tensor], Tuple[Tensor]]: + def forward( + self, head_1: Sequence, head_2: Sequence + ) -> Tuple[Tuple[Tensor], Tuple[Tensor]]: for cell_1, cell_2 in zip(self.cell_1, self.cell_2): - head_1, head_2 = self._single_forward( - cell_1, cell_2, head_1, head_2 - ) + head_1, head_2 = self._single_forward(cell_1, cell_2, head_1, head_2) return head_1, head_2 diff --git a/lightorch/nn/utils.py b/lightorch/nn/utils.py index 47c96b2..61e657c 100644 --- a/lightorch/nn/utils.py +++ b/lightorch/nn/utils.py @@ -101,6 +101,7 @@ def 
forward(self, input: Tensor) -> List[Tensor]: if name == self.layers[-1]: return features + VALID_MODELS_3D = { "vgg19": { "model": vgg19, @@ -190,7 +191,8 @@ def forward(self, input: Tensor) -> List[Tensor]: if name == self.layers[-1]: return features + __all__ = [ - 'FeatureExtractor2D', - 'FeatureExtractor3D', -] \ No newline at end of file + "FeatureExtractor2D", + "FeatureExtractor3D", +] diff --git a/lightorch/training/__pycache__/__init__.cpython-310.pyc b/lightorch/training/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..1610151 Binary files /dev/null and b/lightorch/training/__pycache__/__init__.cpython-310.pyc differ diff --git a/lightorch/training/adversarial.py b/lightorch/training/adversarial.py index c145808..7c3d037 100644 --- a/lightorch/training/adversarial.py +++ b/lightorch/training/adversarial.py @@ -6,9 +6,28 @@ from torch import Tensor import torchvision + class Module(Module_): - def __init__(self, *, optimizer: Union[str, Optimizer], scheduler: Union[str, LRScheduler] = None, triggers: Dict[str, Dict[str, float]] = None, optimizer_kwargs: Dict[str, Any] = None, scheduler_kwargs: Dict[str, Any] = None, gradient_clip_algorithm: str = None, gradient_clip_val: float = None) -> None: - super().__init__(optimizer=optimizer, scheduler=scheduler, triggers=triggers, optimizer_kwargs=optimizer_kwargs, scheduler_kwargs=scheduler_kwargs, gradient_clip_algorithm=gradient_clip_algorithm, gradient_clip_val=gradient_clip_val) + def __init__( + self, + *, + optimizer: Union[str, Optimizer], + scheduler: Union[str, LRScheduler] = None, + triggers: Dict[str, Dict[str, float]] = None, + optimizer_kwargs: Dict[str, Any] = None, + scheduler_kwargs: Dict[str, Any] = None, + gradient_clip_algorithm: str = None, + gradient_clip_val: float = None + ) -> None: + super().__init__( + optimizer=optimizer, + scheduler=scheduler, + triggers=triggers, + optimizer_kwargs=optimizer_kwargs, + scheduler_kwargs=scheduler_kwargs, + gradient_clip_algorithm=gradient_clip_algorithm, + gradient_clip_val=gradient_clip_val, + ) self.automatic_optimization = False def validation_step(self) -> None: diff --git a/lightorch/training/cli.py b/lightorch/training/cli.py index 5af9742..4156f54 100644 --- a/lightorch/training/cli.py +++ b/lightorch/training/cli.py @@ -15,7 +15,6 @@ def trainer( trainer_defaults={ "deterministic": deterministic, }, - ) diff --git a/lightorch/training/supervised.py b/lightorch/training/supervised.py index 2c7245d..cbed8f4 100644 --- a/lightorch/training/supervised.py +++ b/lightorch/training/supervised.py @@ -31,17 +31,19 @@ "linear": LinearLR, } + def interval(algo: LRScheduler) -> str: if isinstance(algo, OneCycleLR): return "step" else: return "epoch" + class Module(LightningModule): """ init: triggers: Dict[str, Dict[str, float]] -> This is an - interpretative implementation for grouped optimization + interpretative implementation for grouped optimization where the parameters are stored in groups given a "trigger", namely, as trigger parameters you can put a string describing the beginning of the parameters to optimize in a group. @@ -51,38 +53,47 @@ class Module(LightningModule): gradient_clip_algorithm: str -> Gradient clip algorithm [value, norm]. gradient_clip_val: float -> Clipping value. 
""" + def __init__( - self, - *, - optimizer: Union[str, Optimizer], - scheduler: Union[str, LRScheduler] = None, - triggers: Dict[str, Dict[str, float]] = None, - optimizer_kwargs: Dict[str, Any] = None, - scheduler_kwargs: Dict[str, Any] = None, - **kwargs - ) -> None: + self, + *, + optimizer: Union[str, Optimizer], + scheduler: Union[str, LRScheduler] = None, + triggers: Dict[str, Dict[str, float]] = None, + optimizer_kwargs: Dict[str, Any] = None, + scheduler_kwargs: Dict[str, Any] = None, + **kwargs, + ) -> None: super().__init__() for att in kwargs: setattr(self, att, kwargs[att]) - + # Initializing the optimizer and the triggers self.triggers = triggers if triggers is not None: - assert optimizer_kwargs is None, 'Not valid optimizer_kwargs parameter for trigger-based setting, include all optimizer parameters in the dictionary with their respective name.' + assert ( + optimizer_kwargs is None + ), "Not valid optimizer_kwargs parameter for trigger-based setting, include all optimizer parameters in the dictionary with their respective name." self.triggers = triggers else: if not isinstance(optimizer, Optimizer): - assert optimizer_kwargs is not None, 'Must specify optimizer_kwargs parameter for non-trigger-based setting.' + assert ( + optimizer_kwargs is not None + ), "Must specify optimizer_kwargs parameter for non-trigger-based setting." self.optimizer_kwargs = optimizer_kwargs else: - assert optimizer_kwargs is None, 'Not valid optimizer_kwargs parameter for initialized optimizer.' + assert ( + optimizer_kwargs is None + ), "Not valid optimizer_kwargs parameter for initialized optimizer." self.optimizer = optimizer if isinstance(optimizer, str) or issubclass(optimizer, Optimizer): self.optimizer = optimizer else: - if not getattr(self, 'optimizer', False): - raise ValueError(f'Not valid optimizer parameter, expecting str | Optimizer got {type(optimizer)}') + if not getattr(self, "optimizer", False): + raise ValueError( + f"Not valid optimizer parameter, expecting str | Optimizer got {type(optimizer)}" + ) # Initializing the scheduler if scheduler is not None: @@ -90,15 +101,19 @@ def __init__( self.scheduler = scheduler self.scheduler_kwargs = scheduler_kwargs elif isinstance(scheduler, LRScheduler): - self.scheduler = lambda optimizer: scheduler(optimizer=optimizer, **scheduler_kwargs) + self.scheduler = lambda optimizer: scheduler( + optimizer=optimizer, **scheduler_kwargs + ) else: - raise ValueError('Not valid scheduler parameter') + raise ValueError("Not valid scheduler parameter") else: - assert scheduler_kwargs is None, 'Not valid scheduler_kwargs parameter for NoneType scheduler' + assert ( + scheduler_kwargs is None + ), "Not valid scheduler_kwargs parameter for NoneType scheduler" self.scheduler = None def loss_forward(self, batch: Tensor, idx: int) -> Dict[str, Union[Tensor, float]]: - raise NotImplementedError('Should have defined loss_forward method.') + raise NotImplementedError("Should have defined loss_forward method.") def training_step(self, batch: Tensor, idx: int) -> Tensor: kwargs = self.loss_forward(batch, idx) @@ -148,11 +163,12 @@ def get_param_groups(self) -> Tuple: param_group["params"].append(param) param_group.update(self.triggers[trigger]) - + return param_groups return None + def _configure_optimizer(self) -> Optimizer: - if params:= self.get_param_groups() is not None: + if params := self.get_param_groups() is not None: if isinstance(self.optimizer, str): return VALID_OPTIMIZERS[self.optimizer](params) elif isinstance(self.optimizer, 
torch.optim.Optimizer): @@ -160,24 +176,26 @@ def _configure_optimizer(self) -> Optimizer: elif issubclass(self.optimizer, torch.optim.Optimizer): return self.optimizer(params) else: - + if isinstance(self.optimizer, str): self.optimizer = VALID_OPTIMIZERS[self.optimizer] elif isinstance(self.optimizer, Optimizer): return self.optimizer elif issubclass(self.optimizer, Optimizer): pass - + return self.optimizer(self.parameters(), **self.optimizer_kwargs) - + def _configure_scheduler(self, optimizer: Optimizer) -> LRScheduler: if isinstance(self.scheduler, str): if self.scheduler == "onecycle": - self.scheduler_kwargs["total_steps"] = self.trainer.estimated_stepping_batches + self.scheduler_kwargs["total_steps"] = ( + self.trainer.estimated_stepping_batches + ) return VALID_SCHEDULERS[self.scheduler](optimizer, **self.scheduler_kwargs) else: return self.scheduler(optimizer) - + def configure_optimizers(self) -> Union[Optimizer, Sequence[Optimizer]]: optimizer = self._configure_optimizer() if self.scheduler is not None: @@ -192,4 +210,5 @@ def configure_optimizers(self) -> Union[Optimizer, Sequence[Optimizer]]: } return {"optimizer": optimizer} + __all__ = ["Module"] diff --git a/requirements.sh b/requirements.sh old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1350118 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,156 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile requirements.in +# +aiohttp==3.9.5 + # via fsspec +aiosignal==1.3.1 + # via aiohttp +alembic==1.13.1 + # via optuna +async-timeout==4.0.3 + # via aiohttp +attrs==23.2.0 + # via aiohttp +colorlog==6.8.2 + # via optuna +einops==0.8.0 + # via -r requirements.in +filelock==3.14.0 + # via + # torch + # triton +frozenlist==1.4.1 + # via + # aiohttp + # aiosignal +fsspec[http]==2024.6.0 + # via + # lightning + # pytorch-lightning + # torch +greenlet==3.0.3 + # via sqlalchemy +idna==3.7 + # via yarl +jinja2==3.1.4 + # via torch +lightning==2.2.5 + # via -r requirements.in +lightning-utilities==0.11.2 + # via + # lightning + # pytorch-lightning + # torchmetrics +mako==1.3.5 + # via alembic +markupsafe==2.1.5 + # via + # jinja2 + # mako +mpmath==1.3.0 + # via sympy +multidict==6.0.5 + # via + # aiohttp + # yarl +networkx==3.3 + # via torch +numpy==1.26.4 + # via + # lightning + # optuna + # pytorch-lightning + # torchmetrics + # torchvision +nvidia-cublas-cu12==12.1.3.1 + # via + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via torch +nvidia-cuda-runtime-cu12==12.1.105 + # via torch +nvidia-cudnn-cu12==8.9.2.26 + # via torch +nvidia-cufft-cu12==11.0.2.54 + # via torch +nvidia-curand-cu12==10.3.2.106 + # via torch +nvidia-cusolver-cu12==11.4.5.107 + # via torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # nvidia-cusolver-cu12 + # torch +nvidia-nccl-cu12==2.20.5 + # via torch +nvidia-nvjitlink-cu12==12.5.40 + # via + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via torch +optuna==3.6.1 + # via -r requirements.in +packaging==24.1 + # via + # lightning + # lightning-utilities + # optuna + # pytorch-lightning + # torchmetrics +pillow==10.3.0 + # via torchvision +pytorch-lightning==2.2.5 + # via lightning +pyyaml==6.0.1 + # via + # lightning + # optuna + # pytorch-lightning +sqlalchemy==2.0.30 + # via + # alembic + # optuna +sympy==1.12.1 + # via torch +torch==2.3.1 + # via + # -r 
requirements.in + # lightning + # pytorch-lightning + # torchmetrics + # torchvision +torchmetrics==1.4.0.post0 + # via + # lightning + # pytorch-lightning +torchvision==0.18.1 + # via -r requirements.in +tqdm==4.66.4 + # via + # -r requirements.in + # lightning + # optuna + # pytorch-lightning +triton==2.3.1 + # via torch +typing-extensions==4.12.2 + # via + # alembic + # lightning + # lightning-utilities + # pytorch-lightning + # sqlalchemy + # torch +yarl==1.9.4 + # via aiohttp + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/tests/__pycache__/__init__.cpython-310.pyc b/tests/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..5bcb9ce Binary files /dev/null and b/tests/__pycache__/__init__.cpython-310.pyc differ diff --git a/tests/__pycache__/test_nn.cpython-310-pytest-8.2.2.pyc b/tests/__pycache__/test_nn.cpython-310-pytest-8.2.2.pyc new file mode 100644 index 0000000..a2d33cb Binary files /dev/null and b/tests/__pycache__/test_nn.cpython-310-pytest-8.2.2.pyc differ diff --git a/tests/__pycache__/test_supervised.cpython-310-pytest-8.2.2.pyc b/tests/__pycache__/test_supervised.cpython-310-pytest-8.2.2.pyc new file mode 100644 index 0000000..aef59bb Binary files /dev/null and b/tests/__pycache__/test_supervised.cpython-310-pytest-8.2.2.pyc differ diff --git a/tests/__pycache__/utils.cpython-310.pyc b/tests/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000..ff90ec0 Binary files /dev/null and b/tests/__pycache__/utils.cpython-310.pyc differ diff --git a/tests/test_nn.py b/tests/test_nn.py index 240a4b8..593892c 100644 --- a/tests/test_nn.py +++ b/tests/test_nn.py @@ -205,7 +205,7 @@ def test_transformer_cell() -> None: ffn = nn.Sequential( nn.Linear(embed_dim, 4 * embed_dim), nn.ReLU(), - nn.Linear(4 * embed_dim, embed_dim) + nn.Linear(4 * embed_dim, embed_dim), ) prenorm = nn.LayerNorm(embed_dim) postnorm = nn.LayerNorm(embed_dim) @@ -215,7 +215,7 @@ def test_transformer_cell() -> None: cross_attention=CrossAttention(attention), ffn=ffn, prenorm=prenorm, - postnorm=postnorm + postnorm=postnorm, ) output = transformer_cell.self_attention(input_tensor) @@ -227,6 +227,7 @@ def test_transformer_cell() -> None: output = transformer_cell.ffn(input_tensor) assert output.shape == input_tensor.shape + def test_transformer() -> None: batch_size = 32 seq_length = 10 @@ -236,12 +237,12 @@ def test_transformer() -> None: input_tensor = torch.randn(batch_size, seq_length, embed_dim) embedding_layer = nn.Linear(embed_dim, embed_dim) - positional_encoding = AbsoluteSinusoidalPositionalEncoding(0.) 
+ positional_encoding = AbsoluteSinusoidalPositionalEncoding(0.0) attention = MultiHeadAttention(embed_dim, seq_length, n_heads) ffn = nn.Sequential( nn.Linear(embed_dim, 4 * embed_dim), nn.ReLU(), - nn.Linear(4 * embed_dim, embed_dim) + nn.Linear(4 * embed_dim, embed_dim), ) prenorm = nn.LayerNorm(embed_dim) postnorm = nn.LayerNorm(embed_dim) @@ -251,7 +252,7 @@ def test_transformer() -> None: cross_attention=CrossAttention(attention), ffn=ffn, prenorm=prenorm, - postnorm=postnorm + postnorm=postnorm, ) transformer = Transformer( @@ -264,6 +265,7 @@ def test_transformer() -> None: output = transformer(input_tensor) assert output.shape == (batch_size, seq_length, embed_dim) + def test_cross_transformer() -> None: batch_size = 32 seq_length = 10 @@ -277,7 +279,7 @@ def test_cross_transformer() -> None: ffn = nn.Sequential( nn.Linear(embed_dim, 4 * embed_dim), nn.ReLU(), - nn.Linear(4 * embed_dim, embed_dim) + nn.Linear(4 * embed_dim, embed_dim), ) prenorm = nn.LayerNorm(embed_dim) postnorm = nn.LayerNorm(embed_dim) @@ -287,7 +289,7 @@ def test_cross_transformer() -> None: cross_attention=CrossAttention(attention), ffn=ffn, prenorm=prenorm, - postnorm=postnorm + postnorm=postnorm, ) cell2 = TransformerCell( @@ -295,15 +297,18 @@ def test_cross_transformer() -> None: cross_attention=CrossAttention(attention), ffn=ffn, prenorm=prenorm, - postnorm=postnorm + postnorm=postnorm, ) - cross_transformer = CrossTransformer(cell1, cell2, n_layers=1, fc=nn.Linear(embed_dim, embed_dim)) + cross_transformer = CrossTransformer( + cell1, cell2, n_layers=1, fc=nn.Linear(embed_dim, embed_dim) + ) output = cross_transformer(first_input, second_input) assert output[0].shape == (batch_size, seq_length, embed_dim) assert output[1].shape == (batch_size, seq_length, embed_dim) + def test_att() -> None: batch_size = 32 seq_length = 10 @@ -318,7 +323,9 @@ def test_att() -> None: # Initialize attention mechanisms multi_head_attention = MultiHeadAttention(embed_dim, seq_length, n_heads) multi_query_attention = MultiQueryAttention(embed_dim, seq_length, n_queries) - grouped_query_attention = GroupedQueryAttention(embed_dim, seq_length, n_queries, n_groups) + grouped_query_attention = GroupedQueryAttention( + embed_dim, seq_length, n_queries, n_groups + ) # Wrap with SelfAttention and CrossAttention self_attention_mh = SelfAttention(multi_head_attention) @@ -330,22 +337,34 @@ def test_att() -> None: cross_attention_gq = CrossAttention(grouped_query_attention, method="i i c") output_mh_self = self_attention_mh(input_tensor) - assert output_mh_self.shape == input_tensor.shape, f"Expected shape {input_tensor.shape}, got {output_mh_self.shape}" + assert ( + output_mh_self.shape == input_tensor.shape + ), f"Expected shape {input_tensor.shape}, got {output_mh_self.shape}" output_mq_self = self_attention_mq(input_tensor) - assert output_mq_self.shape == input_tensor.shape, f"Expected shape {input_tensor.shape}, got {output_mq_self.shape}" + assert ( + output_mq_self.shape == input_tensor.shape + ), f"Expected shape {input_tensor.shape}, got {output_mq_self.shape}" output_gq_self = self_attention_gq(input_tensor) - assert output_gq_self.shape == input_tensor.shape, f"Expected shape {input_tensor.shape}, got {output_gq_self.shape}" + assert ( + output_gq_self.shape == input_tensor.shape + ), f"Expected shape {input_tensor.shape}, got {output_gq_self.shape}" output_mh_cross = cross_attention_mh(input_tensor, cross_tensor) - assert output_mh_cross.shape == input_tensor.shape, f"Expected shape {input_tensor.shape}, got 
{output_mh_cross.shape}" + assert ( + output_mh_cross.shape == input_tensor.shape + ), f"Expected shape {input_tensor.shape}, got {output_mh_cross.shape}" output_mq_cross = cross_attention_mq(input_tensor, cross_tensor) - assert output_mq_cross.shape == input_tensor.shape, f"Expected shape {input_tensor.shape}, got {output_mq_cross.shape}" + assert ( + output_mq_cross.shape == input_tensor.shape + ), f"Expected shape {input_tensor.shape}, got {output_mq_cross.shape}" output_gq_cross = cross_attention_gq(input_tensor, cross_tensor) - assert output_gq_cross.shape == input_tensor.shape, f"Expected shape {input_tensor.shape}, got {output_gq_cross.shape}" + assert ( + output_gq_cross.shape == input_tensor.shape + ), f"Expected shape {input_tensor.shape}, got {output_gq_cross.shape}" models_with_params = [ @@ -365,18 +384,20 @@ def test_att() -> None: (FFN_GLU, {"in_features": 64, "k_multiplier": 2, "out_features": 128}), ] + @pytest.mark.parametrize("model_class, params", models_with_params) def test_ffn(model_class, params) -> None: model = model_class(**params) - - in_features = params['in_features'] - x = torch.randn(32, in_features) + + in_features = params["in_features"] + x = torch.randn(32, in_features) output = model(x) - out_features = params['out_features'] + out_features = params["out_features"] assert output.shape == (32, out_features) + def test_pos() -> None: dropout = 0.1 batch_size = 32 @@ -390,8 +411,9 @@ def test_pos() -> None: abs_pos_enc = AbsoluteSinusoidalPositionalEncoding(dropout=dropout) rot_pos_enc = RotaryPositionalEncoding(d_model=embed_dim, seq_len=seq_length) - dn_pos_enc = DnPositionalEncoding(delta_t=delta_t, degree=degree, edge_order=edge_order) - + dn_pos_enc = DnPositionalEncoding( + delta_t=delta_t, degree=degree, edge_order=edge_order + ) output = rot_pos_enc(input_tensor) assert output.shape == input_tensor.shape @@ -400,6 +422,7 @@ def test_pos() -> None: output = dn_pos_enc(input_tensor) assert output.shape == input_tensor.shape + # implementation on c++ # def test_patch_embedding_3dcnn(): # batch_size = 2 @@ -424,9 +447,9 @@ def test_pos() -> None: # output = patch_embed(input_tensor) # assert output.shape == (batch_size, h_div * w_div, d_model) - + # feature_extractor = FeatureExtractor2D() - + # patch_embed = PatchEmbedding2DCNN(d_model=d_model, pe=pe, feature_extractor=feature_extractor, architecture=architecture, hidden_activations=hidden_activations, dropout=dropout) # output = patch_embed(input_tensor) @@ -442,11 +465,11 @@ def test_pos() -> None: # res_layers = 1 # shape = (input_size, sequence_length, batch_size) # x = torch.randn(*shape) # batch_size, sequence, input_size - + # model = GRU(input_size, hidden_size, rnn_layers, res_layers) # out = model(x) # assert (out.shape == shape), 'Residual GRU failed' - + # model = LSTM(input_size, hidden_size, rnn_layers, res_layers) # out = model(x) -# assert (out.shape == shape), 'Residual LSTM failed' \ No newline at end of file +# assert (out.shape == shape), 'Residual LSTM failed' diff --git a/tests/test_supervised.py b/tests/test_supervised.py index 7ff3bce..02e6f3b 100644 --- a/tests/test_supervised.py +++ b/tests/test_supervised.py @@ -20,14 +20,14 @@ # super().__init__(**hparams) # # Criterion # self.criterion = MSELoss() - + # self.model = nn.Sequential( # nn.Linear(10, 5), # nn.ReLU(), # nn.Linear(5, 1), # nn.Sigmoid() # ) - + # def forward(self, input: Tensor) -> Tensor: # return self.model(input) @@ -35,7 +35,7 @@ # def objective1(trial: optuna.trial.Trial): # return dict( # triggers = 
{'model': dict( -# lr = trial.suggest_float('lr', 1e-4, 1e-1), +# lr = trial.suggest_float('lr', 1e-4, 1e-1), # weight_decay = trial.suggest_float('weight_decay', 1e-4, 1e-1), # momentum = trial.suggest_float('momentum', 0.1, 0.7) # )}, @@ -83,4 +83,4 @@ # precision="medium", # n_trials=10, # trianer_kwargs=dict(fast_dev_run=True, accelerator = 'cpu'), -# ) \ No newline at end of file +# ) diff --git a/tests/utils.py b/tests/utils.py index 0743e62..4bcd473 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -24,8 +24,10 @@ def __init__( self, ) -> None: pass + def __len__(self) -> int: return 100 + def __getitem__(self, index) -> Tensor: return torch.randn(10)
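A note on the `_fourierconvNd` and `_fourierdeconvNd` hunks in `lightorch/nn/functional.py`: the kernels are sent to Fourier space with `fftn`, so the convolution is presumably realised as a pointwise product (or quotient, for the deconvolution) of spectra. As background for that design, the following self-contained sketch checks the convolution theorem it relies on; it does not use the library's API, and the explicit loop is written for clarity rather than speed.

```python
import torch

# Convolution theorem: a pointwise product in Fourier space equals a
# circular convolution in the signal domain.
N = 8
x = torch.randn(N)
w = torch.randn(N)

# FFT route: multiply the spectra, then invert.
via_fft = torch.fft.ifft(torch.fft.fft(x) * torch.fft.fft(w)).real

# Direct circular convolution for comparison.
direct = torch.stack([
    sum(x[m] * w[(n - m) % N] for m in range(N))
    for n in range(N)
])

assert torch.allclose(via_fft, direct, atol=1e-5)
```

Because the pointwise product corresponds to circular rather than zero-padded convolution, this is presumably one reason the `FourierConv*` layers accept an explicit `padding` argument (handled by `get_padding` in `fourier.py`).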
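On `lightorch/nn/sequential/residual.py`: `_Residual.__init__` builds `nn.ModuleList([module for _ in range(n_layers)])`, which stores the same module object `n_layers` times, so every residual "layer" shares a single set of parameters; the `LSTM`/`GRU` wrappers (and `CrossTransformer`, which repeats `cell_1`/`cell_2` the same way) therefore stack tied copies of one block. That may be intentional, but if independent layers are wanted, a deep copy per slot is the usual pattern. A minimal sketch, separate from the library's code:

```python
import copy
from torch import nn

block = nn.Linear(4, 4)

# Repeating the same object ties the parameters across all "layers".
tied = nn.ModuleList([block for _ in range(3)])
assert tied[0].weight.data_ptr() == tied[2].weight.data_ptr()

# Deep-copying per slot gives every layer its own parameters.
untied = nn.ModuleList([copy.deepcopy(block) for _ in range(3)])
assert untied[0].weight.data_ptr() != untied[2].weight.data_ptr()
```

Incidentally, the context line `from typing import Union, Tuple, Callable, Any, Union` in the same file imports `Union` twice.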
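On `_configure_optimizer` in `lightorch/training/supervised.py`: the reformatted condition `if params := self.get_param_groups() is not None:` keeps its original semantics, and those semantics look wrong, because `:=` binds more loosely than `is not`, so `params` receives the boolean result of the comparison rather than the parameter groups, and the optimizer would then be constructed from `True`. Assuming the intent is to bind the groups themselves (which `get_param_groups` returns, or `None`), the walrus expression needs parentheses. A minimal illustration with a hypothetical stand-in for `get_param_groups`:

```python
def get_param_groups():
    # Stand-in for Module.get_param_groups(): returns groups or None.
    return [{"params": [], "lr": 1e-3}]

# Unparenthesised: ":=" captures the result of the comparison.
if params := get_param_groups() is not None:
    pass
assert params is True  # the groups themselves are lost

# Parenthesised: ":=" captures the return value; the comparison then uses it.
if (params := get_param_groups()) is not None:
    pass
assert isinstance(params, list)
```

A smaller nit visible in the `GroupedQueryAttention.forward` hunk of `lightorch/nn/transformer/attention.py`: the `print(q.shape, k.shape, v.shape)` call looks like leftover debugging output.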