[DOCS][docs/zeta/nn/modules/fused_dropout_layernorm.md]

kyegomez · Dec 20, 2023 · 3dc6384 · 3dc6384
1 parent c851c73
commit 3dc6384
Show file tree

Hide file tree

Showing 2 changed files with 53 additions and 0 deletions.
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -110,6 +110,7 @@ nav:
               - CustomMLP: "zeta/nn/modules/custom_mlp.md"
               - PolymorphicNeuronLayer: "zeta/nn/modules/polymorphic_activation.md"
               - FusedDenseGELUDense: "zeta/nn/modules/fused_gelu_dense.md"
+              - FusedDropoutLayerNorm: "zeta/nn/modules/fused_dropout_layernorm.md"
           - zeta.nn.attention:
               - FlashAttention: "zeta/nn/attention/flash_attention.md"
               - MultiQueryAttention: "zeta/nn/attention/multiquery.md"

diff --git a/zeta/nn/modules/simple_mamba.py b/zeta/nn/modules/simple_mamba.py
@@ -0,0 +1,52 @@
+import torch
+from torch import nn
+from zeta.nn.modules.rms_norm import RMSNorm
+from zeta.nn.modules.residual import Residual
+
+
+class Mamba(nn.Module):
+    def __init__(
+        self,
+        vocab_size: int,
+        dim: int,
+        depth: int,
+        bias: bool = False,
+        *args,
+        **kwargs,
+    ):
+        super().__init__()
+        self.embedding = nn.Embedding(vocab_size, dim)
+        self.layers = nn.ModuleList(
+            [
+                Residual(self.rmsnorm, nn.Linear(dim, dim, bias=bias))
+                for _ in range(depth)
+            ]
+        )
+        self.rmsnorm = RMSNorm(dim)
+        self.linear = nn.Linear(dim, vocab_size, bias=bias)
+        self.linear.weight = self.embedding.weight
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.embedding(x)
+
+        for layer in self.layers:
+            x = layer(x)
+
+        x = self.rmsnorm(x)
+        logits = self.linear(x)
+
+        return logits
+
+
+# class MambaBlock(nn.Module):
+#     def __init__(
+#         self,
+#         dim,
+#         inner_dim,
+#         bias: bool = False,
+#         conv_bias=None,
+#         dim_conv=None,
+#         *args,
+#         **kwargs,
+#     ):
+#         super().__init__()