facebookresearch · lessw2020 · Aug 16, 2023 · Aug 16, 2023 · Aug 16, 2023 · Aug 16, 2023
diff --git a/tests/modules/layers/test_normalizations.py b/tests/modules/layers/test_normalizations.py
@@ -5,7 +5,13 @@
 # LICENSE file in the root directory of this source tree.
 
 import torch
-from torchmultimodal.modules.layers.normalizations import Fp32GroupNorm, Fp32LayerNorm
+from tests.test_utils import assert_expected
+
+from torchmultimodal.modules.layers.normalizations import (
+ Fp32GroupNorm,
+ Fp32LayerNorm,
+ RMSNorm,
+)
 
 
 def test_fp32layernorm():
@@ -20,3 +26,38 @@ def test_fp32groupnorm():
  norm = Fp32GroupNorm(2, 4)
  output = norm(x)
  assert output.dtype == torch.float16
+
+
+def test_rms_norm_core_algo():
+    """RMSNorm output matches hard-coded reference values and is fp32."""
+    feature_dim = 10
+    norm_layer = RMSNorm(feature_dim)
+
+    # All-ones input has a mean square of 1; the output is expected to equal it.
+    ones = torch.ones(feature_dim, dtype=torch.float32)
+    assert_expected(norm_layer(ones), ones)
+
+    # fp16 input is compared against precomputed fp32 reference values.
+    half_input = torch.tensor(
+        [0.999, 1.1111, 2.222, 3.333, 4.444, 5.555, 6.678, 7.987, 8.123, 9.101010],
+        dtype=torch.float16,
+    )
+    reference = torch.tensor(
+        [
+            0.1749,
+            0.1946,
+            0.3892,
+            0.5835,
+            0.7783,
+            0.9727,
+            1.1699,
+            1.3984,
+            1.4229,
+            1.5938,
+        ],
+        dtype=torch.float32,
+    )
+
+    half_output = norm_layer(half_input)
+    assert_expected(half_output, reference, atol=1e-04, rtol=1e-05)
+    assert half_output.dtype == torch.float32
diff --git a/torchmultimodal/modules/layers/normalizations.py b/torchmultimodal/modules/layers/normalizations.py
@@ -6,6 +6,7 @@
 
 from typing import Any
 
+import torch
 from torch import nn, Tensor
 
 
@@ -45,3 +46,29 @@ def forward(self, x: Tensor) -> Tensor:
  self.eps,
  )
  return output.type_as(x)
+
+
+class RMSNorm(nn.Module):
+    """Root Mean Square Layer Normalization (https://arxiv.org/abs/1910.07467).
+
+    The normalization is computed in fp32, cast back to the input dtype, and
+    then multiplied by the learnable ``scale`` parameter. Adapted from
+    https://github.com/facebookresearch/llama/blob/main/llama/model.py
+
+    Args:
+        dim (int): length of the learnable scale vector; it is broadcast
+            against the last dimension of the input.
+        eps (float): added to the mean square before the reciprocal sqrt.
+    """
+
+    def __init__(self, dim: int, eps: float = 1e-6):
+        super().__init__()
+        self.scale = nn.Parameter(torch.ones(dim))
+        self.eps = eps
+
+    def _norm(self, x: Tensor) -> Tensor:
+        mean_square = x.pow(2).mean(dim=-1, keepdim=True)
+        return x * torch.rsqrt(mean_square + self.eps)
+
+    def forward(self, x: Tensor) -> Tensor:
+        return self._norm(x.float()).type_as(x) * self.scale