add functional group norm and dtype device param (#10417)
Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
marigoold and oneflow-ci-bot authored Jan 24, 2024
1 parent dafc2b1 commit bf9ae02
Showing 3 changed files with 55 additions and 35 deletions.
9 changes: 5 additions & 4 deletions docs/source/nn.functional.rst
@@ -27,14 +27,17 @@ Convolution functions
fold
unfold

BatchNorm functions
--------------------
Normalization functions
-----------------------

.. autosummary::
:toctree: generated
:nosignatures:

batch_norm
layer_norm
normalize
group_norm

Pooling functions
----------------------------------
@@ -93,8 +96,6 @@ Non-linear activation functions
hardsigmoid
silu
mish
layer_norm
normalize

Linear functions
----------------
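For orientation, a minimal usage sketch (not part of the commit) of the functions now grouped under "Normalization functions"; the signatures of layer_norm and normalize are assumed to mirror their PyTorch counterparts, while group_norm follows the new functional definition added in this commit.

import oneflow as flow
import oneflow.nn.functional as F

x = flow.randn(2, 6, 4, 4)                           # (N, C, H, W)
y_ln = F.layer_norm(x, normalized_shape=(6, 4, 4))   # assumed torch-style signature
y_l2 = F.normalize(x, p=2, dim=1)                    # assumed torch-style signature
y_gn = F.group_norm(x, num_groups=3)                 # added by this commit
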
2 changes: 1 addition & 1 deletion python/oneflow/nn/functional/__init__.py
@@ -65,7 +65,7 @@
from oneflow._C import threshold
from oneflow._C import silu
from oneflow._C import mish
from oneflow.nn.modules.normalization import layer_norm
from oneflow.nn.modules.normalization import layer_norm, group_norm
from oneflow._C import dropout, dropout1d, dropout2d, dropout3d
from oneflow._C import smooth_l1_loss
from .pad import pad
79 changes: 49 additions & 30 deletions python/oneflow/nn/modules/normalization.py
@@ -24,6 +24,43 @@
_shape_t = Union[int, Tuple[int], flow._oneflow_internal.Size]


def group_norm(
input: Tensor,
num_groups: int,
weight: Tensor = None,
bias: Tensor = None,
eps: float = 1e-05,
num_channels: int = None,
):
r"""Apply Group Normalization for last certain number of dimensions.
See :class:`~oneflow.nn.GroupNorm` for details.
"""
assert len(input.shape) >= 3, "The input tensor must have at least 3 dimensions"
if num_channels is None:
num_channels = input.shape[1]
assert (
input.shape[1] == num_channels
), "The number of channels of the input tensor must equal num_channels"

affine = weight is not None and bias is not None
if input.is_cuda:
return flow._C.group_norm(input, weight, bias, affine, num_groups, eps)
else:
origin_shape = input.shape
reshape_to_1d = flow.reshape(input, shape=[origin_shape[0], num_groups, -1])
mean = flow.mean(reshape_to_1d, dim=2, keepdim=True)
variance = flow.var(reshape_to_1d, dim=2, unbiased=False, keepdim=True)
normalized = (reshape_to_1d - mean) / flow.sqrt(variance + eps)
normalized = flow.reshape(normalized, shape=[origin_shape[0], num_channels, -1])
if weight is not None:
normalized = normalized * weight.reshape(1, num_channels, 1)
if bias is not None:
normalized = normalized + bias.reshape(1, num_channels, 1)
res = flow.reshape(normalized, shape=tuple(input.shape))
return res
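
For context, a small usage sketch of the new functional entry point (not part of the diff). The input shape and the affine weight/bias tensors are illustrative only; when num_channels is omitted it is inferred from input.shape[1].

import oneflow as flow
import oneflow.nn.functional as F

x = flow.randn(4, 6, 8, 8)           # (N, C, H, W) with C = 6 channels

# non-affine: num_channels defaults to x.shape[1]
y = F.group_norm(x, num_groups=3)

# affine: per-channel weight and bias; their length must equal num_channels
w = flow.ones(6)
b = flow.zeros(6)
y = F.group_norm(x, num_groups=3, weight=w, bias=b, num_channels=6)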


class GroupNorm(Module):
"""
Applies Group Normalization over a mini-batch of inputs as described in
@@ -84,6 +121,8 @@ def __init__(
num_channels: int,
eps: float = 1e-05,
affine: bool = True,
device=None,
dtype=None,
) -> None:
super().__init__()
assert num_groups > 0, "num_groups must be larger than zero"
@@ -92,9 +131,14 @@
self.num_channels = num_channels
self.eps = eps
self.affine = affine
factory_kwargs = {}
if device:
factory_kwargs["device"] = device
if dtype:
factory_kwargs["dtype"] = dtype
if self.affine:
self.weight = flow.nn.Parameter(flow.Tensor(num_channels))
self.bias = flow.nn.Parameter(flow.Tensor(num_channels))
self.weight = flow.nn.Parameter(flow.Tensor(num_channels, **factory_kwargs))
self.bias = flow.nn.Parameter(flow.Tensor(num_channels, **factory_kwargs))
else:
self.register_parameter("weight", None)
self.register_parameter("bias", None)
@@ -106,34 +150,9 @@ def reset_parameters(self) -> None:
flow.nn.init.zeros_(self.bias)

def forward(self, input: Tensor) -> Tensor:
assert (
len(input.shape) >= 3
), "The dimensions of input tensor must larger than 2"
assert (
input.shape[1] == self.num_channels
), "The channels of input tensor must equal num_channels"

if input.is_cuda:
return flow._C.group_norm(
input, self.weight, self.bias, self.affine, self.num_groups, self.eps
)
else:
origin_shape = input.shape
reshape_to_1d = flow.reshape(
input, shape=[origin_shape[0], self.num_groups, -1]
)
mean = flow.mean(reshape_to_1d, dim=2, keepdim=True)
variance = flow.var(reshape_to_1d, dim=2, unbiased=False, keepdim=True)
normalized = (reshape_to_1d - mean) / flow.sqrt(variance + self.eps)
normalized = flow.reshape(
normalized, shape=[origin_shape[0], self.num_channels, -1]
)
if self.weight is not None:
normalized = normalized * self.weight.reshape(1, self.num_channels, 1)
if self.bias is not None:
normalized = normalized + self.bias.reshape(1, self.num_channels, 1)
res = flow.reshape(normalized, shape=tuple(input.shape))
return res
return group_norm(
input, self.num_groups, self.weight, self.bias, self.eps, self.num_channels
)

def extra_repr(self) -> str:
return "{num_groups}, {num_channels}, eps={eps}, affine={affine}".format(
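A short sketch (not part of the diff) exercising the new device and dtype keyword arguments on the module; the CUDA device in the trailing comment is an assumption about the runtime environment.

import oneflow as flow

# affine GroupNorm whose weight/bias are created with the requested dtype
# (and, when given, device) via the new factory kwargs
gn = flow.nn.GroupNorm(num_groups=3, num_channels=6, dtype=flow.float32)
x = flow.randn(4, 6, 8, 8)
out = gn(x)   # forward now delegates to the functional group_norm above

# on a CUDA build, device can be passed the same way:
# gn_cuda = flow.nn.GroupNorm(3, 6, device=flow.device("cuda"))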
