Commit 0e303710 authored by Megvii Engine Team

feat(mge/module): add group_local_conv

GitOrigin-RevId: 0503377bc5f2ed8edc8c21b4f0382c7a38462a86
Parent d225cbcd
@@ -221,10 +221,8 @@ def local_conv2d(
     padding: Union[int, Tuple[int, int]] = 0,
     dilation: Union[int, Tuple[int, int]] = 1,
     conv_mode="CROSS_CORRELATION",
-) -> Tensor:
-    """Applies spatial 2D convolution over an image with unshared kernels.
-    Refer to :class:`~.LocalConv2d` for more information.
+):
+    """Applies spatial 2D convolution over a grouped, channeled image with untied kernels.
     """
     assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION"
@@ -232,6 +230,8 @@ def local_conv2d(
     pad_h, pad_w = expand_hw(padding)
     dilate_h, dilate_w = expand_hw(dilation)
+    Sparse = P.Convolution.Sparse
+    op = builtin.GroupLocal(
         stride_h=stride_h,
         stride_w=stride_w,
@@ -239,7 +239,9 @@ def local_conv2d(
         pad_w=pad_w,
         dilate_h=dilate_h,
         dilate_w=dilate_w,
         # strategy=get_conv_execution_strategy(),
         mode=conv_mode,
+        compute_mode="DEFAULT",
+        sparse=Sparse.DENSE,
     )
     inp, weight = utils.convert_inputs(inp, weight)
     (output,) = apply(op, inp, weight)
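Taken together, these hunks route `local_conv2d` through the new `GroupLocal` builtin. Below is a minimal usage sketch of the functional API under the weight layout exercised by the test added in this commit; the `megengine.functional` import path and exact keyword names are assumptions read off this diff, not confirmed by it.

# Hypothetical usage sketch for the functional API changed above;
# import path and keyword names are assumptions from this diff.
import numpy as np

import megengine.functional as F
from megengine import tensor

N, IC, OC, IH, IW, K, G = 2, 4, 8, 8, 8, 3, 2
inp = tensor(np.random.normal(size=(N, IC, IH, IW)).astype(np.float32))
# With stride=1, padding=1, dilation=1 and a 3x3 kernel, OH == IH and OW == IW.
OH, OW = IH, IW
# Untied kernels: one K x K filter per (group, output position).
weight = tensor(
    np.random.normal(size=(G, OH, OW, IC // G, K, K, OC // G)).astype(np.float32)
)
out = F.local_conv2d(inp, weight, stride=1, padding=1, dilation=1)
print(out.shape)  # expected: (2, 8, 8, 8)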
@@ -340,7 +340,7 @@ class ConvTranspose2d(_ConvNd):
 class LocalConv2d(Conv2d):
-    r"""Applies a spatial convolution with unshared kernels over an input 4D tensor.
+    r"""Applies a spatial convolution with untied kernels over a grouped, channeled input 4D tensor.
     It is also known as the locally connected layer.

     :param in_channels: number of input channels.
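The module-level counterpart mirrors this: because the kernels are untied per output position, `LocalConv2d` takes `input_height`/`input_width` at construction time. A sketch with the constructor arguments used by the test below (treat the `megengine.module` import path as an assumption):

# Hypothetical sketch of the module documented above; constructor
# arguments follow the test in this commit, import path is assumed.
import numpy as np

import megengine.module as M
from megengine import tensor

m = M.LocalConv2d(
    in_channels=4,
    out_channels=8,
    input_height=8,
    input_width=8,
    kernel_size=3,
    stride=1,
    padding=1,
    groups=2,
)
# The weight holds one kernel per (group, output position):
# (groups, OH, OW, in_channels // groups, K, K, out_channels // groups)
print(m.weight.shape)  # expected: (2, 8, 8, 2, 3, 3, 4)
x = tensor(np.random.normal(size=(2, 4, 8, 8)).astype(np.float32))
print(m(x).shape)  # expected: (2, 8, 8, 8)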
@@ -48,62 +48,75 @@ def test_conv_transpose2d():
     conv_transpose2d.bias = Parameter(bias, dtype=np.float32)
     y = conv_transpose2d(tensor(inp))
-    np.testing.assert_allclose(out, y.numpy(), atol=2e-6)
+    np.testing.assert_almost_equal(out, y.numpy(), 2e-6)
 def test_local_conv2d():
-    batch_size = 10
-    in_channels = 4
-    out_channels = 8
-    input_height = 8
-    input_width = 8
-    kernel_size = 3
-    stride = 1
-    padding = 1
-    dilation = 1
-    groups = 1
-    local_conv2d = LocalConv2d(
-        in_channels=in_channels,
-        out_channels=out_channels,
-        input_height=input_height,
-        input_width=input_width,
-        kernel_size=kernel_size,
-        stride=stride,
-        padding=padding,
-        dilation=dilation,
-        groups=groups,
-    )
-    inputs = np.random.normal(
-        size=(batch_size, in_channels, input_height, input_width)
-    ).astype(np.float32)
-    output_height = (input_height + padding * 2 - kernel_size) // stride + 1
-    output_width = (input_width + padding * 2 - kernel_size) // stride + 1
-    weights = np.random.normal(
-        size=(
-            groups,
-            output_height,
-            output_width,
-            in_channels // groups,
-            kernel_size,
-            kernel_size,
-            out_channels // groups,
-        )
-    ).astype(np.float32)
-    local_conv2d.weight = Parameter(weights)
-    outputs = local_conv2d(tensor(inputs))
-    # naive calculation use numpy
-    # only test output_height == input_height, output_width == input_width, group == 1
-    inputs = np.pad(inputs, ((0, 0), (0, 0), (1, 1), (1, 1)))
-    expected = np.zeros(
-        (batch_size, out_channels, output_height, output_width), dtype=np.float32,
-    )
-    for n, oc, oh, ow in itertools.product(
-        *map(range, [batch_size, out_channels, output_height, output_width])
-    ):
-        ih, iw = oh * stride, ow * stride
-        expected[n, oc, ih, iw] = np.sum(
-            inputs[n, :, ih : ih + kernel_size, iw : iw + kernel_size]
-            * weights[0, oh, ow, :, :, :, oc]
-        )
-    np.testing.assert_almost_equal(outputs.numpy(), expected, 1e-5)
+    def test_func(
+        batch_size,
+        in_channels,
+        out_channels,
+        input_height,
+        input_width,
+        kernel_size,
+        stride,
+        padding,
+        dilation,
+        groups,
+    ):
+        local_conv2d = LocalConv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            input_height=input_height,
+            input_width=input_width,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+        )
+        inputs = np.random.normal(
+            size=(batch_size, in_channels, input_height, input_width)
+        ).astype(np.float32)
+        output_height = (input_height + padding * 2 - kernel_size) // stride + 1
+        output_width = (input_width + padding * 2 - kernel_size) // stride + 1
+        weights = np.random.normal(
+            size=(
+                groups,
+                output_height,
+                output_width,
+                in_channels // groups,
+                kernel_size,
+                kernel_size,
+                out_channels // groups,
+            )
+        ).astype(np.float32)
+        local_conv2d.weight = Parameter(weights)
+        outputs = local_conv2d(tensor(inputs))
+        # naive calculation using numpy
+        # only tests output_height == input_height, output_width == input_width
+        inputs = np.pad(inputs, ((0, 0), (0, 0), (1, 1), (1, 1)))
+        expected = np.zeros(
+            (batch_size, out_channels, output_height, output_width), dtype=np.float32,
+        )
+        ic_group_size = in_channels // groups
+        oc_group_size = out_channels // groups
+        for n, oc, oh, ow in itertools.product(
+            *map(range, [batch_size, out_channels, output_height, output_width])
+        ):
+            ih, iw = oh * stride, ow * stride
+            g_id = oc // oc_group_size
+            expected[n, oc, ih, iw] = np.sum(
+                inputs[
+                    n,
+                    g_id * ic_group_size : (g_id + 1) * ic_group_size,
+                    ih : ih + kernel_size,
+                    iw : iw + kernel_size,
+                ]
+                * weights[g_id, oh, ow, :, :, :, oc % oc_group_size]
+            )
+        np.testing.assert_allclose(outputs.numpy(), expected, atol=1e-5)
+
+    test_func(10, 4, 4, 5, 5, 3, 1, 1, 1, 1)
+    test_func(10, 32, 32, 8, 8, 3, 1, 1, 1, 2)
+    test_func(10, 32, 32, 8, 8, 3, 1, 1, 1, 4)
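The per-position Python loop in the new test is a straightforward but slow reference. The same grouped, untied convolution can also be cross-checked with a vectorized NumPy sketch; `local_conv2d_ref` below is a hypothetical helper, not part of this commit, and it assumes NumPy >= 1.20 (for `sliding_window_view`) and stride = dilation = 1 as in the test.

# Hypothetical vectorized cross-check; not part of this commit.
# Assumes NumPy >= 1.20 and stride = dilation = 1.
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def local_conv2d_ref(inputs, weights, padding=1):
    """Grouped local conv reference.

    inputs:  (N, IC, IH, IW), unpadded
    weights: (G, OH, OW, IC // G, KH, KW, OC // G)
    """
    g, oh, ow, icg, kh, kw, ocg = weights.shape
    n = inputs.shape[0]
    padded = np.pad(inputs, ((0, 0), (0, 0), (padding, padding), (padding, padding)))
    # One KH x KW patch per output position: (N, IC, OH, OW, KH, KW).
    patches = sliding_window_view(padded, (kh, kw), axis=(2, 3))
    # Split the channel axis into (groups, channels-per-group).
    patches = patches.reshape(n, g, icg, oh, ow, kh, kw)
    # Contract channels and the kernel window independently per (group, position).
    out = np.einsum("ngcophw,gopchwk->ngkop", patches, weights)
    return out.reshape(n, g * ocg, oh, ow)

Called on the unpadded `inputs` and the test's `weights`, this should match the loop-built `expected` to the same `atol=1e-5`.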