diff --git a/dnn/include/megdnn/oprs/nn.h b/dnn/include/megdnn/oprs/nn.h
index 47b622bf64e4c8f2a918f0f5bcbe9998dc8b5640..4468a0cc2ae724d93ffa62452a1b37c0441ad534 100644
--- a/dnn/include/megdnn/oprs/nn.h
+++ b/dnn/include/megdnn/oprs/nn.h
@@ -543,11 +543,11 @@ class RegionRestrictedConvolutionForward : public ConvolutionBase<param::Convolu
 
 public:
     /**
-     * \param[in] src (n, ic, ih, iw)
-     * \param[in] filter (oc, ic, fh, fw)
+     * \param[in] src (n, ic, ih, iw) or (n, g*icpg, ih, iw)
+     * \param[in] filter (oc, ic, fh, fw) or (g, ocpg, icpg, fh, fw)
      * \param[in] rin (n, ih, iw)
      * \param[in] rout (n, oh, ow)
-     * \param[out] dst (n, oc, oh, ow)
+     * \param[out] dst (n, oc, oh, ow) or (n, g*ocpg, oh, ow)
      */
     virtual void exec(
             _megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in rin,
@@ -592,11 +592,11 @@ class RegionRestrictedConvolutionBackwardData
 
 public:
     /**
-     * \param[in] filter (oc, ic, fh, fw)
-     * \param[in] diff (n, oc, oh, ow)
+     * \param[in] filter (oc, ic, fh, fw) or (g, ocpg, icpg, fh, fw)
+     * \param[in] diff (n, oc, oh, ow) or (n, g*ocpg, oh, ow)
      * \param[in] rin (n, ih, iw)
      * \param[in] rout (n, oh, ow)
-     * \param[out] grad (n, ic, ih, iw)
+     * \param[out] grad (n, ic, ih, iw) or (n, g*icpg, ih, iw)
      */
     virtual void exec(
             _megdnn_tensor_in filter, _megdnn_tensor_in diff, _megdnn_tensor_in rin,
@@ -635,11 +635,11 @@ class RegionRestrictedConvolutionBackwardFilter
 
 public:
     /**
-     * \param[in] src (n, ic, ih, iw)
-     * \param[in] diff (n, oc, oh, ow)
+     * \param[in] src (n, ic, ih, iw) or (n, g*icpg, ih, iw)
+     * \param[in] diff (n, oc, oh, ow) or (n, g*ocpg, oh, ow)
      * \param[in] rin (n, ih, iw)
      * \param[in] rout (n, oh, ow)
-     * \param[out] grad (oc, ic, fh, fw)
+     * \param[out] grad (oc, ic, fh, fw) or (g, ocpg, icpg, fh, fw)
      */
     virtual void exec(
             _megdnn_tensor_in src, _megdnn_tensor_in diff, _megdnn_tensor_in rin,
diff --git a/dnn/src/cuda/region_restricted_convolution/opr_impl.cpp b/dnn/src/cuda/region_restricted_convolution/opr_impl.cpp
index 1b3d7398bb862a6e8f1170cefa1500402b71a41c..ae06f3aa063ab1f06e3b06ca246c94d7fb42fa25 100644
--- a/dnn/src/cuda/region_restricted_convolution/opr_impl.cpp
+++ b/dnn/src/cuda/region_restricted_convolution/opr_impl.cpp
@@ -20,7 +20,7 @@ void RegionRestrictedConvolutionForwardImpl::exec(
             src.layout, dst.layout, fm,
             param().compute_mode == Param::ComputeMode::DEFAULT);
     megdnn_assert(
-            fm.group > 1 && src.layout.dtype.category() == DTypeCategory::FLOAT &&
+            src.layout.dtype.category() == DTypeCategory::FLOAT &&
             param().compute_mode == Param::ComputeMode::DEFAULT &&
             fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 &&
             fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip &&
@@ -76,7 +76,7 @@ void RegionRestrictedConvolutionBackwardDataImpl::exec(
             diff.layout, grad.layout, fm,
             param().compute_mode == Param::ComputeMode::DEFAULT);
     megdnn_assert(
-            fm.group > 1 && diff.layout.dtype.category() == DTypeCategory::FLOAT &&
+            diff.layout.dtype.category() == DTypeCategory::FLOAT &&
             param().compute_mode == Param::ComputeMode::DEFAULT &&
             fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 &&
             fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip &&
@@ -120,7 +120,7 @@ void RegionRestrictedConvolutionBackwardFilterImpl::exec(
             workspace.size);
 
     megdnn_assert(
-            fm.group > 1 && src.layout.dtype.category() == DTypeCategory::FLOAT &&
+            src.layout.dtype.category() == DTypeCategory::FLOAT &&
             param().compute_mode == Param::ComputeMode::DEFAULT &&
             fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 &&
             fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip &&
diff --git a/dnn/test/cuda/region_restricted_convolution.cpp b/dnn/test/cuda/region_restricted_convolution.cpp
index 06857a2daf42f4768a63796011a8f5bfc228ae0a..4a09e048e9c7458a613001b277eae33b566a0b69 100644
--- a/dnn/test/cuda/region_restricted_convolution.cpp
+++ b/dnn/test/cuda/region_restricted_convolution.cpp
@@ -53,6 +53,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_FORWARD_LARGE_FILTER) {
         run(4, 8, 32, 5, 5 / 2, 1);
         run(4, 8, 32, 7, 7 / 2, 1);
         run(1, 2, 32, 9, 9 / 2, 1);
+        run(4, 1, 32, 9, 9 / 2, 1);
         run(4, 8, 32, 11, 11 / 2, 1);
         run(4, 8, 32, 13, 13 / 2, 1);
         run(4, 8, 32, 15, 15 / 2, 1);
@@ -723,6 +724,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_BWD_DATA_FP32) {
         run(4, 8, 32, 25, 25 / 2, 1);
         run(4, 8, 32, 27, 27 / 2, 1);
         run(4, 8, 32, 29, 29 / 2, 1);
+        run(4, 1, 32, 29, 29 / 2, 1);
         run(4, 8, 32, 31, 31 / 2, 1);
     }
 }
@@ -779,6 +781,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_BWD_DATA_FP32_RIN_EQ_ROUT) {
         run(4, 8, 32, 21, 21 / 2, 1);
         run(4, 8, 32, 23, 23 / 2, 1);
         run(4, 8, 32, 25, 25 / 2, 1);
+        run(4, 1, 32, 25, 25 / 2, 1);
         run(4, 8, 32, 27, 27 / 2, 1);
         run(4, 8, 32, 29, 29 / 2, 1);
         run(4, 8, 32, 31, 31 / 2, 1);
@@ -841,6 +844,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_BWD_FILTER_FP32) {
         run(4, 8, 32, 23, 23 / 2, 1);
         run(4, 8, 32, 25, 25 / 2, 1);
         run(4, 8, 32, 27, 27 / 2, 1);
+        run(4, 1, 32, 27, 27 / 2, 1);
         run(4, 8, 32, 29, 29 / 2, 1);
         run(4, 8, 32, 31, 31 / 2, 1);
     }
@@ -899,6 +903,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_BWD_FILTER_FP32_RIN_EQ_ROUT) {
         run(4, 8, 32, 17, 17 / 2, 1);
         run(4, 8, 32, 19, 19 / 2, 1);
         run(4, 8, 32, 21, 21 / 2, 1);
+        run(4, 1, 32, 21, 21 / 2, 1);
         run(4, 8, 32, 23, 23 / 2, 1);
         run(4, 8, 32, 25, 25 / 2, 1);
         run(4, 8, 32, 27, 27 / 2, 1);
diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py
index 2cd73b4eccdf057d9a196cef28d53b1534212cd2..a1f11f9f6304171d85ff60a5922ef667c1d29459 100644
--- a/imperative/python/megengine/functional/nn.py
+++ b/imperative/python/megengine/functional/nn.py
@@ -2016,7 +2016,12 @@ def region_restricted_conv(
     stride_h, stride_w = expand_hw(stride)
     dilate_h, dilate_w = expand_hw(dilation)
 
-    sparse_type = "dense" if groups == 1 else "group"
+    sparse_type = "group"
+    assert groups > 0, (
+        "RegionRestrictedConv expected grouped conv mode, \
+    which requires groups > 0, but got groups=%d"
+        % (groups)
+    )
     op = builtin.RegionRestrictedConvolution(
         stride_h=stride_h,
         stride_w=stride_w,
diff --git a/imperative/python/megengine/module/conv.py b/imperative/python/megengine/module/conv.py
index 84801838d387e2cbbb0aa76e04c4beb2a24f6813..97a6f20201ee17775080016de84c626847cb1b05 100644
--- a/imperative/python/megengine/module/conv.py
+++ b/imperative/python/megengine/module/conv.py
@@ -1050,8 +1050,8 @@ class RegionRestrictedConv(_ConvNd):
             Refer to :class:`~.module.padding.Pad` for more information.
 
     Note:
-        * ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` ,
-            if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)``
+        * weight shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)``,
+            because RegionRestrictedConv supports grouped conv only.
 
     Examples:
         >>> import numpy as np
@@ -1071,7 +1071,7 @@ class RegionRestrictedConv(_ConvNd):
         in_channels: int,
         out_channels: int,
         kernel_size: Union[int, Tuple[int, int]],
-        groups: int,
+        groups: int = 1,
         bias: bool = True,
         stride: Union[int, Tuple[int, int]] = 1,
         padding: Union[int, Tuple[int, int]] = 0,
@@ -1111,9 +1111,6 @@ class RegionRestrictedConv(_ConvNd):
         ichl = self.in_channels
         ochl = self.out_channels
         kh, kw = self.kernel_size
-        if group == 1:
-            # Assume format is NCHW
-            return (ochl, ichl, kh, kw)
 
         assert (
             ichl % group == 0 and ochl % group == 0
diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py
index 3a536fdbbd20cc86ff86a2d43cb9ffb7214af030..e091c6abd0414cc108571727dcf5198c719414e2 100644
--- a/imperative/python/test/unit/functional/test_functional.py
+++ b/imperative/python/test/unit/functional/test_functional.py
@@ -971,17 +971,16 @@ def test_region_restricted_conv_forward_backward_naive(bias):
 @pytest.mark.skipif(
     not is_cuda_available(), reason="rrconv cuda kernel requires cuda available"
 )
-@pytest.mark.parametrize("bias", [True, False])
-def test_region_restricted_conv_forward_backward_cuda(bias):
+@pytest.mark.parametrize("bias, groups", [(True, 1), (True, 3), (False, 1), (False, 3)])
+def test_region_restricted_conv_forward_backward_cuda(bias, groups):
     import megengine as mge
     import megengine.module as M
     from megengine.autodiff import GradManager
-    import megengine.distributed as dist
 
     # params
     handle = "gpu0"
     N = 1
-    GROUP = 3
+    GROUP = groups
     FH = FW = 2
     IH = IW = 2
     OH = OW = 1
@@ -1051,8 +1050,8 @@ def test_region_restricted_conv_forward_backward_cuda(bias):
 @pytest.mark.skipif(
     not is_cuda_available(), reason="rrconv cuda kernel requires cuda available"
 )
-@pytest.mark.parametrize("bias", [True, False])
-def test_region_restricted_conv_forward_backward_uint8(bias):
+@pytest.mark.parametrize("bias, groups", [(True, 1), (True, 3), (False, 1), (False, 3)])
+def test_region_restricted_conv_forward_backward_uint8(bias, groups):
     import megengine as mge
     import megengine.module as M
     from megengine.autodiff import GradManager
@@ -1060,7 +1059,7 @@ def test_region_restricted_conv_forward_backward_uint8(bias):
     # params
     handle = "gpu0"
     N = 1
-    GROUP = 2
+    GROUP = groups
     FH = FW = 1
     IH = IW = 4
     OH = OW = 4