Fix depthwise conv gpu kernel bug (#18582)

* fix depthwise conv gpu kernel bug, test=develop
* add more depthwise conv test, test=develop

Fix depthwise conv gpu kernel bug (#18582)
* fix depthwise conv gpu kernel bug, test=develop
* add more depthwise conv test, test=develop
22fa4c2d · LielinJiang · qingqing01 · c92b78b0 · 22fa4c2d · 22fa4c2d
Showing 2 changed files with 48 additions and 2 deletions

paddle/fluid/operators/math/depthwise_conv.cu +6 -2

python/paddle/fluid/tests/unittests/test_conv2d_op.py +42 -0

未找到文件。
--- a/paddle/fluid/operators/math/depthwise_conv.cu
+++ b/paddle/fluid/operators/math/depthwise_conv.cu
@@ -487,8 +487,12 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
    check_case(1, 2, 3);
    check_case(1, 2, 5);
    check_case(1, 2, -1);
-    check_case(0, 0, 3);
-    check_case(0, 0, 5);
+    check_case(2, 1, 3);
+    check_case(2, 1, 5);
+    check_case(2, 1, -1);
+    check_case(2, 2, 3);
+    check_case(2, 2, 5);
+    check_case(2, 2, -1);
    check_case(0, 0, -1);
 // NOTE(liangdun): 0,0 for other case
 // add other case if needed, e.g. check_case(2^n,1)

--- a/python/paddle/fluid/tests/unittests/test_conv2d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_op.py
@@ -208,6 +208,48 @@ class TestWith1x1(TestConv2dOp):
        self.groups = 3


+class TestWithDepthWise3x3(TestConv2dOp):
+    def init_test_case(self):
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.input_size = [3, 4, 10, 10]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [8, f_c, 3, 3]
+
+    def init_dilation(self):
+        self.dilations = [2, 2]
+
+    def init_group(self):
+        self.groups = 4
+
+
+class TestWithDepthWise5x5(TestConv2dOp):
+    def init_test_case(self):
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.input_size = [2, 4, 10, 10]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [8, f_c, 5, 5]
+
+    def init_group(self):
+        self.groups = 4
+
+
+class TestWithDepthWise7x7(TestConv2dOp):
+    def init_test_case(self):
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.input_size = [2, 8, 10, 10]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [16, f_c, 7, 7]
+
+    def init_group(self):
+        self.groups = 8
+
+
 class TestWithDilation(TestConv2dOp):
    def init_test_case(self):
        self.pad = [0, 0]