Unverified commit 276017bb, authored by zhangyikun02, committed by GitHub

conv2d support FP16 on xpu and update unittest for conv2d, test=kunlun (#40395)

Parent 1eb96eec
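The change can be exercised end to end from Python. The snippet below is a minimal illustrative sketch, not part of this commit; it assumes a Paddle build with XPU (Kunlun) support and a device reachable as paddle.XPUPlace(0).

# Illustrative only: run a small conv2d in float16 on an XPU device through the
# static-graph API, which dispatches to the GemmConvXPUKernel registered below.
import numpy as np
import paddle

paddle.enable_static()
place = paddle.XPUPlace(0)  # assumes a Kunlun/XPU device is installed

main, startup = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    x = paddle.static.data(name='x', shape=[2, 3, 5, 5], dtype='float16')
    y = paddle.static.nn.conv2d(input=x, num_filters=6, filter_size=3)

exe = paddle.static.Executor(place)
exe.run(startup)
out, = exe.run(main,
               feed={'x': np.random.random([2, 3, 5, 5]).astype('float16')},
               fetch_list=[y])
print(out.shape)  # (2, 6, 3, 3): 5x5 input, 3x3 filter, stride 1, no padding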
@@ -19,14 +19,16 @@ namespace operators {
template <typename DeviceContext, typename T>
class GemmConvXPUKernel : public framework::OpKernel<T> {
  using XPUT = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext &context) const override {
    const Tensor *input = context.Input<Tensor>("Input");
    // The filter will be reshaped in the calculations,
    // so here use an assignment operation,
    // that avoids modifying the variable in the Scope.
    Tensor filter = *context.Input<Tensor>("Filter");
    Tensor *output = context.Output<Tensor>("Output");
    output->mutable_data<T>(context.GetPlace());
    int groups = context.Attr<int>("groups");
    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
@@ -53,11 +55,16 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
    const int img_h = static_cast<int>(input->dims()[2]);
    const int img_w = static_cast<int>(input->dims()[3]);
    const int f = static_cast<int>(filter.dims()[0]);

    const XPUT *input_data = reinterpret_cast<const XPUT *>(input->data<T>());
    const XPUT *filter_data = reinterpret_cast<const XPUT *>(filter.data<T>());
    XPUT *output_data = reinterpret_cast<XPUT *>(output->data<T>());

    auto &dev_ctx = context.template device_context<DeviceContext>();
    int r = xpu::conv2d<XPUT, XPUT, XPUT, int16_t>(
        dev_ctx.x_context(), input_data, filter_data, output_data, batch_size,
        img_c, img_h, img_w, f, ksize, strides, paddings, dilations, groups,
        nullptr, nullptr, nullptr, true);
    PADDLE_ENFORCE_EQ(
        r, XPU_SUCCESS,
        platform::errors::External("XPU conv kernel return wrong value[%d %s]",
@@ -67,14 +74,16 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
template <typename DeviceContext, typename T>
class GemmConvGradXPUKernel : public framework::OpKernel<T> {
  using XPUT = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext &context) const override {
    const Tensor *input = context.Input<Tensor>("Input");
    const Tensor *output_grad =
        context.Input<Tensor>(framework::GradVarName("Output"));
    Tensor *input_grad =
        context.Output<Tensor>(framework::GradVarName("Input"));
    Tensor *filter_grad =
        context.Output<Tensor>(framework::GradVarName("Filter"));
    // The filter and filter_grad will be reshaped in the calculations,
    // so here use an assignment operation,
@@ -107,19 +116,27 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
    const int img_h = static_cast<int>(input->dims()[2]);
    const int img_w = static_cast<int>(input->dims()[3]);
    const int f = static_cast<int>(filter.dims()[0]);

    const XPUT *input_data = reinterpret_cast<const XPUT *>(input->data<T>());
    const XPUT *filter_data = reinterpret_cast<const XPUT *>(filter.data<T>());
    const XPUT *output_grad_data =
        reinterpret_cast<const XPUT *>(output_grad->data<T>());
    XPUT *input_grad_data = nullptr;
    if (input_grad) {
      input_grad->mutable_data<T>(context.GetPlace());
      input_grad_data = reinterpret_cast<XPUT *>(input_grad->data<T>());
    }
    XPUT *filter_grad_data = nullptr;
    if (filter_grad) {
      filter_grad->mutable_data<T>(context.GetPlace());
      filter_grad_data = reinterpret_cast<XPUT *>(filter_grad->data<T>());
    }

    auto &dev_ctx = context.template device_context<DeviceContext>();
    int r = xpu::conv2d_grad<XPUT, XPUT, XPUT, int16_t>(
        dev_ctx.x_context(), input_data, filter_data, output_grad_data,
        input_grad_data, filter_grad_data, batch_size, img_c, img_h, img_w, f,
        ksize, strides, paddings, dilations, groups, nullptr, nullptr, nullptr,
        nullptr, nullptr, true);
    PADDLE_ENFORCE_EQ(
        r, XPU_SUCCESS,
        platform::errors::External("XPU conv kernel return wrong value[%d %s]",
@@ -130,14 +147,22 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
    conv2d, ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext,
                           paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext,
                               paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GemmConvXPUKernel<paddle::platform::XPUDeviceContext,
                           paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GemmConvGradXPUKernel<paddle::platform::XPUDeviceContext,
                               paddle::platform::float16>);
#endif
@@ -51,16 +51,20 @@ XPUOpMap& get_kl2_ops() {
      {"clip", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"concat_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"concat", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                                    pOpKernelType(vartype::FP16, XPUPlace())})},
      {"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                               pOpKernelType(vartype::FP16, XPUPlace())})},
      {"conv2d_transpose_grad",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"conv2d_transpose",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"depthwise_conv2d_grad",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                     pOpKernelType(vartype::FP16, XPUPlace())})},
      {"depthwise_conv2d",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                     pOpKernelType(vartype::FP16, XPUPlace())})},
      {"dropout_grad",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"dropout", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
......
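The KL2 op list above is what the XPU test utilities consult. Below is a small sketch, not part of the commit, of how the new FP16 entries are expected to surface on the Python side; it assumes the script is run from the unittests directory where the xpu helper package lives, and the exact returned strings are an assumption.

# Sketch: after this change, the support list for conv2d on KL2 should include
# a float16 entry alongside float32.
from xpu.get_test_cover_info import get_xpu_op_support_types

print(get_xpu_op_support_types('conv2d'))            # e.g. ['float32', 'float16']
print(get_xpu_op_support_types('depthwise_conv2d'))  # e.g. ['float32', 'float16']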
@@ -23,6 +23,7 @@ import paddle.fluid as fluid
from op_test_xpu import XPUOpTest
import paddle
from paddle.fluid import Program, program_guard
from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper


def conv2d_forward_naive(input,
@@ -159,320 +160,334 @@ def create_test_padding_VALID_class(parent):
    globals()[cls_name] = TestPaddingVALIDCase


class XPUTestConv2DOp(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'conv2d'
        self.use_dynamic_create_class = False

    class TestConv2DOp(XPUOpTest):
        def setUp(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.op_type = "conv2d"
            self.use_cudnn = False
            self.exhaustive_search = False
            self.use_cuda = False
            self.use_mkldnn = False
            self.fuse_relu_before_depthwise_conv = False
            self.data_format = "AnyLayout"
            self.init_kernel_type()
            self.init_group()
            self.init_dilation()
            self.init_test_case()

            conv2d_param = {
                'stride': self.stride,
                'pad': self.pad,
                'dilation': self.dilations
            }

            np.random.seed(100)
            input = np.random.random(self.input_size).astype(self.dtype)
            if not self.has_cuda():
                self.fuse_relu_before_depthwise_conv = False
            if self.fuse_relu_before_depthwise_conv:
                input = input - 0.5
                input -= (input < 0) * 0.1
                input += (input >= 0) * 0.1
                input2 = np.maximum(input, 0.0)
            else:
                input2 = input
            np.random.seed(1)
            filter = np.random.uniform(-1, 1,
                                       self.filter_size).astype(self.dtype)

            output, _, _, _, _ = conv2d_forward_naive(input2, filter,
                                                      self.groups, conv2d_param)
            output = output.astype(self.dtype)

            self.inputs = {
                'Input': XPUOpTest.np_dtype_to_fluid_dtype(input),
                'Filter': XPUOpTest.np_dtype_to_fluid_dtype(filter)
            }
            self.attrs = {
                'strides': self.stride,
                'paddings': self.pad,
                'groups': self.groups,
                'dilations': self.dilations,
                'use_cudnn': self.use_cudnn,
                'use_mkldnn': self.use_mkldnn,
                'data_format': self.data_format,
                'fuse_relu_before_depthwise_conv':
                self.fuse_relu_before_depthwise_conv,
                'exhaustive_search': self.exhaustive_search
            }
            self.outputs = {'Output': output}

        def has_cuda(self):
            return core.is_compiled_with_cuda() and (self.use_cudnn or
                                                     self.use_cuda)

        def test_check_output(self):
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_output_with_place(self.place)

        def test_check_grad(self):
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(self.place, {'Input', 'Filter'},
                                           'Output')

        def test_check_grad_no_filter(self):
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(
                    self.place, ['Input'],
                    'Output',
                    no_grad_set=set(['Filter']))

        def test_check_grad_no_input(self):
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(
                    self.place, ['Filter'],
                    'Output',
                    no_grad_set=set(['Input']))

        def init_test_case(self):
            self.pad = [0, 0]
            self.stride = [1, 1]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

        def init_test_case_2(self):
            pass

        def init_dilation(self):
            self.dilations = [1, 1]

        def init_group(self):
            self.groups = 1

        def init_kernel_type(self):
            pass

    class TestWithPad(TestConv2DOp):
        def init_test_case(self):
            self.pad = [1, 1]
            self.stride = [1, 1]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

    class TestWithStride(TestConv2DOp):
        def init_test_case(self):
            self.pad = [1, 1]
            self.stride = [2, 2]
            self.input_size = [2, 3, 6, 6]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

    class TestWith1x1(TestConv2DOp):
        def init_test_case(self):
            self.pad = [0, 0]
            self.stride = [1, 1]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [120, f_c, 1, 1]

        def init_group(self):
            self.groups = 1


# ---- test asymmetric padding ----
class XPUTestConv2DOp_v2(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'conv2d'
        self.use_dynamic_create_class = False

    class TestConv2DOp_v2(XPUOpTest):
        def setUp(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.op_type = "conv2d"
            self.use_cudnn = False
            self.exhaustive_search = False
            self.use_cuda = False
            self.use_mkldnn = False
            self.fuse_relu_before_depthwise_conv = False
            self.init_kernel_type()
            self.init_group()
            self.init_dilation()
            self.init_data_format()
            self.init_test_case()
            self.init_paddings()
            self.init_test_case_2()

            conv2d_param = {
                'stride': self.stride,
                'pad': self.pad,
                'dilation': self.dilations
            }

            np.random.seed(100)
            input = np.random.random(self.input_size).astype(self.dtype)
            if not self.has_cuda():
                self.fuse_relu_before_depthwise_conv = False
            if self.fuse_relu_before_depthwise_conv:
                input = input - 0.5
                input -= (input < 0) * 0.1
                input += (input >= 0) * 0.1
                input2 = np.maximum(input, 0.0)
            else:
                input2 = input
            np.random.seed(8)
            filter = np.random.uniform(-1, 1,
                                       self.filter_size).astype(self.dtype)
            output, _, _, _, _ = conv2d_forward_naive(
                input2, filter, self.groups, conv2d_param,
                self.padding_algorithm, self.data_format)
            output = output.astype(self.dtype)

            self.inputs = {
                'Input': XPUOpTest.np_dtype_to_fluid_dtype(input),
                'Filter': XPUOpTest.np_dtype_to_fluid_dtype(filter)
            }
            self.attrs = {
                'strides': self.stride,
                'paddings': self.pad,
                'padding_algorithm': self.padding_algorithm,
                'groups': self.groups,
                'dilations': self.dilations,
                'use_cudnn': self.use_cudnn,
                'use_mkldnn': self.use_mkldnn,
                'data_format': self.data_format,
                'fuse_relu_before_depthwise_conv':
                self.fuse_relu_before_depthwise_conv,
                'exhaustive_search': self.exhaustive_search
            }
            self.outputs = {'Output': output}

        def has_cuda(self):
            return core.is_compiled_with_cuda() and (self.use_cudnn or
                                                     self.use_cuda)

        def test_check_output(self):
            # TODO(wangzhongpu): support mkldnn op in dygraph mode
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_output_with_place(place=self.place)

        def test_check_grad(self):
            # TODO(wangzhongpu): support mkldnn op in dygraph mode
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(self.place, {'Input', 'Filter'},
                                           'Output')

        def test_check_grad_no_filter(self):
            # TODO(wangzhongpu): support mkldnn op in dygraph mode
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(
                    self.place, ['Input'],
                    'Output',
                    no_grad_set=set(['Filter']))

        def test_check_grad_no_input(self):
            # TODO(wangzhongpu): support mkldnn op in dygraph mode
            if (hasattr(self, "no_need_check_grad") and
                    self.no_need_check_grad == True):
                return
            if core.is_compiled_with_xpu():
                paddle.enable_static()
                self.check_grad_with_place(
                    self.place, ['Filter'],
                    'Output',
                    no_grad_set=set(['Input']))

        def init_test_case(self):
            self.pad = [0, 0]
            self.stride = [1, 2]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 4, 3]

        def init_dilation(self):
            self.dilations = [1, 1]

        def init_group(self):
            self.groups = 1

        def init_kernel_type(self):
            pass

        def init_paddings(self):
            self.pad = [0, 0]
            self.padding_algorithm = "EXPLICIT"

        def init_data_format(self):
            self.data_format = "NCHW"

        def init_test_case_2(self):
            pass

    class TestConv2DOp_AsyPadding(TestConv2DOp_v2):
        def init_paddings(self):
            self.pad = [0, 0, 0, 0]
            self.padding_algorithm = "EXPLICIT"

    class TestWithPad_AsyPadding(TestConv2DOp_v2):
        def init_test_case(self):
            self.stride = [1, 1]
            self.input_size = [2, 3, 5, 5]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

        def init_paddings(self):
            self.pad = [1, 1, 1, 1]
            self.padding_algorithm = "EXPLICIT"

    class TestWithStride_AsyPadding(TestConv2DOp_v2):
        def init_test_case(self):
            self.stride = [2, 2]
            self.input_size = [2, 3, 6, 6]  # NCHW
            assert np.mod(self.input_size[1], self.groups) == 0
            f_c = self.input_size[1] // self.groups
            self.filter_size = [6, f_c, 3, 3]

        def init_paddings(self):
            self.pad = [1, 1, 1, 1]
            self.padding_algorithm = "EXPLICIT"


support_types = get_xpu_op_support_types('conv2d')
for stype in support_types:
    create_test_class(globals(), XPUTestConv2DOp, stype)
    create_test_class(globals(), XPUTestConv2DOp_v2, stype)

#---------- test SAME VALID -----------
#create_test_padding_SAME_class(TestConv2DOp_AsyPadding)
......
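For reference, a sketch of what the create_test_class loop above is expected to produce for the FP16 type; the generated class name and wiring are assumptions based on the wrapper pattern, not code from this commit.

# Hypothetical equivalent of one generated case: the inner TestConv2DOp is
# cloned with in_type set to float16 and registered under a dtype-suffixed
# name so unittest discovery picks it up alongside the float32 variant.
import numpy as np
import unittest

class TestConv2DOp_float16(XPUTestConv2DOp.TestConv2DOp):  # name is illustrative
    in_type = np.float16

if __name__ == '__main__':
    unittest.main()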