diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index 25d01912f141911ca26610dca232468c9e1b6993..3ead16451a3af0f9a72482f9bed4c1fa8776df15 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220708")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220712")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
@@ -19,7 +19,7 @@ endif()
 if(NOT DEFINED XPU_XDNN_BASE_URL)
   set(XPU_XDNN_BASE_URL_WITHOUT_DATE
       "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
-  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220708")
+  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220712")
 else()
   set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
 endif()
diff --git a/paddle/fluid/operators/conv_op_xpu.cc b/paddle/fluid/operators/conv_op_xpu.cc
index f65921dbc17762f0fae21d0b1f3f3bf81e69a337..638983ea26be9fa694c697cd1460d0d05ed8329d 100644
--- a/paddle/fluid/operators/conv_op_xpu.cc
+++ b/paddle/fluid/operators/conv_op_xpu.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/conv_op.h"
 #include "paddle/fluid/platform/cudnn_workspace_helper.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 #ifdef PADDLE_WITH_XPU
 namespace paddle {
 namespace operators {
@@ -71,9 +72,26 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
     XPUT *output_data = reinterpret_cast<XPUT *>(output->data<T>());
 
     auto &dev_ctx = context.template device_context<DeviceContext>();
+    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+
+    XPUT *filter_data_tmp;
+    const XPUT *filter_data_ptr = filter_data;
+    if (data_format == "NHWC") {
+      filter_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
+      PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp);
+      std::vector<int> filter_shape = phi::vectorize<int>(filter.dims());
+      int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
+                                   filter_data,
+                                   filter_data_tmp,
+                                   filter_shape,
+                                   {0, 2, 3, 1});
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
+      filter_data_ptr = reinterpret_cast<const XPUT *>(filter_data_tmp);
+    }
+
     int r = xpu::conv2d<XPUT, XPUT, XPUT, int16_t>(dev_ctx.x_context(),
                                                    input_data,
-                                                   filter_data,
+                                                   filter_data_ptr,
                                                    output_data,
                                                    batch_size,
                                                    img_c,
@@ -89,11 +107,7 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
                                                    nullptr,
                                                    nullptr,
                                                    is_nchw);
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External(
-            "XPU conv kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d");
   }
 };
 
@@ -134,6 +148,7 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
     framework::DDim filter_data_dims =
         phi::slice_ddim(filter.dims(), 2, filter.dims().size());
     std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
+    std::vector<int> filter_shape = phi::vectorize<int>(filter.dims());
     UpdatePaddingAndDilation(
         &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
 
@@ -165,12 +180,35 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
       filter_grad_data = reinterpret_cast<XPUT *>(filter_grad->data<T>());
     }
     auto &dev_ctx = context.template device_context<DeviceContext>();
+    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+
+    XPUT *filter_data_tmp;
+    XPUT *filter_grad_data_tmp;
+    const XPUT *filter_data_ptr = filter_data;
+    XPUT *filter_grad_data_ptr = filter_grad_data;
+    if (data_format == "NHWC") {
+      filter_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
+      PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp);
+      int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
+                                   filter_data,
+                                   filter_data_tmp,
+                                   filter_shape,
+                                   {0, 2, 3, 1});
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
+      filter_data_ptr = reinterpret_cast<const XPUT *>(filter_data_tmp);
+
+      if (filter_grad_data != nullptr) {
+        filter_grad_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
+        PADDLE_ENFORCE_XDNN_NOT_NULL(filter_grad_data_tmp);
+        filter_grad_data_ptr = filter_grad_data_tmp;
+      }
+    }
     int r = xpu::conv2d_grad<XPUT, XPUT, XPUT, int16_t>(dev_ctx.x_context(),
                                                         input_data,
-                                                        filter_data,
+                                                        filter_data_ptr,
                                                         output_grad_data,
                                                         input_grad_data,
-                                                        filter_grad_data,
+                                                        filter_grad_data_ptr,
                                                         batch_size,
                                                         img_c,
                                                         img_h,
@@ -187,11 +225,18 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
                                                         nullptr,
                                                         nullptr,
                                                         is_nchw);
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External(
-            "XPU conv kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_grad");
+
+    if ((filter_grad_data_ptr != nullptr) && (data_format == "NHWC")) {
+      std::vector<int> filter_shape_fhwc = {
+          filter_shape[0], filter_shape[2], filter_shape[3], filter_shape[1]};
+      int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
+                                   filter_grad_data_ptr,
+                                   filter_grad_data,
+                                   filter_shape_fhwc,
+                                   {0, 3, 1, 2});
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
+    }
   }
 };
 }  // namespace operators
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py
index 387dd88bcd4ea90fe5e9088a9dbe14843bc8708e..9dd7247c4a39dd56faf78743b773bd4bcb836193 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py
@@ -498,10 +498,41 @@ class XPUTestConv2DOp_v2(XPUOpTestWrapper):
         self.padding_algorithm = "EXPLICIT"
 
 
+class XPUTestConv2DOp_NHWC(XPUOpTestWrapper):
+
+    def __init__(self):
+        self.op_name = 'conv2d'
+        self.use_dynamic_create_class = False
+
+    class TestConv2DOp_AsyPadding_NHWC(
+            XPUTestConv2DOp_v2.TestConv2DOp_AsyPadding):
+
+        def init_data_format(self):
+            self.data_format = "NHWC"
+
+        def init_test_case_2(self):
+            N, C, H, W = self.input_size
+            self.input_size = [N, H, W, C]
+
+    class TestWithPad_AsyPadding_NHWC(
+            XPUTestConv2DOp_v2.TestWithPad_AsyPadding):
+
+        def init_data_format(self):
+            self.data_format = "NHWC"
+
+        def init_test_case_2(self):
+            N, C, H, W = self.input_size
+            self.input_size = [N, H, W, C]
+
+
 support_types = get_xpu_op_support_types('conv2d')
 for stype in ['float32']:
     create_test_class(globals(), XPUTestConv2DOp, stype)
     create_test_class(globals(), XPUTestConv2DOp_v2, stype)
+    create_test_class(globals(),
+                      XPUTestConv2DOp_NHWC,
+                      stype,
+                      ignore_deivce_version=[core.XPUVersion.XPU1])
 
 #---------- test SAME VALID -----------
 #create_test_padding_SAME_class(TestConv2DOp_AsyPadding)
@@ -512,9 +543,5 @@ for stype in ['float32']:
 #create_test_padding_VALID_class(TestWithPad_AsyPadding)
 #create_test_padding_VALID_class(TestWithStride_AsyPadding)
 
-# ------------ test channel last ---------
-#create_test_channel_last_class(TestConv2DOp_AsyPadding)
-#create_test_channel_last_class(TestWithPad_AsyPadding)
-
 if __name__ == '__main__':
     unittest.main()
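
Note on the NHWC handling above: Paddle stores conv2d filters in FCHW order regardless of data_format, so for NHWC inputs the kernel transposes the filter with permutation {0, 2, 3, 1} (FCHW -> FHWC) before calling xpu::conv2d, and transposes the filter gradient back with the inverse permutation {0, 3, 1, 2} after xpu::conv2d_grad. A minimal NumPy sketch of this layout round-trip, with hypothetical shapes chosen only for illustration:

    import numpy as np

    # Hypothetical filter: 8 output channels, 3 input channels, 5x5 kernel (FCHW).
    filter_fchw = np.random.rand(8, 3, 5, 5).astype(np.float32)

    # Forward pass: FCHW -> FHWC, as the kernel does via xpu::transpose
    # with permutation {0, 2, 3, 1}.
    filter_fhwc = filter_fchw.transpose(0, 2, 3, 1)
    assert filter_fhwc.shape == (8, 5, 5, 3)

    # Backward pass: the filter gradient is produced in FHWC and restored to
    # FCHW with the inverse permutation {0, 3, 1, 2}.
    restored = filter_fhwc.transpose(0, 3, 1, 2)
    assert np.array_equal(restored, filter_fchw)  # the two permutations invert

Because {0, 3, 1, 2} is the inverse of {0, 2, 3, 1}, the gradient written back to filter_grad ends up in the FCHW layout the framework expects.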