diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index ef4e1b3aedfd0c2b7b8485c2058b2d01ab7125e2..bb2e864f5cda254400e3ca3ae4846bfc581c84b0 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so") if(NOT DEFINED XPU_BASE_URL) set(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") - set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220810") + set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220812") else() set(XPU_BASE_URL "${XPU_BASE_URL}") endif() @@ -19,7 +19,7 @@ endif() if(NOT DEFINED XPU_XDNN_BASE_URL) set(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev") - set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220810") + set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220812") else() set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}") endif() diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc index fbd2cc49263ee6d5128bd654ab185b2be7d179f7..6dfe2945cafd7ad7ae8a4e0448218e0e100d2df2 100644 --- a/paddle/fluid/operators/activation_op_xpu.cc +++ b/paddle/fluid/operators/activation_op_xpu.cc @@ -404,6 +404,49 @@ struct XPULogGradFunctor : public BaseActivationFunctor { } }; +template +struct XPUMishFunctor : public BaseActivationFunctor { + void operator()(const framework::ExecutionContext &ctx) const { + const auto *x = ctx.Input("X"); + auto *y = ctx.Output("Out"); + const T *x_data = x->data(); + T *y_data = y->mutable_data(ctx.GetPlace()); + + float threshold = ctx.Attr("threshold"); + + auto xpu_context = + ctx.device_context().x_context(); + int r = xpu::mish(xpu_context, x_data, y_data, x->numel(), threshold); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "mish"); + } +}; + +template +struct XPUMishGradFunctor : public BaseActivationFunctor { + void operator()(const framework::ExecutionContext &ctx) const { + const auto *x = ctx.Input("X"); + auto *dOut = ctx.Input(framework::GradVarName("Out")); + auto *dX = ctx.Output(framework::GradVarName("X")); + const T *x_data = x->data(); + const T *y_grad = dOut->data(); + T *x_grad = dX->mutable_data(ctx.GetPlace()); + + float threshold = ctx.Attr("threshold"); + + auto xpu_context = + ctx.device_context().x_context(); + int r = xpu::mish_grad(xpu_context, + reinterpret_cast(x_data), + reinterpret_cast( + x_data), // mish_grad do not need y_data + reinterpret_cast(y_grad), + reinterpret_cast(x_grad), + dX->numel(), + threshold); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "mish_grad"); + } +}; + template struct XPUPowFunctor : public BaseActivationFunctor { void operator()(const framework::ExecutionContext &ctx) const { @@ -589,6 +632,7 @@ REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, REGISTER_ACTIVATION_XPU_KERNEL(leaky_relu, XPULeakyReluFunctor, XPULeakyReluGradFunctor) +REGISTER_ACTIVATION_XPU_KERNEL(mish, XPUMishFunctor, XPUMishGradFunctor) REGISTER_ACTIVATION_XPU_KERNEL(reciprocal, XPUReciprocalFunctor, XPUReciprocalGradFunctor) diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index 364c1ceee5092457dab1978bf1528a0bec1a4ce3..bb20116e234e762a33a61eb2c27a6333c2bce58e 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -111,6 +111,10 @@ XPUOpMap& get_kl2_ops() { XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"conv2d_transpose", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"deformable_conv_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"deformable_conv", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"depthwise_conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"depthwise_conv2d", @@ -342,6 +346,8 @@ XPUOpMap& get_kl2_ops() { {"merged_momentum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::FP16, XPUPlace())})}, + {"mish_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mish", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"momentum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), @@ -559,6 +565,8 @@ XPUOpMap& get_kl2_ops() { {"update_loss_scaling", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::FP16, XPUPlace())})}, + {"uniform_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"unsqueeze2_grad", XPUKernelSet({pOpKernelType(vartype::FP64, XPUPlace()), pOpKernelType(vartype::INT64, XPUPlace()), diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py index 561dd43c689e7d9f1a96a13d806150ab29c99ffd..813ae2e0f8952f02bb1a37d3301c64a79e6c206a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py @@ -1100,5 +1100,57 @@ def ref_thresholded_relu(x, threshold=1.0): return out +class XPUTestMishOP(XPUOpTestWrapper): + + def __init__(self): + self.op_name = 'mish' + self.use_dynamic_create_class = False + + class XPUTestMishBase(TestActivationOPBase): + + def set_case(self): + self.op_type = "mish" + self.dtype = self.in_type + + self.init_config() + threshold = np.random.uniform(0, 1) + out = ref_mish(self.x, threshold) + + self.inputs = {'X': self.x} + self.outputs = {'Out': out} + self.attrs = {'use_xpu': True, 'threshold': threshold} + + def init_config(self): + self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + + class XPUTestMish2(XPUTestMishBase): + + def init_config(self): + self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype) + + class XPUTestMish3(XPUTestMishBase): + + def init_config(self): + self.x = np.random.uniform(-2, 2, + [4, 512, 15, 15]).astype(self.dtype) + + class XPUTestMish4(XPUTestMishBase): + + def init_config(self): + self.x = np.random.uniform(-2, 2, + [4, 256, 22, 22]).astype(self.dtype) + + +support_types = get_xpu_op_support_types('mish') +for stype in support_types: + create_test_class(globals(), XPUTestMishOP, stype) + + +def ref_mish(x, threshold=20): + sp = np.select([x <= threshold, x > threshold], [np.log(1 + np.exp(x)), x]) + out = x * np.tanh(sp) + return out + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py index 9f774889835459cff73c64c183c64f8c9b887191..3e1b2e55479f9cb4c333d7347275f93f9d0a3f45 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py @@ -24,6 +24,7 @@ import paddle.fluid as fluid from op_test_xpu import OpTest, XPUOpTest import paddle from paddle.fluid import Program, program_guard +from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper def dmc_bilinear(data_im, height, width, h, w): @@ -111,181 +112,189 @@ def dconv_im2col_gemm(input, offset, mask, filter, group, conv_param): return out -class TestModulatedDeformableConvOp(XPUOpTest): - - def setUp(self): - self.op_type = "deformable_conv" - self.dtype = np.float32 - self.init_group() - self.init_dilation() - self.init_test_case() - - conv_param = { - 'stride': self.stride, - 'pad': self.pad, - 'dilation': self.dilations - } - - input = np.random.random(self.input_size).astype(self.dtype) - offset = 10 * np.random.random(self.offset_size).astype(self.dtype) - mask = 10 * np.random.random(self.mask_size).astype(self.dtype) - filter = np.random.random(self.filter_size).astype(self.dtype) - output = dconv_im2col_gemm(input, offset, mask, filter, self.groups, - conv_param) - output = output.astype(self.dtype) - - self.inputs = { - 'Input': OpTest.np_dtype_to_fluid_dtype(input), - 'Offset': OpTest.np_dtype_to_fluid_dtype(offset), - 'Mask': OpTest.np_dtype_to_fluid_dtype(mask), - 'Filter': OpTest.np_dtype_to_fluid_dtype(filter) - } - self.attrs = { - 'strides': self.stride, - 'paddings': self.pad, - 'groups': self.groups, - 'deformable_groups': self.deformable_groups, - 'im2col_step': self.im2col_step, - 'dilations': self.dilations, - } - self.outputs = {'Output': output} - - def has_cuda(self): - return core.is_compiled_with_cuda() and (self.use_cudnn - or self.use_cuda) - - def test_check_output(self): - if core.is_compiled_with_xpu(): - paddle.enable_static() - place = paddle.XPUPlace(0) - self.check_output_with_place(place) - - def test_check_grad(self): - if core.is_compiled_with_xpu(): - paddle.enable_static() - place = paddle.XPUPlace(0) - self.check_grad_with_place(place, - {'Input', 'Offset', 'Mask', 'Filter'}, - 'Output', - max_relative_error=0.06) - - def init_test_case(self): - self.pad = [1, 1] - self.stride = [1, 1] - self.dilations = [1, 1] - self.input_size = [2, 8, 4, 4] # NCHW - assert np.mod(self.input_size[1], self.groups) == 0 - f_c = self.input_size[1] // self.groups - self.filter_size = [8, f_c, 3, 3] - self.im2col_step = 1 - self.deformable_groups = 1 - offset_c = 2 * self.deformable_groups * self.filter_size[ - 2] * self.filter_size[3] - mask_c = self.deformable_groups * self.filter_size[ - 2] * self.filter_size[3] - self.offset_size = [ - self.input_size[0], offset_c, self.input_size[2], self.input_size[3] - ] - self.mask_size = [ - self.input_size[0], mask_c, self.input_size[2], self.input_size[3] - ] - - def init_dilation(self): - self.dilations = [1, 1] - - def init_group(self): - self.groups = 1 - - -class TestWithDilation(TestModulatedDeformableConvOp): - - def init_test_case(self): - self.pad = [2, 2] - self.stride = [1, 1] - self.input_size = [4, 3, 4, 4] # NCHW - assert np.mod(self.input_size[1], self.groups) == 0 - f_c = self.input_size[1] // self.groups - self.filter_size = [6, f_c, 3, 3] - self.im2col_step = 1 - self.deformable_groups = 1 - offset_c = 2 * self.deformable_groups * self.filter_size[ - 2] * self.filter_size[3] - mask_c = self.deformable_groups * self.filter_size[ - 2] * self.filter_size[3] - self.offset_size = [ - self.input_size[0], offset_c, self.input_size[2], self.input_size[3] - ] - self.mask_size = [ - self.input_size[0], mask_c, self.input_size[2], self.input_size[3] - ] - - def init_dilation(self): - self.dilations = [2, 2] - - -class TestWith3x3(TestModulatedDeformableConvOp): - - def init_test_case(self): - self.pad = [1, 1] - self.stride = [1, 1] - self.input_size = [2, 3, 5, 5] # NCHW - assert np.mod(self.input_size[1], self.groups) == 0 - f_c = self.input_size[1] // self.groups - self.filter_size = [6, f_c, 3, 3] - self.im2col_step = 1 - self.deformable_groups = 1 - offset_c = 2 * self.deformable_groups * self.filter_size[ - 2] * self.filter_size[3] - mask_c = self.deformable_groups * self.filter_size[ - 2] * self.filter_size[3] - self.offset_size = [ - self.input_size[0], offset_c, self.input_size[2], self.input_size[3] - ] - self.mask_size = [ - self.input_size[0], mask_c, self.input_size[2], self.input_size[3] - ] - - -class TestModulatedDeformableConvInvalidInput(unittest.TestCase): - - def test_error(self): - - def test_invalid_input(): - paddle.enable_static() - input = [1, 3, 32, 32] - offset = fluid.data(name='offset', - shape=[None, 3, 32, 32], - dtype='float32') - mask = fluid.data(name='mask', - shape=[None, 3, 32, 32], - dtype='float32') - loss = fluid.layers.deformable_conv(input, - offset, - mask, - num_filters=4, - filter_size=1) - - self.assertRaises(TypeError, test_invalid_input) - - def test_invalid_offset(): - paddle.enable_static() - input = fluid.data(name='input', - shape=[None, 3, 32, 32], - dtype='int32') - offset = fluid.data(name='offset', - shape=[None, 3, 32, 32], - dtype='float32') - mask = fluid.data(name='mask', - shape=[None, 3, 32, 32], - dtype='float32') - loss = fluid.layers.deformable_conv(input, - offset, - mask, - num_filters=4, - filter_size=1) - - self.assertRaises(TypeError, test_invalid_offset) - +class XPUTestModulatedDeformableConvOp(XPUOpTestWrapper): + + def __init__(self): + self.op_name = 'deformable_conv' + self.use_dynamic_create_class = False + + class TestModulatedDeformableConvOp(XPUOpTest): + + def setUp(self): + self.op_type = "deformable_conv" + self.dtype = self.in_type + self.place = paddle.XPUPlace(0) + self.init_group() + self.init_dilation() + self.init_test_case() + + conv_param = { + 'stride': self.stride, + 'pad': self.pad, + 'dilation': self.dilations + } + + input = np.random.random(self.input_size).astype(self.dtype) + offset = 10 * np.random.random(self.offset_size).astype(self.dtype) + mask = 10 * np.random.random(self.mask_size).astype(self.dtype) + filter = np.random.random(self.filter_size).astype(self.dtype) + output = dconv_im2col_gemm(input, offset, mask, filter, self.groups, + conv_param) + output = output.astype(self.dtype) + + self.inputs = { + 'Input': OpTest.np_dtype_to_fluid_dtype(input), + 'Offset': OpTest.np_dtype_to_fluid_dtype(offset), + 'Mask': OpTest.np_dtype_to_fluid_dtype(mask), + 'Filter': OpTest.np_dtype_to_fluid_dtype(filter) + } + self.attrs = { + 'strides': self.stride, + 'paddings': self.pad, + 'groups': self.groups, + 'deformable_groups': self.deformable_groups, + 'im2col_step': self.im2col_step, + 'dilations': self.dilations, + } + self.outputs = {'Output': output} + + def test_check_output(self): + if core.is_compiled_with_xpu(): + paddle.enable_static() + self.check_output_with_place(self.place) + + def test_check_grad(self): + if core.is_compiled_with_xpu(): + paddle.enable_static() + self.check_grad_with_place( + self.place, {'Input', 'Offset', 'Mask', 'Filter'}, + 'Output', + max_relative_error=0.06) + + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 8, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [8, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = 2 * self.deformable_groups * self.filter_size[ + 2] * self.filter_size[3] + mask_c = self.deformable_groups * self.filter_size[ + 2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], offset_c, self.input_size[2], + self.input_size[3] + ] + self.mask_size = [ + self.input_size[0], mask_c, self.input_size[2], + self.input_size[3] + ] + + def init_dilation(self): + self.dilations = [1, 1] + + def init_group(self): + self.groups = 1 + + class TestWithDilation(TestModulatedDeformableConvOp): + + def init_test_case(self): + self.pad = [2, 2] + self.stride = [1, 1] + self.input_size = [4, 3, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = 2 * self.deformable_groups * self.filter_size[ + 2] * self.filter_size[3] + mask_c = self.deformable_groups * self.filter_size[ + 2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], offset_c, self.input_size[2], + self.input_size[3] + ] + self.mask_size = [ + self.input_size[0], mask_c, self.input_size[2], + self.input_size[3] + ] + + def init_dilation(self): + self.dilations = [2, 2] + + class TestWith3x3(TestModulatedDeformableConvOp): + + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + self.im2col_step = 1 + self.deformable_groups = 1 + offset_c = 2 * self.deformable_groups * self.filter_size[ + 2] * self.filter_size[3] + mask_c = self.deformable_groups * self.filter_size[ + 2] * self.filter_size[3] + self.offset_size = [ + self.input_size[0], offset_c, self.input_size[2], + self.input_size[3] + ] + self.mask_size = [ + self.input_size[0], mask_c, self.input_size[2], + self.input_size[3] + ] + + class TestModulatedDeformableConvInvalidInput(unittest.TestCase): + + def test_error(self): + + def test_invalid_input(): + paddle.enable_static() + input = [1, 3, 32, 32] + offset = fluid.data(name='offset', + shape=[None, 3, 32, 32], + dtype='float32') + mask = fluid.data(name='mask', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask, + num_filters=4, + filter_size=1) + + self.assertRaises(TypeError, test_invalid_input) + + def test_invalid_offset(): + paddle.enable_static() + input = fluid.data(name='input', + shape=[None, 3, 32, 32], + dtype='int32') + offset = fluid.data(name='offset', + shape=[None, 3, 32, 32], + dtype='float32') + mask = fluid.data(name='mask', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask, + num_filters=4, + filter_size=1) + + self.assertRaises(TypeError, test_invalid_offset) + + +support_types = get_xpu_op_support_types('deformable_conv') +for stype in support_types: + create_test_class(globals(), XPUTestModulatedDeformableConvOp, stype) if __name__ == '__main__': unittest.main()