From 9056cc8b12faa4beb037dab1646ac2dc71428292 Mon Sep 17 00:00:00 2001
From: RuohengMa <120699764+RuohengMa@users.noreply.github.com>
Date: Wed, 18 Jan 2023 16:56:21 +0800
Subject: [PATCH] [PHI] remove bitwise and, or, xor (#49916)

* add reduce_sum_int64 and reduce_sum_int8 xpu kernels

* [PHI] add clip grad kernel with support type float32 and int32

* [PHI unittest] add clip_grad unit test

* adapt code to clang-format

* update xpu api output with clip_grad api

* remove int8 support of reduce_sum xpu kernel since it can not pass unit tests

* adapt license date, add code for XPUDataType convertion

* add int8 support of reduce_sum

* add reduce_sum unit tests for dtype int64, int8, and add more test cases

* update license date

* remove buggy bitwise and, or and xor xpu kernels, refine bitwise not xpu kernel

* change license date
---
 cmake/external/xpu.cmake                      |  2 +-
 paddle/phi/backends/xpu/xpu2_op_list.cc       | 12 ++---
 paddle/phi/kernels/clip_grad_kernel.h         |  2 +-
 paddle/phi/kernels/reduce_sum_kernel.cc       | 10 +++--
 paddle/phi/kernels/reduce_sum_kernel.h        |  2 +-
 paddle/phi/kernels/xpu/bitwise.cc             | 45 +++----------------
 paddle/phi/kernels/xpu/clip_grad_kernel.cc    | 44 ++++++++++++++++++
 paddle/phi/kernels/xpu/reduce_sum_kernel.cc   |  5 ++-
 .../tests/unittests/xpu/test_clip_op_xpu.py   | 16 +++++--
 .../unittests/xpu/test_reduce_sum_op_xpu.py   | 40 ++++++++++++++++-
 10 files changed, 121 insertions(+), 57 deletions(-)
 create mode 100644 paddle/phi/kernels/xpu/clip_grad_kernel.cc

diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index be088de898..f04e5f9d30 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20230110")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20230114")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index 367231972a..8451ee2774 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -67,10 +67,7 @@ XPUOpMap& get_kl2_ops() {
                     phi::DataType::INT64})},
       {"bilinear_interp_v2", XPUKernelSet({phi::DataType::FLOAT32})},
       {"bilinear_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
-      {"bitwise_and", XPUKernelSet({phi::DataType::BOOL})},
       {"bitwise_not", XPUKernelSet({phi::DataType::BOOL})},
-      {"bitwise_or", XPUKernelSet({phi::DataType::BOOL})},
-      {"bitwise_xor", XPUKernelSet({phi::DataType::BOOL})},
       {"broadcast", XPUKernelSet({phi::DataType::FLOAT32})},
       {"c_allgather",
        XPUKernelSet({phi::DataType::FLOAT16,
@@ -109,6 +106,8 @@ XPUOpMap& get_kl2_ops() {
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"clip", XPUKernelSet({phi::DataType::FLOAT32})},
       {"clip_by_norm", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"clip_grad",
+       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT32})},
       {"coalesce_tensor",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"concat_grad",
@@ -435,7 +434,10 @@ XPUOpMap& get_kl2_ops() {
       {"reduce_min", XPUKernelSet({phi::DataType::FLOAT32})},
       {"reduce_prod", XPUKernelSet({phi::DataType::FLOAT32})},
       {"reduce_sum_grad", XPUKernelSet({phi::DataType::FLOAT32})},
-      {"reduce_sum", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"reduce_sum",
+       XPUKernelSet({phi::DataType::FLOAT32,
+                     phi::DataType::INT8,
+                     phi::DataType::INT64})},
       {"relu6", XPUKernelSet({phi::DataType::FLOAT32})},
       {"relu6_grad", XPUKernelSet({phi::DataType::FLOAT32})},
       {"relu_grad",
diff --git a/paddle/phi/kernels/clip_grad_kernel.h b/paddle/phi/kernels/clip_grad_kernel.h
index 8a7e5b99fd..bc6245ce90 100644
--- a/paddle/phi/kernels/clip_grad_kernel.h
+++ b/paddle/phi/kernels/clip_grad_kernel.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/paddle/phi/kernels/reduce_sum_kernel.cc b/paddle/phi/kernels/reduce_sum_kernel.cc
index c6cfe42566..a3ff565fce 100644
--- a/paddle/phi/kernels/reduce_sum_kernel.cc
+++ b/paddle/phi/kernels/reduce_sum_kernel.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -27,7 +27,8 @@ void SumKernel(const Context& dev_ctx,
                bool keep_dim,
                DenseTensor* out) {
   bool reduce_all = recompute_reduce_all(x, dims);
-  SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
+  SumRawKernel<T>(
+      dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
 }
 
 }  // namespace phi
@@ -82,5 +83,8 @@
 #endif
 
 #if defined(PADDLE_WITH_XPU)
-PD_REGISTER_KERNEL(sum, XPU, ALL_LAYOUT, phi::SumKernel, float) {}
+PD_REGISTER_KERNEL(
+    sum, XPU, ALL_LAYOUT, phi::SumKernel, float, int8_t, int64_t) {
+  kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
+}
 #endif
diff --git a/paddle/phi/kernels/reduce_sum_kernel.h b/paddle/phi/kernels/reduce_sum_kernel.h
index 3bcf025d96..e994b073fc 100644
--- a/paddle/phi/kernels/reduce_sum_kernel.h
+++ b/paddle/phi/kernels/reduce_sum_kernel.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/paddle/phi/kernels/xpu/bitwise.cc b/paddle/phi/kernels/xpu/bitwise.cc
index a897a37acd..019acf52f8 100644
--- a/paddle/phi/kernels/xpu/bitwise.cc
+++ b/paddle/phi/kernels/xpu/bitwise.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -19,51 +19,18 @@
 namespace phi {
 
-template <typename T, typename Context>
-void BitwiseAndKernel(const Context& ctx,
-                      const DenseTensor& x,
-                      const DenseTensor& y,
-                      DenseTensor* out) {
-  ctx.template Alloc<T>(out);
-  int r = xpu::logical_and(
-      ctx.x_context(), x.data<T>(), y.data<T>(), out->data<T>(), x.numel());
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise and");
-}
-
-template <typename T, typename Context>
-void BitwiseOrKernel(const Context& ctx,
-                     const DenseTensor& x,
-                     const DenseTensor& y,
-                     DenseTensor* out) {
-  ctx.template Alloc<T>(out);
-  int r = xpu::logical_or(
-      ctx.x_context(), x.data<T>(), y.data<T>(), out->data<T>(), x.numel());
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise or");
-}
-
-template <typename T, typename Context>
-void BitwiseXorKernel(const Context& ctx,
-                      const DenseTensor& x,
-                      const DenseTensor& y,
-                      DenseTensor* out) {
-  ctx.template Alloc<T>(out);
-  int r = xpu::logical_xor(
-      ctx.x_context(), x.data<T>(), y.data<T>(), out->data<T>(), x.numel());
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise xor");
-}
-
 template <typename T, typename Context>
 void BitwiseNotKernel(const Context& ctx,
                       const DenseTensor& x,
                       DenseTensor* out) {
+  using XPUDataType = typename XPUTypeTrait<T>::Type;
   ctx.template Alloc<T>(out);
-  int r =
-      xpu::logical_not(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
+  int r = xpu::logical_not(ctx.x_context(),
+                           reinterpret_cast<const XPUDataType*>(x.data<T>()),
+                           reinterpret_cast<XPUDataType*>(out->data<T>()),
+                           x.numel());
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise not");
 }
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(bitwise_and, XPU, ALL_LAYOUT, phi::BitwiseAndKernel, bool) {}
-PD_REGISTER_KERNEL(bitwise_or, XPU, ALL_LAYOUT, phi::BitwiseOrKernel, bool) {}
-PD_REGISTER_KERNEL(bitwise_xor, XPU, ALL_LAYOUT, phi::BitwiseXorKernel, bool) {}
 PD_REGISTER_KERNEL(bitwise_not, XPU, ALL_LAYOUT, phi::BitwiseNotKernel, bool) {}
diff --git a/paddle/phi/kernels/xpu/clip_grad_kernel.cc b/paddle/phi/kernels/xpu/clip_grad_kernel.cc
new file mode 100644
index 0000000000..ff1cc21660
--- /dev/null
+++ b/paddle/phi/kernels/xpu/clip_grad_kernel.cc
@@ -0,0 +1,44 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/clip_grad_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void ClipGradKernel(const Context& ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out_grad,
+                    const Scalar& min,
+                    const Scalar& max,
+                    DenseTensor* x_grad) {
+  ctx.template Alloc<T>(x_grad);
+  using XPUDataType = typename XPUTypeTrait<T>::Type;
+  int r =
+      xpu::clip_grad(ctx.x_context(),
+                     reinterpret_cast<const XPUDataType*>(x.data<T>()),
+                     reinterpret_cast<const XPUDataType*>(out_grad.data<T>()),
+                     reinterpret_cast<XPUDataType*>(x_grad->data<T>()),
+                     x.numel(),
+                     min.to<T>(),
+                     max.to<T>());
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_grad");
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    clip_grad, XPU, ALL_LAYOUT, phi::ClipGradKernel, float, int) {}
diff --git a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc
index ac13dc3de3..dd3abc7bad 100644
--- a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc
+++ b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -46,4 +46,5 @@ void SumRawKernel(const Context& dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(sum_raw, XPU, ALL_LAYOUT, phi::SumRawKernel, float) {}
+PD_REGISTER_KERNEL(
+    sum_raw, XPU, ALL_LAYOUT, phi::SumRawKernel, float, int8_t, int64_t) {}
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py
index 075ff7f7e8..4bf88d40b7 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ from xpu.get_test_cover_info import (
 
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid import Program, program_guard
+from paddle.fluid import Program, core, program_guard
 
 
 class XPUTestClipOp(XPUOpTestWrapper):
@@ -51,7 +51,7 @@ class XPUTestClipOp(XPUOpTestWrapper):
 
         def set_xpu(self):
             self.__class__.use_xpu = True
-            self.__class__.no_need_check_grad = True
+            self.__class__.no_need_check_grad = False
             self.__class__.op_type = self.dtype
 
         def init_data(self):
@@ -91,6 +91,16 @@ class XPUTestClipOp(XPUOpTestWrapper):
                 self.check_output_with_place(self.place)
                 paddle.disable_static()
 
+        def test_check_grad(self):
+            if hasattr(self, "no_need_check_grad") and self.no_need_check_grad:
+                return
+            if core.is_compiled_with_xpu():
+                paddle.enable_static()
+                self.check_grad_with_place(
+                    self.place, ['X'], 'Out', check_eager=True
+                )
+                paddle.disable_static()
+
     class TestClipOp1(TestClipOp):
         def init_data(self):
             self.shape = (8, 16, 8)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py
index d8a1e9efcb..2ffc6c2d22 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -48,7 +48,7 @@ class XPUTestReduceSumOp(XPUOpTestWrapper):
                 'reduce_all': self.reduce_all,
                 'keep_dim': self.keep_dim,
             }
-            self.inputs = {'X': np.random.random(self.shape).astype("float32")}
+            self.inputs = {'X': np.random.random(self.shape).astype(self.dtype)}
             if self.attrs['reduce_all']:
                 self.outputs = {'Out': self.inputs['X'].sum()}
             else:
@@ -63,6 +63,7 @@
             self.axis = (0,)
             self.reduce_all = False
             self.keep_dim = False
+            self.dtype = self.in_type
 
         def test_check_output(self):
             self.check_output_with_place(self.place)
@@ -71,12 +72,47 @@
             self.check_grad_with_place(self.place, ['X'], 'Out')
 
     class XPUTestReduceSumCase1(XPUTestReduceSumBase):
+        def init_case(self):
+            self.shape = (5, 6, 10)
+            self.axis = (0,)
+            self.reduce_all = False
+            self.keep_dim = False
+
+    class XPUTestReduceSumCase2(XPUTestReduceSumBase):
         def init_case(self):
             self.shape = (5, 6, 10)
             self.axis = (0,)
             self.reduce_all = False
             self.keep_dim = True
 
+    class XPUTestReduceSumCase3(XPUTestReduceSumBase):
+        def init_case(self):
+            self.shape = (5, 6, 10)
+            self.axis = (0,)
+            self.reduce_all = True
+            self.keep_dim = False
+
+    class XPUTestReduceSumCase4(XPUTestReduceSumBase):
+        def init_case(self):
+            self.shape = (5, 6, 10)
+            self.axis = (1,)
+            self.reduce_all = False
+            self.keep_dim = False
+
+    class XPUTestReduceSumCase5(XPUTestReduceSumBase):
+        def init_case(self):
+            self.shape = (5, 6, 10)
+            self.axis = (1,)
+            self.reduce_all = False
+            self.keep_dim = True
+
+    class XPUTestReduceSumCase6(XPUTestReduceSumBase):
+        def init_case(self):
+            self.shape = (5, 6, 10)
+            self.axis = (1,)
+            self.reduce_all = True
+            self.keep_dim = False
+
 
 support_types = get_xpu_op_support_types('reduce_sum')
 for stype in support_types:
-- 
GitLab
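
A note on the `XPUTypeTrait` pattern the patch introduces in `BitwiseNotKernel` and `ClipGradKernel`: the trait maps the framework element type `T` to the element type the XDNN device API expects, and the `reinterpret_cast` is then a pure pointer-view change between layout-compatible types. The standalone sketch below illustrates the same trait-plus-cast idiom; `FrameworkHalf`, `DeviceHalf`, `TypeTrait`, and `device_bitwise_not` are simplified hypothetical stand-ins for illustration, not Paddle or XDNN APIs.

```cpp
#include <cstdint>
#include <iostream>

// Hypothetical stand-ins: the framework type and the device API type are
// distinct C++ types but share the same bit layout (cf. paddle's float16
// vs. the XDNN half type behind XPUTypeTrait<T>::Type in the patch).
struct FrameworkHalf { uint16_t bits; };
struct DeviceHalf { uint16_t bits; };

// Trait mapping a framework element type to the device API element type.
template <typename T>
struct TypeTrait {
  using Type = T;  // identity for plain types (float, int, bool, ...)
};
template <>
struct TypeTrait<FrameworkHalf> {
  using Type = DeviceHalf;  // the one case that actually remaps
};

// A device-API-style routine that only understands DeviceHalf.
void device_bitwise_not(const DeviceHalf* in, DeviceHalf* out, int64_t n) {
  for (int64_t i = 0; i < n; ++i) out[i].bits = ~in[i].bits;
}

// Kernel template in the style of the patched BitwiseNotKernel: buffers are
// typed as T, the device call is made in terms of TypeTrait<T>::Type. The
// cast is safe only because the two types are layout-compatible.
template <typename T>
void NotKernel(const T* x, T* y, int64_t n) {
  using XPUDataType = typename TypeTrait<T>::Type;
  device_bitwise_not(reinterpret_cast<const XPUDataType*>(x),
                     reinterpret_cast<XPUDataType*>(y), n);
}

int main() {
  FrameworkHalf x[2] = {{0x0000}, {0x00FF}};
  FrameworkHalf y[2];
  NotKernel(x, y, 2);
  std::cout << std::hex << y[0].bits << " " << y[1].bits << "\n";  // ffff ff00
}
```

The payoff of this design is visible in the registration lines of the patch: one template body serves every registered dtype (`float` and `int` for `clip_grad`, `bool` for `bitwise_not`), with the trait absorbing any mismatch between framework and device element types.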