未验证 提交 9056cc8b 编写于 作者: R RuohengMa 提交者: GitHub

[PHI] remove bitwise and, or, xor (#49916)

* add reduce_sum_int64 and reduce_sum_int8 xpu kernels

* [PHI] add clip grad kernel with support type float32 and int32

* [PHI unittest] add clip_grad unit test

* adapt code to clang-format

* update xpu api output with clip_grad api

* remove int8 support of reduce_sum xpu kernel since it can not pass unit tests

* adapt license date, add code for XPUDataType convertion

* add int8 support of reduce_sum

* add reduce_sum unit tests for dtype int64, int8, and add more test cases

* update license date

* remove buggy bitwise and, or and xor xpu kernels, refine bitwise not xpu kernel

* change license date
上级 f9043c78
......@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20230110")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20230114")
else()
set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
......@@ -67,10 +67,7 @@ XPUOpMap& get_kl2_ops() {
phi::DataType::INT64})},
{"bilinear_interp_v2", XPUKernelSet({phi::DataType::FLOAT32})},
{"bilinear_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"bitwise_and", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_not", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_or", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_xor", XPUKernelSet({phi::DataType::BOOL})},
{"broadcast", XPUKernelSet({phi::DataType::FLOAT32})},
{"c_allgather",
XPUKernelSet({phi::DataType::FLOAT16,
......@@ -109,6 +106,8 @@ XPUOpMap& get_kl2_ops() {
XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
{"clip", XPUKernelSet({phi::DataType::FLOAT32})},
{"clip_by_norm", XPUKernelSet({phi::DataType::FLOAT32})},
{"clip_grad",
XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT32})},
{"coalesce_tensor",
XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
{"concat_grad",
......@@ -435,7 +434,10 @@ XPUOpMap& get_kl2_ops() {
{"reduce_min", XPUKernelSet({phi::DataType::FLOAT32})},
{"reduce_prod", XPUKernelSet({phi::DataType::FLOAT32})},
{"reduce_sum_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"reduce_sum", XPUKernelSet({phi::DataType::FLOAT32})},
{"reduce_sum",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::INT8,
phi::DataType::INT64})},
{"relu6", XPUKernelSet({phi::DataType::FLOAT32})},
{"relu6_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"relu_grad",
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -27,7 +27,8 @@ void SumKernel(const Context& dev_ctx,
bool keep_dim,
DenseTensor* out) {
bool reduce_all = recompute_reduce_all(x, dims);
SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
SumRawKernel<T, Context>(
dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
}
} // namespace phi
......@@ -82,5 +83,8 @@ PD_REGISTER_KERNEL(
#endif
#if defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL(sum, XPU, ALL_LAYOUT, phi::SumKernel, float) {}
PD_REGISTER_KERNEL(
sum, XPU, ALL_LAYOUT, phi::SumKernel, float, int8_t, int64_t) {
kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
}
#endif
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -19,51 +19,18 @@
namespace phi {
// Element-wise bitwise AND on XPU.
// NOTE(review): implemented via xpu::logical_and, which is only
// bit-identical to a bitwise AND for bool operands — presumably why this
// kernel is registered for bool alone; confirm against the XDNN API before
// widening the dtype list.
template <typename T, typename Context>
void BitwiseAndKernel(const Context& ctx,
                      const DenseTensor& x,
                      const DenseTensor& y,
                      DenseTensor* out) {
  // Allocate the output buffer with element type T on the XPU device.
  ctx.template Alloc<T>(out);
  // Assumes x and y have identical numel — TODO confirm broadcast handling
  // happens before this kernel is invoked.
  int r = xpu::logical_and(
      ctx.x_context(), x.data<T>(), y.data<T>(), out->data<T>(), x.numel());
  // Translate a non-zero XDNN status code into a Paddle error.
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise and");
}
// Element-wise bitwise OR on XPU, delegated to the XDNN logical_or
// primitive (bit-identical to bitwise OR for the bool specialization this
// kernel is registered with).
template <typename T, typename Context>
void BitwiseOrKernel(const Context& ctx,
                     const DenseTensor& x,
                     const DenseTensor& y,
                     DenseTensor* out) {
  ctx.template Alloc<T>(out);
  const T* lhs = x.data<T>();
  const T* rhs = y.data<T>();
  T* dst = out->data<T>();
  const int r = xpu::logical_or(ctx.x_context(), lhs, rhs, dst, x.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise or");
}
// Element-wise bitwise XOR on XPU, delegated to the XDNN logical_xor
// primitive (bit-identical to bitwise XOR for the bool specialization this
// kernel is registered with).
template <typename T, typename Context>
void BitwiseXorKernel(const Context& ctx,
                      const DenseTensor& x,
                      const DenseTensor& y,
                      DenseTensor* out) {
  ctx.template Alloc<T>(out);
  const T* lhs = x.data<T>();
  const T* rhs = y.data<T>();
  T* dst = out->data<T>();
  const int r = xpu::logical_xor(ctx.x_context(), lhs, rhs, dst, x.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise xor");
}
// Element-wise bitwise NOT on XPU, delegated to the XDNN logical_not
// primitive (bit-identical to bitwise NOT for the bool specialization this
// kernel is registered with).
// NOTE(review): the scraped diff left both the pre-change call and the
// post-change XPUDataType-cast call in the body, redeclaring `r` (a compile
// error); only the post-change call is retained here.
template <typename T, typename Context>
void BitwiseNotKernel(const Context& ctx,
                      const DenseTensor& x,
                      DenseTensor* out) {
  // Map the phi element type onto the matching XPU device type so the raw
  // pointers can be handed to the XDNN primitive.
  using XPUDataType = typename XPUTypeTrait<T>::Type;
  ctx.template Alloc<T>(out);
  int r = xpu::logical_not(ctx.x_context(),
                           reinterpret_cast<const XPUDataType*>(x.data<T>()),
                           reinterpret_cast<XPUDataType*>(out->data<T>()),
                           x.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise not");
}
} // namespace phi
PD_REGISTER_KERNEL(bitwise_and, XPU, ALL_LAYOUT, phi::BitwiseAndKernel, bool) {}
PD_REGISTER_KERNEL(bitwise_or, XPU, ALL_LAYOUT, phi::BitwiseOrKernel, bool) {}
PD_REGISTER_KERNEL(bitwise_xor, XPU, ALL_LAYOUT, phi::BitwiseXorKernel, bool) {}
PD_REGISTER_KERNEL(bitwise_not, XPU, ALL_LAYOUT, phi::BitwiseNotKernel, bool) {}
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/clip_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
// Gradient of clip(x, min, max) on XPU: propagates out_grad where x lies
// inside the [min, max] window and zeroes it elsewhere — exact boundary
// handling is delegated to xpu::clip_grad; confirm against the XDNN API if
// gradients at the boundaries matter.
template <typename T, typename Context>
void ClipGradKernel(const Context& ctx,
                    const DenseTensor& x,
                    const DenseTensor& out_grad,
                    const Scalar& min,
                    const Scalar& max,
                    DenseTensor* x_grad) {
  // Allocate the gradient output with element type T on the XPU device.
  ctx.template Alloc<T>(x_grad);
  // Map the phi element type onto the matching XPU device type so the raw
  // pointers can be handed to the XDNN primitive.
  using XPUDataType = typename XPUTypeTrait<T>::Type;
  int r =
      xpu::clip_grad(ctx.x_context(),
                     reinterpret_cast<const XPUDataType*>(x.data<T>()),
                     reinterpret_cast<const XPUDataType*>(out_grad.data<T>()),
                     reinterpret_cast<XPUDataType*>(x_grad->data<T>()),
                     x.numel(),
                     min.to<T>(),
                     max.to<T>());
  // Translate a non-zero XDNN status code into a Paddle error.
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_grad");
}
} // namespace phi
PD_REGISTER_KERNEL(
clip_grad, XPU, ALL_LAYOUT, phi::ClipGradKernel, float, int) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -46,4 +46,5 @@ void SumRawKernel(const Context& dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(sum_raw, XPU, ALL_LAYOUT, phi::SumRawKernel, float) {}
PD_REGISTER_KERNEL(
sum_raw, XPU, ALL_LAYOUT, phi::SumRawKernel, float, int8_t, int64_t) {}
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -27,7 +27,7 @@ from xpu.get_test_cover_info import (
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
from paddle.fluid import Program, core, program_guard
class XPUTestClipOp(XPUOpTestWrapper):
......@@ -51,7 +51,7 @@ class XPUTestClipOp(XPUOpTestWrapper):
def set_xpu(self):
    """Configure class-level flags that route this test to the XPU place."""
    self.__class__.use_xpu = True
    # NOTE(review): the scraped diff kept both the old (True) and new
    # (False) assignments to no_need_check_grad; only the post-change value
    # is retained — gradient checking is enabled now that clip_grad has an
    # XPU kernel.
    self.__class__.no_need_check_grad = False
    self.__class__.op_type = self.dtype
def init_data(self):
......@@ -91,6 +91,16 @@ class XPUTestClipOp(XPUOpTestWrapper):
self.check_output_with_place(self.place)
paddle.disable_static()
def test_check_grad(self):
    """Run the framework gradient check for d(clip)/dX on the XPU place."""
    # Honor an explicit opt-out flag set by subclasses.
    if hasattr(self, "no_need_check_grad") and self.no_need_check_grad:
        return
    if not core.is_compiled_with_xpu():
        return
    paddle.enable_static()
    self.check_grad_with_place(self.place, ['X'], 'Out', check_eager=True)
    paddle.disable_static()
class TestClipOp1(TestClipOp):
def init_data(self):
self.shape = (8, 16, 8)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -48,7 +48,7 @@ class XPUTestReduceSumOp(XPUOpTestWrapper):
'reduce_all': self.reduce_all,
'keep_dim': self.keep_dim,
}
self.inputs = {'X': np.random.random(self.shape).astype("float32")}
self.inputs = {'X': np.random.random(self.shape).astype(self.dtype)}
if self.attrs['reduce_all']:
self.outputs = {'Out': self.inputs['X'].sum()}
else:
......@@ -63,6 +63,7 @@ class XPUTestReduceSumOp(XPUOpTestWrapper):
self.axis = (0,)
self.reduce_all = False
self.keep_dim = False
self.dtype = self.in_type
def test_check_output(self):
self.check_output_with_place(self.place)
......@@ -71,12 +72,47 @@ class XPUTestReduceSumOp(XPUOpTestWrapper):
self.check_grad_with_place(self.place, ['X'], 'Out')
class XPUTestReduceSumCase1(XPUTestReduceSumBase):
    # Reduce along axis 0 without keeping the reduced dimension.
    def init_case(self):
        self.shape = (5, 6, 10)
        self.axis = (0,)
        self.reduce_all = False
        self.keep_dim = False
class XPUTestReduceSumCase2(XPUTestReduceSumBase):
    # Reduce along axis 0, keeping the reduced dimension.
    def init_case(self):
        self.shape = (5, 6, 10)
        self.axis = (0,)
        self.reduce_all = False
        self.keep_dim = True
class XPUTestReduceSumCase3(XPUTestReduceSumBase):
    # Reduce over every axis (reduce_all), dropping reduced dimensions.
    def init_case(self):
        self.shape = (5, 6, 10)
        self.axis = (0,)
        self.reduce_all = True
        self.keep_dim = False
class XPUTestReduceSumCase4(XPUTestReduceSumBase):
    # Reduce along axis 1 without keeping the reduced dimension.
    def init_case(self):
        self.shape = (5, 6, 10)
        self.axis = (1,)
        self.reduce_all = False
        self.keep_dim = False
class XPUTestReduceSumCase5(XPUTestReduceSumBase):
    # Reduce along axis 1, keeping the reduced dimension.
    def init_case(self):
        self.shape = (5, 6, 10)
        self.axis = (1,)
        self.reduce_all = False
        self.keep_dim = True
class XPUTestReduceSumCase6(XPUTestReduceSumBase):
    # Reduce over every axis (reduce_all) with axis hint 1, dropping dims.
    def init_case(self):
        self.shape = (5, 6, 10)
        self.axis = (1,)
        self.reduce_all = True
        self.keep_dim = False
support_types = get_xpu_op_support_types('reduce_sum')
for stype in support_types:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册