[XPU] Add gather_nd fp16 and add check_dtype_op_blacklist (#55860)

307128d1 · jiangfan06 · GitHub · b546b923 · 307128d1 · 307128d1
3 changed files
--- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
+++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
@@ -406,7 +406,10 @@ void AutoMixedPrecisionPass::GetOpPrecision() const {
        support_low_precision = OpSupportPrecision(
            GetOpOriginalType(op_type), backend_, low_precision_, black_list_);
-        if (op_node->Op()->HasAttr("dtype")) {
+        std::unordered_set<std::string> check_dtype_op_blacklist(
+            {"arg_max", "arg_min"});
+        if (op_node->Op()->HasAttr("dtype") &&
+            !check_dtype_op_blacklist.count(GetOpOriginalType(op_type))) {
          auto dtype = op_node->Op()->GetAttrIfExists<int>("dtype");
          support_low_precision =
              support_low_precision &&

--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -397,7 +397,8 @@ XPUOpMap& get_kl2_ops() {
      {"gather_nd",
       XPUKernelSet({phi::DataType::INT32,
                     phi::DataType::INT64,
-                     phi::DataType::FLOAT32})},
+                     phi::DataType::FLOAT32,
+                     phi::DataType::FLOAT16})},
      {"gather",
       XPUKernelSet({phi::DataType::FLOAT32,
                     phi::DataType::FLOAT16,

--- a/paddle/phi/kernels/xpu/gather_nd_kernel.cc
+++ b/paddle/phi/kernels/xpu/gather_nd_kernel.cc
@@ -24,6 +24,7 @@ void GatherNdKernel(const Context &ctx,
                    const DenseTensor &x,
                    const DenseTensor &index,
                    DenseTensor *out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
  ctx.template Alloc<T>(out);
  if (x.numel() == 0) {
@@ -57,8 +58,8 @@ void GatherNdKernel(const Context &ctx,
    // int broadcast(Context* ctx, const T* x, T* y, const std::vector<int>&
    // xshape, const std::vector<int>& yshape)
    int r = xpu::broadcast(ctx.x_context(),
-                           x.data<T>(),
+                           reinterpret_cast<const XPUType *>(x.data<T>()),
-                           out->data<T>(),
+                           reinterpret_cast<XPUType *>(out->data<T>()),
                           {1, x_numel},
                           {remain_numel, x_numel});
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast");
@@ -87,17 +88,19 @@ void GatherNdKernel(const Context &ctx,
  int ret = XPU_SUCCESS;
  if (index_type == DataType::INT32) {
-    ret = xpu::gather_nd<T, int>(ctx.x_context(),
+    ret = xpu::gather_nd<XPUType, int>(
-                                 x.data<T>(),
+        ctx.x_context(),
+        reinterpret_cast<const XPUType *>(x.data<T>()),
        index.data<int>(),
-                                 out->data<T>(),
+        reinterpret_cast<XPUType *>(out->data<T>()),
        x_vec,
        index_shape);
  } else {
-    ret = xpu::gather_nd<T, int64_t>(ctx.x_context(),
+    ret = xpu::gather_nd<XPUType, int64_t>(
-                                     x.data<T>(),
+        ctx.x_context(),
+        reinterpret_cast<const XPUType *>(x.data<T>()),
        index.data<int64_t>(),
-                                     out->data<T>(),
+        reinterpret_cast<XPUType *>(out->data<T>()),
        x_vec,
        index_shape);
  }
@@ -106,5 +109,11 @@ void GatherNdKernel(const Context &ctx,
 }  // namespace phi
-PD_REGISTER_KERNEL(
+PD_REGISTER_KERNEL(gather_nd,
-    gather_nd, XPU, ALL_LAYOUT, phi::GatherNdKernel, float, int64_t, int) {}
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::GatherNdKernel,
+                   float,
+                   int64_t,
+                   int,
+                   phi::dtype::float16) {}