add fp16 for masked_select on kunlun, *test=kunlun (#41215)

ff818c77 · TTerror · GitHub · 482e5b6c · ff818c77 · ff818c77
Showing 2 changed files with 7 additions and 2 deletions

paddle/fluid/operators/masked_select_op_xpu.cc paddle/fluid/operators/masked_select_op_xpu.cc +6 -2

paddle/fluid/platform/device/xpu/xpu2_op_list.h paddle/fluid/platform/device/xpu/xpu2_op_list.h +1 -0

未找到文件。
--- a/paddle/fluid/operators/masked_select_op_xpu.cc
+++ b/paddle/fluid/operators/masked_select_op_xpu.cc
@@ -19,13 +19,15 @@ namespace operators {
 template <typename T>
 class MaskedSelectXPUKernel : public framework::OpKernel<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto input = context.Input<framework::Tensor>("X");
    auto mask = context.Input<framework::Tensor>("Mask");
    auto out = context.Output<framework::Tensor>("Y");
    auto* mask_data = mask->data<bool>();
-    auto* input_data = input->data<T>();
+    auto* input_data = reinterpret_cast<const XPUType*>(input->data<T>());
    auto input_dim = input->dims();
    auto mask_dim = mask->dims();
    PADDLE_ENFORCE_EQ(
@@ -51,7 +53,8 @@ class MaskedSelectXPUKernel : public framework::OpKernel<T> {
    framework::DDim out_dim{out_size_cpu};
    out->Resize(out_dim);
-    auto out_data = out->mutable_data<T>(context.GetPlace());
+    auto out_data =
+        reinterpret_cast<XPUType*>(out->mutable_data<T>(context.GetPlace()));
    auto input_shape = phi::vectorize<int>(input_dim);
    auto mask_shape = phi::vectorize<int>(mask_dim);
@@ -69,6 +72,7 @@ class MaskedSelectXPUKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 REGISTER_OP_XPU_KERNEL(masked_select, ops::MaskedSelectXPUKernel<float>,
+                       ops::MaskedSelectXPUKernel<paddle::platform::float16>,
                       ops::MaskedSelectXPUKernel<int>,
                       ops::MaskedSelectXPUKernel<int64_t>);
 #endif
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -243,6 +243,7 @@ XPUOpMap& get_kl2_ops() {
      {"masked_select",
       XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
                     pOpKernelType(vartype::INT64, XPUPlace()),
+                     pOpKernelType(vartype::FP16, XPUPlace()),
                     pOpKernelType(vartype::FP32, XPUPlace())})},
      {"matmul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"matmul_v2_grad",