Unverified commit 9fd61d8b authored by feifei-111, committed by GitHub

[phi] Migrate dropout and dropout_grad XPU kernel to phi (#45561)

* test=kunlun

* test=kunlun
Parent 56869d99
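In short, the migration replaces fluid's class-based OpKernel registration with phi's free-function kernel registration. A condensed sketch of the two patterns, taken from the registrations in the diff below:

// Before (fluid): the kernel is a framework::OpKernel subclass, registered
// per device context and dtype.
REGISTER_OP_XPU_KERNEL(
    dropout,
    ops::DropoutXPUKernel<paddle::platform::XPUDeviceContext, float>);

// After (phi): the kernel is a function template, registered per backend,
// layout, and dtype.
PD_REGISTER_KERNEL(dropout, XPU, ALL_LAYOUT, phi::DropoutRawKernel, float) {}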
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/device_wrapper.h"

namespace paddle {
namespace operators {

#ifdef PADDLE_WITH_XPU

using Tensor = framework::Tensor;

template <typename DeviceContext, typename T>
class DropoutXPUKernel : public framework::OpKernel<T> {
  using XPUTyp = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* y = context.Output<Tensor>("Out");
    const auto* x_data = x->data<T>();
    auto* y_data = y->mutable_data<T>(context.GetPlace());
    float dropout_prob = context.Attr<float>("dropout_prob");
    auto dropout_implementation =
        context.Attr<std::string>("dropout_implementation");
    auto& dev_ctx = context.template device_context<DeviceContext>();

    auto* seed =
        context.HasInput("Seed") ? context.Input<Tensor>("Seed") : nullptr;

    int is_upscale = (dropout_implementation == "upscale_in_train");

    if (!context.Attr<bool>("is_test")) {
      int seed_data = 0;
      if (seed) {
        if (platform::is_xpu_place(seed->place())) {
          memory::Copy(platform::CPUPlace(),
                       &seed_data,
                       seed->place(),
                       seed->data<int>(),
                       sizeof(int));
        } else {
          seed_data = *(seed->data<int>());
        }
      } else {
        seed_data =
            context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : 0;
      }

      auto* mask = context.Output<Tensor>("Mask");
      auto* mask_data = mask->mutable_data<T>(context.GetPlace());
      // Special case when dropout_prob is 1.0
      if (dropout_prob == 1.0f) {
        int r = xpu::constant(dev_ctx.x_context(),
                              reinterpret_cast<XPUTyp*>(y_data),
                              y->numel(),
                              XPUTyp(0));
        PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
        r = xpu::constant(dev_ctx.x_context(),
                          reinterpret_cast<XPUTyp*>(mask_data),
                          mask->numel(),
                          XPUTyp(0));
        PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
        return;
      }
      int r = xpu::dropout(dev_ctx.x_context(),
                           reinterpret_cast<const XPUTyp*>(x->data<T>()),
                           reinterpret_cast<XPUTyp*>(y->data<T>()),
                           reinterpret_cast<XPUTyp*>(mask_data),
                           seed_data,
                           mask->numel(),
                           is_upscale,
                           dropout_prob);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "dropout");
    } else {
      float scale =
          (is_upscale) ? (1.0) : (static_cast<float>(1.0f - dropout_prob));
      int r = xpu::scale(dev_ctx.x_context(),
                         reinterpret_cast<const XPUTyp*>(x_data),
                         reinterpret_cast<XPUTyp*>(y_data),
                         x->numel(),
                         false,
                         scale,
                         0.0f);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
    }
  }
};

template <typename DeviceContext, typename T>
class DropoutGradXPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    PADDLE_ENFORCE_EQ(!context.Attr<bool>("is_test"),
                      true,
                      platform::errors::InvalidArgument(
                          "GradOp is only callable when is_test is false"));
    auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
    auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* mask = context.Input<Tensor>("Mask");
    grad_x->mutable_data<T>(context.GetPlace());
    auto& dev_ctx = context.template device_context<DeviceContext>();
    auto& dropout_implementation =
        context.Attr<std::string>("dropout_implementation");
    float dropout_prob = context.Attr<float>("dropout_prob");
    const T* mask_data = mask->data<T>();

    if (dropout_implementation != "upscale_in_train") {
      int r = xpu::mul(dev_ctx.x_context(),
                       reinterpret_cast<const XPUType*>(grad_y->data<T>()),
                       reinterpret_cast<const XPUType*>(mask_data),
                       reinterpret_cast<XPUType*>(grad_x->data<T>()),
                       grad_y->numel());
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul");
      return;
    }

    auto version = platform::get_xpu_version(context.GetPlace().GetDeviceId());
    if (version == phi::backends::xpu::XPUVersion::XPU1) {
      xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
      XPUType* mask_new = RAII_GUARD.alloc_l3_or_gm<XPUType>(mask->numel());
      float scale =
          (dropout_prob == 1.0f) ? (1.0f) : (1.0f / (1.0f - dropout_prob));
      int r = xpu::scale(dev_ctx.x_context(),
                         reinterpret_cast<const XPUType*>(mask->data<T>()),
                         reinterpret_cast<XPUType*>(mask_new),
                         mask->numel(),
                         false,
                         scale,
                         0.0f);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
      r = xpu::mul(dev_ctx.x_context(),
                   reinterpret_cast<const XPUType*>(grad_y->data<T>()),
                   reinterpret_cast<const XPUType*>(mask_new),
                   reinterpret_cast<XPUType*>(grad_x->data<T>()),
                   grad_y->numel());
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul");
    } else {
      int r = xpu::dropout_grad(
          dev_ctx.x_context(),
          reinterpret_cast<const XPUType*>(mask->data<T>()),
          reinterpret_cast<const XPUType*>(grad_y->data<T>()),
          reinterpret_cast<XPUType*>(grad_x->data<T>()),
          dropout_prob,
          grad_y->numel());
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "dropout_grad");
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_XPU_KERNEL(
    dropout,
    ops::DropoutXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::DropoutXPUKernel<paddle::platform::XPUDeviceContext, plat::float16>);
REGISTER_OP_XPU_KERNEL(
    dropout_grad,
    ops::DropoutGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::DropoutGradXPUKernel<paddle::platform::XPUDeviceContext,
                              plat::float16>);
#endif
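For reference, the fluid kernel above and the phi kernels below implement the same dropout semantics. With dropout probability p and a 0/1 mask m (a summary inferred from the code branches, not text from the commit):

training:   y = x * m / (1 - p)   (upscale_in_train),   y = x * m         (otherwise)
inference:  y = x                 (upscale_in_train),   y = (1 - p) * x   (otherwise)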
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/dropout_grad_kernel.h"
#include <memory>
#include <string>
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void DropoutGradRawKernel(const Context& dev_ctx,
const DenseTensor& mask,
const DenseTensor& out_grad,
const Scalar& p,
bool is_test,
const std::string& mode,
DenseTensor* x_grad) {
using XPUType = typename XPUTypeTrait<T>::Type;
PADDLE_ENFORCE_EQ(!is_test,
true,
phi::errors::InvalidArgument(
"GradOp is only callable when is_test is false"));
auto* grad_x = x_grad;
auto* grad_y = &out_grad;
dev_ctx.template Alloc<T>(grad_x);
float dropout_prob = p.to<float>();
const T* mask_data = mask.data<T>();
if (mode != "upscale_in_train") {
int r = xpu::mul(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(grad_y->data<T>()),
reinterpret_cast<const XPUType*>(mask_data),
reinterpret_cast<XPUType*>(grad_x->data<T>()),
grad_y->numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul");
return;
}
auto version =
phi::backends::xpu::get_xpu_version(dev_ctx.GetPlace().GetDeviceId());
if (version == phi::backends::xpu::XPUVersion::XPU1) {
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
XPUType* mask_new = RAII_GUARD.alloc_l3_or_gm<XPUType>(mask.numel());
float scale =
(dropout_prob == 1.0f) ? (1.0f) : (1.0f / (1.0f - dropout_prob));
int r = xpu::scale(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(mask.data<T>()),
reinterpret_cast<XPUType*>(mask_new),
mask.numel(),
false,
scale,
0.0f);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
r = xpu::mul(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(grad_y->data<T>()),
reinterpret_cast<const XPUType*>(mask_new),
reinterpret_cast<XPUType*>(grad_x->data<T>()),
grad_y->numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul");
} else {
int r =
xpu::dropout_grad(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(mask.data<T>()),
reinterpret_cast<const XPUType*>(grad_y->data<T>()),
reinterpret_cast<XPUType*>(grad_x->data<T>()),
dropout_prob,
grad_y->numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "dropout_grad");
}
}
} // namespace phi
PD_REGISTER_KERNEL(dropout_grad,
XPU,
ALL_LAYOUT,
phi::DropoutGradRawKernel,
float,
phi::dtype::float16) {}
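A minimal CPU sketch of the upscale_in_train gradient the kernel above computes, useful for sanity checking (hypothetical helper, not part of the commit; it mirrors the XPU1 scale + mul branch and, by assumption, the fused xpu::dropout_grad as well):

#include <cstddef>
#include <vector>

// grad_x = grad_y * mask / (1 - p); the p == 1 case degenerates to scale 1,
// matching the kernel above (the mask is all zeros there anyway).
std::vector<float> DropoutGradRef(const std::vector<float>& grad_y,
                                  const std::vector<float>& mask,
                                  float p) {
  const float scale = (p == 1.0f) ? 1.0f : 1.0f / (1.0f - p);
  std::vector<float> grad_x(grad_y.size());
  for (std::size_t i = 0; i < grad_y.size(); ++i) {
    grad_x[i] = grad_y[i] * mask[i] * scale;
  }
  return grad_x;
}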
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/dropout_kernel.h"
#include <memory>
#include <string>
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void DropoutRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const paddle::optional<DenseTensor>& seed_tensor,
const Scalar& p,
bool is_test,
const std::string& mode,
int seed,
bool fix_seed,
DenseTensor* out,
DenseTensor* mask) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto* y = out;
const auto* x_data = x.data<T>();
auto* y_data = dev_ctx.template Alloc<T>(y);
float dropout_prob = p.to<float>();
int is_upscale = (mode == "upscale_in_train");
if (!is_test) {
int seed_data = 0;
if (seed_tensor.get_ptr() != nullptr) {
if ((seed_tensor->place()).GetType() == phi::AllocationType::XPU) {
paddle::memory::Copy(phi::CPUPlace(),
&seed_data,
seed_tensor->place(),
seed_tensor->data<int>(),
sizeof(int));
} else {
seed_data = *(seed_tensor->data<int>());
}
} else {
seed_data = fix_seed ? seed : 0;
}
auto* mask_data = dev_ctx.template Alloc<T>(mask);
// Special case when dropout_prob is 1.0
if (dropout_prob == 1.0f) {
int r = xpu::constant(dev_ctx.x_context(),
reinterpret_cast<XPUType*>(y_data),
y->numel(),
XPUType(0));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
r = xpu::constant(dev_ctx.x_context(),
reinterpret_cast<XPUType*>(mask_data),
mask->numel(),
XPUType(0));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
return;
}
int r = xpu::dropout(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(y->data<T>()),
reinterpret_cast<XPUType*>(mask_data),
seed_data,
mask->numel(),
is_upscale,
dropout_prob);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "dropout");
} else {
float scale =
(is_upscale) ? (1.0) : (static_cast<float>(1.0f - dropout_prob));
int r = xpu::scale(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x_data),
reinterpret_cast<XPUType*>(y_data),
x.numel(),
false,
scale,
0.0f);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
}
}
} // namespace phi
PD_REGISTER_KERNEL(dropout,
XPU,
ALL_LAYOUT,
phi::DropoutRawKernel,
float,
phi::dtype::float16) {}
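And an element-wise CPU sketch of the forward pass, mirroring the branches in phi::DropoutRawKernel above (hypothetical helper, not part of the commit; assumes xpu::dropout follows the usual convention of a 0/1 mask):

// Covers all four branches: train/test crossed with upscale/downscale.
float DropoutForwardRef(float x, float mask, float p, bool is_test,
                        bool upscale_in_train) {
  if (is_test) {
    // Mirrors the xpu::scale branch: identity in upscale mode, else rescale.
    return upscale_in_train ? x : (1.0f - p) * x;
  }
  if (p == 1.0f) {
    return 0.0f;  // mirrors the xpu::constant special case
  }
  const float y = x * mask;
  return upscale_in_train ? y / (1.0f - p) : y;
}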