Move XPU momentum to phi (#45565)

* Move XPU momentum to phi, test=kunlun
* Fix mu type, test=kunlun

Move XPU momentum to phi (#45565)
* Move XPU momentum to phi, test=kunlun
* Fix mu type, test=kunlun
d7807806 · WangZhen · GitHub · 7db017b0 · 7db017b0 · d7807806
Showing 2 changed files with 72 additions and 82 deletions

paddle/fluid/operators/optimizers/momentum_op_xpu.cc (deleted) +0 -82

paddle/phi/kernels/xpu/momentum_kernel.cc (added) +72 -0

未找到文件。
--- a/paddle/fluid/operators/optimizers/momentum_op_xpu.cc
+++ b/paddle/fluid/operators/optimizers/momentum_op_xpu.cc
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#ifdef PADDLE_WITH_XPU
-#include <string>
-#include "paddle/fluid/operators/optimizers/sgd_op.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-namespace paddle {
-namespace operators {
-template <typename DeviceContext, typename T>
-class MomentumOpXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    T mu = static_cast<T>(ctx.Attr<float>("mu"));
-    bool use_nesterov = ctx.Attr<bool>("use_nesterov");
-    auto learning_rate = ctx.Input<framework::Tensor>("LearningRate");
-    auto param = ctx.Input<framework::Tensor>("Param");
-    auto param_out = ctx.Output<framework::Tensor>("ParamOut");
-    auto* velocity = ctx.Input<framework::Tensor>("Velocity");
-    auto velocity_out = ctx.Output<framework::Tensor>("VelocityOut");
-    param_out->mutable_data<T>(ctx.GetPlace());
-    velocity_out->mutable_data<T>(ctx.GetPlace());
-    auto* lr = learning_rate->data<float>();
-    auto regularization_method = ctx.Attr<std::string>("regularization_method");
-    auto regularization_coeff = ctx.Attr<float>("regularization_coeff");
-    if (regularization_method != "l2_decay") {
-      // only support l2_decay
-      regularization_coeff = 0.0f;
-    }
-    auto* grad_var = ctx.InputVar("Grad");
-    PADDLE_ENFORCE_EQ(grad_var->IsType<framework::LoDTensor>(),
-                      true,
-                      platform::errors::PermissionDenied(
-                          "Unsupported Variable Type of Param & Grad in "
-                          "MomentumOp-XPU. Excepted "
-                          "LodTensor, But received [%s] and [%s]",
-                          paddle::framework::ToTypeName(grad_var->Type())));
-    auto grad = ctx.Input<framework::Tensor>("Grad");
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    // int momentum(Context* ctx, const T* param, const T* velocity, const T*
-    // grad, T* param_out, T* velocity_out, int len, const float* lr, int
-    // use_nesterov, float mu, float l2_weight_decay);
-    int r = xpu::momentum(dev_ctx.x_context(),
-                          reinterpret_cast<const XPUType*>(param->data<T>()),
-                          reinterpret_cast<const XPUType*>(velocity->data<T>()),
-                          reinterpret_cast<const XPUType*>(grad->data<T>()),
-                          reinterpret_cast<XPUType*>(param_out->data<T>()),
-                          reinterpret_cast<XPUType*>(velocity_out->data<T>()),
-                          param_out->numel(),
-                          lr,
-                          use_nesterov,
-                          mu,
-                          regularization_coeff);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "momentum");
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    momentum,
-    ops::MomentumOpXPUKernel<paddle::platform::XPUDeviceContext, float>,
-    ops::MomentumOpXPUKernel<paddle::platform::XPUDeviceContext,
-                             paddle::platform::float16>);
-#endif
--- a/paddle/phi/kernels/xpu/momentum_kernel.cc
+++ b/paddle/phi/kernels/xpu/momentum_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/momentum_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+namespace phi {
+template <typename T, typename Context>
+void MomentumDenseKernel(const Context& dev_ctx,
+                         const DenseTensor& param,
+                         const DenseTensor& grad,
+                         const DenseTensor& velocity,
+                         const DenseTensor& learning_rate,
+                         const paddle::optional<DenseTensor>& master_param,
+                         float mu,
+                         bool use_nesterov,
+                         const std::string& regularization_method,
+                         float regularization_coeff,
+                         bool multi_precision,
+                         float rescale_grad,
+                         DenseTensor* param_out,
+                         DenseTensor* velocity_out,
+                         DenseTensor* master_param_out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  dev_ctx.template Alloc<T>(param_out);
+  dev_ctx.template Alloc<T>(velocity_out);
+  auto* lr = learning_rate.data<float>();
+  if (regularization_method != "l2_decay") {
+    // only support l2_decay
+    regularization_coeff = 0.0f;
+  }
+  // int momentum(Context* ctx, const T* param, const T* velocity, const T*
+  // grad, T* param_out, T* velocity_out, int len, const float* lr, int
+  // use_nesterov, float mu, float l2_weight_decay);
+  int r = xpu::momentum(dev_ctx.x_context(),
+                        reinterpret_cast<const XPUType*>(param.data<T>()),
+                        reinterpret_cast<const XPUType*>(velocity.data<T>()),
+                        reinterpret_cast<const XPUType*>(grad.data<T>()),
+                        reinterpret_cast<XPUType*>(param_out->data<T>()),
+                        reinterpret_cast<XPUType*>(velocity_out->data<T>()),
+                        param_out->numel(),
+                        lr,
+                        use_nesterov,
+                        static_cast<T>(mu),
+                        regularization_coeff);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "momentum");
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(momentum,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::MomentumDenseKernel,
+                   float,
+                   phi::dtype::float16) {}