[Fluid] move lars_momentum to phi (#55798)

* [Fluid] move lars_momentum to phi * add sig * fix optional Output * off check_dygraph * fix input * fix operator[] * fix * try fix AllocateTmpTensor * fix * fix type * Update paddle/phi/kernels/gpu/lars_momentum_kernel.cu * fix type * rollback * Add Registration * try fix win * try fix win * try use double * try use operator *(float,const Derived &) * try auto * fix * fix * fix * fix dtype * fix type * fix index

[Fluid] move lars_momentum to phi (#55798)
* [Fluid] move lars_momentum to phi * add sig * fix optional Output * off check_dygraph * fix input * fix operator[] * fix * try fix AllocateTmpTensor * fix * fix type * Update paddle/phi/kernels/gpu/lars_momentum_kernel.cu * fix type * rollback * Add Registration * try fix win * try fix win * try use double * try use operator *(float,const Derived &) * try auto * fix * fix * fix * fix dtype * fix type * fix index
b0c2ee26 · gouzil · GitHub · 6839a7b9 · b0c2ee26 · 6839a7b9
8 changed files
--- a/paddle/fluid/operators/optimizers/lars_momentum_op.cc
+++ b/paddle/fluid/operators/optimizers/lars_momentum_op.cc
@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/optimizers/lars_momentum_op.h"
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/op_registry.h"
 namespace paddle {
 namespace operators {
@@ -233,6 +234,3 @@ REGISTER_OPERATOR(
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
    ops::LarsMomentumOpVarTypeInference);
-PD_REGISTER_STRUCT_KERNEL(
-    lars_momentum, CPU, ALL_LAYOUT, ops::LarsMomentumOpKernel, float, double) {}
--- a/paddle/fluid/operators/optimizers/lars_momentum_op.h
+++ b/paddle/fluid/operators/optimizers/lars_momentum_op.h
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/op_registry.h"
-namespace paddle {
-namespace operators {
-template <typename T, typename DeviceContext>
-class LarsMomentumOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto param_out = ctx.MultiOutput<phi::DenseTensor>("ParamOut");
-    auto velocity_out = ctx.MultiOutput<phi::DenseTensor>("VelocityOut");
-    auto param = ctx.MultiInput<phi::DenseTensor>("Param");
-    auto velocity = ctx.MultiInput<phi::DenseTensor>("Velocity");
-    auto learning_rate = ctx.MultiInput<phi::DenseTensor>("LearningRate");
-    auto grad = ctx.MultiInput<phi::DenseTensor>("Grad");
-    auto weight_decay_arr = ctx.Attr<std::vector<float>>("lars_weight_decay");
-    T mu = static_cast<T>(ctx.Attr<float>("mu"));
-    T lars_coeff = ctx.Attr<float>("lars_coeff");
-    T epsilon = ctx.Attr<float>("epsilon");
-    T rescale_grad = ctx.Attr<float>("rescale_grad");
-    int op_num = param.size();
-    for (int i = 0; i < op_num; ++i) {
-      auto* lr = learning_rate[i]->data<T>();
-      T lars_weight_decay = weight_decay_arr[i];
-      param_out[i]->mutable_data<T>(ctx.GetPlace());
-      velocity_out[i]->mutable_data<T>(ctx.GetPlace());
-      auto p_out = framework::EigenVector<T>::Flatten(*(param_out[i]));
-      auto v_out = framework::EigenVector<T>::Flatten(*(velocity_out[i]));
-      auto p = framework::EigenVector<T>::Flatten(*(param[i]));
-      auto v = framework::EigenVector<T>::Flatten(*(velocity[i]));
-      auto g = framework::EigenVector<T>::Flatten(*(grad[i]));
-      auto rescale_g = rescale_grad * g;
-      phi::DenseTensor p_norm_t, g_norm_t;
-      p_norm_t.Resize({1});
-      g_norm_t.Resize({1});
-      p_norm_t.mutable_data<T>(ctx.GetPlace());
-      g_norm_t.mutable_data<T>(ctx.GetPlace());
-      auto ep_norm = framework::EigenScalar<T>::From(p_norm_t);
-      auto eg_norm = framework::EigenScalar<T>::From(g_norm_t);
-      ep_norm = p.square().sum().sqrt();
-      eg_norm = rescale_g.square().sum().sqrt();
-      T local_lr = lr[0];
-      if (lars_weight_decay > 0 && ep_norm(0) > 0 && eg_norm(0) > 0) {
-        local_lr = lr[0] * lars_coeff * ep_norm(0) /
-                   (eg_norm(0) + lars_weight_decay * ep_norm(0) + epsilon);
-      }
-      v_out = v * mu + local_lr * (rescale_g + lars_weight_decay * p);
-      p_out = p - v_out;
-    }
-  }
-};
-}  // namespace operators
-}  // namespace paddle
--- a/paddle/fluid/operators/optimizers/lars_momentum_op_xpu.cc
+++ b/paddle/fluid/operators/optimizers/lars_momentum_op_xpu.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #ifdef PADDLE_WITH_XPU
+#include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/optimizers/lars_momentum_op.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 namespace paddle {

--- a/paddle/phi/kernels/cpu/lars_momentum_kernel.cc
+++ b/paddle/phi/kernels/cpu/lars_momentum_kernel.cc
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/lars_momentum_kernel.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
+namespace phi {
+template <typename T, typename Context>  // CPU LARS momentum kernel: for each index i, writes velocity_out[i] and param_out[i] from param[i], velocity[i], grad[i] and learning_rate[i].
+void LarsMomentumKernel(
+    const Context& dev_ctx,  // used below only to allocate tensor storage
+    const std::vector<const DenseTensor*>& param,
+    const std::vector<const DenseTensor*>& velocity,
+    const std::vector<const DenseTensor*>& learning_rate,  // only element [0] of each tensor is read
+    const std::vector<const DenseTensor*>& grad,
+    const paddle::optional<std::vector<const DenseTensor*>>& master_param,  // not read in this CPU body
+    const std::vector<float>& weight_decay_arr,  // indexed with the same i as param; no bounds check here
+    float mu,
+    float lars_coeff,
+    float epsilon,
+    bool multi_precision,  // not read in this CPU body
+    float rescale_grad,
+    std::vector<DenseTensor*> param_out,
+    std::vector<DenseTensor*> velocity_out,
+    std::vector<DenseTensor*> master_param_out) {  // master_param_out is not written in this CPU body
+  int op_num = param.size();  // one update per entry of param
+  T mu_ = static_cast<T>(mu);  // momentum factor converted to the element type T
+  for (int i = 0; i < op_num; ++i) {
+    auto* lr = learning_rate[i]->data<T>();
+    T lars_weight_decay = weight_decay_arr[i];  // weight decay coefficient for tensor i
+    dev_ctx.template Alloc<T>(param_out[i]);  // allocate output storage as T before mapping it
+    dev_ctx.template Alloc<T>(velocity_out[i]);
+    auto p_out = phi::EigenVector<T>::Flatten(*(param_out[i]));  // flattened views of outputs and inputs
+    auto v_out = phi::EigenVector<T>::Flatten(*(velocity_out[i]));
+    auto p = phi::EigenVector<T>::Flatten(*(param[i]));
+    auto v = phi::EigenVector<T>::Flatten(*(velocity[i]));
+    Eigen::TensorMap<Eigen::Tensor<const T, 1, 1>> g =  // map type spelled out instead of auto; NOTE(review): presumably tied to the Windows/type fixes listed in the commit log - confirm before changing
+        phi::EigenVector<T>::Flatten(*(grad[i]));
+    auto rescale_g = static_cast<T>(rescale_grad) * g;  // gradient multiplied by rescale_grad
+    phi::DenseTensor p_norm_t, g_norm_t;  // one-element scratch tensors that receive the two norms
+    p_norm_t.Resize({1});
+    g_norm_t.Resize({1});
+    dev_ctx.template Alloc<T>(&p_norm_t);
+    dev_ctx.template Alloc<T>(&g_norm_t);
+    auto ep_norm = phi::EigenScalar<T>::From(p_norm_t);
+    auto eg_norm = phi::EigenScalar<T>::From(g_norm_t);
+    ep_norm = p.square().sum().sqrt();  // L2 norm of the parameter: sqrt(sum(p^2))
+    eg_norm = rescale_g.square().sum().sqrt();  // L2 norm of the rescaled gradient
+    T local_lr = lr[0];  // default: the learning rate as given
+    if (lars_weight_decay > 0 && ep_norm(0) > 0 && eg_norm(0) > 0) {  // rescale only when weight decay and both norms are positive
+      local_lr = lr[0] * lars_coeff * ep_norm(0) /  // lr * lars_coeff * |p| / (|g| + wd * |p| + epsilon)
+                 (eg_norm(0) + lars_weight_decay * ep_norm(0) + epsilon);
+    }
+    v_out = v * mu_ + local_lr * (rescale_g + lars_weight_decay * p);  // new velocity: mu * v + local_lr * (rescaled grad + wd * p)
+    p_out = p - v_out;  // new parameter: p minus the new velocity
+  }
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(
+    lars_momentum, CPU, ALL_LAYOUT, phi::LarsMomentumKernel, float, double) {}  // registers phi::LarsMomentumKernel as "lars_momentum" for CPU with float and double; the trailing braces are an empty body
--- a/paddle/fluid/operators/optimizers/lars_momentum_op.cu
+++ b/paddle/fluid/operators/optimizers/lars_momentum_op.cu
--- a/paddle/phi/kernels/lars_momentum_kernel.h
+++ b/paddle/phi/kernels/lars_momentum_kernel.h
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/phi/common/scalar.h"
+#include "paddle/phi/core/dense_tensor.h"
+namespace phi {
+template <typename T, typename Context>  // Declaration of the LARS momentum kernel; this patch adds a CPU definition in paddle/phi/kernels/cpu/lars_momentum_kernel.cc.
+void LarsMomentumKernel(
+    const Context& dev_ctx,
+    const std::vector<const DenseTensor*>& param,  // param, velocity, learning_rate and grad are parallel lists, one entry per tensor being updated
+    const std::vector<const DenseTensor*>& velocity,
+    const std::vector<const DenseTensor*>& learning_rate,
+    const std::vector<const DenseTensor*>& grad,
+    const paddle::optional<std::vector<const DenseTensor*>>& master_param,  // optional input; the CPU definition in this patch does not read it
+    const std::vector<float>& weight_decay_arr,  // one weight decay coefficient per tensor
+    float mu,  // momentum factor applied to the previous velocity
+    float lars_coeff,  // numerator coefficient of the local learning rate
+    float epsilon,  // added to the denominator of the local learning rate
+    bool multi_precision,  // the CPU definition in this patch does not read it
+    float rescale_grad,  // factor multiplied into grad before it is used
+    std::vector<DenseTensor*> param_out,
+    std::vector<DenseTensor*> velocity_out,
+    std::vector<DenseTensor*> master_param_out);  // the CPU definition in this patch does not write master_param_out
+}  // namespace phi
--- a/paddle/phi/ops/compat/lars_momentum_sig.cc
+++ b/paddle/phi/ops/compat/lars_momentum_sig.cc
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/core/compat/op_utils.h"
+namespace phi {
+KernelSignature LarsMomentumOpArgumentMapping(  // Returns the fixed name mapping for the "lars_momentum" kernel.
+    const ArgumentMappingContext& ctx) {  // ctx is not consulted; the same signature is returned unconditionally
+  return KernelSignature(
+      "lars_momentum",
+      {"Param", "Velocity", "LearningRate", "Grad", "MasterParam"},  // inputs, in the order of the kernel's tensor-list parameters
+      {"lars_weight_decay",  // attributes, in the order of the kernel's weight_decay_arr ... rescale_grad parameters
+       "mu",
+       "lars_coeff",
+       "epsilon",
+       "multi_precision",
+       "rescale_grad"},
+      {"ParamOut", "VelocityOut", "MasterParamOut"});  // outputs, in the order of the kernel's *_out parameters
+}
+}  // namespace phi
+PD_REGISTER_ARG_MAPPING_FN(lars_momentum, phi::LarsMomentumOpArgumentMapping);  // associates the mapping function above with the op name lars_momentum
--- a/test/legacy_test/test_momentum_op.py
+++ b/test/legacy_test/test_momentum_op.py
@@ -312,7 +312,7 @@ class TestLarsMomentumOp(OpTest):
    def test_check_output(self):
        paddle.enable_static()
-        self.check_output()
+        self.check_output(check_dygraph=False)
    def config(self):
        self.params_num = 1