[Phi] migrate clip_by_norm to phi (#44458)

2883e4b2 · lyq · GitHub · dafe855e · 2883e4b2 · dafe855e
18 changed files
--- a/paddle/fluid/operators/clip_by_norm_op.cc
+++ b/paddle/fluid/operators/clip_by_norm_op.cc
@@ -13,11 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/clip_by_norm_op.h"
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"
 namespace ops = paddle::operators;
+// Declares ClipByNormInferShapeFunctor for the clip_by_norm op, built from
+// phi::ClipByNormInferMeta through PD_INFER_META. The functor is passed to the
+// clip_by_norm operator registration that follows.
+DECLARE_INFER_SHAPE_FUNCTOR(clip_by_norm,
+                            ClipByNormInferShapeFunctor,
+                            PD_INFER_META(phi::ClipByNormInferMeta));
 REGISTER_OP_WITHOUT_GRADIENT(clip_by_norm,
                             ops::ClipByNormOp,
-                             ops::ClipByNormOpMaker);
+                             ops::ClipByNormOpMaker,
+                             ClipByNormInferShapeFunctor);
-REGISTER_OP_CPU_KERNEL(clip_by_norm,
-                       ops::ClipByNormKernel<phi::CPUContext, float>);
--- a/paddle/fluid/operators/clip_by_norm_op.cu
+++ b/paddle/fluid/operators/clip_by_norm_op.cu
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/clip_by_norm_op.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
-namespace paddle {
-namespace operators {
-using Tensor = framework::Tensor;
-template <>
-class ClipByNormKernel<platform::CUDADeviceContext, platform::float16>
-    : public framework::OpKernel<platform::float16> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto max_norm = context.Attr<float>("max_norm");
-    auto in_var = context.InputVar("X");
-    auto& dev_ctx =
-        context.template device_context<platform::CUDADeviceContext>();
-    Tensor* output = nullptr;
-    const Tensor* input = nullptr;
-    if (in_var->IsType<framework::LoDTensor>()) {
-      input = context.Input<Tensor>("X");
-      output = context.Output<Tensor>("Out");
-      output->mutable_data<platform::float16>(context.GetPlace());
-    } else if (in_var->IsType<phi::SelectedRows>()) {
-      auto* x = context.Input<phi::SelectedRows>("X");
-      // merge ids in selected rows first
-      math::scatter::MergeAdd<platform::CUDADeviceContext, platform::float16>
-          merge_func;
-      phi::SelectedRows* merged_input =
-          const_cast<framework::Scope&>(context.scope())
-              .Var()
-              ->GetMutable<phi::SelectedRows>();
-      merge_func(context.template device_context<platform::CUDADeviceContext>(),
-                 *x,
-                 merged_input);
-      input = &(merged_input->value());
-      phi::SelectedRows* output_selected_rows =
-          context.Output<phi::SelectedRows>("Out");
-      output_selected_rows->set_rows(merged_input->rows());
-      output_selected_rows->set_height(merged_input->height());
-      output = output_selected_rows->mutable_value();
-      output->Resize(merged_input->value().dims());
-      output->mutable_data<platform::float16>(context.GetPlace());
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Invalid input variable type, only support LodTensor and "
-          "SelectedRows types, but got type is %s.",
-          framework::ToTypeName(in_var->Type())));
-    }
-    PADDLE_ENFORCE_NOT_NULL(input,
-                            platform::errors::InvalidArgument(
-                                "Input(X) of ClipByNormOp should not be null. "
-                                "Please check if it is created correctly."));
-    std::vector<int> reduce_dims;
-    reduce_dims.resize(input->dims().size());
-    for (int i = 0; i < reduce_dims.size(); ++i) {
-      reduce_dims[i] = i;
-    }
-    Tensor tmp = context.AllocateTmpTensor<float, platform::CUDADeviceContext>(
-        {1}, dev_ctx);
-    TensorReduceImpl<platform::float16,
-                     float,
-                     kps::AddFunctor,
-                     kps::SquareFunctor<platform::float16, float>>(
-        dev_ctx,
-        *input,
-        &tmp,
-        kps::SquareFunctor<platform::float16, float>(),
-        reduce_dims,
-        dev_ctx.stream());
-    auto tmp_eigen = EigenVector<float>::Flatten(tmp);
-    auto x_norm = tmp_eigen.sqrt();
-    auto x = EigenVector<platform::float16>::Flatten(*input);
-    auto out = EigenVector<platform::float16>::Flatten(*output);
-    auto& place =
-        *context.template device_context<platform::CUDADeviceContext>()
-             .eigen_device();
-    auto temp = (x_norm <= max_norm).template cast<float>();
-    auto epsilon =
-        ((x_norm <= static_cast<float>(1e-30)).all().template cast<float>()) *
-        static_cast<float>(1e-6);
-    auto scaling =
-        (temp + (static_cast<float>(1) - temp) * max_norm / (x_norm + epsilon))
-            .template cast<platform::float16>();
-    Eigen::array<int, 1> one_dim{{1}};
-    Eigen::DSizes<int, 1> m_dsize(input->numel());
-    out.device(place) = x * scaling.reshape(one_dim).broadcast(m_dsize);
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-REGISTER_OP_CUDA_KERNEL(
-    clip_by_norm,
-    ops::ClipByNormKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ClipByNormKernel<paddle::platform::CUDADeviceContext, plat::float16>);
--- a/paddle/fluid/operators/clip_by_norm_op.h
+++ b/paddle/fluid/operators/clip_by_norm_op.h
@@ -30,76 +30,6 @@ template <typename T,
          typename IndexType = Eigen::DenseIndex>
 using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
-template <typename DeviceContext, typename T>
-class ClipByNormKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto max_norm = context.Attr<T>("max_norm");
-    auto in_var = context.InputVar("X");
-    Tensor* output = nullptr;
-    const Tensor* input = nullptr;
-    if (in_var->IsType<framework::LoDTensor>()) {
-      input = context.Input<Tensor>("X");
-      output = context.Output<Tensor>("Out");
-      output->mutable_data<T>(context.GetPlace());
-    } else if (in_var->IsType<phi::SelectedRows>()) {
-      auto* x = context.Input<phi::SelectedRows>("X");
-      // merge ids in selected rows first
-      math::scatter::MergeAdd<DeviceContext, T> merge_func;
-      phi::SelectedRows* merged_input =
-          const_cast<framework::Scope&>(context.scope())
-              .Var()
-              ->GetMutable<phi::SelectedRows>();
-      merge_func(
-          context.template device_context<DeviceContext>(), *x, merged_input);
-      input = &(merged_input->value());
-      phi::SelectedRows* output_selected_rows =
-          context.Output<phi::SelectedRows>("Out");
-      output_selected_rows->set_rows(merged_input->rows());
-      output_selected_rows->set_height(merged_input->height());
-      output = output_selected_rows->mutable_value();
-      output->Resize(merged_input->value().dims());
-      output->mutable_data<T>(context.GetPlace());
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Invalid input variable type, only support LodTensor and "
-          "SelectedRows types, but got type is %s.",
-          framework::ToTypeName(in_var->Type())));
-    }
-    PADDLE_ENFORCE_NOT_NULL(input,
-                            platform::errors::InvalidArgument(
-                                "Input(X) of ClipByNormOp should not be null. "
-                                "Please check if it is created correctly."));
-    auto x = EigenVector<T>::Flatten(*input);
-    auto out = EigenVector<T>::Flatten(*output);
-    auto x_norm = x.square().sum().sqrt();
-    auto& place =
-        *context.template device_context<DeviceContext>().eigen_device();
-    auto temp = (x_norm <= max_norm).template cast<T>();
-    auto epsilon =
-        ((x_norm <= static_cast<T>(1e-30)).all().template cast<T>()) *
-        static_cast<T>(1e-6);
-    auto scaling =
-        temp + (static_cast<T>(1) - temp) * max_norm / (x_norm + epsilon);
-    Eigen::array<int, 1> one_dim{{1}};
-    Eigen::DSizes<int, 1> m_dsize(input->numel());
-    if (context.GetPlace() == platform::CPUPlace()) {
-      out.device(place) =
-          x * scaling.reshape(one_dim).eval().broadcast(m_dsize);
-    } else {
-      out.device(place) = x * scaling.reshape(one_dim).broadcast(m_dsize);
-    }
-  }
-};
 class ClipByNormOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

--- a/paddle/fluid/operators/dgc_clip_by_norm_op.h
+++ b/paddle/fluid/operators/dgc_clip_by_norm_op.h
@@ -15,20 +15,24 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/operators/clip_by_norm_op.h"
+#include "paddle/phi/kernels/clip_by_norm_kernel.h"
+#include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
 namespace paddle {
 namespace operators {
+using Tensor = framework::Tensor;
 template <typename DeviceContext, typename T>
-class DGCClipByNormKernel : public ClipByNormKernel<DeviceContext, T> {
+class DGCClipByNormKernel : public framework::OpKernel<T> {
 public:
-  void Compute(const framework::ExecutionContext& context) const override {
+  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto rampup_begin_step = context.Attr<float>("rampup_begin_step");
+    auto rampup_begin_step = ctx.Attr<float>("rampup_begin_step");
    if (static_cast<int>(rampup_begin_step) < 0) {
      return;
    }
-    auto current_step_tensor = context.Input<framework::Tensor>("current_step");
+    auto current_step_tensor = ctx.Input<framework::Tensor>("current_step");
    auto* current_step = current_step_tensor->data<T>();
    VLOG(10) << "current_step:" << *current_step
@@ -41,7 +45,30 @@ class DGCClipByNormKernel : public ClipByNormKernel<DeviceContext, T> {
      return;
    }
-    return ClipByNormKernel<DeviceContext, T>::Compute(context);
+    // Dispatch on the runtime type held by input variable "X": dense tensors
+    // go to phi::ClipByNormKernel, SelectedRows go to
+    // phi::sr::ClipByNormKernel. Any other type is rejected explicitly, as the
+    // fluid kernel this code replaces did, instead of silently doing nothing.
+    auto in_var = ctx.InputVar("X");
+    auto max_norm = ctx.Attr<float>("max_norm");
+    auto& dev_ctx = ctx.device_context<DeviceContext>();
+    // Both phi kernels are called with the phi context type that
+    // ConvertToPhiContext maps DeviceContext to; cast once and reuse.
+    const auto& phi_ctx =
+        static_cast<const typename framework::ConvertToPhiContext<
+            DeviceContext>::TYPE&>(dev_ctx);
+    if (in_var->IsType<framework::LoDTensor>()) {
+      auto* x = ctx.Input<Tensor>("X");
+      auto* y = ctx.Output<Tensor>("Out");
+      phi::ClipByNormKernel<T>(phi_ctx, *x, max_norm, y);
+    } else if (in_var->IsType<phi::SelectedRows>()) {
+      auto* x = ctx.Input<phi::SelectedRows>("X");
+      phi::SelectedRows* output_selected_rows =
+          ctx.Output<phi::SelectedRows>("Out");
+      phi::sr::ClipByNormKernel<T>(phi_ctx, *x, max_norm, output_selected_rows);
+    } else {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Invalid input variable type, only support LodTensor and "
+          "SelectedRows types, but got type is %s.",
+          framework::ToTypeName(in_var->Type())));
+    }
  };
 };

--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -368,6 +368,14 @@
    func : clip
  backward : clip_grad
+- api : clip_by_norm
+  args : (Tensor x, float max_norm)
+  output : Tensor(out)
+  infer_meta :
+    func : ClipByNormInferMeta
+  kernel :
+    func : clip_by_norm
 - api : complex
  args : (Tensor x, Tensor y)
  output : Tensor

--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -264,6 +264,18 @@ void CholeskyInferMeta(const MetaTensor& x, bool upper, MetaTensor* out) {
  out->set_dtype(x.dtype());
 }
+// Infers the output meta of clip_by_norm.
+//   x        : input meta; its dims, dtype and LoD are propagated to `out`.
+//   max_norm : clipping threshold; must be strictly greater than 0, otherwise
+//              an InvalidArgument error is raised before `out` is touched.
+//   out      : output meta, filled in to mirror `x`.
+void ClipByNormInferMeta(const MetaTensor& x, float max_norm, MetaTensor* out) {
+  // Validate the attribute first so that `out` is left unmodified on failure.
+  PADDLE_ENFORCE_GT(
+      max_norm,
+      0,
+      phi::errors::InvalidArgument("max_norm should be greater than 0. "
+                                   "Received max_norm is %f.",
+                                   max_norm));
+
+  // The output mirrors the input: same dtype, same dims, same LoD.
+  out->set_dtype(x.dtype());
+  out->set_dims(x.dims());
+  out->share_lod(x);
+}
 void CreateLikeInferMeta(const MetaTensor& x, DataType dtype, MetaTensor* out) {
  out->set_dims(x.dims());
  out->set_dtype(dtype == DataType::UNDEFINED ? x.dtype() : dtype);

--- a/paddle/phi/infermeta/unary.h
+++ b/paddle/phi/infermeta/unary.h
@@ -62,6 +62,8 @@ void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out);
 void CholeskyInferMeta(const MetaTensor& x, bool upper, MetaTensor* out);
+// InferMeta for clip_by_norm. The definition in unary.cc enforces
+// max_norm > 0 and then copies x's dims, dtype and LoD to out.
+void ClipByNormInferMeta(const MetaTensor& x, float max_norm, MetaTensor* out);
 void CreateLikeInferMeta(const MetaTensor& x, DataType dtype, MetaTensor* out);
 void CumInferMeta(const MetaTensor& x,

--- a/paddle/phi/kernels/clip_by_norm_kernel.h
+++ b/paddle/phi/kernels/clip_by_norm_kernel.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/phi/core/dense_tensor.h"
+namespace phi {
+// Dense-tensor clip_by_norm kernel.
+//
+// As implemented by the CPU/GPU definitions added in this change: computes the
+// L2 norm of all elements of `x`; when the norm is <= max_norm the data is
+// passed through with a scale of 1, otherwise every element is scaled by
+// max_norm / norm.
+//
+//   dev_ctx  : device context used to allocate `out` and run the computation.
+//   x        : input tensor.
+//   max_norm : clipping threshold.
+//   out      : output tensor; allocated by the kernel.
+template <typename T, typename Context>
+void ClipByNormKernel(const Context& dev_ctx,
+                      const DenseTensor& x,
+                      float max_norm,
+                      DenseTensor* out);
+}  // namespace phi
--- a/paddle/phi/kernels/cpu/clip_by_norm_kernel.cc
+++ b/paddle/phi/kernels/cpu/clip_by_norm_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/clip_by_norm_kernel.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/clip_by_norm_kernel_impl.h"
+namespace phi {
+template <typename T, typename Context>
+// CPU entry point for clip_by_norm: all work is delegated to the shared
+// ClipByNormFunctor implementation.
+void ClipByNormKernel(const Context& dev_ctx,
+                      const DenseTensor& in,
+                      float max_norm,
+                      DenseTensor* output) {
+  // Context is deduced from dev_ctx; only the element type is spelled out.
+  ClipByNormFunctor<T>(dev_ctx, in, max_norm, output);
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(
+    clip_by_norm, CPU, ALL_LAYOUT, phi::ClipByNormKernel, float) {}
--- a/paddle/phi/kernels/gpu/clip_by_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/clip_by_norm_kernel.cu
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/clip_by_norm_kernel.h"
+#include <typeinfo>
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
+#include "paddle/phi/kernels/funcs/reduce_function.h"
+#include "paddle/phi/kernels/impl/clip_by_norm_kernel_impl.h"
+namespace phi {
+// GPU entry point for clip_by_norm.
+//
+// float inputs use the shared ClipByNormFunctor. Otherwise the data is handled
+// as float16 (the only other registered dtype): the sum of squares is
+// accumulated into a float tensor through phi::funcs::ReduceKernel, the scale
+// factor is computed in float, and only the final factor is cast back to
+// float16 before being broadcast over the input.
+//
+//   dev_ctx  : GPU context used for allocation, the reduction and Eigen.
+//   in       : input tensor.
+//   max_norm : clipping threshold.
+//   output   : output tensor; allocated here.
+template <typename T, typename Context>
+void ClipByNormKernel(const Context& dev_ctx,
+                      const DenseTensor& in,
+                      float max_norm,
+                      DenseTensor* output) {
+  if (typeid(T) == typeid(float)) {
+    return ClipByNormFunctor<float, Context>(dev_ctx, in, max_norm, output);
+  }
+  // `in` is a reference, so there is no pointer to null-check here.
+  dev_ctx.template Alloc<dtype::float16>(output);
+
+  // Reduce over every axis: reduce_dims = {0, 1, ..., rank - 1}.
+  std::vector<int> reduce_dims(in.dims().size());
+  for (size_t i = 0; i < reduce_dims.size(); ++i) {
+    reduce_dims[i] = static_cast<int>(i);
+  }
+
+  // Single-element float tensor receiving sum(x^2).
+  DenseTensor tmp_tensor;
+  auto* tmp = &tmp_tensor;
+  tmp->Resize({1});
+  dev_ctx.template Alloc<float>(tmp);
+  phi::funcs::ReduceKernel<dtype::float16,
+                           float,
+                           kps::AddFunctor,
+                           kps::SquareFunctor<dtype::float16, float>>(
+      dev_ctx,
+      in,
+      tmp,
+      kps::SquareFunctor<dtype::float16, float>(),
+      reduce_dims);
+
+  auto tmp_eigen = phi::EigenVector<float>::Flatten(*tmp);
+  auto x_norm = tmp_eigen.sqrt();
+  auto x = phi::EigenVector<dtype::float16>::Flatten(in);
+  auto out = phi::EigenVector<dtype::float16>::Flatten(*output);
+  auto* place = dev_ctx.eigen_device();
+
+  // temp is 1 when the norm is already within max_norm, 0 otherwise.
+  auto temp = (x_norm <= max_norm).template cast<float>();
+  // epsilon is 1e-6 only when the norm is (near) zero; it keeps the division
+  // below away from 0.
+  auto epsilon =
+      ((x_norm <= static_cast<float>(1e-30)).all().template cast<float>()) *
+      static_cast<float>(1e-6);
+  // scaling = 1 when within the limit, max_norm / norm otherwise.
+  auto scaling =
+      (temp + (static_cast<float>(1) - temp) * max_norm / (x_norm + epsilon))
+          .template cast<dtype::float16>();
+  Eigen::array<int, 1> one_dim{{1}};
+  Eigen::DSizes<int, 1> m_dsize(in.numel());
+  out.device(*place) = x * scaling.reshape(one_dim).broadcast(m_dsize);
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(clip_by_norm,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::ClipByNormKernel,
+                   float,
+                   phi::dtype::float16) {}
--- a/paddle/phi/kernels/impl/clip_by_norm_kernel_impl.h
+++ b/paddle/phi/kernels/impl/clip_by_norm_kernel_impl.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
+namespace phi {
+template <typename T, typename Context>
+// Shared Eigen implementation of clip_by_norm for element type T.
+//
+// Computes norm = sqrt(sum(in^2)) over all elements and writes
+//   output = in                      when norm <= max_norm
+//   output = in * max_norm / norm    otherwise
+// A small epsilon (1e-6) is added to the divisor only when the norm is
+// <= 1e-30.
+//
+//   dev_ctx  : context used to allocate `output` and supply the Eigen device.
+//   in       : input tensor.
+//   max_norm : clipping threshold.
+//   output   : output tensor; allocated here.
+void ClipByNormFunctor(const Context& dev_ctx,
+                       const DenseTensor& in,
+                       float max_norm,
+                       DenseTensor* output) {
+  const DenseTensor* input = &in;
+  dev_ctx.template Alloc<T>(output);
+  PADDLE_ENFORCE_NOT_NULL(input,
+                          phi::errors::InvalidArgument(
+                              "Input(X) of ClipByNormOp should not be null. "
+                              "Please check if it is created correctly."));
+
+  // Flat (1-D) Eigen views over the input and output buffers.
+  auto flat_in = phi::EigenVector<T>::Flatten(*input);
+  auto flat_out = phi::EigenVector<T>::Flatten(*output);
+  auto* eigen_dev = dev_ctx.eigen_device();
+
+  // Lazy Eigen expressions; nothing is evaluated until the assignment below.
+  auto l2_norm = flat_in.square().sum().sqrt();
+  // 1 when the norm is already within max_norm, 0 otherwise.
+  auto within_limit = (l2_norm <= max_norm).template cast<T>();
+  // 1e-6 only when the norm is (near) zero, so the division stays away from 0.
+  auto zero_guard =
+      ((l2_norm <= static_cast<T>(1e-30)).all().template cast<T>()) *
+      static_cast<T>(1e-6);
+  auto scale = within_limit + (static_cast<T>(1) - within_limit) * max_norm /
+                                  (l2_norm + zero_guard);
+
+  Eigen::array<int, 1> scalar_shape{{1}};
+  Eigen::DSizes<int, 1> bcast(input->numel());
+
+  // On CPU the scale factor is forced with eval() before being broadcast; on
+  // other places the expression is broadcast directly.
+  const bool on_cpu = (dev_ctx.GetPlace() == phi::CPUPlace());
+  if (on_cpu) {
+    flat_out.device(*eigen_dev) =
+        flat_in * scale.reshape(scalar_shape).eval().broadcast(bcast);
+  } else {
+    flat_out.device(*eigen_dev) =
+        flat_in * scale.reshape(scalar_shape).broadcast(bcast);
+  }
+}
+}  // namespace phi
--- a/paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h
+++ b/paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/selected_rows.h"
+namespace phi {
+namespace sr {
+// SelectedRows clip_by_norm kernel.
+//
+// As implemented in selected_rows/impl/clip_by_norm_kernel_impl.h: `x` is first
+// passed through MergeAdd, `out` receives the merged rows and height, and the
+// dense phi::ClipByNormKernel is then applied to the merged value tensor.
+//
+//   dev_ctx  : device context forwarded to MergeAdd and the dense kernel.
+//   x        : input SelectedRows.
+//   max_norm : clipping threshold.
+//   out      : output SelectedRows; rows, height and value are set here.
+template <typename T, typename Context>
+void ClipByNormKernel(const Context& dev_ctx,
+                      const SelectedRows& x,
+                      float max_norm,
+                      SelectedRows* out);
+}  // namespace sr
+}  // namespace phi
--- a/paddle/phi/kernels/selected_rows/cpu/clip_by_norm_kernel.cc
+++ b/paddle/phi/kernels/selected_rows/cpu/clip_by_norm_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h"
+PD_REGISTER_KERNEL(
+    clip_by_norm_sr, CPU, ALL_LAYOUT, phi::sr::ClipByNormKernel, float) {}
--- a/paddle/phi/kernels/selected_rows/gpu/clip_by_norm_kernel.cu
+++ b/paddle/phi/kernels/selected_rows/gpu/clip_by_norm_kernel.cu
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h"
+PD_REGISTER_KERNEL(clip_by_norm_sr,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sr::ClipByNormKernel,
+                   float,
+                   phi::dtype::float16) {}
--- a/paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
+++ b/paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/core/selected_rows.h"
+#include "paddle/phi/kernels/clip_by_norm_kernel.h"
+#include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
+namespace phi {
+namespace sr {
+template <typename T, typename Context>
+// SelectedRows clip_by_norm: merge the input with MergeAdd, copy the merged
+// row metadata to `out`, then clip the merged values with the dense kernel.
+void ClipByNormKernel(const Context& dev_ctx,
+                      const SelectedRows& x,
+                      float max_norm,
+                      SelectedRows* out) {
+  // Merge ids in the selected rows first; the norm is taken over the merged
+  // value tensor.
+  paddle::operators::math::scatter::MergeAdd<Context, T> merge_rows;
+  phi::SelectedRows merged;
+  merge_rows(dev_ctx, x, &merged);
+  const DenseTensor& merged_value = merged.value();
+
+  // The output shares the merged row layout.
+  out->set_height(merged.height());
+  out->set_rows(merged.rows());
+  DenseTensor* out_value = out->mutable_value();
+  out_value->Resize(merged_value.dims());
+
+  phi::ClipByNormKernel<T, Context>(
+      dev_ctx, merged_value, max_norm, out_value);
+}
+}  // namespace sr
+}  // namespace phi
--- a/paddle/phi/ops/compat/clip_by_norm_sig.cc
+++ b/paddle/phi/ops/compat/clip_by_norm_sig.cc
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/phi/core/compat/op_utils.h"
+namespace phi {
+// Maps the fluid clip_by_norm op onto a phi kernel signature, chosen by the
+// type of input "X": "clip_by_norm" for a dense tensor, "clip_by_norm_sr" for
+// SelectedRows, and the "unregistered" placeholder for anything else.
+KernelSignature ClipByNormOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  // The dense check comes first, as before.
+  if (ctx.IsDenseTensorInput("X")) {
+    return KernelSignature("clip_by_norm", {"X"}, {"max_norm"}, {"Out"});
+  }
+  if (ctx.IsSelectedRowsInput("X")) {
+    return KernelSignature("clip_by_norm_sr", {"X"}, {"max_norm"}, {"Out"});
+  }
+  return KernelSignature("unregistered", {}, {}, {});
+}
+}  // namespace phi
+PD_REGISTER_ARG_MAPPING_FN(clip_by_norm, phi::ClipByNormOpArgumentMapping);
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -13043,6 +13043,8 @@ def clip_by_norm(x, max_norm, name=None):
            # [[0.5, 0.5], [0.5, 0.5]]
    """
+    if in_dygraph_mode():
+        return _C_ops.final_state_clip_by_norm(x, max_norm)
    if _non_static_mode():
        return _C_ops.clip_by_norm(x, 'max_norm', max_norm)

--- a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
@@ -27,6 +27,7 @@ class TestClipByNormOp(OpTest):
    def setUp(self):
        self.max_relative_error = 0.006
+        self.python_api = fluid.layers.clip_by_norm
        self.init_dtype()
        self.initTestCase()
        input = np.random.random(self.shape).astype(self.dtype)
@@ -45,7 +46,7 @@ class TestClipByNormOp(OpTest):
        self.outputs = {'Out': output}
    def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
    def initTestCase(self):
        self.shape = (100, )
@@ -85,7 +86,9 @@ class TestClipByNormOpFp16(TestClipByNormOp):
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
            if core.is_float16_supported(place):
-                self.check_output_with_place(place, atol=0.001)
+                self.check_output_with_place(place,
+                                             atol=0.001,
+                                             check_eager=True)
 class TestClipByNormOpFp16Case1(TestClipByNormOpFp16):