diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc
deleted file mode 100644
index 611b00b7c62ade1a333eeff3f1d0dbcda28ecb41..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/group_norm_op.cc
+++ /dev/null
@@ -1,250 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
-#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
-#include "paddle/fluid/prim/utils/static/desc_tensor.h"
-
-#include "paddle/fluid/framework/infershape_utils.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/core/infermeta_utils.h"
-#include "paddle/phi/infermeta/backward.h"
-#include "paddle/phi/infermeta/ternary.h"
-
-namespace paddle {
-namespace operators {
-
-using DataLayout = phi::DataLayout;
-
-class GroupNormOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-};
-
-class GroupNormOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("X", "The input tensor.");
-    AddInput("Scale",
-             "Scale is a 1-dimensional tensor of size C "
-             "that is applied to the output.")
-        .AsDispensable();
-    AddInput("Bias",
-             "Bias is a 1-dimensional tensor of size C "
-             "that is applied to the output.")
-        .AsDispensable();
-    AddOutput("Y", "Result after normalization.");
-    AddOutput("Mean", "Mean of each group.").AsIntermediate();
-    AddOutput("Variance", "Variance of each group.").AsIntermediate();
-
-    AddAttr<float>("epsilon",
-                   "Constant for numerical stability [default 1e-5].")
-        .SetDefault(1e-5)
-        .AddCustomChecker([](const float &epsilon) {
-          PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 1.0f,
-                            true,
-                            platform::errors::InvalidArgument(
-                                "'epsilon' in Op(GroupNorm) should be between "
-                                "0.0 and 1.0f, But received [%s].",
-                                epsilon));
-        });
-    AddAttr<int>("groups", "The number of groups that divided from channels.")
-        .AddCustomChecker([](const int &groups) {
-          PADDLE_ENFORCE_GT(
-              groups,
-              0,
-              platform::errors::InvalidArgument(
-                  "'groups' in Op(GroupNorm) should be greater than zero, "
-                  "But received [%s].",
-                  groups));
-        });
-    AddAttr<std::string>("data_layout",
-                         "An optional string from: \"NHWC\", \"NCHW\". ")
-        .SetDefault("NCHW");
-    AddComment(R"DOC(
-Group Normalization
-
-Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_
-)DOC");
-  }
-};
-
-class GroupNormGradOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext *ctx) const override {
-    // check input
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "GroupNormGrad");
-    OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "GroupNormGrad");
-    OP_INOUT_CHECK(
-        ctx->HasInput("Variance"), "Input", "Variance", "GroupNormGrad");
-    OP_INOUT_CHECK(ctx->HasInput("Mean"), "Input", "Mean", "GroupNormGrad");
-    OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")),
-                   "Input",
-                   framework::GradVarName("Y"),
-                   "GroupNormGrad");
-
-    // check output
-    if (ctx->HasOutput(framework::GradVarName("X"))) {
-      ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Y"));
-    }
-    if (ctx->HasOutput(framework::GradVarName("Scale"))) {
-      ctx->SetOutputDim(framework::GradVarName("Scale"),
-                        ctx->GetInputDim("Scale"));
-    }
-    if (ctx->HasOutput(framework::GradVarName("Bias"))) {
-      ctx->SetOutputDim(framework::GradVarName("Bias"),
-                        ctx->GetInputDim("Bias"));
-    }
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext &ctx) const override {
-    const auto *var = ctx.InputVar(framework::GradVarName("Y"));
-
-    PADDLE_ENFORCE_NOT_NULL(
-        var,
-        platform::errors::InvalidArgument(
-            "Input(Y@GRAD) of GroupNormGradOp should not be null"));
-    const phi::DenseTensor *t = nullptr;
-    if (var->IsType<phi::DenseTensor>()) {
-      t = &var->Get<phi::DenseTensor>();
-    } else if (var->IsType<phi::DenseTensor>()) {
-      t = &var->Get<phi::DenseTensor>();
-    }
-    PADDLE_ENFORCE_NOT_NULL(t,
-                            platform::errors::InvalidArgument(
-                                "Input(Y@GRAD) phi::DenseTensor of "
-                                "GroupNormGradOp should not be null"));
-    return phi::KernelKey(framework::TransToProtoVarType(t->dtype()),
-                          ctx.GetPlace());
-  }
-};
-
-template <typename T>
-class GroupNormGradMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("group_norm_grad");
-    op->SetInput("X", this->Input("X"));
-    op->SetInput("Scale", this->Input("Scale"));
-    op->SetInput("Bias", this->Input("Bias"));
-    op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
-    op->SetInput("Y", this->Output("Y"));
-    op->SetInput("Mean", this->Output("Mean"));
-    op->SetInput("Variance", this->Output("Variance"));
-
-    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
-    op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
-    op->SetOutput(framework::GradVarName("Scale"), this->InputGrad("Scale"));
-
-    op->SetAttrMap(this->Attrs());
-  }
-};
-
-class GroupNormCompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
-  using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase;
-
- public:
-  void Apply() override {
-    // inputs and outputs of group_norm
-    paddle::Tensor x = this->GetSingleForwardInput("X");
-    paddle::optional<paddle::Tensor> scale =
-        this->GetOptionalSingleForwardInput("Scale");
-    paddle::optional<paddle::Tensor> bias =
-        this->GetOptionalSingleForwardInput("Bias");
-    paddle::Tensor y = this->GetSingleForwardOutput("Y");
-    paddle::Tensor mean = this->GetSingleForwardOutput("Mean");
-    paddle::Tensor variance = this->GetSingleForwardOutput("Variance");
-
-    paddle::Tensor y_grad = this->GetSingleOutputGrad("Y");
-    paddle::Tensor x_grad = this->GetSingleInputGrad("X");
-    paddle::Tensor scale_grad = this->GetSingleInputGrad("Scale");
-    paddle::Tensor bias_grad = this->GetSingleInputGrad("Bias");
-
-    auto dx_ptr = this->GetOutputPtr(&x_grad);
-    std::string dx_name = this->GetOutputName(x_grad);
-    auto dscale_ptr = this->GetOutputPtr(&scale_grad);
-    std::string dscale_name = this->GetOutputName(scale_grad);
-    auto dbias_ptr = this->GetOutputPtr(&bias_grad);
-    std::string dbias_name = this->GetOutputName(bias_grad);
-
-    // attrs of group_norm
-    auto groups = this->Attr<int>("groups");
-    auto epsilon = this->Attr<float>("epsilon");
-    auto data_layout = this->Attr<std::string>("data_layout");
-
-    VLOG(3) << "Running group_norm composite func";
-
-    prim::group_norm_grad<prim::DescTensor>(x,
-                                            scale,
-                                            bias,
-                                            y,
-                                            mean,
-                                            variance,
-                                            y_grad,
-                                            epsilon,
-                                            groups,
-                                            data_layout,
-                                            dx_ptr,
-                                            dscale_ptr,
-                                            dbias_ptr);
-    this->RecoverOutputName(x_grad, dx_name);
-    this->RecoverOutputName(scale_grad, dscale_name);
-    this->RecoverOutputName(bias_grad, dbias_name);
-  }
-};
-
-DECLARE_INPLACE_OP_INFERER(GroupNormGradInplaceInferer,
-                           {framework::GradVarName("Y"),
-                            framework::GradVarName("X")});
-
-class GroupNormOpInferVarType
-    : public framework::PassInDtypeAndVarTypeToOutput {
- protected:
-  std::unordered_map<std::string, std::string> &GetInputOutputWithSameType()
-      const override {
-    static std::unordered_map<std::string, std::string> m{{"X", /*->*/ "Y"}};
-    return m;
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-DECLARE_INFER_SHAPE_FUNCTOR(group_norm,
-                            GroupNormInferShapeFunctor,
-                            PD_INFER_META(phi::GroupNormInferMeta));
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(group_norm,
-                  ops::GroupNormOp,
-                  ops::GroupNormOpMaker,
-                  ops::GroupNormOpInferVarType,
-                  ops::GroupNormGradMaker<paddle::framework::OpDesc>,
-                  ops::GroupNormGradMaker<paddle::imperative::OpBase>,
-                  ops::GroupNormCompositeGradOpMaker,
-                  GroupNormInferShapeFunctor);
-REGISTER_OPERATOR(group_norm_grad,
-                  ops::GroupNormGradOp,
-                  ops::GroupNormGradInplaceInferer);
diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml
index 661f59ef6a7d6c40c0d6741c49642e5d6c241b54..88eefdab0850efd9402376cd9a5a3e6a54ed47fd 100644
--- a/paddle/phi/api/yaml/backward.yaml
+++ b/paddle/phi/api/yaml/backward.yaml
@@ -749,6 +749,20 @@
     func : grid_sample_grad
     data_type : x
 
+- backward_op : group_norm_grad
+  forward : group_norm (Tensor x, Tensor scale, Tensor bias, float epsilon = 1e-5, int groups = -1, str data_layout = "NCHW") -> Tensor(y), Tensor(mean), Tensor(variance)
+  args : (Tensor x, Tensor scale, Tensor bias, Tensor y, Tensor mean, Tensor variance, Tensor y_grad, float epsilon, int groups, str data_layout)
+  output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [y, scale, bias]
+  kernel :
+    func : group_norm_grad
+    data_type : y_grad
+  composite : group_norm_grad(x, scale, bias, y, mean, variance, y_grad, epsilon, groups, data_layout, x_grad, scale_grad, bias_grad)
+  optional: scale, bias
+  inplace : (y_grad -> x_grad)
+
 - backward_op : gumbel_softmax_grad
   forward : gumbel_softmax (Tensor x, float temperature, bool hard, int axis) -> Tensor(out)
   args : (Tensor out, Tensor out_grad, int axis)
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 42175a73bc86d2a2c122d03b5380ef14be8a7ca4..8f53dc2c145a285f2f0918f83242904f7ef632b8 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -408,20 +408,6 @@
     composite : gather_grad(x, index, out_grad, axis, x_grad)
     no_need_buffer : x
 
-- backward_op : group_norm_grad
-  forward : group_norm (Tensor x, Tensor scale, Tensor bias, float epsilon, int groups, str data_layout) -> Tensor(y), Tensor(mean), Tensor(variance)
-  args : (Tensor x, Tensor scale, Tensor bias, Tensor y, Tensor mean, Tensor variance, Tensor y_grad, float epsilon, int groups, str data_layout)
-  output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
-  infer_meta :
-    func : GeneralTernaryGradInferMeta
-    param : [y, scale, bias]
-  kernel :
-    func : group_norm_grad
-    data_type : y_grad
-  composite : group_norm_grad(x, scale, bias, y, mean, variance, y_grad, epsilon, groups, data_layout)
-  optional: scale, bias
-  inplace : (y_grad -> x_grad)
-
 - backward_op : hardswish_grad
   forward : hardswish (Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index 921c28caf4798fd1316240d8f93c98e6e8bca241..02726a74ff0ac14625c74f2d64083629ca13f2f1 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -538,17 +538,6 @@
   kernel :
     func : greater_than
 
-- op : group_norm
-  args : (Tensor x, Tensor scale, Tensor bias, float epsilon, int groups, str data_layout)
-  output : Tensor(y), Tensor(mean), Tensor(variance)
-  infer_meta :
-    func : GroupNormInferMeta
-  kernel :
-    func : group_norm
-  optional : scale, bias
-  intermediate : mean, variance
-  backward : group_norm_grad
-
 - op : hardswish
   args : (Tensor x)
   output : Tensor(out)
diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml
index c98c9b910d5660280382fcb3f450a14790adca3b..09edd84a98e30be914ee207dac9c2012dc8ee72d 100644
--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -873,6 +873,17 @@
     data_type : x
     backward : grid_sample_grad
 
+- op : group_norm
+  args : (Tensor x, Tensor scale, Tensor bias, float epsilon = 1e-5, int groups = -1, str data_layout = "NCHW")
+  output : Tensor(y), Tensor(mean), Tensor(variance)
+  infer_meta :
+    func : GroupNormInferMeta
+  kernel :
+    func : group_norm
+  optional : scale, bias
+  intermediate : mean, variance
+  backward : group_norm_grad
+
 - op : gumbel_softmax
   args : (Tensor x, float temperature = 1.0, bool hard = false, int axis = -1)
   output : Tensor
diff --git a/paddle/phi/ops/compat/group_norm_sig.cc b/paddle/phi/ops/compat/group_norm_sig.cc
deleted file mode 100644
index 36fcd2472ac42ba36b6c7ef7c50ff03b7b0367ba..0000000000000000000000000000000000000000
--- a/paddle/phi/ops/compat/group_norm_sig.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature GroupNormOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("group_norm", - {"X", "Scale", "Bias"}, - {"epsilon", "groups", "data_layout"}, - {"Y", "Mean", "Variance"}); -} - -KernelSignature GroupNormGradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature( - "group_norm_grad", - {"X", "Scale", "Bias", "Y", "Mean", "Variance", "Y@GRAD"}, - {"epsilon", "groups", "data_layout"}, - {"X@GRAD", "Scale@GRAD", "Bias@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(group_norm, phi::GroupNormOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(group_norm_grad, - phi::GroupNormGradOpArgumentMapping);