Unverified commit 39c6765a, authored by Xiaoxu Chen, committed by GitHub

【Prim】Add multiply,expand,div vjp rules (#49831)

* support elementwise base func

* fix compiling error and add test

* support vjp for div using comp

* remove additional change

* fix dy2st error with magic num

* fix dy magic num

* another magic

* another magic

* another magic

* add skip rename strategy

* support add vjp

* support add with new axis cal

* support sub vjp

* [prim] add multiply vjp rules

* [prim] add multiply vjp rules

* [prim] fix no infershape with composite in _append_backward_ops

* [prim] add expand vjp rule

* [prim] add exp vjp rule

* uncomment infer shape for reshape/sum static prim api

* [prim] fix tanh nullptr error

* remove some print message

* fix magic number in run_program relative tests @JiaBinYang

* [prim] add expand,multiply,exp vjp rules

* fix only support single direction reduce error

* infer reduce dims using out dims
Co-authored-by: JiabinYang <360788950@qq.com>
Parent 28864137
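Taken together, the composite rules added below implement the standard vjps. Writing v for out_grad and reduce_to(t, s) for "sum t over the broadcast axes returned by the get_reduce_dims helpers, then reshape to s":

* multiply: out = x ⊙ y, so x_grad = reduce_to(v ⊙ y, shape(x)) and y_grad = reduce_to(v ⊙ x, shape(y))
* expand: out = expand(x, shape), so x_grad = reduce_to(v, shape(x))
* exp: out = exp(x), so x_grad = v ⊙ out
* divide (reworked here onto the same reduction path): x_grad = reduce_to(v / y, shape(x)) and y_grad = reduce_to(-v ⊙ x / y², shape(y))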
@@ -19,6 +19,9 @@ limitations under the License. */
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
 #include "paddle/fluid/platform/complex.h"
+#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h"
+#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
+#include "paddle/fluid/prim/utils/static/desc_tensor.h"
 
 namespace paddle {
 namespace operators {
@@ -63,6 +66,33 @@ class ElementwiseMulOpGradMaker : public framework::SingleGradOpMaker<T> {
   }
 };
 
+class ElementwiseMulGradCompositeOpMaker
+    : public prim::GradCompositeOpMakerBase {
+  using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase;
+
+ public:
+  void Apply() override {
+    auto x = this->GetSingleForwardInput("X");
+    auto y = this->GetSingleForwardInput("Y");
+    auto out_grad = this->GetSingleOutputGrad("Out");
+    auto x_grad = this->GetSingleInputGrad("X");
+    auto x_grad_p = this->GetOutputPtr(&x_grad);
+    auto x_grad_name = this->GetOutputName(x_grad);
+    auto y_grad = this->GetSingleInputGrad("Y");
+    auto y_grad_p = this->GetOutputPtr(&y_grad);
+    auto y_grad_name = this->GetOutputName(y_grad);
+    prim::multiply_grad<prim::DescTensor>(
+        x,
+        y,
+        out_grad,
+        static_cast<int>(this->Attr<int>("axis")),
+        x_grad_p,
+        y_grad_p);
+    this->RecoverOutputName(x_grad, x_grad_name);
+    this->RecoverOutputName(y_grad, y_grad_name);
+  }
+};
+
 template <typename T>
 class ElementwiseMulDoubleGradMaker : public framework::SingleGradOpMaker<T> {
  public:
@@ -123,7 +153,8 @@ REGISTER_OPERATOR(elementwise_mul,
                   ops::ElementwiseMulOpMaker,
                   ops::ElementwiseOpInferVarType,
                   ops::ElementwiseMulOpGradMaker<paddle::framework::OpDesc>,
-                  ops::ElementwiseMulOpGradMaker<paddle::imperative::OpBase>);
+                  ops::ElementwiseMulOpGradMaker<paddle::imperative::OpBase>,
+                  ops::ElementwiseMulGradCompositeOpMaker);
 REGISTER_OPERATOR(
     elementwise_mul_grad,
     ops::ElementwiseOpGrad,
...
@@ -20,6 +20,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/prim/api/manual/backward/composite_backward_api.h"
+#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
+#include "paddle/fluid/prim/utils/static/desc_tensor.h"
 #include "paddle/phi/core/infermeta_utils.h"
 #include "paddle/phi/infermeta/unary.h"
@@ -190,6 +193,23 @@ class ExpandV2GradOpMaker : public framework::SingleGradOpMaker<T> {
   }
 };
 
+class ExpandV2GradCompositeOpMaker : public prim::GradCompositeOpMakerBase {
+  using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase;
+
+ public:
+  void Apply() override {
+    auto x = this->GetSingleForwardInput("X");
+    auto out_grad = this->GetSingleOutputGrad("Out");
+    auto x_grad = this->GetSingleInputGrad("X");
+    auto x_grad_p = this->GetOutputPtr(&x_grad);
+    auto x_grad_name = this->GetOutputName(x_grad);
+    auto shape = this->Attr<std::vector<int>>("shape");
+    prim::expand_grad<prim::DescTensor>(
+        x, out_grad, paddle::experimental::IntArray(shape), x_grad_p);
+    this->RecoverOutputName(x_grad, x_grad_name);
+  }
+};
+
 template <typename T>
 class ExpandV2DoubleGradOpMaker : public framework::SingleGradOpMaker<T> {
  public:
@@ -223,6 +243,7 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(expand_v2,
                   ops::ExpandV2Op,
                   ops::ExpandV2OpMaker,
+                  ops::ExpandV2GradCompositeOpMaker,
                   ops::ExpandV2GradOpMaker<paddle::framework::OpDesc>,
                   ops::ExpandV2GradOpMaker<paddle::imperative::OpBase>,
                   ExpandInferShapeFunctor);
...
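As a concrete instance of the expand rule registered above (the 'diff_rank' case in the new tests): expanding x of shape (1, 10, 1) to (10, 10, 10, 10) broadcasts along axes 0, 1 and 3, so

x_grad = reshape(sum(out_grad, axes = {0, 1, 3}, keepdim = false), (1, 10, 1))

which is exactly what prim::expand_grad composes out of sum and reshape below.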
@@ -23,16 +23,17 @@ namespace prim {
 using Tensor = paddle::experimental::Tensor;
 using IntArray =
     paddle::experimental::IntArrayBase<paddle::experimental::Tensor>;
+// using IntArray = paddle::experimental::IntArray;
 //  This function should have as same signature as phi, which defined in
 //  paddle/phi/api/backward/backward_api.h
 template <typename T>
 void tanh_grad(const Tensor& out, const Tensor& grad_out, Tensor* grad_x) {
+  if (!grad_x) return;
   auto tmp = pow<T>(out, 2.0);
   tmp = scale<T>(tmp, -1.0, 1.0, true);
   auto grad_x_tmp = multiply<T>(grad_out, tmp);
   grad_x->set_impl(grad_x_tmp.impl());
 }
 
 template <typename T>
 void subtract_grad(const Tensor& x,
                    const Tensor& y,
@@ -42,25 +43,33 @@ void subtract_grad(const Tensor& x,
                    Tensor* dy) {
   if (dy) {
     auto scale_out_grad = scale<T>(out_grad, -1.0, 0.0, true);
-    if (phi::product(x.dims()) > phi::product(y.dims())) {
+    if (x.dims() != y.dims()) {
       // Maybe need reduce here
-      phi::DDim reduce_dim = get_reduce_dims(x.dims(), y.dims());
-      auto dy_reduce_res =
-          sum<T>(scale_out_grad, phi::vectorize(reduce_dim), y.dtype(), false);
-      auto dy_tmp = reshape<T>(dy_reduce_res, phi::vectorize(y.dims()));
-      dy->set_impl(dy_tmp.impl());
+      phi::DDim reduce_dim = get_reduce_dims(y.dims(), x.dims());
+      if (!reduce_dim.size()) {
+        by_pass<T>(scale_out_grad, dy);
+      } else {
+        auto dy_reduce_res = sum<T>(
+            scale_out_grad, phi::vectorize(reduce_dim), y.dtype(), false);
+        auto dy_tmp = reshape<T>(dy_reduce_res, phi::vectorize(y.dims()));
+        dy->set_impl(dy_tmp.impl());
+      }
     } else {
       by_pass<T>(scale_out_grad, dy);
     }
   }
   if (dx) {
-    if (phi::product(y.dims()) > phi::product(x.dims())) {
+    if (y.dims() != x.dims()) {
       // Maybe need reduce here
-      auto reduce_dim = get_reduce_dims(y.dims(), x.dims());
-      auto dx_reduce_res =
-          sum<T>(out_grad, phi::vectorize(reduce_dim), x.dtype(), false);
-      auto dx_tmp = reshape<T>(dx_reduce_res, phi::vectorize(x.dims()));
-      dx->set_impl(dx_tmp.impl());
+      auto reduce_dim = get_reduce_dims(x.dims(), y.dims());
+      if (!reduce_dim.size()) {
+        by_pass<T>(out_grad, dx);
+      } else {
+        auto dx_reduce_res =
+            sum<T>(out_grad, phi::vectorize(reduce_dim), x.dtype(), false);
+        auto dx_tmp = reshape<T>(dx_reduce_res, phi::vectorize(x.dims()));
+        dx->set_impl(dx_tmp.impl());
+      }
     } else {
       by_pass<T>(out_grad, dx);
     }
@@ -75,25 +84,34 @@ void add_grad(const Tensor& x,
               Tensor* dx,
               Tensor* dy) {
   if (dy) {
-    if (phi::product(x.dims()) > phi::product(y.dims())) {
+    if (x.dims() != y.dims()) {
      // Maybe need reduce here
-      phi::DDim reduce_dim = get_reduce_dims(x.dims(), y.dims());
-      auto dy_reduce_res =
-          sum<T>(out_grad, phi::vectorize(reduce_dim), y.dtype(), false);
-      auto dy_tmp = reshape<T>(dy_reduce_res, phi::vectorize(y.dims()));
-      dy->set_impl(dy_tmp.impl());
+      phi::DDim reduce_dim = get_reduce_dims(y.dims(), x.dims());
+      if (!reduce_dim.size()) {
+        by_pass<T>(out_grad, dy);
+      } else {
+        auto dy_reduce_res =
+            sum<T>(out_grad, phi::vectorize(reduce_dim), y.dtype(), false);
+        auto dy_tmp = reshape<T>(dy_reduce_res, phi::vectorize(y.dims()));
+        dy->set_impl(dy_tmp.impl());
+      }
     } else {
       by_pass<T>(out_grad, dy);
     }
   }
   if (dx) {
-    if (phi::product(y.dims()) > phi::product(x.dims())) {
+    if (y.dims() != x.dims()) {
       // Maybe need reduce here
-      auto reduce_dim = get_reduce_dims(y.dims(), x.dims());
-      auto dx_reduce_res =
-          sum<T>(out_grad, phi::vectorize(reduce_dim), x.dtype(), false);
-      auto dx_tmp = reshape<T>(dx_reduce_res, phi::vectorize(x.dims()));
-      dx->set_impl(dx_tmp.impl());
+      auto reduce_dim = get_reduce_dims(x.dims(), y.dims());
+      if (!reduce_dim.size()) {
+        by_pass<T>(out_grad, dx);
+      } else {
+        auto dx_reduce_res =
+            sum<T>(out_grad, phi::vectorize(reduce_dim), x.dtype(), false);
+        auto dx_tmp = reshape<T>(dx_reduce_res, phi::vectorize(x.dims()));
+        dx->set_impl(dx_tmp.impl());
+      }
     } else {
       by_pass<T>(out_grad, dx);
     }
@@ -130,9 +148,9 @@ void sum_grad(const Tensor& x,
       axis_ = axis.GetData();
     }
     auto out_grad_ = unsqueeze<T>(out_grad, axis_);
-    x_grad_tmp = expand<T>(out_grad_, x_dim);
+    x_grad_tmp = expand<T>(out_grad_, IntArray(x_dim));
   } else {
-    x_grad_tmp = expand<T>(out_grad, x_dim);
+    x_grad_tmp = expand<T>(out_grad, IntArray(x_dim));
   }
   x_grad->set_impl(x_grad_tmp.impl());
@@ -152,13 +170,17 @@ void divide_grad(const Tensor& x,
     auto tmp1 = divide<T>(x, tmp0);
     auto tmp2 = scale<T>(tmp1, -1.0, 0.0, true);
     auto dy_res = multiply<T>(tmp2, out_grad);
-    if (phi::product(x.dims()) > phi::product(y.dims())) {
+    if (x.dims() != y.dims()) {
       // Maybe need reduce here
-      phi::DDim reduce_dim = get_reduce_dims(x.dims(), y.dims());
-      auto dy_reduce_res =
-          sum<T>(dy_res, phi::vectorize(reduce_dim), y.dtype(), false);
-      auto dy_tmp = reshape<T>(dy_reduce_res, phi::vectorize(y.dims()));
-      dy->set_impl(dy_tmp.impl());
+      phi::DDim reduce_dim = get_reduce_dims(y.dims(), x.dims());
+      if (!reduce_dim.size()) {
+        dy->set_impl(dy_res.impl());
+      } else {
+        auto dy_reduce_res =
+            sum<T>(dy_res, phi::vectorize(reduce_dim), y.dtype(), false);
+        auto dy_tmp = reshape<T>(dy_reduce_res, phi::vectorize(y.dims()));
+        dy->set_impl(dy_tmp.impl());
+      }
     } else {
       dy->set_impl(dy_res.impl());
     }
@@ -168,13 +190,18 @@ void divide_grad(const Tensor& x,
     auto one_tensor = full<T>(phi::vectorize(y.dims()), 1.0);
     auto tmp0 = divide<T>(one_tensor, y);
     auto dx_res = multiply<T>(tmp0, out_grad);
-    if (phi::product(y.dims()) > phi::product(x.dims())) {
+    if (y.dims() != x.dims()) {
      // Maybe need reduce here
-      auto reduce_dim = get_reduce_dims(y.dims(), x.dims());
-      auto dx_reduce_res =
-          sum<T>(dx_res, phi::vectorize(reduce_dim), x.dtype(), false);
-      auto dx_tmp = reshape<T>(dx_reduce_res, phi::vectorize(x.dims()));
-      dx->set_impl(dx_tmp.impl());
+      auto reduce_dim = get_reduce_dims(x.dims(), y.dims());
+      if (!reduce_dim.size()) {
+        dx->set_impl(dx_res.impl());
+      } else {
+        auto dx_reduce_res =
+            sum<T>(dx_res, phi::vectorize(reduce_dim), x.dtype(), false);
+        auto dx_tmp = reshape<T>(dx_reduce_res, phi::vectorize(x.dims()));
+        dx->set_impl(dx_tmp.impl());
+      }
     } else {
       dx->set_impl(dx_res.impl());
     }
@@ -190,5 +217,86 @@ void sqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) {
     x_grad->set_impl(x_grad_tmp.impl());
   }
 }
+
+template <typename T>
+void multiply_grad(const Tensor& x,
+                   const Tensor& y,
+                   const Tensor& out_grad,
+                   int axis,
+                   Tensor* x_grad,
+                   Tensor* y_grad) {
+  if (x_grad) {
+    auto x_grad_unreduce = multiply<T>(out_grad, y);
+    if (x.dims() != y.dims()) {
+      auto axes = get_reduce_dims(x.dims(), y.dims());
+      if (!axes.size()) {
+        x_grad->set_impl(x_grad_unreduce.impl());
+      } else {
+        auto x_grad_reduced = sum<T>(x_grad_unreduce,
+                                     phi::vectorize(axes),
+                                     x_grad_unreduce.dtype(),
+                                     false);
+        if (x_grad_reduced.dims().size() != x.dims().size()) {
+          x_grad_reduced = reshape<T>(x_grad_reduced, x.shape());
+        }
+        x_grad->set_impl(x_grad_reduced.impl());
+      }
+    } else {
+      x_grad->set_impl(x_grad_unreduce.impl());
+    }
+  }
+  if (y_grad) {
+    auto y_grad_unreduce = multiply<T>(out_grad, x);
+    if (y.dims() != x.dims()) {
+      auto axes = get_reduce_dims(y.dims(), x.dims());
+      if (!axes.size()) {
+        y_grad->set_impl(y_grad_unreduce.impl());
+      } else {
+        auto y_grad_reduced = sum<T>(y_grad_unreduce,
+                                     phi::vectorize(axes),
+                                     y_grad_unreduce.dtype(),
+                                     false);
+        if (y_grad_reduced.dims().size() != y.dims().size()) {
+          y_grad_reduced = reshape<T>(y_grad_reduced, y.shape());
+        }
+        y_grad->set_impl(y_grad_reduced.impl());
+      }
+    } else {
+      y_grad->set_impl(y_grad_unreduce.impl());
+    }
+  }
+}
+
+template <typename T>
+void expand_grad(const Tensor& x,
+                 const Tensor& out_grad,
+                 const IntArray& shape,
+                 Tensor* x_grad) {
+  if (x_grad) {
+    auto out_dims = phi::make_ddim(shape.GetData());
+    if (out_dims != x.dims()) {
+      auto axes = get_reduce_dims(x.dims(), out_dims);
+      if (!axes.size()) {
+        by_pass<T>(out_grad, x_grad);
+      } else {
+        auto reduced = sum<T>(out_grad, phi::vectorize(axes), x.dtype(), false);
+        if (reduced.dims().size() != x.dims().size()) {
+          reduced = reshape<T>(reduced, x.shape());
+        }
+        x_grad->set_impl(reduced.impl());
+      }
+    } else {
+      by_pass<T>(out_grad, x_grad);
+    }
+  }
+}
+
+template <typename T>
+void exp_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) {
+  if (x_grad) {
+    x_grad->set_impl(multiply<T>(out_grad, out).impl());
+  }
+}
 }  // namespace prim
 }  // namespace paddle
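To make the broadcasting path of multiply_grad concrete, take the 'test_broadcast_same_rank' case from the new unit tests: x has shape (2, 3, 1, 4), y has shape (2, 1, 3, 4), so out and v = out_grad have shape (2, 3, 3, 4). Then

x_grad = reshape(sum(v ⊙ y, axes = {2}, keepdim = false), (2, 3, 1, 4))
y_grad = reshape(sum(v ⊙ x, axes = {1}, keepdim = false), (2, 1, 3, 4))

where {2} = get_reduce_dims(x.dims(), y.dims()) and {1} = get_reduce_dims(y.dims(), x.dims()); the final reshape restores the size-1 axis that the keepdim=false sum dropped.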
@@ -67,5 +67,15 @@ template <>
 Tensor reshape<Tensor>(Tensor x, IntArray shape) {
   return ::reshape_ad_func(x, shape);
 }
+
+template <>
+Tensor exp<Tensor>(const Tensor& x) {
+  return ::exp_ad_func(x);
+}
+
+template <typename T>
+Tensor expand(const Tensor& x, const IntArray& shape) {
+  return ::expand_ad_func(x, shape);
+}
 }  // namespace prim
 }  // namespace paddle
@@ -57,5 +57,11 @@ Tensor sum(Tensor x,
 template <typename T>
 Tensor reshape(Tensor x, IntArray shape);
+
+template <typename T>
+Tensor expand(const Tensor& x, const IntArray& shape);
+
+template <typename T>
+Tensor exp(const Tensor& x);
 }  // namespace prim
 }  // namespace paddle
@@ -199,7 +199,7 @@ Tensor sum<DescTensor>(Tensor x,
       "Out", {std::static_pointer_cast<prim::DescTensor>(out.impl())->Name()});
   op->CheckAttrs();
   op->InferVarType(block);
-  // TODO(jiabin): This may have runtime shape skip infershape for now.
+  // TODO(jiabin, cxxly): This may have runtime shape skip infershape for now.
   return out;
 }
@@ -222,7 +222,23 @@ Tensor reshape<DescTensor>(Tensor x, paddle::experimental::IntArray shape) {
       "Out", {std::static_pointer_cast<prim::DescTensor>(out.impl())->Name()});
   op->CheckAttrs();
   op->InferVarType(block);
-  // TODO(jiabin): This may have runtime shape skip infershape for now.
+  // TODO(jiabin, cxxly): This may have runtime shape skip infershape for now.
+  return out;
+}
+
+template <>
+Tensor exp<DescTensor>(const Tensor& x) {
+  Tensor out = empty<DescTensor>({}, phi::DataType::FLOAT32, paddle::Place());
+  framework::BlockDesc* block = StaticCompositeContext::Instance().GetBlock();
+  framework::OpDesc* op = block->AppendOp();
+  op->SetType("exp");
+  op->SetInput("X",
+               {std::static_pointer_cast<prim::DescTensor>(x.impl())->Name()});
+  op->SetOutput(
+      "Out", {std::static_pointer_cast<prim::DescTensor>(out.impl())->Name()});
+  op->CheckAttrs();
+  op->InferVarType(block);
+  op->InferShape(*block);
   return out;
 }
 }  // namespace prim
...
@@ -16,11 +16,12 @@
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_proto_maker.h"
+#include "paddle/fluid/operators/common_infer_shape_functions.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/int_array.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/ddim.h"
+using IntArray = paddle::experimental::IntArray;
 namespace paddle {
 namespace prim {
 // We put some api like utils here
@@ -36,43 +37,42 @@ paddle::experimental::Tensor empty_like(const paddle::experimental::Tensor& x,
 template <typename T>
 void by_pass(const paddle::experimental::Tensor& x,
              paddle::experimental::Tensor* out);
 // These method don't need to be specified
-static phi::DDim get_reduce_dims(const phi::DDim& x_dims,
-                                 const phi::DDim& y_dims) {
+static phi::DDim get_reduce_dims_from_out(const phi::DDim& dout_dims,
+                                          const phi::DDim& in_dims) {
   std::vector<int64_t> result;
-  PADDLE_ENFORCE_GE(phi::product(x_dims),
-                    phi::product(y_dims),
-                    phi::errors::InvalidArgument(
-                        "Only x_dims >= y_dims is accepted for "
-                        "get_reduce_dims, but we got x_dims: %s, y_dims: %s",
-                        x_dims,
-                        y_dims));
-  int bat = x_dims.size() - y_dims.size();
+  int bat = dout_dims.size() - in_dims.size();
   for (int i = 0; i < bat; ++i) {
     result.push_back(i);
   }
-  for (int i = 0; i < y_dims.size(); ++i) {
-    if (y_dims[i] == 1) {
+  for (int i = 0; i < in_dims.size(); ++i) {
+    if (in_dims[i] == 1) {
       result.push_back(i + bat);
     } else {
       PADDLE_ENFORCE_EQ(
-          y_dims[i],
-          x_dims[i + bat],
+          in_dims[i],
+          dout_dims[i + bat],
          platform::errors::InvalidArgument(
               "ReduceDims dimension mismatch. Operands could "
-              "not be broadcast together with the shape of x_dims = [%s] and "
-              "the shape of y_dims = [%s]. Received [%d] in X is not equal to "
+              "not be broadcast together with the shape of dout = [%s] and "
+              "the shape of in_dims = [%s]. Received [%d] in X is not equal to "
              "[%d] in Y at i:%d.",
-              x_dims,
-              y_dims,
-              x_dims[i + bat],
-              y_dims[i],
+              dout_dims,
+              in_dims,
+              dout_dims[i + bat],
+              in_dims[i],
              i));
     }
   }
-  auto res_dims = phi::make_ddim(result);
-  VLOG(4) << "Reduce Dims is: " << res_dims;
-  return res_dims;
+  return phi::make_ddim(result);
 }
+
+static phi::DDim get_reduce_dims(const phi::DDim& x_dims,
+                                 const phi::DDim& y_dims) {
+  auto out_dims = paddle::operators::details::BroadcastTwoDims(x_dims, y_dims);
+  return get_reduce_dims_from_out(out_dims, x_dims);
+}
 }  // namespace prim
 }  // namespace paddle
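For readers who want to see the axis computation in isolation, here is a small self-contained sketch that mirrors the logic of get_reduce_dims_from_out above; it uses plain std::vector instead of phi::DDim, and the name ReduceAxesFromOut is ours, not Paddle's.

#include <cstdint>
#include <iostream>
#include <vector>

// Axes over which a gradient with the broadcast output shape dout_dims must be
// summed so that it collapses back to the input shape in_dims: the extra
// leading axes of the output, plus every axis where the input has size 1.
std::vector<int64_t> ReduceAxesFromOut(const std::vector<int64_t>& dout_dims,
                                       const std::vector<int64_t>& in_dims) {
  std::vector<int64_t> axes;
  const int64_t bat = static_cast<int64_t>(dout_dims.size()) -
                      static_cast<int64_t>(in_dims.size());
  for (int64_t i = 0; i < bat; ++i) {
    axes.push_back(i);  // leading axes that exist only in the output
  }
  for (size_t i = 0; i < in_dims.size(); ++i) {
    if (in_dims[i] == 1) {
      axes.push_back(static_cast<int64_t>(i) + bat);  // broadcast size-1 axis
    }
  }
  return axes;
}

int main() {
  // dy of multiply with x: [2, 3, 3, 4], y: [2, 1, 3, 4]  ->  reduce over {1}
  for (int64_t a : ReduceAxesFromOut({2, 3, 3, 4}, {2, 1, 3, 4})) {
    std::cout << a << ' ';
  }
  std::cout << '\n';
  // dx of expand from [1, 10, 1] to [10, 10, 10, 10]  ->  reduce over {0, 1, 3}
  for (int64_t a : ReduceAxesFromOut({10, 10, 10, 10}, {1, 10, 1})) {
    std::cout << a << ' ';
  }
  std::cout << '\n';
  return 0;
}

Compiled with any C++11 compiler this prints "1" and "0 1 3", matching the multiply and expand broadcast cases exercised by the new tests.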
@@ -431,6 +431,7 @@
   kernel :
     func : exp_grad
   inplace : (out_grad -> x_grad)
+  composite : exp_grad(out, out_grad, x_grad)
 
 - backward_op : expm1_grad
   forward : expm1 (Tensor x) -> Tensor(out)
...
@@ -475,6 +475,7 @@
     func : expand_grad
   no_need_buffer : x
   backward : expand_double_grad
+  composite: expand_grad(x, out_grad, shape, x_grad_p)
 
 - backward_op : exponential__grad
   forward : exponential_ (Tensor x, float lam) -> Tensor(out)
@@ -880,6 +881,7 @@
     param : [x, y]
   kernel :
     func : multiply_grad
+  composite: multiply_grad(x, y, out_grad, axis, x_grad, y_grad)
   backward : multiply_double_grad
 
 - backward_op : multiply_triple_grad
...
@@ -8,10 +8,3 @@ set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0)
 foreach(TEST_OP ${TEST_OPS})
   py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS})
 endforeach()
-
-set_tests_properties(test_comp_eager_tanh_grad PROPERTIES TIMEOUT 60)
-set_tests_properties(test_comp_eager_div_grad PROPERTIES TIMEOUT 60)
-set_tests_properties(test_comp_eager_sum_grad PROPERTIES TIMEOUT 60)
-set_tests_properties(test_comp_eager_add_grad PROPERTIES TIMEOUT 60)
-set_tests_properties(test_comp_eager_sub_grad PROPERTIES TIMEOUT 60)
-set_tests_properties(test_comp_eager_sqrt_grad PROPERTIES TIMEOUT 60)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import autograd
import autograd.numpy
import numpy as np
import parameterized as param
import paddle
from paddle.fluid import core
@param.parameterized_class(
('primal', 'cotangent', 'dtype'),
[
(np.random.rand(10, 10), np.random.rand(10, 10), np.float32),
],
)
class TestExpGradComp(unittest.TestCase):
@classmethod
def setUpClass(cls):
core.set_prim_enabled(True)
cls.primal = cls.primal.astype(cls.dtype)
if cls.cotangent is not None:
cls.cotangent = cls.cotangent.astype(cls.dtype)
@classmethod
def tearDownClass(cls):
core.set_prim_enabled(False)
def test_exp_grad_comp(self):
def actual(primal, cotangent):
primal = paddle.to_tensor(primal)
primal.stop_gradient = False
return paddle.grad(
paddle.exp(primal), primal, paddle.to_tensor(cotangent)
)[0]
def desired(primal, cotangent):
cotangent = (
np.ones_like(cotangent, dtype=primal.dtype)
if cotangent is None
else cotangent
)
return autograd.make_vjp(autograd.numpy.exp)(primal)[0](cotangent)
np.testing.assert_allclose(
actual=actual(self.primal, self.cotangent),
desired=desired(self.primal, self.cotangent),
rtol=1e-6,
atol=0,
)
def test_stop_gradients(self):
with self.assertRaises(ValueError):
primal = paddle.to_tensor(self.primal)
primal.stop_gradient = True
return paddle.grad(
paddle.exp(primal), primal, paddle.to_tensor(self.cotangent)
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import parameterized as param
import paddle
from paddle.fluid import core
@param.parameterized_class(
('name', 'primal', 'cotangent', 'shape', 'dtype'),
(
(
'same_shape',
np.random.rand(10, 10),
np.random.rand(10, 10),
(10, 10),
np.float32,
),
(
'same_rank',
np.random.rand(1, 10),
np.random.rand(10, 10),
(10, 10),
np.float32,
),
(
'same_rank',
np.random.rand(10, 1, 10, 1),
np.random.rand(10, 10, 10, 10),
(10, 10, 10, 10),
np.float32,
),
(
'diff_rank',
np.random.rand(1, 10, 1),
np.random.rand(10, 10, 10, 10),
(10, 10, 10, 10),
np.float32,
),
),
)
class TestExpandGradComp(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.primal = cls.primal.astype(cls.dtype)
cls.cotangent = cls.cotangent.astype(cls.dtype)
@classmethod
def tearDownClass(cls):
core.set_prim_enabled(False)
def test_comp(self):
def func(primal, cotangent, shape):
primal = paddle.to_tensor(primal)
primal.stop_gradient = False
cotangent = paddle.to_tensor(cotangent)
return paddle.grad(paddle.expand(primal, shape), primal, cotangent)[
0
]
def actual(primal, cotangent, shape):
core.set_prim_enabled(True)
return func(primal, cotangent, shape)
def desired(primal, cotangent, shape):
core.set_prim_enabled(False)
return func(primal, cotangent, shape)
np.testing.assert_allclose(
actual=actual(self.primal, self.cotangent, self.shape),
desired=desired(self.primal, self.cotangent, self.shape),
rtol=1e-6,
atol=0,
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import parameterized as param
import paddle
from paddle.fluid import core
@param.parameterized_class(
('name', 'primals', 'stop_gradients', 'cotangents', 'dtype'),
(
(
'test_normal_case',
(np.random.rand(2, 3, 4), np.random.rand(2, 3, 4)),
(False, False),
(np.random.rand(2, 3, 4),),
np.float32,
),
(
'test_broadcast_diff_rank',
(np.random.rand(2, 3, 1, 4), np.random.rand(3, 3, 4)),
(False, False),
(np.random.rand(2, 3, 3, 4),),
np.float32,
),
(
'test_broadcast_same_rank',
(np.random.rand(2, 3, 1, 4), np.random.rand(2, 1, 3, 4)),
(False, False),
(np.random.rand(2, 3, 3, 4),),
np.float32,
),
(
'test_stop_gradient',
(np.random.rand(2, 3, 1, 4), np.random.rand(2, 1, 3, 4)),
(False, True),
(np.random.rand(2, 3, 3, 4),),
np.float32,
),
(
'test_reduce_axe_empty',
(np.random.rand(2, 3, 3, 4), np.random.rand(2, 1, 3, 4)),
(False, False),
(np.random.rand(2, 3, 3, 4),),
np.float32,
),
),
)
class TestMultiplyGradComp(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.primals = tuple(primal.astype(cls.dtype) for primal in cls.primals)
cls.cotangents = tuple(co.astype(cls.dtype) for co in cls.cotangents)
def as_tuple(self, x):
return (x,) if isinstance(x, paddle.Tensor) else x
def vjp(self):
primals, cotangents = self.primals, self.cotangents
primals = tuple(paddle.to_tensor(primal) for primal in primals)
for primal, flag in zip(primals, self.stop_gradients):
primal.stop_gradient = flag
cotangents = tuple(paddle.to_tensor(co) for co in cotangents)
out = self.as_tuple(paddle.multiply(*primals))
grads = paddle.grad(out, primals, cotangents, allow_unused=True)
return [g for g in grads if g is not None]
def test_comp(self):
core.set_prim_enabled(True)
actual = self.vjp()
core.set_prim_enabled(False)
desired = self.vjp()
for i, j in zip(actual, desired):
np.testing.assert_allclose(
i,
j,
rtol=1e-6,
atol=0,
)
if __name__ == '__main__':
unittest.main()
@@ -51,7 +51,7 @@ from paddle.fluid import core
         ),
     ],
 )
-class TestDivGradComp(unittest.TestCase):
+class TestAddGradComp(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.primal0 = cls.primal0.astype(cls.dtype)
...
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import autograd
import autograd.numpy
import numpy as np
import parameterized as param
import paddle
from paddle.fluid import core
@param.parameterized_class(
('primal', 'cotangent', 'dtype'),
[
(np.random.rand(10, 10), np.random.rand(10, 10), np.float32),
(np.random.rand(10, 10), None, np.float32),
],
)
class TestExpGradComp(unittest.TestCase):
@classmethod
def setUpClass(cls):
core.set_prim_enabled(True)
cls.primal = cls.primal.astype(cls.dtype)
if cls.cotangent is not None:
cls.cotangent = cls.cotangent.astype(cls.dtype)
@classmethod
def tearDownClass(cls):
core.set_prim_enabled(False)
def setUp(self):
paddle.enable_static()
def tearDown(self):
paddle.disable_static()
def test_exp_grad_comp(self):
def actual(primal, cotangent):
mp, sp = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(mp, sp):
x = paddle.static.data('primal', primal.shape, primal.dtype)
x.stop_gradient = False
v = (
None
if cotangent is None
else paddle.static.data(
'cotangent', cotangent.shape, cotangent.dtype
)
)
y = paddle.exp(x)
x_cotangent = paddle.static.gradients(y, x, v)
exe = paddle.static.Executor()
exe.run(sp)
return exe.run(
program=mp,
feed={'primal': primal, 'cotangent': cotangent},
fetch_list=x_cotangent,
)[0]
def desired(primal, cotangent):
cotangent = (
np.ones_like(cotangent, dtype=primal.dtype)
if cotangent is None
else cotangent
)
return autograd.make_vjp(autograd.numpy.exp)(primal)[0](cotangent)
np.testing.assert_allclose(
actual=actual(self.primal, self.cotangent),
desired=desired(self.primal, self.cotangent),
rtol=1e-6,
atol=0,
)
def test_stop_gradient(self):
def actual(primal, cotangent):
mp, sp = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(mp, sp):
x = paddle.static.data('primal', primal.shape, primal.dtype)
x.stop_gradient = True
v = (
None
if cotangent is None
else paddle.static.data(
'cotangent', cotangent.shape, cotangent.dtype
)
)
y = paddle.exp(x)
x_cotangent = paddle.static.gradients(y, x, v)
exe = paddle.static.Executor()
exe.run(sp)
return exe.run(
program=mp,
feed={'primal': primal, 'cotangent': cotangent},
fetch_list=x_cotangent,
)
def desired(primal, cotangent):
return []
self.assertEqual(
actual(self.primal, self.cotangent),
desired(self.primal, self.cotangent),
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import parameterized as param
import paddle
from paddle.fluid import core
@param.parameterized_class(
('name', 'primal', 'cotangent', 'shape', 'dtype'),
(
(
'same_shape',
np.random.rand(10, 10),
np.random.rand(10, 10),
(10, 10),
np.float32,
),
(
'same_rank',
np.random.rand(1, 10),
np.random.rand(10, 10),
(10, 10),
np.float32,
),
(
'same_rank',
np.random.rand(10, 1, 10, 1),
np.random.rand(10, 10, 10, 10),
(10, 10, 10, 10),
np.float32,
),
(
'diff_rank',
np.random.rand(1, 10, 1),
np.random.rand(10, 10, 10, 10),
(10, 10, 10, 10),
np.float32,
),
(
'single_direction_broadcast',
np.random.rand(10, 10, 10, 10),
np.random.rand(1, 10, 1),
(10, 10, 10, 10),
np.float32,
),
),
)
class TestExpandGradComp(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.primal = cls.primal.astype(cls.dtype)
cls.cotangent = cls.cotangent.astype(cls.dtype)
paddle.enable_static()
@classmethod
def tearDownClass(cls):
paddle.disable_static()
core.set_prim_enabled(False)
def test_comp(self):
def func(primal, cotangent, shape):
mp, sp = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(mp, sp):
x = paddle.static.data('primal', primal.shape, primal.dtype)
x.stop_gradient = False
v = paddle.static.data(
'cotangent', cotangent.shape, cotangent.dtype
)
y = paddle.expand(x, shape)
x_cotangent = paddle.static.gradients(y, x)
exe = paddle.static.Executor()
exe.run(sp)
return exe.run(
program=mp,
feed={'primal': primal, 'cotangent': cotangent},
fetch_list=x_cotangent,
)[0]
def actual(primal, cotangent, shape):
core.set_prim_enabled(True)
return func(primal, cotangent, shape)
def desired(primal, cotangent, shape):
core.set_prim_enabled(False)
return func(primal, cotangent, shape)
np.testing.assert_allclose(
actual=actual(self.primal, self.cotangent, self.shape),
desired=desired(self.primal, self.cotangent, self.shape),
rtol=1e-6,
atol=0,
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import parameterized as param
import paddle
from paddle.fluid import core, framework
@param.parameterized_class(
('name', 'primals', 'stop_gradients', 'cotangents', 'dtype'),
(
(
'test_normal_case',
(np.random.rand(2, 3, 4), np.random.rand(2, 3, 4)),
(False, False),
(np.random.rand(2, 3, 4),),
np.float32,
),
(
'test_broadcast_diff_rank',
(np.random.rand(2, 3, 1, 4), np.random.rand(3, 3, 4)),
(False, False),
(np.random.rand(2, 3, 3, 4),),
np.float32,
),
(
'test_broadcast_same_rank',
(np.random.rand(2, 3, 1, 4), np.random.rand(2, 1, 3, 4)),
(False, False),
(np.random.rand(2, 3, 3, 4),),
np.float32,
),
(
'test_stop_gradient',
(np.random.rand(2, 3, 1, 4), np.random.rand(2, 1, 3, 4)),
(False, True),
(np.random.rand(2, 3, 3, 4),),
np.float32,
),
(
'test_reduce_axe_empty',
(np.random.rand(2, 3, 3, 4), np.random.rand(2, 1, 3, 4)),
(False, False),
(np.random.rand(2, 1, 3, 1),),
np.float32,
),
),
)
class TestMultiplyGradComp(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.primals = tuple(primal.astype(cls.dtype) for primal in cls.primals)
cls.cotangents = tuple(co.astype(cls.dtype) for co in cls.cotangents)
def setUp(self):
paddle.enable_static()
def tearDown(self):
paddle.disable_static()
def as_tuple(self, x):
return (x,) if isinstance(x, framework.Variable) else x
def vjp(self):
primals, cotangents = self.primals, self.cotangents
mp, sp = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(mp, sp):
primals = tuple(
paddle.static.data(f'primal{i}', primal.shape, primal.dtype)
for i, primal in enumerate(primals)
)
for primal, flag in zip(primals, self.stop_gradients):
primal.stop_gradient = flag
cotangents = tuple(
paddle.static.data(f'cotangent{i}', co.shape, co.dtype)
for i, co in enumerate(cotangents)
)
out = self.as_tuple(paddle.multiply(*primals))
grads = paddle.static.gradients(out, primals)
exe = paddle.static.Executor()
exe.run(sp)
return exe.run(
program=mp,
feed={
**{
f'primal{i}': primal
for i, primal in enumerate(self.primals)
},
**{f'cotangent{i}': co for i, co in enumerate(self.cotangents)},
},
fetch_list=[g for g in grads if g is not None],
)
def test_comp(self):
core.set_prim_enabled(True)
actual = self.vjp()
core.set_prim_enabled(False)
desired = self.vjp()
self.assertEqual(len(actual), len(desired))
for i, j in zip(actual, desired):
np.testing.assert_allclose(
i,
j,
rtol=1e-6,
atol=0,
)
if __name__ == '__main__':
unittest.main()
@@ -39,14 +39,15 @@ from paddle.fluid import core
             np.random.rand(2, 3, 1, 4),
             np.float32,
         ),
+        (np.random.rand(2, 3, 3, 4), np.random.rand(2, 3, 1, 4), np.float32),
         (
-            np.random.rand(2, 3, 3, 4),
+            np.random.rand(2, 1, 3, 4),
             np.random.rand(2, 3, 1, 4),
             np.float32,
         ),
         (
             np.random.rand(2, 3, 3, 4),
-            np.random.rand(2, 3, 1, 1),
+            np.random.rand(2, 1, 1, 4),
             np.float32,
         ),
     ],
...