diff --git a/paddle/fluid/operators/graph_send_recv_op.cc b/paddle/fluid/operators/graph_send_recv_op.cc
index d9c0ec51714644dbc1670d7ab763b52a4d8e636b..e9ba861c3b88bee44dc3d06379ba17a81034bb39 100644
--- a/paddle/fluid/operators/graph_send_recv_op.cc
+++ b/paddle/fluid/operators/graph_send_recv_op.cc
@@ -58,6 +58,10 @@ class GraphSendRecvOpMaker : public framework::OpProtoAndCheckerMaker {
              "The input tensor with data type float32, float64, int32, int64.");
     AddInput("Src_index", "The source index tensor.");
     AddInput("Dst_index", "The destination index tensor.");
+    AddInput("Out_size",
+             "(Tensor<int>, optional). The 0th dimension of the output. "
+             "It has a higher priority than Attr(out_size).")
+        .AsDispensable();
     AddOutput("Out", "Output tensor of graph_send_recv op.");
     AddOutput("Dst_count",
               "Count tensor of Dst_index, mainly for MEAN pool_type.")
@@ -68,12 +72,12 @@ class GraphSendRecvOpMaker : public framework::OpProtoAndCheckerMaker {
                          "tensors of Dst_index.")
         .SetDefault("SUM")
         .InEnum({"SUM", "MEAN", "MIN", "MAX"});
-    AddAttr<int64_t>(
+    AddAttr<std::vector<int64_t>>(
         "out_size",
-        "(int64_t, default 0)"
+        "(vector<int64_t>, default {0}) "
         "Define the first dimension of Output tensor."
-        "If set default 0, then the shape of Out is the same with X.")
-        .SetDefault(0);
+        " If set default {0}, then the shape of Out is the same with X.")
+        .SetDefault({0});
     AddComment(R"DOC(
 Graph Learning Send_Recv combine operator.
 
diff --git a/paddle/fluid/pybind/op_function_generator.h b/paddle/fluid/pybind/op_function_generator.h
index 8f66d258edac4288828fff2624611da403303ede..588e5521e60709ad7a99397a44683f94343a3a12 100644
--- a/paddle/fluid/pybind/op_function_generator.h
+++ b/paddle/fluid/pybind/op_function_generator.h
@@ -225,6 +225,7 @@ std::map<std::string, std::set<std::string>> op_ins_map = {
       "Bias3",
       "Mean3",
       "Var3"}},
+    {"graph_send_recv", {"X", "Src_index", "Dst_index", "Out_size"}},
 };
 
 // NOTE(zhiqiu): Like op_ins_map.
diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
index 6fefa0affdfcb496a3c57ebf2844e85d108f4a5f..e5a808e97b0dc1c9ead835bc2257108edfdca776 100755
--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -1060,7 +1060,7 @@
     func : generate_proposals_v2
 
 - api : graph_send_recv
-  args : (Tensor x, Tensor src_index, Tensor dst_index, str pool_type = "SUM", int64_t out_size = 0)
+  args : (Tensor x, Tensor src_index, Tensor dst_index, str pool_type = "SUM", IntArray out_size = {0})
   output : Tensor(out), Tensor(dst_count)
   infer_meta :
     func : GraphSendRecvInferMeta
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 20b05d4cfc6aa63ff00f9ca8b9ea28de62701654..b08a42ea3fde3f73ef423907ba1ea470ac1d6c4d 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -941,7 +941,7 @@
     func : gelu_grad
 
 - backward_api : graph_send_recv_grad
-  forward : graph_send_recv (Tensor x, Tensor src_index, Tensor dst_index, str pool_type = "SUM", int64_t out_size = 0) -> Tensor(out), Tensor(dst_count)
+  forward : graph_send_recv (Tensor x, Tensor src_index, Tensor dst_index, str pool_type = "SUM", IntArray out_size = {0}) -> Tensor(out), Tensor(dst_count)
   args : (Tensor x, Tensor src_index, Tensor dst_index, Tensor out, Tensor dst_count, Tensor out_grad, str pool_type = "SUM")
   output : Tensor(x_grad)
   infer_meta :
diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc
index 3ee42b86d6e3e433280d38f00f35efc541d46c0b..a919a955a541a13c9e2a64f61e97a40544c93c92 100644
--- a/paddle/phi/infermeta/ternary.cc
+++ b/paddle/phi/infermeta/ternary.cc
@@ -412,7 +412,7 @@ void GraphSendRecvInferMeta(const MetaTensor& x,
                             const MetaTensor& src_index,
                             const MetaTensor& dst_index,
                             const std::string& pool_type,
-                            int64_t out_size,
+                            const IntArray& out_size,
                             MetaTensor* out,
                             MetaTensor* dst_count) {
   auto src_index_dims = src_index.dims();
@@ -455,23 +455,13 @@ void GraphSendRecvInferMeta(const MetaTensor& x,
                         "Src_index and Dst_index should have the same shape."));
 
   auto dims = x.dims();
-  if (out_size <= 0) {
-    out->set_dims(dims);
-  } else {
-    std::vector<int64_t> dims_ = phi::vectorize(dims);
-    if (dims_.size() > 0) {
-      dims_[0] = out_size;
-    }
-    out->set_dims(phi::make_ddim(dims_));
-  }
+  std::vector<int64_t> dims_ = phi::vectorize(dims);
+  if (!dims_.empty()) dims_[0] = -1;  // dim 0 is resolved by the kernel
+  out->set_dims(phi::make_ddim(dims_));
   out->set_dtype(x.dtype());
 
   if (pool_type == "MEAN") {
-    if (out_size <= 0) {
-      dst_count->set_dims({dims[0]});
-    } else {
-      dst_count->set_dims({out_size});
-    }
+    dst_count->set_dims({-1});
     dst_count->set_dtype(DataType::INT32);
   }
 }
diff --git a/paddle/phi/infermeta/ternary.h b/paddle/phi/infermeta/ternary.h
index 55a63b1c957c40257024117942c21382df676e22..466bd3df5de2d21f27ccc88fec7852e93868814b 100644
--- a/paddle/phi/infermeta/ternary.h
+++ b/paddle/phi/infermeta/ternary.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include "paddle/phi/common/int_array.h"
 #include "paddle/phi/core/meta_tensor.h"
 
 namespace phi {
@@ -75,7 +76,7 @@ void GraphSendRecvInferMeta(const MetaTensor& x,
                             const MetaTensor& src_index,
                             const MetaTensor& dst_index,
                             const std::string& pool_type,
-                            int64_t out_size,
+                            const IntArray& out_size,
                             MetaTensor* out,
                             MetaTensor* dst_count);
 
diff --git a/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc b/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc
index e4034230c7866f1223172e9be1bf6fdcd7f3d124..d4b9c8c60e3f84b0c3845f28dce0aa5e55d23a9d 100644
--- a/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc
+++ b/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc
@@ -88,27 +88,35 @@ void GraphSendRecvOpKernelLaunchHelper(const Context& ctx,
                                        DenseTensor* dst_count = nullptr) {
   const int& index_size = src_index.dims()[0];
 
-  ctx.template Alloc<T>(out);
-  T* p_output = out->data<T>();
   const auto& src_dims = x.dims();
   int64_t memset_size = 1;
   if (out_size <= 0) {
+    out->Resize(src_dims);
     for (int i = 0; i < src_dims.size(); ++i) {
       memset_size *= src_dims[i];
     }
   } else {
+    // Set out dim following out_size.
+    std::vector<int64_t> dims_ = phi::vectorize(src_dims);
+    if (dims_.size() > 0) {
+      dims_[0] = out_size;
+    }
+    out->Resize(phi::make_ddim(dims_));
     memset_size = out_size;
     for (int i = 1; i < src_dims.size(); ++i) {
       memset_size *= src_dims[i];
     }
   }
+
+  ctx.template Alloc<T>(out);
+  T* p_output = out->data<T>();
   const size_t& memset_bytes = memset_size * sizeof(T);
   memset(p_output, 0, memset_bytes);
 
   if (index_size == 0) return;
-
   const IndexT* s_index = src_index.data<IndexT>();
   const IndexT* d_index = dst_index.data<IndexT>();
+
   if (pool_type == "SUM") {
     GraphSendRecvCpuLoop<T, IndexT, GraphSendRecvSumFunctor<T>>(
         src_dims[0], index_size, s_index, d_index, x, out, pool_type);
@@ -119,10 +127,12 @@ void GraphSendRecvOpKernelLaunchHelper(const Context& ctx,
     GraphSendRecvCpuLoop<T, IndexT, GraphSendRecvMaxFunctor<T>>(
         src_dims[0], index_size, s_index, d_index, x, out, pool_type);
   } else if (pool_type == "MEAN") {
+    int64_t input_size = out_size <= 0 ? src_dims[0] : out_size;
+    dst_count->Resize({input_size});
     ctx.template Alloc<int>(dst_count);
     int* p_dst_count = dst_count->data<int>();
-    memset(p_dst_count, 0, src_dims[0] * sizeof(int));
-    GraphSendRecvCpuLoop<T, IndexT, GraphSendRecvSumFunctor<T>>(src_dims[0],
+    memset(p_dst_count, 0, input_size * sizeof(int));
+    GraphSendRecvCpuLoop<T, IndexT, GraphSendRecvSumFunctor<T>>(input_size,
                                                                 index_size,
                                                                 s_index,
                                                                 d_index,
@@ -139,16 +149,29 @@ void GraphSendRecvKernel(const Context& ctx,
                          const DenseTensor& src_index,
                          const DenseTensor& dst_index,
                          const std::string& pool_type,
-                         int64_t out_size,
+                         const IntArray& out_size,
                          DenseTensor* out,
                          DenseTensor* dst_count) {
   auto index_type = src_index.dtype();
+  auto& out_size_data = out_size.GetData();
   if (index_type == phi::DataType::INT32) {
-    GraphSendRecvOpKernelLaunchHelper<Context, T, int32_t>(
-        ctx, x, src_index, dst_index, pool_type, out_size, out, dst_count);
+    GraphSendRecvOpKernelLaunchHelper<Context, T, int32_t>(ctx,
+                                                           x,
+                                                           src_index,
+                                                           dst_index,
+                                                           pool_type,
+                                                           out_size_data[0],
+                                                           out,
+                                                           dst_count);
   } else if (index_type == phi::DataType::INT64) {
-    GraphSendRecvOpKernelLaunchHelper<Context, T, int64_t>(
-        ctx, x, src_index, dst_index, pool_type, out_size, out, dst_count);
+    GraphSendRecvOpKernelLaunchHelper<Context, T, int64_t>(ctx,
+                                                           x,
+                                                           src_index,
+                                                           dst_index,
+                                                           pool_type,
+                                                           out_size_data[0],
+                                                           out,
+                                                           dst_count);
   }
 }
 
diff --git a/paddle/phi/kernels/gpu/graph_send_recv_funcs.h b/paddle/phi/kernels/gpu/graph_send_recv_funcs.h
index a93603ae18f1caefbffed49ecd01ddc247b7463b..4be92ae18629c8c049e38d093ca229ad2426a6f4 100644
--- a/paddle/phi/kernels/gpu/graph_send_recv_funcs.h
+++ b/paddle/phi/kernels/gpu/graph_send_recv_funcs.h
@@ -81,7 +81,7 @@ __global__ void InputResetMaxCUDAKernel(T* output,
                                         size_t input_size,
                                         size_t slice_size) {
   CUDA_KERNEL_LOOP_TYPE(i, input_size * slice_size, int64_t) {
-    if (*(output + i) == std::numeric_limits<T>::min()) {
+    if (*(output + i) == std::numeric_limits<T>::lowest()) {
       *(output + i) = 0;
     }
   }
diff --git a/paddle/phi/kernels/gpu/graph_send_recv_kernel.cu b/paddle/phi/kernels/gpu/graph_send_recv_kernel.cu
index 7ecf352ffe9966920a5fc8544633107013d39181..4dc2794d9c94919281c33b8fb75e1e05e24fc53d 100644
--- a/paddle/phi/kernels/gpu/graph_send_recv_kernel.cu
+++ b/paddle/phi/kernels/gpu/graph_send_recv_kernel.cu
@@ -37,20 +37,27 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
                                            DenseTensor* out,
                                            DenseTensor* dst_count = nullptr) {
   const int& index_size = src_index.dims()[0];
-  ctx.template Alloc<T>(out);
-  T* p_output = out->data<T>();
   const auto& src_dims = x.dims();
   int64_t memset_size = 1;
   if (out_size <= 0) {
+    out->Resize(src_dims);
     for (int i = 0; i < src_dims.size(); ++i) {
       memset_size *= src_dims[i];
     }
   } else {
+    // Out keeps x's trailing dims; only dim 0 follows out_size.
+    std::vector<int64_t> dims_ = phi::vectorize(src_dims);
+    if (dims_.size() > 0) {
+      dims_[0] = out_size;
+    }
+    out->Resize(phi::make_ddim(dims_));
     memset_size = out_size;
     for (int i = 1; i < src_dims.size(); ++i) {
       memset_size *= src_dims[i];
     }
   }
+  ctx.template Alloc<T>(out);
+  T* p_output = out->data<T>();
   const size_t& memset_bytes = memset_size * sizeof(T);
   if (pool_type == "SUM" || pool_type == "MEAN") {
 #ifdef PADDLE_WITH_HIP
@@ -63,7 +70,7 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
     thrust::fill(thrust::device,
                  p_output_ptr,
                  p_output_ptr + memset_size,
-                 std::numeric_limits<T>::min());
+                 std::numeric_limits<T>::lowest());
   } else if (pool_type == "MIN") {
     thrust::device_ptr<T> p_output_ptr(p_output);
     thrust::fill(thrust::device,
@@ -91,7 +98,7 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
   int64_t max_grid_dimx = ctx.GetCUDAMaxGridDimSize()[0];
   int64_t grid_tmp = (n + block - 1) / block;
   int64_t grid = grid_tmp < max_grid_dimx ? grid_tmp : max_grid_dimx;
-  int64_t input_size = src_dims[0];
+  int64_t input_size = out_size <= 0 ? src_dims[0] : out_size;
   if (pool_type == "SUM") {
     GraphSendRecvSumCUDAFunctor<T, IndexT> functor;
     GraphSendRecvCUDAKernel<T, IndexT, GraphSendRecvSumCUDAFunctor<T, IndexT>>
@@ -103,9 +110,6 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
         <<<grid, block, 0, ctx.stream()>>>(
             p_src, s_index, d_index, p_output, index_size, slice_size, functor);
 
-    if (out_size > 0) {
-      input_size = out_size;
-    }
     int64_t grid_max_tmp = (input_size * slice_size + block - 1) / block;
     int64_t grid_max =
         grid_max_tmp < max_grid_dimx ? grid_max_tmp : max_grid_dimx;
@@ -117,9 +121,6 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
         <<<grid, block, 0, ctx.stream()>>>(
             p_src, s_index, d_index, p_output, index_size, slice_size, functor);
 
-    if (out_size > 0) {
-      input_size = out_size;
-    }
     int64_t grid_min_tmp = (input_size * slice_size + block - 1) / block;
     int64_t grid_min =
         grid_min_tmp < max_grid_dimx ? grid_min_tmp : max_grid_dimx;
@@ -130,12 +131,9 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx,
     GraphSendRecvCUDAKernel<T, IndexT, GraphSendRecvSumCUDAFunctor<T, IndexT>>
         <<<grid, block, 0, ctx.stream()>>>(
             p_src, s_index, d_index, p_output, index_size, slice_size, functor);
-
+    dst_count->Resize({input_size});
     ctx.template Alloc<int32_t>(dst_count);
-    int32_t* p_dst_count = dst_count->data<int32_t>();
-    if (out_size > 0) {
-      input_size = out_size;
-    }
+    int* p_dst_count = dst_count->data<int>();
 
 #ifdef PADDLE_WITH_HIP
     hipMemset(p_dst_count, 0, input_size * sizeof(int));
@@ -161,16 +159,29 @@ void GraphSendRecvKernel(const Context& ctx,
                          const DenseTensor& src_index,
                          const DenseTensor& dst_index,
                          const std::string& pool_type,
-                         int64_t out_size,
+                         const IntArray& out_size,
                          DenseTensor* out,
                          DenseTensor* dst_count) {
   auto index_type = src_index.dtype();
+  auto& out_size_data = out_size.GetData();
   if (index_type == phi::DataType::INT32) {
-    GraphSendRecvOpCUDAKernelLaunchHelper<Context, T, int32_t>(
-        ctx, x, src_index, dst_index, pool_type, out_size, out, dst_count);
+    GraphSendRecvOpCUDAKernelLaunchHelper<Context, T, int32_t>(ctx,
+                                                               x,
+                                                               src_index,
+                                                               dst_index,
+                                                               pool_type,
+                                                               out_size_data[0],
+                                                               out,
+                                                               dst_count);
   } else if (index_type == phi::DataType::INT64) {
-    GraphSendRecvOpCUDAKernelLaunchHelper<Context, T, int64_t>(
-        ctx, x, src_index, dst_index, pool_type, out_size, out, dst_count);
+    GraphSendRecvOpCUDAKernelLaunchHelper<Context, T, int64_t>(ctx,
+                                                               x,
+                                                               src_index,
+                                                               dst_index,
+                                                               pool_type,
+                                                               out_size_data[0],
+                                                               out,
+                                                               dst_count);
   }
 }
 
diff --git a/paddle/phi/kernels/graph_send_recv_kernel.h b/paddle/phi/kernels/graph_send_recv_kernel.h
index 8f635225b75a45f5c868e6c49ef8767c4ea65b08..cd625c92b93eae7fe4f81a3035cf435b164a07e9 100644
--- a/paddle/phi/kernels/graph_send_recv_kernel.h
+++ b/paddle/phi/kernels/graph_send_recv_kernel.h
@@ -16,6 +16,7 @@
 
 #include <string>
 
+#include "paddle/phi/common/int_array.h"
 #include "paddle/phi/core/dense_tensor.h"
 
 namespace phi {
@@ -26,7 +27,7 @@ void GraphSendRecvKernel(const Context& ctx,
                          const DenseTensor& src_index,
                          const DenseTensor& dst_index,
                          const std::string& pool_type,
-                         int64_t out_size,
+                         const IntArray& out_size,
                          DenseTensor* out,
                          DenseTensor* dst_count);
 
diff --git a/paddle/phi/ops/compat/graph_send_recv_sig.cc b/paddle/phi/ops/compat/graph_send_recv_sig.cc
index 9df2cf4d0fe9180bffe78bde3c0940cad34c86cd..c8c15619d5d396be7c59d42fb9f7544f16d0c849 100644
--- a/paddle/phi/ops/compat/graph_send_recv_sig.cc
+++ b/paddle/phi/ops/compat/graph_send_recv_sig.cc
@@ -18,10 +18,17 @@ namespace phi {
 
 KernelSignature GraphSendRecvOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
-  return KernelSignature("graph_send_recv",
-                         {"X", "Src_index", "Dst_index"},
-                         {"pool_type", "out_size"},
-                         {"Out", "Dst_count"});
+  if (ctx.HasInput("Out_size")) {
+    return KernelSignature("graph_send_recv",
+                           {"X", "Src_index", "Dst_index"},
+                           {"pool_type", "Out_size"},
+                           {"Out", "Dst_count"});
+  } else {
+    return KernelSignature("graph_send_recv",
+                           {"X", "Src_index", "Dst_index"},
+                           {"pool_type", "out_size"},
+                           {"Out", "Dst_count"});
+  }
 }
 
 KernelSignature GraphSendRecvGradOpArgumentMapping(
diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index 2e4dc53290226ce574aa89f56a119bd87d98ce4c..1c013e1ca34946c9bfddbd82a3b1f68947a31c1b 100755
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -78,6 +78,7 @@ import paddle.onnx  # noqa: F401
 import paddle.reader  # noqa: F401
 import paddle.static  # noqa: F401
 import paddle.vision  # noqa: F401
+import paddle.geometric  # noqa: F401
 
 from .tensor.attribute import is_complex  # noqa: F401
 from .tensor.attribute import is_integer  # noqa: F401
diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py
index c0fdb134f16d60c724bef35728fe3bd66ea68250..73c15255190666ffc54df53123c5b84ea464d3ec 100644
--- a/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py
+++ b/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py
@@ -28,8 +28,8 @@ def graph_send_recv_wrapper(x,
                             pool_type="sum",
                             out_size=None,
                             name=None):
-    return paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                           pool_type.lower(), out_size, name)
+    return paddle.geometric.send_u_recv(x, src_index, dst_index,
+                                        pool_type.lower(), out_size, name)
 
 
 class TestGraphSendRecvMaxOp(OpTest):
@@ -268,20 +268,143 @@ class API_GraphSendRecvOpTest(unittest.TestCase):
                 {}\n{}, check diff!".format(np_res, ret_res))
 
     def test_dygraph(self):
-        device = paddle.CPUPlace()
-        with paddle.fluid.dygraph.guard(device):
-            x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]]),
-                                 dtype="float32")
-            src_index = paddle.to_tensor(np.array([0, 1, 2, 0]), dtype="int32")
-            dst_index = paddle.to_tensor(np.array([1, 2, 1, 0]), dtype="int32")
+        paddle.disable_static()
+        x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]]),
+                             dtype="float32")
+        src_index = paddle.to_tensor(np.array([0, 1, 2, 0]), dtype="int32")
+        dst_index = paddle.to_tensor(np.array([1, 2, 1, 0]), dtype="int32")
+        res_sum = paddle.incubate.graph_send_recv(x, src_index, dst_index,
+                                                  "sum")
+        res_mean = paddle.incubate.graph_send_recv(x, src_index, dst_index,
+                                                   "mean")
+        res_max = paddle.incubate.graph_send_recv(x, src_index, dst_index,
+                                                  "max")
+        res_min = paddle.incubate.graph_send_recv(x, src_index, dst_index,
+                                                  "min")
+
+        np_sum = np.array([[0, 2, 3], [2, 8, 10], [1, 4, 5]], dtype="float32")
+        np_mean = np.array([[0, 2, 3], [1, 4, 5], [1, 4, 5]], dtype="float32")
+        np_max = np.array([[0, 2, 3], [2, 6, 7], [1, 4, 5]], dtype="float32")
+        np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]], dtype="float32")
+
+        ret = [res_sum, res_mean, res_max, res_min]
+
+        for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret):
+            self.assertTrue(
+                np.allclose(np_res, ret_res, atol=1e-6), "two value is\
+                {}\n{}, check diff!".format(np_res, ret_res))
+
+    def test_int32_input(self):
+        paddle.disable_static()
+        x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]),
+                             dtype="int32")
+        src_index = paddle.to_tensor(np.array([0, 1, 2, 0, 1]), dtype="int32")
+        dst_index = paddle.to_tensor(np.array([1, 2, 1, 0, 1]), dtype="int32")
+        res_sum = paddle.incubate.graph_send_recv(x, src_index, dst_index,
+                                                  "sum")
+        res_mean = paddle.incubate.graph_send_recv(x, src_index, dst_index,
+                                                   "mean")
+        res_max = paddle.incubate.graph_send_recv(x, src_index, dst_index,
+                                                  "max")
+        res_min = paddle.incubate.graph_send_recv(x, src_index, dst_index,
+                                                  "min")
+
+        np_sum = np.array([[0, 2, 3], [3, 12, 14], [1, 4, 5]], dtype="int32")
+        np_mean = np.array([[0, 2, 3], [1, 4, 4], [1, 4, 5]], dtype="int32")
+        np_max = np.array([[0, 2, 3], [2, 6, 6], [1, 4, 5]], dtype="int32")
+        np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]], dtype="int32")
+
+        ret = [res_sum, res_mean, res_max, res_min]
+
+        for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret):
+            self.assertTrue(
+                np.allclose(np_res, ret_res, atol=1e-6), "two value is\
+                {}\n{}, check diff!".format(np_res, ret_res))
+
+    def test_set_outsize_gpu(self):
+        paddle.disable_static()
+        x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]),
+                             dtype="float32")
+        src_index = paddle.to_tensor(np.array([0, 0, 1]), dtype="int32")
+        dst_index = paddle.to_tensor(np.array([0, 1, 1]), dtype="int32")
+        res = paddle.incubate.graph_send_recv(x, src_index, dst_index, "sum")
+        out_size = paddle.max(dst_index) + 1
+        res_set_outsize = paddle.incubate.graph_send_recv(
+            x, src_index, dst_index, "sum", out_size)
+
+        np_res = np.array([[0, 2, 3], [1, 6, 8], [0, 0, 0]], dtype="float32")
+        np_res_set_outsize = np.array([[0, 2, 3], [1, 6, 8]], dtype="float32")
+
+        self.assertTrue(
+            np.allclose(np_res, res, atol=1e-6), "two value is\
+                {}\n{}, check diff!".format(np_res, res))
+        self.assertTrue(
+            np.allclose(np_res_set_outsize, res_set_outsize, atol=1e-6),
+            "two value is\
+                {}\n{}, check diff!".format(np_res_set_outsize,
+                                            res_set_outsize))
+
+    def test_out_size_tensor_static(self):
+        paddle.enable_static()
+        with paddle.static.program_guard(paddle.static.Program()):
+            x = paddle.static.data(name="x", shape=[3, 3], dtype="float32")
+            src_index = paddle.static.data(name="src", shape=[3], dtype="int32")
+            dst_index = paddle.static.data(name="dst", shape=[3], dtype="int32")
+            out_size = paddle.static.data(name="out_size",
+                                          shape=[1],
+                                          dtype="int32")
+
             res_sum = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                      "sum")
-            res_mean = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                       "mean")
-            res_max = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                      "max")
-            res_min = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                      "min")
+                                                      "sum", out_size)
+
+            exe = paddle.static.Executor(paddle.CPUPlace())
+            data1 = np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]], dtype='float32')
+            data2 = np.array([0, 0, 1], dtype="int32")
+            data3 = np.array([0, 1, 1], dtype="int32")
+            data4 = np.array([2], dtype="int32")
+
+            np_sum = np.array([[0, 2, 3], [1, 6, 8]], dtype="float32")
+
+            ret = exe.run(feed={
+                'x': data1,
+                'src': data2,
+                'dst': data3,
+                'out_size': data4,
+            },
+                          fetch_list=[res_sum])
+        self.assertTrue(
+            np.allclose(np_sum, ret[0], atol=1e-6), "two value is\
+                        {}\n{}, check diff!".format(np_sum, ret[0]))
+
+    def test_api_eager_dygraph(self):
+        with _test_eager_guard():
+            self.test_dygraph()
+            self.test_int32_input()
+            self.test_set_outsize_gpu()
+
+
+class API_GeometricSendURecvTest(unittest.TestCase):
+
+    def test_static(self):
+        paddle.enable_static()
+        with paddle.static.program_guard(paddle.static.Program()):
+            x = paddle.static.data(name="x", shape=[3, 3], dtype="float32")
+            src_index = paddle.static.data(name="src", shape=[4], dtype="int32")
+            dst_index = paddle.static.data(name="dst", shape=[4], dtype="int32")
+
+            res_sum = paddle.geometric.send_u_recv(x, src_index, dst_index,
+                                                   "sum")
+            res_mean = paddle.geometric.send_u_recv(x, src_index, dst_index,
+                                                    "mean")
+            res_max = paddle.geometric.send_u_recv(x, src_index, dst_index,
+                                                   "max")
+            res_min = paddle.geometric.send_u_recv(x, src_index, dst_index,
+                                                   "min")
+
+            exe = paddle.static.Executor(paddle.CPUPlace())
+            data1 = np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype='float32')
+            data2 = np.array([0, 1, 2, 0], dtype="int32")
+            data3 = np.array([1, 2, 1, 0], dtype="int32")
 
             np_sum = np.array([[0, 2, 3], [2, 8, 10], [1, 4, 5]],
                               dtype="float32")
@@ -292,38 +415,58 @@ class API_GraphSendRecvOpTest(unittest.TestCase):
             np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]],
                               dtype="float32")
 
-            ret = [res_sum, res_mean, res_max, res_min]
+            ret = exe.run(feed={
+                'x': data1,
+                'src': data2,
+                'dst': data3
+            },
+                          fetch_list=[res_sum, res_mean, res_max, res_min])
 
         for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret):
             self.assertTrue(
                 np.allclose(np_res, ret_res, atol=1e-6), "two value is\
                 {}\n{}, check diff!".format(np_res, ret_res))
 
-    def test_int32_input(self):
-        device = paddle.CPUPlace()
-        with paddle.fluid.dygraph.guard(device):
-            x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]),
-                                 dtype="int32")
-            src_index = paddle.to_tensor(np.array([0, 1, 2, 0, 1]),
-                                         dtype="int32")
-            dst_index = paddle.to_tensor(np.array([1, 2, 1, 0, 1]),
-                                         dtype="int32")
-            res_sum = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                      "sum")
-            res_mean = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                       "mean")
-            res_max = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                      "max")
-            res_min = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                      "min")
+    def test_dygraph(self):
+        paddle.disable_static()
+        x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]]),
+                             dtype="float32")
+        src_index = paddle.to_tensor(np.array([0, 1, 2, 0]), dtype="int32")
+        dst_index = paddle.to_tensor(np.array([1, 2, 1, 0]), dtype="int32")
+        res_sum = paddle.geometric.send_u_recv(x, src_index, dst_index, "sum")
+        res_mean = paddle.geometric.send_u_recv(x, src_index, dst_index, "mean")
+        res_max = paddle.geometric.send_u_recv(x, src_index, dst_index, "max")
+        res_min = paddle.geometric.send_u_recv(x, src_index, dst_index, "min")
+
+        np_sum = np.array([[0, 2, 3], [2, 8, 10], [1, 4, 5]], dtype="float32")
+        np_mean = np.array([[0, 2, 3], [1, 4, 5], [1, 4, 5]], dtype="float32")
+        np_max = np.array([[0, 2, 3], [2, 6, 7], [1, 4, 5]], dtype="float32")
+        np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]], dtype="float32")
+
+        ret = [res_sum, res_mean, res_max, res_min]
 
-            np_sum = np.array([[0, 2, 3], [3, 12, 14], [1, 4, 5]],
-                              dtype="int32")
-            np_mean = np.array([[0, 2, 3], [1, 4, 4], [1, 4, 5]], dtype="int32")
-            np_max = np.array([[0, 2, 3], [2, 6, 6], [1, 4, 5]], dtype="int32")
-            np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]], dtype="int32")
+        for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret):
+            self.assertTrue(
+                np.allclose(np_res, ret_res, atol=1e-6), "two value is\
+                {}\n{}, check diff!".format(np_res, ret_res))
 
-            ret = [res_sum, res_mean, res_max, res_min]
+    def test_int32_input(self):
+        paddle.disable_static()
+        x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]),
+                             dtype="int32")
+        src_index = paddle.to_tensor(np.array([0, 1, 2, 0, 1]), dtype="int32")
+        dst_index = paddle.to_tensor(np.array([1, 2, 1, 0, 1]), dtype="int32")
+        res_sum = paddle.geometric.send_u_recv(x, src_index, dst_index, "sum")
+        res_mean = paddle.geometric.send_u_recv(x, src_index, dst_index, "mean")
+        res_max = paddle.geometric.send_u_recv(x, src_index, dst_index, "max")
+        res_min = paddle.geometric.send_u_recv(x, src_index, dst_index, "min")
+
+        np_sum = np.array([[0, 2, 3], [3, 12, 14], [1, 4, 5]], dtype="int32")
+        np_mean = np.array([[0, 2, 3], [1, 4, 4], [1, 4, 5]], dtype="int32")
+        np_max = np.array([[0, 2, 3], [2, 6, 6], [1, 4, 5]], dtype="int32")
+        np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]], dtype="int32")
+
+        ret = [res_sum, res_mean, res_max, res_min]
 
         for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret):
             self.assertTrue(
@@ -331,31 +474,60 @@ class API_GraphSendRecvOpTest(unittest.TestCase):
                 {}\n{}, check diff!".format(np_res, ret_res))
 
     def test_set_outsize_gpu(self):
-        if paddle.fluid.core.is_compiled_with_cuda():
-            x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]),
-                                 dtype="float32")
-            src_index = paddle.to_tensor(np.array([0, 0, 1]), dtype="int32")
-            dst_index = paddle.to_tensor(np.array([0, 1, 1]), dtype="int32")
-            res = paddle.incubate.graph_send_recv(x, src_index, dst_index,
-                                                  "sum")
-            out_size = paddle.max(dst_index) + 1
-            res_set_outsize = paddle.incubate.graph_send_recv(
-                x, src_index, dst_index, "sum", out_size)
-
-            np_res = np.array([[0, 2, 3], [1, 6, 8], [0, 0, 0]],
-                              dtype="float32")
-            np_res_set_outsize = np.array([[0, 2, 3], [1, 6, 8]],
-                                          dtype="float32")
-
-            self.assertTrue(
-                np.allclose(np_res, res, atol=1e-6), "two value is\
+        paddle.disable_static()
+        x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]),
+                             dtype="float32")
+        src_index = paddle.to_tensor(np.array([0, 0, 1]), dtype="int32")
+        dst_index = paddle.to_tensor(np.array([0, 1, 1]), dtype="int32")
+        res = paddle.geometric.send_u_recv(x, src_index, dst_index, "sum")
+        out_size = paddle.max(dst_index) + 1
+        res_set_outsize = paddle.geometric.send_u_recv(x, src_index, dst_index,
+                                                       "sum", out_size)
+
+        np_res = np.array([[0, 2, 3], [1, 6, 8], [0, 0, 0]], dtype="float32")
+        np_res_set_outsize = np.array([[0, 2, 3], [1, 6, 8]], dtype="float32")
+
+        self.assertTrue(
+            np.allclose(np_res, res, atol=1e-6), "two value is\
                 {}\n{}, check diff!".format(np_res, res))
-            self.assertTrue(
-                np.allclose(np_res_set_outsize, res_set_outsize, atol=1e-6),
-                "two value is\
+        self.assertTrue(
+            np.allclose(np_res_set_outsize, res_set_outsize, atol=1e-6),
+            "two value is\
                 {}\n{}, check diff!".format(np_res_set_outsize,
                                             res_set_outsize))
 
+    def test_out_size_tensor_static(self):
+        paddle.enable_static()
+        with paddle.static.program_guard(paddle.static.Program()):
+            x = paddle.static.data(name="x", shape=[3, 3], dtype="float32")
+            src_index = paddle.static.data(name="src", shape=[3], dtype="int32")
+            dst_index = paddle.static.data(name="dst", shape=[3], dtype="int32")
+            out_size = paddle.static.data(name="out_size",
+                                          shape=[1],
+                                          dtype="int32")
+            # out_size is a static.data placeholder here, not a Python int.
+            res_sum = paddle.geometric.send_u_recv(x, src_index, dst_index,
+                                                   "sum", out_size)
+            # CPU executor plus the arrays fed below; data4 feeds out_size.
+            exe = paddle.static.Executor(paddle.CPUPlace())
+            data1 = np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]], dtype='float32')
+            data2 = np.array([0, 0, 1], dtype="int32")
+            data3 = np.array([0, 1, 1], dtype="int32")
+            data4 = np.array([2], dtype="int32")
+            # Expected "sum" result: two rows, matching the fed out_size of 2.
+            np_sum = np.array([[0, 2, 3], [1, 6, 8]], dtype="float32")
+            # Feed keys match the `name=` of the static.data placeholders above.
+            ret = exe.run(feed={
+                'x': data1,
+                'src': data2,
+                'dst': data3,
+                'out_size': data4,
+            },
+                          fetch_list=[res_sum])
+        self.assertTrue(
+            np.allclose(np_sum, ret[0], atol=1e-6), "two value is\
+                        {}\n{}, check diff!".format(np_sum, ret[0]))
+
     def test_api_eager_dygraph(self):
         with _test_eager_guard():
             self.test_dygraph()
diff --git a/python/paddle/geometric/__init__.py b/python/paddle/geometric/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e59062a7cc6a3aea991f104d857c84b2816776e
--- /dev/null
+++ b/python/paddle/geometric/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .message_passing import send_u_recv  # noqa: F401
+
+__all__ = [
+    'send_u_recv',
+]
diff --git a/python/paddle/geometric/message_passing/__init__.py b/python/paddle/geometric/message_passing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9580e658650aef3aed4e8327d38f95064e03157
--- /dev/null
+++ b/python/paddle/geometric/message_passing/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .send_recv import send_u_recv  # noqa: F401
diff --git a/python/paddle/geometric/message_passing/send_recv.py b/python/paddle/geometric/message_passing/send_recv.py
new file mode 100644
index 0000000000000000000000000000000000000000..87379730a2a60bf0a071c00a22ff5b6879abd1e4
--- /dev/null
+++ b/python/paddle/geometric/message_passing/send_recv.py
@@ -0,0 +1,162 @@
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.framework import _non_static_mode, _in_legacy_dygraph, in_dygraph_mode
+from paddle.fluid.framework import Variable
+from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype
+from paddle import _C_ops
+
+from .utils import convert_out_size_to_list, get_out_size_tensor_inputs
+
+
+def send_u_recv(x,
+                src_index,
+                dst_index,
+                pool_type="sum",
+                out_size=None,
+                name=None):
+    """
+
+    Graph Learning message passing api.
+
+    This api is mainly used in Graph Learning domain, and the main purpose is to reduce intermediate memory
+    consumption in the process of message passing. Take `x` as the input tensor, we first use `src_index`
+    to gather the corresponding data, and then use `dst_index` to update the corresponding position of output tensor
+    in different pooling types, like sum, mean, max, or min. Besides, we can use `out_size` to set necessary output shape.
+
+    .. code-block:: text
+
+           Given:
+
+           X = [[0, 2, 3],
+                [1, 4, 5],
+                [2, 6, 7]]
+
+           src_index = [0, 1, 2, 0]
+
+           dst_index = [1, 2, 1, 0]
+
+           pool_type = "sum"
+
+           out_size = None
+
+           Then:
+
+           Out = [[0, 2, 3],
+                  [2, 8, 10],
+                  [1, 4, 5]]
+
+    Args:
+        x (Tensor): The input tensor, and the available data type is float32, float64, int32, int64.
+        src_index (Tensor): A 1-D tensor, and the available data type is int32, int64.
+        dst_index (Tensor): A 1-D tensor, and should have the same shape as `src_index`.
+                            The available data type is int32, int64.
+        pool_type (str): Different pooling types, including `sum`, `mean`, `max`, `min`.
+                         Default value is `sum`.
+        out_size (int|Tensor|None): We can set `out_size` to get necessary output shape. If not set or
+                                    out_size is smaller than or equal to 0, then this input will not be used.
+                                    Otherwise, `out_size` should be equal to or larger than
+                                    max(dst_index) + 1.
+        name (str, optional): Name for the operation (optional, default is None).
+                              For more information, please refer to :ref:`api_guide_Name`.
+
+    Returns:
+        out (Tensor): The output tensor, should have the same shape and same dtype as input tensor `x`.
+                      If `out_size` is set correctly, then it should have the same shape as `x` except
+                      the 0th dimension.
+
+    Examples:
+
+        .. code-block:: python
+
+            import paddle
+
+            x = paddle.to_tensor([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32")
+            indexes = paddle.to_tensor([[0, 1], [1, 2], [2, 1], [0, 0]], dtype="int32")
+            src_index = indexes[:, 0]
+            dst_index = indexes[:, 1]
+            out = paddle.geometric.send_u_recv(x, src_index, dst_index, pool_type="sum")
+            # Outputs: [[0., 2., 3.], [2., 8., 10.], [1., 4., 5.]]
+
+            x = paddle.to_tensor([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32")
+            indexes = paddle.to_tensor([[0, 1], [2, 1], [0, 0]], dtype="int32")
+            src_index = indexes[:, 0]
+            dst_index = indexes[:, 1]
+            out_size = paddle.max(dst_index) + 1
+            out = paddle.geometric.send_u_recv(x, src_index, dst_index, pool_type="sum", out_size=out_size)
+            # Outputs: [[0., 2., 3.], [2., 8., 10.]]
+
+            x = paddle.to_tensor([[0, 2, 3], [1, 4, 5], [2, 6, 7]], dtype="float32")
+            indexes = paddle.to_tensor([[0, 1], [2, 1], [0, 0]], dtype="int32")
+            src_index = indexes[:, 0]
+            dst_index = indexes[:, 1]
+            out = paddle.geometric.send_u_recv(x, src_index, dst_index, pool_type="sum")
+            # Outputs: [[0., 2., 3.], [2., 8., 10.], [0., 0., 0.]]
+
+    """
+
+    if pool_type not in ["sum", "mean", "max", "min"]:
+        raise ValueError(
+            "pool_type should be `sum`, `mean`, `max` or `min`, but received %s"
+            % pool_type)
+
+    # TODO(daisiming): Should we add judgement for out_size: max(dst_index) + 1.
+
+    if _in_legacy_dygraph():
+        out_size = convert_out_size_to_list(out_size)
+        out, tmp = _C_ops.graph_send_recv(x, src_index,
+                                          dst_index, None, 'pool_type',
+                                          pool_type.upper(), 'out_size',
+                                          out_size)
+        return out
+    if in_dygraph_mode():
+        out_size = convert_out_size_to_list(out_size)
+        return _C_ops.final_state_graph_send_recv(x, src_index, dst_index,
+                                                  pool_type.upper(), out_size)
+
+    check_variable_and_dtype(x, "X", ("float32", "float64", "int32", "int64"),
+                             "graph_send_recv")
+    check_variable_and_dtype(src_index, "Src_index", ("int32", "int64"),
+                             "graph_send_recv")
+    check_variable_and_dtype(dst_index, "Dst_index", ("int32", "int64"),
+                             "graph_send_recv")
+    if out_size:
+        check_type(out_size, 'out_size', (int, np.int32, np.int64, Variable),
+                   'graph_send_recv')
+    if isinstance(out_size, Variable):
+        check_dtype(out_size.dtype, 'out_size', ['int32', 'int64'],
+                    'graph_send_recv')
+
+    helper = LayerHelper("send_u_recv", **locals())
+    out = helper.create_variable_for_type_inference(dtype=x.dtype)
+    dst_count = helper.create_variable_for_type_inference(dtype="int32",
+                                                          stop_gradient=True)
+
+    inputs = {"X": x, "Src_index": src_index, "Dst_index": dst_index}
+    attrs = {"pool_type": pool_type.upper()}
+    get_out_size_tensor_inputs(inputs=inputs,
+                               attrs=attrs,
+                               out_size=out_size,
+                               op_type='graph_send_recv')
+
+    helper.append_op(type="graph_send_recv",
+                     inputs=inputs,
+                     outputs={
+                         "Out": out,
+                         "Dst_count": dst_count
+                     },
+                     attrs=attrs)
+    return out
diff --git a/python/paddle/geometric/message_passing/utils.py b/python/paddle/geometric/message_passing/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3614f829daf52ea56aed4c5ff98d12e56c64ca16
--- /dev/null
+++ b/python/paddle/geometric/message_passing/utils.py
@@ -0,0 +1,52 @@
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from paddle.fluid.framework import Variable
+from paddle.fluid.data_feeder import check_dtype, convert_dtype
+from paddle.fluid.layers.tensor import cast
+
+
+def convert_out_size_to_list(out_size):
+    """
+    Wrap out_size (None, int, np.int32, np.int64 or Variable) into a
+    one-element list for imperative mode; None becomes [0].
+    """
+    if out_size is None:
+        return [0]
+    if isinstance(out_size, (int, np.int32, np.int64)):
+        return [out_size]
+    # Remaining case: read the first element through .numpy() as an int.
+    first_value = out_size.numpy().astype(int)[0]
+    return [first_value]
+
+
+def get_out_size_tensor_inputs(inputs, attrs, out_size, op_type):
+    """
+    Static mode: put out_size into attrs['out_size'] (None -> [0], int) or,
+    for a Variable, into inputs['Out_size'] as int32; else raise TypeError.
+    """
+    if out_size is None:
+        attrs['out_size'] = [0]
+    elif isinstance(out_size, (int, np.int32, np.int64)):
+        attrs['out_size'] = [out_size]
+    elif isinstance(out_size, Variable):
+        out_size.stop_gradient = True
+        check_dtype(out_size.dtype, 'out_size', ['int32', 'int64'], op_type,
+                    '(When type of out_size in ' + op_type + ' is Variable.)')
+        if (convert_dtype(out_size.dtype) == 'int64'):
+            out_size = cast(out_size, 'int32')
+        inputs["Out_size"] = out_size
+    else:
+        raise TypeError("Out_size only supports Variable or int.")
diff --git a/python/paddle/incubate/operators/graph_send_recv.py b/python/paddle/incubate/operators/graph_send_recv.py
index e9937558e9b3a95d52c8f235f1164ed7d5afa755..132a6d4657ca1d80622795d2ec9c73b66aa4ed68 100644
--- a/python/paddle/incubate/operators/graph_send_recv.py
+++ b/python/paddle/incubate/operators/graph_send_recv.py
@@ -12,13 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import numpy as np
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.framework import _non_static_mode, _in_legacy_dygraph, in_dygraph_mode
-from paddle.fluid.data_feeder import check_variable_and_dtype
-from paddle.fluid import core
+from paddle.fluid.framework import Variable
+from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype
+from paddle.fluid.layers.tensor import cast
 from paddle import _C_ops
+import paddle.utils.deprecated as deprecated
 
 
+@deprecated(
+    since="2.4.0",
+    update_to="paddle.geometric.send_u_recv",
+    level=1,
+    reason="graph_send_recv in paddle.incubate will be removed in future")
 def graph_send_recv(x,
                     src_index,
                     dst_index,
@@ -63,14 +71,17 @@ def graph_send_recv(x,
                             The available data type is int32, int64. 
         pool_type (str): The pooling type of graph_send_recv, including `sum`, `mean`, `max`, `min`.
                          Default value is `sum`.
-        out_size (int64|None): We can set `out_size` to get necessary output shape. If not set, then this 
-                              attribute will not be used. If set, it should be equal with or larger than
-                              max(dst_index) + 1.
+        out_size (int|Tensor|None): We can set `out_size` to get necessary output shape. If not set or 
+                                    out_size is smaller or equal to 0, then this input will not be used.
+                                    Otherwise, `out_size` should be equal with or larger than 
+                                    max(dst_index) + 1.
         name (str, optional): Name for the operation (optional, default is None).
                               For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-        out (Tensor): The output tensor, should have the same shape and same dtype as input tensor `x`.
+        out (Tensor): The output tensor, should have the same shape and same dtype as input tensor `x`. 
+                      If `out_size` is set correctly, then it should have the same shape as `x` except 
+                      the 0th dimension.
 
     Examples:
 
@@ -109,31 +120,17 @@ def graph_send_recv(x,
 
     # TODO(daisiming): Should we add judgement for out_size: max(dst_index) + 1.
 
-    if out_size is None or out_size <= 0:
-        if _in_legacy_dygraph():
-            out, tmp = _C_ops.graph_send_recv(x, src_index, dst_index,
-                                              'pool_type', pool_type.upper())
-            return out
-        if in_dygraph_mode():
-            return _C_ops.final_state_graph_send_recv(x, src_index, dst_index,
-                                                      pool_type.upper(), 0)
-    else:
-        if _in_legacy_dygraph():
-            out, tmp = _C_ops.graph_send_recv(x, src_index,
-                                              dst_index, 'pool_type',
-                                              pool_type.upper(), 'out_size',
-                                              out_size)
-            return out
-        if in_dygraph_mode():
-            if isinstance(out_size, core.eager.Tensor):
-                if (out_size.size < 1):
-                    raise ValueError(
-                        "out_size should be long type, but received Tensor type."
-                    )
-                out_size = out_size.numpy()[0]
-            return _C_ops.final_state_graph_send_recv(x, src_index, dst_index,
-                                                      pool_type.upper(),
-                                                      out_size)
+    if _in_legacy_dygraph():
+        out_size = convert_out_size_to_list(out_size)
+        out, tmp = _C_ops.graph_send_recv(x, src_index,
+                                          dst_index, None, 'pool_type',
+                                          pool_type.upper(), 'out_size',
+                                          out_size)
+        return out
+    if in_dygraph_mode():
+        out_size = convert_out_size_to_list(out_size)
+        return _C_ops.final_state_graph_send_recv(x, src_index, dst_index,
+                                                  pool_type.upper(), out_size)
 
     check_variable_and_dtype(x, "X", ("float32", "float64", "int32", "int64"),
                              "graph_send_recv")
@@ -141,25 +138,64 @@ def graph_send_recv(x,
                              "graph_send_recv")
     check_variable_and_dtype(dst_index, "Dst_index", ("int32", "int64"),
                              "graph_send_recv")
+    if out_size:
+        check_type(out_size, 'out_size', (int, np.int32, np.int64, Variable),
+                   'graph_send_recv')
+    if isinstance(out_size, Variable):
+        check_dtype(out_size.dtype, 'out_size', ['int32', 'int64'],
+                    'graph_send_recv')
 
     helper = LayerHelper("graph_send_recv", **locals())
     out = helper.create_variable_for_type_inference(dtype=x.dtype)
     dst_count = helper.create_variable_for_type_inference(dtype="int32",
                                                           stop_gradient=True)
+
+    inputs = {"X": x, "Src_index": src_index, "Dst_index": dst_index}
+    attrs = {"pool_type": pool_type.upper()}
+    get_out_size_tensor_inputs(inputs=inputs,
+                               attrs=attrs,
+                               out_size=out_size,
+                               op_type='graph_send_recv')
+
     helper.append_op(type="graph_send_recv",
-                     inputs={
-                         "X": x,
-                         "Src_index": src_index,
-                         "Dst_index": dst_index
-                     },
+                     inputs=inputs,
                      outputs={
                          "Out": out,
                          "Dst_count": dst_count
                      },
-                     attrs={
-                         "pool_type":
-                         pool_type.upper(),
-                         "out_size":
-                         0 if out_size is None or out_size <= 0 else out_size
-                     })
+                     attrs=attrs)
     return out
+
+
+def convert_out_size_to_list(out_size):
+    """
+    Wrap out_size (None, int, np.int32, np.int64 or Variable) into a
+    one-element list for imperative mode; None becomes [0].
+    """
+    if out_size is None:
+        return [0]
+    if isinstance(out_size, (int, np.int32, np.int64)):
+        return [out_size]
+    # Remaining case: read the first element through .numpy() as an int.
+    first_value = out_size.numpy().astype(int)[0]
+    return [first_value]
+
+
+def get_out_size_tensor_inputs(inputs, attrs, out_size, op_type):
+    """
+    Static mode: put out_size into attrs['out_size'] (None -> [0], int) or,
+    for a Variable, into inputs['Out_size'] as int32; else raise TypeError.
+    """
+    if out_size is None:
+        attrs['out_size'] = [0]
+    elif isinstance(out_size, (int, np.int32, np.int64)):
+        attrs['out_size'] = [out_size]
+    elif isinstance(out_size, Variable):
+        out_size.stop_gradient = True
+        check_dtype(out_size.dtype, 'out_size', ['int32', 'int64'], op_type,
+                    '(When type of out_size in ' + op_type + ' is Variable.)')
+        if (convert_dtype(out_size.dtype) == 'int64'):
+            out_size = cast(out_size, 'int32')
+        inputs["Out_size"] = out_size
+    else:
+        raise TypeError("Out_size only supports Variable or int.")
diff --git a/python/setup.py.in b/python/setup.py.in
index 46056c9d064665617924a3eb42df236ec66e8d91..55129c47c220b8ed39b91db8f06ef30d7e5b2ad2 100755
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -400,6 +400,8 @@ packages=['paddle',
           'paddle.device.cuda',
           'paddle.version',
           'paddle.profiler',
+          'paddle.geometric',
+          'paddle.geometric.message_passing',
           ]
 
 with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: