Unverified commit 7613129e authored by YuanRisheng, committed by GitHub

change infermeta and remove MakePtenDenseTensor in reshape (#39186)

Parent 09104d02
......@@ -557,7 +557,7 @@ class ReduceOp : public framework::OperatorWithKernel {
if (ctx.InputVar("X")->IsType<framework::LoDTensor>()) {
if (!reduce_all) {
return framework::KernelSignature(
"sum", {"X"}, {"dim", "keep_dim", "out_dtype"}, {"Out"});
"sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"});
}
return framework::KernelSignature(
"sum_raw", {"X"}, {"dim", "keep_dim", "reduce_all", "out_dtype"},
......
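This reorder is not cosmetic: the attribute list in a KernelSignature is consumed positionally when binding to the pten kernel, so it must follow the kernel's new parameter order. A minimal sketch of the correspondence, using the sum signature this commit introduces further down (the mapping comments are editorial):

// Sketch: attributes map positionally onto the pten kernel's parameters,
// so "out_dtype" must now precede "keep_dim" in the KernelSignature:
//   "sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"}
template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,            // injected device context
               const DenseTensor& x,              // input  "X"
               const std::vector<int64_t>& dims,  // attr   "dim"
               DataType out_dtype,                // attr   "out_dtype"
               bool keep_dim,                     // attr   "keep_dim"
               DenseTensor* out);                 // output "Out"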
......@@ -38,33 +38,6 @@ namespace operators {
using Tensor = framework::Tensor;
-inline std::vector<int> get_new_shape(
-    const std::vector<const Tensor *> &list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(), framework::make_ddim({1}),
-        platform::errors::InvalidArgument(
-            "If the element type of 'shape' in ReshapeOp is Tensor, "
-            "the element's shape must be [1]. But received the element's shape "
-            "is [%s]",
-            tensor->dims()));
-    if (platform::is_gpu_place(tensor->place()) ||
-        platform::is_xpu_place(tensor->place())) {
-      framework::Tensor temp;
-      paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-    } else {
-      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
-    }
-  }
-  return vec_new_shape;
-}
class ReshapeOp : public framework::OperatorWithKernel {
public:
ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
......@@ -370,30 +343,6 @@ class ReshapeKernel {
void operator()(const framework::ExecutionContext &ctx) const {
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *in = ctx.Input<framework::LoDTensor>("X");
-    // framework::DDim out_dims = out->dims();
-    auto pt_x = paddle::experimental::MakePtenDenseTensor(*in);
-    // we can't MakePtenDenseTensor by out, because the out of reshape may have
-    // multiple states, some can MakePtenDenseTensor but other's cannot:
-    //    1. out tensor is not initialized
-    //    2. out tensor is input (complete inplace)
-    //    3. out tensor is view of input
-    // We can't MakePtenDenseTensor for case 2, so we solve this case by
-    // creating a temporary tensor here:
-    pten::DenseTensorMeta meta{pten::TransToPtenDataType(in->type()),
-                               in->dims(), in->layout()};
-    auto pt_out_tmp = std::make_shared<pten::DenseTensor>(
-        pten::make_intrusive<paddle::experimental::SharedStorage>(
-            ctx.GetPlace()),
-        std::move(meta));
-    pten::DenseTensor *pt_out = nullptr;
-    if (in != nullptr && out != nullptr && in->Holder() != nullptr &&
-        out->Holder() != nullptr &&
-        in->Holder()->ptr() == out->Holder()->ptr()) {
-      pt_out = pt_x.get();
-    } else {
-      pt_out = pt_out_tmp.get();
-    }
auto list_new_shape_tensor =
ctx.MultiInput<framework::Tensor>("ShapeTensor");
......@@ -410,55 +359,46 @@ class ReshapeKernel {
framework::Tensor temp;
paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
&temp);
-          pt_vec_shape.push_back(
-              std::move(*(paddle::experimental::MakePtenDenseTensor(temp))));
+          pt_vec_shape.push_back(std::move(temp));
} else {
-          pt_vec_shape.push_back(
-              std::move(*(paddle::experimental::MakePtenDenseTensor(*tensor))));
+          pt_vec_shape.push_back(*tensor);
}
}
pt_scalar_shape = pten::ScalarArray(pt_vec_shape);
} else if (shape_tensor) {
-      std::unique_ptr<pten::DenseTensor> pt_shape;
+      pten::DenseTensor pt_shape;
if (platform::is_gpu_place(shape_tensor->place()) ||
platform::is_xpu_place(shape_tensor->place())) {
framework::Tensor temp;
paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(),
&temp);
-        pt_shape = paddle::experimental::MakePtenDenseTensor(temp);
+        pt_shape = std::move(temp);
} else {
-        pt_shape = paddle::experimental::MakePtenDenseTensor(*shape_tensor);
+        pt_shape = *shape_tensor;
}
-      pt_scalar_shape = pten::ScalarArray(*pt_shape.get());
+      pt_scalar_shape = pten::ScalarArray(pt_shape);
} else {
auto &shape_attr = ctx.Attr<std::vector<int>>("shape");
pt_scalar_shape = pten::ScalarArray(shape_attr);
}
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeKernel(dev_ctx, *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
}
#endif
-    // non-inplace need move all result from pt_out to out, inplace need set
-    // result dims.
-    if (in != out) {
-      paddle::experimental::SharesStorage(pt_out, static_cast<Tensor *>(out));
-    } else {
-      out->Resize(pt_out->dims());
-    }
}
};
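With the MakePtenDenseTensor round-trip gone, the kernel gathers the target shape from one of three sources and hands the framework tensors to pten::ReshapeKernel directly; the inplace bookkeeping (SharesStorage / Resize on pt_out) moves into the pten kernel itself, visible in the pten/kernels hunk further down. A condensed sketch of the three shape paths, assuming framework::Tensor is now accepted wherever a pten::DenseTensor is expected:

// Condensed sketch: how the target shape reaches pten::ReshapeKernel.
pten::ScalarArray pt_scalar_shape;
if (!list_new_shape_tensor.empty()) {      // 1. "ShapeTensor" list input
  pt_scalar_shape = pten::ScalarArray(pt_vec_shape);  // one value per tensor
} else if (shape_tensor) {                 // 2. single "Shape" tensor input
  pt_scalar_shape = pten::ScalarArray(pt_shape);      // values read on host
} else {                                   // 3. plain "shape" attribute
  pt_scalar_shape = pten::ScalarArray(ctx.Attr<std::vector<int>>("shape"));
}
pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);  // no pt_x / pt_out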
......@@ -469,25 +409,22 @@ class ReshapeGradKernel {
auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->mutable_data(ctx.GetPlace(), d_out->type());
-    auto pt_d_x = paddle::experimental::MakePtenDenseTensor(*d_x);
-    auto pt_d_out = paddle::experimental::MakePtenDenseTensor(*d_out);
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
pten::ReshapeGradKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeGradKernel(dev_ctx, *pt_d_out.get(), pt_d_x.get());
+      pten::ReshapeGradKernel(dev_ctx, *d_out, d_x);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeGradKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
}
#endif
}
......@@ -500,27 +437,22 @@ class ReshapeDoubleGradKernel {
auto *dd_out = ctx.Output<framework::Tensor>("DDOut");
dd_out->mutable_data(ctx.GetPlace(), dd_x->type());
-    auto pt_dd_x = paddle::experimental::MakePtenDenseTensor(*dd_x);
-    auto pt_dd_out = paddle::experimental::MakePtenDenseTensor(*dd_out);
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::CPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::CPUContext &>(dev_ctx), *dd_x, dd_out);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeDoubleGradKernel(dev_ctx, *pt_dd_x.get(), pt_dd_out.get());
+      pten::ReshapeDoubleGradKernel(dev_ctx, *dd_x, dd_out);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::XPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::XPUContext &>(dev_ctx), *dd_x, dd_out);
}
#endif
}
......
......@@ -102,8 +102,8 @@ using scale_kernel = void (*)(const DeviceContext&,
using sum_kernel = void (*)(const DeviceContext&,
const DenseTensor&,
const std::vector<int64_t>&,
-                            bool,
                             DataType,
+                            bool,
DenseTensor*);
using subtract_kernel = void (*)(const DeviceContext&,
......
......@@ -126,6 +126,19 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
meta_ = std::move(meta);
}
+void DenseTensor::set_meta(const DenseTensorMeta& meta) {
+  PADDLE_ENFORCE(
+      meta.valid(),
+      paddle::platform::errors::InvalidArgument(
+          "Input meta is invalid, please check the meta attribute."));
+  meta_.dims = meta.dims;
+  meta_.dtype = meta.dtype;
+  meta_.is_scalar = meta.is_scalar;
+  meta_.layout = meta.layout;
+  meta_.lod = meta.lod;
+  meta_.offset = meta.offset;
+}
/* @jim19930609: This interface will be further modified util we finalized the
design for Allocator - Allocation
For now, we have to temporarily accommodate two independent use cases:
......
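Unlike the rvalue overload above it, the new const-ref overload validates the incoming meta and then copies it field by field (dims, dtype, is_scalar, layout, lod, offset). A hypothetical usage sketch; the make_ddim helper and the enum spellings are assumed from the surrounding codebase:

pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                           paddle::framework::make_ddim({2, 3}),
                           pten::DataLayout::NCHW);
dense_out->set_meta(meta);  // copies every field; PADDLE_ENFORCE rejects
                            // a meta for which meta.valid() is false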
......@@ -131,6 +131,8 @@ class DenseTensor : public TensorBase,
/// \param meta The meta information of the tensor.
void set_meta(DenseTensorMeta&& meta);
+  void set_meta(const DenseTensorMeta& meta);
/// \brief Test whether the metadata is valid.
/// \return Whether the metadata is valid.
bool valid() const noexcept override { return meta_.valid(); }
......
......@@ -131,8 +131,13 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
}
DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis) {
+                                     const DenseTensorMeta& y_meta) {
+  return ElementwiseRawInferMeta(x_meta, y_meta, -1);
+}
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis) {
DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout);
if (x_meta.dims != y_meta.dims) {
auto x_dims = x_meta.dims;
......
......@@ -42,6 +42,10 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
bool trans_y);
DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis);
+                                     const DenseTensorMeta& y_meta);
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis);
} // namespace pten
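The split keeps a two-argument ElementwiseInferMeta whose parameter list is exactly what api.yaml can express as param : [x, y], while callers that need explicit broadcast control move to the new Raw variant. A small call-site sketch (the tensor objects are assumed):

auto m1 = pten::ElementwiseInferMeta(x.meta(), y.meta());  // axis fixed at -1
auto m2 = pten::ElementwiseRawInferMeta(x.meta(), y.meta(), /*axis=*/0);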
......@@ -232,6 +232,16 @@ DenseTensorMeta ReshapeInferMeta(const DenseTensorMeta& x_meta,
return InferMetaFromVecValue(x_meta, shape.GetData());
}
+/* Why not use ReduceInferMeta directly?
+   Because we need to make InferMetaFunction's args follow the design of
+   api.yaml */
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim) {
+  return ReduceInferMeta(x_meta, axis, keep_dim, dtype);
+}
DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
bool keep_dim,
......
......@@ -58,4 +58,9 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
bool keep_dim,
DataType dtype = DataType::UNDEFINED);
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim);
} // namespace pten
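SumInferMeta computes nothing new; it only re-exposes ReduceInferMeta with the (axis, dtype, keep_dim) parameter order that the generated sum API passes positionally. The two calls in this sketch should therefore produce identical metas (the tensor x and axis vector are assumed):

auto a = pten::SumInferMeta(x.meta(), axis, DataType::FLOAT64, /*keep_dim=*/false);
auto b = pten::ReduceInferMeta(x.meta(), axis, /*keep_dim=*/false, DataType::FLOAT64);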
......@@ -33,8 +33,8 @@ template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
DenseTensor* out) {
bool reduce_all = false;
SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
......
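Note that only the public sum kernel changes order here; sum_raw keeps (dims, keep_dim, reduce_all, out_dtype), as both the forwarding call above and the "sum_raw" KernelSignature at the top of this diff show. Side by side (sketch, names from this diff):

SumKernel<T>(dev_ctx, x, dims, out_dtype, keep_dim, out);                 // new order
SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);  // unchanged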
......@@ -50,8 +50,8 @@ template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
DenseTensor* out);
template <typename T, typename Context>
......@@ -110,7 +110,7 @@ template <typename T, typename Context>
DenseTensor Add(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
......@@ -120,7 +120,7 @@ template <typename T, typename Context>
DenseTensor Subtract(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
......@@ -130,7 +130,7 @@ template <typename T, typename Context>
DenseTensor Divide(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
......@@ -140,7 +140,7 @@ template <typename T, typename Context>
DenseTensor Multiply(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
......@@ -163,10 +163,10 @@ DenseTensor Sum(const Context& dev_ctx,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim) {
-  auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim, dtype);
+  auto out_meta = SumInferMeta(x.meta(), axis, dtype, keep_dim);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
-  SumKernel<T, Context>(dev_ctx, x, axis, keep_dim, dtype, &dense_out);
+  SumKernel<T, Context>(dev_ctx, x, axis, dtype, keep_dim, &dense_out);
return dense_out;
}
......
......@@ -31,9 +31,8 @@ void ReshapeKernel(const Context& dev_ctx,
out->ResizeAndAllocate(out_meta.dims);
return;
}
-  out->Resize(x.dims());
-  out->mutable_data(x.place());
+  out->set_meta(out_meta);
+  out->mutable_data(dev_ctx.GetPlace());
pten::Copy(dev_ctx, x, false, out);
out->Resize(out_meta.dims);
out->ResetLoD(x.lod());
......
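The new non-aliased path sets the full meta up front and allocates on the kernel's own place instead of the input's. An annotated sketch of the sequence; the comments, including the assumption that Copy aligns the destination's dims with the source before Resize re-applies the inferred ones, are editorial:

out->set_meta(out_meta);                // full meta, not just dims
out->mutable_data(dev_ctx.GetPlace()); // allocate on the kernel's place
pten::Copy(dev_ctx, x, false, out);    // blocking = false; dims follow x
out->Resize(out_meta.dims);            // restore the inferred, reshaped dims
out->ResetLoD(x.lod());                // reshape preserves the input's LoD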
......@@ -3,7 +3,7 @@
output : Tensor
infer_meta :
func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
kernel :
func : add
......@@ -40,7 +40,7 @@
output : Tensor
infer_meta :
func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
kernel :
func : divide
......@@ -135,7 +135,7 @@
output : Tensor
infer_meta :
func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
kernel :
func : multiply
......@@ -166,19 +166,19 @@
output : Tensor
infer_meta :
func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
kernel :
func : subtract
- api : sum
args : (const Tensor& x, const std::vector<int64_t>& axis={}, DataType dtype=DataType::UNDEFINED, bool keep_dim=false)
output : Tensor
-  infer_meta :
-    func : ReduceInferMeta
-    param: [x, axis, keep_dim, dtype]
-  kernel :
+  infer_meta :
+    func : SumInferMeta
+    param: [x, axis, dtype, keep_dim]
+  kernel :
func : sum
-    param : [x, axis, keep_dim, dtype]
+    param : [x, axis, dtype, keep_dim]
data_type : x
- api : zeros_like
......