From 7613129e8d05439741e7c843dc62232827b9ecc5 Mon Sep 17 00:00:00 2001
From: YuanRisheng
Date: Tue, 25 Jan 2022 20:02:33 +0800
Subject: [PATCH] change infermeta and remove makePtenTensor in reshape (#39186)

---
 paddle/fluid/operators/reduce_ops/reduce_op.h |   2 +-
 paddle/fluid/operators/reshape_op.cc          | 102 +++---------------
 paddle/pten/api/include/kernel_signature.h    |   2 +-
 paddle/pten/core/dense_tensor.cc              |  13 +++
 paddle/pten/core/dense_tensor.h               |   2 +
 paddle/pten/infermeta/binary.cc               |   9 +-
 paddle/pten/infermeta/binary.h                |   8 +-
 paddle/pten/infermeta/unary.cc                |  10 ++
 paddle/pten/infermeta/unary.h                 |   5 +
 paddle/pten/kernels/math_kernel.cc            |   2 +-
 paddle/pten/kernels/math_kernel.h             |  14 +--
 paddle/pten/kernels/reshape_kernel.cc         |   5 +-
 python/paddle/utils/code_gen/api.yaml         |  18 ++--
 13 files changed, 81 insertions(+), 111 deletions(-)

diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h
index 661fb772f1c..87f51e4b800 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_op.h
+++ b/paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -557,7 +557,7 @@ class ReduceOp : public framework::OperatorWithKernel {
     if (ctx.InputVar("X")->IsType<framework::LoDTensor>()) {
       if (!reduce_all) {
         return framework::KernelSignature(
-            "sum", {"X"}, {"dim", "keep_dim", "out_dtype"}, {"Out"});
+            "sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"});
       }
       return framework::KernelSignature(
           "sum_raw", {"X"}, {"dim", "keep_dim", "reduce_all", "out_dtype"},
diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc
index 5170729a769..6c2d5ebcc7d 100644
--- a/paddle/fluid/operators/reshape_op.cc
+++ b/paddle/fluid/operators/reshape_op.cc
@@ -38,33 +38,6 @@ namespace operators {
 
 using Tensor = framework::Tensor;
 
-inline std::vector<int> get_new_shape(
-    const std::vector<const Tensor *> &list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(), framework::make_ddim({1}),
-        platform::errors::InvalidArgument(
-            "If the element type of 'shape' in ReshapeOp is Tensor, "
-            "the element's shape must be [1]. But received the element's shape "
-            "is [%s]",
-            tensor->dims()));
-    if (platform::is_gpu_place(tensor->place()) ||
-        platform::is_xpu_place(tensor->place())) {
-      framework::Tensor temp;
-      paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-
-      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-    } else {
-      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
-    }
-  }
-
-  return vec_new_shape;
-}
-
 class ReshapeOp : public framework::OperatorWithKernel {
  public:
   ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
@@ -370,30 +343,6 @@ class ReshapeKernel {
   void operator()(const framework::ExecutionContext &ctx) const {
     auto *out = ctx.Output<framework::LoDTensor>("Out");
     auto *in = ctx.Input<framework::LoDTensor>("X");
-    // framework::DDim out_dims = out->dims();
-    auto pt_x = paddle::experimental::MakePtenDenseTensor(*in);
-
-    // we can't MakePtenDenseTensor by out, because the out of reshape may have
-    // multiple states, some can MakePtenDenseTensor but other's cannot:
-    // 1. out tensor is not initialized
-    // 2. out tensor is input (complete inplace)
-    // 3. out tensor is view of input
-    // We can't MakePtenDenseTensor for case 2, so we solve this case by
-    // creating a temporary tensor here:
-    pten::DenseTensorMeta meta{pten::TransToPtenDataType(in->type()),
-                               in->dims(), in->layout()};
-    auto pt_out_tmp = std::make_shared<pten::DenseTensor>(
-        pten::make_intrusive<paddle::experimental::SharedStorage>(
-            ctx.GetPlace()),
-        std::move(meta));
-    pten::DenseTensor *pt_out = nullptr;
-    if (in != nullptr && out != nullptr && in->Holder() != nullptr &&
-        out->Holder() != nullptr &&
-        in->Holder()->ptr() == out->Holder()->ptr()) {
-      pt_out = pt_x.get();
-    } else {
-      pt_out = pt_out_tmp.get();
-    }
 
     auto list_new_shape_tensor =
         ctx.MultiInput<framework::Tensor>("ShapeTensor");
@@ -410,55 +359,46 @@ class ReshapeKernel {
           framework::Tensor temp;
           paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
                                             &temp);
-          pt_vec_shape.push_back(
-              std::move(*(paddle::experimental::MakePtenDenseTensor(temp))));
+          pt_vec_shape.push_back(std::move(temp));
         } else {
-          pt_vec_shape.push_back(
-              std::move(*(paddle::experimental::MakePtenDenseTensor(*tensor))));
+          pt_vec_shape.push_back(*tensor);
         }
       }
       pt_scalar_shape = pten::ScalarArray(pt_vec_shape);
     } else if (shape_tensor) {
-      std::unique_ptr<pten::DenseTensor> pt_shape;
+      pten::DenseTensor pt_shape;
       if (platform::is_gpu_place(shape_tensor->place()) ||
          platform::is_xpu_place(shape_tensor->place())) {
        framework::Tensor temp;
        paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(),
                                          &temp);
-        pt_shape = paddle::experimental::MakePtenDenseTensor(temp);
+        pt_shape = std::move(temp);
      } else {
-        pt_shape = paddle::experimental::MakePtenDenseTensor(*shape_tensor);
+        pt_shape = *shape_tensor;
      }
-      pt_scalar_shape = pten::ScalarArray(*pt_shape.get());
+      pt_scalar_shape = pten::ScalarArray(pt_shape);
     } else {
       auto &shape_attr = ctx.Attr<std::vector<int>>("shape");
       pt_scalar_shape = pten::ScalarArray(shape_attr);
     }
     if (platform::is_cpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
     }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (platform::is_gpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeKernel(dev_ctx, *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);
     }
 #endif
 #ifdef PADDLE_WITH_XPU
     if (platform::is_xpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
     }
 #endif
-    // non-inplace need move all result from pt_out to out, inplace need set
-    // result dims.
-    if (in != out) {
-      paddle::experimental::SharesStorage(pt_out, static_cast<Tensor *>(out));
-    } else {
-      out->Resize(pt_out->dims());
-    }
   }
 };
 
@@ -469,25 +409,22 @@ class ReshapeGradKernel {
     auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
     d_x->mutable_data(ctx.GetPlace(), d_out->type());
 
-    auto pt_d_x = paddle::experimental::MakePtenDenseTensor(*d_x);
-    auto pt_d_out = paddle::experimental::MakePtenDenseTensor(*d_out);
-
     if (platform::is_cpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
       pten::ReshapeGradKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
     }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (platform::is_gpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeGradKernel(dev_ctx, *pt_d_out.get(), pt_d_x.get());
+      pten::ReshapeGradKernel(dev_ctx, *d_out, d_x);
     }
 #endif
 #ifdef PADDLE_WITH_XPU
     if (platform::is_xpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
       pten::ReshapeGradKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
     }
 #endif
   }
@@ -500,27 +437,22 @@ class ReshapeDoubleGradKernel {
     auto *dd_out = ctx.Output<framework::Tensor>("DDOut");
     dd_out->mutable_data(ctx.GetPlace(), dd_x->type());
 
-    auto pt_dd_x = paddle::experimental::MakePtenDenseTensor(*dd_x);
-    auto pt_dd_out = paddle::experimental::MakePtenDenseTensor(*dd_out);
-
     if (platform::is_cpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
       pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::CPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::CPUContext &>(dev_ctx), *dd_x, dd_out);
     }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (platform::is_gpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeDoubleGradKernel(dev_ctx, *pt_dd_x.get(), pt_dd_out.get());
+      pten::ReshapeDoubleGradKernel(dev_ctx, *dd_x, dd_out);
     }
 #endif
 #ifdef PADDLE_WITH_XPU
     if (platform::is_xpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
       pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::XPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::XPUContext &>(dev_ctx), *dd_x, dd_out);
     }
 #endif
   }
diff --git a/paddle/pten/api/include/kernel_signature.h b/paddle/pten/api/include/kernel_signature.h
index b8e7b0d75bc..863adbea36a 100644
--- a/paddle/pten/api/include/kernel_signature.h
+++ b/paddle/pten/api/include/kernel_signature.h
@@ -102,8 +102,8 @@ using scale_kernel = void (*)(const DeviceContext&,
 using sum_kernel = void (*)(const DeviceContext&,
                             const DenseTensor&,
                             const std::vector<int64_t>&,
-                            bool,
                             DataType,
+                            bool,
                             DenseTensor*);
 
 using subtract_kernel = void (*)(const DeviceContext&,
diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc
index 7fb532e00fe..45eacf342fa 100644
--- a/paddle/pten/core/dense_tensor.cc
+++ b/paddle/pten/core/dense_tensor.cc
@@ -126,6 +126,19 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
   meta_ = std::move(meta);
 }
 
+void DenseTensor::set_meta(const DenseTensorMeta& meta) {
+  PADDLE_ENFORCE(
+      meta.valid(),
+      paddle::platform::errors::InvalidArgument(
+          "Input meta is invalid, please check the meta attribute."));
+  meta_.dims = meta.dims;
+  meta_.dtype = meta.dtype;
+  meta_.is_scalar = meta.is_scalar;
+  meta_.layout = meta.layout;
+  meta_.lod = meta.lod;
+  meta_.offset = meta.offset;
+}
+
 /* @jim19930609: This interface will be further modified util we finalized the
    design for Allocator - Allocation
    For now, we have to temporarily accommodate two independent use cases:
diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h
index 88c459e6d87..47c88aefa53 100644
--- a/paddle/pten/core/dense_tensor.h
+++ b/paddle/pten/core/dense_tensor.h
@@ -131,6 +131,8 @@ class DenseTensor : public TensorBase,
   /// \param meta The meta information of the tensor.
   void set_meta(DenseTensorMeta&& meta);
 
+  void set_meta(const DenseTensorMeta& meta);
+
   /// \brief Test whether the metadata is valid.
   /// \return Whether the metadata is valid.
   bool valid() const noexcept override { return meta_.valid(); }
diff --git a/paddle/pten/infermeta/binary.cc b/paddle/pten/infermeta/binary.cc
index 083fb0fca21..cb605db78d9 100644
--- a/paddle/pten/infermeta/binary.cc
+++ b/paddle/pten/infermeta/binary.cc
@@ -131,8 +131,13 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
 }
 
 DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis) {
+                                     const DenseTensorMeta& y_meta) {
+  return ElementwiseRawInferMeta(x_meta, y_meta, -1);
+}
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis) {
   DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout);
   if (x_meta.dims != y_meta.dims) {
     auto x_dims = x_meta.dims;
diff --git a/paddle/pten/infermeta/binary.h b/paddle/pten/infermeta/binary.h
index c86fc12a20a..658211e48ac 100644
--- a/paddle/pten/infermeta/binary.h
+++ b/paddle/pten/infermeta/binary.h
@@ -42,6 +42,10 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
                                 bool trans_y);
 
 DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis);
+                                     const DenseTensorMeta& y_meta);
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis);
+
 }  // namespace pten
diff --git a/paddle/pten/infermeta/unary.cc b/paddle/pten/infermeta/unary.cc
index 27e1dc9511d..95b419856b7 100644
--- a/paddle/pten/infermeta/unary.cc
+++ b/paddle/pten/infermeta/unary.cc
@@ -232,6 +232,16 @@ DenseTensorMeta ReshapeInferMeta(const DenseTensorMeta& x_meta,
   return InferMetaFromVecValue(x_meta, shape.GetData());
 }
 
+/* Why not use ReduceInferMeta directly?
+   Because we need make InferMetaFunction's args follow the design of api.yaml
+*/
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim) {
+  return ReduceInferMeta(x_meta, axis, keep_dim, dtype);
+}
+
 DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
                                 const std::vector<int64_t>& axis,
                                 bool keep_dim,
diff --git a/paddle/pten/infermeta/unary.h b/paddle/pten/infermeta/unary.h
index ae42cbd5dd2..388a9fca348 100644
--- a/paddle/pten/infermeta/unary.h
+++ b/paddle/pten/infermeta/unary.h
@@ -58,4 +58,9 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
                                 const std::vector<int64_t>& axis,
                                 bool keep_dim,
                                 DataType dtype = DataType::UNDEFINED);
+
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim);
 }  // namespace pten
diff --git a/paddle/pten/kernels/math_kernel.cc b/paddle/pten/kernels/math_kernel.cc
index 423282ab97c..29a2b48fa7c 100644
--- a/paddle/pten/kernels/math_kernel.cc
+++ b/paddle/pten/kernels/math_kernel.cc
@@ -33,8 +33,8 @@ template <typename T, typename Context>
 void SumKernel(const Context& dev_ctx,
                const DenseTensor& x,
                const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
                DenseTensor* out) {
   bool reduce_all = false;
   SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
diff --git a/paddle/pten/kernels/math_kernel.h b/paddle/pten/kernels/math_kernel.h
index 95379baaf35..afef5866931 100644
--- a/paddle/pten/kernels/math_kernel.h
+++ b/paddle/pten/kernels/math_kernel.h
@@ -50,8 +50,8 @@ template <typename T, typename Context>
 void SumKernel(const Context& dev_ctx,
                const DenseTensor& x,
                const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
                DenseTensor* out);
 
 template <typename T, typename Context>
@@ -110,7 +110,7 @@ template <typename T, typename Context>
 DenseTensor Add(const Context& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
   AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
   return dense_out;
@@ -120,7 +120,7 @@ template <typename T, typename Context>
 DenseTensor Subtract(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
   SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);
   return dense_out;
@@ -130,7 +130,7 @@ template <typename T, typename Context>
 DenseTensor Divide(const Context& dev_ctx,
                    const DenseTensor& x,
                    const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
   DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
   return dense_out;
@@ -140,7 +140,7 @@ template <typename T, typename Context>
 DenseTensor Multiply(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
   MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);
   return dense_out;
@@ -163,10 +163,10 @@ DenseTensor Sum(const Context& dev_ctx,
                 const std::vector<int64_t>& axis,
                 DataType dtype,
                 bool keep_dim) {
-  auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim, dtype);
+  auto out_meta = SumInferMeta(x.meta(), axis, dtype, keep_dim);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
-  SumKernel<T, Context>(dev_ctx, x, axis, keep_dim, dtype, &dense_out);
+  SumKernel<T, Context>(dev_ctx, x, axis, dtype, keep_dim, &dense_out);
   return dense_out;
 }
diff --git a/paddle/pten/kernels/reshape_kernel.cc b/paddle/pten/kernels/reshape_kernel.cc
index 9bfad22374c..4b706e9e685 100644
--- a/paddle/pten/kernels/reshape_kernel.cc
+++ b/paddle/pten/kernels/reshape_kernel.cc
@@ -31,9 +31,8 @@ void ReshapeKernel(const Context& dev_ctx,
     out->ResizeAndAllocate(out_meta.dims);
     return;
   }
-
-  out->Resize(x.dims());
-  out->mutable_data(x.place());
+  out->set_meta(out_meta);
+  out->mutable_data(dev_ctx.GetPlace());
   pten::Copy(dev_ctx, x, false, out);
   out->Resize(out_meta.dims);
   out->ResetLoD(x.lod());
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index f37b45eef1b..e5ccd6b0405 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -3,7 +3,7 @@
   output : Tensor
   infer_meta :
     func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
   kernel :
     func : add
 
@@ -40,7 +40,7 @@
   output : Tensor
   infer_meta :
     func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
   kernel :
     func : divide
 
@@ -135,7 +135,7 @@
   output : Tensor
   infer_meta :
     func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
   kernel :
     func : multiply
 
@@ -166,19 +166,19 @@
   output : Tensor
   infer_meta :
     func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
   kernel :
     func : subtract
 
 - api : sum
   args : (const Tensor& x, const std::vector<int64_t>& axis={}, DataType dtype=DataType::UNDEFINED, bool keep_dim=false)
   output : Tensor
-  infer_meta :
-    func : ReduceInferMeta
-    param: [x, axis, keep_dim, dtype]
-  kernel :
+  infer_meta :
+    func : SumInferMeta
+    param: [x, axis, dtype, keep_dim]
+  kernel :
     func : sum
-    param : [x, axis, keep_dim, dtype]
+    param : [x, axis, dtype, keep_dim]
   data_type : x
 
 - api : zeros_like
-- 
GitLab