From 7e29cea97312bb898398d4a535c40ec4f81b2eb6 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Sun, 30 Jan 2022 14:32:57 +0800 Subject: [PATCH] [PTen] Change all InferMeta functions (#39222) * change unary infermeta * change other infermeta * change all infermeta format * resolve conflict * fix test failed * resolve reshape conflict * fix compile failed * adapt auto api gen * fix reshape failed * fix concat failed * resolve conflict --- paddle/fluid/framework/custom_kernel_test.cc | 6 +- paddle/fluid/framework/infershape_utils.cc | 8 ++ paddle/pten/api/lib/api_utils.h | 35 +++-- paddle/pten/api/lib/manual_api.cc | 11 +- paddle/pten/core/infermeta_utils.h | 10 -- paddle/pten/core/meta_tensor.cc | 28 ++-- paddle/pten/core/meta_tensor.h | 21 ++- paddle/pten/infermeta/CMakeLists.txt | 4 +- paddle/pten/infermeta/backward.cc | 16 ++- paddle/pten/infermeta/backward.h | 16 ++- paddle/pten/infermeta/binary.cc | 68 +++++----- paddle/pten/infermeta/binary.h | 55 ++++---- paddle/pten/infermeta/multiary.cc | 23 ++-- paddle/pten/infermeta/multiary.h | 10 +- paddle/pten/infermeta/nullary.cc | 24 ++-- paddle/pten/infermeta/nullary.h | 29 +++-- paddle/pten/infermeta/unary.cc | 123 ++++++++++-------- paddle/pten/infermeta/unary.h | 65 ++++----- paddle/pten/kernels/cast_kernel.h | 5 +- paddle/pten/kernels/complex_kernel.h | 5 +- paddle/pten/kernels/concat_kernel.h | 12 +- paddle/pten/kernels/dot_kernel.h | 5 +- paddle/pten/kernels/empty_kernel.h | 10 +- paddle/pten/kernels/flatten_kernel.h | 5 +- paddle/pten/kernels/full_kernel.h | 10 +- paddle/pten/kernels/funcs/concat_funcs.h | 2 +- paddle/pten/kernels/math_kernel.h | 31 +++-- paddle/pten/kernels/matmul_kernel.h | 5 +- paddle/pten/kernels/reshape_kernel.cc | 11 +- paddle/pten/kernels/reshape_kernel.h | 5 +- paddle/pten/kernels/scale_kernel.h | 5 +- paddle/pten/kernels/sign_kernel.h | 5 +- paddle/pten/tests/api/scale_api.h | 11 +- python/paddle/utils/code_gen/api.yaml | 2 +- python/paddle/utils/code_gen/api_gen.py | 14 +- .../paddle/utils/code_gen/backward_api_gen.py | 13 +- python/paddle/utils/code_gen/gen_utils.py | 21 ++- 37 files changed, 414 insertions(+), 315 deletions(-) diff --git a/paddle/fluid/framework/custom_kernel_test.cc b/paddle/fluid/framework/custom_kernel_test.cc index 708b7bbe8a..5f01681624 100644 --- a/paddle/fluid/framework/custom_kernel_test.cc +++ b/paddle/fluid/framework/custom_kernel_test.cc @@ -212,11 +212,13 @@ TEST(CustomKernel, custom_kernel_dot) { kernel_context.EmplaceBackAttr(fake_attr_int64_vec); kernel_context.EmplaceBackAttr(fake_attr_int_vec); - auto out_meta = pten::DotInferMeta(dense_x->meta(), dense_y->meta()); auto dense_out = std::make_shared( pten::make_intrusive( pten::TransToFluidPlace(backend)), - std::move(out_meta)); + pten::DenseTensorMeta()); + + pten::MetaTensor meta_out(dense_out.get()); + pten::DotInferMeta(*dense_x, *dense_y, &meta_out); kernel_context.EmplaceBackOutput(dense_out.get()); // idx:0 index:[0,1) // fake_input_vec: idx:1, index:[1,3) diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index 80787c3b87..e1d7190a9e 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -186,6 +186,14 @@ class CompatMetaTensor : public pten::MetaTensor { } } + void share_meta(const MetaTensor& meta_tensor) override { + set_dims(meta_tensor.dims()); + set_dtype(meta_tensor.dtype()); + // VarDesc doesn't contain layout, so we cannot share layout + // set_layout(meta_tensor.layout()); + share_lod(meta_tensor); + } + 
private: const LoD& GetRuntimeLoD() const { auto* var = BOOST_GET_CONST(Variable*, var_); diff --git a/paddle/pten/api/lib/api_utils.h b/paddle/pten/api/lib/api_utils.h index 6332132fac..3d18cc611c 100644 --- a/paddle/pten/api/lib/api_utils.h +++ b/paddle/pten/api/lib/api_utils.h @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/compat/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/meta_tensor.h" namespace paddle { namespace experimental { @@ -44,44 +45,38 @@ inline std::unique_ptr> TensorToDenseTensor( /* ----------------- for infer_meta --------------------- */ -inline const pten::DenseTensorMeta& GetDenseTensorMeta( - const pten::DenseTensor& tensor) { - return tensor.meta(); +inline pten::MetaTensor MakeMetaTensor(const pten::DenseTensor& tensor) { + return pten::MetaTensor(tensor); } -inline std::vector GetDenseTensorMeta( +inline std::vector MakeMetaTensor( const std::vector& tensors) { - std::vector metas; - metas.reserve(tensors.size()); + std::vector meta_tensors; + meta_tensors.reserve(tensors.size()); for (const auto& t : tensors) { - metas.push_back(t.meta()); + meta_tensors.emplace_back(t); } - return metas; + return meta_tensors; } /* ------------------ for output ----------------------- */ -inline pten::DenseTensor* SetKernelOutput(const pten::DenseTensorMeta& meta, - Backend backend, - Tensor* out) { +inline pten::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) { auto dense_tensor = std::make_shared( pten::make_intrusive(pten::TransToFluidPlace(backend)), - meta); + pten::DenseTensorMeta()); out->set_impl(dense_tensor); return dense_tensor.get(); } inline std::vector SetKernelOutput( - const std::vector& metas, - Backend backend, - std::vector* out) { - size_t n = metas.size(); - out->reserve(n); - std::vector results(n); - for (size_t i = 0; i < n; ++i) { + size_t out_size, Backend backend, std::vector* out) { + out->reserve(out_size); + std::vector results(out_size); + for (size_t i = 0; i < out_size; ++i) { auto tensor_ptr = std::make_shared( pten::make_intrusive(pten::TransToFluidPlace(backend)), - metas[i]); + pten::DenseTensorMeta()); results[i] = tensor_ptr.get(); out->emplace_back(); out->back().set_impl(tensor_ptr); diff --git a/paddle/pten/api/lib/manual_api.cc b/paddle/pten/api/lib/manual_api.cc index c7d05533bb..5b697c3ff7 100644 --- a/paddle/pten/api/lib/manual_api.cc +++ b/paddle/pten/api/lib/manual_api.cc @@ -57,20 +57,19 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { kernel_context.EmplaceBackInput(dense_x.get()); kernel_context.EmplaceBackAttr(blocking); - // 4. InferMeta - auto out_meta = UnchangedInferMeta(dense_x->meta()); - - // 5. Prepare outputs + // 4. Prepare outputs & InferMeta auto dense_out = std::make_shared( pten::make_intrusive( pten::TransToFluidPlace(backend)), - std::move(out_meta)); + pten::DenseTensorMeta()); + pten::MetaTensor meta_out(dense_out.get()); + pten::UnchangedInferMeta(*dense_x, &meta_out); dense_out->mutable_data(pten::TransToFluidPlace(backend)); kernel_context.EmplaceBackOutput(dense_out.get()); Tensor out; out.set_impl(dense_out); - // 6. Call kernel + // 5. Call kernel kernel(&kernel_context); return out; diff --git a/paddle/pten/core/infermeta_utils.h b/paddle/pten/core/infermeta_utils.h index 0f45185c83..fecfab7153 100644 --- a/paddle/pten/core/infermeta_utils.h +++ b/paddle/pten/core/infermeta_utils.h @@ -26,16 +26,6 @@ limitations under the License. 
*/ namespace pten { -// TODO(chenweihang): add other flags if needed -struct MetaConfig { - bool is_runtime{true}; - - MetaConfig() = default; - - // supporting implicit construction is easier to use - MetaConfig(bool is_runtime) : is_runtime(is_runtime) {} // NOLINT -}; - class InferMetaContext { public: InferMetaContext() = default; diff --git a/paddle/pten/core/meta_tensor.cc b/paddle/pten/core/meta_tensor.cc index fd558d20a0..d205ee1ca4 100644 --- a/paddle/pten/core/meta_tensor.cc +++ b/paddle/pten/core/meta_tensor.cc @@ -33,7 +33,7 @@ void MetaTensor::set_dims(const DDim& dims) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_))->dims = dims; } else { - PADDLE_THROW(paddle::platform::errors::Unimplemented( + PADDLE_THROW(pten::errors::Unimplemented( "Unsupported setting dims for `%s`.", tensor_->type_info().name())); } } @@ -43,7 +43,7 @@ void MetaTensor::set_dtype(DataType dtype) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_)) ->dtype = dtype; } else { - PADDLE_THROW(paddle::platform::errors::Unimplemented( + PADDLE_THROW(pten::errors::Unimplemented( "Unsupported settting dtype for `%s`.", tensor_->type_info().name())); } } @@ -53,7 +53,7 @@ void MetaTensor::set_layout(DataLayout layout) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_)) ->layout = layout; } else { - PADDLE_THROW(paddle::platform::errors::Unimplemented( + PADDLE_THROW(pten::errors::Unimplemented( "Unsupported settting layout for `%s`.", tensor_->type_info().name())); } } @@ -63,9 +63,9 @@ void MetaTensor::share_lod(const MetaTensor& meta_tensor) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_))->lod = meta_tensor.lod(); } else { - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Unsupported share lod inplace for `%s`.", - tensor_->type_info().name())); + PADDLE_THROW( + pten::errors::Unimplemented("Unsupported sharing lod inplace for `%s`.", + tensor_->type_info().name())); } } @@ -73,8 +73,20 @@ const LoD& MetaTensor::lod() const { if (pten::DenseTensor::classof(tensor_)) { return static_cast(tensor_)->lod(); } else { - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Unsupported setting dims for `%s`.", tensor_->type_info().name())); + PADDLE_THROW(pten::errors::Unimplemented("Unsupported getting lod of `%s`.", + tensor_->type_info().name())); + } +} + +void MetaTensor::share_meta(const MetaTensor& meta_tensor) { + if (pten::DenseTensor::classof(tensor_)) { + set_dims(meta_tensor.dims()); + set_dtype(meta_tensor.dtype()); + set_layout(meta_tensor.layout()); + share_lod(meta_tensor); + } else { + PADDLE_THROW(pten::errors::Unimplemented( + "Unsupported sharing meta for `%s`.", tensor_->type_info().name())); } } diff --git a/paddle/pten/core/meta_tensor.h b/paddle/pten/core/meta_tensor.h index ae2ec60626..6ccb698fe1 100644 --- a/paddle/pten/core/meta_tensor.h +++ b/paddle/pten/core/meta_tensor.h @@ -23,11 +23,26 @@ limitations under the License. 
*/ namespace pten { +// TODO(chenweihang): add other flags if needed +struct MetaConfig { + bool is_runtime{true}; + + MetaConfig() = default; + + // supporting implicit construction is easier to use + MetaConfig(bool is_runtime) : is_runtime(is_runtime) {} // NOLINT +}; + class MetaTensor { public: - explicit MetaTensor(TensorBase* tensor) : tensor_(tensor) {} - MetaTensor() = default; + + // supporting implicit construction is easier to use + MetaTensor(TensorBase* tensor) : tensor_(tensor) {} // NOLINT + MetaTensor(const TensorBase& tensor) // NOLINT + : tensor_(const_cast(&tensor)) {} + MetaTensor(TensorBase& tensor) : tensor_(&tensor) {} // NOLINT + MetaTensor(const MetaTensor&) = default; MetaTensor(MetaTensor&&) = default; MetaTensor& operator=(const MetaTensor&) = delete; @@ -42,7 +57,9 @@ class MetaTensor { virtual void set_dims(const DDim& dims); virtual void set_dtype(DataType dtype); virtual void set_layout(DataLayout layout); + virtual void share_lod(const MetaTensor& meta_tensor); + virtual void share_meta(const MetaTensor& meta_tensor); private: // Because the lod in compiletime and runtime is different, diff --git a/paddle/pten/infermeta/CMakeLists.txt b/paddle/pten/infermeta/CMakeLists.txt index 2216d38708..c077e7b4c5 100644 --- a/paddle/pten/infermeta/CMakeLists.txt +++ b/paddle/pten/infermeta/CMakeLists.txt @@ -1,2 +1,2 @@ -cc_library(infermeta SRCS nullary.cc unary.cc binary.cc multiary.cc DEPS convert_utils infermeta_utils) -cc_library(backward_infermeta SRCS backward.cc DEPS convert_utils) +cc_library(infermeta SRCS nullary.cc unary.cc binary.cc multiary.cc DEPS convert_utils meta_tensor infermeta_utils) +cc_library(backward_infermeta SRCS backward.cc DEPS meta_tensor convert_utils) diff --git a/paddle/pten/infermeta/backward.cc b/paddle/pten/infermeta/backward.cc index 5a66e8cd2e..b7bb17bdd1 100644 --- a/paddle/pten/infermeta/backward.cc +++ b/paddle/pten/infermeta/backward.cc @@ -16,13 +16,15 @@ limitations under the License. */ namespace pten { -std::tuple MatmulGradInferMeta( - const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta, - const DenseTensorMeta& out_grad_meta, - bool transpose_x, - bool transpose_y) { - return std::make_tuple(x_meta, y_meta); +void MatmulGradInferMeta(const MetaTensor& x, + const MetaTensor& y, + const MetaTensor& out_grad_meta, + bool transpose_x, + bool transpose_y, + MetaTensor* dx, + MetaTensor* dy) { + dx->share_meta(x); + dy->share_meta(y); } } // namespace pten diff --git a/paddle/pten/infermeta/backward.h b/paddle/pten/infermeta/backward.h index 03bdb3a962..d6b9686141 100644 --- a/paddle/pten/infermeta/backward.h +++ b/paddle/pten/infermeta/backward.h @@ -15,15 +15,17 @@ limitations under the License. */ #pragma once #include -#include "paddle/pten/core/tensor_meta.h" + +#include "paddle/pten/core/meta_tensor.h" namespace pten { -std::tuple MatmulGradInferMeta( - const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta, - const DenseTensorMeta& out_grad_meta, - bool transpose_x, - bool transpose_y); +void MatmulGradInferMeta(const MetaTensor& x, + const MetaTensor& y, + const MetaTensor& out_grad_meta, + bool transpose_x, + bool transpose_y, + MetaTensor* dx, + MetaTensor* dy); } // namespace pten diff --git a/paddle/pten/infermeta/binary.cc b/paddle/pten/infermeta/binary.cc index cb605db78d..02d78b5caa 100644 --- a/paddle/pten/infermeta/binary.cc +++ b/paddle/pten/infermeta/binary.cc @@ -12,15 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ -// See Note [ Why still include the fluid headers? ] #include "paddle/pten/infermeta/binary.h" #include "paddle/pten/kernels/funcs/common_shape.h" namespace pten { -DenseTensorMeta DotInferMeta(const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta) { - auto x_dims = x_meta.dims; +void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { + auto x_dims = x.dims(); auto x_rank = static_cast(x_dims.size()); PADDLE_ENFORCE_EQ(true, 1 == x_rank || 2 == x_rank, @@ -29,10 +27,10 @@ DenseTensorMeta DotInferMeta(const DenseTensorMeta& x_meta, "should be 1 or 2", x_dims.to_str())); - auto y_dims = y_meta.dims; + auto y_dims = y.dims(); PADDLE_ENFORCE_EQ( true, - x_rank == (size_t)y_dims.size(), + x_rank == static_cast(y_dims.size()), paddle::platform::errors::PreconditionNotMet( "ShapeError: The shape of input tensor Y: %s should match with " "input tenosr X: %s", @@ -56,25 +54,27 @@ DenseTensorMeta DotInferMeta(const DenseTensorMeta& x_meta, y_dims.to_str())); x_dims[x_dims.size() - 1] = 1; - DenseTensorMeta return_meta(x_meta.dtype, x_dims, x_meta.layout); - return return_meta; + out->set_dims(x_dims); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); } -DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta, - bool trans_x, - bool trans_y) { - std::vector dims_x = pten::framework::vectorize(x_meta.dims); - std::vector dims_y = pten::framework::vectorize(y_meta.dims); +void MatmulInferMeta(const MetaTensor& x, + const MetaTensor& y, + bool trans_x, + bool trans_y, + MetaTensor* out) { + std::vector dims_x = pten::framework::vectorize(x.dims()); + std::vector dims_y = pten::framework::vectorize(y.dims()); auto ndims_x = dims_x.size(); auto ndims_y = dims_y.size(); PADDLE_ENFORCE_GT(ndims_x, - 0, + 0UL, paddle::platform::errors::InvalidArgument( "The Input(x) dims size must be greater than 0," " but reviced dims size is 0. ")); PADDLE_ENFORCE_GT(ndims_y, - 0, + 0UL, paddle::platform::errors::InvalidArgument( "The Input(y) dims size must be greater than 0," " but reviced dims size is 0. 
")); @@ -127,21 +127,24 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta, auto ddim_out = pten::framework::make_ddim(new_dims); - return {x_meta.dtype, ddim_out, x_meta.layout}; + out->set_dims(ddim_out); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); } -DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta) { - return ElementwiseRawInferMeta(x_meta, y_meta, -1); +void ElementwiseInferMeta(const MetaTensor& x, + const MetaTensor& y, + MetaTensor* out) { + return ElementwiseRawInferMeta(x, y, -1, std::move(out)); } -DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta, - int axis) { - DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout); - if (x_meta.dims != y_meta.dims) { - auto x_dims = x_meta.dims; - auto y_dims = y_meta.dims; +void ElementwiseRawInferMeta(const MetaTensor& x, + const MetaTensor& y, + int axis, + MetaTensor* out) { + if (x.dims() != y.dims()) { + auto x_dims = x.dims(); + auto y_dims = y.dims(); int max_dim = std::max(x_dims.size(), y_dims.size()); if (x_dims.size() == y_dims.size()) { PADDLE_ENFORCE_EQ((axis == -1) || (axis == 0), @@ -174,10 +177,15 @@ DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta, out_dims_array.data(), max_dim, axis); - return_meta.dims = pten::framework::make_ddim(out_dims_array); + auto out_dims = pten::framework::make_ddim(out_dims_array); + out->set_dims(out_dims); + } else { + out->set_dims(x.dims()); } - return_meta.lod = x_meta.lod; - return return_meta; + + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); + out->share_lod(x); } } // namespace pten diff --git a/paddle/pten/infermeta/binary.h b/paddle/pten/infermeta/binary.h index 658211e48a..9920824649 100644 --- a/paddle/pten/infermeta/binary.h +++ b/paddle/pten/infermeta/binary.h @@ -14,38 +14,35 @@ limitations under the License. */ #pragma once -// See Note [ Why still include the fluid headers? ] -#include "paddle/pten/core/tensor_meta.h" +#include "paddle/pten/core/meta_tensor.h" namespace pten { // Common InferMeta Functions for binary operators, The format like: // -// 1. DenseTensorMeta [OpName]InferMeta(const DenseTensorMeta& x_meta, ...) -// {} -// 2. std::pair [OpName]InferMeta(const -// DenseTensorMeta& -// x_meta, ...) {} -// 3. std::tuple -// [OpName]InferMeta(const -// DenseTensorMeta& x_meta, ...) -// NOTE: The name "InferMeta" may be not appropriate. "InferMeta" may be good. -// Because functions in this file -// not only can infer shape, but alse need infer lod or other useful data. - -DenseTensorMeta DotInferMeta(const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta); - -DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta, - bool trans_x, - bool trans_y); - -DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta); - -DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta, - const DenseTensorMeta& y_meta, - int axis); - +// 1. void [FunctionDesc|OpName]InferMeta(const MetaTensor& x, +// const MetaTensor& y, +// ..., +// MetaTensor* out) {} +// +// NOTE: The name "InferShape" may be not appropriate. "InferMeta" may be good. +// Because functions in this file not only can infer shape, but also need +// infer lod or other useful data. 
+ +void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out); + +void MatmulInferMeta(const MetaTensor& x, + const MetaTensor& y, + bool trans_x, + bool trans_y, + MetaTensor* out); + +void ElementwiseInferMeta(const MetaTensor& x, + const MetaTensor& y, + MetaTensor* out); + +void ElementwiseRawInferMeta(const MetaTensor& x_meta, + const MetaTensor& y_meta, + int axis, + MetaTensor* out); } // namespace pten diff --git a/paddle/pten/infermeta/multiary.cc b/paddle/pten/infermeta/multiary.cc index ecd0396a28..869e87df5d 100644 --- a/paddle/pten/infermeta/multiary.cc +++ b/paddle/pten/infermeta/multiary.cc @@ -18,18 +18,19 @@ limitations under the License. */ #include "paddle/pten/kernels/funcs/concat_funcs.h" namespace pten { -DenseTensorMeta ConcatInferMeta(const std::vector& x_meta, - const Scalar& axis_scalar, - bool is_runtime) { - PADDLE_ENFORCE_GE(x_meta.size(), - 0, +void ConcatInferMeta(const std::vector& x, + const Scalar& axis_scalar, + MetaTensor* out, + MetaConfig config) { + PADDLE_ENFORCE_GE(x.size(), + 0UL, paddle::platform::errors::InvalidArgument( "The size of input meta vector should be greater" "than 0.")); int axis = axis_scalar.to(); // 1. calculate axis - int rank = x_meta[0].dims.size(); + int rank = x.at(0).dims().size(); PADDLE_ENFORCE_EQ( axis >= -rank && axis < rank, true, @@ -44,13 +45,15 @@ DenseTensorMeta ConcatInferMeta(const std::vector& x_meta, // 2. calculate out dims std::vector x_dims; - for (auto meta : x_meta) { - x_dims.push_back(meta.dims); + for (auto& x_t : x) { + x_dims.push_back(x_t.dims()); } pten::DDim out_dim = - pten::funcs::ComputeAndCheckShape(is_runtime, x_dims, axis); + pten::funcs::ComputeAndCheckShape(config.is_runtime, x_dims, axis); - return {x_meta[0].dtype, out_dim, x_meta[0].layout}; + out->set_dims(out_dim); + out->set_dtype(x.at(0).dtype()); + out->set_layout(x.at(0).layout()); } } // namespace pten diff --git a/paddle/pten/infermeta/multiary.h b/paddle/pten/infermeta/multiary.h index f8d5468e50..c251699da4 100644 --- a/paddle/pten/infermeta/multiary.h +++ b/paddle/pten/infermeta/multiary.h @@ -15,12 +15,12 @@ limitations under the License. */ #pragma once #include "paddle/pten/common/scalar.h" -#include "paddle/pten/core/tensor_meta.h" +#include "paddle/pten/core/meta_tensor.h" namespace pten { -// TODO(chentianyu03) use std::vector as InferMeta inputs -DenseTensorMeta ConcatInferMeta(const std::vector& x_meta, - const Scalar& axis_scalar, - bool is_runtime); +void ConcatInferMeta(const std::vector& x, + const Scalar& axis_scalar, + MetaTensor* out, + MetaConfig config = MetaConfig()); } // namespace pten diff --git a/paddle/pten/infermeta/nullary.cc b/paddle/pten/infermeta/nullary.cc index 19e11f049f..fd9b2a8f71 100644 --- a/paddle/pten/infermeta/nullary.cc +++ b/paddle/pten/infermeta/nullary.cc @@ -12,23 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -// See Note [ Why still include the fluid headers? 
] #include "paddle/pten/infermeta/nullary.h" namespace pten { -DenseTensorMeta CreateInferMeta(const std::vector& shape, - DataType dtype, - DataLayout layout) { - const auto& out_dims = pten::framework::make_ddim(shape); - return {dtype, out_dims, layout}; +void CreateInferMeta(const std::vector& shape, + DataType dtype, + DataLayout layout, + MetaTensor* out) { + auto out_dims = pten::framework::make_ddim(shape); + out->set_dims(out_dims); + out->set_dtype(dtype); + out->set_layout(layout); } -DenseTensorMeta CreateInferMeta(const ScalarArray& shape, - DataType dtype, - DataLayout layout) { - const auto& out_dims = pten::framework::make_ddim(shape.GetData()); - return {dtype, out_dims, layout}; +void CreateInferMeta(const ScalarArray& shape, + DataType dtype, + DataLayout layout, + MetaTensor* out) { + CreateInferMeta(shape.GetData(), dtype, layout, out); } } // namespace pten diff --git a/paddle/pten/infermeta/nullary.h b/paddle/pten/infermeta/nullary.h index 721a39bb3a..f0b6aad26b 100644 --- a/paddle/pten/infermeta/nullary.h +++ b/paddle/pten/infermeta/nullary.h @@ -15,24 +15,27 @@ limitations under the License. */ #pragma once #include "paddle/pten/common/scalar_array.h" -#include "paddle/pten/core/tensor_meta.h" +#include "paddle/pten/core/meta_tensor.h" namespace pten { // Common InferMeta Functions for 0-nary operators(no input tensor), The format // like: // -// 1. DenseTensorMeta [OpName]InferMeta( ...) -// NOTE: The name "InferMeta" may be not appropriate. "InferMeta" may be good. -// Because functions in this file -// not only can infer shape, but alse need infer lod or other useful data. - -DenseTensorMeta CreateInferMeta(const std::vector& shape, - DataType dtype, - DataLayout layout); - -DenseTensorMeta CreateInferMeta(const ScalarArray& shape, - DataType dtype, - DataLayout layout); +// 1. void [FunctionDesc|OpName]InferMeta(..., MetaTensor* out) +// +// NOTE: The name "InferShape" may be not appropriate. "InferMeta" may be good. +// Because functions in this file not only can infer shape, but also need +// infer lod or other useful data. + +void CreateInferMeta(const std::vector& shape, + DataType dtype, + DataLayout layout, + MetaTensor* out); + +void CreateInferMeta(const ScalarArray& shape, + DataType dtype, + DataLayout layout, + MetaTensor* out); } // namespace pten diff --git a/paddle/pten/infermeta/unary.cc b/paddle/pten/infermeta/unary.cc index e1a30d10e1..57bac52cef 100644 --- a/paddle/pten/infermeta/unary.cc +++ b/paddle/pten/infermeta/unary.cc @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include +#include "paddle/pten/common/data_type.h" #include "paddle/pten/core/infermeta_utils.h" namespace pten { @@ -23,26 +24,22 @@ namespace pten { void UnchangedInferMetaNew(MetaConfig config, const MetaTensor& x, MetaTensor* out) { - out->set_dims(x.dims()); - out->set_dtype(x.dtype()); - out->set_layout(x.layout()); - out->share_lod(x); + out->share_meta(x); } DenseTensorMeta UnchangedInferMeta(const DenseTensorMeta& x_meta) { return x_meta; } -DenseTensorMeta ReductionInferMeta(const DenseTensorMeta& x_meta) { - const auto& out_dims = pten::framework::make_ddim({1}); - DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout); - return return_meta; +void UnchangedInferMeta(const MetaTensor& x, MetaTensor* out) { + out->share_meta(x); } -DenseTensorMeta FlattenInferMeta(const DenseTensorMeta& x_meta, - int start_axis, - int stop_axis) { - auto& x_dims = x_meta.dims; +void FlattenInferMeta(const MetaTensor& x, + int start_axis, + int stop_axis, + MetaTensor* out) { + auto x_dims = x.dims(); int in_dims_size = x_dims.size(); if (start_axis < 0) { start_axis = start_axis + in_dims_size; @@ -75,29 +72,30 @@ DenseTensorMeta FlattenInferMeta(const DenseTensorMeta& x_meta, out_shape.push_back(x_dims[i]); } const auto& out_dims = pten::framework::make_ddim(out_shape); - DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout); + out->set_dims(out_dims); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); - if (x_dims[0] == return_meta.dims[0]) { + if (x_dims[0] == out_dims[0]) { // Only pass LoD when the first dimension of output and Input(X) // are the same. - return_meta.lod = x_meta.lod; + out->share_lod(x); } - - return return_meta; } -DenseTensorMeta CastInferMeta(const DenseTensorMeta& x_meta, - const DataType out_dtype) { - DenseTensorMeta out_meta(out_dtype, x_meta.dims, x_meta.layout); - return out_meta; +void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out) { + out->set_dims(x.dims()); + out->set_dtype(out_dtype); + out->set_layout(x.layout()); } -DenseTensorMeta CreateLikeInferMeta(const DenseTensorMeta& x_meta, - DataType dtype, - DataLayout layout) { - return {dtype == DataType::UNDEFINED ? x_meta.dtype : dtype, - x_meta.dims, - layout == DataLayout::UNDEFINED ? x_meta.layout : layout}; +void CreateLikeInferMeta(const MetaTensor& x, + DataType dtype, + DataLayout layout, + MetaTensor* out) { + out->set_dims(x.dims()); + out->set_dtype(dtype == DataType::UNDEFINED ? x.dtype() : dtype); + out->set_layout(layout == DataLayout::UNDEFINED ? x.layout() : layout); } static pten::framework::DDim ValidateShape( @@ -220,46 +218,51 @@ static pten::framework::DDim ValidateShape( return pten::framework::make_ddim(output_shape); } -DenseTensorMeta InferMetaFromVecValue(const DenseTensorMeta& x_meta, - const std::vector& shape) { +void InferMetaFromVecValue(const MetaTensor& x, + const std::vector& shape, + MetaTensor* out) { PADDLE_ENFORCE_EQ(!shape.empty(), true, paddle::platform::errors::InvalidArgument( "The parameter 'shape' in ReshapeOp must be set. " "But received 'shape' is empty.")); - auto x_dims = x_meta.dims; + auto x_dims = x.dims(); auto out_dims = ValidateShape(shape, x_dims); - DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout); - if (x_dims[0] == return_meta.dims[0]) { + out->set_dims(out_dims); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); + if (x_dims[0] == out_dims[0]) { // Only pass LoD when the first dimension of output and Input(X) // are the same. 
- return_meta.lod = x_meta.lod; + out->share_lod(x); } - return return_meta; } -DenseTensorMeta ReshapeInferMeta(const DenseTensorMeta& x_meta, - const ScalarArray& shape) { - return InferMetaFromVecValue(x_meta, shape.GetData()); +void ReshapeInferMeta(const MetaTensor& x, + const ScalarArray& shape, + MetaTensor* out) { + InferMetaFromVecValue(x, shape.GetData(), out); } /* Why not use ReduceInferMeta directly? Because we need make InferMetaFunction's args follow the design of api.yaml */ -DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta, - const std::vector& axis, - DataType dtype, - bool keep_dim) { - return ReduceInferMeta(x_meta, axis, keep_dim, dtype); +void SumInferMeta(const MetaTensor& x, + const std::vector& axis, + DataType dtype, + bool keep_dim, + MetaTensor* out) { + ReduceInferMeta(x, axis, keep_dim, dtype, std::move(out)); } -DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta, - const std::vector& axis, - bool keep_dim, - DataType dtype) { +void ReduceInferMeta(const MetaTensor& x, + const std::vector& axis, + bool keep_dim, + DataType dtype, + MetaTensor* out) { bool reduce_all = true; std::set dims_set(axis.begin(), axis.end()); - for (int64_t i = 0; i < x_meta.dims.size(); ++i) { + for (int64_t i = 0; i < x.dims().size(); ++i) { if (dims_set.find(i) == dims_set.end()) { reduce_all = false; break; @@ -268,19 +271,19 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta, std::vector out_dim_vector; if (keep_dim) { - for (int64_t i = 0; i < x_meta.dims.size(); ++i) { + for (int64_t i = 0; i < x.dims().size(); ++i) { if (reduce_all || dims_set.find(i) != dims_set.end()) { out_dim_vector.push_back(1); } else { - out_dim_vector.push_back(x_meta.dims.at(i)); + out_dim_vector.push_back(x.dims().at(i)); } } } else { - for (int64_t i = 0; i < x_meta.dims.size(); ++i) { + for (int64_t i = 0; i < x.dims().size(); ++i) { if (reduce_all || dims_set.find(i) != dims_set.end()) { continue; } else { - out_dim_vector.push_back(x_meta.dims.at(i)); + out_dim_vector.push_back(x.dims().at(i)); } } @@ -294,16 +297,24 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta, if (dtype != DataType::UNDEFINED) { out_dtype = dtype; } else { - if (x_meta.dtype == DataType::BOOL || x_meta.dtype == DataType::INT32 || - x_meta.dtype == DataType::INT64) { + if (x.dtype() == DataType::BOOL || x.dtype() == DataType::INT32 || + x.dtype() == DataType::INT64) { out_dtype = DataType::INT64; } else { - out_dtype = x_meta.dtype; + out_dtype = x.dtype(); } } - DenseTensorMeta return_meta(out_dtype, out_dim, x_meta.layout); - return return_meta; + out->set_dims(out_dim); + out->set_dtype(out_dtype); + out->set_layout(x.layout()); +} + +void ReduceInferMeta(const MetaTensor& x, + const std::vector& axis, + bool keep_dim, + MetaTensor* out) { + ReduceInferMeta(x, axis, keep_dim, DataType::UNDEFINED, out); } } // namespace pten diff --git a/paddle/pten/infermeta/unary.h b/paddle/pten/infermeta/unary.h index 670c70de84..c1a939c2de 100644 --- a/paddle/pten/infermeta/unary.h +++ b/paddle/pten/infermeta/unary.h @@ -16,9 +16,7 @@ limitations under the License. */ // See Note [ Why still include the fluid headers? 
] #include "paddle/pten/common/scalar_array.h" -#include "paddle/pten/core/infermeta_utils.h" #include "paddle/pten/core/meta_tensor.h" -#include "paddle/pten/core/tensor_meta.h" namespace pten { @@ -26,45 +24,54 @@ class MetaConfig; // Common InferMeta Functions for unary operators, The format like: // -// void [OpName]InferMeta(const MetaTensor& x, ..., MetaTensor* out) {} +// void [FunctionDesc|OpName]InferMeta(const MetaTensor& x, ..., MetaTensor* +// out) {} // // NOTE: The name "InferShape" may be not appropriate. "InferMeta" may be good. // Because functions in this file not only can infer shape, but also need // infer lod or other useful data. -// TODO(chenweihang): update all InferMeta function format in next pr, -// now add UnchangedInferMetaNew for test new format +// TODO(chenweihang): to avoid conflict, remove this function in next PR void UnchangedInferMetaNew(MetaConfig config, const MetaTensor& x, MetaTensor* out); -DenseTensorMeta UnchangedInferMeta(const DenseTensorMeta& x_meta); +void UnchangedInferMeta(const MetaTensor& x, MetaTensor* out); -DenseTensorMeta ReductionInferMeta(const DenseTensorMeta& x_meta); +void FlattenInferMeta(const MetaTensor& x, + int start_axis, + int stop_axis, + MetaTensor* out); -DenseTensorMeta FlattenInferMeta(const DenseTensorMeta& x_meta, - int start_axis, - int stop_axis); -DenseTensorMeta CastInferMeta(const DenseTensorMeta& x_meta, - const DataType out_dtype); +void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out); -DenseTensorMeta CreateLikeInferMeta(const DenseTensorMeta& x_meta, - DataType dtype, - DataLayout layout); +void CreateLikeInferMeta(const MetaTensor& x, + DataType dtype, + DataLayout layout, + MetaTensor* out); -DenseTensorMeta InferMetaFromVecValue(const DenseTensorMeta& x_meta, - const std::vector& shape); - -DenseTensorMeta ReshapeInferMeta(const DenseTensorMeta& x_meta, - const ScalarArray& shape); - -DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta, - const std::vector& axis, - bool keep_dim, - DataType dtype = DataType::UNDEFINED); +void InferMetaFromVecValue(const MetaTensor& x, + const std::vector& shape, + MetaTensor* out); -DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta, - const std::vector& axis, - DataType dtype, - bool keep_dim); +void ReshapeInferMeta(const MetaTensor& x, + const ScalarArray& shape, + MetaTensor* out); + +void ReduceInferMeta(const MetaTensor& x, + const std::vector& axis, + bool keep_dim, + DataType dtype, + MetaTensor* out); + +void ReduceInferMeta(const MetaTensor& x, + const std::vector& axis, + bool keep_dim, + MetaTensor* out); + +void SumInferMeta(const MetaTensor& x, + const std::vector& axis, + DataType dtype, + bool keep_dim, + MetaTensor* out); } // namespace pten diff --git a/paddle/pten/kernels/cast_kernel.h b/paddle/pten/kernels/cast_kernel.h index 8fdce9cda6..a7f8461934 100644 --- a/paddle/pten/kernels/cast_kernel.h +++ b/paddle/pten/kernels/cast_kernel.h @@ -29,8 +29,9 @@ template DenseTensor Cast(const Context& dev_ctx, const DenseTensor& x, DataType out_dtype) { - auto out_meta = CastInferMeta(x.meta(), out_dtype); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + CastInferMeta(x, out_dtype, &meta_out); CastKernel(dev_ctx, x, out_dtype, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/complex_kernel.h b/paddle/pten/kernels/complex_kernel.h index ff27144eb4..ab1cb59872 100644 --- a/paddle/pten/kernels/complex_kernel.h +++ 
b/paddle/pten/kernels/complex_kernel.h @@ -32,8 +32,9 @@ template >::value, bool> = true> DenseTensor Conj(const Context& dev_ctx, const DenseTensor& x) { - auto out_meta = UnchangedInferMeta(x.meta()); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + UnchangedInferMeta(x, &meta_out); ConjKernel(dev_ctx, x, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/concat_kernel.h b/paddle/pten/kernels/concat_kernel.h index 310b9ba8c0..8c9103145f 100644 --- a/paddle/pten/kernels/concat_kernel.h +++ b/paddle/pten/kernels/concat_kernel.h @@ -30,14 +30,16 @@ template DenseTensor Concat(const Context& dev_ctx, const std::vector& x, const Scalar& axis) { - std::vector x_meta; - for (auto t : x) { - x_meta.push_back(t.meta()); + std::vector meta_x; + for (const auto& t : x) { + meta_x.emplace_back(t); } - auto out_meta = ConcatInferMeta(x_meta, axis.to(), true); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + ConcatInferMeta(meta_x, axis.to(), &meta_out, /*is_runtime=*/true); ConcatKernel(dev_ctx, x, axis, &dense_out); return dense_out; } + } // namespace pten diff --git a/paddle/pten/kernels/dot_kernel.h b/paddle/pten/kernels/dot_kernel.h index 47f1c89109..67f6ca3517 100644 --- a/paddle/pten/kernels/dot_kernel.h +++ b/paddle/pten/kernels/dot_kernel.h @@ -29,8 +29,9 @@ template DenseTensor Dot(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y) { - auto out_meta = DotInferMeta(x.meta(), y.meta()); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + DotInferMeta(x, y, &meta_out); DotKernel(dev_ctx, x, y, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/empty_kernel.h b/paddle/pten/kernels/empty_kernel.h index d283ef5c1e..8a7da8fbd5 100644 --- a/paddle/pten/kernels/empty_kernel.h +++ b/paddle/pten/kernels/empty_kernel.h @@ -55,8 +55,9 @@ DenseTensor Empty(const Context& dev_ctx, DataType dtype = DataType::FLOAT32, Backend backend = Backend::CPU, // Is backend needed here? DataLayout layout = DataLayout::NCHW) { - auto out_meta = CreateInferMeta(shape, dtype, layout); - auto dense_out = Empty(dev_ctx, std::move(out_meta)); + auto dense_out = Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + CreateInferMeta(shape, dtype, layout, &meta_out); EmptyKernel(dev_ctx, shape, &dense_out); return dense_out; } @@ -68,8 +69,9 @@ DenseTensor EmptyLike( DataType dtype = DataType::UNDEFINED, Backend backend = Backend::UNDEFINED, // Is backend needed here? 
DataLayout layout = DataLayout::UNDEFINED) { - auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout); - auto dense_out = Empty(dev_ctx, std::move(out_meta)); + auto dense_out = Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + CreateLikeInferMeta(x, dtype, layout, &meta_out); EmptyLikeKernel(dev_ctx, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/flatten_kernel.h b/paddle/pten/kernels/flatten_kernel.h index c974fda1ed..38d8786c7f 100644 --- a/paddle/pten/kernels/flatten_kernel.h +++ b/paddle/pten/kernels/flatten_kernel.h @@ -40,8 +40,9 @@ DenseTensor Flatten(const Context& dev_ctx, const DenseTensor& x, int start_axis, int stop_axis) { - auto out_meta = FlattenInferMeta(x.meta(), start_axis, stop_axis); - auto dense_out = Empty(dev_ctx, std::move(out_meta)); + auto dense_out = Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + FlattenInferMeta(x, start_axis, stop_axis, &meta_out); FlattenKernel(dev_ctx, x, start_axis, stop_axis, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/full_kernel.h b/paddle/pten/kernels/full_kernel.h index bc484fb4ed..030eb4b1c7 100644 --- a/paddle/pten/kernels/full_kernel.h +++ b/paddle/pten/kernels/full_kernel.h @@ -41,8 +41,9 @@ DenseTensor Full(const Context& dev_ctx, DataType dtype = DataType::FLOAT32, Backend backend = Backend::CPU, // Is backend needed here? DataLayout layout = DataLayout::NCHW) { - auto out_meta = CreateInferMeta(shape, dtype, layout); - auto dense_out = Empty(dev_ctx, std::move(out_meta)); + auto dense_out = Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + CreateInferMeta(shape, dtype, layout, &meta_out); FullKernel(dev_ctx, shape, val, &dense_out); return dense_out; } @@ -55,8 +56,9 @@ DenseTensor FullLike( DataType dtype = DataType::UNDEFINED, Backend backend = Backend::UNDEFINED, // Is backend needed here? 
DataLayout layout = DataLayout::UNDEFINED) { - auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout); - auto dense_out = Empty(dev_ctx, std::move(out_meta)); + auto dense_out = Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + CreateLikeInferMeta(x, dtype, layout, &meta_out); FullLikeKernel(dev_ctx, val, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/funcs/concat_funcs.h b/paddle/pten/kernels/funcs/concat_funcs.h index 8455b80969..88fdad3a6d 100644 --- a/paddle/pten/kernels/funcs/concat_funcs.h +++ b/paddle/pten/kernels/funcs/concat_funcs.h @@ -35,7 +35,7 @@ static inline int64_t ComputeAxis(int64_t axis, int64_t rank) { } static inline pten::DDim ComputeAndCheckShape( - const bool is_runtime, + bool is_runtime, const std::vector& inputs_dims, const size_t axis) { const size_t n = inputs_dims.size(); diff --git a/paddle/pten/kernels/math_kernel.h b/paddle/pten/kernels/math_kernel.h index 6dcb9dd77a..eb39b618eb 100644 --- a/paddle/pten/kernels/math_kernel.h +++ b/paddle/pten/kernels/math_kernel.h @@ -109,8 +109,9 @@ template DenseTensor Add(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y) { - auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); AddKernel(dev_ctx, x, y, &dense_out); return dense_out; } @@ -119,8 +120,9 @@ template DenseTensor Subtract(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y) { - auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); SubtractKernel(dev_ctx, x, y, &dense_out); return dense_out; } @@ -129,8 +131,9 @@ template DenseTensor Divide(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y) { - auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); DivideKernel(dev_ctx, x, y, &dense_out); return dense_out; } @@ -139,8 +142,9 @@ template DenseTensor Multiply(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y) { - auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); MultiplyKernel(dev_ctx, x, y, &dense_out); return dense_out; } @@ -150,8 +154,9 @@ DenseTensor Mean(const Context& dev_ctx, const DenseTensor& x, const std::vector& axis, bool keep_dim) { - auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + ReduceInferMeta(x, axis, keep_dim, x.dtype(), &meta_out); MeanKernel(dev_ctx, x, axis, keep_dim, &dense_out); return dense_out; } @@ -162,9 +167,9 @@ DenseTensor Sum(const Context& dev_ctx, const std::vector& axis, DataType dtype, bool keep_dim) { - auto out_meta = SumInferMeta(x.meta(), axis, dtype, keep_dim); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); - + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + SumInferMeta(x, axis, dtype, 
keep_dim, &meta_out); SumKernel(dev_ctx, x, axis, dtype, keep_dim, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/matmul_kernel.h b/paddle/pten/kernels/matmul_kernel.h index f9cb2c3801..e6b9302cad 100644 --- a/paddle/pten/kernels/matmul_kernel.h +++ b/paddle/pten/kernels/matmul_kernel.h @@ -35,8 +35,9 @@ DenseTensor Matmul(const Context& dev_ctx, const DenseTensor& y, bool transpose_x, bool transpose_y) { - auto out_meta = MatmulInferMeta(x.meta(), y.meta(), transpose_x, transpose_y); - auto dense_out = Empty(dev_ctx, std::move(out_meta)); + auto dense_out = Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + MatmulInferMeta(x, y, transpose_x, transpose_y, &meta_out); MatmulKernel(dev_ctx, x, y, transpose_x, transpose_y, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/reshape_kernel.cc b/paddle/pten/kernels/reshape_kernel.cc index a76dfb09a0..c52d251582 100644 --- a/paddle/pten/kernels/reshape_kernel.cc +++ b/paddle/pten/kernels/reshape_kernel.cc @@ -26,15 +26,18 @@ void ReshapeKernel(const Context& dev_ctx, const DenseTensor& x, const ScalarArray& shape, DenseTensor* out) { - auto out_meta = InferMetaFromVecValue(x.meta(), shape.GetData()); + MetaTensor meta_out(out); + InferMetaFromVecValue(x, shape.GetData(), &meta_out); if (x.initialized() && x.Holder() == out->Holder()) { - out->ResizeAndAllocate(out_meta.dims); + dev_ctx.Alloc(out); return; } - out->set_meta(out_meta); dev_ctx.Alloc(out); + // TODO(chenweihang): the output dims are overwritten after copying, + // here we need to use a copy method that only copies data + auto dims = out->dims(); pten::Copy(dev_ctx, x, false, out); - out->Resize(out_meta.dims); + out->Resize(dims); out->ResetLoD(x.lod()); } diff --git a/paddle/pten/kernels/reshape_kernel.h b/paddle/pten/kernels/reshape_kernel.h index 293f6cd2ba..a5672ad6e5 100644 --- a/paddle/pten/kernels/reshape_kernel.h +++ b/paddle/pten/kernels/reshape_kernel.h @@ -38,8 +38,9 @@ template DenseTensor Reshape(const Context& dev_ctx, const DenseTensor& x, const std::vector& shape) { - auto out_meta = InferMetaFromVecValue(x.meta(), shape); - auto dense_out = Empty(dev_ctx, std::move(out_meta)); + auto dense_out = Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + InferMetaFromVecValue(x, shape, &meta_out); ReshapeKernel(dev_ctx, x, ScalarArray(shape), &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/scale_kernel.h b/paddle/pten/kernels/scale_kernel.h index 7fe627b442..357bc70b40 100644 --- a/paddle/pten/kernels/scale_kernel.h +++ b/paddle/pten/kernels/scale_kernel.h @@ -43,8 +43,9 @@ DenseTensor Scale(const Context& dev_ctx, const Scalar& scale, float bias, bool bias_after_scale) { - auto out_meta = UnchangedInferMeta(x.meta()); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + UnchangedInferMeta(x, &meta_out); ScaleKernel( dev_ctx, x, scale, bias, bias_after_scale, &dense_out); return dense_out; diff --git a/paddle/pten/kernels/sign_kernel.h b/paddle/pten/kernels/sign_kernel.h index 304b640d2a..4161c76e47 100644 --- a/paddle/pten/kernels/sign_kernel.h +++ b/paddle/pten/kernels/sign_kernel.h @@ -25,8 +25,9 @@ void SignKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out); template DenseTensor Sign(const Context& dev_ctx, const DenseTensor& x) { - auto out_meta = UnchangedInferMeta(x.meta()); - auto dense_out = pten::Empty(dev_ctx, std::move(out_meta)); + auto dense_out = pten::Empty(dev_ctx); + MetaTensor meta_out(&dense_out); + 
UnchangedInferMeta(x, &meta_out); SignKernel(dev_ctx, x, &dense_out); return dense_out; } diff --git a/paddle/pten/tests/api/scale_api.h b/paddle/pten/tests/api/scale_api.h index 4b5a0a7daf..b3b8b8f77c 100644 --- a/paddle/pten/tests/api/scale_api.h +++ b/paddle/pten/tests/api/scale_api.h @@ -23,6 +23,7 @@ #include "paddle/pten/common/scalar.h" #include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/core/meta_tensor.h" #include "paddle/pten/infermeta/unary.h" #include "paddle/pten/kernels/scale_kernel.h" @@ -68,11 +69,12 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x, kernel_context.EmplaceBackAttr(bias); kernel_context.EmplaceBackAttr(bias_after_scale); - auto out_meta = pten::UnchangedInferMeta(dense_x->meta()); auto dense_out = std::make_shared( pten::make_intrusive( pten::TransToFluidPlace(kernel_backend)), - std::move(out_meta)); + pten::DenseTensorMeta()); + pten::MetaTensor meta_out(dense_out.get()); + pten::UnchangedInferMeta(*dense_x, &meta_out); kernel_context.EmplaceBackOutput(dense_out.get()); Tensor out; @@ -234,11 +236,12 @@ Tensor scale_switch_case(const Tensor& x, auto dense_x = std::dynamic_pointer_cast(x.impl()); - auto out_meta = pten::UnchangedInferMeta(dense_x->meta()); auto dense_out = std::make_shared( pten::make_intrusive( pten::TransToFluidPlace(kernel_backend)), - std::move(out_meta)); + pten::DenseTensorMeta()); + pten::MetaTensor meta_out(dense_out.get()); + pten::UnchangedInferMeta(*dense_x, &meta_out); Tensor out; out.set_impl(dense_out); diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index e5ccd6b040..7768cb926e 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -23,7 +23,7 @@ output : Tensor infer_meta : func : ConcatInferMeta - param : [x, axis, true] + param : [x, axis] kernel : func : concat diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index a610bc9e80..cc7b31559f 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -65,13 +65,15 @@ PADDLE_API {self.return_type} {self.api}({self.args['args_declare']}); def gene_output(self, output_type_list): kernel_output = "" + output_names = [] output_create = "" if len(output_type_list) == 1: kernel_output = 'dense_out' + output_names.append('dense_out') output_create = f""" {self.return_type} out; - auto dense_out = SetKernelOutput(out_meta, kernel_backend, &out);""" + auto dense_out = SetKernelOutput(kernel_backend, &out);""" elif len(output_type_list) > 1: output_create = f""" @@ -79,8 +81,9 @@ PADDLE_API {self.return_type} {self.api}({self.args['args_declare']}); for i in range(len(output_type_list)): kernel_output = kernel_output + f'dense_out_{i}, ' + output_names.append(f'dense_out_{i}') output_create = output_create + f""" - auto dense_out_{i} = SetKernelOutput(std::get<{i}>(out_meta), kernel_backend, &std::get<{i}>(out));""" + auto dense_out_{i} = SetKernelOutput(kernel_backend, &std::get<{i}>(out));""" kernel_output = kernel_output[:-2] else: @@ -88,22 +91,23 @@ PADDLE_API {self.return_type} {self.api}({self.args['args_declare']}); "{} : Output error: the output should not be empty.".format( self.api)) - return kernel_output, output_create + return kernel_output, output_names, output_create def gene_api_code(self): if self.is_base_api: input_tensors, kernel_args, kernel_signature = gen_utils.get_kernel_args( self.args['inputs'], self.args['attrs'], 
self.out_type_list, self.kernel['param']) - outputs_args, output_create = self.gene_output(self.out_type_list) + outputs_args, output_names, output_create = self.gene_output( + self.out_type_list) return f""" PADDLE_API {self.return_type} {self.api}({self.args["args_define"]}) {{ {gen_utils.gene_kernel_select(self.api, self.args['inputs']['names'], self.args['attrs'], self.kernel)} auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); {input_tensors} -{gen_utils.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], self.infer_meta)} {output_create} +{gen_utils.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], output_names, self.infer_meta)} using kernel_signature = {kernel_signature}; auto* kernel_fn = kernel.GetVariadicKernelFn(); (*kernel_fn)({kernel_args}, {outputs_args}); diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py index e53886339c..9df25e2dea 100644 --- a/python/paddle/utils/code_gen/backward_api_gen.py +++ b/python/paddle/utils/code_gen/backward_api_gen.py @@ -105,13 +105,15 @@ class BackwardAPI: def gene_output(self, output_type_list): kernel_output = "" + output_names = [] output_create = "" if len(output_type_list) == 1: kernel_output = 'dense_out' + output_names.append('dense_out') output_create = f""" {self.return_type} out; - auto dense_out = SetKernelOutput(out_meta, kernel_backend, &out);""" + auto dense_out = SetKernelOutput(kernel_backend, &out);""" elif len(output_type_list) > 1: output_create = f""" @@ -119,6 +121,7 @@ class BackwardAPI: for i, out_type_item in enumerate(output_type_list): kernel_output = kernel_output + f'dense_out_{i}, ' + output_names.append(f'dense_out_{i}') if out_type_item == 'Tensor': get_out_code = f'&out[{i}][0]' output_create = output_create + f""" @@ -127,7 +130,7 @@ class BackwardAPI: else: get_out_code = f'&out[{i}]' output_create = output_create + f""" - auto dense_out_{i} = SetKernelOutput(std::get<{i}>(out_meta), kernel_backend, {get_out_code});""" + auto dense_out_{i} = SetKernelOutput(kernel_backend, {get_out_code});""" kernel_output = kernel_output[:-2] else: @@ -135,14 +138,14 @@ class BackwardAPI: "{} : Output error: the output should not be empty.".format( self.backward_api)) - return kernel_output, output_create + return kernel_output, output_names, output_create def gene_api_code(self): if self.is_base_api: input_tensors, kernel_args, kernel_signature = gen_utils.get_kernel_args( self.args['inputs'], self.args['attrs'], self.output_type_list, self.kernel['param']) - outputs_args, output_create = self.gene_output( + outputs_args, output_names, output_create = self.gene_output( self.output_type_list) return f""" // {self.return_comment} @@ -151,8 +154,8 @@ class BackwardAPI: auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); {input_tensors} -{gen_utils.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], self.infer_meta)} {output_create} +{gen_utils.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], output_names, self.infer_meta)} using kernel_signature = {kernel_signature}; auto* kernel_fn = kernel.GetVariadicKernelFn(); diff --git a/python/paddle/utils/code_gen/gen_utils.py b/python/paddle/utils/code_gen/gen_utils.py index f1aa299fb1..56143a8f51 100644 --- a/python/paddle/utils/code_gen/gen_utils.py +++ b/python/paddle/utils/code_gen/gen_utils.py @@ -15,6 +15,7 @@ import re PREFIX_TENSOR_NAME = 'dense_' +PREFIX_META_TENSOR_NAME = 'meta_' def 
parse_args(api_name, args_str): @@ -265,13 +266,21 @@ def gene_kernel_select(api, input_names, attrs, kernel) -> str: return kernel_select_code -def gene_infer_meta(input_names, attr_names, infer_meta) -> str: - infer_meta_params = infer_meta['param'] if infer_meta[ - 'param'] is not None else input_names + attr_names +def gene_infer_meta(input_names, attr_names, output_names, infer_meta) -> str: + infer_meta_params = infer_meta['param'] + output_names if infer_meta[ + 'param'] is not None else input_names + attr_names + output_names + # generate meta tensors + meta_tensor_code = "" param_code = "" for param in infer_meta_params: if param in input_names: - param_code = param_code + "GetDenseTensorMeta(*" + PREFIX_TENSOR_NAME + param + "), " + param_code = param_code + "MakeMetaTensor(*" + PREFIX_TENSOR_NAME + param + "), " + elif param in output_names: + meta_tensor_code = meta_tensor_code + " pten::MetaTensor " + param.replace( + PREFIX_TENSOR_NAME, + PREFIX_META_TENSOR_NAME) + "(" + param + ");\n" + param_code = param_code + "&" + param.replace( + PREFIX_TENSOR_NAME, PREFIX_META_TENSOR_NAME) + ", " elif param in attr_names: param_code = param_code + param + ", " elif isinstance(param, str): @@ -282,8 +291,8 @@ def gene_infer_meta(input_names, attr_names, infer_meta) -> str: param_code = param_code + str(param) + ", " param_code = param_code[:-2] - return f""" - auto out_meta = pten::{infer_meta['func']}({param_code}); + return f"""{meta_tensor_code} + pten::{infer_meta['func']}({param_code}); """ -- GitLab
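
Usage note: the convention this patch establishes everywhere is "create the output first, then let InferMeta describe it in place", replacing the old "compute a DenseTensorMeta, then construct the output from it". A minimal sketch of the new calling convention, modeled on the Add/Mean helpers in paddle/pten/kernels/math_kernel.h above; the wrapper name AddExample is hypothetical, and the explicit template arguments are assumed to match the Empty/AddKernel templates declared in the headers this patch touches:

  #include "paddle/pten/core/meta_tensor.h"
  #include "paddle/pten/infermeta/binary.h"
  #include "paddle/pten/kernels/empty_kernel.h"
  #include "paddle/pten/kernels/math_kernel.h"

  namespace pten {

  template <typename T, typename Context>
  DenseTensor AddExample(const Context& dev_ctx,
                         const DenseTensor& x,
                         const DenseTensor& y) {
    // 1. Create an empty output; its meta is not yet meaningful.
    auto dense_out = Empty<T, Context>(dev_ctx);
    // 2. Wrap it in a non-owning MetaTensor and let the InferMeta function
    //    fill in dims/dtype/layout and share x's LoD. The inputs convert
    //    implicitly to MetaTensor via the new constructors in meta_tensor.h.
    MetaTensor meta_out(&dense_out);
    ElementwiseInferMeta(x, y, &meta_out);
    // 3. Only now launch the kernel against the fully described output.
    AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
    return dense_out;
  }

  }  // namespace pten

Because MetaTensor carries only virtual setters over a TensorBase pointer, the same InferMeta body can also run at compile time through the VarDesc-backed CompatMetaTensor in infershape_utils.cc, whose share_meta() override deliberately skips layout.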
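On the code-generation side, the gene_output/gene_infer_meta changes make the generated API body declare its outputs before the InferMeta call and wrap each one in a pten::MetaTensor named after it. A hand-expanded sketch of the fragment that gene_infer_meta would emit for the concat entry in api.yaml (surrounding boilerplate from gene_kernel_select and get_kernel_args omitted; the exact formatting of the generated code may differ):

  Tensor out;
  auto dense_out = SetKernelOutput(kernel_backend, &out);
  pten::MetaTensor meta_out(dense_out);
  pten::ConcatInferMeta(MakeMetaTensor(*dense_x), axis, &meta_out);

Note that the out_meta intermediate is gone: SetKernelOutput now allocates an output with an empty DenseTensorMeta, and the InferMeta call writes the inferred dims/dtype/layout directly into it through meta_out.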