From e78eb3f4791c521f79ee17dd838036d923cb3a2b Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Tue, 14 Dec 2021 23:13:53 -0600 Subject: [PATCH] replace moves_storage and alloc_construct (#38134) --- paddle/fluid/framework/custom_operator.cc | 4 +- paddle/pten/api/lib/utils.cc | 10 +-- paddle/pten/include/creation.h | 10 +-- paddle/pten/include/linalg.h | 10 +-- paddle/pten/include/manipulation.h | 26 ++++---- paddle/pten/include/math.h | 66 +++++++++---------- .../hybird/cuda/reduce/reduce_cuda_impl.h | 9 +-- paddle/pten/kernels/hybird/eigen/reduce.h | 8 +-- .../pten/kernels/hybird/general/reduce_impl.h | 5 +- paddle/pten/tests/api/scale_api.h | 17 +++-- python/paddle/utils/code_gen/api_gen.py | 10 +-- 11 files changed, 85 insertions(+), 90 deletions(-) diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index c70bb72e673..572e36be9b7 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, "Tensors.", vec_true_outs.size(), outs.size())); for (size_t j = 0; j < vec_true_outs.size(); ++j) { - experimental::MovesStorage( + experimental::MovesSharedStorage( std::dynamic_pointer_cast(outs.at(j).impl()) .get(), vec_true_outs.at(j)); } } else { auto* true_out = ctx.Output(out_name); - experimental::MovesStorage( + experimental::MovesSharedStorage( std::dynamic_pointer_cast(outs.at(i).impl()) .get(), true_out); diff --git a/paddle/pten/api/lib/utils.cc b/paddle/pten/api/lib/utils.cc index 683eb4e5b0c..06d604cb83a 100644 --- a/paddle/pten/api/lib/utils.cc +++ b/paddle/pten/api/lib/utils.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/kernel_dispatch.h" -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/include/core.h" #include "paddle/pten/include/infermeta.h" @@ -62,10 +62,10 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { auto out_meta = UnchangedInferMeta(dense_x->meta()); // 5. Prepare outputs - const auto allocator = - std::make_shared( - pten::TransToFluidPlace(backend)); - auto dense_out = std::make_shared(allocator, out_meta); + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToFluidPlace(backend)), + std::move(out_meta)); kernel_context.EmplaceBackOutput(dense_out); Tensor out; out.set_impl(dense_out); diff --git a/paddle/pten/include/creation.h b/paddle/pten/include/creation.h index a4f3a0464b3..7341ea18917 100644 --- a/paddle/pten/include/creation.h +++ b/paddle/pten/include/creation.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/cpu/creation.h" #include "paddle/pten/kernels/cuda/creation.h" @@ -32,10 +32,10 @@ DenseTensor FillAnyLike( Backend backend = Backend::UNDEFINED, // Is backend needed here? DataLayout layout = DataLayout::UNDEFINED) { auto out_meta = FullLikeInferMeta(x.meta(), dtype, layout); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); FillAnyLike(dev_ctx, val, &dense_out); return dense_out; } diff --git a/paddle/pten/include/linalg.h b/paddle/pten/include/linalg.h index b21645ef187..60ec451be2c 100644 --- a/paddle/pten/include/linalg.h +++ b/paddle/pten/include/linalg.h @@ -15,7 +15,7 @@ #pragma once // See Note: [ How do we organize the kernel directory ] -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/cpu/linalg.h" #include "paddle/pten/kernels/cuda/linalg.h" @@ -27,10 +27,10 @@ DenseTensor Dot(const ContextT& dev_ctx, const DenseTensor& x, const DenseTensor& y) { auto out_meta = DotInferMeta(x.meta(), y.meta()); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Dot(dev_ctx, x, y, &dense_out); return dense_out; } diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h index e694a89f700..e138c51e307 100644 --- a/paddle/pten/include/manipulation.h +++ b/paddle/pten/include/manipulation.h @@ -15,7 +15,7 @@ #pragma once // See Note: [ How do we organize the kernel directory ] -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/cpu/manipulation.h" #include "paddle/pten/kernels/cuda/manipulation.h" @@ -29,10 +29,10 @@ DenseTensor Flatten(const ContextT& dev_ctx, int start_axis, int stop_axis) { auto out_meta = FlattenInferMeta(x.meta(), start_axis, stop_axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Flatten(dev_ctx, x, start_axis, stop_axis, &dense_out); return dense_out; } @@ -43,10 +43,10 @@ DenseTensor Cast(const ContextT& dev_ctx, DataType out_dtype, DataType in_dtype) { auto out_meta = CastInferMeta(x.meta(), out_dtype); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Cast(dev_ctx, x, out_dtype, in_dtype, &dense_out); return dense_out; } @@ -56,10 +56,10 @@ DenseTensor Reshape(const ContextT& dev_ctx, const DenseTensor& x, const std::vector& shape) { auto out_meta = InferMetaFromVecValue(x.meta(), shape); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Reshape(dev_ctx, x, ScalarArray(shape), &dense_out); return dense_out; } diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h index c2b9f75bda0..1afad968ef5 100644 --- a/paddle/pten/include/math.h +++ b/paddle/pten/include/math.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once // See Note: [ How do we organize the kernel directory ] -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/cpu/math.h" #include "paddle/pten/kernels/cuda/math.h" @@ -25,10 +25,10 @@ namespace pten { template DenseTensor Sign(const ContextT& dev_ctx, const DenseTensor& x) { auto out_meta = UnchangedInferMeta(x.meta()); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Sign(dev_ctx, x, &dense_out); return dense_out; } @@ -39,10 +39,10 @@ DenseTensor Mean(const ContextT& dev_ctx, const std::vector& axis, bool keep_dim) { auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); bool reduce_all = false; DataType out_dtype = pten::DataType::UNDEFINED; Mean( @@ -57,10 +57,10 @@ DenseTensor Sum(const ContextT& dev_ctx, DataType dtype, bool keep_dim) { auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + out_meta); // The real value of reduce_all will be get in kernel // so use default value(false) is OK. @@ -82,10 +82,10 @@ DenseTensor Scale(const ContextT& dev_ctx, float bias, bool bias_after_scale) { auto out_meta = UnchangedInferMeta(x.meta()); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Scale(dev_ctx, x, scale, bias, bias_after_scale, &dense_out); return dense_out; } @@ -96,10 +96,10 @@ DenseTensor Add(const ContextT& dev_ctx, const DenseTensor& y, int axis) { auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); ElementwiseAdd(dev_ctx, x, y, axis, &dense_out); return dense_out; } @@ -110,10 +110,10 @@ DenseTensor Subtract(const ContextT& dev_ctx, const DenseTensor& y, int axis) { auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); ElementwiseSub(dev_ctx, x, y, axis, &dense_out); return dense_out; } @@ -124,10 +124,10 @@ DenseTensor Divide(const ContextT& dev_ctx, const DenseTensor& y, int axis) { auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); ElementwiseDiv(dev_ctx, x, y, axis, &dense_out); return dense_out; } @@ -138,10 +138,10 @@ DenseTensor Multiply(const ContextT& dev_ctx, const DenseTensor& y, int axis) { auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); ElementwiseMul(dev_ctx, x, y, axis, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h index 16fc70b9ab7..e7aecf3b27a 100644 --- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h +++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h @@ -804,10 +804,9 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, // temp_output should be stored temp_data in output_data space or stored in // y_data; pten::DDim tmp_ddim; - const auto alloc = - std::make_shared(y->place()); pten::DenseTensor tmp = pten::DenseTensor( - alloc, pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout())); + pten::make_intrusive(y->place()), + pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout())); auto x_data = x.data(); auto y_data = y->mutable_data(); @@ -847,10 +846,8 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, reducer.initial(), stream); // framework::Tensor tmp; - const auto alloc = - std::make_shared(x.place()); pten::DenseTensor tmp = pten::DenseTensor( - alloc, + pten::make_intrusive(x.place()), pten::DenseTensorMeta(pten::DataType::UINT8, paddle::framework::make_ddim( {static_cast(temp_storage_bytes)}), diff --git a/paddle/pten/kernels/hybird/eigen/reduce.h b/paddle/pten/kernels/hybird/eigen/reduce.h index e6ab872928c..d60a416dfdb 100644 --- a/paddle/pten/kernels/hybird/eigen/reduce.h +++ b/paddle/pten/kernels/hybird/eigen/reduce.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/kernels/hybird/eigen/common.h" #include "paddle/pten/kernels/hybird/transpose.h" @@ -129,9 +129,9 @@ void HandleLargeDim(const DeviceContext& dev_ctx, const std::vector& dims, bool keep_dim) { // shuffle the reduced dim to the end - const auto alloc = - std::make_shared(input.place()); - pten::DenseTensor shuffled_input = pten::DenseTensor(alloc, input.meta()); + pten::DenseTensor shuffled_input = pten::DenseTensor( + pten::make_intrusive(input.place()), + input.meta()); GetShuffledInput(dev_ctx, input, &shuffled_input, dims); diff --git a/paddle/pten/kernels/hybird/general/reduce_impl.h b/paddle/pten/kernels/hybird/general/reduce_impl.h index 50f40c5f2ca..52bdf18ad5a 100644 --- a/paddle/pten/kernels/hybird/general/reduce_impl.h +++ b/paddle/pten/kernels/hybird/general/reduce_impl.h @@ -53,10 +53,9 @@ void Reduce(const DeviceContext& dev_ctx, dev_ctx, x, out, dims, keep_dim, reduce_all); })); } else { - const auto alloc = - std::make_shared(x.place()); pten::DenseTensor tmp_tensor = pten::DenseTensor( - alloc, pten::DenseTensorMeta(out_dtype, x.dims(), x.layout())); + pten::make_intrusive(x.place()), + pten::DenseTensorMeta(out_dtype, x.dims(), x.layout())); // cast x tensor to out_dtype first PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] { diff --git a/paddle/pten/tests/api/scale_api.h b/paddle/pten/tests/api/scale_api.h index 565bb0f139d..5668cbe2943 100644 --- a/paddle/pten/tests/api/scale_api.h +++ b/paddle/pten/tests/api/scale_api.h @@ -71,11 +71,10 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x, kernel_context.EmplaceBackAttr(bias_after_scale); auto out_meta = pten::UnchangedInferMeta(dense_x->meta()); - - const auto allocator = - std::make_shared( - pten::TransToFluidPlace(kernel_backend)); - auto dense_out = std::make_shared(allocator, out_meta); + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToFluidPlace(kernel_backend)), + std::move(out_meta)); kernel_context.EmplaceBackOutput(dense_out); Tensor out; @@ -238,10 +237,10 @@ Tensor scale_switch_case(const Tensor& x, auto dense_x = std::dynamic_pointer_cast(x.impl()); auto out_meta = pten::UnchangedInferMeta(dense_x->meta()); - const auto allocator = - std::make_shared( - pten::TransToFluidPlace(kernel_backend)); - auto dense_out = std::make_shared(allocator, out_meta); + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToFluidPlace(kernel_backend)), + std::move(out_meta)); Tensor out; out.set_impl(dense_out); diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index c7e04301ca5..02998547501 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -303,10 +303,10 @@ PADDLE_API {self.output} {self.api}({self.args["args_define"]}) {{ auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); {input_tensors} {self.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], self.infer_meta)} - const auto allocator = - std::make_shared( - pten::TransToFluidPlace(kernel_backend)); - auto dense_out = std::make_shared(allocator, out_meta); + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToFluidPlace(kernel_backend)), + std::move(out_meta)); Tensor out; out.set_impl(dense_out); @@ -345,7 +345,7 @@ def source_include(header_file_path): #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/kernel_declare.h" #include "paddle/pten/api/lib/kernel_dispatch.h" -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/include/core.h" #include "paddle/pten/include/infermeta.h" -- GitLab