diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index c70bb72e673e0f1d5b5716cda8a190552edc5acb..572e36be9b77680b10ff7006f99ac40b2a1b96cd 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, "Tensors.", vec_true_outs.size(), outs.size())); for (size_t j = 0; j < vec_true_outs.size(); ++j) { - experimental::MovesStorage( + experimental::MovesSharedStorage( std::dynamic_pointer_cast(outs.at(j).impl()) .get(), vec_true_outs.at(j)); } } else { auto* true_out = ctx.Output(out_name); - experimental::MovesStorage( + experimental::MovesSharedStorage( std::dynamic_pointer_cast(outs.at(i).impl()) .get(), true_out); diff --git a/paddle/pten/api/lib/utils.cc b/paddle/pten/api/lib/utils.cc index 683eb4e5b0c01df14e7777b260bf2f1b0f53188e..06d604cb83afe91ed2da547e33bd39e02f276771 100644 --- a/paddle/pten/api/lib/utils.cc +++ b/paddle/pten/api/lib/utils.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/kernel_dispatch.h" -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/include/core.h" #include "paddle/pten/include/infermeta.h" @@ -62,10 +62,10 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { auto out_meta = UnchangedInferMeta(dense_x->meta()); // 5. 
Prepare outputs - const auto allocator = - std::make_shared( - pten::TransToFluidPlace(backend)); - auto dense_out = std::make_shared(allocator, out_meta); + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToFluidPlace(backend)), + std::move(out_meta)); kernel_context.EmplaceBackOutput(dense_out); Tensor out; out.set_impl(dense_out); diff --git a/paddle/pten/include/creation.h b/paddle/pten/include/creation.h index a4f3a0464b35cc12fe8938c7d2508298f78cf187..7341ea18917b8e9ab31e34ac64c014845ea9fee9 100644 --- a/paddle/pten/include/creation.h +++ b/paddle/pten/include/creation.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/cpu/creation.h" #include "paddle/pten/kernels/cuda/creation.h" @@ -32,10 +32,10 @@ DenseTensor FillAnyLike( Backend backend = Backend::UNDEFINED, // Is backend needed here? DataLayout layout = DataLayout::UNDEFINED) { auto out_meta = FullLikeInferMeta(x.meta(), dtype, layout); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); FillAnyLike(dev_ctx, val, &dense_out); return dense_out; } diff --git a/paddle/pten/include/linalg.h b/paddle/pten/include/linalg.h index b21645ef187a849f5086b614b26030c3199fb8d9..60ec451be2cc82d8f7ccdbf5d903d5e73c5b22c4 100644 --- a/paddle/pten/include/linalg.h +++ b/paddle/pten/include/linalg.h @@ -15,7 +15,7 @@ #pragma once // See Note: [ How do we organize the kernel directory ] -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/cpu/linalg.h" #include "paddle/pten/kernels/cuda/linalg.h" @@ -27,10 +27,10 @@ DenseTensor Dot(const ContextT& dev_ctx, const 
DenseTensor& x, const DenseTensor& y) { auto out_meta = DotInferMeta(x.meta(), y.meta()); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Dot(dev_ctx, x, y, &dense_out); return dense_out; } diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h index e694a89f700cf5c68ce631f5671b285f23d298e2..e138c51e307c4d2e167d984c1872634ed77a6723 100644 --- a/paddle/pten/include/manipulation.h +++ b/paddle/pten/include/manipulation.h @@ -15,7 +15,7 @@ #pragma once // See Note: [ How do we organize the kernel directory ] -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/cpu/manipulation.h" #include "paddle/pten/kernels/cuda/manipulation.h" @@ -29,10 +29,10 @@ DenseTensor Flatten(const ContextT& dev_ctx, int start_axis, int stop_axis) { auto out_meta = FlattenInferMeta(x.meta(), start_axis, stop_axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Flatten(dev_ctx, x, start_axis, stop_axis, &dense_out); return dense_out; } @@ -43,10 +43,10 @@ DenseTensor Cast(const ContextT& dev_ctx, DataType out_dtype, DataType in_dtype) { auto out_meta = CastInferMeta(x.meta(), out_dtype); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Cast(dev_ctx, x, out_dtype, in_dtype, &dense_out); return dense_out; } @@ -56,10 +56,10 @@ DenseTensor Reshape(const ContextT& dev_ctx, const DenseTensor& x, const std::vector& shape) { auto out_meta = 
InferMetaFromVecValue(x.meta(), shape); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Reshape(dev_ctx, x, ScalarArray(shape), &dense_out); return dense_out; } diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h index c2b9f75bda0449d00adf1962db425bf493886e1c..1afad968ef577d35d17e00e0f964af0b59902ea2 100644 --- a/paddle/pten/include/math.h +++ b/paddle/pten/include/math.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once // See Note: [ How do we organize the kernel directory ] -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/cpu/math.h" #include "paddle/pten/kernels/cuda/math.h" @@ -25,10 +25,10 @@ namespace pten { template DenseTensor Sign(const ContextT& dev_ctx, const DenseTensor& x) { auto out_meta = UnchangedInferMeta(x.meta()); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Sign(dev_ctx, x, &dense_out); return dense_out; } @@ -39,10 +39,10 @@ DenseTensor Mean(const ContextT& dev_ctx, const std::vector& axis, bool keep_dim) { auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); bool reduce_all = false; DataType out_dtype = pten::DataType::UNDEFINED; Mean( @@ -57,10 +57,10 @@ DenseTensor Sum(const ContextT& dev_ctx, DataType dtype, bool keep_dim) { auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim); - const auto allocator = - 
std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); // The real value of reduce_all will be get in kernel // so use default value(false) is OK. @@ -82,10 +82,10 @@ DenseTensor Scale(const ContextT& dev_ctx, float bias, bool bias_after_scale) { auto out_meta = UnchangedInferMeta(x.meta()); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); Scale(dev_ctx, x, scale, bias, bias_after_scale, &dense_out); return dense_out; } @@ -96,10 +96,10 @@ DenseTensor Add(const ContextT& dev_ctx, const DenseTensor& y, int axis) { auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); ElementwiseAdd(dev_ctx, x, y, axis, &dense_out); return dense_out; } @@ -110,10 +110,10 @@ DenseTensor Subtract(const ContextT& dev_ctx, const DenseTensor& y, int axis) { auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); ElementwiseSub(dev_ctx, x, y, axis, &dense_out); return dense_out; } @@ -124,10 +124,10 @@ DenseTensor Divide(const ContextT& dev_ctx, const DenseTensor& y, int axis) { auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + 
std::move(out_meta)); ElementwiseDiv(dev_ctx, x, y, axis, &dense_out); return dense_out; } @@ -138,10 +138,10 @@ DenseTensor Multiply(const ContextT& dev_ctx, const DenseTensor& y, int axis) { auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - const auto allocator = - std::make_shared( - dev_ctx.GetPlace()); - pten::DenseTensor dense_out(allocator, out_meta); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); ElementwiseMul(dev_ctx, x, y, axis, &dense_out); return dense_out; } diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h index 16fc70b9ab7a1c8e421f0da55a65dc5b4406de2c..e7aecf3b27aaf1d61a1c4aeaedd2e82b416385a0 100644 --- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h +++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h @@ -804,10 +804,9 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, // temp_output should be stored temp_data in output_data space or stored in // y_data; pten::DDim tmp_ddim; - const auto alloc = - std::make_shared(y->place()); pten::DenseTensor tmp = pten::DenseTensor( - alloc, pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout())); + pten::make_intrusive(y->place()), + pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout())); auto x_data = x.data(); auto y_data = y->mutable_data(); @@ -847,10 +846,8 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, reducer.initial(), stream); // framework::Tensor tmp; - const auto alloc = - std::make_shared(x.place()); pten::DenseTensor tmp = pten::DenseTensor( - alloc, + pten::make_intrusive(x.place()), pten::DenseTensorMeta(pten::DataType::UINT8, paddle::framework::make_ddim( {static_cast(temp_storage_bytes)}), diff --git a/paddle/pten/kernels/hybird/eigen/reduce.h b/paddle/pten/kernels/hybird/eigen/reduce.h index e6ab872928c77dab3f22a9ce3af24f5ca29256ae..d60a416dfdb37fc342b4ed89b4cd06513718c274 100644 --- 
a/paddle/pten/kernels/hybird/eigen/reduce.h +++ b/paddle/pten/kernels/hybird/eigen/reduce.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/kernels/hybird/eigen/common.h" #include "paddle/pten/kernels/hybird/transpose.h" @@ -129,9 +129,9 @@ void HandleLargeDim(const DeviceContext& dev_ctx, const std::vector& dims, bool keep_dim) { // shuffle the reduced dim to the end - const auto alloc = - std::make_shared(input.place()); - pten::DenseTensor shuffled_input = pten::DenseTensor(alloc, input.meta()); + pten::DenseTensor shuffled_input = pten::DenseTensor( + pten::make_intrusive(input.place()), + input.meta()); GetShuffledInput(dev_ctx, input, &shuffled_input, dims); diff --git a/paddle/pten/kernels/hybird/general/reduce_impl.h b/paddle/pten/kernels/hybird/general/reduce_impl.h index 50f40c5f2ca12bca53aa42030cfc8890a7588bfe..52bdf18ad5a31d87f53fb48e983f6b07539f7044 100644 --- a/paddle/pten/kernels/hybird/general/reduce_impl.h +++ b/paddle/pten/kernels/hybird/general/reduce_impl.h @@ -53,10 +53,9 @@ void Reduce(const DeviceContext& dev_ctx, dev_ctx, x, out, dims, keep_dim, reduce_all); })); } else { - const auto alloc = - std::make_shared(x.place()); pten::DenseTensor tmp_tensor = pten::DenseTensor( - alloc, pten::DenseTensorMeta(out_dtype, x.dims(), x.layout())); + pten::make_intrusive(x.place()), + pten::DenseTensorMeta(out_dtype, x.dims(), x.layout())); // cast x tensor to out_dtype first PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] { diff --git a/paddle/pten/tests/api/scale_api.h b/paddle/pten/tests/api/scale_api.h index 565bb0f139d9d5a1bd7cca57a1d1e5d3f1467738..5668cbe29439c454ef249343a90669de7d210480 100644 --- a/paddle/pten/tests/api/scale_api.h +++ b/paddle/pten/tests/api/scale_api.h @@ -71,11 +71,10 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x, kernel_context.EmplaceBackAttr(bias_after_scale); 
auto out_meta = pten::UnchangedInferMeta(dense_x->meta()); - - const auto allocator = - std::make_shared( - pten::TransToFluidPlace(kernel_backend)); - auto dense_out = std::make_shared(allocator, out_meta); + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToFluidPlace(kernel_backend)), + std::move(out_meta)); kernel_context.EmplaceBackOutput(dense_out); Tensor out; @@ -238,10 +237,10 @@ Tensor scale_switch_case(const Tensor& x, auto dense_x = std::dynamic_pointer_cast(x.impl()); auto out_meta = pten::UnchangedInferMeta(dense_x->meta()); - const auto allocator = - std::make_shared( - pten::TransToFluidPlace(kernel_backend)); - auto dense_out = std::make_shared(allocator, out_meta); + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToFluidPlace(kernel_backend)), + std::move(out_meta)); Tensor out; out.set_impl(dense_out); diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index c7e04301ca592be449d17e563b1d185549c7ee3f..029985475011eaa4aac415680dc0b9307f79a73e 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -303,10 +303,10 @@ PADDLE_API {self.output} {self.api}({self.args["args_define"]}) {{ auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); {input_tensors} {self.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], self.infer_meta)} - const auto allocator = - std::make_shared( - pten::TransToFluidPlace(kernel_backend)); - auto dense_out = std::make_shared(allocator, out_meta); + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToFluidPlace(kernel_backend)), + std::move(out_meta)); Tensor out; out.set_impl(dense_out); @@ -345,7 +345,7 @@ def source_include(header_file_path): #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/kernel_declare.h" #include "paddle/pten/api/lib/kernel_dispatch.h" -#include "paddle/pten/api/lib/utils/allocator.h" 
+#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/include/core.h" #include "paddle/pten/include/infermeta.h"