未验证 提交 e78eb3f4 编写于 作者: C Chen Weihang 提交者: GitHub

replace moves_storage and alloc_construct (#38134)

上级 49108efa
......@@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
"Tensors.",
vec_true_outs.size(), outs.size()));
for (size_t j = 0; j < vec_true_outs.size(); ++j) {
experimental::MovesStorage(
experimental::MovesSharedStorage(
std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(j).impl())
.get(),
vec_true_outs.at(j));
}
} else {
auto* true_out = ctx.Output<Tensor>(out_name);
experimental::MovesStorage(
experimental::MovesSharedStorage(
std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(i).impl())
.get(),
true_out);
......
......@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
......@@ -62,10 +62,10 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
auto out_meta = UnchangedInferMeta(dense_x->meta());
// 5. Prepare outputs
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(backend));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
auto dense_out = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(backend)),
std::move(out_meta));
kernel_context.EmplaceBackOutput(dense_out);
Tensor out;
out.set_impl(dense_out);
......
......@@ -14,7 +14,7 @@
#pragma once
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/creation.h"
#include "paddle/pten/kernels/cuda/creation.h"
......@@ -32,10 +32,10 @@ DenseTensor FillAnyLike(
Backend backend = Backend::UNDEFINED, // Is backend needed here?
DataLayout layout = DataLayout::UNDEFINED) {
auto out_meta = FullLikeInferMeta(x.meta(), dtype, layout);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
FillAnyLike<T>(dev_ctx, val, &dense_out);
return dense_out;
}
......
......@@ -15,7 +15,7 @@
#pragma once
// See Note: [ How do we organize the kernel directory ]
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/linalg.h"
#include "paddle/pten/kernels/cuda/linalg.h"
......@@ -27,10 +27,10 @@ DenseTensor Dot(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
auto out_meta = DotInferMeta(x.meta(), y.meta());
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Dot<T>(dev_ctx, x, y, &dense_out);
return dense_out;
}
......
......@@ -15,7 +15,7 @@
#pragma once
// See Note: [ How do we organize the kernel directory ]
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/manipulation.h"
#include "paddle/pten/kernels/cuda/manipulation.h"
......@@ -29,10 +29,10 @@ DenseTensor Flatten(const ContextT& dev_ctx,
int start_axis,
int stop_axis) {
auto out_meta = FlattenInferMeta(x.meta(), start_axis, stop_axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Flatten<T>(dev_ctx, x, start_axis, stop_axis, &dense_out);
return dense_out;
}
......@@ -43,10 +43,10 @@ DenseTensor Cast(const ContextT& dev_ctx,
DataType out_dtype,
DataType in_dtype) {
auto out_meta = CastInferMeta(x.meta(), out_dtype);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Cast<T>(dev_ctx, x, out_dtype, in_dtype, &dense_out);
return dense_out;
}
......@@ -56,10 +56,10 @@ DenseTensor Reshape(const ContextT& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& shape) {
auto out_meta = InferMetaFromVecValue(x.meta(), shape);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Reshape(dev_ctx, x, ScalarArray(shape), &dense_out);
return dense_out;
}
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
// See Note: [ How do we organize the kernel directory ]
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/math.h"
#include "paddle/pten/kernels/cuda/math.h"
......@@ -25,10 +25,10 @@ namespace pten {
template <typename T, typename ContextT>
DenseTensor Sign(const ContextT& dev_ctx, const DenseTensor& x) {
auto out_meta = UnchangedInferMeta(x.meta());
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Sign<T>(dev_ctx, x, &dense_out);
return dense_out;
}
......@@ -39,10 +39,10 @@ DenseTensor Mean(const ContextT& dev_ctx,
const std::vector<int64_t>& axis,
bool keep_dim) {
auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
bool reduce_all = false;
DataType out_dtype = pten::DataType::UNDEFINED;
Mean<T>(
......@@ -57,10 +57,10 @@ DenseTensor Sum(const ContextT& dev_ctx,
DataType dtype,
bool keep_dim) {
auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
out_meta);
// The real value of reduce_all will be obtained in the kernel,
// so using the default value (false) here is OK.
......@@ -82,10 +82,10 @@ DenseTensor Scale(const ContextT& dev_ctx,
float bias,
bool bias_after_scale) {
auto out_meta = UnchangedInferMeta(x.meta());
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Scale<T>(dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
return dense_out;
}
......@@ -96,10 +96,10 @@ DenseTensor Add(const ContextT& dev_ctx,
const DenseTensor& y,
int axis) {
auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
ElementwiseAdd<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
......@@ -110,10 +110,10 @@ DenseTensor Subtract(const ContextT& dev_ctx,
const DenseTensor& y,
int axis) {
auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
ElementwiseSub<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
......@@ -124,10 +124,10 @@ DenseTensor Divide(const ContextT& dev_ctx,
const DenseTensor& y,
int axis) {
auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
ElementwiseDiv<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
......@@ -138,10 +138,10 @@ DenseTensor Multiply(const ContextT& dev_ctx,
const DenseTensor& y,
int axis) {
auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
pten::DenseTensor dense_out(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
ElementwiseMul<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
......
......@@ -804,10 +804,9 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
// temp_output should be stored temp_data in output_data space or stored in
// y_data;
pten::DDim tmp_ddim;
const auto alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(y->place());
pten::DenseTensor tmp = pten::DenseTensor(
alloc, pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout()));
pten::make_intrusive<paddle::experimental::SharedStorage>(y->place()),
pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout()));
auto x_data = x.data<Tx>();
auto y_data = y->mutable_data<Ty>();
......@@ -847,10 +846,8 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
reducer.initial(),
stream);
// framework::Tensor tmp;
const auto alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(x.place());
pten::DenseTensor tmp = pten::DenseTensor(
alloc,
pten::make_intrusive<paddle::experimental::SharedStorage>(x.place()),
pten::DenseTensorMeta(pten::DataType::UINT8,
paddle::framework::make_ddim(
{static_cast<int64_t>(temp_storage_bytes)}),
......
......@@ -14,7 +14,7 @@
#pragma once
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"
#include "paddle/pten/kernels/hybird/transpose.h"
......@@ -129,9 +129,9 @@ void HandleLargeDim(const DeviceContext& dev_ctx,
const std::vector<int64_t>& dims,
bool keep_dim) {
// shuffle the reduced dim to the end
const auto alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(input.place());
pten::DenseTensor shuffled_input = pten::DenseTensor(alloc, input.meta());
pten::DenseTensor shuffled_input = pten::DenseTensor(
pten::make_intrusive<paddle::experimental::SharedStorage>(input.place()),
input.meta());
GetShuffledInput<DeviceContext, OutT>(dev_ctx, input, &shuffled_input, dims);
......
......@@ -53,10 +53,9 @@ void Reduce(const DeviceContext& dev_ctx,
dev_ctx, x, out, dims, keep_dim, reduce_all);
}));
} else {
const auto alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(x.place());
pten::DenseTensor tmp_tensor = pten::DenseTensor(
alloc, pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
pten::make_intrusive<paddle::experimental::SharedStorage>(x.place()),
pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
// cast x tensor to out_dtype first
PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] {
......
......@@ -71,11 +71,10 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
kernel_context.EmplaceBackAttr(bias_after_scale);
auto out_meta = pten::UnchangedInferMeta(dense_x->meta());
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_backend));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
auto dense_out = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(kernel_backend)),
std::move(out_meta));
kernel_context.EmplaceBackOutput(dense_out);
Tensor out;
......@@ -238,10 +237,10 @@ Tensor scale_switch_case(const Tensor& x,
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
auto out_meta = pten::UnchangedInferMeta(dense_x->meta());
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_backend));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
auto dense_out = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(kernel_backend)),
std::move(out_meta));
Tensor out;
out.set_impl(dense_out);
......
......@@ -303,10 +303,10 @@ PADDLE_API {self.output} {self.api}({self.args["args_define"]}) {{
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
{input_tensors}
{self.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], self.infer_meta)}
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_backend));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
auto dense_out = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(kernel_backend)),
std::move(out_meta));
Tensor out;
out.set_impl(dense_out);
......@@ -345,7 +345,7 @@ def source_include(header_file_path):
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_declare.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册