Unverified commit e78eb3f4, authored by Chen Weihang, committed by GitHub

replace moves_storage and alloc_construct (#38134)

Parent: 49108efa
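The change is mechanical across all hunks: the old two-step pattern of building a DefaultAllocator and then constructing the output DenseTensor from it is replaced by direct construction on an intrusive SharedStorage handle, and the custom-operator path switches from experimental::MovesStorage to experimental::MovesSharedStorage accordingly. A minimal before/after sketch of the construction pattern, lifted from the hunks below (dev_ctx and out_meta are assumed to be in scope, as in the diffed functions; the two declarations are alternatives, not meant to coexist in one scope):

    // Before (alloc_construct): create an allocator, then the tensor.
    const auto allocator =
        std::make_shared<paddle::experimental::DefaultAllocator>(
            dev_ctx.GetPlace());
    pten::DenseTensor dense_out(allocator, out_meta);

    // After: construct directly on an intrusive SharedStorage built for
    // the same place, moving the meta in to avoid a copy.
    pten::DenseTensor dense_out(
        pten::make_intrusive<paddle::experimental::SharedStorage>(
            dev_ctx.GetPlace()),
        std::move(out_meta));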
@@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
                           "Tensors.",
                           vec_true_outs.size(), outs.size()));
     for (size_t j = 0; j < vec_true_outs.size(); ++j) {
-      experimental::MovesStorage(
+      experimental::MovesSharedStorage(
           std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(j).impl())
               .get(),
           vec_true_outs.at(j));
     }
   } else {
     auto* true_out = ctx.Output<Tensor>(out_name);
-    experimental::MovesStorage(
+    experimental::MovesSharedStorage(
         std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(i).impl())
             .get(),
         true_out);
...
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/pten/api/lib/api_registry.h"
 #include "paddle/pten/api/lib/kernel_dispatch.h"
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/core/kernel_registry.h"
 #include "paddle/pten/include/core.h"
 #include "paddle/pten/include/infermeta.h"
@@ -62,10 +62,10 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
   auto out_meta = UnchangedInferMeta(dense_x->meta());
   // 5. Prepare outputs
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          pten::TransToFluidPlace(backend));
-  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(backend)),
+      std::move(out_meta));
   kernel_context.EmplaceBackOutput(dense_out);
   Tensor out;
   out.set_impl(dense_out);
...
@@ -14,7 +14,7 @@
 #pragma once
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/creation.h"
 #include "paddle/pten/kernels/cuda/creation.h"
@@ -32,10 +32,10 @@ DenseTensor FillAnyLike(
     Backend backend = Backend::UNDEFINED,  // Is backend needed here?
     DataLayout layout = DataLayout::UNDEFINED) {
   auto out_meta = FullLikeInferMeta(x.meta(), dtype, layout);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   FillAnyLike<T>(dev_ctx, val, &dense_out);
   return dense_out;
 }
...
@@ -15,7 +15,7 @@
 #pragma once
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/linalg.h"
 #include "paddle/pten/kernels/cuda/linalg.h"
@@ -27,10 +27,10 @@ DenseTensor Dot(const ContextT& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& y) {
   auto out_meta = DotInferMeta(x.meta(), y.meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Dot<T>(dev_ctx, x, y, &dense_out);
   return dense_out;
 }
...
@@ -15,7 +15,7 @@
 #pragma once
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/manipulation.h"
 #include "paddle/pten/kernels/cuda/manipulation.h"
@@ -29,10 +29,10 @@ DenseTensor Flatten(const ContextT& dev_ctx,
                     int start_axis,
                     int stop_axis) {
   auto out_meta = FlattenInferMeta(x.meta(), start_axis, stop_axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Flatten<T>(dev_ctx, x, start_axis, stop_axis, &dense_out);
   return dense_out;
 }
@@ -43,10 +43,10 @@ DenseTensor Cast(const ContextT& dev_ctx,
                  DataType out_dtype,
                  DataType in_dtype) {
   auto out_meta = CastInferMeta(x.meta(), out_dtype);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Cast<T>(dev_ctx, x, out_dtype, in_dtype, &dense_out);
   return dense_out;
 }
@@ -56,10 +56,10 @@ DenseTensor Reshape(const ContextT& dev_ctx,
                     const DenseTensor& x,
                     const std::vector<int64_t>& shape) {
   auto out_meta = InferMetaFromVecValue(x.meta(), shape);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Reshape(dev_ctx, x, ScalarArray(shape), &dense_out);
   return dense_out;
 }
...
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 // See Note: [ How do we organize the kernel directory ]
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/math.h"
 #include "paddle/pten/kernels/cuda/math.h"
@@ -25,10 +25,10 @@ namespace pten {
 template <typename T, typename ContextT>
 DenseTensor Sign(const ContextT& dev_ctx, const DenseTensor& x) {
   auto out_meta = UnchangedInferMeta(x.meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Sign<T>(dev_ctx, x, &dense_out);
   return dense_out;
 }
@@ -39,10 +39,10 @@ DenseTensor Mean(const ContextT& dev_ctx,
                  const std::vector<int64_t>& axis,
                  bool keep_dim) {
   auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   bool reduce_all = false;
   DataType out_dtype = pten::DataType::UNDEFINED;
   Mean<T>(
@@ -57,10 +57,10 @@ DenseTensor Sum(const ContextT& dev_ctx,
                 DataType dtype,
                 bool keep_dim) {
   auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      out_meta);
   // The real value of reduce_all will be get in kernel
   // so use default value(false) is OK.
@@ -82,10 +82,10 @@ DenseTensor Scale(const ContextT& dev_ctx,
                   float bias,
                   bool bias_after_scale) {
   auto out_meta = UnchangedInferMeta(x.meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   Scale<T>(dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
   return dense_out;
 }
@@ -96,10 +96,10 @@ DenseTensor Add(const ContextT& dev_ctx,
                 const DenseTensor& y,
                 int axis) {
   auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   ElementwiseAdd<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
@@ -110,10 +110,10 @@ DenseTensor Subtract(const ContextT& dev_ctx,
                      const DenseTensor& y,
                      int axis) {
   auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   ElementwiseSub<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
@@ -124,10 +124,10 @@ DenseTensor Divide(const ContextT& dev_ctx,
                    const DenseTensor& y,
                    int axis) {
   auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   ElementwiseDiv<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
@@ -138,10 +138,10 @@ DenseTensor Multiply(const ContextT& dev_ctx,
                      const DenseTensor& y,
                      int axis) {
   auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
   ElementwiseMul<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
...
@@ -804,10 +804,9 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
   // temp_output should be stored temp_data in output_data space or stored in
   // y_data;
   pten::DDim tmp_ddim;
-  const auto alloc =
-      std::make_shared<paddle::experimental::DefaultAllocator>(y->place());
   pten::DenseTensor tmp = pten::DenseTensor(
-      alloc, pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout()));
+      pten::make_intrusive<paddle::experimental::SharedStorage>(y->place()),
+      pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout()));
   auto x_data = x.data<Tx>();
   auto y_data = y->mutable_data<Ty>();
@@ -847,10 +846,8 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
                             reducer.initial(),
                             stream);
   // framework::Tensor tmp;
-  const auto alloc =
-      std::make_shared<paddle::experimental::DefaultAllocator>(x.place());
   pten::DenseTensor tmp = pten::DenseTensor(
-      alloc,
+      pten::make_intrusive<paddle::experimental::SharedStorage>(x.place()),
       pten::DenseTensorMeta(pten::DataType::UINT8,
                             paddle::framework::make_ddim(
                                 {static_cast<int64_t>(temp_storage_bytes)}),
...
@@ -14,7 +14,7 @@
 #pragma once
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/kernels/hybird/eigen/common.h"
 #include "paddle/pten/kernels/hybird/transpose.h"
@@ -129,9 +129,9 @@ void HandleLargeDim(const DeviceContext& dev_ctx,
                     const std::vector<int64_t>& dims,
                     bool keep_dim) {
   // shuffle the reduced dim to the end
-  const auto alloc =
-      std::make_shared<paddle::experimental::DefaultAllocator>(input.place());
-  pten::DenseTensor shuffled_input = pten::DenseTensor(alloc, input.meta());
+  pten::DenseTensor shuffled_input = pten::DenseTensor(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(input.place()),
+      input.meta());
   GetShuffledInput<DeviceContext, OutT>(dev_ctx, input, &shuffled_input, dims);
...
@@ -53,10 +53,9 @@ void Reduce(const DeviceContext& dev_ctx,
                        dev_ctx, x, out, dims, keep_dim, reduce_all);
                  }));
   } else {
-    const auto alloc =
-        std::make_shared<paddle::experimental::DefaultAllocator>(x.place());
     pten::DenseTensor tmp_tensor = pten::DenseTensor(
-        alloc, pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
+        pten::make_intrusive<paddle::experimental::SharedStorage>(x.place()),
+        pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
     // cast x tensor to out_dtype first
     PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] {
...
@@ -71,11 +71,10 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
   kernel_context.EmplaceBackAttr(bias_after_scale);

   auto out_meta = pten::UnchangedInferMeta(dense_x->meta());
-
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          pten::TransToFluidPlace(kernel_backend));
-  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(kernel_backend)),
+      std::move(out_meta));
   kernel_context.EmplaceBackOutput(dense_out);

   Tensor out;
@@ -238,10 +237,10 @@ Tensor scale_switch_case(const Tensor& x,
   auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());

   auto out_meta = pten::UnchangedInferMeta(dense_x->meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          pten::TransToFluidPlace(kernel_backend));
-  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(kernel_backend)),
+      std::move(out_meta));
   Tensor out;
   out.set_impl(dense_out);
...
@@ -303,10 +303,10 @@ PADDLE_API {self.output} {self.api}({self.args["args_define"]}) {{
   auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
 {input_tensors}
 {self.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], self.infer_meta)}
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          pten::TransToFluidPlace(kernel_backend));
-  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(kernel_backend)),
+      std::move(out_meta));
   Tensor out;
   out.set_impl(dense_out);
@@ -345,7 +345,7 @@ def source_include(header_file_path):
 #include "paddle/pten/api/lib/api_registry.h"
 #include "paddle/pten/api/lib/kernel_declare.h"
 #include "paddle/pten/api/lib/kernel_dispatch.h"
-#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/core/kernel_registry.h"
 #include "paddle/pten/include/core.h"
 #include "paddle/pten/include/infermeta.h"
...