Unverified commit b46e49de, authored by Chen Weihang, committed by GitHub

[Phi] Remove storage deps of empty (#40136)

* remove storage deps of empty

* remove invalid empty method

* remove error empty using

* fix test_sparse_utils_dev_api

* revert some sparse change

* add memset for conv grad

* resolve conflict

* resolve conflict

* resolve conflict
Parent commit: 6a0d60d2
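The recurring change in this diff replaces the context-only helper `phi::Empty<T, Context>(dev_ctx)` (which attached a `SharedStorage` up front) with a default-constructed `DenseTensor` whose meta is filled in by the matching InferMeta call, leaving allocation to the kernel through `dev_ctx`. A minimal sketch of the new pattern, adapted from the `Cast` helper changed below; the exact header list is an assumption:

```cpp
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/cast_kernel.h"

namespace phi {

template <typename T, typename Context>
DenseTensor Cast(const Context& dev_ctx, const DenseTensor& x, DataType out_dtype) {
  DenseTensor dense_out;                   // no storage attached yet
  MetaTensor meta_out(&dense_out);
  CastInferMeta(x, out_dtype, &meta_out);  // fills dims/dtype on dense_out
  CastKernel<T, Context>(dev_ctx, x, out_dtype, &dense_out);  // kernel allocates via dev_ctx and writes
  return dense_out;
}

}  // namespace phi
```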
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/phi/api/ext/dispatch.h"
 #include "paddle/phi/api/lib/kernel_dispatch.h"
+#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/kernels/cast_kernel.h"
 #include "paddle/phi/kernels/transfer_layout_kernel.h"
......
@@ -9,13 +9,22 @@ add_subdirectory(funcs)
 # phi depends all phi kernel targets
 set_property(GLOBAL PROPERTY PHI_KERNELS "")
+# [ 1. Common kernel compilation dependencies ]
 set(COMMON_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function im2col vol2col concat_and_split_functor softmax)
 # remove this dep after removing fluid deps on tensor creation
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
-# NOTE: Some kernels depend on some targets that are not commonly used.
+# [ 2. Kernels that most kernels depend on ]
+# There are a few kernels that are very basic operations, and most of the
+# kernels depend on these kernels.
+set(COMMON_BAISC_KERNELS empty_kernel full_kernel)
+kernel_library(empty_kernel DEPS ${COMMON_KERNEL_DEPS})
+kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel)
+# [ 3. Kernels with special dependencies ]
+# Some kernels depend on some targets that are not commonly used.
 # These targets are not suitable for common dependencies.
 # In this case, you need to manually generate them here.
 set(MANUAL_BUILD_KERNELS math_kernel softmax_kernel softmax_grad_kernel triangular_solve_grad_kernel)

@@ -24,8 +33,8 @@ kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
 kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
 kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_reduce)
-# auto parse and build kernel targets by cmake
+# 4. auto parse and build kernel targets by cmake
-register_kernels(EXCLUDES ${MANUAL_BUILD_KERNELS} DEPS ${COMMON_KERNEL_DEPS})
+register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS ${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS} )
 # phi sparse kernels
 add_subdirectory(sparse)
......
@@ -29,7 +29,7 @@ template <typename T, typename Context>
 DenseTensor Cast(const Context& dev_ctx,
                  const DenseTensor& x,
                  DataType out_dtype) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   CastInferMeta(x, out_dtype, &meta_out);
   CastKernel<T, Context>(dev_ctx, x, out_dtype, &dense_out);
......
@@ -38,7 +38,7 @@ template <
               std::is_same<T, phi::dtype::complex<double>>::value,
           bool> = true>
 DenseTensor Conj(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   UnchangedInferMeta(x, &meta_out);
   ConjKernel<T>(dev_ctx, x, &dense_out);

@@ -64,7 +64,7 @@ template <
               std::is_same<T, phi::dtype::complex<double>>::value,
           bool> = true>
 DenseTensor Real(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   RealAndImagInferMeta(x, &meta_out);
   RealKernel<T>(dev_ctx, x, &dense_out);

@@ -90,7 +90,7 @@ template <
               std::is_same<T, phi::dtype::complex<double>>::value,
           bool> = true>
 DenseTensor Imag(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   RealAndImagInferMeta(x, &meta_out);
   ImagKernel<T>(dev_ctx, x, &dense_out);
......
@@ -38,7 +38,7 @@ DenseTensor Concat(const Context& dev_ctx,
     meta_x_ptr.push_back(&meta_x.back());
   }
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ConcatInferMeta(meta_x_ptr, axis.to<int>(), &meta_out, /*is_runtime=*/true);
   ConcatKernel<T, Context>(dev_ctx, x, axis, &dense_out);
......
@@ -29,7 +29,7 @@ template <typename T, typename Context>
 DenseTensor Dot(const Context& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DotInferMeta(x, y, &meta_out);
   DotKernel<T, Context>(dev_ctx, x, y, &dense_out);
......
@@ -14,9 +14,9 @@
 #pragma once
-#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/common/scalar_array.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
 #include "paddle/phi/infermeta/nullary.h"
 #include "paddle/phi/infermeta/unary.h"

@@ -34,28 +34,17 @@ void EmptyLikeKernel(const Context& dev_ctx,
                      DataType dtype,
                      DenseTensor* out);
-// TODO(chenweihang): the tensor creation method need to be replaced later,
-// all kernel api call Empty here instead of making tensor self
 template <typename Context>
 DenseTensor Empty(const Context& dev_ctx, DenseTensorMeta&& meta) {
-  phi::DenseTensor dense_out(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(meta));
+  phi::DenseTensor dense_out;
+  dense_out.set_meta(meta);
+  dev_ctx.Alloc(&dense_out, dense_out.dtype());
   return dense_out;
 }
-template <typename T, typename Context>
-DenseTensor Empty(const Context& dev_ctx) {
-  return Empty(dev_ctx,
-               {paddle::experimental::CppTypeToDataType<T>::Type(),
-                {-1},
-                DataLayout::NCHW});
-}
 template <typename T, typename Context>
 DenseTensor Empty(const Context& dev_ctx, const ScalarArray& shape) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
   CreateInferMeta(shape, dtype, &meta_out);

@@ -65,7 +54,7 @@ DenseTensor Empty(const Context& dev_ctx, const ScalarArray& shape) {
 template <typename T, typename Context>
 DenseTensor EmptyLike(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
   CreateLikeInferMeta(x, dtype, &meta_out);
......
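After this change, `Empty(dev_ctx, meta)` simply sets the meta on a default-constructed tensor and allocates through the device context, while the typed shape overload routes through `CreateInferMeta` and the `EmptyKernel`. A short usage sketch of the two surviving overloads; the dtype, shape, and the already-initialized `dev_ctx` are illustrative assumptions, not code from this diff:

```cpp
#include <utility>

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/empty_kernel.h"

// Assumes dev_ctx has been initialized and given an allocator (as in the
// test changes later in this diff).
void EmptyUsageSketch(const phi::CPUContext& dev_ctx) {
  // Meta-based overload: set_meta + dev_ctx.Alloc, no InferMeta involved.
  phi::DenseTensorMeta meta(phi::DataType::FLOAT32, phi::make_ddim({2, 3}));
  phi::DenseTensor a = phi::Empty(dev_ctx, std::move(meta));

  // Shape-based overload: CreateInferMeta fills the meta, EmptyKernel allocates.
  phi::DenseTensor b = phi::Empty<float>(dev_ctx, {2, 3});
}
```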
@@ -40,7 +40,7 @@ DenseTensor Flatten(const Context& dev_ctx,
                     const DenseTensor& x,
                     int start_axis,
                     int stop_axis) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   FlattenInferMeta(x, start_axis, stop_axis, &meta_out);
   FlattenKernel<T, Context>(dev_ctx, x, start_axis, stop_axis, &dense_out);
......
@@ -41,7 +41,7 @@ template <typename T, typename Context>
 DenseTensor Full(const Context& dev_ctx,
                  const ScalarArray& shape,
                  const Scalar& val) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
   CreateInferMeta(shape, dtype, &meta_out);

@@ -53,7 +53,7 @@ template <typename T, typename Context>
 DenseTensor FullLike(const Context& dev_ctx,
                      const DenseTensor& x,
                      const Scalar& val) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
   CreateLikeInferMeta(x, dtype, &meta_out);
......
@@ -344,9 +344,8 @@ struct ReduceConfig {
                      const phi::GPUContext& dev_ctx,
                      phi::DenseTensor* tmp) {
    if (should_reduce_again) {
-      tmp->ResizeAndAllocate(phi::make_ddim(
+      tmp->Resize(phi::make_ddim(
          {static_cast<int64_t>(left_num * grid.z * grid.y * sizeof(Ty))}));
      output_data = dev_ctx.Alloc<Ty>(tmp);
    } else {
      output_data = y_data;

@@ -1053,8 +1052,8 @@ CubTensorReduceImpl(const Tx* x_data,
                                 reducer,
                                 reducer.initial(),
                                 stream);
-  phi::DenseTensor tmp =
-      phi::Empty<uint8_t>(dev_ctx, {static_cast<int64_t>(temp_storage_bytes)});
+  phi::DenseTensor tmp = phi::Empty<uint8_t, phi::GPUContext>(
+      dev_ctx, {static_cast<int64_t>(temp_storage_bytes)});
   auto* temp_storage = dev_ctx.Alloc<uint8_t>(&tmp);

@@ -1106,7 +1105,7 @@ void TensorReduceImpl(const phi::GPUContext& dev_ctx,
   //   y_data;
   phi::DDim tmp_ddim;
-  phi::DenseTensor tmp = phi::Empty<Ty>(dev_ctx);
+  phi::DenseTensor tmp;
   auto x_data = x.data<Tx>();
   auto y_data = y->data<Ty>();
......
@@ -329,8 +329,8 @@ void MatmulGradKernel(const Context& dev_ctx,
    x_conj = Conj<T>(dev_ctx, x);
    y_conj = Conj<T>(dev_ctx, y);
-    DenseTensor dx_help = Empty<T, Context>(dev_ctx);
-    DenseTensor dy_help = Empty<T, Context>(dev_ctx);
+    DenseTensor dx_help;
+    DenseTensor dy_help;
    if (transpose_x) {
      if (transpose_y) {

@@ -686,8 +686,8 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
      y_conj = Conj<T>(dev_ctx, y);
    }
-    DenseTensor dx_help = Empty<T>(dev_ctx);
-    DenseTensor dy_help = Empty<T>(dev_ctx);
+    DenseTensor dx_help;
+    DenseTensor dy_help;
    if (transpose_x) {
      if (transpose_y) {

@@ -1373,10 +1373,10 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
    VLOG(3) << "It need cost much time to reduce sum for the broadcast and "
               "wastes the memory. So we should avoid the case in reality";
-    DenseTensor out_dx_help = Empty<T>(dev_ctx);
-    DenseTensor out_dy_help = Empty<T>(dev_ctx);
-    DenseTensor out_d_ddx_help = Empty<T>(dev_ctx);
-    DenseTensor out_d_ddy_help = Empty<T>(dev_ctx);
+    DenseTensor out_dx_help;
+    DenseTensor out_dy_help;
+    DenseTensor out_d_ddx_help;
+    DenseTensor out_d_ddy_help;
    if (out_d_dout) {
      ddx_conj = Conj<T>(dev_ctx, ddx);
......
@@ -49,7 +49,7 @@ void TriangularSolveGradKernel(const Context& dev_ctx,
   DenseTensor dy_bst = phi::Empty<T, Context>(dev_ctx, y_bst_dims_array);
   if (dy) {
     // calculate x's conjugate for complex
-    DenseTensor x_conj = phi::Empty<T, Context>(dev_ctx);
+    DenseTensor x_conj;
     x_conj.Resize(x.dims());
     phi::funcs::ForRange<Context> x_for_range(dev_ctx, x.numel());

@@ -76,7 +76,7 @@ void TriangularSolveGradKernel(const Context& dev_ctx,
   DenseTensor dx_bst = phi::Empty<T, Context>(dev_ctx, x_bst_dims_array);
   if (dx) {
     // calculate x's conjugate for complex
-    DenseTensor out_conj = phi::Empty<T, Context>(dev_ctx);
+    DenseTensor out_conj;
     out_conj.Resize(out.dims());
     phi::funcs::ForRange<Context> out_for_range(dev_ctx, out.numel());
......
@@ -109,7 +109,7 @@ template <typename T, typename Context>
 DenseTensor Add(const Context& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ElementwiseInferMeta(x, y, &meta_out);
   AddKernel<T, Context>(dev_ctx, x, y, &dense_out);

@@ -120,7 +120,7 @@ template <typename T, typename Context>
 DenseTensor Subtract(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ElementwiseInferMeta(x, y, &meta_out);
   SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);

@@ -131,7 +131,7 @@ template <typename T, typename Context>
 DenseTensor Divide(const Context& dev_ctx,
                    const DenseTensor& x,
                    const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ElementwiseInferMeta(x, y, &meta_out);
   DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);

@@ -142,7 +142,7 @@ template <typename T, typename Context>
 DenseTensor Multiply(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ElementwiseInferMeta(x, y, &meta_out);
   MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);

@@ -154,7 +154,7 @@ DenseTensor Mean(const Context& dev_ctx,
                  const DenseTensor& x,
                  const std::vector<int64_t>& axis,
                  bool keep_dim) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ReduceInferMetaBase(x, axis, keep_dim, false, x.dtype(), &meta_out);
   MeanKernel<T, Context>(dev_ctx, x, axis, keep_dim, &dense_out);

@@ -167,7 +167,7 @@ DenseTensor Sum(const Context& dev_ctx,
                 const std::vector<int64_t>& axis,
                 DataType dtype,
                 bool keep_dim) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   SumInferMeta(x, axis, dtype, keep_dim, &meta_out);
   SumKernel<T, Context>(dev_ctx, x, axis, dtype, keep_dim, &dense_out);
......
@@ -35,7 +35,7 @@ DenseTensor Matmul(const Context& dev_ctx,
                    const DenseTensor& y,
                    bool transpose_x = false,
                    bool transpose_y = false) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   MatmulInferMeta(x, y, transpose_x, transpose_y, &meta_out);
   MatmulKernel<T, Context>(dev_ctx, x, y, transpose_x, transpose_y, &dense_out);
......
@@ -38,7 +38,7 @@ template <typename T, typename Context>
 DenseTensor Reshape(const Context& dev_ctx,
                     const DenseTensor& x,
                     const std::vector<int64_t>& shape) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   InferMetaFromVecValue(x, shape, &meta_out);
   ReshapeKernel<Context>(dev_ctx, x, ScalarArray(shape), &dense_out);
......
@@ -34,7 +34,7 @@ DenseTensor Scale(const Context& dev_ctx,
                   const Scalar& scale,
                   float bias,
                   bool bias_after_scale) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   UnchangedInferMeta(x, &meta_out);
   ScaleKernel<T, Context>(
......
@@ -25,7 +25,7 @@ void SignKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out);
 template <typename T, typename Context>
 DenseTensor Sign(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   UnchangedInferMeta(x, &meta_out);
   SignKernel<T, Context>(dev_ctx, x, &dense_out);
......
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/kernels/empty_kernel.h"
+#include "paddle/phi/kernels/sparse/convolution_kernel.h"
 namespace phi {
 namespace sparse {

@@ -45,6 +47,7 @@ std::vector<DenseTensor> Conv3dGrad(const Context& dev_ctx,
                                     const int groups) {
   DenseTensor x_grad = phi::Empty<T, Context>(dev_ctx);
   DenseTensor kernel_grad = phi::Empty<T, Context>(dev_ctx);
+  // TODO(zhangkaihuo): call InferMeta func here
   Conv3dGradKernel<T, Context>(dev_ctx,
                                x,
                                rulebook,
......
@@ -14,11 +14,24 @@ limitations under the License. */
 #pragma once
+#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/kernels/empty_kernel.h"
 namespace phi {
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx) {
+  phi::DenseTensor dense_out(
+      phi::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      {paddle::experimental::CppTypeToDataType<T>::Type(),
+       {-1},
+       DataLayout::NCHW});
+  return dense_out;
+}
 namespace sparse {
 struct Dims4D {
......
@@ -74,6 +74,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
   dev_ctx.Alloc(
       kernel_grad, kernel_grad->dtype(), kernel_grad->numel() * sizeof(T));
   T* d_kernel_ptr = kernel_grad->data<T>();
+  memset(d_kernel_ptr, 0, sizeof(T) * kernel_grad->numel());
   Gather<T>(x.non_zero_elements().data<T>(),
             rulebook_ptr + rulebook_len,
......
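The `memset` added above matters because the kernel-gradient buffer is now raw, uninitialized memory obtained from `dev_ctx.Alloc`, and the CPU gradient kernel appears to accumulate partial results into it rather than fully overwrite it. A hedged sketch of that allocate-then-zero pattern; the function and parameter names are illustrative, not code from this diff:

```cpp
#include <cstring>  // std::memset

#include "paddle/phi/core/dense_tensor.h"

// Illustrative only: zero a freshly allocated dense tensor before a kernel
// accumulates into it. Host memory only; memset is not valid on device buffers.
template <typename T, typename Context>
void AllocAndZero(const Context& dev_ctx, phi::DenseTensor* grad) {
  T* ptr = dev_ctx.template Alloc<T>(grad);        // uninitialized host memory
  std::memset(ptr, 0, sizeof(T) * grad->numel());  // start accumulation from zero
}
```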
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
+#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"

@@ -63,8 +64,8 @@ template <typename T, typename Context>
 SparseCooTensor DenseToSparseCoo(const Context& dev_ctx,
                                  const DenseTensor& x,
                                  const int64_t sparse_dim) {
-  DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
-  DenseTensor values = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor indices;
+  DenseTensor values;
   SparseCooTensor coo(indices, values, x.dims());
   DenseToSparseCooKernel<T, Context>(dev_ctx, x, sparse_dim, &coo);
   return coo;

@@ -78,8 +79,8 @@ void SparseCsrToCooKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 SparseCooTensor SparseCsrToCoo(const Context& dev_ctx,
                                const SparseCsrTensor& x) {
-  DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
-  DenseTensor values = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor indices;
+  DenseTensor values;
   SparseCooTensor coo(indices, values, x.dims());
   SparseCsrToCooKernel<T, Context>(dev_ctx, x, &coo);
   return coo;

@@ -93,9 +94,9 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 SparseCsrTensor SparseCooToCsr(const Context& dev_ctx,
                                const SparseCooTensor& x) {
-  DenseTensor non_zero_crows = phi::Empty<int64_t, Context>(dev_ctx);
-  DenseTensor non_zero_cols = phi::Empty<int64_t, Context>(dev_ctx);
-  DenseTensor non_zero_elements = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor non_zero_crows;
+  DenseTensor non_zero_cols;
+  DenseTensor non_zero_elements;
   SparseCsrTensor csr(
       non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
   SparseCooToCsrKernel<T, Context>(dev_ctx, x, &csr);

@@ -113,8 +114,8 @@ void DenseToSparseCsrKernel(const Context& dev_ctx,
                     phi::errors::InvalidArgument(
                         "SparseCsrTensor only support 2-D or 3-D Tensor."));
   const int64_t sparse_dim = x_dims.size() == 2 ? 2 : 3;
-  DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
-  DenseTensor values = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor indices;
+  DenseTensor values;
   SparseCooTensor coo(indices, values, x.dims());
   DenseToSparseCooKernel<T, Context>(dev_ctx, x, sparse_dim, &coo);
   SparseCooToCsrKernel<T, Context>(dev_ctx, coo, out);

@@ -122,9 +123,9 @@ void DenseToSparseCsrKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 SparseCsrTensor DenseToSparseCsr(const Context& dev_ctx, const DenseTensor& x) {
-  DenseTensor non_zero_crows = phi::Empty<int64_t, Context>(dev_ctx);
-  DenseTensor non_zero_cols = phi::Empty<int64_t, Context>(dev_ctx);
-  DenseTensor non_zero_elements = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor non_zero_crows;
+  DenseTensor non_zero_cols;
+  DenseTensor non_zero_elements;
   SparseCsrTensor csr(
       non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
   DenseToSparseCsrKernel<T, Context>(dev_ctx, x, &csr);

@@ -148,8 +149,8 @@ template <typename T, typename Context>
 void SparseCsrToDenseKernel(const Context& dev_ctx,
                             const SparseCsrTensor& x,
                             DenseTensor* out) {
-  DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
-  DenseTensor values = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor indices;
+  DenseTensor values;
   SparseCooTensor coo(indices, values, x.dims());
   SparseCsrToCooKernel<T, Context>(dev_ctx, x, &coo);
   SparseCooToDenseKernel<T, Context>(dev_ctx, coo, out);
......
@@ -50,7 +50,7 @@ std::vector<DenseTensor> Split(const Context& dev_ctx,
   result.reserve(out_number);
   for (size_t i = 0; i < out_number; ++i) {
-    result.emplace_back(phi::Empty<T, Context>(dev_ctx));
+    result.emplace_back(DenseTensor());
     out_meta.emplace_back(&result.back());
     out_meta_ptr.push_back(&out_meta.back());
   }
......
@@ -32,7 +32,7 @@ template <typename T, typename Context>
 DenseTensor Transpose(const Context& dev_ctx,
                       const DenseTensor& x,
                       const std::vector<int>& axis) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   TransposeInferMeta(x, axis, &meta_out);
   TransposeKernel<T, Context>(dev_ctx, x, axis, &dense_out);
......
@@ -20,6 +20,7 @@
 #include "paddle/phi/api/lib/api_registry.h"
 #include "paddle/phi/api/lib/kernel_dispatch.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
+#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/common/scalar.h"
 #include "paddle/phi/common/scalar_array.h"
 #include "paddle/phi/core/kernel_registry.h"
......
@@ -90,6 +90,10 @@ void TestDenseToSparseCoo(const DenseTensor& dense_x,
   phi::CPUContext dev_ctx_cpu;
   dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   // 1. test cpu
   auto cpu_sparse_out =

@@ -300,6 +304,11 @@ void TestSparseCsrToCoo(const DDim& dense_dims,
   // 1. test cpu
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   auto cpu_sparse_out = sparse::SparseCsrToCoo<T>(dev_ctx_cpu, csr);
   CheckResult<T, int64_t>(&dev_ctx_cpu,
                           cpu_sparse_out,

@@ -473,6 +482,11 @@ void TestCooToCsr(const DDim& dense_dims,
   // 1. test cpu
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   auto cpu_sparse_out = sparse::SparseCooToCsr<T>(dev_ctx_cpu, coo);
   CheckCsrResult<T, int64_t>(&dev_ctx_cpu,
                              cpu_sparse_out,

@@ -563,6 +577,11 @@ void TestDenseToSparseCsr(const DenseTensor& dense_x,
   const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
       paddle::platform::CPUPlace());
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   // 1. test cpu
   auto cpu_sparse_out = sparse::DenseToSparseCsr<T>(dev_ctx_cpu, dense_x);

@@ -667,6 +686,11 @@ void TestSparseCooToDense(const DDim& dense_dims,
                           const int64_t non_zero_num,
                           const int64_t sparse_dim) {
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
       paddle::platform::CPUPlace());

@@ -836,6 +860,11 @@ void TestSparseCsrToDense(const DDim& dense_dims,
   // 1. test cpu
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   DenseTensor cpu_sparse_out = sparse::SparseCsrToDense<T>(dev_ctx_cpu, csr);
   int cmp_cpu = memcmp(cpu_sparse_out.data<T>(),
                        dense_data.data(),
......
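All of the test changes above follow one fixture pattern: initialize the `CPUContext` and hand it a host allocator before any kernel runs, since kernels now allocate their outputs through `dev_ctx.Alloc` instead of through tensor-owned storage. A condensed sketch of that setup; the helper name and the `allocator_facade` header path are assumptions based on the lines above:

```cpp
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/place.h"

// Hypothetical helper mirroring the repeated test setup above.
void InitTestCPUContext(phi::CPUContext* dev_ctx_cpu) {
  dev_ctx_cpu->Init();
  // Kernels call dev_ctx.Alloc for their outputs, so the context needs a
  // host allocator wired in before the first kernel runs.
  dev_ctx_cpu->SetAllocator(
      paddle::memory::allocation::AllocatorFacade::Instance()
          .GetAllocator(phi::CPUPlace())
          .get());
}
```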