Unverified commit a4d2878a, authored by zhangkaihuo, committed via GitHub

[Sparse] Use shorter function names (#45339)

Parent 4c780311
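The hunks below systematically rename the sparse-tensor accessors to shorter forms: `non_zero_indices()` → `indices()`, `non_zero_elements()` → `values()`, `non_zero_crows()` → `crows()`, `non_zero_cols()` → `cols()`, plus the matching `mutable_*` setters. The sketch below only illustrates how caller code reads after the rename; it assumes the phi sparse-tensor headers, and `SumCooValues`/`CsrValuesPtr` are hypothetical helpers invented for this example, not part of the commit.

```cpp
// Illustrative sketch only: the renamed accessors in use. Assumes the phi
// sparse headers; SumCooValues and CsrValuesPtr are hypothetical helpers.
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"

namespace phi {
namespace sparse {

template <typename T>
T SumCooValues(const SparseCooTensor& coo) {
  // Before this commit: coo.non_zero_indices() / coo.non_zero_elements().
  const DenseTensor& indices = coo.indices();  // shorter name
  const DenseTensor& values = coo.values();    // shorter name
  (void)indices;
  T sum = static_cast<T>(0);
  const T* ptr = values.data<T>();
  for (int64_t i = 0; i < values.numel(); ++i) {
    sum += ptr[i];
  }
  return sum;
}

template <typename T>
const T* CsrValuesPtr(const SparseCsrTensor& csr) {
  // Before: non_zero_crows() / non_zero_cols() / non_zero_elements().
  (void)csr.crows();
  (void)csr.cols();
  return csr.values().data<T>();
}

}  // namespace sparse
}  // namespace phi
```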
@@ -25,17 +25,17 @@ template <typename T, typename IntT>
 void CoalesceCPUKernel(const CPUContext& dev_ctx,
                        const SparseCooTensor& x,
                        SparseCooTensor* out) {
-  const DenseTensor& x_indices = x.non_zero_indices();
-  const DenseTensor& x_values = x.non_zero_elements();
+  const DenseTensor& x_indices = x.indices();
+  const DenseTensor& x_values = x.values();
   DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x_indices);
   DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, x_values);
-  const int64_t sparse_dim = x.non_zero_indices().dims()[0];
+  const int64_t sparse_dim = x.indices().dims()[0];
   std::vector<IntT> sparse_offsets(sparse_dim), x_indexs(x.nnz());
   phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
       x.dims(), sparse_dim, sparse_offsets.data());
-  phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data<IntT>(),
+  phi::funcs::sparse::FlattenIndices(x.indices().data<IntT>(),
                                      sparse_offsets.data(),
                                      x.nnz(),
                                      sparse_dim,
@@ -45,7 +45,7 @@ void CoalesceCPUKernel(const CPUContext& dev_ctx,
   const T* x_values_ptr = x_values.data<T>();
   const int64_t stride =
-      x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1];
+      x.dims().size() == sparse_dim ? 1 : x.values().dims()[1];
   std::map<IntT, std::vector<int64_t>> indices_to_index;
   for (uint64_t i = 0; i < x_indexs.size(); i++) {
@@ -98,10 +98,9 @@ template <typename T, typename Context>
 void CoalesceKernel(const Context& dev_ctx,
                     const SparseCooTensor& x,
                     SparseCooTensor* out) {
-  PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "CoalesceCPUKernel", ([&] {
-        CoalesceCPUKernel<T, data_t>(dev_ctx, x, out);
-      }));
+  PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "CoalesceCPUKernel", ([&] {
+                                 CoalesceCPUKernel<T, data_t>(dev_ctx, x, out);
+                               }));
 }
 }  // namespace sparse
......
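Most hunks in this commit touch the first argument of `PD_VISIT_BASE_INTEGRAL_TYPES(...)`, which dispatches on the runtime dtype of the index tensor and binds `data_t` inside the lambda. The block below is a simplified sketch of that dispatch idiom so the `([&] { ... })` argument in the hunks is easier to read; it is not phi's actual macro definition, and the enum and error path are placeholders invented for illustration.

```cpp
// Simplified sketch of the dtype-dispatch idiom used throughout this diff.
// NOT the real PD_VISIT_BASE_INTEGRAL_TYPES.
#include <cstdint>
#include <stdexcept>
#include <string>

enum class DataType { INT32, INT64 };  // placeholder enum for the sketch

#define VISIT_BASE_INTEGRAL_TYPES_SKETCH(dtype, name, ...)                  \
  [&] {                                                                     \
    switch (dtype) {                                                        \
      case DataType::INT32: {                                               \
        using data_t = int32_t; /* visible to the lambda body below */      \
        return __VA_ARGS__();                                               \
      }                                                                     \
      case DataType::INT64: {                                               \
        using data_t = int64_t;                                             \
        return __VA_ARGS__();                                               \
      }                                                                     \
      default:                                                              \
        throw std::runtime_error(std::string(name) + ": unexpected dtype"); \
    }                                                                       \
  }()

// Usage mirrors the kernels in this diff: the lambda instantiates the CPU
// kernel with data_t as the concrete index type, e.g.
//   VISIT_BASE_INTEGRAL_TYPES_SKETCH(dtype, "CoalesceCPUKernel", ([&] {
//     CoalesceCPUKernel<float, data_t>(dev_ctx, x, out);
//   }));
```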
@@ -43,8 +43,8 @@ void ProductRuleBook(const Context& dev_ctx,
                      DenseTensor* rulebook,
                      int* counter_per_kernel) {
   const int64_t non_zero_num = x.nnz();
-  const auto& non_zero_indices = x.non_zero_indices();
-  const IntT* indices_ptr = non_zero_indices.data<IntT>();
+  const auto& indices = x.indices();
+  const IntT* indices_ptr = indices.data<IntT>();
   int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   memset(counter_per_kernel, 0, kernel_size * sizeof(int));
@@ -155,9 +155,8 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
       paddle::experimental::CppTypeToDataType<IntT>::Type(),
       {sparse_dim, out_non_zero_num},
       DataLayout::NCHW);
-  DenseTensorMeta values_meta(x.dtype(),
-                              {out_non_zero_num, out_channels},
-                              x.non_zero_elements().layout());
+  DenseTensorMeta values_meta(
+      x.dtype(), {out_non_zero_num, out_channels}, x.values().layout());
   phi::DenseTensor out_indices = phi::Empty(dev_ctx, std::move(indices_meta));
   phi::DenseTensor out_values = phi::Empty(dev_ctx, std::move(values_meta));
   IntT* out_indices_ptr = out_indices.data<IntT>();
......
@@ -78,17 +78,13 @@ void Conv3dCooGradCPUKernel(const CPUContext& dev_ctx,
   int half_kernel_size = kernel_size / 2;
   auto blas = phi::funcs::GetBlas<CPUContext, T>(dev_ctx);
-  DenseTensor x_grad_indices =
-      phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
-  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  DenseTensor x_grad_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
+  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
   T* x_grad_values_ptr = x_grad_values.data<T>();
   memset(x_grad_values_ptr, 0, sizeof(T) * x_grad_values.numel());
   memset(d_x_features_ptr, 0, sizeof(T) * d_x_features.numel());
-  phi::Copy<CPUContext>(dev_ctx,
-                        x.non_zero_indices(),
-                        dev_ctx.GetPlace(),
-                        false,
-                        &x_grad_indices);
+  phi::Copy<CPUContext>(
+      dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &x_grad_indices);
   x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
   std::vector<IntT> offsets(kernel_size + 1);
@@ -104,27 +100,26 @@ void Conv3dCooGradCPUKernel(const CPUContext& dev_ctx,
   offsets[kernel_size] = offset;
   if (subm) {
-    phi::funcs::sparse::SubmPreProcess<T, CPUContext>(
-        dev_ctx,
-        x,
-        kernel,
-        out_grad.non_zero_elements(),
-        in_channels,
-        out_channels,
-        half_kernel_size,
-        kernel_grad,
-        &x_grad_values);
+    phi::funcs::sparse::SubmPreProcess<T, CPUContext>(dev_ctx,
+                                                      x,
+                                                      kernel,
+                                                      out_grad.values(),
+                                                      in_channels,
+                                                      out_channels,
+                                                      half_kernel_size,
+                                                      kernel_grad,
+                                                      &x_grad_values);
     if (max_count == 0) {
       return;
     }
   }
-  Gather<T, IntT>(x.non_zero_elements().data<T>(),
+  Gather<T, IntT>(x.values().data<T>(),
                   rulebook_ptr + rulebook_len,
                   rulebook_len,
                   in_channels,
                   in_features_ptr);
-  Gather<T, IntT>(out_grad.non_zero_elements().data<T>(),
+  Gather<T, IntT>(out_grad.values().data<T>(),
                   rulebook_ptr + rulebook_len * 2,
                   rulebook_len,
                   out_channels,
@@ -197,7 +192,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
                          SparseCooTensor* x_grad,
                          DenseTensor* kernel_grad) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "Conv3dCooGradCPUKernel", ([&] {
+      x.indices().dtype(), "Conv3dCooGradCPUKernel", ([&] {
         Conv3dCooGradCPUKernel<T, data_t>(dev_ctx,
                                           x,
                                           kernel,
......
@@ -126,11 +126,8 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
   T* in_features_ptr = in_features.data<T>();
   T* out_features_ptr = out_features.data<T>();
-  Gather<T, IntT>(x.non_zero_elements().data<T>(),
-                  rulebook_ptr + n,
-                  n,
-                  in_channels,
-                  in_features_ptr);
+  Gather<T, IntT>(
+      x.values().data<T>(), rulebook_ptr + n, n, in_channels, in_features_ptr);
   // 3. call gemm for every werght
   auto blas = phi::funcs::GetBlas<CPUContext, T>(dev_ctx);
@@ -167,7 +164,7 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
   }
   // 4. scatter
-  T* out_values_ptr = out->mutable_non_zero_elements()->data<T>();
+  T* out_values_ptr = out->mutable_values()->data<T>();
   memset(out_values_ptr, 0, sizeof(T) * out->nnz() * out_channels);
   Scatter<T, IntT>(
       out_features_ptr, rulebook_ptr + n * 2, n, out_channels, out_values_ptr);
@@ -186,21 +183,20 @@ void Conv3dCooKernel(const Context& dev_ctx,
                      SparseCooTensor* out,
                      DenseTensor* rulebook,
                      DenseTensor* counter) {
-  PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "Conv3dCooCPUKernel", ([&] {
-        Conv3dCooCPUKernel<T, data_t>(dev_ctx,
-                                      x,
-                                      kernel,
-                                      paddings,
-                                      dilations,
-                                      strides,
-                                      groups,
-                                      subm,
-                                      key,
-                                      out,
-                                      rulebook,
-                                      counter);
-      }));
+  PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "Conv3dCooCPUKernel", ([&] {
+                                 Conv3dCooCPUKernel<T, data_t>(dev_ctx,
+                                                               x,
+                                                               kernel,
+                                                               paddings,
+                                                               dilations,
+                                                               strides,
+                                                               groups,
+                                                               subm,
+                                                               key,
+                                                               out,
+                                                               rulebook,
+                                                               counter);
+                               }));
 }
 }  // namespace sparse
......
@@ -33,9 +33,9 @@ template <typename T, typename IntT, typename Context>
 void AllocCsrPtr(const Context& dev_ctx,
                  const SparseCsrTensor& x,
                  SparseCsrTensor* dx) {
-  DenseTensor dx_crows = phi::EmptyLike<IntT>(dev_ctx, x.non_zero_crows());
-  DenseTensor dx_cols = phi::EmptyLike<IntT>(dev_ctx, x.non_zero_cols());
-  DenseTensor dx_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  DenseTensor dx_crows = phi::EmptyLike<IntT>(dev_ctx, x.crows());
+  DenseTensor dx_cols = phi::EmptyLike<IntT>(dev_ctx, x.cols());
+  DenseTensor dx_values = phi::EmptyLike<T>(dev_ctx, x.values());
   dx->SetMember(dx_crows, dx_cols, dx_values, x.dims());
 }
@@ -43,8 +43,8 @@ template <typename T, typename IntT, typename Context>
 void AllocCooPtr(const Context& dev_ctx,
                  const SparseCooTensor& x,
                  SparseCooTensor* dx) {
-  DenseTensor dx_indices = phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
-  DenseTensor dx_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  DenseTensor dx_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
+  DenseTensor dx_values = phi::EmptyLike<T>(dev_ctx, x.values());
   dx->SetMember(dx_indices, dx_values, x.dims(), true);
 }
@@ -88,7 +88,7 @@ void ElementWiseSubtractCsrGradCPUKernel(const Context& dev_ctx,
     AllocCsrPtr<T, IntT>(dev_ctx, y, dy);
     Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dy);
     phi::NegativeKernel<T, Context>(
-        dev_ctx, dout.non_zero_elements(), dy->mutable_non_zero_elements());
+        dev_ctx, dout.values(), dy->mutable_values());
   }
 }
@@ -131,7 +131,7 @@ void ElementWiseDivideCsrGradCPUKernel(const Context& dev_ctx,
     AllocCsrPtr<T, IntT>(dev_ctx, y, dy);
     Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dy);
     phi::NegativeKernel<T, Context>(
-        dev_ctx, dout.non_zero_elements(), dy->mutable_non_zero_elements());
+        dev_ctx, dout.values(), dy->mutable_values());
     auto tmp = sparse::ElementWiseMultiplyCsr<T, Context>(dev_ctx, *dy, out);
     sparse::ElementWiseDivideCsrKernel<T, Context>(dev_ctx, tmp, y, dy);
   }
@@ -177,7 +177,7 @@ void ElementWiseSubtractCooGradCPUKernel(const Context& dev_ctx,
     AllocCooPtr<T, IntT>(dev_ctx, y, dy);
     Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dy);
     phi::NegativeKernel<T, Context>(
-        dev_ctx, dout.non_zero_elements(), dy->mutable_non_zero_elements());
+        dev_ctx, dout.values(), dy->mutable_values());
   }
 }
@@ -220,7 +220,7 @@ void ElementWiseDivideCooGradCPUKernel(const Context& dev_ctx,
     AllocCooPtr<T, IntT>(dev_ctx, y, dy);
     Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dy);
     phi::NegativeKernel<T, Context>(
-        dev_ctx, dout.non_zero_elements(), dy->mutable_non_zero_elements());
+        dev_ctx, dout.values(), dy->mutable_values());
     auto tmp = sparse::ElementWiseMultiplyCoo<T, Context>(dev_ctx, *dy, out);
     sparse::ElementWiseDivideCooKernel<T, Context>(dev_ctx, tmp, y, dy);
   }
@@ -237,7 +237,7 @@ void ElementWiseDivideCsrGradKernel(const Context& dev_ctx,
                                     SparseCsrTensor* dx,
                                     SparseCsrTensor* dy) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_crows().dtype(), "ElementWiseDivideCsrGradCPUKernel", ([&] {
+      x.crows().dtype(), "ElementWiseDivideCsrGradCPUKernel", ([&] {
        ElementWiseDivideCsrGradCPUKernel<T, data_t>(
            dev_ctx, x, y, out, dout, dx, dy);
      }));
@@ -251,7 +251,7 @@ void ElementWiseDivideCooGradKernel(const Context& dev_ctx,
                                     SparseCooTensor* dx,
                                     SparseCooTensor* dy) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "ElementWiseDivideCooGradCPUKernel", ([&] {
+      x.indices().dtype(), "ElementWiseDivideCooGradCPUKernel", ([&] {
        ElementWiseDivideCooGradCPUKernel<T, data_t>(
            dev_ctx, x, y, out, dout, dx, dy);
      }));
@@ -262,38 +262,34 @@ void ElementWiseDivideCooGradKernel(const Context& dev_ctx,
                                             \
   DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name)
 #define DEFINE_ELEMENTWISE_GRAD_KERNEL_CSR(name) \
   template <typename T, typename Context> \
   void ElementWise##name##CsrGradKernel(const Context& dev_ctx, \
                                         const SparseCsrTensor& x, \
                                         const SparseCsrTensor& y, \
                                         const SparseCsrTensor& dout, \
                                         SparseCsrTensor* dx, \
                                         SparseCsrTensor* dy) { \
     PD_VISIT_BASE_INTEGRAL_TYPES( \
-        x.non_zero_crows().dtype(), \
-        "ElementWise##name##CsrGradCPUKernel", \
-        ([&] { \
+        x.crows().dtype(), "ElementWise##name##CsrGradCPUKernel", ([&] { \
           ElementWise##name##CsrGradCPUKernel<T, data_t>( \
               dev_ctx, x, y, dout, dx, dy); \
         })); \
   }
 #define DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) \
   template <typename T, typename Context> \
   void ElementWise##name##CooGradKernel(const Context& dev_ctx, \
                                         const SparseCooTensor& x, \
                                         const SparseCooTensor& y, \
                                         const SparseCooTensor& dout, \
                                         SparseCooTensor* dx, \
                                         SparseCooTensor* dy) { \
     PD_VISIT_BASE_INTEGRAL_TYPES( \
-        x.non_zero_indices().dtype(), \
-        "ElementWise##name##CooGradCPUKernel", \
-        ([&] { \
+        x.indices().dtype(), "ElementWise##name##CooGradCPUKernel", ([&] { \
          ElementWise##name##CooGradCPUKernel<T, data_t>( \
              dev_ctx, x, y, dout, dx, dy); \
        })); \
   }
 DEFINE_ELEMENTWISE_GRAD_KERNEL(Add)
......
@@ -156,13 +156,13 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
                         x.dims(),
                         y.dims()));
   int64_t element_size = 1;
-  for (auto j = 1; j < x.non_zero_elements().dims().size(); ++j) {
-    element_size *= x.non_zero_elements().dims()[j];
+  for (auto j = 1; j < x.values().dims().size(); ++j) {
+    element_size *= x.values().dims()[j];
   }
   IntT nnz = 0;
-  const auto x_values = x.non_zero_elements().data<T>();
-  const auto y_values = y.non_zero_elements().data<T>();
-  const auto sparse_dim = x.non_zero_indices().dims()[0];
+  const auto x_values = x.values().data<T>();
+  const auto y_values = y.values().data<T>();
+  const auto sparse_dim = x.indices().dims()[0];
   const bool is_divide = std::is_same<Functor, funcs::DivideFunctor<T>>::value;
   int64_t max_len = 1;
@@ -176,7 +176,7 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
   phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
       x.dims(), sparse_dim, sparse_offsets.data());
-  phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data<IntT>(),
+  phi::funcs::sparse::FlattenIndices(x.indices().data<IntT>(),
                                      sparse_offsets.data(),
                                      x.nnz(),
                                      sparse_dim,
@@ -184,7 +184,7 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
                                      1,
                                      x_indexs.data());
-  phi::funcs::sparse::FlattenIndices(y.non_zero_indices().data<IntT>(),
+  phi::funcs::sparse::FlattenIndices(y.indices().data<IntT>(),
                                      sparse_offsets.data(),
                                      y.nnz(),
                                      sparse_dim,
@@ -233,10 +233,8 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
                                 out_indices_vec.data());
   if (nnz == 0) {
-    phi::DenseTensor out_indices =
-        phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
-    phi::DenseTensor out_values =
-        phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+    phi::DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
+    phi::DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, x.values());
     out->SetMember(out_indices, out_values, x.dims());
   } else {
     DenseTensorMeta indices_meta(
@@ -244,8 +242,8 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
         phi::make_ddim(
            {static_cast<int64_t>(sparse_dim), static_cast<int64_t>(nnz)}),
        DataLayout::NCHW);
-    auto indeces_dim = vectorize(slice_ddim(
-        x.non_zero_elements().dims(), 1, x.non_zero_elements().dims().size()));
+    auto indeces_dim =
+        vectorize(slice_ddim(x.values().dims(), 1, x.values().dims().size()));
     indeces_dim.insert(indeces_dim.begin(), nnz);
     DenseTensorMeta values_meta(
         paddle::experimental::CppTypeToDataType<T>::Type(),
@@ -283,16 +281,16 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
     *out = SparseCooToCsr<T>(dev_ctx, coo_out); \
   }
 #define DEFINE_CSR_ELEMENTWISE_KERNEL(name) \
   template <typename T, typename Context> \
   void ElementWise##name##CsrKernel(const Context& dev_ctx, \
                                     const SparseCsrTensor& x, \
                                     const SparseCsrTensor& y, \
                                     SparseCsrTensor* out) { \
     PD_VISIT_BASE_INTEGRAL_TYPES( \
-        x.non_zero_crows().dtype(), "ElementWise##name##CsrCPUKernel", ([&] { \
+        x.crows().dtype(), "ElementWise##name##CsrCPUKernel", ([&] { \
           ElementWise##name##CsrCPUKernel<T, data_t>(dev_ctx, x, y, out); \
         })); \
   }
 #define DEFINE_COO_ELEMENTWISE_CPU_KERNEL(name) \
@@ -306,18 +304,16 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
         dev_ctx, x, y, out, functor); \
   }
 #define DEFINE_COO_ELEMENTWISE_KERNEL(name) \
   template <typename T, typename Context> \
   void ElementWise##name##CooKernel(const Context& dev_ctx, \
                                     const SparseCooTensor& x, \
                                     const SparseCooTensor& y, \
                                     SparseCooTensor* out) { \
-    PD_VISIT_BASE_INTEGRAL_TYPES(x.non_zero_indices().dtype(), \
-                                 "ElementWise##name##CooCPUKernel", \
-                                 ([&] { \
-                                   ElementWise##name##CooCPUKernel<T, data_t>( \
-                                       dev_ctx, x, y, out); \
-                                 })); \
+    PD_VISIT_BASE_INTEGRAL_TYPES( \
+        x.indices().dtype(), "ElementWise##name##CooCPUKernel", ([&] { \
+          ElementWise##name##CooCPUKernel<T, data_t>(dev_ctx, x, y, out); \
+        })); \
   }
 DEFINE_CSR_ELEMENTWISE_CPU_KERNEL(Add)
......
@@ -37,8 +37,8 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx,
       x.dims(),
       mask.dims(),
       phi::errors::InvalidArgument("the input x and mask must have the shape"));
-  const DenseTensor& indices = mask.non_zero_indices();
-  const DenseTensor& values = mask.non_zero_elements();
+  const DenseTensor& indices = mask.indices();
+  const DenseTensor& values = mask.values();
   const int sparse_dim = mask.sparse_dim();
   DenseTensor out_indices = phi::EmptyLike<T>(dev_ctx, indices);
@@ -71,7 +71,7 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx,
 /**
  * @brief Filter the DenseTensor x by the
- * mask.non_zero_indices() and output a SparseCooTensor
+ * mask.indices() and output a SparseCooTensor
  * x and mask must have the same shape.
 **/
 template <typename T, typename Context>
@@ -80,7 +80,7 @@ void SparseMaskKernel(const Context& dev_ctx,
                       const SparseCooTensor& mask,
                       SparseCooTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      mask.non_zero_indices().dtype(), "SparseMaskCPUKernel", ([&] {
+      mask.indices().dtype(), "SparseMaskCPUKernel", ([&] {
        SparseMaskCPUKernel<T, data_t>(dev_ctx, x, mask, out);
      }));
 }
@@ -102,7 +102,7 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx,
   phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
       x.dims(), sparse_dim, sparse_offsets.data());
-  phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data<IntT>(),
+  phi::funcs::sparse::FlattenIndices(x.indices().data<IntT>(),
                                      sparse_offsets.data(),
                                      x.nnz(),
                                      sparse_dim,
@@ -121,12 +121,12 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx,
   for (uint64_t i = 0; i < x_indexs.size(); i++) {
     x_indexs_map[x_indexs[i]] = i;
   }
-  *out = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  *out = phi::EmptyLike<T>(dev_ctx, x.values());
   T* out_ptr = out->data<T>();
   memset(out_ptr, static_cast<T>(0), out->numel() * sizeof(T));
   const int64_t stride =
-      x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1];
-  const T* in_ptr = x.non_zero_elements().data<T>();
+      x.dims().size() == sparse_dim ? 1 : x.values().dims()[1];
+  const T* in_ptr = x.values().data<T>();
   // TODO(zhangkaihuo): multithreading can be used for acceleration
   for (uint64_t i = 0; i < mask_indexs.size(); i++) {
     auto iter = x_indexs_map.find(mask_indexs[i]);
@@ -147,7 +147,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx,
                             const DenseTensor& mask_indices,
                             DenseTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "SparseMaskHelperCPUKernel", ([&] {
+      x.indices().dtype(), "SparseMaskHelperCPUKernel", ([&] {
        SparseMaskHelperCPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
      }));
 }
......
@@ -42,21 +42,17 @@ void MaxPoolCooGradCPUKernel(const CPUContext& dev_ctx,
   phi::funcs::sparse::PrefixSum(counter_ptr, &offsets[0], kernel_size);
-  const T* in_features_ptr = x.non_zero_elements().data<T>();
-  const T* out_features_ptr = out.non_zero_elements().data<T>();
-  const T* out_grad_ptr = out_grad.non_zero_elements().data<T>();
+  const T* in_features_ptr = x.values().data<T>();
+  const T* out_features_ptr = out.values().data<T>();
+  const T* out_grad_ptr = out_grad.values().data<T>();
   // TODO(zhangkaihuo): call phi::sparse::EmptyLike
-  DenseTensor x_grad_indices =
-      phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
-  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  DenseTensor x_grad_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
+  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
   x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
   T* x_grad_ptr = x_grad_values.data<T>();
   memset(x_grad_ptr, 0, sizeof(T) * x_grad_values.numel());
-  phi::Copy<CPUContext>(dev_ctx,
-                        x.non_zero_indices(),
-                        dev_ctx.GetPlace(),
-                        false,
-                        &x_grad_indices);
+  phi::Copy<CPUContext>(
+      dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &x_grad_indices);
   phi::funcs::MaxPoolGrad<T> grad_functor;
   for (int i = 0; i < kernel_size; i++) {
@@ -84,7 +80,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx,
                           const std::vector<int>& kernel_sizes,
                           SparseCooTensor* x_grad) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "MaxPoolCooGradCPUKernel", ([&] {
+      x.indices().dtype(), "MaxPoolCooGradCPUKernel", ([&] {
        MaxPoolCooGradCPUKernel<T, data_t>(
            dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad);
      }));
......
@@ -50,7 +50,7 @@ void MaxPoolCooCPUKernel(const CPUContext& dev_ctx,
   std::vector<int> counter_per_kernel(kernel_size, 0);
-  const T* in_features_ptr = x.non_zero_elements().data<T>();
+  const T* in_features_ptr = x.values().data<T>();
   // 1. product rule book
   ProductRuleBook<T, CPUContext, IntT>(dev_ctx,
                                        x,
@@ -78,7 +78,7 @@ void MaxPoolCooCPUKernel(const CPUContext& dev_ctx,
   std::vector<bool> out_flags(out->nnz(), false);
   // 2. max pool
-  T* out_features_ptr = out->mutable_non_zero_elements()->data<T>();
+  T* out_features_ptr = out->mutable_values()->data<T>();
   phi::funcs::MaxPool<T> max_pool_functor;
   for (int i = 0; i < kernel_size; i++) {
     for (int j = 0; j < counter_ptr[i]; j++) {
@@ -110,7 +110,7 @@ void MaxPoolCooKernel(const Context& dev_ctx,
                       DenseTensor* rulebook,
                       DenseTensor* counter) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "MaxPoolCooCPUKernel", ([&] {
+      x.indices().dtype(), "MaxPoolCooCPUKernel", ([&] {
        MaxPoolCooCPUKernel<T, data_t>(dev_ctx,
                                       x,
                                       kernel_sizes,
......
@@ -111,10 +111,10 @@ void SparseCsrToCooCPUKernel(const CPUContext& dev_ctx,
                              const SparseCsrTensor& x,
                              SparseCooTensor* out) {
   const DDim& x_dims = x.dims();
-  const int64_t non_zero_num = x.non_zero_cols().numel();
-  const auto& csr_crows = x.non_zero_crows();
-  const auto& csr_cols = x.non_zero_cols();
-  const auto& csr_values = x.non_zero_elements();
+  const int64_t non_zero_num = x.cols().numel();
+  const auto& csr_crows = x.crows();
+  const auto& csr_cols = x.cols();
+  const auto& csr_values = x.values();
   const IntT* csr_crows_data = csr_crows.data<IntT>();
   const IntT* csr_cols_data = csr_cols.data<IntT>();
   const T* csr_values_data = csr_values.data<T>();
@@ -161,7 +161,7 @@ void SparseCsrToCooKernel(const Context& dev_ctx,
                           const SparseCsrTensor& x,
                           SparseCooTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_crows().dtype(), "SparseCsrToCooCPUKernel", ([&] {
+      x.crows().dtype(), "SparseCsrToCooCPUKernel", ([&] {
        SparseCsrToCooCPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
@@ -182,20 +182,20 @@ void SparseCooToCsrCPUKernel(const CPUContext& dev_ctx,
   int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
   int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
-  phi::DenseTensor non_zero_crows;
-  non_zero_crows.Resize({batchs * (rows + 1)});
-  IntT* csr_crows_data = dev_ctx.template Alloc<IntT>(&non_zero_crows);
-  phi::DenseTensor non_zero_cols;
-  non_zero_cols.Resize({non_zero_num});
-  IntT* csr_cols_data = dev_ctx.template Alloc<IntT>(&non_zero_cols);
-  phi::DenseTensor non_zero_elements;
-  non_zero_elements.Resize({non_zero_num});
-  T* csr_values_data = dev_ctx.template Alloc<T>(&non_zero_elements);
-  const auto& coo_indices = x.non_zero_indices();
-  const auto& coo_values = x.non_zero_elements();
+  phi::DenseTensor crows;
+  crows.Resize({batchs * (rows + 1)});
+  IntT* csr_crows_data = dev_ctx.template Alloc<IntT>(&crows);
+  phi::DenseTensor cols;
+  cols.Resize({non_zero_num});
+  IntT* csr_cols_data = dev_ctx.template Alloc<IntT>(&cols);
+  phi::DenseTensor values;
+  values.Resize({non_zero_num});
+  T* csr_values_data = dev_ctx.template Alloc<T>(&values);
+  const auto& coo_indices = x.indices();
+  const auto& coo_values = x.values();
   const IntT* batchs_ptr = coo_indices.data<IntT>();
   const IntT* coo_rows_data =
       x_dims.size() == 2 ? batchs_ptr : batchs_ptr + non_zero_num;
@@ -243,7 +243,7 @@ void SparseCooToCsrCPUKernel(const CPUContext& dev_ctx,
   memcpy(csr_cols_data, coo_cols_data, sizeof(IntT) * non_zero_num);
   memcpy(csr_values_data, coo_values_data, sizeof(T) * non_zero_num);
-  out->SetMember(non_zero_crows, non_zero_cols, non_zero_elements, x_dims);
+  out->SetMember(crows, cols, values, x_dims);
 }
 template <typename T, typename Context>
@@ -251,7 +251,7 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
                           const SparseCooTensor& x,
                           SparseCsrTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "SparseCooToCsrCPUKernel", ([&] {
+      x.indices().dtype(), "SparseCooToCsrCPUKernel", ([&] {
        SparseCooToCsrCPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
@@ -262,8 +262,8 @@ void SparseCooToDenseCPUKernel(const CPUContext& dev_ctx,
                                DenseTensor* out) {
   const auto non_zero_num = x.nnz();
   const auto dense_dims = x.dims();
-  const auto indices = x.non_zero_indices();
-  const auto values = x.non_zero_elements();
+  const auto indices = x.indices();
+  const auto values = x.values();
   const auto indices_dims = indices.dims();
   int64_t sparse_dim = indices_dims[0];
   if (indices_dims.size() == 1) {
@@ -272,9 +272,8 @@ void SparseCooToDenseCPUKernel(const CPUContext& dev_ctx,
   const int64_t dense_dim = x.dense_dim();
   const T* x_data = values.data<T>();
-  *out = phi::Empty(
-      dev_ctx,
-      DenseTensorMeta(x.dtype(), x.dims(), x.non_zero_elements().layout()));
+  *out = phi::Empty(dev_ctx,
+                    DenseTensorMeta(x.dtype(), x.dims(), x.values().layout()));
   T* out_data = out->data<T>();
   int64_t base_offset = 1;
   for (int64_t i = 0; i < dense_dim; i++) {
@@ -305,7 +304,7 @@ void SparseCooToDenseKernel(const Context& dev_ctx,
                             const SparseCooTensor& x,
                             DenseTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "SparseCooToDenseCPUKernel", ([&] {
+      x.indices().dtype(), "SparseCooToDenseCPUKernel", ([&] {
        SparseCooToDenseCPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
......
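The SparseCooToCsr hunks above move data between the COO layout (a sparse_dim x nnz indices tensor plus values) and the CSR layout (crows, cols, values). Below is a small worked example, not taken from the commit, of what those three CSR arrays contain for one 3x3 matrix, using the same crows/cols/values naming the kernels now use.

```cpp
// Illustrative only: COO and CSR representations of a small 3x3 matrix.
#include <cstdint>
#include <vector>

int main() {
  // Dense matrix:
  //   [[1, 0, 2],
  //    [0, 0, 0],
  //    [0, 3, 0]]
  // COO: a 2 x nnz index matrix (row ids, then col ids) plus nnz values.
  std::vector<int64_t> coo_rows = {0, 0, 2};
  std::vector<int64_t> coo_cols = {0, 2, 1};
  std::vector<float> values = {1.0f, 2.0f, 3.0f};

  // CSR: crows has rows + 1 entries; row i owns values[crows[i]..crows[i+1]).
  std::vector<int64_t> crows = {0, 2, 2, 3};  // row 0: 2 nnz, row 1: 0, row 2: 1
  std::vector<int64_t> cols = {0, 2, 1};      // same column ids, same order
  // values is identical in both layouts when the COO entries are row-sorted.
  (void)coo_rows;
  (void)coo_cols;
  (void)crows;
  (void)cols;
  return 0;
}
```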
@@ -43,25 +43,24 @@ namespace sparse {
   SparseCsrTensor ElementWise##name##Csr(const Context& dev_ctx, \
                                          const SparseCsrTensor& x, \
                                          const SparseCsrTensor& y) { \
-    DenseTensor non_zero_crows; \
-    DenseTensor non_zero_cols; \
+    DenseTensor crows; \
+    DenseTensor cols; \
     DenseTensor non_zero_elements; \
-    SparseCsrTensor out( \
-        non_zero_crows, non_zero_cols, non_zero_elements, x.dims()); \
+    SparseCsrTensor out(crows, cols, non_zero_elements, x.dims()); \
     ElementWise##name##CsrKernel<T, Context>(dev_ctx, x, y, &out); \
     return out; \
   }
 #define DEFINE_COO_ELEMENTWISE_KERNEL_FUNC(name) \
   template <typename T, typename Context> \
   SparseCooTensor ElementWise##name##Coo(const Context& dev_ctx, \
                                          const SparseCooTensor& x, \
                                          const SparseCooTensor& y) { \
-    DenseTensor non_zero_indices; \
+    DenseTensor indices; \
     DenseTensor non_zero_elements; \
-    SparseCooTensor out(non_zero_indices, non_zero_elements, x.dims()); \
+    SparseCooTensor out(indices, non_zero_elements, x.dims()); \
     ElementWise##name##CooKernel<T, Context>(dev_ctx, x, y, &out); \
     return out; \
   }
 DEFINE_ELEMENTWISE_KERNEL_HEAD(Add)
......
@@ -27,7 +27,7 @@ void EmptyLikeCooKernel(const Context& dev_ctx,
                         const SparseCooTensor& x,
                         SparseCooTensor* out) {
   out->set_dims(x.dims());
-  *(out->mutable_non_zero_indices()) = x.non_zero_indices();
+  *(out->mutable_indices()) = x.indices();
   const DenseTensor& x_values = x.non_zero_elements();
   DenseTensor* out_values = out->mutable_non_zero_elements();
@@ -40,8 +40,8 @@ void EmptyLikeCsrKernel(const Context& dev_ctx,
                         const SparseCsrTensor& x,
                         SparseCsrTensor* out) {
   out->set_dims(x.dims());
-  *(out->mutable_non_zero_crows()) = x.non_zero_crows();
-  *(out->mutable_non_zero_cols()) = x.non_zero_cols();
+  *(out->mutable_crows()) = x.crows();
+  *(out->mutable_cols()) = x.cols();
   const DenseTensor& x_values = x.non_zero_elements();
   DenseTensor* out_values = out->mutable_non_zero_elements();
......
@@ -131,9 +131,9 @@ void CastCooKernel(const Context& dev_ctx,
                    SparseCooTensor* out) {
   out->set_dims(x.dims());
-  const DenseTensor& x_indices = x.non_zero_indices();
+  const DenseTensor& x_indices = x.indices();
   const DenseTensor& x_values = x.non_zero_elements();
-  DenseTensor* out_indices = out->mutable_non_zero_indices();
+  DenseTensor* out_indices = out->mutable_indices();
   DenseTensor* out_values = out->mutable_non_zero_elements();
   if (index_dtype == DataType::UNDEFINED) {
@@ -167,11 +167,11 @@ void CastCsrKernel(const Context& dev_ctx,
                    SparseCsrTensor* out) {
   out->set_dims(x.dims());
-  const DenseTensor& x_crows = x.non_zero_crows();
-  const DenseTensor& x_cols = x.non_zero_cols();
+  const DenseTensor& x_crows = x.crows();
+  const DenseTensor& x_cols = x.cols();
   const DenseTensor& x_values = x.non_zero_elements();
-  DenseTensor* out_crows = out->mutable_non_zero_crows();
-  DenseTensor* out_cols = out->mutable_non_zero_cols();
+  DenseTensor* out_crows = out->mutable_crows();
+  DenseTensor* out_cols = out->mutable_cols();
   DenseTensor* out_values = out->mutable_non_zero_elements();
   if (index_dtype == DataType::UNDEFINED) {
......
@@ -24,7 +24,7 @@ void CooValuesGradKernel(const Context& dev_ctx,
                          const SparseCooTensor& x,
                          const DenseTensor& out_grad,
                          SparseCooTensor* x_grad) {
-  x_grad->SetMember(x.non_zero_indices(), out_grad, x.dims(), true);
+  x_grad->SetMember(x.indices(), out_grad, x.dims(), true);
 }
 template <typename T, typename Context>
......
@@ -63,11 +63,10 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 SparseCsrTensor SparseCooToCsr(const Context& dev_ctx,
                                const SparseCooTensor& x) {
-  DenseTensor non_zero_crows;
-  DenseTensor non_zero_cols;
+  DenseTensor crows;
+  DenseTensor cols;
   DenseTensor non_zero_elements;
-  SparseCsrTensor csr(
-      non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
+  SparseCsrTensor csr(crows, cols, non_zero_elements, x.dims());
   SparseCooToCsrKernel<T, Context>(dev_ctx, x, &csr);
   return csr;
 }
@@ -92,11 +91,10 @@ void DenseToSparseCsrKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 SparseCsrTensor DenseToSparseCsr(const Context& dev_ctx, const DenseTensor& x) {
-  DenseTensor non_zero_crows;
-  DenseTensor non_zero_cols;
+  DenseTensor crows;
+  DenseTensor cols;
   DenseTensor non_zero_elements;
-  SparseCsrTensor csr(
-      non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
+  SparseCsrTensor csr(crows, cols, non_zero_elements, x.dims());
   DenseToSparseCsrKernel<T, Context>(dev_ctx, x, &csr);
   return csr;
 }
......