Unverified commit a4d2878a authored by zhangkaihuo, committed by GitHub

[Sparse]Use shorter function names (#45339)

Parent 4c780311
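The entire commit is a mechanical rename of the sparse tensor accessors, and every hunk below repeats the same substitutions: `non_zero_indices()` → `indices()`, `non_zero_elements()` → `values()`, `non_zero_crows()` → `crows()`, `non_zero_cols()` → `cols()`, plus the matching `mutable_*` variants. As a quick reference, here is a minimal sketch of the old and new spellings side by side; the wrapper function and header paths are illustrative assumptions, while the accessor names themselves come straight from the diff:

```cpp
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"

// Hypothetical helper, only to show the renamed accessors in one place.
void AccessorRenameCheatSheet(const phi::SparseCooTensor& coo,
                              const phi::SparseCsrTensor& csr) {
  // COO: indices + values.
  const phi::DenseTensor& indices = coo.indices();  // was coo.non_zero_indices()
  const phi::DenseTensor& values = coo.values();    // was coo.non_zero_elements()

  // CSR: compressed rows, columns, values.
  const phi::DenseTensor& crows = csr.crows();      // was csr.non_zero_crows()
  const phi::DenseTensor& cols = csr.cols();        // was csr.non_zero_cols()
  const phi::DenseTensor& vals = csr.values();      // was csr.non_zero_elements()

  (void)indices; (void)values; (void)crows; (void)cols; (void)vals;
}
```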
@@ -25,17 +25,17 @@ template <typename T, typename IntT>
void CoalesceCPUKernel(const CPUContext& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
const DenseTensor& x_indices = x.non_zero_indices();
const DenseTensor& x_values = x.non_zero_elements();
const DenseTensor& x_indices = x.indices();
const DenseTensor& x_values = x.values();
DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x_indices);
DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, x_values);
const int64_t sparse_dim = x.non_zero_indices().dims()[0];
const int64_t sparse_dim = x.indices().dims()[0];
std::vector<IntT> sparse_offsets(sparse_dim), x_indexs(x.nnz());
phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
x.dims(), sparse_dim, sparse_offsets.data());
phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data<IntT>(),
phi::funcs::sparse::FlattenIndices(x.indices().data<IntT>(),
sparse_offsets.data(),
x.nnz(),
sparse_dim,
@@ -45,7 +45,7 @@ void CoalesceCPUKernel(const CPUContext& dev_ctx,
const T* x_values_ptr = x_values.data<T>();
const int64_t stride =
x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1];
x.dims().size() == sparse_dim ? 1 : x.values().dims()[1];
std::map<IntT, std::vector<int64_t>> indices_to_index;
for (uint64_t i = 0; i < x_indexs.size(); i++) {
@@ -98,10 +98,9 @@ template <typename T, typename Context>
void CoalesceKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "CoalesceCPUKernel", ([&] {
CoalesceCPUKernel<T, data_t>(dev_ctx, x, out);
}));
PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "CoalesceCPUKernel", ([&] {
CoalesceCPUKernel<T, data_t>(dev_ctx, x, out);
}));
}
} // namespace sparse
......
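For context on the coalesce hunks above: the kernel flattens each multi-dimensional sparse index into one scalar key (via `CalcOffsetsPerDim` plus `FlattenIndices`) so duplicate coordinates can be merged through a map. A standalone sketch of that flattening, assuming row-major strides over the leading `sparse_dim` dims and an indices matrix laid out as `[sparse_dim, nnz]`; the free-function names here are illustrative, not Paddle's:

```cpp
#include <cstdint>
#include <vector>

// Row-major strides for the leading sparse_dim entries of dims
// (what CalcOffsetsPerDim is used for in the hunk above).
std::vector<int64_t> RowMajorOffsets(const std::vector<int64_t>& dims,
                                     int64_t sparse_dim) {
  std::vector<int64_t> offsets(sparse_dim);
  int64_t stride = 1;
  for (int64_t i = sparse_dim - 1; i >= 0; --i) {
    offsets[i] = stride;
    stride *= dims[i];
  }
  return offsets;
}

// Collapse an indices matrix of shape [sparse_dim, nnz] into one key per
// column: key[i] = sum_j indices(j, i) * offsets[j].
std::vector<int64_t> FlattenToKeys(const std::vector<int64_t>& indices,
                                   const std::vector<int64_t>& offsets,
                                   int64_t nnz, int64_t sparse_dim) {
  std::vector<int64_t> keys(nnz, 0);
  for (int64_t j = 0; j < sparse_dim; ++j) {
    for (int64_t i = 0; i < nnz; ++i) {
      keys[i] += indices[j * nnz + i] * offsets[j];
    }
  }
  return keys;
}
```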
@@ -43,8 +43,8 @@ void ProductRuleBook(const Context& dev_ctx,
DenseTensor* rulebook,
int* counter_per_kernel) {
const int64_t non_zero_num = x.nnz();
const auto& non_zero_indices = x.non_zero_indices();
const IntT* indices_ptr = non_zero_indices.data<IntT>();
const auto& indices = x.indices();
const IntT* indices_ptr = indices.data<IntT>();
int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
memset(counter_per_kernel, 0, kernel_size * sizeof(int));
@@ -155,9 +155,8 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
paddle::experimental::CppTypeToDataType<IntT>::Type(),
{sparse_dim, out_non_zero_num},
DataLayout::NCHW);
DenseTensorMeta values_meta(x.dtype(),
{out_non_zero_num, out_channels},
x.non_zero_elements().layout());
DenseTensorMeta values_meta(
x.dtype(), {out_non_zero_num, out_channels}, x.values().layout());
phi::DenseTensor out_indices = phi::Empty(dev_ctx, std::move(indices_meta));
phi::DenseTensor out_values = phi::Empty(dev_ctx, std::move(values_meta));
IntT* out_indices_ptr = out_indices.data<IntT>();
......
@@ -78,17 +78,13 @@ void Conv3dCooGradCPUKernel(const CPUContext& dev_ctx,
int half_kernel_size = kernel_size / 2;
auto blas = phi::funcs::GetBlas<CPUContext, T>(dev_ctx);
DenseTensor x_grad_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
DenseTensor x_grad_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
T* x_grad_values_ptr = x_grad_values.data<T>();
memset(x_grad_values_ptr, 0, sizeof(T) * x_grad_values.numel());
memset(d_x_features_ptr, 0, sizeof(T) * d_x_features.numel());
phi::Copy<CPUContext>(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
&x_grad_indices);
phi::Copy<CPUContext>(
dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &x_grad_indices);
x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
std::vector<IntT> offsets(kernel_size + 1);
@@ -104,27 +100,26 @@ void Conv3dCooGradCPUKernel(const CPUContext& dev_ctx,
offsets[kernel_size] = offset;
if (subm) {
phi::funcs::sparse::SubmPreProcess<T, CPUContext>(
dev_ctx,
x,
kernel,
out_grad.non_zero_elements(),
in_channels,
out_channels,
half_kernel_size,
kernel_grad,
&x_grad_values);
phi::funcs::sparse::SubmPreProcess<T, CPUContext>(dev_ctx,
x,
kernel,
out_grad.values(),
in_channels,
out_channels,
half_kernel_size,
kernel_grad,
&x_grad_values);
if (max_count == 0) {
return;
}
}
Gather<T, IntT>(x.non_zero_elements().data<T>(),
Gather<T, IntT>(x.values().data<T>(),
rulebook_ptr + rulebook_len,
rulebook_len,
in_channels,
in_features_ptr);
Gather<T, IntT>(out_grad.non_zero_elements().data<T>(),
Gather<T, IntT>(out_grad.values().data<T>(),
rulebook_ptr + rulebook_len * 2,
rulebook_len,
out_channels,
@@ -197,7 +192,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
SparseCooTensor* x_grad,
DenseTensor* kernel_grad) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "Conv3dCooGradCPUKernel", ([&] {
x.indices().dtype(), "Conv3dCooGradCPUKernel", ([&] {
Conv3dCooGradCPUKernel<T, data_t>(dev_ctx,
x,
kernel,
......
@@ -126,11 +126,8 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
T* in_features_ptr = in_features.data<T>();
T* out_features_ptr = out_features.data<T>();
Gather<T, IntT>(x.non_zero_elements().data<T>(),
rulebook_ptr + n,
n,
in_channels,
in_features_ptr);
Gather<T, IntT>(
x.values().data<T>(), rulebook_ptr + n, n, in_channels, in_features_ptr);
// 3. call gemm for every weight
auto blas = phi::funcs::GetBlas<CPUContext, T>(dev_ctx);
@@ -167,7 +164,7 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
}
// 4. scatter
T* out_values_ptr = out->mutable_non_zero_elements()->data<T>();
T* out_values_ptr = out->mutable_values()->data<T>();
memset(out_values_ptr, 0, sizeof(T) * out->nnz() * out_channels);
Scatter<T, IntT>(
out_features_ptr, rulebook_ptr + n * 2, n, out_channels, out_values_ptr);
@@ -186,21 +183,20 @@ void Conv3dCooKernel(const Context& dev_ctx,
SparseCooTensor* out,
DenseTensor* rulebook,
DenseTensor* counter) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "Conv3dCooCPUKernel", ([&] {
Conv3dCooCPUKernel<T, data_t>(dev_ctx,
x,
kernel,
paddings,
dilations,
strides,
groups,
subm,
key,
out,
rulebook,
counter);
}));
PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "Conv3dCooCPUKernel", ([&] {
Conv3dCooCPUKernel<T, data_t>(dev_ctx,
x,
kernel,
paddings,
dilations,
strides,
groups,
subm,
key,
out,
rulebook,
counter);
}));
}
} // namespace sparse
......
@@ -33,9 +33,9 @@ template <typename T, typename IntT, typename Context>
void AllocCsrPtr(const Context& dev_ctx,
const SparseCsrTensor& x,
SparseCsrTensor* dx) {
DenseTensor dx_crows = phi::EmptyLike<IntT>(dev_ctx, x.non_zero_crows());
DenseTensor dx_cols = phi::EmptyLike<IntT>(dev_ctx, x.non_zero_cols());
DenseTensor dx_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
DenseTensor dx_crows = phi::EmptyLike<IntT>(dev_ctx, x.crows());
DenseTensor dx_cols = phi::EmptyLike<IntT>(dev_ctx, x.cols());
DenseTensor dx_values = phi::EmptyLike<T>(dev_ctx, x.values());
dx->SetMember(dx_crows, dx_cols, dx_values, x.dims());
}
@@ -43,8 +43,8 @@ template <typename T, typename IntT, typename Context>
void AllocCooPtr(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* dx) {
DenseTensor dx_indices = phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor dx_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
DenseTensor dx_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
DenseTensor dx_values = phi::EmptyLike<T>(dev_ctx, x.values());
dx->SetMember(dx_indices, dx_values, x.dims(), true);
}
@@ -88,7 +88,7 @@ void ElementWiseSubtractCsrGradCPUKernel(const Context& dev_ctx,
AllocCsrPtr<T, IntT>(dev_ctx, y, dy);
Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dy);
phi::NegativeKernel<T, Context>(
dev_ctx, dout.non_zero_elements(), dy->mutable_non_zero_elements());
dev_ctx, dout.values(), dy->mutable_values());
}
}
@@ -131,7 +131,7 @@ void ElementWiseDivideCsrGradCPUKernel(const Context& dev_ctx,
AllocCsrPtr<T, IntT>(dev_ctx, y, dy);
Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dy);
phi::NegativeKernel<T, Context>(
dev_ctx, dout.non_zero_elements(), dy->mutable_non_zero_elements());
dev_ctx, dout.values(), dy->mutable_values());
auto tmp = sparse::ElementWiseMultiplyCsr<T, Context>(dev_ctx, *dy, out);
sparse::ElementWiseDivideCsrKernel<T, Context>(dev_ctx, tmp, y, dy);
}
@@ -177,7 +177,7 @@ void ElementWiseSubtractCooGradCPUKernel(const Context& dev_ctx,
AllocCooPtr<T, IntT>(dev_ctx, y, dy);
Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dy);
phi::NegativeKernel<T, Context>(
dev_ctx, dout.non_zero_elements(), dy->mutable_non_zero_elements());
dev_ctx, dout.values(), dy->mutable_values());
}
}
@@ -220,7 +220,7 @@ void ElementWiseDivideCooGradCPUKernel(const Context& dev_ctx,
AllocCooPtr<T, IntT>(dev_ctx, y, dy);
Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dy);
phi::NegativeKernel<T, Context>(
dev_ctx, dout.non_zero_elements(), dy->mutable_non_zero_elements());
dev_ctx, dout.values(), dy->mutable_values());
auto tmp = sparse::ElementWiseMultiplyCoo<T, Context>(dev_ctx, *dy, out);
sparse::ElementWiseDivideCooKernel<T, Context>(dev_ctx, tmp, y, dy);
}
@@ -237,7 +237,7 @@ void ElementWiseDivideCsrGradKernel(const Context& dev_ctx,
SparseCsrTensor* dx,
SparseCsrTensor* dy) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_crows().dtype(), "ElementWiseDivideCsrGradCPUKernel", ([&] {
x.crows().dtype(), "ElementWiseDivideCsrGradCPUKernel", ([&] {
ElementWiseDivideCsrGradCPUKernel<T, data_t>(
dev_ctx, x, y, out, dout, dx, dy);
}));
@@ -251,7 +251,7 @@ void ElementWiseDivideCooGradKernel(const Context& dev_ctx,
SparseCooTensor* dx,
SparseCooTensor* dy) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "ElementWiseDivideCooGradCPUKernel", ([&] {
x.indices().dtype(), "ElementWiseDivideCooGradCPUKernel", ([&] {
ElementWiseDivideCooGradCPUKernel<T, data_t>(
dev_ctx, x, y, out, dout, dx, dy);
}));
@@ -262,38 +262,34 @@ void ElementWiseDivideCooGradKernel(const Context& dev_ctx,
\
DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name)
#define DEFINE_ELEMENTWISE_GRAD_KERNEL_CSR(name) \
template <typename T, typename Context> \
void ElementWise##name##CsrGradKernel(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& y, \
const SparseCsrTensor& dout, \
SparseCsrTensor* dx, \
SparseCsrTensor* dy) { \
PD_VISIT_BASE_INTEGRAL_TYPES( \
x.non_zero_crows().dtype(), \
"ElementWise##name##CsrGradCPUKernel", \
([&] { \
ElementWise##name##CsrGradCPUKernel<T, data_t>( \
dev_ctx, x, y, dout, dx, dy); \
})); \
#define DEFINE_ELEMENTWISE_GRAD_KERNEL_CSR(name) \
template <typename T, typename Context> \
void ElementWise##name##CsrGradKernel(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& y, \
const SparseCsrTensor& dout, \
SparseCsrTensor* dx, \
SparseCsrTensor* dy) { \
PD_VISIT_BASE_INTEGRAL_TYPES( \
x.crows().dtype(), "ElementWise##name##CsrGradCPUKernel", ([&] { \
ElementWise##name##CsrGradCPUKernel<T, data_t>( \
dev_ctx, x, y, dout, dx, dy); \
})); \
}
#define DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) \
template <typename T, typename Context> \
void ElementWise##name##CooGradKernel(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& y, \
const SparseCooTensor& dout, \
SparseCooTensor* dx, \
SparseCooTensor* dy) { \
PD_VISIT_BASE_INTEGRAL_TYPES( \
x.non_zero_indices().dtype(), \
"ElementWise##name##CooGradCPUKernel", \
([&] { \
ElementWise##name##CooGradCPUKernel<T, data_t>( \
dev_ctx, x, y, dout, dx, dy); \
})); \
#define DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) \
template <typename T, typename Context> \
void ElementWise##name##CooGradKernel(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& y, \
const SparseCooTensor& dout, \
SparseCooTensor* dx, \
SparseCooTensor* dy) { \
PD_VISIT_BASE_INTEGRAL_TYPES( \
x.indices().dtype(), "ElementWise##name##CooGradCPUKernel", ([&] { \
ElementWise##name##CooGradCPUKernel<T, data_t>( \
dev_ctx, x, y, dout, dx, dy); \
})); \
}
DEFINE_ELEMENTWISE_GRAD_KERNEL(Add)
......
@@ -156,13 +156,13 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
x.dims(),
y.dims()));
int64_t element_size = 1;
for (auto j = 1; j < x.non_zero_elements().dims().size(); ++j) {
element_size *= x.non_zero_elements().dims()[j];
for (auto j = 1; j < x.values().dims().size(); ++j) {
element_size *= x.values().dims()[j];
}
IntT nnz = 0;
const auto x_values = x.non_zero_elements().data<T>();
const auto y_values = y.non_zero_elements().data<T>();
const auto sparse_dim = x.non_zero_indices().dims()[0];
const auto x_values = x.values().data<T>();
const auto y_values = y.values().data<T>();
const auto sparse_dim = x.indices().dims()[0];
const bool is_divide = std::is_same<Functor, funcs::DivideFunctor<T>>::value;
int64_t max_len = 1;
@@ -176,7 +176,7 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
x.dims(), sparse_dim, sparse_offsets.data());
phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data<IntT>(),
phi::funcs::sparse::FlattenIndices(x.indices().data<IntT>(),
sparse_offsets.data(),
x.nnz(),
sparse_dim,
@@ -184,7 +184,7 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
1,
x_indexs.data());
phi::funcs::sparse::FlattenIndices(y.non_zero_indices().data<IntT>(),
phi::funcs::sparse::FlattenIndices(y.indices().data<IntT>(),
sparse_offsets.data(),
y.nnz(),
sparse_dim,
@@ -233,10 +233,8 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
out_indices_vec.data());
if (nnz == 0) {
phi::DenseTensor out_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
phi::DenseTensor out_values =
phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
phi::DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
phi::DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, x.values());
out->SetMember(out_indices, out_values, x.dims());
} else {
DenseTensorMeta indices_meta(
@@ -244,8 +242,8 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
phi::make_ddim(
{static_cast<int64_t>(sparse_dim), static_cast<int64_t>(nnz)}),
DataLayout::NCHW);
auto indeces_dim = vectorize(slice_ddim(
x.non_zero_elements().dims(), 1, x.non_zero_elements().dims().size()));
auto indeces_dim =
vectorize(slice_ddim(x.values().dims(), 1, x.values().dims().size()));
indeces_dim.insert(indeces_dim.begin(), nnz);
DenseTensorMeta values_meta(
paddle::experimental::CppTypeToDataType<T>::Type(),
@@ -283,16 +281,16 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
*out = SparseCooToCsr<T>(dev_ctx, coo_out); \
}
#define DEFINE_CSR_ELEMENTWISE_KERNEL(name) \
template <typename T, typename Context> \
void ElementWise##name##CsrKernel(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& y, \
SparseCsrTensor* out) { \
PD_VISIT_BASE_INTEGRAL_TYPES( \
x.non_zero_crows().dtype(), "ElementWise##name##CsrCPUKernel", ([&] { \
ElementWise##name##CsrCPUKernel<T, data_t>(dev_ctx, x, y, out); \
})); \
#define DEFINE_CSR_ELEMENTWISE_KERNEL(name) \
template <typename T, typename Context> \
void ElementWise##name##CsrKernel(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& y, \
SparseCsrTensor* out) { \
PD_VISIT_BASE_INTEGRAL_TYPES( \
x.crows().dtype(), "ElementWise##name##CsrCPUKernel", ([&] { \
ElementWise##name##CsrCPUKernel<T, data_t>(dev_ctx, x, y, out); \
})); \
}
#define DEFINE_COO_ELEMENTWISE_CPU_KERNEL(name) \
@@ -306,18 +304,16 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
dev_ctx, x, y, out, functor); \
}
#define DEFINE_COO_ELEMENTWISE_KERNEL(name) \
template <typename T, typename Context> \
void ElementWise##name##CooKernel(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& y, \
SparseCooTensor* out) { \
PD_VISIT_BASE_INTEGRAL_TYPES(x.non_zero_indices().dtype(), \
"ElementWise##name##CooCPUKernel", \
([&] { \
ElementWise##name##CooCPUKernel<T, data_t>( \
dev_ctx, x, y, out); \
})); \
#define DEFINE_COO_ELEMENTWISE_KERNEL(name) \
template <typename T, typename Context> \
void ElementWise##name##CooKernel(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& y, \
SparseCooTensor* out) { \
PD_VISIT_BASE_INTEGRAL_TYPES( \
x.indices().dtype(), "ElementWise##name##CooCPUKernel", ([&] { \
ElementWise##name##CooCPUKernel<T, data_t>(dev_ctx, x, y, out); \
})); \
}
DEFINE_CSR_ELEMENTWISE_CPU_KERNEL(Add)
......
@@ -37,8 +37,8 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx,
x.dims(),
mask.dims(),
phi::errors::InvalidArgument("the input x and mask must have the same shape"));
const DenseTensor& indices = mask.non_zero_indices();
const DenseTensor& values = mask.non_zero_elements();
const DenseTensor& indices = mask.indices();
const DenseTensor& values = mask.values();
const int sparse_dim = mask.sparse_dim();
DenseTensor out_indices = phi::EmptyLike<T>(dev_ctx, indices);
@@ -71,7 +71,7 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx,
/**
* @brief Filter the DenseTensor x by the
* mask.non_zero_indices() and output a SparseCooTensor
* mask.indices() and output a SparseCooTensor
* x and mask must have the same shape.
**/
template <typename T, typename Context>
@@ -80,7 +80,7 @@ void SparseMaskKernel(const Context& dev_ctx,
const SparseCooTensor& mask,
SparseCooTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
mask.non_zero_indices().dtype(), "SparseMaskCPUKernel", ([&] {
mask.indices().dtype(), "SparseMaskCPUKernel", ([&] {
SparseMaskCPUKernel<T, data_t>(dev_ctx, x, mask, out);
}));
}
@@ -102,7 +102,7 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx,
phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
x.dims(), sparse_dim, sparse_offsets.data());
phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data<IntT>(),
phi::funcs::sparse::FlattenIndices(x.indices().data<IntT>(),
sparse_offsets.data(),
x.nnz(),
sparse_dim,
@@ -121,12 +121,12 @@ void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx,
for (uint64_t i = 0; i < x_indexs.size(); i++) {
x_indexs_map[x_indexs[i]] = i;
}
*out = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
*out = phi::EmptyLike<T>(dev_ctx, x.values());
T* out_ptr = out->data<T>();
memset(out_ptr, static_cast<T>(0), out->numel() * sizeof(T));
const int64_t stride =
x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1];
const T* in_ptr = x.non_zero_elements().data<T>();
x.dims().size() == sparse_dim ? 1 : x.values().dims()[1];
const T* in_ptr = x.values().data<T>();
// TODO(zhangkaihuo): multithreading can be used for acceleration
for (uint64_t i = 0; i < mask_indexs.size(); i++) {
auto iter = x_indexs_map.find(mask_indexs[i]);
@@ -147,7 +147,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx,
const DenseTensor& mask_indices,
DenseTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseMaskHelperCPUKernel", ([&] {
x.indices().dtype(), "SparseMaskHelperCPUKernel", ([&] {
SparseMaskHelperCPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
}));
}
......
@@ -42,21 +42,17 @@ void MaxPoolCooGradCPUKernel(const CPUContext& dev_ctx,
phi::funcs::sparse::PrefixSum(counter_ptr, &offsets[0], kernel_size);
const T* in_features_ptr = x.non_zero_elements().data<T>();
const T* out_features_ptr = out.non_zero_elements().data<T>();
const T* out_grad_ptr = out_grad.non_zero_elements().data<T>();
const T* in_features_ptr = x.values().data<T>();
const T* out_features_ptr = out.values().data<T>();
const T* out_grad_ptr = out_grad.values().data<T>();
// TODO(zhangkaihuo): call phi::sparse::EmptyLike
DenseTensor x_grad_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
DenseTensor x_grad_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
T* x_grad_ptr = x_grad_values.data<T>();
memset(x_grad_ptr, 0, sizeof(T) * x_grad_values.numel());
phi::Copy<CPUContext>(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
&x_grad_indices);
phi::Copy<CPUContext>(
dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &x_grad_indices);
phi::funcs::MaxPoolGrad<T> grad_functor;
for (int i = 0; i < kernel_size; i++) {
@@ -84,7 +80,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx,
const std::vector<int>& kernel_sizes,
SparseCooTensor* x_grad) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "MaxPoolCooGradCPUKernel", ([&] {
x.indices().dtype(), "MaxPoolCooGradCPUKernel", ([&] {
MaxPoolCooGradCPUKernel<T, data_t>(
dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad);
}));
......
@@ -50,7 +50,7 @@ void MaxPoolCooCPUKernel(const CPUContext& dev_ctx,
std::vector<int> counter_per_kernel(kernel_size, 0);
const T* in_features_ptr = x.non_zero_elements().data<T>();
const T* in_features_ptr = x.values().data<T>();
// 1. product rule book
ProductRuleBook<T, CPUContext, IntT>(dev_ctx,
x,
@@ -78,7 +78,7 @@ void MaxPoolCooCPUKernel(const CPUContext& dev_ctx,
std::vector<bool> out_flags(out->nnz(), false);
// 2. max pool
T* out_features_ptr = out->mutable_non_zero_elements()->data<T>();
T* out_features_ptr = out->mutable_values()->data<T>();
phi::funcs::MaxPool<T> max_pool_functor;
for (int i = 0; i < kernel_size; i++) {
for (int j = 0; j < counter_ptr[i]; j++) {
@@ -110,7 +110,7 @@ void MaxPoolCooKernel(const Context& dev_ctx,
DenseTensor* rulebook,
DenseTensor* counter) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "MaxPoolCooCPUKernel", ([&] {
x.indices().dtype(), "MaxPoolCooCPUKernel", ([&] {
MaxPoolCooCPUKernel<T, data_t>(dev_ctx,
x,
kernel_sizes,
......
@@ -111,10 +111,10 @@ void SparseCsrToCooCPUKernel(const CPUContext& dev_ctx,
const SparseCsrTensor& x,
SparseCooTensor* out) {
const DDim& x_dims = x.dims();
const int64_t non_zero_num = x.non_zero_cols().numel();
const auto& csr_crows = x.non_zero_crows();
const auto& csr_cols = x.non_zero_cols();
const auto& csr_values = x.non_zero_elements();
const int64_t non_zero_num = x.cols().numel();
const auto& csr_crows = x.crows();
const auto& csr_cols = x.cols();
const auto& csr_values = x.values();
const IntT* csr_crows_data = csr_crows.data<IntT>();
const IntT* csr_cols_data = csr_cols.data<IntT>();
const T* csr_values_data = csr_values.data<T>();
@@ -161,7 +161,7 @@ void SparseCsrToCooKernel(const Context& dev_ctx,
const SparseCsrTensor& x,
SparseCooTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_crows().dtype(), "SparseCsrToCooCPUKernel", ([&] {
x.crows().dtype(), "SparseCsrToCooCPUKernel", ([&] {
SparseCsrToCooCPUKernel<T, data_t>(dev_ctx, x, out);
}));
}
@@ -182,20 +182,20 @@ void SparseCooToCsrCPUKernel(const CPUContext& dev_ctx,
int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
phi::DenseTensor non_zero_crows;
non_zero_crows.Resize({batchs * (rows + 1)});
IntT* csr_crows_data = dev_ctx.template Alloc<IntT>(&non_zero_crows);
phi::DenseTensor crows;
crows.Resize({batchs * (rows + 1)});
IntT* csr_crows_data = dev_ctx.template Alloc<IntT>(&crows);
phi::DenseTensor non_zero_cols;
non_zero_cols.Resize({non_zero_num});
IntT* csr_cols_data = dev_ctx.template Alloc<IntT>(&non_zero_cols);
phi::DenseTensor cols;
cols.Resize({non_zero_num});
IntT* csr_cols_data = dev_ctx.template Alloc<IntT>(&cols);
phi::DenseTensor non_zero_elements;
non_zero_elements.Resize({non_zero_num});
T* csr_values_data = dev_ctx.template Alloc<T>(&non_zero_elements);
phi::DenseTensor values;
values.Resize({non_zero_num});
T* csr_values_data = dev_ctx.template Alloc<T>(&values);
const auto& coo_indices = x.non_zero_indices();
const auto& coo_values = x.non_zero_elements();
const auto& coo_indices = x.indices();
const auto& coo_values = x.values();
const IntT* batchs_ptr = coo_indices.data<IntT>();
const IntT* coo_rows_data =
x_dims.size() == 2 ? batchs_ptr : batchs_ptr + non_zero_num;
@@ -243,7 +243,7 @@ void SparseCooToCsrCPUKernel(const CPUContext& dev_ctx,
memcpy(csr_cols_data, coo_cols_data, sizeof(IntT) * non_zero_num);
memcpy(csr_values_data, coo_values_data, sizeof(T) * non_zero_num);
out->SetMember(non_zero_crows, non_zero_cols, non_zero_elements, x_dims);
out->SetMember(crows, cols, values, x_dims);
}
template <typename T, typename Context>
@@ -251,7 +251,7 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCsrTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseCooToCsrCPUKernel", ([&] {
x.indices().dtype(), "SparseCooToCsrCPUKernel", ([&] {
SparseCooToCsrCPUKernel<T, data_t>(dev_ctx, x, out);
}));
}
@@ -262,8 +262,8 @@ void SparseCooToDenseCPUKernel(const CPUContext& dev_ctx,
DenseTensor* out) {
const auto non_zero_num = x.nnz();
const auto dense_dims = x.dims();
const auto indices = x.non_zero_indices();
const auto values = x.non_zero_elements();
const auto indices = x.indices();
const auto values = x.values();
const auto indices_dims = indices.dims();
int64_t sparse_dim = indices_dims[0];
if (indices_dims.size() == 1) {
@@ -272,9 +272,8 @@ void SparseCooToDenseCPUKernel(const CPUContext& dev_ctx,
const int64_t dense_dim = x.dense_dim();
const T* x_data = values.data<T>();
*out = phi::Empty(
dev_ctx,
DenseTensorMeta(x.dtype(), x.dims(), x.non_zero_elements().layout()));
*out = phi::Empty(dev_ctx,
DenseTensorMeta(x.dtype(), x.dims(), x.values().layout()));
T* out_data = out->data<T>();
int64_t base_offset = 1;
for (int64_t i = 0; i < dense_dim; i++) {
@@ -305,7 +304,7 @@ void SparseCooToDenseKernel(const Context& dev_ctx,
const SparseCooTensor& x,
DenseTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseCooToDenseCPUKernel", ([&] {
x.indices().dtype(), "SparseCooToDenseCPUKernel", ([&] {
SparseCooToDenseCPUKernel<T, data_t>(dev_ctx, x, out);
}));
}
......
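The conversion hunks above (SparseCooToCsrCPUKernel) build the CSR `crows` array from COO row indices that are already sorted per batch. A minimal sketch of that counting-plus-prefix-sum step for the single-batch case; the helper name is hypothetical:

```cpp
#include <cstdint>
#include <vector>

// Build crows (length rows + 1) from sorted COO row indices:
// after the prefix sum, crows[r]..crows[r + 1] spans row r's non-zeros.
std::vector<int64_t> BuildCrows(const std::vector<int64_t>& coo_rows,
                                int64_t rows) {
  std::vector<int64_t> crows(rows + 1, 0);
  for (int64_t row : coo_rows) {
    ++crows[row + 1];  // count non-zeros per row
  }
  for (int64_t r = 0; r < rows; ++r) {
    crows[r + 1] += crows[r];  // prefix sum turns counts into offsets
  }
  return crows;
}
```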
@@ -43,25 +43,24 @@ namespace sparse {
SparseCsrTensor ElementWise##name##Csr(const Context& dev_ctx, \
const SparseCsrTensor& x, \
const SparseCsrTensor& y) { \
DenseTensor non_zero_crows; \
DenseTensor non_zero_cols; \
DenseTensor crows; \
DenseTensor cols; \
DenseTensor non_zero_elements; \
SparseCsrTensor out( \
non_zero_crows, non_zero_cols, non_zero_elements, x.dims()); \
SparseCsrTensor out(crows, cols, non_zero_elements, x.dims()); \
ElementWise##name##CsrKernel<T, Context>(dev_ctx, x, y, &out); \
return out; \
}
#define DEFINE_COO_ELEMENTWISE_KERNEL_FUNC(name) \
template <typename T, typename Context> \
SparseCooTensor ElementWise##name##Coo(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& y) { \
DenseTensor non_zero_indices; \
DenseTensor non_zero_elements; \
SparseCooTensor out(non_zero_indices, non_zero_elements, x.dims()); \
ElementWise##name##CooKernel<T, Context>(dev_ctx, x, y, &out); \
return out; \
#define DEFINE_COO_ELEMENTWISE_KERNEL_FUNC(name) \
template <typename T, typename Context> \
SparseCooTensor ElementWise##name##Coo(const Context& dev_ctx, \
const SparseCooTensor& x, \
const SparseCooTensor& y) { \
DenseTensor indices; \
DenseTensor non_zero_elements; \
SparseCooTensor out(indices, non_zero_elements, x.dims()); \
ElementWise##name##CooKernel<T, Context>(dev_ctx, x, y, &out); \
return out; \
}
DEFINE_ELEMENTWISE_KERNEL_HEAD(Add)
......
@@ -27,7 +27,7 @@ void EmptyLikeCooKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
out->set_dims(x.dims());
*(out->mutable_non_zero_indices()) = x.non_zero_indices();
*(out->mutable_indices()) = x.indices();
const DenseTensor& x_values = x.non_zero_elements();
DenseTensor* out_values = out->mutable_non_zero_elements();
@@ -40,8 +40,8 @@ void EmptyLikeCsrKernel(const Context& dev_ctx,
const SparseCsrTensor& x,
SparseCsrTensor* out) {
out->set_dims(x.dims());
*(out->mutable_non_zero_crows()) = x.non_zero_crows();
*(out->mutable_non_zero_cols()) = x.non_zero_cols();
*(out->mutable_crows()) = x.crows();
*(out->mutable_cols()) = x.cols();
const DenseTensor& x_values = x.non_zero_elements();
DenseTensor* out_values = out->mutable_non_zero_elements();
......
@@ -131,9 +131,9 @@ void CastCooKernel(const Context& dev_ctx,
SparseCooTensor* out) {
out->set_dims(x.dims());
const DenseTensor& x_indices = x.non_zero_indices();
const DenseTensor& x_indices = x.indices();
const DenseTensor& x_values = x.non_zero_elements();
DenseTensor* out_indices = out->mutable_non_zero_indices();
DenseTensor* out_indices = out->mutable_indices();
DenseTensor* out_values = out->mutable_non_zero_elements();
if (index_dtype == DataType::UNDEFINED) {
@@ -167,11 +167,11 @@ void CastCsrKernel(const Context& dev_ctx,
SparseCsrTensor* out) {
out->set_dims(x.dims());
const DenseTensor& x_crows = x.non_zero_crows();
const DenseTensor& x_cols = x.non_zero_cols();
const DenseTensor& x_crows = x.crows();
const DenseTensor& x_cols = x.cols();
const DenseTensor& x_values = x.non_zero_elements();
DenseTensor* out_crows = out->mutable_non_zero_crows();
DenseTensor* out_cols = out->mutable_non_zero_cols();
DenseTensor* out_crows = out->mutable_crows();
DenseTensor* out_cols = out->mutable_cols();
DenseTensor* out_values = out->mutable_non_zero_elements();
if (index_dtype == DataType::UNDEFINED) {
......
@@ -24,7 +24,7 @@ void CooValuesGradKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const DenseTensor& out_grad,
SparseCooTensor* x_grad) {
x_grad->SetMember(x.non_zero_indices(), out_grad, x.dims(), true);
x_grad->SetMember(x.indices(), out_grad, x.dims(), true);
}
template <typename T, typename Context>
......
@@ -63,11 +63,10 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
template <typename T, typename Context>
SparseCsrTensor SparseCooToCsr(const Context& dev_ctx,
const SparseCooTensor& x) {
DenseTensor non_zero_crows;
DenseTensor non_zero_cols;
DenseTensor crows;
DenseTensor cols;
DenseTensor non_zero_elements;
SparseCsrTensor csr(
non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
SparseCsrTensor csr(crows, cols, non_zero_elements, x.dims());
SparseCooToCsrKernel<T, Context>(dev_ctx, x, &csr);
return csr;
}
@@ -92,11 +91,10 @@ void DenseToSparseCsrKernel(const Context& dev_ctx,
template <typename T, typename Context>
SparseCsrTensor DenseToSparseCsr(const Context& dev_ctx, const DenseTensor& x) {
DenseTensor non_zero_crows;
DenseTensor non_zero_cols;
DenseTensor crows;
DenseTensor cols;
DenseTensor non_zero_elements;
SparseCsrTensor csr(
non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
SparseCsrTensor csr(crows, cols, non_zero_elements, x.dims());
DenseToSparseCsrKernel<T, Context>(dev_ctx, x, &csr);
return csr;
}
......
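Finally, a note on the dispatch pattern that recurs at every call site touched here: `PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), ...)` switches on the runtime index dtype and instantiates the callback with a matching compile-time `data_t`. Below is a rough, self-contained re-implementation of that idea, assuming int32/int64 are the "base integral" types; the real macro is provided by Paddle's PD_VISIT family and may differ in detail:

```cpp
#include <cstdint>
#include <cstdio>
#include <stdexcept>

enum class DataType { INT32, INT64 };

// Sketch of a dtype visitor: the callback text is pasted into each case,
// where a local `using data_t = ...;` gives it the concrete index type.
#define VISIT_BASE_INTEGRAL_TYPES(DTYPE, NAME, ...)           \
  [&] {                                                       \
    switch (DTYPE) {                                          \
      case DataType::INT32: {                                 \
        using data_t = int32_t;                               \
        return __VA_ARGS__();                                 \
      }                                                       \
      case DataType::INT64: {                                 \
        using data_t = int64_t;                               \
        return __VA_ARGS__();                                 \
      }                                                       \
      default:                                                \
        throw std::runtime_error(NAME ": unsupported dtype"); \
    }                                                         \
  }()

template <typename IndexT>
void PrintIndexWidth() {
  std::printf("index width: %zu bytes\n", sizeof(IndexT));
}

int main() {
  DataType dtype = DataType::INT64;  // imagine x.indices().dtype()
  VISIT_BASE_INTEGRAL_TYPES(dtype, "PrintIndexWidth", ([&] {
    PrintIndexWidth<data_t>();  // data_t bound by the macro's switch
  }));
  return 0;
}
```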