Unverified commit 016b94c2, authored by zhangkaihuo, committed by GitHub

rename the member function of SparseTensor (#45291)

Parent commit: ed57237e
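This commit adds short accessor names to the sparse tensor classes and keeps the old non_zero_* getters as deprecated aliases. The sketch below is illustrative only (it is not part of the diff; the include paths and the function name are assumptions) and summarizes how the renamed accessors map onto the old ones:

// Illustrative sketch of the renamed accessors; not part of the commit.
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"

void AccessSparseMembers(phi::SparseCooTensor* coo, phi::SparseCsrTensor* csr) {
  // SparseCooTensor: indices()/values() replace non_zero_indices()/non_zero_elements().
  const phi::DenseTensor& coo_indices = coo->indices();
  const phi::DenseTensor& coo_values = coo->values();
  phi::DenseTensor* coo_mutable_values = coo->mutable_values();

  // SparseCsrTensor: crows()/cols()/values() replace the non_zero_* getters,
  // with matching mutable_crows()/mutable_cols()/mutable_values().
  const phi::DenseTensor& csr_crows = csr->crows();
  const phi::DenseTensor& csr_cols = csr->cols();
  phi::DenseTensor* csr_mutable_values = csr->mutable_values();

  (void)coo_indices; (void)coo_values; (void)coo_mutable_values;
  (void)csr_crows; (void)csr_cols; (void)csr_mutable_values;
}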
@@ -63,10 +63,16 @@ class SparseCooTensor : public TensorBase,
   /// \brief Returns the indices of non zero elemetns in original dense tensor.
   /// \return The indices of non zero elemetns in original dense tensor.
+  const DenseTensor& indices() const { return non_zero_indices_; }
+
+  /// Note: This function will removed soon. It is recommended to use indices()
   const DenseTensor& non_zero_indices() const { return non_zero_indices_; }
 
   /// \brief Returns the non zero elemetns in original dense tensor.
   /// \return The non zero elemetns in original dense tensor.
+  const DenseTensor& values() const { return non_zero_elements_; }
+
+  /// Note: This function will removed soon. It is recommended to use values()
   const DenseTensor& non_zero_elements() const { return non_zero_elements_; }
 
   /// \brief Returns whether the indices has coalesced
@@ -136,10 +142,18 @@ class SparseCooTensor : public TensorBase,
   /// \brief Get a mutable pointer of non_zero_indices_.
   /// return a mutable pointer of non_zero_indices_.
+  DenseTensor* mutable_indices() { return &non_zero_indices_; }
+
+  /// Note: This function will removed soon. It is recommended to use
+  /// mutable_indices()
   DenseTensor* mutable_non_zero_indices() { return &non_zero_indices_; }
 
   /// \brief Get a mutable pointer of non_zero_elements.
   /// return a mutable pointer of non_zero_elements.
+  DenseTensor* mutable_values() { return &non_zero_elements_; }
+
+  /// Note: This function will removed soon. It is recommended to use
+  /// mutable_values()
   DenseTensor* mutable_non_zero_elements() { return &non_zero_elements_; }
 
   /// \brief This function is not recommended
......
@@ -72,9 +72,9 @@ SparseCsrTensor::SparseCsrTensor(const SparseCsrTensor& other)
 SparseCsrTensor& SparseCsrTensor::operator=(const SparseCsrTensor& other) {
   this->dims_ = other.dims();
-  this->non_zero_crows_ = other.non_zero_crows();
-  this->non_zero_cols_ = other.non_zero_cols();
-  this->non_zero_elements_ = other.non_zero_elements();
+  this->non_zero_crows_ = other.crows();
+  this->non_zero_cols_ = other.cols();
+  this->non_zero_elements_ = other.values();
   return *this;
 }
......
@@ -74,15 +74,24 @@ class SparseCsrTensor : public TensorBase,
   /// dense tensor.
   /// \return The compressed row index of non zero elemetns in original dense
   /// tensor.
+  const DenseTensor& crows() const { return non_zero_crows_; }
+
+  /// Note: This function will removed soon. It is recommended to use crows()
   const DenseTensor& non_zero_crows() const { return non_zero_crows_; }
 
   /// \brief Returns the column index of non zero elemetns in original dense
   /// tensor.
   /// \return The column index of non zero elemetns in original dense tensor.
+  const DenseTensor& cols() const { return non_zero_cols_; }
+
+  /// Note: This function will removed soon. It is recommended to use cols()
   const DenseTensor& non_zero_cols() const { return non_zero_cols_; }
 
   /// \brief Returns the non zero elemetns in original dense tensor.
   /// \return The non zero elemetns in original dense tensor.
+  const DenseTensor& values() const { return non_zero_elements_; }
+
+  /// Note: This function will removed soon. It is recommended to use indices()
   const DenseTensor& non_zero_elements() const { return non_zero_elements_; }
 
   /// \brief Returns the total number of non zero elements in original dense
@@ -138,14 +147,26 @@ class SparseCsrTensor : public TensorBase,
   /// \brief Get a mutable pointer of non_zero_crows.
   /// return a mutable pointer of non_zero_crows.
+  DenseTensor* mutable_crows() { return &non_zero_crows_; }
+
+  /// Note: This function will removed soon. It is recommended to use
+  /// mutable_crows()
   DenseTensor* mutable_non_zero_crows() { return &non_zero_crows_; }
 
   /// \brief Get a mutable pointer of non_zero_cols.
   /// return a mutable pointer of non_zero_cols.
+  DenseTensor* mutable_cols() { return &non_zero_cols_; }
+
+  /// Note: This function will removed soon. It is recommended to use
+  /// mutable_cols()
   DenseTensor* mutable_non_zero_cols() { return &non_zero_cols_; }
 
   /// \brief Get a mutable pointer of non_zero_elements.
   /// return a mutable pointer of non_zero_elements.
+  DenseTensor* mutable_values() { return &non_zero_elements_; }
+
+  /// Note: This function will removed soon. It is recommended to use
+  /// mutable_values()
   DenseTensor* mutable_non_zero_elements() { return &non_zero_elements_; }
 
   /// \brief set the dims of original dense tensor
......
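As the notes in the header above indicate, the old getters are kept for now and simply forward to the same members, so existing call sites keep compiling during the transition. A minimal check of that aliasing, assuming a SparseCsrTensor reference named csr (illustrative, not part of the diff):

// Sketch only: old and new getters currently return the same underlying member.
#include <cassert>
#include "paddle/phi/core/sparse_csr_tensor.h"

void CheckDeprecatedAliases(const phi::SparseCsrTensor& csr) {
  assert(&csr.crows() == &csr.non_zero_crows());
  assert(&csr.cols() == &csr.non_zero_cols());
  assert(&csr.values() == &csr.non_zero_elements());
}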
@@ -31,7 +31,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
                        const SparseCooTensor& x,
                        SparseCooTensor* out) {
   const DenseTensor& x_indices = x.non_zero_indices();
-  const DenseTensor& x_values = x.non_zero_elements();
+  const DenseTensor& x_values = x.values();
   DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x_indices);
   DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, x_values);
@@ -73,7 +73,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
   // 2. get the address of each non-zero values
   const T* x_values_ptr = x_values.data<T>();
   const int64_t stride =
-      x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1];
+      x.dims().size() == sparse_dim ? 1 : x.values().dims()[1];
   DenseTensor values_indexs = phi::Empty(
       dev_ctx, DenseTensorMeta(DataType::INT32, {nnz}, DataLayout::NCHW));
   int* values_indexs_ptr = values_indexs.data<int>();
......
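The kernel hunks that follow repeat the same call-site pattern as the CoalesceGPUKernel change above: reads move from x.non_zero_elements() to x.values(), and writes go through out->mutable_values(). A hedged sketch of that pattern (the function and template names below are illustrative, not taken from the diff):

// Illustrative call-site pattern after the rename; not part of the commit.
#include "paddle/phi/core/sparse_coo_tensor.h"

template <typename T>
void UseRenamedAccessors(const phi::SparseCooTensor& x, phi::SparseCooTensor* out) {
  const T* in_ptr = x.values().data<T>();         // previously x.non_zero_elements().data<T>()
  T* out_ptr = out->mutable_values()->data<T>();  // previously out->mutable_non_zero_elements()->data<T>()
  // ... a kernel launch or copy using in_ptr / out_ptr would go here ...
  (void)in_ptr;
  (void)out_ptr;
}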
@@ -81,7 +81,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
   auto blas = phi::funcs::GetBlas<GPUContext, T>(dev_ctx);
   DenseTensor x_grad_indices =
       phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
-  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
   T* x_grad_values_ptr = x_grad_values.data<T>();
   phi::backends::gpu::GpuMemsetAsync(x_grad_values_ptr,
                                      0,
@@ -109,11 +109,10 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
   offsets[kernel_size] = offset;
 
   if (subm) {
-    phi::funcs::sparse::SubmPreProcess<T, GPUContext>(
-        dev_ctx,
+    phi::funcs::sparse::SubmPreProcess<T, GPUContext>(dev_ctx,
                                                        x,
                                                        kernel,
-                                                       out_grad.non_zero_elements(),
+                                                       out_grad.values(),
                                                        in_channels,
                                                        out_channels,
                                                        half_kernel_size,
@@ -181,7 +180,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
                    unique_value_ptr);
 
     GatherV2<T, IntT>(dev_ctx,
-                      x.non_zero_elements().data<T>(),
+                      x.values().data<T>(),
                       out_index_ptr,
                       unique_value_ptr,
                       x.nnz(),
@@ -192,7 +191,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
                  in_features_ptr);
   Gather<T, IntT>(dev_ctx,
-                  out_grad.non_zero_elements().data<T>(),
+                  out_grad.values().data<T>(),
                   rulebook_ptr + rulebook_len,
                   rulebook_len,
                   out_channels,
......
@@ -131,7 +131,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
   set_zero(dev_ctx, &out_features, static_cast<T>(0.0f));
   Gather<T, IntT>(dev_ctx,
-                  x.non_zero_elements().data<T>(),
+                  x.values().data<T>(),
                   rulebook_ptr,
                   rulebook_len,
                   in_channels,
@@ -139,7 +139,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
   // 3. call gemm for every werght
   auto blas = phi::funcs::GetBlas<GPUContext, T>(dev_ctx);
-  auto* out_values = out->mutable_non_zero_elements();
+  auto* out_values = out->mutable_values();
   T* out_values_ptr = out_values->data<T>();
   set_zero(dev_ctx, out_values, static_cast<T>(0.0f));
......
@@ -515,9 +515,8 @@ int ProductRuleBook(const Context& dev_ctx,
     const int64_t sparse_dim = 4;
     DenseTensorMeta indices_meta(
         indices_dtype, {sparse_dim, out_non_zero_num}, DataLayout::NCHW);
-    DenseTensorMeta values_meta(x.dtype(),
-                                {out_non_zero_num, kernel_sizes[4]},
-                                x.non_zero_elements().layout());
+    DenseTensorMeta values_meta(
+        x.dtype(), {out_non_zero_num, kernel_sizes[4]}, x.values().layout());
     phi::DenseTensor out_indices = phi::Empty(dev_ctx, std::move(indices_meta));
     phi::DenseTensor out_values = phi::Empty(dev_ctx, std::move(values_meta));
@@ -541,11 +540,10 @@ int ProductRuleBook(const Context& dev_ctx,
   } else {
     DenseTensor out_indices =
         phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
-    DenseTensor out_values =
-        phi::Empty(dev_ctx,
-                   DenseTensorMeta(x.dtype(),
-                                   {x.nnz(), kernel_sizes[4]},
-                                   x.non_zero_elements().layout()));
+    DenseTensor out_values = phi::Empty(
+        dev_ctx,
+        DenseTensorMeta(
+            x.dtype(), {x.nnz(), kernel_sizes[4]}, x.values().layout()));
     phi::Copy(
         dev_ctx, x.non_zero_indices(), dev_ctx.GetPlace(), false, &out_indices);
     out->SetMember(out_indices, out_values, out_dims, true);
......
@@ -48,8 +48,8 @@ void CooFullLikeKernel(const Context& dev_ctx,
             false,
             out->mutable_non_zero_indices());
-  DenseTensor* values = out->mutable_non_zero_elements();
-  values->Resize(x.non_zero_elements().dims());
+  DenseTensor* values = out->mutable_values();
+  values->Resize(x.values().dims());
   dev_ctx.template Alloc<T>(values);
 
   std::vector<const DenseTensor*> inputs = {};
@@ -80,8 +80,8 @@ void CsrFullLikeKernel(const Context& dev_ctx,
             false,
             out->mutable_non_zero_cols());
-  DenseTensor* values = out->mutable_non_zero_elements();
-  values->Resize(x.non_zero_elements().dims());
+  DenseTensor* values = out->mutable_values();
+  values->Resize(x.values().dims());
   dev_ctx.template Alloc<T>(values);
 
   std::vector<const DenseTensor*> inputs = {};
......
@@ -94,9 +94,9 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
   AttnSoftmaxGpuGradKernel<T><<<grid, block, 0, dev_ctx.stream()>>>(
       softmax.non_zero_crows().data<int64_t>(),
-      softmax.non_zero_elements().data<T>(),
-      dsoftmax.mutable_non_zero_elements()->data<T>(),
-      d_sdd_result.mutable_non_zero_elements()->data<T>(),
+      softmax.values().data<T>(),
+      dsoftmax.mutable_values()->data<T>(),
+      d_sdd_result.mutable_values()->data<T>(),
       M,
       total_row_num,
       std::sqrt(N),
......
@@ -202,10 +202,10 @@ void FusedAttentionCsrKernel(
   AttnSoftmaxGpuKernel<T><<<grid, block, 0, dev_ctx.stream()>>>(
       sdd_result.non_zero_crows().data<int64_t>(),
       sdd_result.non_zero_cols().data<int64_t>(),
-      sdd_result.non_zero_elements().data<T>(),
+      sdd_result.values().data<T>(),
       kp_mask_ptr ? kp_mask_ptr->data<T>() : nullptr,
       attn_mask_ptr ? attn_mask_ptr->data<T>() : nullptr,
-      softmax->mutable_non_zero_elements()->data<T>(),
+      softmax->mutable_values()->data<T>(),
       M,
       total_row_num,
       q_dim[1],
......
@@ -59,7 +59,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
       mask.dims(),
       phi::errors::InvalidArgument("the input x and mask must have the shape"));
   const DenseTensor& indices = mask.non_zero_indices();
-  const DenseTensor& values = mask.non_zero_elements();
+  const DenseTensor& values = mask.values();
   const int sparse_dim = mask.sparse_dim();
   DenseTensor sparse_offsets = phi::Empty<GPUContext>(
       dev_ctx,
@@ -224,8 +224,8 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
   phi::backends::gpu::GpuMemsetAsync(
       table.data<int>(), 0, table_size * sizeof(int), dev_ctx.stream());
   const int64_t stride =
-      x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1];
-  *out = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+      x.dims().size() == sparse_dim ? 1 : x.values().dims()[1];
+  *out = phi::EmptyLike<T>(dev_ctx, x.values());
   phi::funcs::SetConstant<GPUContext, T> set_zero;
   set_zero(dev_ctx, out, static_cast<T>(0));
   T* out_ptr = out->data<T>();
@@ -242,15 +242,14 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
   const int VecBytes = 16;
   const int VecSize = VecBytes / sizeof(T);
   if (stride % VecSize == 0) {
-    MaskCopy<T, IntT, VecSize>
-        <<<config.block_per_grid,
+    MaskCopy<T, IntT, VecSize><<<config.block_per_grid,
            config.thread_per_block,
            0,
            dev_ctx.stream()>>>(mask_indexs_ptr,
                                table.data<int>(),
                                mask_indexs.numel(),
                                stride,
-                               x.non_zero_elements().data<T>(),
+                               x.values().data<T>(),
                                out_ptr);
   } else {
     MaskCopy<T, IntT, 1><<<config.block_per_grid,
@@ -260,7 +259,7 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
                            table.data<int>(),
                            mask_indexs.numel(),
                            stride,
-                           x.non_zero_elements().data<T>(),
+                           x.values().data<T>(),
                            out_ptr);
   }
 }
......
@@ -79,7 +79,7 @@ void MvCooGradKernel(const Context &dev_ctx,
             dev_ctx.stream()>>>(dout.data<T>(),
                                 vec.data<T>(),
                                 dx->non_zero_indices().data<data_t>(),
-                                dx->mutable_non_zero_elements()->data<T>(),
+                                dx->mutable_values()->data<T>(),
                                 dx->nnz());
       }));
 }
@@ -127,7 +127,7 @@ void MvCsrGradKernel(const Context &dev_ctx,
                                 vec.data<T>(),
                                 dx->non_zero_crows().data<data_t>(),
                                 dx->non_zero_cols().data<data_t>(),
-                                dx->mutable_non_zero_elements()->data<T>(),
+                                dx->mutable_values()->data<T>(),
                                 row_number);
       }));
 }
......
@@ -68,13 +68,13 @@ void MaxPoolCooGradGPUKernel(const GPUContext& dev_ctx,
   const int* counter_ptr = counter.data<int>();
   phi::funcs::sparse::PrefixSum(counter_ptr, &offsets[0], kernel_size);
 
-  const T* in_features_ptr = x.non_zero_elements().data<T>();
-  const T* out_features_ptr = out.non_zero_elements().data<T>();
-  const T* out_grad_ptr = out_grad.non_zero_elements().data<T>();
+  const T* in_features_ptr = x.values().data<T>();
+  const T* out_features_ptr = out.values().data<T>();
+  const T* out_grad_ptr = out_grad.values().data<T>();
   // TODO(zhangkaihuo): call phi::sparse::EmptyLike
   DenseTensor x_grad_indices =
       phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
-  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
   x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
   T* x_grad_ptr = x_grad_values.data<T>();
   phi::funcs::SetConstant<GPUContext, T> set_zero;
......
@@ -95,8 +95,8 @@ void MaxPoolCooGPUKernel(const GPUContext& dev_ctx,
   const IntT* rulebook_ptr = rulebook->data<IntT>();
 
-  T* out_features_ptr = out->mutable_non_zero_elements()->data<T>();
-  const T* in_features_ptr = x.non_zero_elements().data<T>();
+  T* out_features_ptr = out->mutable_values()->data<T>();
+  const T* in_features_ptr = x.values().data<T>();
   counter->Resize({kernel_size});
   int* counter_ptr = dev_ctx.template HostAlloc<int>(counter);
   memcpy(counter_ptr, h_counter.data(), h_counter.size() * sizeof(int));
@@ -107,7 +107,7 @@ void MaxPoolCooGPUKernel(const GPUContext& dev_ctx,
   thrust::fill(thrust::cuda::par.on(dev_ctx.stream()),
 #endif
                out_features_ptr,
-               out_features_ptr + out->non_zero_elements().numel(),
+               out_features_ptr + out->values().numel(),
                static_cast<T>(0));
   // TODO(zhangkaihuo) Replacing multiple calls with one kernel may be faster
   for (int i = 0; i < kernel_size; i++) {
......
@@ -96,9 +96,9 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
       out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
         SoftmaxGradGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
             out.non_zero_crows().data<data_t>(),
-            out.non_zero_elements().data<T>(),
-            dout.non_zero_elements().data<T>(),
-            dx->mutable_non_zero_elements()->data<T>(),
+            out.values().data<T>(),
+            dout.values().data<T>(),
+            dx->mutable_values()->data<T>(),
             row_number,
             total_row_number);
       }));
......
@@ -109,8 +109,8 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
       x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
         SoftmaxGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
             x.non_zero_crows().data<data_t>(),
-            x.non_zero_elements().data<T>(),
-            out->mutable_non_zero_elements()->data<T>(),
+            x.values().data<T>(),
+            out->mutable_values()->data<T>(),
             row_number,
             total_row_number);
       }));
......
@@ -215,7 +215,7 @@ void SparseCsrToCooGPUKernel(const GPUContext& dev_ctx,
   const int64_t non_zero_num = x.non_zero_cols().numel();
   const auto& csr_crows = x.non_zero_crows();
   const auto& csr_cols = x.non_zero_cols();
-  const auto& csr_values = x.non_zero_elements();
+  const auto& csr_values = x.values();
   const IntT* csr_crows_data = csr_crows.data<IntT>();
   const IntT* csr_cols_data = csr_cols.data<IntT>();
   const T* csr_values_data = csr_values.data<T>();
@@ -361,14 +361,13 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
   phi::DenseTensor non_zero_crows =
       phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
   phi::DenseTensor non_zero_cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
-  phi::DenseTensor non_zero_elements =
-      phi::EmptyLike<T, GPUContext>(dev_ctx, x.non_zero_elements());
+  phi::DenseTensor values = phi::EmptyLike<T, GPUContext>(dev_ctx, x.values());
   IntT* csr_crows_data = non_zero_crows.data<IntT>();
   IntT* csr_cols_data = non_zero_cols.data<IntT>();
-  T* csr_values_data = non_zero_elements.data<T>();
+  T* csr_values_data = values.data<T>();
   const auto& coo_indices = x.non_zero_indices();
-  const auto& coo_values = x.non_zero_elements();
+  const auto& coo_values = x.values();
   const IntT* batchs_ptr = coo_indices.data<IntT>();
   const IntT* coo_rows_data =
       x_dims.size() == 2 ? batchs_ptr : batchs_ptr + non_zero_num;
@@ -414,7 +413,7 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
                        sizeof(T) * non_zero_num,
                        gpuMemcpyDeviceToDevice,
                        dev_ctx.stream());
-  out->SetMember(non_zero_crows, non_zero_cols, non_zero_elements, x_dims);
+  out->SetMember(non_zero_crows, non_zero_cols, values, x_dims);
 }
 
 template <typename T, typename Context>
@@ -455,7 +454,7 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
   const auto non_zero_num = x.nnz();
   const auto dense_dims = x.dims();
   const auto indices = x.non_zero_indices();
-  const auto values = x.non_zero_elements();
+  const auto values = x.values();
   const auto indices_dims = indices.dims();
   int64_t sparse_dim = indices_dims[0];
   if (indices_dims.size() == 1) {
@@ -465,9 +464,8 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
   const auto place = dev_ctx.GetPlace();
   const T* x_data = values.data<T>();
-  *out = phi::Empty(dev_ctx,
-                    phi::DenseTensorMeta(
-                        x.dtype(), x.dims(), x.non_zero_elements().layout()));
+  *out = phi::Empty(
+      dev_ctx, phi::DenseTensorMeta(x.dtype(), x.dims(), x.values().layout()));
   T* out_data = out->data<T>();
   int64_t base_offset = 1;
   for (int64_t i = 0; i < dense_dim; i++) {
......
@@ -40,8 +40,8 @@ void DivCooScalarKernel(const Context& dev_ctx,
                         SparseCooTensor* out) {
   EmptyLikeCooKernel<T, Context>(dev_ctx, x, out);
 
-  std::vector<const DenseTensor*> ins = {&(x.non_zero_elements())};
-  std::vector<DenseTensor*> outs = {out->mutable_non_zero_elements()};
+  std::vector<const DenseTensor*> ins = {&(x.values())};
+  std::vector<DenseTensor*> outs = {out->mutable_values()};
   DivScalarFunctor<T> func(static_cast<T>(scalar));
   funcs::ElementwiseKernel<T, DivScalarFunctor<T>>(dev_ctx, ins, &outs, func);
 }
@@ -53,8 +53,8 @@ void DivCsrScalarKernel(const Context& dev_ctx,
                         SparseCsrTensor* out) {
   EmptyLikeCsrKernel<T, Context>(dev_ctx, x, out);
 
-  std::vector<const DenseTensor*> ins = {&(x.non_zero_elements())};
-  std::vector<DenseTensor*> outs = {out->mutable_non_zero_elements()};
+  std::vector<const DenseTensor*> ins = {&(x.values())};
+  std::vector<DenseTensor*> outs = {out->mutable_values()};
   DivScalarFunctor<T> func(static_cast<T>(scalar));
   funcs::ElementwiseKernel<T, DivScalarFunctor<T>>(dev_ctx, ins, &outs, func);
 }
......