Unverified commit 016b94c2, authored by zhangkaihuo, committed by GitHub

Rename the member functions of SparseTensor (#45291)

Parent ed57237e
......@@ -63,10 +63,16 @@ class SparseCooTensor : public TensorBase,
/// \brief Returns the indices of non zero elements in original dense tensor.
/// \return The indices of non zero elements in original dense tensor.
const DenseTensor& indices() const { return non_zero_indices_; }
/// Note: This function will be removed soon. It is recommended to use indices()
const DenseTensor& non_zero_indices() const { return non_zero_indices_; }
/// \brief Returns the non zero elements in original dense tensor.
/// \return The non zero elements in original dense tensor.
const DenseTensor& values() const { return non_zero_elements_; }
/// Note: This function will be removed soon. It is recommended to use values()
const DenseTensor& non_zero_elements() const { return non_zero_elements_; }
/// \brief Returns whether the indices have been coalesced
......@@ -136,10 +142,18 @@ class SparseCooTensor : public TensorBase,
/// \brief Get a mutable pointer of non_zero_indices_.
/// return a mutable pointer of non_zero_indices_.
DenseTensor* mutable_indices() { return &non_zero_indices_; }
/// Note: This function will be removed soon. It is recommended to use
/// mutable_indices()
DenseTensor* mutable_non_zero_indices() { return &non_zero_indices_; }
/// \brief Get a mutable pointer of non_zero_elements.
/// return a mutable pointer of non_zero_elements.
DenseTensor* mutable_values() { return &non_zero_elements_; }
/// Note: This function will be removed soon. It is recommended to use
/// mutable_values()
DenseTensor* mutable_non_zero_elements() { return &non_zero_elements_; }
/// \brief This function is not recommended
......
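The SparseCooTensor header above introduces the short accessor names indices(), values(), mutable_indices(), and mutable_values(), and keeps the old non_zero_* forms only as deprecated aliases. A minimal call-site sketch in C++ follows; the helper name CopyCooMembers and the include path are assumptions for illustration, not part of this patch:

// Sketch only: shows the renamed SparseCooTensor accessors; the
// non_zero_* forms still compile but are slated for removal.
#include "paddle/phi/core/sparse_coo_tensor.h"

void CopyCooMembers(const phi::SparseCooTensor& src, phi::SparseCooTensor* dst) {
  const phi::DenseTensor& idx = src.indices();  // was src.non_zero_indices()
  const phi::DenseTensor& val = src.values();   // was src.non_zero_elements()
  *dst->mutable_indices() = idx;                // was dst->mutable_non_zero_indices()
  *dst->mutable_values() = val;                 // was dst->mutable_non_zero_elements()
}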
......@@ -72,9 +72,9 @@ SparseCsrTensor::SparseCsrTensor(const SparseCsrTensor& other)
SparseCsrTensor& SparseCsrTensor::operator=(const SparseCsrTensor& other) {
this->dims_ = other.dims();
this->non_zero_crows_ = other.non_zero_crows();
this->non_zero_cols_ = other.non_zero_cols();
this->non_zero_elements_ = other.non_zero_elements();
this->non_zero_crows_ = other.crows();
this->non_zero_cols_ = other.cols();
this->non_zero_elements_ = other.values();
return *this;
}
......
......@@ -74,15 +74,24 @@ class SparseCsrTensor : public TensorBase,
/// dense tensor.
/// \return The compressed row index of non zero elements in original dense
/// tensor.
const DenseTensor& crows() const { return non_zero_crows_; }
/// Note: This function will be removed soon. It is recommended to use crows()
const DenseTensor& non_zero_crows() const { return non_zero_crows_; }
/// \brief Returns the column index of non zero elements in original dense
/// tensor.
/// \return The column index of non zero elements in original dense tensor.
const DenseTensor& cols() const { return non_zero_cols_; }
/// Note: This function will be removed soon. It is recommended to use cols()
const DenseTensor& non_zero_cols() const { return non_zero_cols_; }
/// \brief Returns the non zero elements in original dense tensor.
/// \return The non zero elements in original dense tensor.
const DenseTensor& values() const { return non_zero_elements_; }
/// Note: This function will be removed soon. It is recommended to use values()
const DenseTensor& non_zero_elements() const { return non_zero_elements_; }
/// \brief Returns the total number of non zero elements in original dense
......@@ -138,14 +147,26 @@ class SparseCsrTensor : public TensorBase,
/// \brief Get a mutable pointer of non_zero_crows.
/// return a mutable pointer of non_zero_crows.
DenseTensor* mutable_crows() { return &non_zero_crows_; }
/// Note: This function will be removed soon. It is recommended to use
/// mutable_crows()
DenseTensor* mutable_non_zero_crows() { return &non_zero_crows_; }
/// \brief Get a mutable pointer of non_zero_cols.
/// return a mutable pointer of non_zero_cols.
DenseTensor* mutable_cols() { return &non_zero_cols_; }
/// Note: This function will be removed soon. It is recommended to use
/// mutable_cols()
DenseTensor* mutable_non_zero_cols() { return &non_zero_cols_; }
/// \brief Get a mutable pointer of non_zero_elements.
/// return a mutable pointer of non_zero_elements.
DenseTensor* mutable_values() { return &non_zero_elements_; }
/// Note: This function will be removed soon. It is recommended to use
/// mutable_values()
DenseTensor* mutable_non_zero_elements() { return &non_zero_elements_; }
/// \brief set the dims of original dense tensor
......
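SparseCsrTensor gets the same treatment: crows(), cols(), values() and their mutable_* counterparts replace the non_zero_* names, which remain only as deprecated aliases. A minimal read-only sketch; the helper name CsrStorageElems and the include path are assumptions for illustration:

// Sketch only: counts the elements held by the three CSR member tensors
// using the renamed accessors.
#include <cstdint>
#include "paddle/phi/core/sparse_csr_tensor.h"

int64_t CsrStorageElems(const phi::SparseCsrTensor& csr) {
  return csr.crows().numel()    // was csr.non_zero_crows()
         + csr.cols().numel()   // was csr.non_zero_cols()
         + csr.values().numel();  // was csr.non_zero_elements()
}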
......@@ -31,7 +31,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
const DenseTensor& x_indices = x.non_zero_indices();
const DenseTensor& x_values = x.non_zero_elements();
const DenseTensor& x_values = x.values();
DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x_indices);
DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, x_values);
......@@ -73,7 +73,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
// 2. get the address of each non-zero value
const T* x_values_ptr = x_values.data<T>();
const int64_t stride =
x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1];
x.dims().size() == sparse_dim ? 1 : x.values().dims()[1];
DenseTensor values_indexs = phi::Empty(
dev_ctx, DenseTensorMeta(DataType::INT32, {nnz}, DataLayout::NCHW));
int* values_indexs_ptr = values_indexs.data<int>();
......
......@@ -81,7 +81,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
auto blas = phi::funcs::GetBlas<GPUContext, T>(dev_ctx);
DenseTensor x_grad_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
T* x_grad_values_ptr = x_grad_values.data<T>();
phi::backends::gpu::GpuMemsetAsync(x_grad_values_ptr,
0,
......@@ -109,16 +109,15 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
offsets[kernel_size] = offset;
if (subm) {
phi::funcs::sparse::SubmPreProcess<T, GPUContext>(
dev_ctx,
x,
kernel,
out_grad.non_zero_elements(),
in_channels,
out_channels,
half_kernel_size,
kernel_grad,
&x_grad_values);
phi::funcs::sparse::SubmPreProcess<T, GPUContext>(dev_ctx,
x,
kernel,
out_grad.values(),
in_channels,
out_channels,
half_kernel_size,
kernel_grad,
&x_grad_values);
if (max_count == 0) {
return;
}
......@@ -181,7 +180,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
unique_value_ptr);
GatherV2<T, IntT>(dev_ctx,
x.non_zero_elements().data<T>(),
x.values().data<T>(),
out_index_ptr,
unique_value_ptr,
x.nnz(),
......@@ -192,7 +191,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
in_features_ptr);
Gather<T, IntT>(dev_ctx,
out_grad.non_zero_elements().data<T>(),
out_grad.values().data<T>(),
rulebook_ptr + rulebook_len,
rulebook_len,
out_channels,
......
......@@ -131,7 +131,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
set_zero(dev_ctx, &out_features, static_cast<T>(0.0f));
Gather<T, IntT>(dev_ctx,
x.non_zero_elements().data<T>(),
x.values().data<T>(),
rulebook_ptr,
rulebook_len,
in_channels,
......@@ -139,7 +139,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
// 3. call gemm for every weight
auto blas = phi::funcs::GetBlas<GPUContext, T>(dev_ctx);
auto* out_values = out->mutable_non_zero_elements();
auto* out_values = out->mutable_values();
T* out_values_ptr = out_values->data<T>();
set_zero(dev_ctx, out_values, static_cast<T>(0.0f));
......
......@@ -515,9 +515,8 @@ int ProductRuleBook(const Context& dev_ctx,
const int64_t sparse_dim = 4;
DenseTensorMeta indices_meta(
indices_dtype, {sparse_dim, out_non_zero_num}, DataLayout::NCHW);
DenseTensorMeta values_meta(x.dtype(),
{out_non_zero_num, kernel_sizes[4]},
x.non_zero_elements().layout());
DenseTensorMeta values_meta(
x.dtype(), {out_non_zero_num, kernel_sizes[4]}, x.values().layout());
phi::DenseTensor out_indices = phi::Empty(dev_ctx, std::move(indices_meta));
phi::DenseTensor out_values = phi::Empty(dev_ctx, std::move(values_meta));
......@@ -541,11 +540,10 @@ int ProductRuleBook(const Context& dev_ctx,
} else {
DenseTensor out_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor out_values =
phi::Empty(dev_ctx,
DenseTensorMeta(x.dtype(),
{x.nnz(), kernel_sizes[4]},
x.non_zero_elements().layout()));
DenseTensor out_values = phi::Empty(
dev_ctx,
DenseTensorMeta(
x.dtype(), {x.nnz(), kernel_sizes[4]}, x.values().layout()));
phi::Copy(
dev_ctx, x.non_zero_indices(), dev_ctx.GetPlace(), false, &out_indices);
out->SetMember(out_indices, out_values, out_dims, true);
......
......@@ -48,8 +48,8 @@ void CooFullLikeKernel(const Context& dev_ctx,
false,
out->mutable_non_zero_indices());
DenseTensor* values = out->mutable_non_zero_elements();
values->Resize(x.non_zero_elements().dims());
DenseTensor* values = out->mutable_values();
values->Resize(x.values().dims());
dev_ctx.template Alloc<T>(values);
std::vector<const DenseTensor*> inputs = {};
......@@ -80,8 +80,8 @@ void CsrFullLikeKernel(const Context& dev_ctx,
false,
out->mutable_non_zero_cols());
DenseTensor* values = out->mutable_non_zero_elements();
values->Resize(x.non_zero_elements().dims());
DenseTensor* values = out->mutable_values();
values->Resize(x.values().dims());
dev_ctx.template Alloc<T>(values);
std::vector<const DenseTensor*> inputs = {};
......
......@@ -94,9 +94,9 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
AttnSoftmaxGpuGradKernel<T><<<grid, block, 0, dev_ctx.stream()>>>(
softmax.non_zero_crows().data<int64_t>(),
softmax.non_zero_elements().data<T>(),
dsoftmax.mutable_non_zero_elements()->data<T>(),
d_sdd_result.mutable_non_zero_elements()->data<T>(),
softmax.values().data<T>(),
dsoftmax.mutable_values()->data<T>(),
d_sdd_result.mutable_values()->data<T>(),
M,
total_row_num,
std::sqrt(N),
......
......@@ -202,10 +202,10 @@ void FusedAttentionCsrKernel(
AttnSoftmaxGpuKernel<T><<<grid, block, 0, dev_ctx.stream()>>>(
sdd_result.non_zero_crows().data<int64_t>(),
sdd_result.non_zero_cols().data<int64_t>(),
sdd_result.non_zero_elements().data<T>(),
sdd_result.values().data<T>(),
kp_mask_ptr ? kp_mask_ptr->data<T>() : nullptr,
attn_mask_ptr ? attn_mask_ptr->data<T>() : nullptr,
softmax->mutable_non_zero_elements()->data<T>(),
softmax->mutable_values()->data<T>(),
M,
total_row_num,
q_dim[1],
......
......@@ -59,7 +59,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
mask.dims(),
phi::errors::InvalidArgument("the input x and mask must have the same shape"));
const DenseTensor& indices = mask.non_zero_indices();
const DenseTensor& values = mask.non_zero_elements();
const DenseTensor& values = mask.values();
const int sparse_dim = mask.sparse_dim();
DenseTensor sparse_offsets = phi::Empty<GPUContext>(
dev_ctx,
......@@ -224,8 +224,8 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
phi::backends::gpu::GpuMemsetAsync(
table.data<int>(), 0, table_size * sizeof(int), dev_ctx.stream());
const int64_t stride =
x.dims().size() == sparse_dim ? 1 : x.non_zero_elements().dims()[1];
*out = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
x.dims().size() == sparse_dim ? 1 : x.values().dims()[1];
*out = phi::EmptyLike<T>(dev_ctx, x.values());
phi::funcs::SetConstant<GPUContext, T> set_zero;
set_zero(dev_ctx, out, static_cast<T>(0));
T* out_ptr = out->data<T>();
......@@ -242,16 +242,15 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
const int VecBytes = 16;
const int VecSize = VecBytes / sizeof(T);
if (stride % VecSize == 0) {
MaskCopy<T, IntT, VecSize>
<<<config.block_per_grid,
config.thread_per_block,
0,
dev_ctx.stream()>>>(mask_indexs_ptr,
table.data<int>(),
mask_indexs.numel(),
stride,
x.non_zero_elements().data<T>(),
out_ptr);
MaskCopy<T, IntT, VecSize><<<config.block_per_grid,
config.thread_per_block,
0,
dev_ctx.stream()>>>(mask_indexs_ptr,
table.data<int>(),
mask_indexs.numel(),
stride,
x.values().data<T>(),
out_ptr);
} else {
MaskCopy<T, IntT, 1><<<config.block_per_grid,
config.thread_per_block,
......@@ -260,7 +259,7 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
table.data<int>(),
mask_indexs.numel(),
stride,
x.non_zero_elements().data<T>(),
x.values().data<T>(),
out_ptr);
}
}
......
......@@ -79,7 +79,7 @@ void MvCooGradKernel(const Context &dev_ctx,
dev_ctx.stream()>>>(dout.data<T>(),
vec.data<T>(),
dx->non_zero_indices().data<data_t>(),
dx->mutable_non_zero_elements()->data<T>(),
dx->mutable_values()->data<T>(),
dx->nnz());
}));
}
......@@ -127,7 +127,7 @@ void MvCsrGradKernel(const Context &dev_ctx,
vec.data<T>(),
dx->non_zero_crows().data<data_t>(),
dx->non_zero_cols().data<data_t>(),
dx->mutable_non_zero_elements()->data<T>(),
dx->mutable_values()->data<T>(),
row_number);
}));
}
......
......@@ -68,13 +68,13 @@ void MaxPoolCooGradGPUKernel(const GPUContext& dev_ctx,
const int* counter_ptr = counter.data<int>();
phi::funcs::sparse::PrefixSum(counter_ptr, &offsets[0], kernel_size);
const T* in_features_ptr = x.non_zero_elements().data<T>();
const T* out_features_ptr = out.non_zero_elements().data<T>();
const T* out_grad_ptr = out_grad.non_zero_elements().data<T>();
const T* in_features_ptr = x.values().data<T>();
const T* out_features_ptr = out.values().data<T>();
const T* out_grad_ptr = out_grad.values().data<T>();
// TODO(zhangkaihuo): call phi::sparse::EmptyLike
DenseTensor x_grad_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
T* x_grad_ptr = x_grad_values.data<T>();
phi::funcs::SetConstant<GPUContext, T> set_zero;
......
......@@ -95,8 +95,8 @@ void MaxPoolCooGPUKernel(const GPUContext& dev_ctx,
const IntT* rulebook_ptr = rulebook->data<IntT>();
T* out_features_ptr = out->mutable_non_zero_elements()->data<T>();
const T* in_features_ptr = x.non_zero_elements().data<T>();
T* out_features_ptr = out->mutable_values()->data<T>();
const T* in_features_ptr = x.values().data<T>();
counter->Resize({kernel_size});
int* counter_ptr = dev_ctx.template HostAlloc<int>(counter);
memcpy(counter_ptr, h_counter.data(), h_counter.size() * sizeof(int));
......@@ -107,7 +107,7 @@ void MaxPoolCooGPUKernel(const GPUContext& dev_ctx,
thrust::fill(thrust::cuda::par.on(dev_ctx.stream()),
#endif
out_features_ptr,
out_features_ptr + out->non_zero_elements().numel(),
out_features_ptr + out->values().numel(),
static_cast<T>(0));
// TODO(zhangkaihuo) Replacing multiple calls with one kernel may be faster
for (int i = 0; i < kernel_size; i++) {
......
......@@ -96,9 +96,9 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
SoftmaxGradGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
out.non_zero_crows().data<data_t>(),
out.non_zero_elements().data<T>(),
dout.non_zero_elements().data<T>(),
dx->mutable_non_zero_elements()->data<T>(),
out.values().data<T>(),
dout.values().data<T>(),
dx->mutable_values()->data<T>(),
row_number,
total_row_number);
}));
......
......@@ -109,8 +109,8 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
SoftmaxGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
x.non_zero_crows().data<data_t>(),
x.non_zero_elements().data<T>(),
out->mutable_non_zero_elements()->data<T>(),
x.values().data<T>(),
out->mutable_values()->data<T>(),
row_number,
total_row_number);
}));
......
......@@ -215,7 +215,7 @@ void SparseCsrToCooGPUKernel(const GPUContext& dev_ctx,
const int64_t non_zero_num = x.non_zero_cols().numel();
const auto& csr_crows = x.non_zero_crows();
const auto& csr_cols = x.non_zero_cols();
const auto& csr_values = x.non_zero_elements();
const auto& csr_values = x.values();
const IntT* csr_crows_data = csr_crows.data<IntT>();
const IntT* csr_cols_data = csr_cols.data<IntT>();
const T* csr_values_data = csr_values.data<T>();
......@@ -361,14 +361,13 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
phi::DenseTensor non_zero_crows =
phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
phi::DenseTensor non_zero_cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
phi::DenseTensor non_zero_elements =
phi::EmptyLike<T, GPUContext>(dev_ctx, x.non_zero_elements());
phi::DenseTensor values = phi::EmptyLike<T, GPUContext>(dev_ctx, x.values());
IntT* csr_crows_data = non_zero_crows.data<IntT>();
IntT* csr_cols_data = non_zero_cols.data<IntT>();
T* csr_values_data = non_zero_elements.data<T>();
T* csr_values_data = values.data<T>();
const auto& coo_indices = x.non_zero_indices();
const auto& coo_values = x.non_zero_elements();
const auto& coo_values = x.values();
const IntT* batchs_ptr = coo_indices.data<IntT>();
const IntT* coo_rows_data =
x_dims.size() == 2 ? batchs_ptr : batchs_ptr + non_zero_num;
......@@ -414,7 +413,7 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
sizeof(T) * non_zero_num,
gpuMemcpyDeviceToDevice,
dev_ctx.stream());
out->SetMember(non_zero_crows, non_zero_cols, non_zero_elements, x_dims);
out->SetMember(non_zero_crows, non_zero_cols, values, x_dims);
}
template <typename T, typename Context>
......@@ -455,7 +454,7 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
const auto non_zero_num = x.nnz();
const auto dense_dims = x.dims();
const auto indices = x.non_zero_indices();
const auto values = x.non_zero_elements();
const auto values = x.values();
const auto indices_dims = indices.dims();
int64_t sparse_dim = indices_dims[0];
if (indices_dims.size() == 1) {
......@@ -465,9 +464,8 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
const auto place = dev_ctx.GetPlace();
const T* x_data = values.data<T>();
*out = phi::Empty(dev_ctx,
phi::DenseTensorMeta(
x.dtype(), x.dims(), x.non_zero_elements().layout()));
*out = phi::Empty(
dev_ctx, phi::DenseTensorMeta(x.dtype(), x.dims(), x.values().layout()));
T* out_data = out->data<T>();
int64_t base_offset = 1;
for (int64_t i = 0; i < dense_dim; i++) {
......
......@@ -40,8 +40,8 @@ void DivCooScalarKernel(const Context& dev_ctx,
SparseCooTensor* out) {
EmptyLikeCooKernel<T, Context>(dev_ctx, x, out);
std::vector<const DenseTensor*> ins = {&(x.non_zero_elements())};
std::vector<DenseTensor*> outs = {out->mutable_non_zero_elements()};
std::vector<const DenseTensor*> ins = {&(x.values())};
std::vector<DenseTensor*> outs = {out->mutable_values()};
DivScalarFunctor<T> func(static_cast<T>(scalar));
funcs::ElementwiseKernel<T, DivScalarFunctor<T>>(dev_ctx, ins, &outs, func);
}
......@@ -53,8 +53,8 @@ void DivCsrScalarKernel(const Context& dev_ctx,
SparseCsrTensor* out) {
EmptyLikeCsrKernel<T, Context>(dev_ctx, x, out);
std::vector<const DenseTensor*> ins = {&(x.non_zero_elements())};
std::vector<DenseTensor*> outs = {out->mutable_non_zero_elements()};
std::vector<const DenseTensor*> ins = {&(x.values())};
std::vector<DenseTensor*> outs = {out->mutable_values()};
DivScalarFunctor<T> func(static_cast<T>(scalar));
funcs::ElementwiseKernel<T, DivScalarFunctor<T>>(dev_ctx, ins, &outs, func);
}
......