Unverified · Commit 3a7b1810 · Authored by zhangkaihuo, committed by GitHub

[Sparse] Use shorter function names (#45325)

* rename the member functions of SparseTensor

* use shorter function names
Parent b5d8bd2f
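The rename replaces the long SparseCooTensor/SparseCsrTensor accessors with shorter ones: non_zero_indices() becomes indices(), non_zero_crows() becomes crows(), non_zero_cols() becomes cols(), and the mutable_* variants follow suit. A minimal sketch of a call site after the change is shown below; the helper names (CooIndicesData, CsrNnz) and the header paths are illustrative assumptions, not code from this commit.

// Illustrative only: reading sparse-tensor members through the shortened
// accessors introduced by this commit. Header paths assume the usual
// paddle/phi layout; the two helpers are hypothetical.
#include <cstdint>
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"

namespace phi {
namespace sparse {

template <typename IntT>
const IntT* CooIndicesData(const SparseCooTensor& x) {
  // Was: x.non_zero_indices().data<IntT>()
  return x.indices().data<IntT>();
}

inline int64_t CsrNnz(const SparseCsrTensor& x) {
  // Was: x.non_zero_cols().numel()
  return x.cols().numel();
}

}  // namespace sparse
}  // namespace phi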
@@ -30,13 +30,13 @@ template <typename T, typename IntT>
 void CoalesceGPUKernel(const GPUContext& dev_ctx,
                        const SparseCooTensor& x,
                        SparseCooTensor* out) {
-  const DenseTensor& x_indices = x.non_zero_indices();
+  const DenseTensor& x_indices = x.indices();
   const DenseTensor& x_values = x.values();
   DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x_indices);
   DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, x_values);
   const int64_t nnz = x.nnz();
-  const int64_t sparse_dim = x.non_zero_indices().dims()[0];
+  const int64_t sparse_dim = x.indices().dims()[0];
   std::vector<IntT> sparse_offsets(sparse_dim);
   phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
@@ -64,7 +64,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
         config.thread_per_block,
         0,
         dev_ctx.stream()>>>(
-      x.non_zero_indices().data<IntT>(),
+      x.indices().data<IntT>(),
       d_sparse_offsets.data<IntT>(),
       indexs.numel(),
       sparse_dim,
@@ -175,8 +175,7 @@ template <typename T, typename Context>
 void CoalesceKernel(const Context& dev_ctx,
                     const SparseCooTensor& x,
                     SparseCooTensor* out) {
-  PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "CoalesceGPUKernel", ([&] {
+  PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "CoalesceGPUKernel", ([&] {
        CoalesceGPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
......
@@ -589,8 +589,8 @@ int ProductRuleBook(const Context& dev_ctx,
                     int* h_offsets) {
   auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
   const int64_t non_zero_num = x.nnz();
-  const auto& non_zero_indices = x.non_zero_indices();
-  const IntT* indices_ptr = non_zero_indices.data<IntT>();
+  const auto& indices = x.indices();
+  const IntT* indices_ptr = indices.data<IntT>();
   int* counter_ptr = counter_per_kernel->data<int>();
   int* offsets_ptr = offsets_per_kernel->data<int>();
   int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
@@ -629,12 +629,10 @@ int ProductRuleBook(const Context& dev_ctx,
   if (subm) {
     DenseTensor tmp_rulebook = phi::Empty(dev_ctx, std::move(rulebook_meta));
     IntT* rulebook_ptr = tmp_rulebook.data<IntT>();
-    DenseTensor out_indices =
-        phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
+    DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
     DenseTensor out_values = phi::Empty<T>(dev_ctx, {x.nnz(), kernel_sizes[4]});
-    phi::Copy(
-        dev_ctx, x.non_zero_indices(), dev_ctx.GetPlace(), false, &out_indices);
+    phi::Copy(dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &out_indices);
     phi::backends::gpu::GpuMemsetAsync(
         out_index_table_ptr, 0, sizeof(int) * table_size, dev_ctx.stream());
......
@@ -79,8 +79,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
   int half_kernel_size = kernel_size / 2;
   auto blas = phi::funcs::GetBlas<GPUContext, T>(dev_ctx);
-  DenseTensor x_grad_indices =
-      phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
+  DenseTensor x_grad_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
   DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
   T* x_grad_values_ptr = x_grad_values.data<T>();
   phi::backends::gpu::GpuMemsetAsync(x_grad_values_ptr,
@@ -89,11 +88,8 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
                                      dev_ctx.stream());
   phi::backends::gpu::GpuMemsetAsync(
       d_x_features_ptr, 0, sizeof(T) * d_x_features.numel(), dev_ctx.stream());
-  phi::Copy<GPUContext>(dev_ctx,
-                        x.non_zero_indices(),
-                        dev_ctx.GetPlace(),
-                        false,
-                        &x_grad_indices);
+  phi::Copy<GPUContext>(
+      dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &x_grad_indices);
   x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
   std::vector<int> offsets(kernel_size + 1);
@@ -142,7 +138,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
       <<<config.block_per_grid,
          config.thread_per_block,
          0,
-         dev_ctx.stream()>>>(x.non_zero_indices().data<IntT>(),
+         dev_ctx.stream()>>>(x.indices().data<IntT>(),
                              x.nnz(),
                              d_x_dims,
                              nullptr,
@@ -269,7 +265,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
                          SparseCooTensor* x_grad,
                          DenseTensor* kernel_grad) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "Conv3dCooGradGPUKernel", ([&] {
+      x.indices().dtype(), "Conv3dCooGradGPUKernel", ([&] {
        Conv3dCooGradGPUKernel<T, data_t>(dev_ctx,
                                          x,
                                          kernel,
......
@@ -202,8 +202,7 @@ void Conv3dCooKernel(const Context& dev_ctx,
                      SparseCooTensor* out,
                      DenseTensor* rulebook,
                      DenseTensor* counter) {
-  PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "Conv3dCooGPUKernel", ([&] {
+  PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "Conv3dCooGPUKernel", ([&] {
        Conv3dCooGPUKernel<T, data_t>(dev_ctx,
                                      x,
                                      kernel,
......
@@ -302,8 +302,8 @@ int ProductRuleBook(const Context& dev_ctx,
                     std::vector<int>* h_offsets) {
   auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
   const int64_t non_zero_num = x.nnz();
-  const auto& non_zero_indices = x.non_zero_indices();
-  const IntT* indices_ptr = non_zero_indices.data<IntT>();
+  const auto& indices = x.indices();
+  const IntT* indices_ptr = indices.data<IntT>();
   DenseTensor in_indexs = phi::Empty<Context>(
       dev_ctx, DenseTensorMeta(indices_dtype, {x.nnz()}, DataLayout::NCHW));
   int* counter_ptr = counter_per_kernel->data<int>();
@@ -538,14 +538,12 @@ int ProductRuleBook(const Context& dev_ctx,
                    rulebook_ptr + 2 * rulebook_len);
     out->SetMember(out_indices, out_values, out_dims, true);
   } else {
-    DenseTensor out_indices =
-        phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
+    DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
     DenseTensor out_values = phi::Empty(
         dev_ctx,
         DenseTensorMeta(
             x.dtype(), {x.nnz(), kernel_sizes[4]}, x.values().layout()));
-    phi::Copy(
-        dev_ctx, x.non_zero_indices(), dev_ctx.GetPlace(), false, &out_indices);
+    phi::Copy(dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &out_indices);
     out->SetMember(out_indices, out_values, out_dims, true);
   }
   return rulebook_len;
......
@@ -42,11 +42,8 @@ void CooFullLikeKernel(const Context& dev_ctx,
                        const Scalar& val,
                        DataType dtype,
                        SparseCooTensor* out) {
-  phi::Copy<Context>(dev_ctx,
-                     x.non_zero_indices(),
-                     dev_ctx.GetPlace(),
-                     false,
-                     out->mutable_non_zero_indices());
+  phi::Copy<Context>(
+      dev_ctx, x.indices(), dev_ctx.GetPlace(), false, out->mutable_indices());
   DenseTensor* values = out->mutable_values();
   values->Resize(x.values().dims());
@@ -68,17 +65,11 @@ void CsrFullLikeKernel(const Context& dev_ctx,
                        const Scalar& val,
                        DataType dtype,
                        SparseCsrTensor* out) {
-  phi::Copy<Context>(dev_ctx,
-                     x.non_zero_crows(),
-                     dev_ctx.GetPlace(),
-                     false,
-                     out->mutable_non_zero_crows());
-  phi::Copy<Context>(dev_ctx,
-                     x.non_zero_cols(),
-                     dev_ctx.GetPlace(),
-                     false,
-                     out->mutable_non_zero_cols());
+  phi::Copy<Context>(
+      dev_ctx, x.crows(), dev_ctx.GetPlace(), false, out->mutable_crows());
+  phi::Copy<Context>(
+      dev_ctx, x.cols(), dev_ctx.GetPlace(), false, out->mutable_cols());
   DenseTensor* values = out->mutable_values();
   values->Resize(x.values().dims());
......
@@ -93,7 +93,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
   dim3 block(WARP_SIZE, 8);
   AttnSoftmaxGpuGradKernel<T><<<grid, block, 0, dev_ctx.stream()>>>(
-      softmax.non_zero_crows().data<int64_t>(),
+      softmax.crows().data<int64_t>(),
       softmax.values().data<T>(),
       dsoftmax.mutable_values()->data<T>(),
       d_sdd_result.mutable_values()->data<T>(),
......
@@ -200,8 +200,8 @@ void FusedAttentionCsrKernel(
   int batch_nnz = sdd_result.nnz() / batch_num;
   AttnSoftmaxGpuKernel<T><<<grid, block, 0, dev_ctx.stream()>>>(
-      sdd_result.non_zero_crows().data<int64_t>(),
-      sdd_result.non_zero_cols().data<int64_t>(),
+      sdd_result.crows().data<int64_t>(),
+      sdd_result.cols().data<int64_t>(),
       sdd_result.values().data<T>(),
       kp_mask_ptr ? kp_mask_ptr->data<T>() : nullptr,
       attn_mask_ptr ? attn_mask_ptr->data<T>() : nullptr,
......
@@ -58,7 +58,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
       x.dims(),
       mask.dims(),
       phi::errors::InvalidArgument("the input x and mask must have the shape"));
-  const DenseTensor& indices = mask.non_zero_indices();
+  const DenseTensor& indices = mask.indices();
   const DenseTensor& values = mask.values();
   const int sparse_dim = mask.sparse_dim();
   DenseTensor sparse_offsets = phi::Empty<GPUContext>(
@@ -103,7 +103,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
 /**
  * @brief Filter the DenseTensor x by the
- * mask.non_zero_indices() and output a SparseCooTensor
+ * mask.indices() and output a SparseCooTensor
  * x and mask must have the same shape.
  **/
 template <typename T, typename Context>
@@ -112,7 +112,7 @@ void SparseMaskKernel(const Context& dev_ctx,
                       const SparseCooTensor& mask,
                       SparseCooTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      mask.non_zero_indices().dtype(), "SparseMaskGPUKernel", ([&] {
+      mask.indices().dtype(), "SparseMaskGPUKernel", ([&] {
        SparseMaskGPUKernel<T, data_t>(dev_ctx, x, mask, out);
      }));
 }
@@ -197,7 +197,7 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
          config.thread_per_block,
          0,
          dev_ctx.stream()>>>(
-      x.non_zero_indices().data<IntT>(),
+      x.indices().data<IntT>(),
       d_sparse_offsets.data<IntT>(),
       x_indexs.numel(),
       sparse_dim,
@@ -270,7 +270,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx,
                             const DenseTensor& mask_indices,
                             DenseTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "SparseMaskHelperGPUKernel", ([&] {
+      x.indices().dtype(), "SparseMaskHelperGPUKernel", ([&] {
        SparseMaskHelperGPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
      }));
 }
......
@@ -71,14 +71,14 @@ void MvCooGradKernel(const Context &dev_ctx,
   EmptyLikeCooKernel<T, Context>(dev_ctx, x, dx);
   auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, dx->nnz());
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      dx->non_zero_indices().dtype(), "MvCooGradKernel", ([&] {
+      dx->indices().dtype(), "MvCooGradKernel", ([&] {
        MvCooGradGpuKernel<T>
            <<<config.block_per_grid.x,
               config.thread_per_block.x,
               0,
               dev_ctx.stream()>>>(dout.data<T>(),
                                   vec.data<T>(),
-                                  dx->non_zero_indices().data<data_t>(),
+                                  dx->indices().data<data_t>(),
                                   dx->mutable_values()->data<T>(),
                                   dx->nnz());
      }));
@@ -117,16 +117,16 @@ void MvCsrGradKernel(const Context &dev_ctx,
   int col_number = dx->dims()[1];
   auto config = phi::backends::gpu::GetGpuLaunchConfig2D(
       dev_ctx, col_number, row_number);
-  PD_VISIT_BASE_INTEGRAL_TYPES(
-      dx->non_zero_crows().dtype(), "MvCsrGradKernel", ([&] {
+  PD_VISIT_BASE_INTEGRAL_TYPES(dx->crows().dtype(), "MvCsrGradKernel", ([&] {
        MvCsrGradGpuKernel<T>
            <<<config.block_per_grid.x,
               config.thread_per_block.x,
               0,
-              dev_ctx.stream()>>>(dout.data<T>(),
+              dev_ctx.stream()>>>(
+                  dout.data<T>(),
                   vec.data<T>(),
-                  dx->non_zero_crows().data<data_t>(),
-                  dx->non_zero_cols().data<data_t>(),
+                  dx->crows().data<data_t>(),
+                  dx->cols().data<data_t>(),
                   dx->mutable_values()->data<T>(),
                   row_number);
      }));
......
@@ -72,18 +72,14 @@ void MaxPoolCooGradGPUKernel(const GPUContext& dev_ctx,
   const T* out_features_ptr = out.values().data<T>();
   const T* out_grad_ptr = out_grad.values().data<T>();
   // TODO(zhangkaihuo): call phi::sparse::EmptyLike
-  DenseTensor x_grad_indices =
-      phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
+  DenseTensor x_grad_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
   DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
   x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
   T* x_grad_ptr = x_grad_values.data<T>();
   phi::funcs::SetConstant<GPUContext, T> set_zero;
   set_zero(dev_ctx, &x_grad_values, static_cast<T>(0.0f));
-  phi::Copy<GPUContext>(dev_ctx,
-                        x.non_zero_indices(),
-                        dev_ctx.GetPlace(),
-                        false,
-                        &x_grad_indices);
+  phi::Copy<GPUContext>(
+      dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &x_grad_indices);
   for (int i = 0; i < kernel_size; i++) {
     if (counter_ptr[i] <= 0) {
@@ -117,7 +113,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx,
                           const std::vector<int>& kernel_sizes,
                           SparseCooTensor* x_grad) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "MaxPoolCooGradGPUKernel", ([&] {
+      x.indices().dtype(), "MaxPoolCooGradGPUKernel", ([&] {
        MaxPoolCooGradGPUKernel<T, data_t>(
            dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad);
      }));
......
@@ -140,7 +140,7 @@ void MaxPoolCooKernel(const Context& dev_ctx,
                       DenseTensor* rulebook,
                       DenseTensor* counter) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "MaxPoolCooGPUKernel", ([&] {
+      x.indices().dtype(), "MaxPoolCooGPUKernel", ([&] {
        MaxPoolCooGPUKernel<T, data_t>(dev_ctx,
                                       x,
                                       kernel_sizes,
......
@@ -93,9 +93,9 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
   dim3 block(32, 4);
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
+      out.crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
        SoftmaxGradGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
-            out.non_zero_crows().data<data_t>(),
+            out.crows().data<data_t>(),
             out.values().data<T>(),
             dout.values().data<T>(),
             dx->mutable_values()->data<T>(),
......
@@ -105,10 +105,10 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
   dim3 grid((total_row_number + 3) / 4);
   dim3 block(32, 4);
-  PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
-        SoftmaxGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
-            x.non_zero_crows().data<data_t>(),
+  PD_VISIT_BASE_INTEGRAL_TYPES(x.crows().dtype(), "CsrSoftmaxKernel", ([&] {
+        SoftmaxGpuKernel<T, data_t>
+            <<<grid, block, 0, dev_ctx.stream()>>>(
+            x.crows().data<data_t>(),
             x.values().data<T>(),
             out->mutable_values()->data<T>(),
             row_number,
......
@@ -212,9 +212,9 @@ void SparseCsrToCooGPUKernel(const GPUContext& dev_ctx,
                              const SparseCsrTensor& x,
                              SparseCooTensor* out) {
   const DDim& x_dims = x.dims();
-  const int64_t non_zero_num = x.non_zero_cols().numel();
-  const auto& csr_crows = x.non_zero_crows();
-  const auto& csr_cols = x.non_zero_cols();
+  const int64_t non_zero_num = x.cols().numel();
+  const auto& csr_crows = x.crows();
+  const auto& csr_cols = x.cols();
   const auto& csr_values = x.values();
   const IntT* csr_crows_data = csr_crows.data<IntT>();
   const IntT* csr_cols_data = csr_cols.data<IntT>();
@@ -278,7 +278,7 @@ void SparseCsrToCooKernel(const Context& dev_ctx,
                           const SparseCsrTensor& x,
                           SparseCooTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_crows().dtype(), "SparseCsrToCooGPUKernel", ([&] {
+      x.crows().dtype(), "SparseCsrToCooGPUKernel", ([&] {
        SparseCsrToCooGPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
@@ -358,15 +358,14 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
   int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
   int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
-  phi::DenseTensor non_zero_crows =
-      phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
-  phi::DenseTensor non_zero_cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
+  phi::DenseTensor crows = phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
+  phi::DenseTensor cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
   phi::DenseTensor values = phi::EmptyLike<T, GPUContext>(dev_ctx, x.values());
-  IntT* csr_crows_data = non_zero_crows.data<IntT>();
-  IntT* csr_cols_data = non_zero_cols.data<IntT>();
+  IntT* csr_crows_data = crows.data<IntT>();
+  IntT* csr_cols_data = cols.data<IntT>();
   T* csr_values_data = values.data<T>();
-  const auto& coo_indices = x.non_zero_indices();
+  const auto& coo_indices = x.indices();
   const auto& coo_values = x.values();
   const IntT* batchs_ptr = coo_indices.data<IntT>();
   const IntT* coo_rows_data =
@@ -413,7 +412,7 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
                              sizeof(T) * non_zero_num,
                              gpuMemcpyDeviceToDevice,
                              dev_ctx.stream());
-  out->SetMember(non_zero_crows, non_zero_cols, values, x_dims);
+  out->SetMember(crows, cols, values, x_dims);
 }

 template <typename T, typename Context>
@@ -421,7 +420,7 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
                           const SparseCooTensor& x,
                           SparseCsrTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "SparseCooToCsrGPUKernel", ([&] {
+      x.indices().dtype(), "SparseCooToCsrGPUKernel", ([&] {
        SparseCooToCsrGPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
@@ -453,7 +452,7 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
                                DenseTensor* out) {
   const auto non_zero_num = x.nnz();
   const auto dense_dims = x.dims();
-  const auto indices = x.non_zero_indices();
+  const auto indices = x.indices();
   const auto values = x.values();
   const auto indices_dims = indices.dims();
   int64_t sparse_dim = indices_dims[0];
@@ -509,7 +508,7 @@ void SparseCooToDenseKernel(const Context& dev_ctx,
                             const SparseCooTensor& x,
                             DenseTensor* out) {
   PD_VISIT_BASE_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "SparseCooToDenseGPUKernel", ([&] {
+      x.indices().dtype(), "SparseCooToDenseGPUKernel", ([&] {
        SparseCooToDenseGPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
......
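Nearly every hunk above also passes the index tensor's dtype to PD_VISIT_BASE_INTEGRAL_TYPES, which selects data_t from the runtime integer dtype and then invokes the parenthesized lambda. The stand-in below is only a sketch of that dispatch pattern under assumed int32/int64 cases; it is not the real macro from paddle/phi and the names are hypothetical.

#include <cstdint>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for phi::DataType, for this sketch only.
enum class DataType { INT32, INT64 };

// Toy dispatcher: choose data_t from the runtime dtype, then invoke the
// lambda that was passed in (wrapped in parentheses at the call site).
#define TOY_VISIT_BASE_INTEGRAL_TYPES(DTYPE, NAME, ...)            \
  [&] {                                                            \
    switch (DTYPE) {                                               \
      case DataType::INT32: {                                      \
        using data_t = int32_t;                                    \
        return __VA_ARGS__();                                      \
      }                                                            \
      case DataType::INT64: {                                      \
        using data_t = int64_t;                                    \
        return __VA_ARGS__();                                      \
      }                                                            \
      default:                                                     \
        throw std::runtime_error(std::string(NAME) +               \
                                 ": unsupported index dtype");     \
    }                                                              \
  }()

// Usage mirrors the call sites in the diff, e.g.:
//   TOY_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "CoalesceGPUKernel", ([&] {
//     CoalesceGPUKernel<T, data_t>(dev_ctx, x, out);
//   }));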