Unverified · Commit 3a7b1810 authored by zhangkaihuo, committed by GitHub

[Sparse]Use shorter function names (#45325)

* rename the member function of SparseTensor

* use shorter function names
Parent b5d8bd2f
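
For reference, this commit is a pure renaming of the sparse tensor accessors; the hunks below mechanically replace the long names with the short ones. A minimal before/after sketch (illustrative only, not part of the commit; it assumes the usual phi sparse tensor headers and already-constructed COO/CSR tensors):

#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"

// Hypothetical helper, only to show the renamed accessors side by side.
void AccessorRenameSketch(const phi::SparseCooTensor& coo,
                          const phi::SparseCsrTensor& csr) {
  // SparseCooTensor: non_zero_indices() -> indices()
  const phi::DenseTensor& indices = coo.indices();
  // SparseCsrTensor: non_zero_crows() -> crows(), non_zero_cols() -> cols()
  const phi::DenseTensor& crows = csr.crows();
  const phi::DenseTensor& cols = csr.cols();
  (void)indices;
  (void)crows;
  (void)cols;
}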
......@@ -30,13 +30,13 @@ template <typename T, typename IntT>
void CoalesceGPUKernel(const GPUContext& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
const DenseTensor& x_indices = x.non_zero_indices();
const DenseTensor& x_indices = x.indices();
const DenseTensor& x_values = x.values();
DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x_indices);
DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, x_values);
const int64_t nnz = x.nnz();
const int64_t sparse_dim = x.non_zero_indices().dims()[0];
const int64_t sparse_dim = x.indices().dims()[0];
std::vector<IntT> sparse_offsets(sparse_dim);
phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
......@@ -64,7 +64,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
config.thread_per_block,
0,
dev_ctx.stream()>>>(
x.non_zero_indices().data<IntT>(),
x.indices().data<IntT>(),
d_sparse_offsets.data<IntT>(),
indexs.numel(),
sparse_dim,
......@@ -175,10 +175,9 @@ template <typename T, typename Context>
void CoalesceKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "CoalesceGPUKernel", ([&] {
CoalesceGPUKernel<T, data_t>(dev_ctx, x, out);
}));
PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "CoalesceGPUKernel", ([&] {
CoalesceGPUKernel<T, data_t>(dev_ctx, x, out);
}));
}
} // namespace sparse
} // namespace phi
......
......@@ -589,8 +589,8 @@ int ProductRuleBook(const Context& dev_ctx,
int* h_offsets) {
auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
const int64_t non_zero_num = x.nnz();
const auto& non_zero_indices = x.non_zero_indices();
const IntT* indices_ptr = non_zero_indices.data<IntT>();
const auto& indices = x.indices();
const IntT* indices_ptr = indices.data<IntT>();
int* counter_ptr = counter_per_kernel->data<int>();
int* offsets_ptr = offsets_per_kernel->data<int>();
int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
......@@ -629,12 +629,10 @@ int ProductRuleBook(const Context& dev_ctx,
if (subm) {
DenseTensor tmp_rulebook = phi::Empty(dev_ctx, std::move(rulebook_meta));
IntT* rulebook_ptr = tmp_rulebook.data<IntT>();
DenseTensor out_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
DenseTensor out_values = phi::Empty<T>(dev_ctx, {x.nnz(), kernel_sizes[4]});
phi::Copy(
dev_ctx, x.non_zero_indices(), dev_ctx.GetPlace(), false, &out_indices);
phi::Copy(dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &out_indices);
phi::backends::gpu::GpuMemsetAsync(
out_index_table_ptr, 0, sizeof(int) * table_size, dev_ctx.stream());
......
......@@ -79,8 +79,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
int half_kernel_size = kernel_size / 2;
auto blas = phi::funcs::GetBlas<GPUContext, T>(dev_ctx);
DenseTensor x_grad_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor x_grad_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
T* x_grad_values_ptr = x_grad_values.data<T>();
phi::backends::gpu::GpuMemsetAsync(x_grad_values_ptr,
......@@ -89,11 +88,8 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
dev_ctx.stream());
phi::backends::gpu::GpuMemsetAsync(
d_x_features_ptr, 0, sizeof(T) * d_x_features.numel(), dev_ctx.stream());
phi::Copy<GPUContext>(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
&x_grad_indices);
phi::Copy<GPUContext>(
dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &x_grad_indices);
x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
std::vector<int> offsets(kernel_size + 1);
......@@ -142,7 +138,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
<<<config.block_per_grid,
config.thread_per_block,
0,
dev_ctx.stream()>>>(x.non_zero_indices().data<IntT>(),
dev_ctx.stream()>>>(x.indices().data<IntT>(),
x.nnz(),
d_x_dims,
nullptr,
......@@ -269,7 +265,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
SparseCooTensor* x_grad,
DenseTensor* kernel_grad) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "Conv3dCooGradGPUKernel", ([&] {
x.indices().dtype(), "Conv3dCooGradGPUKernel", ([&] {
Conv3dCooGradGPUKernel<T, data_t>(dev_ctx,
x,
kernel,
......
......@@ -202,21 +202,20 @@ void Conv3dCooKernel(const Context& dev_ctx,
SparseCooTensor* out,
DenseTensor* rulebook,
DenseTensor* counter) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "Conv3dCooGPUKernel", ([&] {
Conv3dCooGPUKernel<T, data_t>(dev_ctx,
x,
kernel,
paddings,
dilations,
strides,
groups,
subm,
key,
out,
rulebook,
counter);
}));
PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "Conv3dCooGPUKernel", ([&] {
Conv3dCooGPUKernel<T, data_t>(dev_ctx,
x,
kernel,
paddings,
dilations,
strides,
groups,
subm,
key,
out,
rulebook,
counter);
}));
}
} // namespace sparse
......
......@@ -302,8 +302,8 @@ int ProductRuleBook(const Context& dev_ctx,
std::vector<int>* h_offsets) {
auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
const int64_t non_zero_num = x.nnz();
const auto& non_zero_indices = x.non_zero_indices();
const IntT* indices_ptr = non_zero_indices.data<IntT>();
const auto& indices = x.indices();
const IntT* indices_ptr = indices.data<IntT>();
DenseTensor in_indexs = phi::Empty<Context>(
dev_ctx, DenseTensorMeta(indices_dtype, {x.nnz()}, DataLayout::NCHW));
int* counter_ptr = counter_per_kernel->data<int>();
......@@ -538,14 +538,12 @@ int ProductRuleBook(const Context& dev_ctx,
rulebook_ptr + 2 * rulebook_len);
out->SetMember(out_indices, out_values, out_dims, true);
} else {
DenseTensor out_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
DenseTensor out_values = phi::Empty(
dev_ctx,
DenseTensorMeta(
x.dtype(), {x.nnz(), kernel_sizes[4]}, x.values().layout()));
phi::Copy(
dev_ctx, x.non_zero_indices(), dev_ctx.GetPlace(), false, &out_indices);
phi::Copy(dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &out_indices);
out->SetMember(out_indices, out_values, out_dims, true);
}
return rulebook_len;
......
......@@ -42,11 +42,8 @@ void CooFullLikeKernel(const Context& dev_ctx,
const Scalar& val,
DataType dtype,
SparseCooTensor* out) {
phi::Copy<Context>(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
out->mutable_non_zero_indices());
phi::Copy<Context>(
dev_ctx, x.indices(), dev_ctx.GetPlace(), false, out->mutable_indices());
DenseTensor* values = out->mutable_values();
values->Resize(x.values().dims());
......@@ -68,17 +65,11 @@ void CsrFullLikeKernel(const Context& dev_ctx,
const Scalar& val,
DataType dtype,
SparseCsrTensor* out) {
phi::Copy<Context>(dev_ctx,
x.non_zero_crows(),
dev_ctx.GetPlace(),
false,
out->mutable_non_zero_crows());
phi::Copy<Context>(dev_ctx,
x.non_zero_cols(),
dev_ctx.GetPlace(),
false,
out->mutable_non_zero_cols());
phi::Copy<Context>(
dev_ctx, x.crows(), dev_ctx.GetPlace(), false, out->mutable_crows());
phi::Copy<Context>(
dev_ctx, x.cols(), dev_ctx.GetPlace(), false, out->mutable_cols());
DenseTensor* values = out->mutable_values();
values->Resize(x.values().dims());
......
......@@ -93,7 +93,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
dim3 block(WARP_SIZE, 8);
AttnSoftmaxGpuGradKernel<T><<<grid, block, 0, dev_ctx.stream()>>>(
softmax.non_zero_crows().data<int64_t>(),
softmax.crows().data<int64_t>(),
softmax.values().data<T>(),
dsoftmax.mutable_values()->data<T>(),
d_sdd_result.mutable_values()->data<T>(),
......
......@@ -200,8 +200,8 @@ void FusedAttentionCsrKernel(
int batch_nnz = sdd_result.nnz() / batch_num;
AttnSoftmaxGpuKernel<T><<<grid, block, 0, dev_ctx.stream()>>>(
sdd_result.non_zero_crows().data<int64_t>(),
sdd_result.non_zero_cols().data<int64_t>(),
sdd_result.crows().data<int64_t>(),
sdd_result.cols().data<int64_t>(),
sdd_result.values().data<T>(),
kp_mask_ptr ? kp_mask_ptr->data<T>() : nullptr,
attn_mask_ptr ? attn_mask_ptr->data<T>() : nullptr,
......
......@@ -58,7 +58,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
x.dims(),
mask.dims(),
phi::errors::InvalidArgument("the input x and mask must have the shape"));
const DenseTensor& indices = mask.non_zero_indices();
const DenseTensor& indices = mask.indices();
const DenseTensor& values = mask.values();
const int sparse_dim = mask.sparse_dim();
DenseTensor sparse_offsets = phi::Empty<GPUContext>(
......@@ -103,7 +103,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
/**
* @brief Filter the DenseTensor x by the
* mask.non_zero_indices() and output a SparseCooTensor
* mask.indices() and output a SparseCooTensor
* x and mask must have the same shape.
**/
template <typename T, typename Context>
......@@ -112,7 +112,7 @@ void SparseMaskKernel(const Context& dev_ctx,
const SparseCooTensor& mask,
SparseCooTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
mask.non_zero_indices().dtype(), "SparseMaskGPUKernel", ([&] {
mask.indices().dtype(), "SparseMaskGPUKernel", ([&] {
SparseMaskGPUKernel<T, data_t>(dev_ctx, x, mask, out);
}));
}
......@@ -197,7 +197,7 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
config.thread_per_block,
0,
dev_ctx.stream()>>>(
x.non_zero_indices().data<IntT>(),
x.indices().data<IntT>(),
d_sparse_offsets.data<IntT>(),
x_indexs.numel(),
sparse_dim,
......@@ -270,7 +270,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx,
const DenseTensor& mask_indices,
DenseTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseMaskHelperGPUKernel", ([&] {
x.indices().dtype(), "SparseMaskHelperGPUKernel", ([&] {
SparseMaskHelperGPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
}));
}
......
......@@ -71,14 +71,14 @@ void MvCooGradKernel(const Context &dev_ctx,
EmptyLikeCooKernel<T, Context>(dev_ctx, x, dx);
auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, dx->nnz());
PD_VISIT_BASE_INTEGRAL_TYPES(
dx->non_zero_indices().dtype(), "MvCooGradKernel", ([&] {
dx->indices().dtype(), "MvCooGradKernel", ([&] {
MvCooGradGpuKernel<T>
<<<config.block_per_grid.x,
config.thread_per_block.x,
0,
dev_ctx.stream()>>>(dout.data<T>(),
vec.data<T>(),
dx->non_zero_indices().data<data_t>(),
dx->indices().data<data_t>(),
dx->mutable_values()->data<T>(),
dx->nnz());
}));
......@@ -117,19 +117,19 @@ void MvCsrGradKernel(const Context &dev_ctx,
int col_number = dx->dims()[1];
auto config = phi::backends::gpu::GetGpuLaunchConfig2D(
dev_ctx, col_number, row_number);
PD_VISIT_BASE_INTEGRAL_TYPES(
dx->non_zero_crows().dtype(), "MvCsrGradKernel", ([&] {
MvCsrGradGpuKernel<T>
<<<config.block_per_grid.x,
config.thread_per_block.x,
0,
dev_ctx.stream()>>>(dout.data<T>(),
vec.data<T>(),
dx->non_zero_crows().data<data_t>(),
dx->non_zero_cols().data<data_t>(),
dx->mutable_values()->data<T>(),
row_number);
}));
PD_VISIT_BASE_INTEGRAL_TYPES(dx->crows().dtype(), "MvCsrGradKernel", ([&] {
MvCsrGradGpuKernel<T>
<<<config.block_per_grid.x,
config.thread_per_block.x,
0,
dev_ctx.stream()>>>(
dout.data<T>(),
vec.data<T>(),
dx->crows().data<data_t>(),
dx->cols().data<data_t>(),
dx->mutable_values()->data<T>(),
row_number);
}));
}
// dvec{Dense} = x'{SparseCsr} * dout{Dense}
......
......@@ -72,18 +72,14 @@ void MaxPoolCooGradGPUKernel(const GPUContext& dev_ctx,
const T* out_features_ptr = out.values().data<T>();
const T* out_grad_ptr = out_grad.values().data<T>();
// TODO(zhangkaihuo): call phi::sparse::EmptyLike
DenseTensor x_grad_indices =
phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
DenseTensor x_grad_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.values());
x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
T* x_grad_ptr = x_grad_values.data<T>();
phi::funcs::SetConstant<GPUContext, T> set_zero;
set_zero(dev_ctx, &x_grad_values, static_cast<T>(0.0f));
phi::Copy<GPUContext>(dev_ctx,
x.non_zero_indices(),
dev_ctx.GetPlace(),
false,
&x_grad_indices);
phi::Copy<GPUContext>(
dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &x_grad_indices);
for (int i = 0; i < kernel_size; i++) {
if (counter_ptr[i] <= 0) {
......@@ -117,7 +113,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx,
const std::vector<int>& kernel_sizes,
SparseCooTensor* x_grad) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "MaxPoolCooGradGPUKernel", ([&] {
x.indices().dtype(), "MaxPoolCooGradGPUKernel", ([&] {
MaxPoolCooGradGPUKernel<T, data_t>(
dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad);
}));
......
......@@ -140,7 +140,7 @@ void MaxPoolCooKernel(const Context& dev_ctx,
DenseTensor* rulebook,
DenseTensor* counter) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "MaxPoolCooGPUKernel", ([&] {
x.indices().dtype(), "MaxPoolCooGPUKernel", ([&] {
MaxPoolCooGPUKernel<T, data_t>(dev_ctx,
x,
kernel_sizes,
......
......@@ -93,9 +93,9 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
dim3 block(32, 4);
PD_VISIT_BASE_INTEGRAL_TYPES(
out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
out.crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
SoftmaxGradGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
out.non_zero_crows().data<data_t>(),
out.crows().data<data_t>(),
out.values().data<T>(),
dout.values().data<T>(),
dx->mutable_values()->data<T>(),
......
......@@ -105,15 +105,15 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
dim3 grid((total_row_number + 3) / 4);
dim3 block(32, 4);
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
SoftmaxGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
x.non_zero_crows().data<data_t>(),
x.values().data<T>(),
out->mutable_values()->data<T>(),
row_number,
total_row_number);
}));
PD_VISIT_BASE_INTEGRAL_TYPES(x.crows().dtype(), "CsrSoftmaxKernel", ([&] {
SoftmaxGpuKernel<T, data_t>
<<<grid, block, 0, dev_ctx.stream()>>>(
x.crows().data<data_t>(),
x.values().data<T>(),
out->mutable_values()->data<T>(),
row_number,
total_row_number);
}));
}
} // namespace sparse
......
......@@ -212,9 +212,9 @@ void SparseCsrToCooGPUKernel(const GPUContext& dev_ctx,
const SparseCsrTensor& x,
SparseCooTensor* out) {
const DDim& x_dims = x.dims();
const int64_t non_zero_num = x.non_zero_cols().numel();
const auto& csr_crows = x.non_zero_crows();
const auto& csr_cols = x.non_zero_cols();
const int64_t non_zero_num = x.cols().numel();
const auto& csr_crows = x.crows();
const auto& csr_cols = x.cols();
const auto& csr_values = x.values();
const IntT* csr_crows_data = csr_crows.data<IntT>();
const IntT* csr_cols_data = csr_cols.data<IntT>();
......@@ -278,7 +278,7 @@ void SparseCsrToCooKernel(const Context& dev_ctx,
const SparseCsrTensor& x,
SparseCooTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_crows().dtype(), "SparseCsrToCooGPUKernel", ([&] {
x.crows().dtype(), "SparseCsrToCooGPUKernel", ([&] {
SparseCsrToCooGPUKernel<T, data_t>(dev_ctx, x, out);
}));
}
......@@ -358,15 +358,14 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
phi::DenseTensor non_zero_crows =
phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
phi::DenseTensor non_zero_cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
phi::DenseTensor crows = phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
phi::DenseTensor cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
phi::DenseTensor values = phi::EmptyLike<T, GPUContext>(dev_ctx, x.values());
IntT* csr_crows_data = non_zero_crows.data<IntT>();
IntT* csr_cols_data = non_zero_cols.data<IntT>();
IntT* csr_crows_data = crows.data<IntT>();
IntT* csr_cols_data = cols.data<IntT>();
T* csr_values_data = values.data<T>();
const auto& coo_indices = x.non_zero_indices();
const auto& coo_indices = x.indices();
const auto& coo_values = x.values();
const IntT* batchs_ptr = coo_indices.data<IntT>();
const IntT* coo_rows_data =
......@@ -413,7 +412,7 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
sizeof(T) * non_zero_num,
gpuMemcpyDeviceToDevice,
dev_ctx.stream());
out->SetMember(non_zero_crows, non_zero_cols, values, x_dims);
out->SetMember(crows, cols, values, x_dims);
}
template <typename T, typename Context>
......@@ -421,7 +420,7 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCsrTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseCooToCsrGPUKernel", ([&] {
x.indices().dtype(), "SparseCooToCsrGPUKernel", ([&] {
SparseCooToCsrGPUKernel<T, data_t>(dev_ctx, x, out);
}));
}
......@@ -453,7 +452,7 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
DenseTensor* out) {
const auto non_zero_num = x.nnz();
const auto dense_dims = x.dims();
const auto indices = x.non_zero_indices();
const auto indices = x.indices();
const auto values = x.values();
const auto indices_dims = indices.dims();
int64_t sparse_dim = indices_dims[0];
......@@ -509,7 +508,7 @@ void SparseCooToDenseKernel(const Context& dev_ctx,
const SparseCooTensor& x,
DenseTensor* out) {
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseCooToDenseGPUKernel", ([&] {
x.indices().dtype(), "SparseCooToDenseGPUKernel", ([&] {
SparseCooToDenseGPUKernel<T, data_t>(dev_ctx, x, out);
}));
}
......
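The pattern that recurs across these kernels is unchanged by the rename: a host-side wrapper dispatches on the integer dtype of the index tensor and forwards to a GPU kernel templated on that type. A minimal sketch of that dispatch using the new accessor name (SomeCooKernel and SomeCooGPUKernel are hypothetical placeholders, not functions from this diff):

template <typename T, typename Context>
void SomeCooKernel(const Context& dev_ctx,
                   const phi::SparseCooTensor& x,
                   phi::SparseCooTensor* out) {
  // data_t is bound by the visitor macro to the concrete integer type
  // (e.g. int32_t or int64_t) of x.indices().
  PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "SomeCooGPUKernel", ([&] {
                                 SomeCooGPUKernel<T, data_t>(dev_ctx, x, out);
                               }));
}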