Unverified commit f419e341, authored by zhangkaihuo, committed by GitHub

add sparse visit (#44847)
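This change adds a PD_VISIT_BASE_INTEGRAL_TYPES dispatch macro that covers only the INT32 and INT64 data types, switches the index-dtype dispatch in the sparse GPU kernels from PD_VISIT_INTEGRAL_TYPES to the new macro, and updates a sparse cast test accordingly.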

Parent d4ca7ffb
......
@@ -87,6 +87,20 @@ namespace phi {
} \
}()
#define PD_VISIT_BASE_INTEGRAL_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE( \
NAME, ::paddle::DataType::INT64, int64_t, __VA_ARGS__) \
default: \
PD_THROW("function " #NAME " is not implemented for data type `", \
__dtype__, \
"`"); \
} \
}()
///////// Complex Dispatch Macro ///////////
#define PD_VISIT_COMPLEX_TYPES(TYPE, NAME, ...) \
......
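The new macro is used like the existing PD_VISIT_* helpers: the caller passes a dtype, a name for error messages, and a lambda in which `data_t` is bound to the concrete C++ type of the matched case (int for INT32, int64_t for INT64); any other dtype falls through to PD_THROW. A minimal usage sketch, mirroring the call sites below — the kernel names here are hypothetical, not part of this commit:

// Hypothetical caller: dispatch a sparse kernel on its index dtype.
// Inside the lambda, data_t names the matched index type.
template <typename T, typename Context>
void MyCooKernel(const Context& dev_ctx,
                 const SparseCooTensor& x,
                 SparseCooTensor* out) {
  PD_VISIT_BASE_INTEGRAL_TYPES(
      x.non_zero_indices().dtype(), "MyCooKernel", ([&] {
        MyCooKernelImpl<T, data_t>(dev_ctx, x, out);
      }));
}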
......
@@ -174,7 +174,7 @@ class CuSparseSpMatDescriptor {
explicit CuSparseSpMatDescriptor(const phi::SparseCsrTensor& x,
const phi::GPUContext& dev_ctx)
: dev_ctx_(dev_ctx) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_crows().dtype(), "Csr CuSparseSpMatDescriptor", ([&] {
CreateCsrDescriptor<T, data_t>(x, dev_ctx_, &descriptor_);
}));
......
@@ -184,7 +184,7 @@ class CuSparseSpMatDescriptor {
explicit CuSparseSpMatDescriptor(const phi::SparseCooTensor& x,
const phi::GPUContext& dev_ctx)
: dev_ctx_(dev_ctx) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "Coo CuSparseSpMatDescriptor", ([&] {
CreateCooDescriptor<T, data_t>(x, dev_ctx_, &descriptor_);
}));
......
......
@@ -175,7 +175,7 @@ template <typename T, typename Context>
void CoalesceKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "CoalesceGPUKernel", ([&] {
CoalesceGPUKernel<T, data_t>(dev_ctx, x, out);
}));
......
......
@@ -233,7 +233,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
const std::string& key,
SparseCooTensor* x_grad,
DenseTensor* kernel_grad) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "Conv3dCooGradGPUKernel", ([&] {
Conv3dCooGradGPUKernel<T, data_t>(dev_ctx,
x,
......
......
@@ -221,7 +221,7 @@ void Conv3dCooKernel(const Context& dev_ctx,
SparseCooTensor* out,
DenseTensor* rulebook,
DenseTensor* counter) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "Conv3dCooGPUKernel", ([&] {
Conv3dCooGPUKernel<T, data_t>(dev_ctx,
x,
......
......
@@ -111,7 +111,7 @@ void SparseMaskKernel(const Context& dev_ctx,
const DenseTensor& x,
const SparseCooTensor& mask,
SparseCooTensor* out) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
mask.non_zero_indices().dtype(), "SparseMaskGPUKernel", ([&] {
SparseMaskGPUKernel<T, data_t>(dev_ctx, x, mask, out);
}));
......
@@ -270,7 +270,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const DenseTensor& mask_indices,
DenseTensor* out) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseMaskHelperGPUKernel", ([&] {
SparseMaskHelperGPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
}));
......
......
@@ -70,7 +70,7 @@ void MvCooGradKernel(const Context &dev_ctx,
// InferMeta of SparseCooTensor 'dx', CreateLikeInferMeta
EmptyLikeCooKernel<T, Context>(dev_ctx, x, dx);
auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, dx->nnz());
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
dx->non_zero_indices().dtype(), "MvCooGradKernel", ([&] {
MvCooGradGpuKernel<T>
<<<config.block_per_grid.x,
......
@@ -117,7 +117,7 @@ void MvCsrGradKernel(const Context &dev_ctx,
int col_number = dx->dims()[1];
auto config = phi::backends::gpu::GetGpuLaunchConfig2D(
dev_ctx, col_number, row_number);
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
dx->non_zero_crows().dtype(), "MvCsrGradKernel", ([&] {
MvCsrGradGpuKernel<T>
<<<config.block_per_grid.x,
......
......
@@ -116,7 +116,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx,
const SparseCooTensor& out_grad,
const std::vector<int>& kernel_sizes,
SparseCooTensor* x_grad) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "MaxPoolCooGradGPUKernel", ([&] {
MaxPoolCooGradGPUKernel<T, data_t>(
dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad);
......
......
@@ -139,7 +139,7 @@ void MaxPoolCooKernel(const Context& dev_ctx,
SparseCooTensor* out,
DenseTensor* rulebook,
DenseTensor* counter) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "MaxPoolCooGPUKernel", ([&] {
MaxPoolCooGPUKernel<T, data_t>(dev_ctx,
x,
......
......
@@ -92,7 +92,7 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
dim3 grid((total_row_number + 3) / 4);
dim3 block(32, 4);
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
SoftmaxGradGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
out.non_zero_crows().data<data_t>(),
......
......
@@ -105,15 +105,15 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
dim3 grid((total_row_number + 3) / 4);
dim3 block(32, 4);
PD_VISIT_INTEGRAL_TYPES(x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
SoftmaxGpuKernel<T, data_t>
<<<grid, block, 0, dev_ctx.stream()>>>(
x.non_zero_crows().data<data_t>(),
x.non_zero_elements().data<T>(),
out->mutable_non_zero_elements()->data<T>(),
row_number,
total_row_number);
}));
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
SoftmaxGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
x.non_zero_crows().data<data_t>(),
x.non_zero_elements().data<T>(),
out->mutable_non_zero_elements()->data<T>(),
row_number,
total_row_number);
}));
}
} // namespace sparse
......
......
@@ -277,7 +277,7 @@ template <typename T, typename Context>
void SparseCsrToCooKernel(const Context& dev_ctx,
const SparseCsrTensor& x,
SparseCooTensor* out) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_crows().dtype(), "SparseCsrToCooGPUKernel", ([&] {
SparseCsrToCooGPUKernel<T, data_t>(dev_ctx, x, out);
}));
......
@@ -421,7 +421,7 @@ template <typename T, typename Context>
void SparseCooToCsrKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCsrTensor* out) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseCooToCsrGPUKernel", ([&] {
SparseCooToCsrGPUKernel<T, data_t>(dev_ctx, x, out);
}));
......
@@ -510,7 +510,7 @@ template <typename T, typename Context>
void SparseCooToDenseKernel(const Context& dev_ctx,
const SparseCooTensor& x,
DenseTensor* out) {
PD_VISIT_INTEGRAL_TYPES(
PD_VISIT_BASE_INTEGRAL_TYPES(
x.non_zero_indices().dtype(), "SparseCooToDenseGPUKernel", ([&] {
SparseCooToDenseGPUKernel<T, data_t>(dev_ctx, x, out);
}));
......
......
@@ -160,7 +160,7 @@ class TestSparseUnary(unittest.TestCase):
def test_sparse_cast(self):
self.compare_with_dense_two_attr(paddle.cast,
paddle.incubate.sparse.cast, 'int16',
paddle.incubate.sparse.cast, 'int32',
'float32')
self.compare_with_dense_two_attr(paddle.cast,
paddle.incubate.sparse.cast, 'int32',
......