Unverified commit f419e341, authored by zhangkaihuo, committed by GitHub

add sparse visit (#44847)

Parent d4ca7ffb
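This commit adds a narrower dispatch macro, PD_VISIT_BASE_INTEGRAL_TYPES, and switches the sparse GPU kernels from PD_VISIT_INTEGRAL_TYPES to it: the index arrays of SparseCooTensor/SparseCsrTensor (non_zero_indices / non_zero_crows) are only ever int32 or int64, so the visitor lambdas need not be instantiated for the remaining integral types. Below is a minimal, self-contained sketch of the dispatch pattern, not Paddle's actual implementation: the DataType enum, the macro name, and the plain throw are stand-ins for paddle::DataType, PD_PRIVATE_CASE_TYPE, and PD_THROW.

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

// Stand-in for paddle::DataType; only the two index dtypes matter here.
enum class DataType { INT32, INT64 };

// Sketch of the dispatch pattern: switch on the runtime dtype tag and
// expand the visitor lambda with `data_t` aliased to the concrete type.
// NAME is assumed to be a string literal in this simplified version;
// the real macro stringizes an identifier with #NAME and reports errors
// through PD_THROW.
#define VISIT_BASE_INTEGRAL_TYPES(TYPE, NAME, ...)             \
  [&] {                                                        \
    const auto& __dtype__ = TYPE;                              \
    switch (__dtype__) {                                       \
      case DataType::INT32: {                                  \
        using data_t = int32_t;                                \
        return __VA_ARGS__();                                  \
      }                                                        \
      case DataType::INT64: {                                  \
        using data_t = int64_t;                                \
        return __VA_ARGS__();                                  \
      }                                                        \
      default:                                                 \
        throw std::runtime_error(                              \
            "function " NAME " is not implemented for dtype"); \
    }                                                          \
  }()

int main() {
  // Pretend this tag came from x.non_zero_indices().dtype().
  DataType index_dtype = DataType::INT64;
  VISIT_BASE_INTEGRAL_TYPES(index_dtype, "DemoKernel", ([&] {
                              // Inside the visitor, data_t is the concrete
                              // index type selected at runtime (int64_t).
                              std::vector<data_t> indices = {0, 2, 5};
                              std::cout << sizeof(data_t) << "-byte indices, "
                                        << indices.size() << " stored\n";
                            }));
  return 0;
}
```

The pattern hinges on textual expansion: the caller's lambda is pasted inside each case, where `using data_t = ...` is in scope, so `data_t` resolves to the dtype chosen at runtime. That is why the kernels in the diff below can write calls like `CoalesceGPUKernel<T, data_t>(...)` inside the visitor.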
@@ -87,6 +87,20 @@ namespace phi {
     }                                                                       \
   }()
 
+#define PD_VISIT_BASE_INTEGRAL_TYPES(TYPE, NAME, ...)                         \
+  [&] {                                                                       \
+    const auto& __dtype__ = TYPE;                                             \
+    switch (__dtype__) {                                                      \
+      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \
+      PD_PRIVATE_CASE_TYPE(                                                   \
+          NAME, ::paddle::DataType::INT64, int64_t, __VA_ARGS__)              \
+      default:                                                                \
+        PD_THROW("function " #NAME " is not implemented for data type `",     \
+                 __dtype__,                                                   \
+                 "`");                                                        \
+    }                                                                         \
+  }()
+
 ///////// Complex Dispatch Marco ///////////
 
 #define PD_VISIT_COMPLEX_TYPES(TYPE, NAME, ...)                               \
...
@@ -174,7 +174,7 @@ class CuSparseSpMatDescriptor {
   explicit CuSparseSpMatDescriptor(const phi::SparseCsrTensor& x,
                                    const phi::GPUContext& dev_ctx)
       : dev_ctx_(dev_ctx) {
-    PD_VISIT_INTEGRAL_TYPES(
+    PD_VISIT_BASE_INTEGRAL_TYPES(
         x.non_zero_crows().dtype(), "Csr CuSparseSpMatDescriptor", ([&] {
           CreateCsrDescriptor<T, data_t>(x, dev_ctx_, &descriptor_);
         }));
@@ -184,7 +184,7 @@ class CuSparseSpMatDescriptor {
   explicit CuSparseSpMatDescriptor(const phi::SparseCooTensor& x,
                                    const phi::GPUContext& dev_ctx)
       : dev_ctx_(dev_ctx) {
-    PD_VISIT_INTEGRAL_TYPES(
+    PD_VISIT_BASE_INTEGRAL_TYPES(
         x.non_zero_indices().dtype(), "Coo CuSparseSpMatDescriptor", ([&] {
           CreateCooDescriptor<T, data_t>(x, dev_ctx_, &descriptor_);
         }));
...
@@ -175,7 +175,7 @@ template <typename T, typename Context>
 void CoalesceKernel(const Context& dev_ctx,
                     const SparseCooTensor& x,
                     SparseCooTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "CoalesceGPUKernel", ([&] {
         CoalesceGPUKernel<T, data_t>(dev_ctx, x, out);
       }));
...
@@ -233,7 +233,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
                          const std::string& key,
                          SparseCooTensor* x_grad,
                          DenseTensor* kernel_grad) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "Conv3dCooGradGPUKernel", ([&] {
         Conv3dCooGradGPUKernel<T, data_t>(dev_ctx,
                                           x,
...
@@ -221,7 +221,7 @@ void Conv3dCooKernel(const Context& dev_ctx,
                      SparseCooTensor* out,
                      DenseTensor* rulebook,
                      DenseTensor* counter) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "Conv3dCooGPUKernel", ([&] {
         Conv3dCooGPUKernel<T, data_t>(dev_ctx,
                                       x,
...
@@ -111,7 +111,7 @@ void SparseMaskKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const SparseCooTensor& mask,
                       SparseCooTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       mask.non_zero_indices().dtype(), "SparseMaskGPUKernel", ([&] {
         SparseMaskGPUKernel<T, data_t>(dev_ctx, x, mask, out);
       }));
@@ -270,7 +270,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx,
                             const SparseCooTensor& x,
                             const DenseTensor& mask_indices,
                             DenseTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "SparseMaskHelperGPUKernel", ([&] {
         SparseMaskHelperGPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
       }));
...
@@ -70,7 +70,7 @@ void MvCooGradKernel(const Context &dev_ctx,
   // InferMeta of SparseCooTensor 'dx', CreateLikeInferMeta
   EmptyLikeCooKernel<T, Context>(dev_ctx, x, dx);
   auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, dx->nnz());
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       dx->non_zero_indices().dtype(), "MvCooGradKernel", ([&] {
         MvCooGradGpuKernel<T>
             <<<config.block_per_grid.x,
@@ -117,7 +117,7 @@ void MvCsrGradKernel(const Context &dev_ctx,
   int col_number = dx->dims()[1];
   auto config = phi::backends::gpu::GetGpuLaunchConfig2D(
       dev_ctx, col_number, row_number);
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       dx->non_zero_crows().dtype(), "MvCsrGradKernel", ([&] {
         MvCsrGradGpuKernel<T>
             <<<config.block_per_grid.x,
...
@@ -116,7 +116,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx,
                           const SparseCooTensor& out_grad,
                           const std::vector<int>& kernel_sizes,
                           SparseCooTensor* x_grad) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "MaxPoolCooGradGPUKernel", ([&] {
         MaxPoolCooGradGPUKernel<T, data_t>(
             dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad);
...
@@ -139,7 +139,7 @@ void MaxPoolCooKernel(const Context& dev_ctx,
                       SparseCooTensor* out,
                       DenseTensor* rulebook,
                       DenseTensor* counter) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "MaxPoolCooGPUKernel", ([&] {
         MaxPoolCooGPUKernel<T, data_t>(dev_ctx,
                                        x,
...
@@ -92,7 +92,7 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
   dim3 grid((total_row_number + 3) / 4);
   dim3 block(32, 4);
 
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
         SoftmaxGradGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
             out.non_zero_crows().data<data_t>(),
...
@@ -105,9 +105,9 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
   dim3 grid((total_row_number + 3) / 4);
   dim3 block(32, 4);
 
-  PD_VISIT_INTEGRAL_TYPES(x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
-                            SoftmaxGpuKernel<T, data_t>
-                                <<<grid, block, 0, dev_ctx.stream()>>>(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
+      x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
+        SoftmaxGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
             x.non_zero_crows().data<data_t>(),
             x.non_zero_elements().data<T>(),
             out->mutable_non_zero_elements()->data<T>(),
...
@@ -277,7 +277,7 @@ template <typename T, typename Context>
 void SparseCsrToCooKernel(const Context& dev_ctx,
                           const SparseCsrTensor& x,
                           SparseCooTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_crows().dtype(), "SparseCsrToCooGPUKernel", ([&] {
         SparseCsrToCooGPUKernel<T, data_t>(dev_ctx, x, out);
       }));
@@ -421,7 +421,7 @@ template <typename T, typename Context>
 void SparseCooToCsrKernel(const Context& dev_ctx,
                           const SparseCooTensor& x,
                           SparseCsrTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "SparseCooToCsrGPUKernel", ([&] {
         SparseCooToCsrGPUKernel<T, data_t>(dev_ctx, x, out);
       }));
@@ -510,7 +510,7 @@ template <typename T, typename Context>
 void SparseCooToDenseKernel(const Context& dev_ctx,
                             const SparseCooTensor& x,
                             DenseTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "SparseCooToDenseGPUKernel", ([&] {
         SparseCooToDenseGPUKernel<T, data_t>(dev_ctx, x, out);
       }));
...
@@ -160,7 +160,7 @@ class TestSparseUnary(unittest.TestCase):
     def test_sparse_cast(self):
         self.compare_with_dense_two_attr(paddle.cast,
-                                         paddle.incubate.sparse.cast, 'int16',
+                                         paddle.incubate.sparse.cast, 'int32',
                                          'float32')
         self.compare_with_dense_two_attr(paddle.cast,
                                          paddle.incubate.sparse.cast, 'int32',
...