BaiXuePrincess / Paddle — forked from PaddlePaddle / Paddle

Unverified commit 5d3fd4fe — Sparse conv and pool support indices as template (#41137)
Authored by zhangkaihuo on Apr 02, 2022; committed by GitHub on Apr 02, 2022.
Parent: 66d1b1f6

Showing 18 changed files with 862 additions and 589 deletions (+862 −589).
paddle/phi/kernels/empty_kernel.cc                          +4    −0
paddle/phi/kernels/funcs/sparse/convolution.h               +20   −17
paddle/phi/kernels/sparse/convolution_grad_kernel.h         +2    −2
paddle/phi/kernels/sparse/convolution_kernel.h              +1    −5
paddle/phi/kernels/sparse/cpu/convolution.h                 +39   −36
paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc    +81   −50
paddle/phi/kernels/sparse/cpu/convolution_kernel.cc         +62   −34
paddle/phi/kernels/sparse/cpu/sparse_pool_grad_kernel.cc    +42   −13
paddle/phi/kernels/sparse/cpu/sparse_pool_kernel.cc         +48   −24
paddle/phi/kernels/sparse/gpu/convolution.cu.h              +122  −119
paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu    +90   −53
paddle/phi/kernels/sparse/gpu/convolution_kernel.cu         +70   −47
paddle/phi/kernels/sparse/gpu/sparse_pool_grad_kernel.cu    +53   −24
paddle/phi/kernels/sparse/gpu/sparse_pool_kernel.cu         +61   −38
paddle/phi/kernels/sparse/sparse_pool_grad_kernel.h         +9    −11
paddle/phi/kernels/sparse/sparse_pool_kernel.h              +1    −5
paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc      +98   −50
paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc        +59   −61
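This commit templates the sparse Conv3d and MaxPool kernels (CPU and GPU, forward and backward) on the integer type `IntT` of the COO indices and of the rulebook, instead of hard-coding `int`. Each public kernel becomes a thin wrapper that inspects `x.non_zero_indices().dtype()` at run time and instantiates the typed body via `PD_DISPATCH_INTEGRAL_TYPES`, so int32 and int64 indices both work. A minimal standalone sketch of the pattern — the names `DataType`, `DispatchIntegral`, and `Conv3dKernelImpl` are hypothetical stand-ins for the Paddle machinery shown in the diffs below:

```cpp
#include <cstdint>
#include <stdexcept>

// Hypothetical stand-in for phi::DataType, for this sketch only.
enum class DataType { INT32, INT64 };

// The kernel body is templated on the index type, as the commit does with IntT.
template <typename IntT>
void Conv3dKernelImpl(const IntT* indices, int64_t nnz) {
  // ... build the IntT-typed rulebook, then gather/gemm/scatter ...
  (void)indices;
  (void)nnz;
}

// Map the runtime dtype of the indices to a compile-time type.
template <typename F>
void DispatchIntegral(DataType dtype, F&& f) {
  switch (dtype) {
    case DataType::INT32: f(int32_t{}); break;
    case DataType::INT64: f(int64_t{}); break;
    default: throw std::runtime_error("unsupported index dtype");
  }
}

// The public kernel keeps a dtype-agnostic signature and dispatches.
void Conv3dKernel(DataType indices_dtype, const void* indices, int64_t nnz) {
  DispatchIntegral(indices_dtype, [&](auto tag) {
    using IntT = decltype(tag);
    Conv3dKernelImpl<IntT>(static_cast<const IntT*>(indices), nnz);
  });
}
```

The per-file diffs mostly follow from this pattern: `int` index reads and writes become `IntT`, fixed `DataType::INT32` tensor metadata becomes `CppTypeToDataType<IntT>::Type()`, and `Context`-generic bodies become `CPUContext`/`GPUContext`-specific ones so the dispatch wrapper can own the generic signature.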
paddle/phi/kernels/empty_kernel.cc (+4 −0)

```diff
@@ -45,6 +45,7 @@ PD_REGISTER_KERNEL(empty,
                    phi::EmptyKernel,
                    float,
                    double,
+                   int8_t,
                    uint8_t,
                    int16_t,
                    int,
@@ -61,6 +62,7 @@ PD_REGISTER_KERNEL(empty_like,
                    phi::EmptyLikeKernel,
                    float,
                    double,
+                   int8_t,
                    uint8_t,
                    int16_t,
                    int,
@@ -80,6 +82,7 @@ PD_REGISTER_KERNEL(empty,
                    phi::EmptyKernel,
                    float,
                    double,
+                   int8_t,
                    uint8_t,
                    int16_t,
                    int,
@@ -95,6 +98,7 @@ PD_REGISTER_KERNEL(empty_like,
                    phi::EmptyLikeKernel,
                    float,
                    double,
+                   int8_t,
                    uint8_t,
                    int16_t,
                    int,
```
paddle/phi/kernels/funcs/sparse/convolution.h (+20 −17)

```diff
@@ -33,28 +33,30 @@ struct Dims4D {
 };
 
 // Judge whether the current position x is in (lower, upper)
-inline HOSTDEVICE bool Check(const int& x,
+template <typename IntT = int>
+inline HOSTDEVICE bool Check(const IntT& x,
                              const int& kx,
                              const int& pad,
                              const int& stride,
                              const int dilation,
                              const int kdim,
                              const int xdim) {
-  const int lower = x - dilation * kx + pad;
-  const int uper = x + (kdim - kx - 1) * dilation - pad;
+  const IntT lower = x - dilation * kx + pad;
+  const IntT uper = x + (kdim - kx - 1) * dilation - pad;
   return (lower >= 0 && lower % stride == 0 && uper < xdim);
 }
 
 // Check whether the current position(x, y, z) is legal:
 // Judge the minimum and maximum values at each latitude
+template <typename IntT = int>
 inline HOSTDEVICE bool Check(const Dims4D& dims,
                              const Dims4D& kernel_dims,
                              const Dims4D& paddings,
                              const Dims4D& dilations,
                              const Dims4D& strides,
-                             const int x,
-                             const int y,
-                             const int z,
+                             const IntT x,
+                             const IntT y,
+                             const IntT z,
                              const int kx,
                              const int ky,
                              const int kz) {
@@ -67,22 +69,22 @@ inline HOSTDEVICE bool Check(const Dims4D& dims,
   return (x_valid && y_valid && z_valid);
 }
 
-template <typename Dim>
-inline HOSTDEVICE int PointToIndex(const int& batch,
-                                   const int& x,
-                                   const int& y,
-                                   const int& z,
-                                   const Dim& dims) {
+template <typename Dim, typename IntT = int>
+inline HOSTDEVICE IntT PointToIndex(const IntT& batch,
+                                    const IntT& x,
+                                    const IntT& y,
+                                    const IntT& z,
+                                    const Dim& dims) {
   return batch * dims[1] * dims[2] * dims[3] + z * dims[2] * dims[3] +
          y * dims[3] + x;
 }
 
 // TODO(zhangkaihuo): use division and multiply to optimize
 // modulo operation
-template <typename Dim>
+template <typename Dim, typename IntT = int>
 inline HOSTDEVICE void IndexToPoint(
-    const int index, const Dim& dims, int* batch, int* x, int* y, int* z) {
-  int n = index;
+    const IntT index, const Dim& dims, IntT* batch, IntT* x, IntT* y, IntT* z) {
+  IntT n = index;
   *x = n % dims[3];
   n /= dims[3];
   *y = n % dims[2];
@@ -176,8 +178,9 @@ inline const std::vector<int> PoolResetKernel(
   return res;
 }
 
-inline void PrefixSum(const int* counter, int* offsets, const int n) {
-  int offset = 0;
+template <typename T>
+inline void PrefixSum(const T* counter, T* offsets, const int n) {
+  T offset = 0;
   for (int i = 0; i < n; i++) {
     offsets[i] = offset;
     offset += counter[i];
```
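The templated `PointToIndex`/`IndexToPoint` pair flattens a `(batch, z, y, x)` point into a linear index and back, now in whatever integer width `IntT` is. A small self-contained round-trip check — a sketch only: `HOSTDEVICE` is dropped, a plain array stands in for the `Dim` type, and the tail of `IndexToPoint`, cut off in the hunk above, is completed in the obvious way:

```cpp
#include <cassert>
#include <cstdint>

// PointToIndex/IndexToPoint as in funcs/sparse/convolution.h; dims holds the
// {batch, z, y, x} extents, and the layout is batch-major with x fastest.
template <typename Dim, typename IntT = int>
inline IntT PointToIndex(const IntT& batch, const IntT& x, const IntT& y,
                         const IntT& z, const Dim& dims) {
  return batch * dims[1] * dims[2] * dims[3] + z * dims[2] * dims[3] +
         y * dims[3] + x;
}

template <typename Dim, typename IntT = int>
inline void IndexToPoint(const IntT index, const Dim& dims, IntT* batch,
                         IntT* x, IntT* y, IntT* z) {
  IntT n = index;
  *x = n % dims[3];
  n /= dims[3];
  *y = n % dims[2];
  n /= dims[2];   // tail completed here; the diff hunk above cuts off
  *z = n % dims[1];
  n /= dims[1];
  *batch = n;
}

int main() {
  const int64_t dims[4] = {2, 4, 5, 6};  // N, D, H, W
  const int64_t batch = 1, x = 3, y = 2, z = 1;
  const int64_t idx = PointToIndex(batch, x, y, z, dims);  // 165
  int64_t b2, x2, y2, z2;
  IndexToPoint(idx, dims, &b2, &x2, &y2, &z2);
  assert(b2 == batch && x2 == x && y2 == y && z2 == z);  // round trip holds
  return 0;
}
```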
paddle/phi/kernels/sparse/convolution_grad_kernel.h (+2 −2)

```diff
@@ -49,8 +49,8 @@ std::tuple<SparseCooTensor, DenseTensor> Conv3dGrad(
     const int groups,
     const bool subm) {
   SparseCooTensor x_grad;
-  DenseTensor kernel_grad = phi::Empty<Context>(
-      dev_ctx, DenseTensorMeta(kernel.dtype(), {1}, kernel.layout()));
+  DenseTensor kernel_grad;
 
   // TODO(zhangkaihuo): call InferMeta func here
   Conv3dGradKernel<T, Context>(dev_ctx,
                                x,
```
paddle/phi/kernels/sparse/convolution_kernel.h (+1 −5)

```diff
@@ -45,11 +45,7 @@ SparseCooTensor Conv3d(const Context& dev_ctx,
     const int groups,
     const bool subm,
     DenseTensor* rulebook) {
-  DenseTensor indices = phi::Empty<Context>(
-      dev_ctx, DenseTensorMeta(DataType::INT32, {1}, DataLayout::NCHW));
-  DenseTensor values = phi::Empty<Context>(
-      dev_ctx, DenseTensorMeta(x.dtype(), {1}, x.layout()));
-  SparseCooTensor coo(indices, values, x.dims());
+  SparseCooTensor coo;
   Conv3dKernel<T, Context>(dev_ctx,
                            x,
                            kernel,
```
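Taken together, the two header changes defer output allocation: the wrappers now hand the kernel a default-constructed SparseCooTensor (and, for the gradient, an unallocated DenseTensor kernel_grad), presumably because the correct index dtype and output shapes are only known inside the dispatched, IntT-typed kernel body.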
paddle/phi/kernels/sparse/cpu/convolution.h (+39 −36)

```diff
@@ -31,7 +31,7 @@ using Dims4D = phi::funcs::sparse::Dims4D;
 // such as: kernel(3, 3, 3), kernel_size = 27
 // counter_per_weight: (kernel_size)
 // TODO(zhangkaihuo): optimize performance with multithreading
-template <typename T, typename Context>
+template <typename T, typename Context, typename IntT = int>
 void ProductRuleBook(const Context& dev_ctx,
                      const SparseCooTensor& x,
                      const std::vector<int>& kernel_sizes,
@@ -44,7 +44,7 @@ void ProductRuleBook(const Context& dev_ctx,
                      DenseTensor* counter_per_kernel) {
   const int64_t non_zero_num = x.nnz();
   const auto& non_zero_indices = x.non_zero_indices();
-  const int* indices_ptr = non_zero_indices.data<int>();
+  const IntT* indices_ptr = non_zero_indices.data<IntT>();
   int* counter_ptr = counter_per_kernel->data<int>();
   int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   memset(counter_ptr, 0, kernel_size * sizeof(int));
@@ -60,33 +60,33 @@ void ProductRuleBook(const Context& dev_ctx,
   const Dims4D c_strides(1, strides[2], strides[1], strides[0]);
   const Dims4D c_dilations(1, dilations[2], dilations[1], dilations[0]);
 
-  std::set<int> hash_in;
+  std::set<IntT> hash_in;
   if (subm) {
     for (int i = 0; i < non_zero_num; i++) {
-      int batch = indices_ptr[i];
-      int in_z = indices_ptr[i + non_zero_num];
-      int in_y = indices_ptr[i + 2 * non_zero_num];
-      int in_x = indices_ptr[i + 3 * non_zero_num];
-      int index = phi::funcs::sparse::PointToIndex<DDim>(
+      IntT batch = indices_ptr[i];
+      IntT in_z = indices_ptr[i + non_zero_num];
+      IntT in_y = indices_ptr[i + 2 * non_zero_num];
+      IntT in_x = indices_ptr[i + 3 * non_zero_num];
+      IntT index = phi::funcs::sparse::PointToIndex<DDim>(
           batch, in_x, in_y, in_z, x_dims);
       hash_in.insert(index);
     }
   }
 
-  auto f_calc_rulebook = [&](int* rulebook_ptr) {
+  auto f_calc_rulebook = [&](IntT* rulebook_ptr) {
     int kernel_index = 0, rulebook_index = 0;
     for (int kz = 0; kz < kernel_sizes[0]; kz++) {
       for (int ky = 0; ky < kernel_sizes[1]; ky++) {
         for (int kx = 0; kx < kernel_sizes[2]; kx++) {
           ++kernel_index;
           for (int64_t i = 0; i < non_zero_num; i++) {
-            int batch = indices_ptr[i];
-            int in_z = indices_ptr[i + non_zero_num];
-            int in_y = indices_ptr[i + 2 * non_zero_num];
-            int in_x = indices_ptr[i + 3 * non_zero_num];
-            int out_z = (in_z + paddings[0] - kz * dilations[0]) / strides[0];
-            int out_y = (in_y + paddings[1] - ky * dilations[1]) / strides[1];
-            int out_x = (in_x + paddings[2] - kx * dilations[2]) / strides[2];
+            IntT batch = indices_ptr[i];
+            IntT in_z = indices_ptr[i + non_zero_num];
+            IntT in_y = indices_ptr[i + 2 * non_zero_num];
+            IntT in_x = indices_ptr[i + 3 * non_zero_num];
+            IntT out_z = (in_z + paddings[0] - kz * dilations[0]) / strides[0];
+            IntT out_y = (in_y + paddings[1] - ky * dilations[1]) / strides[1];
+            IntT out_x = (in_x + paddings[2] - kx * dilations[2]) / strides[2];
             if (phi::funcs::sparse::Check(c_x_dims,
                                           c_kernel_dims,
                                           c_paddings,
@@ -99,7 +99,7 @@ void ProductRuleBook(const Context& dev_ctx,
                                           ky,
                                           kz)) {
               if (subm) {
-                int out_index = phi::funcs::sparse::PointToIndex<DDim>(
+                IntT out_index = phi::funcs::sparse::PointToIndex<DDim>(
                     batch, out_x, out_y, out_z, out_dims);
                 if (hash_in.find(out_index) == hash_in.end()) {
                   continue;
@@ -126,15 +126,16 @@ void ProductRuleBook(const Context& dev_ctx,
   f_calc_rulebook(nullptr);
 
   // alloc the rulebook
-  DenseTensorMeta rulebook_meta(
-      DataType::INT32, {3, rulebook_len}, DataLayout::NCHW);
-  rulebook->set_meta(rulebook_meta);
-  dev_ctx.Alloc(rulebook, rulebook->dtype(), rulebook->numel() * sizeof(int));
-  int* rulebook_ptr = rulebook->data<int>();
+  *rulebook = phi::Empty(
+      dev_ctx,
+      DenseTensorMeta(paddle::experimental::CppTypeToDataType<IntT>::Type(),
+                      {3, rulebook_len},
+                      DataLayout::NCHW));
+  IntT* rulebook_ptr = rulebook->data<IntT>();
   f_calc_rulebook(rulebook_ptr);
 }
 
-template <typename T, typename Context>
+template <typename T, typename Context, typename IntT = int>
 void UpdateRulebookAndOutIndex(const Context& dev_ctx,
                                const SparseCooTensor& x,
                                const int kernel_size,
@@ -142,9 +143,9 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
                                const DDim& out_dims,
                                DenseTensor* rulebook,
                                SparseCooTensor* out) {
-  std::set<int> out_indexs;
+  std::set<IntT> out_indexs;
   int n = rulebook->dims()[1];
-  int* rulebook_ptr = rulebook->data<int>();
+  IntT* rulebook_ptr = rulebook->data<IntT>();
   for (int i = 0; i < n; i++) {
     out_indexs.insert(rulebook_ptr[i + n * 2]);
   }
@@ -152,17 +153,19 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
   int out_non_zero_num = out_indexs.size();
   const int64_t sparse_dim = 4;
   DenseTensorMeta indices_meta(
-      DataType::INT32, {sparse_dim, out_non_zero_num}, DataLayout::NCHW);
+      paddle::experimental::CppTypeToDataType<IntT>::Type(),
+      {sparse_dim, out_non_zero_num},
+      DataLayout::NCHW);
   DenseTensorMeta values_meta(x.dtype(),
                               {out_non_zero_num, out_channels},
                               x.non_zero_elements().layout());
   phi::DenseTensor out_indices = phi::Empty(dev_ctx, std::move(indices_meta));
   phi::DenseTensor out_values = phi::Empty(dev_ctx, std::move(values_meta));
-  int* out_indices_ptr = out_indices.data<int>();
+  IntT* out_indices_ptr = out_indices.data<IntT>();
   int i = 0;
   for (auto it = out_indexs.begin(); it != out_indexs.end(); it++, i++) {
-    const int index = *it;
-    int batch, x, y, z;
+    const IntT index = *it;
+    IntT batch, x, y, z;
     phi::funcs::sparse::IndexToPoint<DDim>(index, out_dims, &batch, &x, &y, &z);
     out_indices_ptr[i] = batch;
     out_indices_ptr[i + out_non_zero_num] = z;
@@ -170,7 +173,7 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
     out_indices_ptr[i + out_non_zero_num * 3] = x;
   }
   for (i = 0; i < n; i++) {
-    int out_index = rulebook_ptr[i + n * 2];
+    IntT out_index = rulebook_ptr[i + n * 2];
     rulebook_ptr[i + n * 2] =
         std::distance(out_indexs.begin(), out_indexs.find(out_index));
   }
@@ -178,20 +181,20 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
   out->SetMember(out_indices, out_values, out_dims, true);
 }
 
-template <typename T>
+template <typename T, typename IntT = int>
 void Gather(
-    const T* x, const int* indexs, const int n, const int channels, T* out) {
+    const T* x, const IntT* indexs, const int n, const int channels, T* out) {
   for (int i = 0; i < n; i++) {
-    int real_i = indexs[i];
+    IntT real_i = indexs[i];
     memcpy(out + i * channels, x + real_i * channels, channels * sizeof(T));
   }
 }
 
-template <typename T>
+template <typename T, typename IntT = int>
 void Scatter(
-    const T* x, const int* indexs, const int n, const int channels, T* out) {
+    const T* x, const IntT* indexs, const int n, const int channels, T* out) {
   for (int i = 0; i < n; i++) {
-    int real_i = indexs[i];
+    IntT real_i = indexs[i];
     for (int j = 0; j < channels; j++) {
       out[real_i * channels + j] += x[i * channels + j];
     }
```
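`Gather` copies rows of a dense feature matrix out by index, and `Scatter` accumulates rows back in; both now take the index type as a template parameter. A self-contained exercise of the two helpers exactly as defined above (only the includes and the `std::` qualification are added), using 64-bit indices:

```cpp
#include <cstring>
#include <iostream>

template <typename T, typename IntT = int>
void Gather(const T* x, const IntT* indexs, const int n, const int channels,
            T* out) {
  for (int i = 0; i < n; i++) {
    IntT real_i = indexs[i];
    std::memcpy(out + i * channels, x + real_i * channels,
                channels * sizeof(T));
  }
}

template <typename T, typename IntT = int>
void Scatter(const T* x, const IntT* indexs, const int n, const int channels,
             T* out) {
  for (int i = 0; i < n; i++) {
    IntT real_i = indexs[i];
    for (int j = 0; j < channels; j++) {
      out[real_i * channels + j] += x[i * channels + j];  // accumulate
    }
  }
}

int main() {
  // 4 input rows with 2 channels; gather rows 3 and 1, then scatter back.
  const float x[8] = {0, 0, 1, 1, 2, 2, 3, 3};
  const long long idx[2] = {3, 1};  // 64-bit indices now work too
  float gathered[4];
  Gather(x, idx, 2, 2, gathered);       // gathered = {3, 3, 1, 1}
  float accum[8] = {0};
  Scatter(gathered, idx, 2, 2, accum);  // rows 3 and 1 receive the values
  std::cout << gathered[0] << " " << accum[2] << "\n";  // prints "3 1"
  return 0;
}
```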
paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc (+81 −50)

```diff
@@ -18,6 +18,8 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/sparse/cpu/convolution.h"
 
+#include "paddle/phi/api/ext/dispatch.h"
+
 namespace phi {
 namespace sparse {
 
@@ -29,24 +31,24 @@ namespace sparse {
 //]
 // x_grad = out_grad * transpose(kenrel)
 // kernel_grad = transpose(x) * out_grad
-template <typename T, typename Context>
-void Conv3dGradKernel(const Context& dev_ctx,
+template <typename T, typename IntT = int>
+void Conv3dGradCPUKernel(const CPUContext& dev_ctx,
                       const SparseCooTensor& x,
                       const DenseTensor& kernel,
                       const DenseTensor& rulebook,
                       const SparseCooTensor& out_grad,
                       const std::vector<int>& paddings,
                       const std::vector<int>& dilations,
                       const std::vector<int>& strides,
                       const int groups,
                       const bool subm,
                       SparseCooTensor* x_grad,
                       DenseTensor* kernel_grad) {
   const auto& kernel_dims = kernel.dims();
   const int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
   const int in_channels = kernel_dims[3];
   const int out_channels = kernel_dims[4];
-  const int* rulebook_ptr = rulebook.data<int>();
+  const IntT* rulebook_ptr = rulebook.data<IntT>();
   const int rulebook_len = rulebook.dims()[1];
@@ -66,32 +68,30 @@ void Conv3dGradKernel(const Context& dev_ctx,
   T* in_features_ptr = in_features.data<T>();
   T* d_x_features_ptr = d_x_features.data<T>();
   T* out_grad_features_ptr = out_grad_features.data<T>();
-  kernel_grad->Resize(kernel_dims);
-  dev_ctx.Alloc(
-      kernel_grad, kernel_grad->dtype(), kernel_grad->numel() * sizeof(T));
+  *kernel_grad = phi::EmptyLike<T>(dev_ctx, kernel);
   T* d_kernel_ptr = kernel_grad->data<T>();
   memset(d_kernel_ptr, 0, sizeof(T) * kernel_grad->numel());
 
   int half_kernel_size = kernel_size / 2;
-  auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
+  auto blas = phi::funcs::GetBlas<CPUContext, T>(dev_ctx);
   DenseTensor x_grad_indices =
-      phi::EmptyLike<int>(dev_ctx, x.non_zero_indices());
+      phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
   DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
   T* x_grad_values_ptr = x_grad_values.data<T>();
   memset(x_grad_values_ptr, 0, sizeof(T) * x_grad_values.numel());
   memset(d_x_features_ptr, 0, sizeof(T) * d_x_features.numel());
-  phi::Copy<Context>(dev_ctx,
+  phi::Copy<CPUContext>(dev_ctx,
                      x.non_zero_indices(),
                      dev_ctx.GetPlace(),
                      false,
                      &x_grad_indices);
   x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
 
-  std::vector<int> offsets(kernel_size + 1), counter(kernel_size, 0);
+  std::vector<IntT> offsets(kernel_size + 1), counter(kernel_size, 0);
   for (int i = 0; i < rulebook_len; i++) {
     counter[rulebook_ptr[i]] += 1;
   }
-  int offset = 0, max_count = 0;
+  IntT offset = 0, max_count = 0;
   for (int i = 0; i < kernel_size; i++) {
     offsets[i] = offset;
     offset += counter[i];
@@ -102,30 +102,31 @@ void Conv3dGradKernel(const Context& dev_ctx,
   offsets[kernel_size] = offset;
 
   if (subm) {
-    phi::funcs::sparse::SubmPreProcess<T, Context>(dev_ctx,
-                                                   x,
-                                                   kernel,
-                                                   out_grad.non_zero_elements(),
-                                                   in_channels,
-                                                   out_channels,
-                                                   half_kernel_size,
-                                                   kernel_grad,
-                                                   &x_grad_values);
+    phi::funcs::sparse::SubmPreProcess<T, CPUContext>(
+        dev_ctx,
+        x,
+        kernel,
+        out_grad.non_zero_elements(),
+        in_channels,
+        out_channels,
+        half_kernel_size,
+        kernel_grad,
+        &x_grad_values);
     if (max_count == 0) {
       return;
     }
   }
 
-  Gather<T>(x.non_zero_elements().data<T>(),
-            rulebook_ptr + rulebook_len,
-            rulebook_len,
-            in_channels,
-            in_features_ptr);
-  Gather<T>(out_grad.non_zero_elements().data<T>(),
-            rulebook_ptr + rulebook_len * 2,
-            rulebook_len,
-            out_channels,
-            out_grad_features_ptr);
+  Gather<T, IntT>(x.non_zero_elements().data<T>(),
+                  rulebook_ptr + rulebook_len,
+                  rulebook_len,
+                  in_channels,
+                  in_features_ptr);
+  Gather<T, IntT>(out_grad.non_zero_elements().data<T>(),
+                  rulebook_ptr + rulebook_len * 2,
+                  rulebook_len,
+                  out_channels,
+                  out_grad_features_ptr);
 
   const T* kernel_ptr = kernel.data<T>();
   for (int i = 0; i < kernel_size; i++) {
@@ -170,11 +171,41 @@ void Conv3dGradKernel(const Context& dev_ctx,
   }
 
   // 4. scatter
-  Scatter<T>(d_x_features_ptr,
-             rulebook.data<int>() + rulebook_len,
-             rulebook_len,
-             in_channels,
-             x_grad_values_ptr);
+  Scatter<T, IntT>(d_x_features_ptr,
+                   rulebook.data<IntT>() + rulebook_len,
+                   rulebook_len,
+                   in_channels,
+                   x_grad_values_ptr);
+}
+
+template <typename T, typename Context>
+void Conv3dGradKernel(const Context& dev_ctx,
+                      const SparseCooTensor& x,
+                      const DenseTensor& kernel,
+                      const DenseTensor& rulebook,
+                      const SparseCooTensor& out_grad,
+                      const std::vector<int>& paddings,
+                      const std::vector<int>& dilations,
+                      const std::vector<int>& strides,
+                      const int groups,
+                      const bool subm,
+                      SparseCooTensor* x_grad,
+                      DenseTensor* kernel_grad) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "Conv3dGradCPUKernel", ([&] {
+        Conv3dGradCPUKernel<T, data_t>(dev_ctx,
+                                       x,
+                                       kernel,
+                                       rulebook,
+                                       out_grad,
+                                       paddings,
+                                       dilations,
+                                       strides,
+                                       groups,
+                                       subm,
+                                       x_grad,
+                                       kernel_grad);
+      }));
 }
 
 }  // namespace sparse
```
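The new `Conv3dGradKernel` wrapper is the dispatch pattern used throughout this commit: `PD_DISPATCH_INTEGRAL_TYPES` picks a concrete C++ type for the runtime index dtype and binds it to `data_t` inside the lambda. The sketch below shows one plausible way such a macro can be built; it is a simplified guess at the shape, not Paddle's actual definition:

```cpp
#include <cstdint>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for phi::DataType, for this sketch only.
enum class DataType { INT32, INT64 };

// Each case aliases data_t before the lambda is defined, so data_t inside
// the lambda body resolves to that case's concrete type.
#define DISPATCH_CASE(DTYPE, CTYPE, ...) \
  case DTYPE: {                          \
    using data_t = CTYPE;                \
    __VA_ARGS__();                       \
    break;                               \
  }

#define DISPATCH_INTEGRAL_TYPES(dtype, name, ...)                  \
  switch (dtype) {                                                 \
    DISPATCH_CASE(DataType::INT32, int32_t, __VA_ARGS__)           \
    DISPATCH_CASE(DataType::INT64, int64_t, __VA_ARGS__)           \
    default:                                                       \
      throw std::runtime_error(std::string(name) + ": bad dtype"); \
  }

template <typename IntT>
void TypedKernel(const IntT* indices) { (void)indices; }

// Usage, mirroring the wrappers added in this commit.
void PublicKernel(DataType dtype, const void* indices) {
  DISPATCH_INTEGRAL_TYPES(dtype, "PublicKernel", ([&] {
    TypedKernel<data_t>(static_cast<const data_t*>(indices));
  }));
}
```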
paddle/phi/kernels/sparse/cpu/convolution_kernel.cc (+62 −34)

```diff
@@ -17,6 +17,8 @@ limitations under the License. */
 #include "paddle/phi/core/tensor_meta.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 
+#include "paddle/phi/api/ext/dispatch.h"
+
 namespace phi {
 namespace sparse {
 
@@ -25,17 +27,17 @@ namespace sparse {
  * kernel: (D, H, W, C, OC)
  * out: (N, D, H, W, OC)
 **/
-template <typename T, typename Context>
-void Conv3dKernel(const Context& dev_ctx,
+template <typename T, typename IntT = int>
+void Conv3dCPUKernel(const CPUContext& dev_ctx,
                   const SparseCooTensor& x,
                   const DenseTensor& kernel,
                   const std::vector<int>& paddings,
                   const std::vector<int>& dilations,
                   const std::vector<int>& strides,
                   const int groups,
                   const bool subm,
                   SparseCooTensor* out,
                   DenseTensor* rulebook) {
   // update padding and dilation
   // Currently, only support x.layout is NDHWC, groups = 1
   // if x.layout != NDHWC then transpose(x), transpose(weight)
@@ -66,18 +68,18 @@ void Conv3dKernel(const Context& dev_ctx,
       DataType::INT32, {kernel_size}, DataLayout::NCHW);
   DenseTensor counter_per_kernel = phi::Empty(dev_ctx, std::move(counter_meta));
 
-  ProductRuleBook<T, Context>(dev_ctx,
+  ProductRuleBook<T, CPUContext, IntT>(dev_ctx,
                               x,
                               kernel_sizes,
                               subm_paddings,
                               dilations,
                               subm_strides,
                               out_dims,
                               subm,
                               rulebook,
                               &counter_per_kernel);
 
-  UpdateRulebookAndOutIndex<T>(
+  UpdateRulebookAndOutIndex<T, CPUContext, IntT>(
       dev_ctx, x, kernel_size, out_channels, out_dims, rulebook, out);
 
   int n = rulebook->dims()[1];
@@ -95,14 +97,14 @@ void Conv3dKernel(const Context& dev_ctx,
   T* in_features_ptr = in_features.data<T>();
   T* out_features_ptr = out_features.data<T>();
-  Gather<T>(x.non_zero_elements().data<T>(),
-            rulebook->data<int>() + n,
-            n,
-            in_channels,
-            in_features_ptr);
+  Gather<T, IntT>(x.non_zero_elements().data<T>(),
+                  rulebook->data<IntT>() + n,
+                  n,
+                  in_channels,
+                  in_features_ptr);
 
   // 3. call gemm for every werght
-  auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
+  auto blas = phi::funcs::GetBlas<CPUContext, T>(dev_ctx);
   std::vector<int> offsets(kernel_size + 1);
   int offset = 0;
   for (int i = 0; i < kernel_size; i++) {
@@ -139,11 +141,37 @@ void Conv3dKernel(const Context& dev_ctx,
   // 4. scatter
   T* out_values_ptr = out->mutable_non_zero_elements()->data<T>();
   memset(out_values_ptr, 0, sizeof(T) * out->nnz() * out_channels);
-  Scatter<T>(out_features_ptr,
-             rulebook->data<int>() + n * 2,
-             n,
-             out_channels,
-             out_values_ptr);
+  Scatter<T, IntT>(out_features_ptr,
+                   rulebook->data<IntT>() + n * 2,
+                   n,
+                   out_channels,
+                   out_values_ptr);
+}
+
+template <typename T, typename Context>
+void Conv3dKernel(const Context& dev_ctx,
+                  const SparseCooTensor& x,
+                  const DenseTensor& kernel,
+                  const std::vector<int>& paddings,
+                  const std::vector<int>& dilations,
+                  const std::vector<int>& strides,
+                  const int groups,
+                  const bool subm,
+                  SparseCooTensor* out,
+                  DenseTensor* rulebook) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "Conv3dCPUKernel", ([&] {
+        Conv3dCPUKernel<T, data_t>(dev_ctx,
+                                   x,
+                                   kernel,
+                                   paddings,
+                                   dilations,
+                                   strides,
+                                   groups,
+                                   subm,
+                                   out,
+                                   rulebook);
+      }));
 }
 
 }  // namespace sparse
```
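The Gather/GEMM/Scatter sequence reads the rulebook as a `{3, rulebook_len}` tensor: judging from the indexing above, row 0 holds the kernel-element id of each (input, output) pair (that is what `counter[rulebook_ptr[i]]` tallies), row 1 the input nonzero index (`rulebook->data<IntT>() + n`), and row 2 the output nonzero index (`+ n * 2`). A tiny sketch with made-up values that walks such a rulebook:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // A {3, n} rulebook flattened row-major, as the kernels index it.
  const int64_t n = 4;
  const int64_t rulebook[3 * 4] = {
      0, 0, 1, 2,   // row 0: kernel-element ids (sorted, so runs group by id)
      5, 7, 2, 9,   // row 1: gather these input nonzero rows
      0, 1, 1, 3};  // row 2: scatter/accumulate into these output rows
  const int64_t* kernel_ids = rulebook;
  const int64_t* in_ids = rulebook + n;       // rulebook_ptr + rulebook_len
  const int64_t* out_ids = rulebook + n * 2;  // rulebook_ptr + rulebook_len * 2
  for (int64_t i = 0; i < n; i++) {
    std::printf("pair %lld: kernel %lld, in %lld -> out %lld\n",
                (long long)i, (long long)kernel_ids[i],
                (long long)in_ids[i], (long long)out_ids[i]);
  }
  return 0;
}
```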
paddle/phi/kernels/sparse/cpu/sparse_pool_grad_kernel.cc (+42 −13)

```diff
@@ -14,24 +14,28 @@ limitations under the License. */
 #include "paddle/phi/kernels/sparse/sparse_pool_grad_kernel.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/copy_kernel.h"
+#include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/funcs/pooling.h"
 #include "paddle/phi/kernels/funcs/sparse/convolution.h"
+#include "paddle/phi/api/ext/dispatch.h"
 
 namespace phi {
 namespace sparse {
 
-template <typename T, typename Context>
-void MaxPoolGradKernel(const Context& dev_ctx,
+template <typename T, typename IntT = int>
+void MaxPoolGradCPUKernel(const CPUContext& dev_ctx,
                        const SparseCooTensor& x,
                        const DenseTensor& rulebook,
                        const SparseCooTensor& out,
-                       const DenseTensor& out_grad,
+                       const SparseCooTensor& out_grad,
                        const std::vector<int>& kernel_sizes,
-                       DenseTensor* x_grad) {
+                       SparseCooTensor* x_grad) {
   int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   const int channels = x.dims()[4];
   int rulebook_len = rulebook.dims()[1];
-  const int* rulebook_ptr = rulebook.data<int>();
+  const IntT* rulebook_ptr = rulebook.data<IntT>();
   std::vector<int> offsets(kernel_size + 1), counter(kernel_size, 0);
   for (int i = 0; i < rulebook_len; i++) {
     counter[rulebook_ptr[i]] += 1;
@@ -40,15 +44,25 @@ void MaxPoolGradKernel(const Context& dev_ctx,
   const T* in_features_ptr = x.non_zero_elements().data<T>();
   const T* out_features_ptr = out.non_zero_elements().data<T>();
-  const T* out_grad_ptr = out_grad.data<T>();
-  T* x_grad_ptr = x_grad->data<T>();
+  const T* out_grad_ptr = out_grad.non_zero_elements().data<T>();
+  // TODO(zhangkaihuo): call phi::sparse::EmptyLike
+  DenseTensor x_grad_indices =
+      phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
+  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
+  T* x_grad_ptr = x_grad_values.data<T>();
   memset(x_grad_ptr, 0, sizeof(T) * x_grad->numel());
+  phi::Copy<CPUContext>(dev_ctx,
+                        x.non_zero_indices(),
+                        dev_ctx.GetPlace(),
+                        false,
+                        &x_grad_indices);
 
   phi::funcs::MaxPoolGrad<T> grad_functor;
   for (int i = 0; i < kernel_size; i++) {
     for (int j = 0; j < counter[i]; j++) {
-      int in_i = rulebook_ptr[rulebook_len + offsets[i] + j];
-      int out_i = rulebook_ptr[rulebook_len * 2 + offsets[i] + j];
+      IntT in_i = rulebook_ptr[rulebook_len + offsets[i] + j];
+      IntT out_i = rulebook_ptr[rulebook_len * 2 + offsets[i] + j];
       for (int c = 0; c < channels; c++) {
         grad_functor.compute(in_features_ptr[in_i * channels + c],
                              out_features_ptr[out_i * channels + c],
@@ -60,6 +74,21 @@ void MaxPoolGradKernel(const Context& dev_ctx,
     }
   }
 }
+
+template <typename T, typename Context>
+void MaxPoolGradKernel(const Context& dev_ctx,
+                       const SparseCooTensor& x,
+                       const DenseTensor& rulebook,
+                       const SparseCooTensor& out,
+                       const SparseCooTensor& out_grad,
+                       const std::vector<int>& kernel_sizes,
+                       SparseCooTensor* x_grad) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "MaxPoolGradCPUKernel", ([&] {
+        MaxPoolGradCPUKernel<T, data_t>(
+            dev_ctx, x, rulebook, out, out_grad, kernel_sizes, x_grad);
+      }));
+}
 
 }  // namespace sparse
 }  // namespace phi
```
paddle/phi/kernels/sparse/cpu/sparse_pool_kernel.cc (+48 −24)

```diff
@@ -19,6 +19,8 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/sparse/convolution.h"
 #include "paddle/phi/kernels/sparse/cpu/convolution.h"
 
+#include "paddle/phi/api/ext/dispatch.h"
+
 namespace phi {
 namespace sparse {
 
@@ -27,15 +29,15 @@ namespace sparse {
  * kernel: (D, H, W, C, OC)
  * out: (N, D, H, W, OC)
 **/
-template <typename T, typename Context>
-void MaxPoolKernel(const Context& dev_ctx,
+template <typename T, typename IntT = int>
+void MaxPoolCPUKernel(const CPUContext& dev_ctx,
                    const SparseCooTensor& x,
                    const std::vector<int>& kernel_sizes,
                    const std::vector<int>& paddings,
                    const std::vector<int>& dilations,
                    const std::vector<int>& strides,
                    SparseCooTensor* out,
                    DenseTensor* rulebook) {
   const auto& x_dims = x.dims();
   int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   const std::vector<int>& real_kernel_sizes =
@@ -51,22 +53,22 @@ void MaxPoolKernel(const Context& dev_ctx,
   const T* in_features_ptr = x.non_zero_elements().data<T>();
   // 1. product rule book
-  ProductRuleBook<T, Context>(dev_ctx,
+  ProductRuleBook<T, CPUContext, IntT>(dev_ctx,
                               x,
                               real_kernel_sizes,
                               paddings,
                               dilations,
                               strides,
                               out_dims,
                               false,
                               rulebook,
                               &counter_per_kernel);
 
-  UpdateRulebookAndOutIndex<T>(
+  UpdateRulebookAndOutIndex<T, CPUContext, IntT>(
       dev_ctx, x, kernel_size, in_channels, out_dims, rulebook, out);
 
   int rulebook_len = rulebook->dims()[1];
-  const int* rulebook_ptr = rulebook->data<int>();
+  const IntT* rulebook_ptr = rulebook->data<IntT>();
   const int* counter_ptr = counter_per_kernel.data<int>();
 
   std::vector<int> offsets(kernel_size + 1);
@@ -78,8 +80,8 @@ void MaxPoolKernel(const Context& dev_ctx,
   phi::funcs::MaxPool<T> max_pool_functor;
   for (int i = 0; i < kernel_size; i++) {
     for (int j = 0; j < counter_ptr[i]; j++) {
-      int in_i = rulebook_ptr[rulebook_len + offsets[i] + j];
-      int out_i = rulebook_ptr[rulebook_len * 2 + offsets[i] + j];
+      IntT in_i = rulebook_ptr[rulebook_len + offsets[i] + j];
+      IntT out_i = rulebook_ptr[rulebook_len * 2 + offsets[i] + j];
       if (!out_flags[out_i]) {
         out_flags[out_i] = true;
         memcpy(&out_features_ptr[out_i * in_channels],
@@ -95,6 +97,28 @@ void MaxPoolKernel(const Context& dev_ctx,
     }
   }
 }
+
+template <typename T, typename Context>
+void MaxPoolKernel(const Context& dev_ctx,
+                   const SparseCooTensor& x,
+                   const std::vector<int>& kernel_sizes,
+                   const std::vector<int>& paddings,
+                   const std::vector<int>& dilations,
+                   const std::vector<int>& strides,
+                   SparseCooTensor* out,
+                   DenseTensor* rulebook) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "MaxPoolCPUKernel", ([&] {
+        MaxPoolCPUKernel<T, data_t>(dev_ctx,
+                                    x,
+                                    kernel_sizes,
+                                    paddings,
+                                    dilations,
+                                    strides,
+                                    out,
+                                    rulebook);
+      }));
+}
 
 }  // namespace sparse
 }  // namespace phi
```
paddle/phi/kernels/sparse/gpu/convolution.cu.h (+122 −119)

This diff is collapsed in the captured page and was not expanded.
paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu (+90 −53)

```diff
@@ -24,6 +24,8 @@ limitations under the License. */
 #include "paddle/phi/kernels/sparse/convolution_grad_kernel.h"
 #include "paddle/phi/kernels/sparse/gpu/convolution.cu.h"
 
+#include "paddle/phi/api/ext/dispatch.h"
+
 namespace phi {
 namespace sparse {
 
@@ -35,24 +37,24 @@ namespace sparse {
 //]
 // x_grad = out_grad * transpose(kenrel)
 // kernel_grad = transpose(x) * out_grad
-template <typename T, typename Context>
-void Conv3dGradKernel(const Context& dev_ctx,
+template <typename T, typename IntT>
+void Conv3dGradGPUKernel(const GPUContext& dev_ctx,
                       const SparseCooTensor& x,
                       const DenseTensor& kernel,
                       const DenseTensor& rulebook,
                       const SparseCooTensor& out_grad,
                       const std::vector<int>& paddings,
                       const std::vector<int>& dilations,
                       const std::vector<int>& strides,
                       const int groups,
                       const bool subm,
                       SparseCooTensor* x_grad,
                       DenseTensor* kernel_grad) {
   const auto& kernel_dims = kernel.dims();
   const int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
   const int in_channels = kernel_dims[3];
   const int out_channels = kernel_dims[4];
-  const int* rulebook_ptr = rulebook.data<int>();
+  const IntT* rulebook_ptr = rulebook.data<IntT>();
   const int rulebook_len = rulebook.dims()[1];
@@ -74,29 +76,29 @@ void Conv3dGradKernel(const Context& dev_ctx,
   T* out_grad_features_ptr = out_grad_features.data<T>();
   *kernel_grad = phi::EmptyLike<T>(dev_ctx, kernel);
   T* d_kernel_ptr = kernel_grad->data<T>();
-  phi::funcs::SetConstant<Context, T> set_zero;
+  phi::funcs::SetConstant<GPUContext, T> set_zero;
   set_zero(dev_ctx, kernel_grad, static_cast<T>(0.0f));
 
   int half_kernel_size = kernel_size / 2;
-  auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
+  auto blas = phi::funcs::GetBlas<GPUContext, T>(dev_ctx);
   DenseTensor x_grad_indices =
-      phi::EmptyLike<int>(dev_ctx, x.non_zero_indices());
+      phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
   DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
   T* x_grad_values_ptr = x_grad_values.data<T>();
   set_zero(dev_ctx, &x_grad_values, static_cast<T>(0.0f));
   set_zero(dev_ctx, &d_x_features, static_cast<T>(0.0f));
-  phi::Copy<Context>(dev_ctx,
+  phi::Copy<GPUContext>(dev_ctx,
                      x.non_zero_indices(),
                      dev_ctx.GetPlace(),
                      false,
                      &x_grad_indices);
   x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
 
-  std::vector<int> offsets(kernel_size + 1), counter(kernel_size, 0),
+  std::vector<IntT> offsets(kernel_size + 1), counter(kernel_size, 0),
       h_counter(rulebook_len, 0);
   phi::backends::gpu::GpuMemcpyAsync(&h_counter[0],
                                      rulebook_ptr,
-                                     rulebook_len * sizeof(int),
+                                     rulebook_len * sizeof(IntT),
 #ifdef PADDLE_WITH_HIP
                                      hipMemcpyDeviceToHost,
 #else
@@ -109,7 +111,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
   for (int i = 0; i < rulebook_len; i++) {
     counter[h_counter[i]] += 1;
   }
-  int offset = 0, max_count = 0;
+  IntT offset = 0, max_count = 0;
   for (int i = 0; i < kernel_size; i++) {
     offsets[i] = offset;
     offset += counter[i];
@@ -120,15 +122,16 @@ void Conv3dGradKernel(const Context& dev_ctx,
   offsets[kernel_size] = offset;
 
   if (subm) {
-    phi::funcs::sparse::SubmPreProcess<T, Context>(dev_ctx,
-                                                   x,
-                                                   kernel,
-                                                   out_grad.non_zero_elements(),
-                                                   in_channels,
-                                                   out_channels,
-                                                   half_kernel_size,
-                                                   kernel_grad,
-                                                   &x_grad_values);
+    phi::funcs::sparse::SubmPreProcess<T, GPUContext>(
+        dev_ctx,
+        x,
+        kernel,
+        out_grad.non_zero_elements(),
+        in_channels,
+        out_channels,
+        half_kernel_size,
+        kernel_grad,
+        &x_grad_values);
     if (max_count == 0) {
       return;
     }
@@ -136,21 +139,21 @@ void Conv3dGradKernel(const Context& dev_ctx,
   auto config = phi::backends::gpu::GetGpuLaunchConfig1D(
       dev_ctx, rulebook_len * in_channels, 1);
-  GatherKernel<T, int><<<config.block_per_grid.x,
+  GatherKernel<T, IntT><<<config.block_per_grid.x,
                          config.thread_per_block.x,
                          0,
                          dev_ctx.stream()>>>(x.non_zero_elements().data<T>(),
                                              rulebook_ptr + rulebook_len,
                                              in_features_ptr,
                                              rulebook_len,
                                              in_channels);
   config = phi::backends::gpu::GetGpuLaunchConfig1D(
       dev_ctx, rulebook_len * out_channels, 1);
-  GatherKernel<T, int><<<config.block_per_grid.x,
+  GatherKernel<T, IntT><<<config.block_per_grid.x,
                          config.thread_per_block.x,
                          0,
                          dev_ctx.stream()>>>(
       out_grad.non_zero_elements().data<T>(),
       rulebook_ptr + rulebook_len * 2,
      out_grad_features_ptr,
@@ -203,15 +206,19 @@ void Conv3dGradKernel(const Context& dev_ctx,
   // x_grad->ResizeAndAllocate(x.non_zero_elements().dims());
   DenseTensorMeta index_meta(DataType::INT32, {rulebook_len}, DataLayout::NCHW);
   DenseTensor out_index = phi::Empty(dev_ctx, std::move(index_meta));
-  DenseTensor unique_key = phi::Empty(dev_ctx, std::move(index_meta));
+  DenseTensor unique_key = phi::Empty(
+      dev_ctx,
+      DenseTensorMeta(paddle::experimental::CppTypeToDataType<IntT>::Type(),
+                      {rulebook_len},
+                      DataLayout::NCHW));
   DenseTensor unique_value = phi::Empty(dev_ctx, std::move(index_meta));
 
-  SortedAndUniqueIndex(dev_ctx,
+  SortedAndUniqueIndex<GPUContext, IntT>(dev_ctx,
                        rulebook_ptr + rulebook_len,
                        rulebook_len,
                        &out_index,
                        &unique_key,
                        &unique_value);
   config = phi::backends::gpu::GetGpuLaunchConfig1D(
       dev_ctx, rulebook_len * in_channels, 1);
@@ -229,6 +236,36 @@ void Conv3dGradKernel(const Context& dev_ctx,
       subm);
 }
 
+template <typename T, typename Context>
+void Conv3dGradKernel(const Context& dev_ctx,
+                      const SparseCooTensor& x,
+                      const DenseTensor& kernel,
+                      const DenseTensor& rulebook,
+                      const SparseCooTensor& out_grad,
+                      const std::vector<int>& paddings,
+                      const std::vector<int>& dilations,
+                      const std::vector<int>& strides,
+                      const int groups,
+                      const bool subm,
+                      SparseCooTensor* x_grad,
+                      DenseTensor* kernel_grad) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "Conv3dGradGPUKernel", ([&] {
+        Conv3dGradGPUKernel<T, data_t>(dev_ctx,
+                                       x,
+                                       kernel,
+                                       rulebook,
+                                       out_grad,
+                                       paddings,
+                                       dilations,
+                                       strides,
+                                       groups,
+                                       subm,
+                                       x_grad,
+                                       kernel_grad);
+      }));
+}
 
 }  // namespace sparse
 }  // namespace phi
```
paddle/phi/kernels/sparse/gpu/convolution_kernel.cu
浏览文件 @
5d3fd4fe
...
@@ -19,29 +19,25 @@ limitations under the License. */
...
@@ -19,29 +19,25 @@ limitations under the License. */
#include "paddle/phi/kernels/sparse/convolution_kernel.h"
#include "paddle/phi/kernels/sparse/convolution_kernel.h"
#include "paddle/phi/kernels/sparse/gpu/convolution.cu.h"
#include "paddle/phi/kernels/sparse/gpu/convolution.cu.h"
#include "paddle/phi/api/ext/dispatch.h"
namespace
phi
{
namespace
phi
{
namespace
sparse
{
namespace
sparse
{
/**
template
<
typename
T
,
typename
IntT
>
* x: (N, D, H, W, C)
void
Conv3dGPUKernel
(
const
GPUContext
&
dev_ctx
,
* kernel: (D, H, W, C, OC)
const
SparseCooTensor
&
x
,
* out: (N, D, H, W, OC)
const
DenseTensor
&
kernel
,
**/
const
std
::
vector
<
int
>&
paddings
,
template
<
typename
T
,
typename
Context
>
const
std
::
vector
<
int
>&
dilations
,
void
Conv3dKernel
(
const
Context
&
dev_ctx
,
const
std
::
vector
<
int
>&
strides
,
const
SparseCooTensor
&
x
,
const
int
groups
,
const
DenseTensor
&
kernel
,
const
bool
subm
,
const
std
::
vector
<
int
>&
paddings
,
SparseCooTensor
*
out
,
const
std
::
vector
<
int
>&
dilations
,
DenseTensor
*
rulebook
)
{
const
std
::
vector
<
int
>&
strides
,
const
int
groups
,
const
bool
subm
,
SparseCooTensor
*
out
,
DenseTensor
*
rulebook
)
{
// update padding and dilation
// update padding and dilation
// Currently, only support x.layout is NDHWC, groups = 1
// Currently, only support x.layout is NDHWC, groups = 1
// if x.layout != NDHWC then transpose(x), transpose(weight)
// if x.layout != NDHWC then transpose(x), transpose(weight)
const
auto
&
x_dims
=
x
.
dims
();
const
auto
&
x_dims
=
x
.
dims
();
const
auto
&
kernel_dims
=
kernel
.
dims
();
const
auto
&
kernel_dims
=
kernel
.
dims
();
int
kernel_size
=
kernel_dims
[
0
]
*
kernel_dims
[
1
]
*
kernel_dims
[
2
];
int
kernel_size
=
kernel_dims
[
0
]
*
kernel_dims
[
1
]
*
kernel_dims
[
2
];
...
@@ -67,7 +63,6 @@ void Conv3dKernel(const Context& dev_ctx,
...
@@ -67,7 +63,6 @@ void Conv3dKernel(const Context& dev_ctx,
DenseTensor
offsets_per_kernel
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
offsets_meta
));
DenseTensor
offsets_per_kernel
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
offsets_meta
));
DenseTensorMeta
index_meta
(
DataType
::
INT32
,
{
1
},
DataLayout
::
NCHW
);
DenseTensorMeta
index_meta
(
DataType
::
INT32
,
{
1
},
DataLayout
::
NCHW
);
DenseTensor
out_index
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
DenseTensor
out_index
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
DenseTensor
unique_key
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
DenseTensor
unique_value
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
DenseTensor
unique_value
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
std
::
vector
<
int
>
subm_paddings
(
paddings
),
subm_strides
(
strides
);
std
::
vector
<
int
>
subm_paddings
(
paddings
),
subm_strides
(
strides
);
...
@@ -75,28 +70,26 @@ void Conv3dKernel(const Context& dev_ctx,
     phi::funcs::sparse::ResetSubmKernelSizeAndStrides(
         kernel.dims(), &subm_paddings, &subm_strides);
   }
-  int n = ProductRuleBook<T, Context>(dev_ctx,
+  int n = ProductRuleBook<T, GPUContext, IntT>(dev_ctx,
                                       x,
                                       kernel_sizes,
                                       subm_paddings,
                                       dilations,
                                       subm_strides,
                                       out_dims,
                                       subm,
                                       rulebook,
                                       &counter_per_kernel,
                                       &offsets_per_kernel,
                                       &out_index,
-                                      &unique_key,
                                       &unique_value,
                                       out,
                                       &h_counter,
                                       &offsets);
   const int* counter_ptr = counter_per_kernel.data<int>();
   const int* offsets_ptr = counter_per_kernel.data<int>();
-  const int* rulebook_ptr = rulebook->data<int>();
+  const IntT* rulebook_ptr = rulebook->data<IntT>();
   // 2. gather
   DenseTensorMeta in_features_meta(
...
@@ -109,22 +102,22 @@ void Conv3dKernel(const Context& dev_ctx,
       phi::Empty(dev_ctx, std::move(out_features_meta));
   T* in_features_ptr = in_features.data<T>();
   T* out_features_ptr = out_features.data<T>();
-  phi::funcs::SetConstant<Context, T> set_zero;
+  phi::funcs::SetConstant<GPUContext, T> set_zero;
   set_zero(dev_ctx, &out_features, static_cast<T>(0.0f));
   auto config =
       phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, n * in_channels, 1);
-  GatherKernel<T, int><<<config.block_per_grid.x,
+  GatherKernel<T, IntT><<<config.block_per_grid.x,
                          config.thread_per_block.x,
                          0,
                          dev_ctx.stream()>>>(x.non_zero_elements().data<T>(),
                                              rulebook_ptr + n,
                                              in_features_ptr,
                                              n,
                                              in_channels);
   // 3. call gemm for every weight
-  auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
+  auto blas = phi::funcs::GetBlas<GPUContext, T>(dev_ctx);
   auto* out_values = out->mutable_non_zero_elements();
   T* out_values_ptr = out_values->data<T>();
...
@@ -168,6 +161,36 @@ void Conv3dKernel(const Context& dev_ctx,
                 out_channels,
                 out_values_ptr);
   }
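A note on the body above: it follows the standard gather, GEMM, scatter pipeline for sparse convolution. GatherKernel pulls the input rows named by the rulebook segment at rulebook_ptr + n into a dense buffer, a GEMM runs per kernel offset, and the products land in the output values. Below is a minimal CPU sketch of the same recipe, with a hand-rolled matmul standing in for the blas call; every name and number in it is illustrative, not a phi API.

#include <cstdio>
#include <vector>

// Toy gather -> GEMM -> scatter for one kernel offset, 2 in/out channels.
// in_rows/out_rows play the role of one rulebook segment.
int main() {
  const int in_c = 2, out_c = 2;
  std::vector<float> x = {1, 2, 3, 4};   // 2 input rows, row-major
  std::vector<float> w = {1, 0, 0, 1};   // in_c x out_c identity weight
  std::vector<int> in_rows = {0, 1}, out_rows = {1, 0};
  std::vector<float> out(2 * out_c, 0.f);

  for (int r = 0; r < (int)in_rows.size(); ++r) {
    for (int oc = 0; oc < out_c; ++oc) {
      float acc = 0.f;
      for (int ic = 0; ic < in_c; ++ic)   // gather the row and multiply
        acc += x[in_rows[r] * in_c + ic] * w[ic * out_c + oc];
      out[out_rows[r] * out_c + oc] += acc;  // scatter-add into output row
    }
  }
  for (float v : out) std::printf("%g ", v);  // prints: 3 4 1 2
}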
+/**
+ * x: (N, D, H, W, C)
+ * kernel: (D, H, W, C, OC)
+ * out: (N, D, H, W, OC)
+ **/
+template <typename T, typename Context>
+void Conv3dKernel(const Context& dev_ctx,
+                  const SparseCooTensor& x,
+                  const DenseTensor& kernel,
+                  const std::vector<int>& paddings,
+                  const std::vector<int>& dilations,
+                  const std::vector<int>& strides,
+                  const int groups,
+                  const bool subm,
+                  SparseCooTensor* out,
+                  DenseTensor* rulebook) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "Conv3dGPUKernel", ([&] {
+        Conv3dGPUKernel<T, data_t>(dev_ctx,
+                                   x,
+                                   kernel,
+                                   paddings,
+                                   dilations,
+                                   strides,
+                                   groups,
+                                   subm,
+                                   out,
+                                   rulebook);
+      }));
+}
+
 }  // namespace sparse
 }  // namespace phi
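The added Conv3dKernel above is the pattern this commit applies everywhere: the registered kernel stays templated on T only, inspects the runtime dtype of x.non_zero_indices(), and PD_DISPATCH_INTEGRAL_TYPES instantiates Conv3dGPUKernel with the matching index type bound to data_t. Here is a self-contained sketch of that runtime-dtype-to-template dispatch; the enum and function names are illustrative stand-ins for the phi machinery, not the real macro.

#include <cstdint>
#include <cstdio>
#include <stdexcept>

enum class DataType { INT32, INT64 };

template <typename T, typename IntT>
void Conv3dImpl(const void* indices, int n) {
  // real code would reinterpret indices as const IntT* and run the kernel
  std::printf("dispatched with %zu-byte indices, n=%d\n", sizeof(IntT), n);
}

template <typename T>
void Conv3dKernel(DataType indices_dtype, const void* indices, int n) {
  switch (indices_dtype) {  // runtime dtype chooses the compile-time IntT
    case DataType::INT32: Conv3dImpl<T, int32_t>(indices, n); break;
    case DataType::INT64: Conv3dImpl<T, int64_t>(indices, n); break;
    default: throw std::runtime_error("unsupported index dtype");
  }
}

int main() {
  int64_t idx[] = {0, 1, 2};
  Conv3dKernel<float>(DataType::INT64, idx, 3);  // picks the int64_t path
}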
...
paddle/phi/kernels/sparse/gpu/sparse_pool_grad_kernel.cu
@@ -12,24 +12,28 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include "paddle/phi/kernels/sparse/sparse_pool_grad_kernel.h"
+
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/copy_kernel.h"
+#include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/pooling.h"
 #include "paddle/phi/kernels/funcs/sparse/convolution.h"
-#include "paddle/phi/kernels/sparse/sparse_pool_grad_kernel.h"
+#include "paddle/phi/api/ext/dispatch.h"

 namespace phi {
 namespace sparse {

-template <typename T>
+template <typename T, typename IntT = int>
 __global__ void MaxPoolGradCudaKernel(const T* in_features_ptr,
                                       const T* out_features_ptr,
                                       const T* out_grad_ptr,
-                                      const int* rulebook_ptr,
+                                      const IntT* rulebook_ptr,
                                       const int n,
                                       const int rulebook_len,
                                       const int channels,
...
@@ -38,8 +42,8 @@ __global__ void MaxPoolGradCudaKernel(const T* in_features_ptr,
   CUDA_KERNEL_LOOP_TYPE(i, n * channels, int64_t) {
     int real_i = i / channels;
     int c = i - real_i * channels;
-    int in_i = rulebook_ptr[real_i];
-    int out_i = rulebook_ptr[real_i + rulebook_len];
+    IntT in_i = rulebook_ptr[real_i];
+    IntT out_i = rulebook_ptr[real_i + rulebook_len];
     grad_functor.compute(in_features_ptr[in_i * channels + c],
                          out_features_ptr[out_i * channels + c],
                          out_grad_ptr[out_i * channels + c],
...
@@ -48,23 +52,23 @@ __global__ void MaxPoolGradCudaKernel(const T* in_features_ptr,
   }
 }

-template <typename T, typename Context>
-void MaxPoolGradKernel(const Context& dev_ctx,
-                       const SparseCooTensor& x,
-                       const DenseTensor& rulebook,
-                       const SparseCooTensor& out,
-                       const DenseTensor& out_grad,
-                       const std::vector<int>& kernel_sizes,
-                       DenseTensor* x_grad) {
+template <typename T, typename IntT = int>
+void MaxPoolGradGPUKernel(const GPUContext& dev_ctx,
+                          const SparseCooTensor& x,
+                          const DenseTensor& rulebook,
+                          const SparseCooTensor& out,
+                          const SparseCooTensor& out_grad,
+                          const std::vector<int>& kernel_sizes,
+                          SparseCooTensor* x_grad) {
   int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   const int in_channels = x.dims()[4];
   int rulebook_len = rulebook.dims()[1];
-  const int* rulebook_ptr = rulebook.data<int>();
-  std::vector<int> offsets(kernel_size + 1), counter(kernel_size, 0),
+  const IntT* rulebook_ptr = rulebook.data<IntT>();
+  std::vector<IntT> offsets(kernel_size + 1), counter(kernel_size, 0),
       h_counter(kernel_size);
   phi::backends::gpu::GpuMemcpyAsync(&h_counter[0],
                                      rulebook_ptr,
-                                     rulebook_len * sizeof(int),
+                                     rulebook_len * sizeof(IntT),
 #ifdef PADDLE_WITH_HIP
                                      hipMemcpyDeviceToHost,
 #else
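The offsets/counter/h_counter vectors above are the usual segmented-rulebook bookkeeping: one counter per kernel offset, an exclusive prefix sum to find where each offset's segment starts, and (as the loop further down shows) no launch for segments with counter[i] <= 0. A plain C++ sketch of that bookkeeping, with illustrative numbers:

#include <cstdio>
#include <vector>

int main() {
  // rulebook entries produced per kernel offset (e.g. 27 slots for 3x3x3)
  std::vector<int> counter = {0, 4, 2, 0, 5};
  std::vector<int> offsets(counter.size() + 1, 0);
  for (size_t i = 0; i < counter.size(); ++i)
    offsets[i + 1] = offsets[i] + counter[i];  // exclusive prefix sum

  // segment i of the rulebook occupies rows [offsets[i], offsets[i+1]);
  // a kernel is launched only for non-empty segments
  for (size_t i = 0; i < counter.size(); ++i)
    if (counter[i] > 0)
      std::printf("offset %zu: rows [%d, %d)\n", i, offsets[i], offsets[i + 1]);
}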
...
@@ -80,10 +84,20 @@ void MaxPoolGradKernel(const Context& dev_ctx,
   const T* in_features_ptr = x.non_zero_elements().data<T>();
   const T* out_features_ptr = out.non_zero_elements().data<T>();
-  const T* out_grad_ptr = out_grad.data<T>();
-  T* x_grad_ptr = x_grad->data<T>();
-  phi::funcs::SetConstant<Context, T> set_zero;
-  set_zero(dev_ctx, x_grad, static_cast<T>(0.0f));
+  const T* out_grad_ptr = out_grad.non_zero_elements().data<T>();
+  // TODO(zhangkaihuo): call phi::sparse::EmptyLike
+  DenseTensor x_grad_indices =
+      phi::EmptyLike<IntT>(dev_ctx, x.non_zero_indices());
+  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
+  T* x_grad_ptr = x_grad_values.data<T>();
+  phi::funcs::SetConstant<GPUContext, T> set_zero;
+  set_zero(dev_ctx, &x_grad_values, static_cast<T>(0.0f));
+  phi::Copy<GPUContext>(dev_ctx,
+                        x.non_zero_indices(),
+                        dev_ctx.GetPlace(),
+                        false,
+                        &x_grad_indices);
   for (int i = 0; i < kernel_size; i++) {
     if (counter[i] <= 0) {
...
@@ -92,10 +106,10 @@ void MaxPoolGradKernel(const Context& dev_ctx,
     auto config = phi::backends::gpu::GetGpuLaunchConfig1D(
         dev_ctx, counter[i] * in_channels, 1);
-    MaxPoolGradCudaKernel<T><<<config.block_per_grid.x,
-                               config.thread_per_block.x,
-                               0,
-                               dev_ctx.stream()>>>(
+    MaxPoolGradCudaKernel<T, IntT><<<config.block_per_grid.x,
+                                     config.thread_per_block.x,
+                                     0,
+                                     dev_ctx.stream()>>>(
         in_features_ptr,
         out_features_ptr,
         out_grad_ptr,
...
@@ -107,6 +121,21 @@ void MaxPoolGradKernel(const Context& dev_ctx,
   }
 }

+template <typename T, typename Context>
+void MaxPoolGradKernel(const Context& dev_ctx,
+                       const SparseCooTensor& x,
+                       const DenseTensor& rulebook,
+                       const SparseCooTensor& out,
+                       const SparseCooTensor& out_grad,
+                       const std::vector<int>& kernel_sizes,
+                       SparseCooTensor* x_grad) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "MaxPoolGradGPUKernel", ([&] {
+        MaxPoolGradGPUKernel<T, data_t>(
+            dev_ctx, x, rulebook, out, out_grad, kernel_sizes, x_grad);
+      }));
+}
+
 }  // namespace sparse
 }  // namespace phi
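MaxPoolGradCudaKernel routes each output gradient back to the input that won the forward max: for every rulebook pair (in_i, out_i) the grad functor presumably compares in_features[in_i] with out_features[out_i] and accumulates out_grad[out_i] on a match. A single-channel CPU sketch of that routing (illustrative data, not the phi MaxPoolGrad functor):

#include <cstdio>
#include <vector>

int main() {
  // rulebook layout mirrors the kernel: first half in_i, second half out_i
  std::vector<int> rulebook = {0, 1, 2,   // in_i
                               0, 0, 1};  // out_i
  std::vector<float> in = {1.f, 5.f, 2.f};
  std::vector<float> out = {5.f, 2.f};         // forward max per output row
  std::vector<float> out_grad = {10.f, 20.f};
  std::vector<float> in_grad(in.size(), 0.f);

  const int n = 3;  // rulebook_len
  for (int r = 0; r < n; ++r) {
    int in_i = rulebook[r];
    int out_i = rulebook[r + n];
    if (in[in_i] == out[out_i])          // this input was the argmax
      in_grad[in_i] += out_grad[out_i];
  }
  for (float g : in_grad) std::printf("%g ", g);  // prints: 0 10 20
}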
...
paddle/phi/kernels/sparse/gpu/sparse_pool_kernel.cu
@@ -12,19 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include "paddle/phi/kernels/sparse/sparse_pool_kernel.h"
+
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_meta.h"
 #include "paddle/phi/kernels/funcs/pooling.h"
 #include "paddle/phi/kernels/funcs/sparse/convolution.h"
 #include "paddle/phi/kernels/sparse/gpu/convolution.cu.h"
-#include "paddle/phi/kernels/sparse/sparse_pool_kernel.h"
+#include "paddle/phi/api/ext/dispatch.h"

 namespace phi {
 namespace sparse {

-template <typename T>
+template <typename T, typename IntT = int>
 __global__ void MaxPoolCudaKernel(const T* in_features_ptr,
-                                  const int* rulebook_ptr,
+                                  const IntT* rulebook_ptr,
                                   const int n,
                                   const int rulebook_len,
                                   const int channels,
...
@@ -33,8 +36,8 @@ __global__ void MaxPoolCudaKernel(const T* in_features_ptr,
   CUDA_KERNEL_LOOP_TYPE(i, n * channels, int64_t) {
     int real_i = i / channels;
     int channel_i = i - real_i * channels;
-    int in_i = rulebook_ptr[real_i];
-    int out_i = rulebook_ptr[real_i + rulebook_len];
+    IntT in_i = rulebook_ptr[real_i];
+    IntT out_i = rulebook_ptr[real_i + rulebook_len];
     max_pool_functor.compute(in_features_ptr[in_i * channels + channel_i],
                              &out_features_ptr[out_i * channels + channel_i]);
   }
...
@@ -45,15 +48,15 @@ __global__ void MaxPoolCudaKernel(const T* in_features_ptr,
  * kernel: (D, H, W, C, OC)
  * out: (N, D, H, W, OC)
 **/
-template <typename T, typename Context>
-void MaxPoolKernel(const Context& dev_ctx,
-                   const SparseCooTensor& x,
-                   const std::vector<int>& kernel_sizes,
-                   const std::vector<int>& paddings,
-                   const std::vector<int>& dilations,
-                   const std::vector<int>& strides,
-                   SparseCooTensor* out,
-                   DenseTensor* rulebook) {
+template <typename T, typename IntT = int>
+void MaxPoolGPUKernel(const GPUContext& dev_ctx,
+                      const SparseCooTensor& x,
+                      const std::vector<int>& kernel_sizes,
+                      const std::vector<int>& paddings,
+                      const std::vector<int>& dilations,
+                      const std::vector<int>& strides,
+                      SparseCooTensor* out,
+                      DenseTensor* rulebook) {
   const auto& x_dims = x.dims();
   int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   const std::vector<int>& real_kernel_sizes =
@@ -70,29 +73,27 @@ void MaxPoolKernel(const Context& dev_ctx,
...
@@ -70,29 +73,27 @@ void MaxPoolKernel(const Context& dev_ctx,
DenseTensor
offsets_per_kernel
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
counter_meta
));
DenseTensor
offsets_per_kernel
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
counter_meta
));
DenseTensorMeta
index_meta
(
DataType
::
INT32
,
{
1
},
DataLayout
::
NCHW
);
DenseTensorMeta
index_meta
(
DataType
::
INT32
,
{
1
},
DataLayout
::
NCHW
);
DenseTensor
out_index
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
DenseTensor
out_index
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
DenseTensor
unique_key
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
DenseTensor
unique_value
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
DenseTensor
unique_value
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
index_meta
));
// 1. product rulebook
// 1. product rulebook
int
rulebook_len
=
ProductRuleBook
<
T
,
Context
>
(
dev_ctx
,
int
rulebook_len
=
ProductRuleBook
<
T
,
GPUContext
,
IntT
>
(
dev_ctx
,
x
,
x
,
real_kernel_sizes
,
real_kernel_sizes
,
paddings
,
paddings
,
dilations
,
dilations
,
strides
,
strides
,
out_dims
,
out_dims
,
false
,
false
,
rulebook
,
rulebook
,
&
counter_per_kernel
,
&
counter_per_kernel
,
&
offsets_per_kernel
,
&
offsets_per_kernel
,
&
out_index
,
&
out_index
,
&
unique_key
,
&
unique_value
,
&
unique_value
,
out
,
out
,
&
counter
,
&
counter
,
&
offsets
);
&
offsets
);
const
IntT
*
rulebook_ptr
=
rulebook
->
data
<
IntT
>
();
const
int
*
rulebook_ptr
=
rulebook
->
data
<
int
>
();
T
*
out_features_ptr
=
out
->
mutable_non_zero_elements
()
->
data
<
T
>
();
T
*
out_features_ptr
=
out
->
mutable_non_zero_elements
()
->
data
<
T
>
();
const
T
*
in_features_ptr
=
x
.
non_zero_elements
().
data
<
T
>
();
const
T
*
in_features_ptr
=
x
.
non_zero_elements
().
data
<
T
>
();
...
@@ -113,10 +114,10 @@ void MaxPoolKernel(const Context& dev_ctx,
     auto config = phi::backends::gpu::GetGpuLaunchConfig1D(
         dev_ctx, counter[i] * in_channels, 1);
-    MaxPoolCudaKernel<T><<<config.block_per_grid.x,
-                           config.thread_per_block.x,
-                           0,
-                           dev_ctx.stream()>>>(
+    MaxPoolCudaKernel<T, IntT><<<config.block_per_grid.x,
+                                 config.thread_per_block.x,
+                                 0,
+                                 dev_ctx.stream()>>>(
         in_features_ptr,
         rulebook_ptr + offsets[i] + rulebook_len,
         counter[i],
...
@@ -126,6 +127,28 @@ void MaxPoolKernel(const Context& dev_ctx,
   }
 }

+template <typename T, typename Context>
+void MaxPoolKernel(const Context& dev_ctx,
+                   const SparseCooTensor& x,
+                   const std::vector<int>& kernel_sizes,
+                   const std::vector<int>& paddings,
+                   const std::vector<int>& dilations,
+                   const std::vector<int>& strides,
+                   SparseCooTensor* out,
+                   DenseTensor* rulebook) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "MaxPoolGPUKernel", ([&] {
+        MaxPoolGPUKernel<T, data_t>(dev_ctx,
+                                    x,
+                                    kernel_sizes,
+                                    paddings,
+                                    dilations,
+                                    strides,
+                                    out,
+                                    rulebook);
+      }));
+}
+
 }  // namespace sparse
 }  // namespace phi
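The forward kernel is the mirror image of the gradient: each rulebook pair folds input row in_i into output row out_i under a running max, channel by channel. A single-channel CPU sketch (the -infinity initialization is illustrative; the real kernel zero-fills and delegates to the MaxPool functor):

#include <algorithm>
#include <cstdio>
#include <limits>
#include <vector>

int main() {
  std::vector<int> rulebook = {0, 1, 2,   // in_i
                               0, 0, 1};  // out_i
  std::vector<float> in = {1.f, 5.f, 2.f};
  std::vector<float> out(2, -std::numeric_limits<float>::infinity());

  const int n = 3;  // rulebook_len
  for (int r = 0; r < n; ++r)
    out[rulebook[r + n]] = std::max(out[rulebook[r + n]], in[rulebook[r]]);

  for (float v : out) std::printf("%g ", v);  // prints: 5 2
}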
...
paddle/phi/kernels/sparse/sparse_pool_grad_kernel.h
@@ -26,20 +26,18 @@ void MaxPoolGradKernel(const Context& dev_ctx,
                        const SparseCooTensor& x,
                        const DenseTensor& rulebook,
                        const SparseCooTensor& out,
-                       const DenseTensor& out_grad,
+                       const SparseCooTensor& out_grad,
                        const std::vector<int>& kernel_sizes,
-                       DenseTensor* x_grad);
+                       SparseCooTensor* x_grad);

 template <typename T, typename Context>
-DenseTensor MaxPoolGrad(const Context& dev_ctx,
-                        const SparseCooTensor& x,
-                        const DenseTensor& rulebook,
-                        const SparseCooTensor& out,
-                        const DenseTensor& out_grad,
-                        const std::vector<int>& kernel_sizes) {
-  DenseTensor x_grad = phi::Empty<Context>(
-      dev_ctx,
-      DenseTensorMeta(x.dtype(), x.non_zero_elements().dims(), x.layout()));
+SparseCooTensor MaxPoolGrad(const Context& dev_ctx,
+                            const SparseCooTensor& x,
+                            const DenseTensor& rulebook,
+                            const SparseCooTensor& out,
+                            const SparseCooTensor& out_grad,
+                            const std::vector<int>& kernel_sizes) {
+  SparseCooTensor x_grad;
   MaxPoolGradKernel<T, Context>(
       dev_ctx, x, rulebook, out, out_grad, kernel_sizes, &x_grad);
   return x_grad;
...
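The header keeps phi's two-layer convention: an out-parameter kernel plus a value-returning convenience wrapper. After this change the wrapper can simply default-construct the result, because the kernel now allocates the indices and values itself (via EmptyLike and SetMember in the .cu file above). A stripped-down sketch of that idiom, with illustrative stand-in types:

#include <cstdio>
#include <vector>

struct SparseTensor {  // stand-in for SparseCooTensor
  std::vector<int> indices;
  std::vector<float> values;
};

// kernel-style API: fills an out-parameter and allocates what it needs
void MaxPoolGradKernel(const SparseTensor& x, SparseTensor* x_grad) {
  x_grad->indices = x.indices;                  // gradient keeps x's sparsity
  x_grad->values.assign(x.values.size(), 0.f);  // zero-initialized buffer
}

// value-returning wrapper: default-construct and delegate
SparseTensor MaxPoolGrad(const SparseTensor& x) {
  SparseTensor x_grad;
  MaxPoolGradKernel(x, &x_grad);
  return x_grad;
}

int main() {
  SparseTensor x{{0, 3, 7}, {1.f, 2.f, 3.f}};
  SparseTensor g = MaxPoolGrad(x);
  std::printf("nnz = %zu\n", g.values.size());  // prints: nnz = 3
}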
paddle/phi/kernels/sparse/sparse_pool_kernel.h
@@ -39,11 +39,7 @@ SparseCooTensor MaxPool(const Context& dev_ctx,
                         const std::vector<int>& dilations,
                         const std::vector<int>& strides,
                         DenseTensor* rulebook) {
-  DenseTensor indices = phi::Empty<Context>(
-      dev_ctx, DenseTensorMeta(DataType::INT32, {1}, DataLayout::NCHW));
-  DenseTensor values = phi::Empty<Context>(
-      dev_ctx, DenseTensorMeta(x.dtype(), {1}, x.layout()));
-  SparseCooTensor coo(indices, values, x.dims());
+  SparseCooTensor coo;
   MaxPoolKernel<T, Context>(
       dev_ctx, x, kernel_sizes, paddings, dilations, strides, &coo, rulebook);
   return coo;
...
paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
@@ -48,13 +48,13 @@ std::vector<T2> cast(const std::vector<T1>& in) {
   return out;
 }

-template <typename T>
-void TestConv3dBase(const std::vector<int>& indices,
+template <typename T, typename IntT = int>
+void TestConv3dBase(const std::vector<IntT>& indices,
                     const std::vector<T>& features,
                     const DDim& x_dims,
                     const std::vector<T>& kernel,
                     const DDim& kernel_dims,
-                    const std::vector<int>& correct_out_indices,
+                    const std::vector<IntT>& correct_out_indices,
                     const std::vector<T>& correct_out_features,
                     const DDim& correct_out_dims,
                     const int non_zero_num,
...
@@ -80,11 +80,13 @@ void TestConv3dBase(const std::vector<int>& indices,
   const int in_channels = kernel_dims[3];
   const int out_channels = kernel_dims[4];

+  auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
   DenseTensor indices_tensor = phi::Empty(
       dev_ctx_cpu,
-      DenseTensorMeta(DataType::INT32, {4, non_zero_num}, DataLayout::NCHW));
-  memcpy(
-      indices_tensor.data<int>(), indices.data(), indices.size() * sizeof(int));
+      DenseTensorMeta(indices_dtype, {4, non_zero_num}, DataLayout::NCHW));
+  memcpy(indices_tensor.data<IntT>(),
+         indices.data(),
+         indices.size() * sizeof(IntT));
   DenseTensor features_tensor = phi::Empty(
       dev_ctx_cpu,
       DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
...
@@ -111,7 +113,7 @@ void TestConv3dBase(const std::vector<int>& indices,
   if (!std::is_same<T, phi::dtype::float16>::value) {
     DenseTensor rulebook = phi::Empty(
-        dev_ctx_cpu, DenseTensorMeta(DataType::INT32, {1}, DataLayout::NCHW));
+        dev_ctx_cpu, DenseTensorMeta(indices_dtype, {1}, DataLayout::NCHW));
     SparseCooTensor out = sparse::Conv3d<T>(dev_ctx_cpu,
                                             x_tensor,
                                             kernel_tensor,
...
@@ -129,8 +131,8 @@ void TestConv3dBase(const std::vector<int>& indices,
     ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, out.nnz());
     int cmp_indices = memcmp(correct_out_indices.data(),
-                             out.non_zero_indices().data<int>(),
-                             correct_out_indices.size() * sizeof(int));
+                             out.non_zero_indices().data<IntT>(),
+                             correct_out_indices.size() * sizeof(IntT));
     ASSERT_EQ(cmp_indices, 0);
     f_verify(out.non_zero_elements().data<T>(), correct_out_features);
...
@@ -172,7 +174,7 @@ void TestConv3dBase(const std::vector<int>& indices,
     DenseTensor d_indices_tensor = phi::Empty(
         dev_ctx_gpu,
-        DenseTensorMeta(DataType::INT32, {4, non_zero_num}, DataLayout::NCHW));
+        DenseTensorMeta(indices_dtype, {4, non_zero_num}, DataLayout::NCHW));
     phi::Copy(
         dev_ctx_gpu, indices_tensor, phi::GPUPlace(), true, &d_indices_tensor);
...
@@ -195,7 +197,7 @@ void TestConv3dBase(const std::vector<int>& indices,
         dev_ctx_gpu, kernel_tensor, phi::GPUPlace(), true, &d_kernel_tensor);
     DenseTensor d_rulebook = phi::Empty(
-        dev_ctx_gpu, DenseTensorMeta(DataType::INT32, {1}, DataLayout::NCHW));
+        dev_ctx_gpu, DenseTensorMeta(indices_dtype, {1}, DataLayout::NCHW));
     SparseCooTensor d_out = sparse::Conv3d<T>(dev_ctx_gpu,
                                               d_x_tensor,
                                               d_kernel_tensor,
...
@@ -214,7 +216,7 @@ void TestConv3dBase(const std::vector<int>& indices,
     DenseTensor h_indices_tensor = phi::Empty(
         dev_ctx_cpu,
-        DenseTensorMeta(DataType::INT32, {4, d_out.nnz()}, DataLayout::NCHW));
+        DenseTensorMeta(indices_dtype, {4, d_out.nnz()}, DataLayout::NCHW));
     phi::Copy(dev_ctx_gpu,
               d_out.non_zero_indices(),
               phi::CPUPlace(),
...
@@ -222,8 +224,8 @@ void TestConv3dBase(const std::vector<int>& indices,
               &h_indices_tensor);
     int cmp_indices2 = memcmp(correct_out_indices.data(),
-                              h_indices_tensor.data<int>(),
-                              correct_out_indices.size() * sizeof(int));
+                              h_indices_tensor.data<IntT>(),
+                              correct_out_indices.size() * sizeof(IntT));
     ASSERT_EQ(cmp_indices2, 0);
     DenseTensor h_features_tensor =
...
@@ -264,12 +266,13 @@ void TestConv3dBase(const std::vector<int>& indices,
 #endif
 }

-void TestConv3d(const std::vector<int>& indices,
+template <typename IntT = int>
+void TestConv3d(const std::vector<IntT>& indices,
                 const std::vector<float>& features,
                 const DDim& x_dims,
                 const std::vector<float>& kernel,
                 const DDim& kernel_dims,
-                const std::vector<int>& correct_out_indices,
+                const std::vector<IntT>& correct_out_indices,
                 const std::vector<float>& correct_out_features,
                 const DDim& correct_out_dims,
                 const int non_zero_num,
...
@@ -282,41 +285,41 @@ void TestConv3d(const std::vector<int>& indices,
                 const std::vector<float> kernel_grad = {},
                 const bool subm = false) {
   // test float
-  TestConv3dBase<float>(indices,
+  TestConv3dBase<float, IntT>(indices,
                         features,
                         x_dims,
                         kernel,
                         kernel_dims,
                         correct_out_indices,
                         correct_out_features,
                         correct_out_dims,
                         non_zero_num,
                         paddings,
                         strides,
                         dilations,
                         diff,
                         backward,
                         features_grad,
                         kernel_grad,
                         subm);
   // test double
-  TestConv3dBase<double>(indices,
+  TestConv3dBase<double, IntT>(indices,
                          cast<float, double>(features),
                          x_dims,
                          cast<float, double>(kernel),
                          kernel_dims,
                          correct_out_indices,
                          cast<float, double>(correct_out_features),
                          correct_out_dims,
                          non_zero_num,
                          paddings,
                          strides,
                          dilations,
                          diff,
                          backward,
                          cast<float, double>(features_grad),
                          cast<float, double>(kernel_grad),
                          subm);
 }

 TEST(DEV_API, sparse_conv3d) {
...
@@ -616,6 +619,51 @@ TEST(DEV_API, sparse_conv2d) {
               dilations);
 }

+TEST(DEV_API, sparse_conv2d_int64) {
+  const int in_channels = 1;
+  const int out_channels = 1;
+  DDim x_dims = {1, 1, 5, 5, in_channels};
+  DDim kernel_dims = {1, 3, 3, in_channels, out_channels};
+  DDim out_dims = {1, 1, 3, 3, out_channels};
+  std::vector<int> paddings = {0, 0, 0};
+  std::vector<int> strides = {1, 1, 1};
+  std::vector<int> dilations = {1, 1, 1};
+
+  const int non_zero_num = 3;
+  std::vector<int64_t> indices_flatten = {0, 0, 0, 0, 0, 0, 0, 4, 0, 3, 2, 4};
+
+  std::vector<float> features = {-0.79394531, -0.3125, -0.55029297};
+  // 3*3*3=27
+  std::vector<float> kernel = {0.65820312,
+                               0.75048828,
+                               0.21411133,
+                               0.17370605,
+                               0.85546875,
+                               0.53076172,
+                               0.28833008,
+                               0.71044922,
+                               0.00659943};
+
+  std::vector<int64_t> out_indices_flatten = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                              0, 0, 2, 2, 2, 1, 2, 0, 1, 2};
+
+  std::vector<float> out_features = {
+      -0.17004, -0.71338, -0.00206, -0.22205, -0.09009};
+
+  TestConv3d<int64_t>(indices_flatten,
+                      features,
+                      x_dims,
+                      kernel,
+                      kernel_dims,
+                      out_indices_flatten,
+                      out_features,
+                      out_dims,
+                      non_zero_num,
+                      paddings,
+                      strides,
+                      dilations);
+}
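A quick hand check of the expected values in this new test: each output is the sum of feature times kernel weight over the nonzeros its 3x3 window covers. Reading the flattened index rows above as [n, d, h, w], the output at (h=0, w=2) sees the nonzeros at (0, 3) and (0, 4), which align with the row-major kernel weights k[0][1] and k[0][2]:

#include <cstdio>

int main() {
  float v = -0.79394531f * 0.75048828f + -0.55029297f * 0.21411133f;
  std::printf("%f\n", v);  // ~ -0.7137, near the expected -0.71338
                           // (the harness compares with a diff tolerance)
}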
 TEST(DEV_API, sparse_conv3d_backward) {
   const int in_channels = 1;
   const int out_channels = 1;
...
paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc
@@ -36,11 +36,11 @@ std::vector<T2> cast(const std::vector<T1>& in) {
   }
   return out;
 }

-template <typename T>
-void TestMaxPoolBase(const std::vector<int>& indices,
+template <typename T, typename IntT = int>
+void TestMaxPoolBase(const std::vector<IntT>& indices,
                      const std::vector<T>& features,
                      const DDim& x_dims,
-                     const std::vector<int>& correct_out_indices,
+                     const std::vector<IntT>& correct_out_indices,
                      const std::vector<T>& correct_out_features,
                      const DDim& correct_out_dims,
                      const int non_zero_num,
...
@@ -65,11 +65,13 @@ void TestMaxPoolBase(const std::vector<int>& indices,
   const int in_channels = x_dims[4];
   const int out_channels = in_channels;

+  auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
   DenseTensor indices_tensor = phi::Empty(
       dev_ctx_cpu,
-      DenseTensorMeta(DataType::INT32, {4, non_zero_num}, DataLayout::NCHW));
-  memcpy(
-      indices_tensor.data<int>(), indices.data(), indices.size() * sizeof(int));
+      DenseTensorMeta(indices_dtype, {4, non_zero_num}, DataLayout::NCHW));
+  memcpy(indices_tensor.data<IntT>(),
+         indices.data(),
+         indices.size() * sizeof(IntT));
   DenseTensor features_tensor = phi::Empty(
       dev_ctx_cpu,
       DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
...
@@ -88,8 +90,7 @@ void TestMaxPoolBase(const std::vector<int>& indices,
   };
   if (!std::is_same<T, phi::dtype::float16>::value) {
-    DenseTensor rulebook = phi::Empty(
-        dev_ctx_cpu, DenseTensorMeta(DataType::INT32, {1}, DataLayout::NCHW));
+    DenseTensor rulebook;
     SparseCooTensor out = sparse::MaxPool<T>(dev_ctx_cpu,
                                              x_tensor,
                                              kernel_sizes,
...
@@ -105,20 +106,16 @@ void TestMaxPoolBase(const std::vector<int>& indices,
     ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, out.nnz());
     int cmp_indices = memcmp(correct_out_indices.data(),
-                             out.non_zero_indices().data<int>(),
-                             correct_out_indices.size() * sizeof(int));
+                             out.non_zero_indices().data<IntT>(),
+                             correct_out_indices.size() * sizeof(IntT));
     ASSERT_EQ(cmp_indices, 0);
     f_verify(out.non_zero_elements().data<T>(), correct_out_features);

     if (backward) {
-      DenseTensor x_grad = sparse::MaxPoolGrad<T>(dev_ctx_cpu,
-                                                  x_tensor,
-                                                  rulebook,
-                                                  out,
-                                                  out.non_zero_elements(),
-                                                  kernel_sizes);
-      f_verify(x_grad.data<T>(), features_grad);
+      SparseCooTensor x_grad = sparse::MaxPoolGrad<T>(
+          dev_ctx_cpu, x_tensor, rulebook, out, out, kernel_sizes);
+      f_verify(x_grad.non_zero_elements().data<T>(), features_grad);
     }
   }
...
@@ -142,7 +139,7 @@ void TestMaxPoolBase(const std::vector<int>& indices,
     DenseTensor d_indices_tensor = phi::Empty(
         dev_ctx_gpu,
-        DenseTensorMeta(DataType::INT32, {4, non_zero_num}, DataLayout::NCHW));
+        DenseTensorMeta(indices_dtype, {4, non_zero_num}, DataLayout::NCHW));
     phi::Copy(
         dev_ctx_gpu, indices_tensor, phi::GPUPlace(), true, &d_indices_tensor);
...
@@ -153,8 +150,7 @@ void TestMaxPoolBase(const std::vector<int>& indices,
     SparseCooTensor d_x_tensor(d_indices_tensor, d_features_tensor, x_dims);
-    DenseTensor d_rulebook = phi::Empty(
-        dev_ctx_gpu, DenseTensorMeta(DataType::INT32, {1}, DataLayout::NCHW));
+    DenseTensor d_rulebook;
     SparseCooTensor d_out = sparse::MaxPool<T>(dev_ctx_gpu,
                                                d_x_tensor,
                                                kernel_sizes,
...
@@ -171,7 +167,7 @@ void TestMaxPoolBase(const std::vector<int>& indices,
     DenseTensor h_indices_tensor = phi::Empty(
         dev_ctx_cpu,
-        DenseTensorMeta(DataType::INT32, {4, d_out.nnz()}, DataLayout::NCHW));
+        DenseTensorMeta(indices_dtype, {4, d_out.nnz()}, DataLayout::NCHW));
     phi::Copy(dev_ctx_gpu,
               d_out.non_zero_indices(),
               phi::CPUPlace(),
...
@@ -179,8 +175,8 @@ void TestMaxPoolBase(const std::vector<int>& indices,
               &h_indices_tensor);
     int cmp_indices2 = memcmp(correct_out_indices.data(),
-                              h_indices_tensor.data<int>(),
-                              correct_out_indices.size() * sizeof(int));
+                              h_indices_tensor.data<IntT>(),
+                              correct_out_indices.size() * sizeof(IntT));
     ASSERT_EQ(cmp_indices2, 0);
     DenseTensor h_features_tensor =
...
@@ -194,23 +190,25 @@ void TestMaxPoolBase(const std::vector<int>& indices,
     f_verify(h_features_tensor.data<T>(), correct_out_features);

     if (backward) {
-      DenseTensor x_grad = sparse::MaxPoolGrad<T>(dev_ctx_gpu,
-                                                  d_x_tensor,
-                                                  d_rulebook,
-                                                  d_out,
-                                                  d_out.non_zero_elements(),
-                                                  kernel_sizes);
-      DenseTensor h_features_grad =
-          phi::EmptyLike<T>(dev_ctx_cpu, x_grad);
-      phi::Copy(dev_ctx_gpu, x_grad, phi::CPUPlace(), true, &h_features_grad);
+      SparseCooTensor x_grad = sparse::MaxPoolGrad<T>(
+          dev_ctx_gpu, d_x_tensor, d_rulebook, d_out, d_out, kernel_sizes);
+      DenseTensor h_features_grad =
+          phi::EmptyLike<T>(dev_ctx_cpu, x_grad.non_zero_elements());
+      phi::Copy(dev_ctx_gpu,
+                x_grad.non_zero_elements(),
+                phi::CPUPlace(),
+                true,
+                &h_features_grad);
       f_verify(h_features_grad.data<T>(), features_grad);
     }
 #endif
 }

-void TestMaxPool(const std::vector<int>& indices,
+template <typename IntT = int>
+void TestMaxPool(const std::vector<IntT>& indices,
                  const std::vector<float>& features,
                  const DDim& x_dims,
-                 const std::vector<int>& correct_out_indices,
+                 const std::vector<IntT>& correct_out_indices,
                  const std::vector<float>& correct_out_features,
                  const DDim& correct_out_dims,
                  const int non_zero_num,
...
@@ -222,35 +220,35 @@ void TestMaxPool(const std::vector<int>& indices,
                  const bool backward = false,
                  const std::vector<float> features_grad = {}) {
   // test float
-  TestMaxPoolBase<float>(indices,
+  TestMaxPoolBase<float, IntT>(indices,
                          features,
                          x_dims,
                          correct_out_indices,
                          correct_out_features,
                          correct_out_dims,
                          non_zero_num,
                          kernel_sizes,
                          paddings,
                          strides,
                          dilations,
                          diff,
                          backward,
                          features_grad);
   // test double
-  TestMaxPoolBase<double>(indices,
+  TestMaxPoolBase<double, IntT>(indices,
                           cast<float, double>(features),
                           x_dims,
                           correct_out_indices,
                           cast<float, double>(correct_out_features),
                           correct_out_dims,
                           non_zero_num,
                           kernel_sizes,
                           paddings,
                           strides,
                           dilations,
                           diff,
                           backward,
                           cast<float, double>(features_grad));
 }

 TEST(DEV_API, sparse_maxpool) {
...
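Both test files funnel their checks through a small f_verify helper that compares results element-wise against the expected vectors. A sketch of that checker pattern outside gtest; the tolerance value is illustrative:

#include <cassert>
#include <cmath>
#include <vector>

void f_verify(const float* actual, const std::vector<float>& expect,
              float tol = 1e-3f) {
  for (int i = 0; i < (int)expect.size(); ++i)
    assert(std::fabs(actual[i] - expect[i]) < tol);
}

int main() {
  std::vector<float> out = {5.f, 2.f}, expect = {5.f, 2.f};
  f_verify(out.data(), expect);  // passes silently when everything matches
}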