Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
feaf1e2d
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
feaf1e2d
编写于
11月 14, 2017
作者:
C
chengduo
提交者:
GitHub
11月 14, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5608 from chengduoZH/fix_pooling_function_parameter_order
fix pooling functor parameter order
上级
d7319c22
21604977
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
278 addition
and
272 deletion
+278
-272
paddle/operators/math/pooling.cc
paddle/operators/math/pooling.cc
+61
-57
paddle/operators/math/pooling.cu
paddle/operators/math/pooling.cu
+167
-167
paddle/operators/math/pooling.h
paddle/operators/math/pooling.h
+26
-24
paddle/operators/pool_op.h
paddle/operators/pool_op.h
+16
-16
paddle/operators/pool_with_index_op.h
paddle/operators/pool_with_index_op.h
+8
-8
未找到文件。
paddle/operators/math/pooling.cc
浏览文件 @
feaf1e2d
...
@@ -27,15 +27,15 @@ template <typename PoolProcess, typename T>
...
@@ -27,15 +27,15 @@ template <typename PoolProcess, typename T>
class
Pool2dFunctor
<
platform
::
CPUPlace
,
PoolProcess
,
T
>
{
class
Pool2dFunctor
<
platform
::
CPUPlace
,
PoolProcess
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
stride
s
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
padding
s
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
)
{
PoolProcess
pool_process
,
framework
::
Tensor
*
output
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
output_channels
=
output
.
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_height
=
output
.
dims
()[
2
];
const
int
output_height
=
output
->
dims
()[
2
];
const
int
output_width
=
output
.
dims
()[
3
];
const
int
output_width
=
output
->
dims
()[
3
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_width
=
ksize
[
1
];
const
int
ksize_width
=
ksize
[
1
];
const
int
stride_height
=
strides
[
0
];
const
int
stride_height
=
strides
[
0
];
...
@@ -47,7 +47,7 @@ class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
...
@@ -47,7 +47,7 @@ class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
const
int
output_stride
=
output_height
*
output_width
;
const
int
output_stride
=
output_height
*
output_width
;
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -87,11 +87,12 @@ template <typename PoolProcess, class T>
...
@@ -87,11 +87,12 @@ template <typename PoolProcess, class T>
class
Pool2dGradFunctor
<
platform
::
CPUPlace
,
PoolProcess
,
T
>
{
class
Pool2dGradFunctor
<
platform
::
CPUPlace
,
PoolProcess
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_grad_process
)
{
PoolProcess
pool_grad_process
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
3
];
...
@@ -110,7 +111,7 @@ class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
...
@@ -110,7 +111,7 @@ class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -154,10 +155,11 @@ template <class T>
...
@@ -154,10 +155,11 @@ template <class T>
class
MaxPool2dGradFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool2dGradFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
3
];
...
@@ -176,7 +178,7 @@ class MaxPool2dGradFunctor<platform::CPUPlace, T> {
...
@@ -176,7 +178,7 @@ class MaxPool2dGradFunctor<platform::CPUPlace, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -240,17 +242,17 @@ template <typename PoolProcess, class T>
...
@@ -240,17 +242,17 @@ template <typename PoolProcess, class T>
class
Pool3dFunctor
<
platform
::
CPUPlace
,
PoolProcess
,
T
>
{
class
Pool3dFunctor
<
platform
::
CPUPlace
,
PoolProcess
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
stride
s
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
padding
s
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
)
{
PoolProcess
pool_process
,
framework
::
Tensor
*
output
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
4
];
const
int
input_width
=
input
.
dims
()[
4
];
const
int
output_channels
=
output
.
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_depth
=
output
.
dims
()[
2
];
const
int
output_depth
=
output
->
dims
()[
2
];
const
int
output_height
=
output
.
dims
()[
3
];
const
int
output_height
=
output
->
dims
()[
3
];
const
int
output_width
=
output
.
dims
()[
4
];
const
int
output_width
=
output
->
dims
()[
4
];
const
int
ksize_depth
=
ksize
[
0
];
const
int
ksize_depth
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
1
];
const
int
ksize_height
=
ksize
[
1
];
const
int
ksize_width
=
ksize
[
2
];
const
int
ksize_width
=
ksize
[
2
];
...
@@ -265,7 +267,7 @@ class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
...
@@ -265,7 +267,7 @@ class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
const
int
output_stride
=
output_depth
*
output_height
*
output_width
;
const
int
output_stride
=
output_depth
*
output_height
*
output_width
;
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -315,11 +317,12 @@ template <typename PoolProcess, class T>
...
@@ -315,11 +317,12 @@ template <typename PoolProcess, class T>
class
Pool3dGradFunctor
<
platform
::
CPUPlace
,
PoolProcess
,
T
>
{
class
Pool3dGradFunctor
<
platform
::
CPUPlace
,
PoolProcess
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_grad_process
)
{
PoolProcess
pool_grad_process
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_height
=
input
.
dims
()[
3
];
...
@@ -343,7 +346,7 @@ class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
...
@@ -343,7 +346,7 @@ class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -398,10 +401,11 @@ template <class T>
...
@@ -398,10 +401,11 @@ template <class T>
class
MaxPool3dGradFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool3dGradFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_height
=
input
.
dims
()[
3
];
...
@@ -425,7 +429,7 @@ class MaxPool3dGradFunctor<platform::CPUPlace, T> {
...
@@ -425,7 +429,7 @@ class MaxPool3dGradFunctor<platform::CPUPlace, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -498,15 +502,15 @@ template <typename T>
...
@@ -498,15 +502,15 @@ template <typename T>
class
MaxPool2dWithIndexFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool2dWithIndexFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
output_channels
=
output
.
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_height
=
output
.
dims
()[
2
];
const
int
output_height
=
output
->
dims
()[
2
];
const
int
output_width
=
output
.
dims
()[
3
];
const
int
output_width
=
output
->
dims
()[
3
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_width
=
ksize
[
1
];
const
int
ksize_width
=
ksize
[
1
];
const
int
stride_height
=
strides
[
0
];
const
int
stride_height
=
strides
[
0
];
...
@@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
...
@@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
const
int
output_stride
=
output_height
*
output_width
;
const
int
output_stride
=
output_height
*
output_width
;
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -563,13 +567,13 @@ template <typename T>
...
@@ -563,13 +567,13 @@ template <typename T>
class
MaxPool2dWithIndexGradFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool2dWithIndexGradFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
const
int
batch_size
=
input_grad
.
dims
()[
0
];
framework
::
Tensor
*
input_grad
)
{
const
int
input_height
=
input_grad
.
dims
()[
2
];
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
input_width
=
input_grad
.
dims
()[
3
];
const
int
input_height
=
input_grad
->
dims
()[
2
];
const
int
input_width
=
input_grad
->
dims
()[
3
];
const
int
output_channels
=
output_grad
.
dims
()[
1
];
const
int
output_channels
=
output_grad
.
dims
()[
1
];
const
int
output_height
=
output_grad
.
dims
()[
2
];
const
int
output_height
=
output_grad
.
dims
()[
2
];
const
int
output_width
=
output_grad
.
dims
()[
3
];
const
int
output_width
=
output_grad
.
dims
()[
3
];
...
@@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
...
@@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -612,17 +616,17 @@ template <typename T>
...
@@ -612,17 +616,17 @@ template <typename T>
class
MaxPool3dWithIndexFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool3dWithIndexFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
4
];
const
int
input_width
=
input
.
dims
()[
4
];
const
int
output_channels
=
output
.
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_depth
=
output
.
dims
()[
2
];
const
int
output_depth
=
output
->
dims
()[
2
];
const
int
output_height
=
output
.
dims
()[
3
];
const
int
output_height
=
output
->
dims
()[
3
];
const
int
output_width
=
output
.
dims
()[
4
];
const
int
output_width
=
output
->
dims
()[
4
];
const
int
ksize_depth
=
ksize
[
0
];
const
int
ksize_depth
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
1
];
const
int
ksize_height
=
ksize
[
1
];
const
int
ksize_width
=
ksize
[
2
];
const
int
ksize_width
=
ksize
[
2
];
...
@@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
...
@@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
const
int
output_stride
=
output_depth
*
output_height
*
output_width
;
const
int
output_stride
=
output_depth
*
output_height
*
output_width
;
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -691,14 +695,14 @@ template <typename T>
...
@@ -691,14 +695,14 @@ template <typename T>
class
MaxPool3dWithIndexGradFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool3dWithIndexGradFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
const
int
batch_size
=
input_grad
.
dims
()[
0
];
framework
::
Tensor
*
input_grad
)
{
const
int
input_depth
=
input_grad
.
dims
()[
2
];
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
input_height
=
input_grad
.
dims
()[
3
];
const
int
input_depth
=
input_grad
->
dims
()[
2
];
const
int
input_width
=
input_grad
.
dims
()[
4
];
const
int
input_height
=
input_grad
->
dims
()[
3
];
const
int
input_width
=
input_grad
->
dims
()[
4
];
const
int
output_channels
=
output_grad
.
dims
()[
1
];
const
int
output_channels
=
output_grad
.
dims
()[
1
];
const
int
output_depth
=
output_grad
.
dims
()[
2
];
const
int
output_depth
=
output_grad
.
dims
()[
2
];
const
int
output_height
=
output_grad
.
dims
()[
3
];
const
int
output_height
=
output_grad
.
dims
()[
3
];
...
@@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
...
@@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
...
paddle/operators/math/pooling.cu
浏览文件 @
feaf1e2d
...
@@ -21,13 +21,13 @@ namespace math {
...
@@ -21,13 +21,13 @@ namespace math {
template
<
typename
PoolProcess
,
typename
T
>
template
<
typename
PoolProcess
,
typename
T
>
__global__
void
KernelPool2D
(
const
int
nthreads
,
const
T
*
input_data
,
__global__
void
KernelPool2D
(
const
int
nthreads
,
const
T
*
input_data
,
T
*
output_data
,
const
int
channels
,
const
int
channels
,
const
int
input_height
,
const
int
input_
height
,
const
int
input_width
,
const
int
input_
width
,
const
int
output_height
,
const
int
output_
height
,
const
int
output_width
,
const
int
output_
width
,
const
int
ksize_height
,
const
int
ksize_
height
,
const
int
ksize_width
,
const
int
ksize_
width
,
const
int
stride_height
,
const
int
stride_
height
,
const
int
stride_width
,
const
int
stride_
width
,
const
int
padding_height
,
const
int
padding_
height
,
const
int
padding_width
,
const
int
padding_
width
,
PoolProcess
pool_process
,
PoolProcess
pool_process
)
{
T
*
output_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -59,11 +59,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
...
@@ -59,11 +59,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
template
<
typename
PoolProcess
,
typename
T
>
template
<
typename
PoolProcess
,
typename
T
>
__global__
void
KernelPool2DGrad
(
__global__
void
KernelPool2DGrad
(
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
output_data
,
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
output_data
,
const
T
*
output_grad
,
T
*
input_grad
,
const
int
channels
,
const
T
*
output_grad
,
const
int
channels
,
const
int
input_height
,
const
int
input_
height
,
const
int
input_width
,
const
int
output_height
,
const
int
input_
width
,
const
int
output_height
,
const
int
output_width
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_
height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
stride_
width
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_width
,
PoolProcess
pool_process
)
{
PoolProcess
pool_process
,
T
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
offsetW
=
index
%
input_width
+
padding_width
;
int
offsetW
=
index
%
input_width
+
padding_width
;
...
@@ -107,11 +107,11 @@ __global__ void KernelPool2DGrad(
...
@@ -107,11 +107,11 @@ __global__ void KernelPool2DGrad(
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelMaxPool2DGrad
(
__global__
void
KernelMaxPool2DGrad
(
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
output_data
,
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
output_data
,
const
T
*
output_grad
,
T
*
input_grad
,
const
int
channels
,
const
T
*
output_grad
,
const
int
channels
,
const
int
input_height
,
const
int
input_
height
,
const
int
input_width
,
const
int
output_height
,
const
int
input_
width
,
const
int
output_height
,
const
int
output_width
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_
height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
stride_
width
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_width
)
{
T
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -158,16 +158,16 @@ template <typename PoolProcess, typename T>
...
@@ -158,16 +158,16 @@ template <typename PoolProcess, typename T>
class
Pool2dFunctor
<
platform
::
GPUPlace
,
PoolProcess
,
T
>
{
class
Pool2dFunctor
<
platform
::
GPUPlace
,
PoolProcess
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
stride
s
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
padding
s
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
)
{
PoolProcess
pool_process
,
framework
::
Tensor
*
output
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
output_channels
=
output
.
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_height
=
output
.
dims
()[
2
];
const
int
output_height
=
output
->
dims
()[
2
];
const
int
output_width
=
output
.
dims
()[
3
];
const
int
output_width
=
output
->
dims
()[
3
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_width
=
ksize
[
1
];
const
int
ksize_width
=
ksize
[
1
];
const
int
stride_height
=
strides
[
0
];
const
int
stride_height
=
strides
[
0
];
...
@@ -176,7 +176,7 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
...
@@ -176,7 +176,7 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
const
int
padding_width
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
1
];
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
...
@@ -187,11 +187,10 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
...
@@ -187,11 +187,10 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
PoolProcess
,
PoolProcess
,
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
nthreads
,
input_data
,
output_data
,
input_channels
,
.
stream
()
>>>
(
input_height
,
input_width
,
output_height
,
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
output_width
,
ksize_height
,
ksize_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
stride_height
,
stride_width
,
padding_height
,
stride_width
,
padding_height
,
padding_width
,
pool_process
,
output_data
);
padding_width
,
pool_process
);
}
}
};
};
...
@@ -204,11 +203,11 @@ template <typename PoolProcess, typename T>
...
@@ -204,11 +203,11 @@ template <typename PoolProcess, typename T>
class
Pool2dGradFunctor
<
platform
::
GPUPlace
,
PoolProcess
,
T
>
{
class
Pool2dGradFunctor
<
platform
::
GPUPlace
,
PoolProcess
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
)
{
PoolProcess
pool_process
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
...
@@ -225,7 +224,7 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
...
@@ -225,7 +224,7 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
input_channels
*
input_height
*
input_width
;
int
nthreads
=
batch_size
*
input_channels
*
input_height
*
input_width
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
...
@@ -237,10 +236,10 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
...
@@ -237,10 +236,10 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_
grad_data
,
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_
channels
,
input_
channels
,
input_height
,
input_width
,
output_height
,
output_width
,
input_
height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_
height
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
ksize_
width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
p
adding_width
,
pool_process
);
p
ool_process
,
input_grad_data
);
}
}
};
};
...
@@ -253,10 +252,11 @@ template <typename T>
...
@@ -253,10 +252,11 @@ template <typename T>
class
MaxPool2dGradFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool2dGradFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
...
@@ -274,7 +274,7 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
...
@@ -274,7 +274,7 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
...
@@ -285,10 +285,10 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
...
@@ -285,10 +285,10 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_
grad_data
,
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_
channels
,
input_
channels
,
input_height
,
input_width
,
output_height
,
output_width
,
input_
height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_
height
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
ksize_
width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
padding_width
);
input_grad_data
);
}
}
};
};
...
@@ -313,14 +313,16 @@ template class Pool2dGradFunctor<
...
@@ -313,14 +313,16 @@ template class Pool2dGradFunctor<
platform
::
GPUPlace
,
paddle
::
operators
::
math
::
AvgPoolGrad
<
double
>,
double
>
;
platform
::
GPUPlace
,
paddle
::
operators
::
math
::
AvgPoolGrad
<
double
>,
double
>
;
template
<
typename
PoolProcess
,
typename
T
>
template
<
typename
PoolProcess
,
typename
T
>
__global__
void
KernelPool3D
(
__global__
void
KernelPool3D
(
const
int
nthreads
,
const
T
*
input_data
,
const
int
nthreads
,
const
T
*
input_data
,
T
*
output_data
,
const
int
channels
,
const
int
channels
,
const
int
input_depth
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_width
,
const
int
input_height
,
const
int
input_width
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_width
,
const
int
output_depth
,
const
int
output_height
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
output_width
,
const
int
ksize_depth
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
const
int
stride_depth
,
const
int
stride_height
,
PoolProcess
pool_process
)
{
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
PoolProcess
pool_process
,
T
*
output_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -358,13 +360,13 @@ __global__ void KernelPool3D(
...
@@ -358,13 +360,13 @@ __global__ void KernelPool3D(
template
<
typename
PoolProcess
,
typename
T
>
template
<
typename
PoolProcess
,
typename
T
>
__global__
void
KernelPool3DGrad
(
__global__
void
KernelPool3DGrad
(
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
output_data
,
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
output_data
,
const
T
*
output_grad
,
T
*
input_grad
,
const
int
channels
,
const
T
*
output_grad
,
const
int
channels
,
const
int
input_depth
,
const
int
input_
depth
,
const
int
input_height
,
const
int
input_wid
th
,
const
int
input_
height
,
const
int
input_width
,
const
int
output_dep
th
,
const
int
output_
depth
,
const
int
output_height
,
const
int
output_wid
th
,
const
int
output_
height
,
const
int
output_width
,
const
int
ksize_dep
th
,
const
int
ksize_
depth
,
const
int
ksize_height
,
const
int
ksize_wid
th
,
const
int
ksize_
height
,
const
int
ksize_width
,
const
int
stride_dep
th
,
const
int
stride_
depth
,
const
int
stride_height
,
const
int
stride_wid
th
,
const
int
stride_
height
,
const
int
stride_width
,
const
int
padding_dep
th
,
const
int
padding_
depth
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_
height
,
const
int
padding_width
,
PoolProcess
pool_process
,
PoolProcess
pool_process
)
{
T
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
offsetW
=
index
%
input_width
+
padding_width
;
int
offsetW
=
index
%
input_width
+
padding_width
;
...
@@ -422,13 +424,12 @@ __global__ void KernelPool3DGrad(
...
@@ -422,13 +424,12 @@ __global__ void KernelPool3DGrad(
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelMaxPool3DGrad
(
__global__
void
KernelMaxPool3DGrad
(
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
output_data
,
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
output_data
,
const
T
*
output_grad
,
T
*
input_grad
,
const
int
channels
,
const
T
*
output_grad
,
const
int
channels
,
const
int
input_depth
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_width
,
const
int
input_height
,
const
int
input_width
,
const
int
output_depth
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_width
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_depth
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_height
,
const
int
padding_width
,
T
*
input_grad
)
{
const
int
padding_width
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -480,18 +481,18 @@ template <typename PoolProcess, class T>
...
@@ -480,18 +481,18 @@ template <typename PoolProcess, class T>
class
Pool3dFunctor
<
platform
::
GPUPlace
,
PoolProcess
,
T
>
{
class
Pool3dFunctor
<
platform
::
GPUPlace
,
PoolProcess
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
stride
s
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
padding
s
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
)
{
PoolProcess
pool_process
,
framework
::
Tensor
*
output
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
4
];
const
int
input_width
=
input
.
dims
()[
4
];
const
int
output_channels
=
output
.
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_depth
=
output
.
dims
()[
2
];
const
int
output_depth
=
output
->
dims
()[
2
];
const
int
output_height
=
output
.
dims
()[
3
];
const
int
output_height
=
output
->
dims
()[
3
];
const
int
output_width
=
output
.
dims
()[
4
];
const
int
output_width
=
output
->
dims
()[
4
];
const
int
ksize_depth
=
ksize
[
0
];
const
int
ksize_depth
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
1
];
const
int
ksize_height
=
ksize
[
1
];
const
int
ksize_width
=
ksize
[
2
];
const
int
ksize_width
=
ksize
[
2
];
...
@@ -503,7 +504,7 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
...
@@ -503,7 +504,7 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
const
int
padding_width
=
paddings
[
2
];
const
int
padding_width
=
paddings
[
2
];
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_depth
*
output_height
*
int
nthreads
=
batch_size
*
output_channels
*
output_depth
*
output_height
*
output_width
;
output_width
;
...
@@ -516,11 +517,11 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
...
@@ -516,11 +517,11 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_data
,
output_data
,
input_channels
,
input_depth
,
nthreads
,
input_data
,
input_channels
,
input_depth
,
input_height
,
input_
height
,
input_width
,
output_depth
,
output_height
,
output_wid
th
,
input_
width
,
output_depth
,
output_height
,
output_width
,
ksize_dep
th
,
ksize_
depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
ksize_
height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
padding_depth
,
padding_height
,
padding_width
,
pool_process
,
pool_process
);
output_data
);
}
}
};
};
...
@@ -533,11 +534,11 @@ template <typename PoolProcess, class T>
...
@@ -533,11 +534,11 @@ template <typename PoolProcess, class T>
class
Pool3dGradFunctor
<
platform
::
GPUPlace
,
PoolProcess
,
T
>
{
class
Pool3dGradFunctor
<
platform
::
GPUPlace
,
PoolProcess
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
)
{
PoolProcess
pool_process
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
...
@@ -560,7 +561,7 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
...
@@ -560,7 +561,7 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
int
nthreads
=
batch_size
*
input_channels
*
input_depth
*
input_height
*
input_width
;
batch_size
*
input_channels
*
input_depth
*
input_height
*
input_width
;
...
@@ -573,11 +574,11 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
...
@@ -573,11 +574,11 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_
grad_data
,
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_
channels
,
input_
channels
,
input_depth
,
input_height
,
input_width
,
output_depth
,
input_
depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_
height
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_wid
th
,
output_
width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_dep
th
,
stride_
depth
,
stride_height
,
stride_width
,
padding_depth
,
stride_
height
,
stride_width
,
padding_depth
,
padding_height
,
padding_
height
,
padding_width
,
pool_process
);
padding_
width
,
pool_process
,
input_grad_data
);
}
}
};
};
...
@@ -590,10 +591,11 @@ template <class T>
...
@@ -590,10 +591,11 @@ template <class T>
class
MaxPool3dGradFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool3dGradFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
...
@@ -616,7 +618,7 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
...
@@ -616,7 +618,7 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_depth
*
output_height
*
int
nthreads
=
batch_size
*
output_channels
*
output_depth
*
output_height
*
output_width
;
output_width
;
...
@@ -628,11 +630,11 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
...
@@ -628,11 +630,11 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_
grad_data
,
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_
channels
,
input_
channels
,
input_depth
,
input_height
,
input_width
,
output_depth
,
input_
depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_
height
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_wid
th
,
output_
width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_dep
th
,
stride_
depth
,
stride_height
,
stride_width
,
padding_depth
,
stride_
height
,
stride_width
,
padding_depth
,
padding_height
,
padding_
height
,
padding_width
);
padding_
width
,
input_grad_data
);
}
}
};
};
...
@@ -658,11 +660,11 @@ template class Pool3dGradFunctor<
...
@@ -658,11 +660,11 @@ template class Pool3dGradFunctor<
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelMaxPool2dWithIdx
(
__global__
void
KernelMaxPool2dWithIdx
(
const
int
nthreads
,
const
T
*
input_data
,
T
*
output_data
,
T
*
mask_data
,
const
int
nthreads
,
const
T
*
input_data
,
const
int
channels
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
const
int
input_height
,
const
int
input_width
,
const
int
output_height
,
const
int
output_
height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
output_
width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_
height
,
const
int
padding_width
)
{
const
int
padding_
width
,
T
*
output_data
,
T
*
mask_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -697,11 +699,11 @@ __global__ void KernelMaxPool2dWithIdx(
...
@@ -697,11 +699,11 @@ __global__ void KernelMaxPool2dWithIdx(
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelMaxPool2DWithIdxGrad
(
__global__
void
KernelMaxPool2DWithIdxGrad
(
const
int
nthreads
,
T
*
input_grad
,
const
T
*
output_grad
,
const
T
*
mask_data
,
const
int
nthreads
,
const
T
*
output_grad
,
const
T
*
mask_data
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
)
{
const
int
padding_height
,
const
int
padding_width
,
T
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_offset
=
index
%
input_width
;
int
w_offset
=
index
%
input_width
;
...
@@ -748,16 +750,16 @@ template <typename T>
...
@@ -748,16 +750,16 @@ template <typename T>
class
MaxPool2dWithIndexFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool2dWithIndexFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
output_channels
=
output
.
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_height
=
output
.
dims
()[
2
];
const
int
output_height
=
output
->
dims
()[
2
];
const
int
output_width
=
output
.
dims
()[
3
];
const
int
output_width
=
output
->
dims
()[
3
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_width
=
ksize
[
1
];
const
int
ksize_width
=
ksize
[
1
];
const
int
stride_height
=
strides
[
0
];
const
int
stride_height
=
strides
[
0
];
...
@@ -766,8 +768,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -766,8 +768,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
const
int
padding_width
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
1
];
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
...
@@ -777,11 +779,10 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -777,11 +779,10 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
KernelMaxPool2dWithIdx
<
KernelMaxPool2dWithIdx
<
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
nthreads
,
input_data
,
output_data
,
mask_data
,
.
stream
()
>>>
(
input_channels
,
input_height
,
input_width
,
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
ksize_width
,
stride_height
,
stride_width
,
stride_width
,
padding_height
,
padding_width
,
output_data
,
mask_data
);
padding_height
,
padding_width
);
}
}
};
};
...
@@ -794,14 +795,14 @@ template <typename T>
...
@@ -794,14 +795,14 @@ template <typename T>
class
MaxPool2dWithIndexGradFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool2dWithIndexGradFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
const
int
batch_size
=
input_grad
.
dims
()[
0
];
framework
::
Tensor
*
input_grad
)
{
const
int
input_channels
=
input_grad
.
dims
()[
1
];
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
input_height
=
input_grad
.
dims
()[
2
];
const
int
input_channels
=
input_grad
->
dims
()[
1
];
const
int
input_width
=
input_grad
.
dims
()[
3
];
const
int
input_height
=
input_grad
->
dims
()[
2
];
const
int
input_width
=
input_grad
->
dims
()[
3
];
const
int
output_height
=
output_grad
.
dims
()[
2
];
const
int
output_height
=
output_grad
.
dims
()[
2
];
const
int
output_width
=
output_grad
.
dims
()[
3
];
const
int
output_width
=
output_grad
.
dims
()[
3
];
const
int
ksize_height
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
0
];
...
@@ -813,7 +814,7 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -813,7 +814,7 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
input_channels
*
input_height
*
input_width
;
int
nthreads
=
batch_size
*
input_channels
*
input_height
*
input_width
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
...
@@ -823,11 +824,11 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -823,11 +824,11 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
KernelMaxPool2DWithIdxGrad
<
KernelMaxPool2DWithIdxGrad
<
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
nthreads
,
input_grad_data
,
output_grad
_data
,
.
stream
()
>>>
(
nthreads
,
output_grad_data
,
mask
_data
,
mask_data
,
input_channels
,
input_height
,
input_channels
,
input_height
,
input_width
,
input_width
,
output_height
,
output_width
,
output_height
,
output_width
,
ksize_height
,
ksize_
height
,
ksize_width
,
stride_height
,
ksize_
width
,
stride_height
,
stride_width
,
stride_width
,
padding_height
,
padding_width
);
padding_height
,
padding_width
,
input_grad_data
);
}
}
};
};
...
@@ -838,13 +839,13 @@ template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double>;
...
@@ -838,13 +839,13 @@ template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double>;
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelMaxPool3DWithIdx
(
__global__
void
KernelMaxPool3DWithIdx
(
const
int
nthreads
,
const
T
*
input_data
,
T
*
output_data
,
T
*
mask_data
,
const
int
nthreads
,
const
T
*
input_data
,
const
int
channels
,
const
int
channels
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_width
,
const
int
input_width
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_width
,
const
int
output_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_width
)
{
T
*
output_data
,
T
*
mask_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -886,13 +887,13 @@ __global__ void KernelMaxPool3DWithIdx(
...
@@ -886,13 +887,13 @@ __global__ void KernelMaxPool3DWithIdx(
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelMaxPool3DWithIdxGrad
(
__global__
void
KernelMaxPool3DWithIdxGrad
(
const
int
nthreads
,
T
*
input_grad
,
const
T
*
output_grad
,
const
T
*
mask
,
const
int
nthreads
,
const
T
*
output_grad
,
const
T
*
mask
,
const
int
channels
,
const
int
channels
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_width
,
const
int
input_width
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_width
,
const
int
output_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_width
)
{
T
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_offset
=
index
%
input_width
;
int
w_offset
=
index
%
input_width
;
...
@@ -952,18 +953,18 @@ template <typename T>
...
@@ -952,18 +953,18 @@ template <typename T>
class
MaxPool3dWithIndexFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool3dWithIndexFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_width
=
input
.
dims
()[
4
];
const
int
input_width
=
input
.
dims
()[
4
];
const
int
output_channels
=
output
.
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_depth
=
output
.
dims
()[
2
];
const
int
output_depth
=
output
->
dims
()[
2
];
const
int
output_height
=
output
.
dims
()[
3
];
const
int
output_height
=
output
->
dims
()[
3
];
const
int
output_width
=
output
.
dims
()[
4
];
const
int
output_width
=
output
->
dims
()[
4
];
const
int
ksize_depth
=
ksize
[
0
];
const
int
ksize_depth
=
ksize
[
0
];
const
int
ksize_height
=
ksize
[
1
];
const
int
ksize_height
=
ksize
[
1
];
const
int
ksize_width
=
ksize
[
2
];
const
int
ksize_width
=
ksize
[
2
];
...
@@ -975,8 +976,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -975,8 +976,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
const
int
padding_width
=
paddings
[
2
];
const
int
padding_width
=
paddings
[
2
];
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_depth
*
output_height
*
int
nthreads
=
batch_size
*
output_channels
*
output_depth
*
output_height
*
output_width
;
output_width
;
...
@@ -988,11 +989,10 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -988,11 +989,10 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_data
,
output_data
,
mask_data
,
input_channels
,
nthreads
,
input_data
,
input_channels
,
input_depth
,
input_height
,
input_depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_depth
,
padding_height
,
padding_width
,
output_data
,
mask_data
);
padding_width
);
}
}
};
};
...
@@ -1005,15 +1005,15 @@ template <typename T>
...
@@ -1005,15 +1005,15 @@ template <typename T>
class
MaxPool3dWithIndexGradFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool3dWithIndexGradFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
)
{
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
const
int
batch_size
=
input_grad
.
dims
()[
0
];
framework
::
Tensor
*
input_grad
)
{
const
int
input_channels
=
input_grad
.
dims
()[
1
];
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
input_depth
=
input_grad
.
dims
()[
2
];
const
int
input_channels
=
input_grad
->
dims
()[
1
];
const
int
input_height
=
input_grad
.
dims
()[
3
];
const
int
input_depth
=
input_grad
->
dims
()[
2
];
const
int
input_width
=
input_grad
.
dims
()[
4
];
const
int
input_height
=
input_grad
->
dims
()[
3
];
const
int
input_width
=
input_grad
->
dims
()[
4
];
const
int
output_depth
=
output_grad
.
dims
()[
2
];
const
int
output_depth
=
output_grad
.
dims
()[
2
];
const
int
output_height
=
output_grad
.
dims
()[
3
];
const
int
output_height
=
output_grad
.
dims
()[
3
];
const
int
output_width
=
output_grad
.
dims
()[
4
];
const
int
output_width
=
output_grad
.
dims
()[
4
];
...
@@ -1029,7 +1029,7 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -1029,7 +1029,7 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
*
mask_data
=
mask
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
.
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
int
nthreads
=
batch_size
*
input_channels
*
input_depth
*
input_height
*
input_width
;
batch_size
*
input_channels
*
input_depth
*
input_height
*
input_width
;
...
@@ -1041,11 +1041,11 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -1041,11 +1041,11 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
T
><<<
grid
,
threads
,
0
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_grad_data
,
output_grad_data
,
mask_data
,
input_channels
,
nthreads
,
output_grad_data
,
mask_data
,
input_channels
,
input_depth
,
input_
depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
input_
height
,
input_width
,
output_depth
,
output_height
,
output_width
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_
height
,
stride_width
,
padding_depth
,
padding_height
,
stride_
width
,
padding_depth
,
padding_height
,
padding_width
,
padding_width
);
input_grad_data
);
}
}
};
};
...
...
paddle/operators/math/pooling.h
浏览文件 @
feaf1e2d
...
@@ -88,60 +88,62 @@ template <typename Place, typename PoolProcess, typename T>
...
@@ -88,60 +88,62 @@ template <typename Place, typename PoolProcess, typename T>
class
Pool2dFunctor
{
class
Pool2dFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
stride
s
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
padding
s
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
);
PoolProcess
pool_compute
,
framework
::
Tensor
*
output
);
};
};
template
<
typename
Place
,
typename
PoolProcess
,
typename
T
>
template
<
typename
Place
,
typename
PoolProcess
,
typename
T
>
class
Pool2dGradFunctor
{
class
Pool2dGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
);
PoolProcess
pool_compute
,
framework
::
Tensor
*
input_grad
);
};
};
template
<
typename
Place
,
class
T
>
template
<
typename
Place
,
class
T
>
class
MaxPool2dGradFunctor
{
class
MaxPool2dGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
);
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
input_grad
);
};
};
template
<
typename
Place
,
typename
PoolProcess
,
typename
T
>
template
<
typename
Place
,
typename
PoolProcess
,
typename
T
>
class
Pool3dFunctor
{
class
Pool3dFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
stride
s
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
padding
s
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
);
PoolProcess
pool_compute
,
framework
::
Tensor
*
output
);
};
};
template
<
typename
Place
,
typename
PoolProcess
,
typename
T
>
template
<
typename
Place
,
typename
PoolProcess
,
typename
T
>
class
Pool3dGradFunctor
{
class
Pool3dGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
);
PoolProcess
pool_compute
,
framework
::
Tensor
*
input_grad
);
};
};
template
<
typename
Place
,
class
T
>
template
<
typename
Place
,
class
T
>
class
MaxPool3dGradFunctor
{
class
MaxPool3dGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
output_grad
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
);
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
input_grad
);
};
};
/*
/*
...
@@ -155,38 +157,38 @@ template <typename Place, typename T>
...
@@ -155,38 +157,38 @@ template <typename Place, typename T>
class
MaxPool2dWithIndexFunctor
{
class
MaxPool2dWithIndexFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
);
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
);
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
MaxPool2dWithIndexGradFunctor
{
class
MaxPool2dWithIndexGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
);
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
input_grad
);
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
MaxPool3dWithIndexFunctor
{
class
MaxPool3dWithIndexFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
);
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
);
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
MaxPool3dWithIndexGradFunctor
{
class
MaxPool3dWithIndexGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
input_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
);
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
input_grad
);
};
};
}
// namespace math
}
// namespace math
...
...
paddle/operators/pool_op.h
浏览文件 @
feaf1e2d
...
@@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel<T> {
...
@@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel<T> {
Place
,
paddle
::
operators
::
math
::
MaxPool
<
T
>
,
T
>
Place
,
paddle
::
operators
::
math
::
MaxPool
<
T
>
,
T
>
pool2d_forward
;
pool2d_forward
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
pool2d_forward
(
context
.
device_context
(),
*
in_x
,
*
out
,
ksize
,
strides
,
pool2d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
);
paddings
,
pool_process
,
out
);
}
else
if
(
pooling_type
==
"avg"
)
{
}
else
if
(
pooling_type
==
"avg"
)
{
paddle
::
operators
::
math
::
Pool2dFunctor
<
paddle
::
operators
::
math
::
Pool2dFunctor
<
Place
,
paddle
::
operators
::
math
::
AvgPool
<
T
>
,
T
>
Place
,
paddle
::
operators
::
math
::
AvgPool
<
T
>
,
T
>
pool2d_forward
;
pool2d_forward
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
pool2d_forward
(
context
.
device_context
(),
*
in_x
,
*
out
,
ksize
,
strides
,
pool2d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
);
paddings
,
pool_process
,
out
);
}
}
}
break
;
}
break
;
case
3
:
{
case
3
:
{
...
@@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel<T> {
...
@@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel<T> {
Place
,
paddle
::
operators
::
math
::
MaxPool
<
T
>
,
T
>
Place
,
paddle
::
operators
::
math
::
MaxPool
<
T
>
,
T
>
pool3d_forward
;
pool3d_forward
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
pool3d_forward
(
context
.
device_context
(),
*
in_x
,
*
out
,
ksize
,
strides
,
pool3d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
);
paddings
,
pool_process
,
out
);
}
else
if
(
pooling_type
==
"avg"
)
{
}
else
if
(
pooling_type
==
"avg"
)
{
paddle
::
operators
::
math
::
Pool3dFunctor
<
paddle
::
operators
::
math
::
Pool3dFunctor
<
Place
,
paddle
::
operators
::
math
::
AvgPool
<
T
>
,
T
>
Place
,
paddle
::
operators
::
math
::
AvgPool
<
T
>
,
T
>
pool3d_forward
;
pool3d_forward
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
pool3d_forward
(
context
.
device_context
(),
*
in_x
,
*
out
,
ksize
,
strides
,
pool3d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
);
paddings
,
pool_process
,
out
);
}
}
}
break
;
}
break
;
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
...
@@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel<T> {
...
@@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel<T> {
if
(
pooling_type
==
"max"
)
{
if
(
pooling_type
==
"max"
)
{
paddle
::
operators
::
math
::
MaxPool2dGradFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool2dGradFunctor
<
Place
,
T
>
pool2d_backward
;
pool2d_backward
;
pool2d_backward
(
context
.
device_context
(),
*
in_x
,
*
in_x_grad
,
*
out
,
pool2d_backward
(
context
.
device_context
(),
*
in_x
,
*
out
,
*
out_grad
,
*
out_grad
,
ksize
,
strides
,
paddings
);
ksize
,
strides
,
paddings
,
in_x_grad
);
}
else
if
(
pooling_type
==
"avg"
)
{
}
else
if
(
pooling_type
==
"avg"
)
{
paddle
::
operators
::
math
::
Pool2dGradFunctor
<
paddle
::
operators
::
math
::
Pool2dGradFunctor
<
Place
,
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
,
T
>
Place
,
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
,
T
>
pool2d_backward
;
pool2d_backward
;
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
pool_process
;
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
pool_process
;
pool2d_backward
(
context
.
device_context
(),
*
in_x
,
*
in_x_grad
,
*
out
,
pool2d_backward
(
context
.
device_context
(),
*
in_x
,
*
out
,
*
out_grad
,
*
out_grad
,
ksize
,
strides
,
paddings
,
pool_process
);
ksize
,
strides
,
paddings
,
pool_process
,
in_x_grad
);
}
}
}
break
;
}
break
;
case
3
:
{
case
3
:
{
if
(
pooling_type
==
"max"
)
{
if
(
pooling_type
==
"max"
)
{
paddle
::
operators
::
math
::
MaxPool3dGradFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool3dGradFunctor
<
Place
,
T
>
pool3d_backward
;
pool3d_backward
;
pool3d_backward
(
context
.
device_context
(),
*
in_x
,
*
in_x_grad
,
*
out
,
pool3d_backward
(
context
.
device_context
(),
*
in_x
,
*
out
,
*
out_grad
,
*
out_grad
,
ksize
,
strides
,
paddings
);
ksize
,
strides
,
paddings
,
in_x_grad
);
}
else
if
(
pooling_type
==
"avg"
)
{
}
else
if
(
pooling_type
==
"avg"
)
{
paddle
::
operators
::
math
::
Pool3dGradFunctor
<
paddle
::
operators
::
math
::
Pool3dGradFunctor
<
Place
,
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
,
T
>
Place
,
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
,
T
>
pool3d_backward
;
pool3d_backward
;
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
pool_process
;
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
pool_process
;
pool3d_backward
(
context
.
device_context
(),
*
in_x
,
*
in_x_grad
,
*
out
,
pool3d_backward
(
context
.
device_context
(),
*
in_x
,
*
out
,
*
out_grad
,
*
out_grad
,
ksize
,
strides
,
paddings
,
pool_process
);
ksize
,
strides
,
paddings
,
pool_process
,
in_x_grad
);
}
}
}
break
;
}
break
;
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
...
...
paddle/operators/pool_with_index_op.h
浏览文件 @
feaf1e2d
...
@@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
...
@@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
case
2
:
{
case
2
:
{
paddle
::
operators
::
math
::
MaxPool2dWithIndexFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool2dWithIndexFunctor
<
Place
,
T
>
pool2d_forward
;
pool2d_forward
;
pool2d_forward
(
context
.
device_context
(),
*
in_x
,
*
out
,
*
mask
,
ksize
,
pool2d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
strides
,
paddings
);
paddings
,
out
,
mask
);
}
break
;
}
break
;
case
3
:
{
case
3
:
{
paddle
::
operators
::
math
::
MaxPool3dWithIndexFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool3dWithIndexFunctor
<
Place
,
T
>
pool3d_forward
;
pool3d_forward
;
pool3d_forward
(
context
.
device_context
(),
*
in_x
,
*
out
,
*
mask
,
ksize
,
pool3d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
strides
,
paddings
);
paddings
,
out
,
mask
);
}
break
;
}
break
;
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
}
}
...
@@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
...
@@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
case
2
:
{
case
2
:
{
paddle
::
operators
::
math
::
MaxPool2dWithIndexGradFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool2dWithIndexGradFunctor
<
Place
,
T
>
pool2d_backward
;
pool2d_backward
;
pool2d_backward
(
context
.
device_context
(),
*
in_x_grad
,
*
out_grad
,
pool2d_backward
(
context
.
device_context
(),
*
out_grad
,
*
mask
,
ksize
,
*
mask
,
ksize
,
strides
,
paddings
);
strides
,
paddings
,
in_x_grad
);
}
break
;
}
break
;
case
3
:
{
case
3
:
{
paddle
::
operators
::
math
::
MaxPool3dWithIndexGradFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool3dWithIndexGradFunctor
<
Place
,
T
>
pool3d_backward
;
pool3d_backward
;
pool3d_backward
(
context
.
device_context
(),
*
in_x_grad
,
*
out_grad
,
pool3d_backward
(
context
.
device_context
(),
*
out_grad
,
*
mask
,
ksize
,
*
mask
,
ksize
,
strides
,
paddings
);
strides
,
paddings
,
in_x_grad
);
}
break
;
}
break
;
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录