Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
266c6856
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
266c6856
编写于
11月 28, 2018
作者:
D
dengkaipeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add adaptive pool 2d & 3d. test=develop
上级
eab47459
变更
14
显示空白变更内容
内联
并排
Showing
14 changed file
with
860 addition
and
318 deletion
+860
-318
paddle/fluid/API.spec
paddle/fluid/API.spec
+2
-0
paddle/fluid/operators/math/pooling.cc
paddle/fluid/operators/math/pooling.cc
+79
-64
paddle/fluid/operators/math/pooling.cu
paddle/fluid/operators/math/pooling.cu
+265
-146
paddle/fluid/operators/math/pooling.h
paddle/fluid/operators/math/pooling.h
+10
-10
paddle/fluid/operators/pool_op.cc
paddle/fluid/operators/pool_op.cc
+23
-3
paddle/fluid/operators/pool_op.h
paddle/fluid/operators/pool_op.h
+10
-6
paddle/fluid/operators/pool_with_index_op.cc
paddle/fluid/operators/pool_with_index_op.cc
+24
-3
paddle/fluid/operators/pool_with_index_op.h
paddle/fluid/operators/pool_with_index_op.h
+8
-4
paddle/fluid/operators/spp_op.h
paddle/fluid/operators/spp_op.h
+3
-3
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+186
-0
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+22
-0
python/paddle/fluid/tests/unittests/test_pool2d_op.py
python/paddle/fluid/tests/unittests/test_pool2d_op.py
+65
-26
python/paddle/fluid/tests/unittests/test_pool3d_op.py
python/paddle/fluid/tests/unittests/test_pool3d_op.py
+86
-35
python/paddle/fluid/tests/unittests/test_pool_max_op.py
python/paddle/fluid/tests/unittests/test_pool_max_op.py
+77
-18
未找到文件。
paddle/fluid/API.spec
浏览文件 @
266c6856
...
@@ -77,6 +77,8 @@ paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name']
...
@@ -77,6 +77,8 @@ paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name']
paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=('max', False, True, None))
paddle.fluid.layers.adaptive_pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=('max', False, True, None))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
...
...
paddle/fluid/operators/math/pooling.cc
浏览文件 @
266c6856
...
@@ -61,24 +61,26 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
...
@@ -61,24 +61,26 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
hstart
,
hend
;
int
wstart
,
wend
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
int
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
}
else
{
}
else
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
std
::
max
(
hstart
,
0
);
hstart
=
std
::
max
(
hstart
,
0
);
}
}
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
int
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
}
else
{
int
wstart
=
pw
*
stride_width
-
padding_width
;
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
wstart
=
std
::
max
(
wstart
,
0
);
}
}
...
@@ -136,24 +138,26 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
...
@@ -136,24 +138,26 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
hstart
,
hend
;
int
wstart
,
wend
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
int
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
}
else
{
}
else
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
std
::
max
(
hstart
,
0
);
hstart
=
std
::
max
(
hstart
,
0
);
}
}
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
int
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
}
else
{
int
wstart
=
pw
*
stride_width
-
padding_width
;
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
wstart
=
std
::
max
(
wstart
,
0
);
}
}
int
pool_size
=
(
exclusive
||
adaptive
)
int
pool_size
=
(
exclusive
||
adaptive
)
...
@@ -308,33 +312,36 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
...
@@ -308,33 +312,36 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
dstart
,
dend
;
int
hstart
,
hend
;
int
wstart
,
wend
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
pd
=
0
;
pd
<
output_depth
;
++
pd
)
{
for
(
int
pd
=
0
;
pd
<
output_depth
;
++
pd
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
dstart
=
ADAPT_START_INDEX
(
pd
,
input_depth
,
output_depth
);
dstart
=
ADAPT_START_INDEX
(
pd
,
input_depth
,
output_depth
);
int
dend
=
ADAPT_END_INDEX
(
pd
,
input_depth
,
output_depth
);
dend
=
ADAPT_END_INDEX
(
pd
,
input_depth
,
output_depth
);
}
else
{
}
else
{
int
dstart
=
pd
*
stride_depth
-
padding_depth
;
dstart
=
pd
*
stride_depth
-
padding_depth
;
int
dend
=
std
::
min
(
dstart
+
ksize_depth
,
input_depth
);
dend
=
std
::
min
(
dstart
+
ksize_depth
,
input_depth
);
dstart
=
std
::
max
(
dstart
,
0
);
dstart
=
std
::
max
(
dstart
,
0
);
}
}
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
int
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
}
else
{
}
else
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
std
::
max
(
hstart
,
0
);
hstart
=
std
::
max
(
hstart
,
0
);
}
}
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
int
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
}
else
{
int
wstart
=
pw
*
stride_width
-
padding_width
;
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
wstart
=
std
::
max
(
wstart
,
0
);
}
}
int
output_idx
=
(
pd
*
output_height
+
ph
)
*
output_width
+
pw
;
int
output_idx
=
(
pd
*
output_height
+
ph
)
*
output_width
+
pw
;
...
@@ -403,33 +410,36 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
...
@@ -403,33 +410,36 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
dstart
,
dend
;
int
hstart
,
hend
;
int
wstart
,
wend
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
pd
=
0
;
pd
<
output_depth
;
++
pd
)
{
for
(
int
pd
=
0
;
pd
<
output_depth
;
++
pd
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
dstart
=
ADAPT_START_INDEX
(
pd
,
input_depth
,
output_depth
);
dstart
=
ADAPT_START_INDEX
(
pd
,
input_depth
,
output_depth
);
int
dend
=
ADAPT_END_INDEX
(
pd
,
input_depth
,
output_depth
);
dend
=
ADAPT_END_INDEX
(
pd
,
input_depth
,
output_depth
);
}
else
{
}
else
{
int
dstart
=
pd
*
stride_depth
-
padding_depth
;
dstart
=
pd
*
stride_depth
-
padding_depth
;
int
dend
=
std
::
min
(
dstart
+
ksize_depth
,
input_depth
);
dend
=
std
::
min
(
dstart
+
ksize_depth
,
input_depth
);
dstart
=
std
::
max
(
dstart
,
0
);
dstart
=
std
::
max
(
dstart
,
0
);
}
}
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
int
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
}
else
{
}
else
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
std
::
max
(
hstart
,
0
);
hstart
=
std
::
max
(
hstart
,
0
);
}
}
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
int
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
}
else
{
int
wstart
=
pw
*
stride_width
-
padding_width
;
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
wstart
=
std
::
max
(
wstart
,
0
);
}
}
...
@@ -599,24 +609,26 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
...
@@ -599,24 +609,26 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
T1
*
output_data
=
output
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
T1
*
output_data
=
output
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
T2
*
mask_data
=
mask
->
mutable_data
<
T2
>
(
context
.
GetPlace
());
T2
*
mask_data
=
mask
->
mutable_data
<
T2
>
(
context
.
GetPlace
());
int
hstart
,
hend
;
int
wstart
,
wend
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
int
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
}
else
{
}
else
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
std
::
max
(
hstart
,
0
);
hstart
=
std
::
max
(
hstart
,
0
);
}
}
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
int
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
}
else
{
int
wstart
=
pw
*
stride_width
-
padding_width
;
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
wstart
=
std
::
max
(
wstart
,
0
);
}
}
...
@@ -655,7 +667,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> {
...
@@ -655,7 +667,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
)
{
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
input_height
=
input_grad
->
dims
()[
2
];
const
int
input_height
=
input_grad
->
dims
()[
2
];
...
@@ -708,8 +720,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
...
@@ -708,8 +720,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
output
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
mask
)
{
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
3
];
const
int
input_height
=
input
.
dims
()[
3
];
...
@@ -734,33 +746,36 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
...
@@ -734,33 +746,36 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
T1
*
output_data
=
output
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
T1
*
output_data
=
output
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
T2
*
mask_data
=
mask
->
mutable_data
<
T2
>
(
context
.
GetPlace
());
T2
*
mask_data
=
mask
->
mutable_data
<
T2
>
(
context
.
GetPlace
());
int
dstart
,
dend
;
int
hstart
,
hend
;
int
wstart
,
wend
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
pd
=
0
;
pd
<
output_depth
;
++
pd
)
{
for
(
int
pd
=
0
;
pd
<
output_depth
;
++
pd
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
dstart
=
ADAPT_START_INDEX
(
pd
,
input_depth
,
output_depth
);
dstart
=
ADAPT_START_INDEX
(
pd
,
input_depth
,
output_depth
);
int
dend
=
ADAPT_END_INDEX
(
pd
,
input_depth
,
output_depth
);
dend
=
ADAPT_END_INDEX
(
pd
,
input_depth
,
output_depth
);
}
else
{
}
else
{
int
dstart
=
pd
*
stride_depth
-
padding_depth
;
dstart
=
pd
*
stride_depth
-
padding_depth
;
int
dend
=
std
::
min
(
dstart
+
ksize_depth
,
input_depth
);
dend
=
std
::
min
(
dstart
+
ksize_depth
,
input_depth
);
dstart
=
std
::
max
(
dstart
,
0
);
dstart
=
std
::
max
(
dstart
,
0
);
}
}
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
int
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
}
else
{
}
else
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
std
::
max
(
hstart
,
0
);
hstart
=
std
::
max
(
hstart
,
0
);
}
}
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
if
(
adaptive
)
{
if
(
adaptive
)
{
int
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
int
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
}
else
{
int
wstart
=
pw
*
stride_width
-
padding_width
;
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
wstart
=
std
::
max
(
wstart
,
0
);
}
}
...
@@ -804,7 +819,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> {
...
@@ -804,7 +819,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
)
{
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
input_depth
=
input_grad
->
dims
()[
2
];
const
int
input_depth
=
input_grad
->
dims
()[
2
];
...
...
paddle/fluid/operators/math/pooling.cu
浏览文件 @
266c6856
...
@@ -21,6 +21,18 @@ namespace paddle {
...
@@ -21,6 +21,18 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
__device__
__forceinline__
int
ADAPT_START_INDEX
(
int
ph
,
int
input_size
,
int
output_size
)
{
return
static_cast
<
int
>
(
floor
(
static_cast
<
double
>
(
ph
*
input_size
)
/
output_size
));
}
__device__
__forceinline__
int
ADAPT_END_INDEX
(
int
ph
,
int
input_size
,
int
output_size
)
{
return
static_cast
<
int
>
(
ceil
(
static_cast
<
double
>
((
ph
+
1
)
*
input_size
)
/
output_size
));
}
template
<
typename
PoolProcess
,
typename
T
>
template
<
typename
PoolProcess
,
typename
T
>
__global__
void
KernelPool2D
(
const
int
nthreads
,
const
T
*
input_data
,
__global__
void
KernelPool2D
(
const
int
nthreads
,
const
T
*
input_data
,
const
int
channels
,
const
int
input_height
,
const
int
channels
,
const
int
input_height
,
...
@@ -37,19 +49,21 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
...
@@ -37,19 +49,21 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
int
c
=
(
index
/
output_width
/
output_height
)
%
channels
;
int
c
=
(
index
/
output_width
/
output_height
)
%
channels
;
int
batch_idx
=
index
/
output_width
/
output_height
/
channels
;
int
batch_idx
=
index
/
output_width
/
output_height
/
channels
;
int
hstart
,
hend
;
int
wstart
,
wend
;
if
(
adaptive
)
{
if
(
adaptive
)
{
int
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
int
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
int
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
int
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
}
else
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
max
(
hstart
,
0
);
hstart
=
max
(
hstart
,
0
);
int
wstart
=
pw
*
stride_width
-
padding_width
;
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
}
}
...
@@ -74,7 +88,7 @@ __global__ void KernelPool2DGrad(
...
@@ -74,7 +88,7 @@ __global__ void KernelPool2DGrad(
const
int
input_width
,
const
int
output_height
,
const
int
output_width
,
const
int
input_width
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
PoolProcess
pool_process
,
bool
exclusive
,
T
*
input_grad
)
{
PoolProcess
pool_process
,
bool
exclusive
,
bool
adaptive
,
T
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
offsetW
=
index
%
input_width
+
padding_width
;
int
offsetW
=
index
%
input_width
+
padding_width
;
...
@@ -82,14 +96,24 @@ __global__ void KernelPool2DGrad(
...
@@ -82,14 +96,24 @@ __global__ void KernelPool2DGrad(
int
offsetC
=
(
index
/
input_width
/
input_height
)
%
channels
;
int
offsetC
=
(
index
/
input_width
/
input_height
)
%
channels
;
int
batch_idx
=
index
/
input_width
/
input_height
/
channels
;
int
batch_idx
=
index
/
input_width
/
input_height
/
channels
;
int
phstart
=
(
offsetH
<
ksize_height
)
int
phstart
,
phend
;
int
pwstart
,
pwend
;
if
(
adaptive
)
{
phstart
=
offsetH
*
output_height
/
input_height
;
phend
=
min
((
offsetH
+
1
)
*
output_height
/
input_height
+
1
,
output_height
);
pwstart
=
offsetW
*
output_width
/
input_width
;
pwend
=
min
((
offsetW
+
1
)
*
output_width
/
input_width
+
1
,
output_width
);
}
else
{
phstart
=
(
offsetH
<
ksize_height
)
?
0
?
0
:
(
offsetH
-
ksize_height
)
/
stride_height
+
1
;
:
(
offsetH
-
ksize_height
)
/
stride_height
+
1
;
int
pwstart
=
(
offsetW
<
ksize_width
)
pwstart
=
(
offsetW
<
ksize_width
)
?
0
?
0
:
(
offsetW
-
ksize_width
)
/
stride_width
+
1
;
:
(
offsetW
-
ksize_width
)
/
stride_width
+
1
;
int
phend
=
min
(
offsetH
/
stride_height
+
1
,
output_height
);
phend
=
min
(
offsetH
/
stride_height
+
1
,
output_height
);
int
pwend
=
min
(
offsetW
/
stride_width
+
1
,
output_width
);
pwend
=
min
(
offsetW
/
stride_width
+
1
,
output_width
);
}
T
gradient
=
0
;
T
gradient
=
0
;
T
input
=
input_data
[
index
];
T
input
=
input_data
[
index
];
int
output_idx
=
int
output_idx
=
...
@@ -98,14 +122,22 @@ __global__ void KernelPool2DGrad(
...
@@ -98,14 +122,22 @@ __global__ void KernelPool2DGrad(
output_grad
+=
output_idx
;
output_grad
+=
output_idx
;
for
(
int
ph
=
phstart
;
ph
<
phend
;
++
ph
)
{
for
(
int
ph
=
phstart
;
ph
<
phend
;
++
ph
)
{
for
(
int
pw
=
pwstart
;
pw
<
pwend
;
++
pw
)
{
for
(
int
pw
=
pwstart
;
pw
<
pwend
;
++
pw
)
{
int
pool_size
;
if
(
adaptive
)
{
pool_size
=
static_cast
<
int
>
(
ceil
(
static_cast
<
double
>
(
input_height
)
/
ksize_height
))
*
static_cast
<
int
>
(
ceil
(
static_cast
<
double
>
(
input_width
)
/
ksize_width
));
}
else
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
wstart
=
pw
*
stride_width
-
padding_width
;
int
wstart
=
pw
*
stride_width
-
padding_width
;
int
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
int
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
int
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
int
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
hstart
=
max
(
hstart
,
0
);
hstart
=
max
(
hstart
,
0
);
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
int
pool_size
=
exclusive
?
(
hend
-
hstart
)
*
(
wend
-
wstart
)
pool_size
=
exclusive
?
(
hend
-
hstart
)
*
(
wend
-
wstart
)
:
ksize_height
*
ksize_width
;
:
ksize_height
*
ksize_width
;
}
int
output_sub_idx
=
ph
*
output_width
+
pw
;
int
output_sub_idx
=
ph
*
output_width
+
pw
;
pool_process
.
compute
(
input
,
output_data
[
output_sub_idx
],
pool_process
.
compute
(
input
,
output_data
[
output_sub_idx
],
output_grad
[
output_sub_idx
],
output_grad
[
output_sub_idx
],
...
@@ -189,7 +221,7 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
...
@@ -189,7 +221,7 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
KernelPool2D
<
PoolProcess
,
T
><<<
grid
,
threads
,
0
,
stream
>>>
(
KernelPool2D
<
PoolProcess
,
T
><<<
grid
,
threads
,
0
,
stream
>>>
(
nthreads
,
input
,
input_channels
,
input_height
,
input_width
,
output_height
,
nthreads
,
input
,
input_channels
,
input_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
stride_width
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
pool_compute
,
exclusive
,
output
);
padding_height
,
padding_width
,
pool_compute
,
exclusive
,
false
,
output
);
}
}
/*
/*
...
@@ -204,7 +236,7 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
...
@@ -204,7 +236,7 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
,
bool
exclusive
,
framework
::
Tensor
*
output
)
{
bool
exclusive
,
bool
adaptive
,
framework
::
Tensor
*
output
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
...
@@ -231,7 +263,7 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
...
@@ -231,7 +263,7 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
pool_process
,
exclusive
,
stride_width
,
padding_height
,
padding_width
,
pool_process
,
exclusive
,
output_data
);
adaptive
,
output_data
);
}
}
};
};
...
@@ -250,7 +282,8 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
...
@@ -250,7 +282,8 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
,
bool
exclusive
,
framework
::
Tensor
*
input_grad
)
{
bool
exclusive
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
...
@@ -278,7 +311,7 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
...
@@ -278,7 +311,7 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_channels
,
nthreads
,
input_data
,
output_data
,
output_grad_data
,
input_channels
,
input_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
input_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
pool_process
,
exclusive
,
input_grad_data
);
pool_process
,
exclusive
,
adaptive
,
input_grad_data
);
}
}
};
};
...
@@ -367,7 +400,7 @@ __global__ void KernelPool3D(
...
@@ -367,7 +400,7 @@ __global__ void KernelPool3D(
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
PoolProcess
pool_process
,
bool
exclusive
,
T
*
output_data
)
{
PoolProcess
pool_process
,
bool
exclusive
,
bool
adaptive
,
T
*
output_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -376,15 +409,30 @@ __global__ void KernelPool3D(
...
@@ -376,15 +409,30 @@ __global__ void KernelPool3D(
int
c
=
(
index
/
output_width
/
output_height
/
output_depth
)
%
channels
;
int
c
=
(
index
/
output_width
/
output_height
/
output_depth
)
%
channels
;
int
batch_idx
=
int
batch_idx
=
index
/
output_width
/
output_height
/
output_depth
/
channels
;
index
/
output_width
/
output_height
/
output_depth
/
channels
;
int
dstart
=
pd
*
stride_depth
-
padding_depth
;
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
dstart
,
dend
;
int
wstart
=
pw
*
stride_width
-
padding_width
;
int
hstart
,
hend
;
int
dend
=
min
(
dstart
+
ksize_depth
,
input_depth
);
int
wstart
,
wend
;
int
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
if
(
adaptive
)
{
int
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
dstart
=
ADAPT_START_INDEX
(
pd
,
input_depth
,
output_depth
);
dend
=
ADAPT_END_INDEX
(
pd
,
input_depth
,
output_depth
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
dstart
=
pd
*
stride_depth
-
padding_depth
;
hstart
=
ph
*
stride_height
-
padding_height
;
wstart
=
pw
*
stride_width
-
padding_width
;
dend
=
min
(
dstart
+
ksize_depth
,
input_depth
);
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
dstart
=
max
(
dstart
,
0
);
dstart
=
max
(
dstart
,
0
);
hstart
=
max
(
hstart
,
0
);
hstart
=
max
(
hstart
,
0
);
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
}
T
ele
=
pool_process
.
initial
();
T
ele
=
pool_process
.
initial
();
input_data
+=
input_data
+=
(
batch_idx
*
channels
+
c
)
*
input_depth
*
input_height
*
input_width
;
(
batch_idx
*
channels
+
c
)
*
input_depth
*
input_height
*
input_width
;
...
@@ -396,7 +444,7 @@ __global__ void KernelPool3D(
...
@@ -396,7 +444,7 @@ __global__ void KernelPool3D(
}
}
}
}
}
}
int
pool_size
=
exclusive
int
pool_size
=
(
exclusive
||
adaptive
)
?
(
dend
-
dstart
)
*
(
hend
-
hstart
)
*
(
wend
-
wstart
)
?
(
dend
-
dstart
)
*
(
hend
-
hstart
)
*
(
wend
-
wstart
)
:
ksize_depth
*
ksize_height
*
ksize_width
;
:
ksize_depth
*
ksize_height
*
ksize_width
;
pool_process
.
finalize
(
static_cast
<
T
>
(
pool_size
),
&
ele
);
pool_process
.
finalize
(
static_cast
<
T
>
(
pool_size
),
&
ele
);
...
@@ -413,7 +461,7 @@ __global__ void KernelPool3DGrad(
...
@@ -413,7 +461,7 @@ __global__ void KernelPool3DGrad(
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
PoolProcess
pool_process
,
const
int
padding_height
,
const
int
padding_width
,
PoolProcess
pool_process
,
bool
exclusive
,
T
*
input_grad
)
{
bool
exclusive
,
bool
adaptive
,
T
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
offsetW
=
index
%
input_width
+
padding_width
;
int
offsetW
=
index
%
input_width
+
padding_width
;
...
@@ -423,18 +471,31 @@ __global__ void KernelPool3DGrad(
...
@@ -423,18 +471,31 @@ __global__ void KernelPool3DGrad(
int
offsetC
=
(
index
/
input_width
/
input_height
/
input_depth
)
%
channels
;
int
offsetC
=
(
index
/
input_width
/
input_height
/
input_depth
)
%
channels
;
int
batch_idx
=
index
/
input_width
/
input_height
/
input_depth
/
channels
;
int
batch_idx
=
index
/
input_width
/
input_height
/
input_depth
/
channels
;
int
pdstart
=
(
offsetD
<
ksize_depth
)
int
pdstart
,
pdend
;
int
phstart
,
phend
;
int
pwstart
,
pwend
;
if
(
adaptive
)
{
pdstart
=
offsetD
*
output_depth
/
input_depth
;
pdend
=
min
((
offsetD
+
1
)
*
output_depth
/
input_depth
+
1
,
output_depth
);
phstart
=
offsetH
*
output_height
/
input_height
;
phend
=
min
((
offsetH
+
1
)
*
output_height
/
input_height
+
1
,
output_height
);
pwstart
=
offsetW
*
output_width
/
input_width
;
pwend
=
min
((
offsetW
+
1
)
*
output_width
/
input_width
+
1
,
output_width
);
}
else
{
pdstart
=
(
offsetD
<
ksize_depth
)
?
0
?
0
:
(
offsetD
-
ksize_depth
)
/
stride_depth
+
1
;
:
(
offsetD
-
ksize_depth
)
/
stride_depth
+
1
;
int
phstart
=
(
offsetH
<
ksize_height
)
phstart
=
(
offsetH
<
ksize_height
)
?
0
?
0
:
(
offsetH
-
ksize_height
)
/
stride_height
+
1
;
:
(
offsetH
-
ksize_height
)
/
stride_height
+
1
;
int
pwstart
=
(
offsetW
<
ksize_width
)
pwstart
=
(
offsetW
<
ksize_width
)
?
0
?
0
:
(
offsetW
-
ksize_width
)
/
stride_width
+
1
;
:
(
offsetW
-
ksize_width
)
/
stride_width
+
1
;
int
pdend
=
min
((
offsetD
)
/
stride_depth
+
1
,
output_depth
);
pdend
=
min
((
offsetD
)
/
stride_depth
+
1
,
output_depth
);
int
phend
=
min
((
offsetH
)
/
stride_height
+
1
,
output_height
);
phend
=
min
((
offsetH
)
/
stride_height
+
1
,
output_height
);
int
pwend
=
min
((
offsetW
)
/
stride_width
+
1
,
output_width
);
pwend
=
min
((
offsetW
)
/
stride_width
+
1
,
output_width
);
}
T
gradient
=
0
;
T
gradient
=
0
;
T
input
=
input_data
[
index
];
T
input
=
input_data
[
index
];
...
@@ -447,6 +508,16 @@ __global__ void KernelPool3DGrad(
...
@@ -447,6 +508,16 @@ __global__ void KernelPool3DGrad(
for
(
int
ph
=
phstart
;
ph
<
phend
;
++
ph
)
{
for
(
int
ph
=
phstart
;
ph
<
phend
;
++
ph
)
{
for
(
int
pw
=
pwstart
;
pw
<
pwend
;
++
pw
)
{
for
(
int
pw
=
pwstart
;
pw
<
pwend
;
++
pw
)
{
// figure out the pooling size
// figure out the pooling size
int
pool_size
;
if
(
adaptive
)
{
pool_size
=
static_cast
<
int
>
(
ceil
(
static_cast
<
double
>
(
input_depth
)
/
ksize_depth
))
*
static_cast
<
int
>
(
ceil
(
static_cast
<
double
>
(
input_height
)
/
ksize_height
))
*
static_cast
<
int
>
(
ceil
(
static_cast
<
double
>
(
input_width
)
/
ksize_width
));
}
else
{
int
dstart
=
pd
*
stride_depth
-
padding_depth
;
int
dstart
=
pd
*
stride_depth
-
padding_depth
;
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
wstart
=
pw
*
stride_width
-
padding_width
;
int
wstart
=
pw
*
stride_width
-
padding_width
;
...
@@ -456,9 +527,10 @@ __global__ void KernelPool3DGrad(
...
@@ -456,9 +527,10 @@ __global__ void KernelPool3DGrad(
dstart
=
max
(
dstart
,
0
);
dstart
=
max
(
dstart
,
0
);
hstart
=
max
(
hstart
,
0
);
hstart
=
max
(
hstart
,
0
);
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
int
pool_size
=
pool_size
=
exclusive
?
(
dend
-
dstart
)
*
(
hend
-
hstart
)
*
(
wend
-
wstart
)
exclusive
?
(
dend
-
dstart
)
*
(
hend
-
hstart
)
*
(
wend
-
wstart
)
:
ksize_depth
*
ksize_height
*
ksize_width
;
:
ksize_depth
*
ksize_height
*
ksize_width
;
}
int
output_sub_idx
=
(
pd
*
output_height
+
ph
)
*
output_width
+
pw
;
int
output_sub_idx
=
(
pd
*
output_height
+
ph
)
*
output_width
+
pw
;
pool_process
.
compute
(
input
,
output_data
[
output_sub_idx
],
pool_process
.
compute
(
input
,
output_data
[
output_sub_idx
],
output_grad
[
output_sub_idx
],
output_grad
[
output_sub_idx
],
...
@@ -533,7 +605,7 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
...
@@ -533,7 +605,7 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
,
bool
exclusive
,
framework
::
Tensor
*
output
)
{
bool
exclusive
,
bool
adaptive
,
framework
::
Tensor
*
output
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
...
@@ -567,7 +639,7 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
...
@@ -567,7 +639,7 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
pool_process
,
exclusive
,
padding_depth
,
padding_height
,
padding_width
,
pool_process
,
exclusive
,
output_data
);
adaptive
,
output_data
);
}
}
};
};
...
@@ -586,7 +658,8 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
...
@@ -586,7 +658,8 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_process
,
bool
exclusive
,
framework
::
Tensor
*
input_grad
)
{
bool
exclusive
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
...
@@ -622,7 +695,7 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
...
@@ -622,7 +695,7 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
input_depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
input_depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
pool_process
,
exclusive
,
input_grad_data
);
padding_width
,
pool_process
,
exclusive
,
adaptive
,
input_grad_data
);
}
}
};
};
...
@@ -711,7 +784,7 @@ __global__ void KernelMaxPool2dWithIdx(
...
@@ -711,7 +784,7 @@ __global__ void KernelMaxPool2dWithIdx(
const
int
input_height
,
const
int
input_width
,
const
int
output_height
,
const
int
input_height
,
const
int
input_width
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
T1
*
output_data
,
T2
*
mask_data
)
{
const
int
padding_width
,
bool
adaptive
,
T1
*
output_data
,
T2
*
mask_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -719,13 +792,23 @@ __global__ void KernelMaxPool2dWithIdx(
...
@@ -719,13 +792,23 @@ __global__ void KernelMaxPool2dWithIdx(
int
c
=
(
index
/
output_width
/
output_height
)
%
channels
;
int
c
=
(
index
/
output_width
/
output_height
)
%
channels
;
int
batch_idx
=
index
/
output_width
/
output_height
/
channels
;
int
batch_idx
=
index
/
output_width
/
output_height
/
channels
;
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
hstart
,
hend
;
int
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
int
wstart
,
wend
;
if
(
adaptive
)
{
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
hstart
=
ph
*
stride_height
-
padding_height
;
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
max
(
hstart
,
0
);
hstart
=
max
(
hstart
,
0
);
int
wstart
=
pw
*
stride_width
-
padding_width
;
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
}
input_data
+=
(
batch_idx
*
channels
+
c
)
*
input_height
*
input_width
;
input_data
+=
(
batch_idx
*
channels
+
c
)
*
input_height
*
input_width
;
T1
ele
=
-
FLT_MAX
;
T1
ele
=
-
FLT_MAX
;
...
@@ -750,36 +833,46 @@ __global__ void KernelMaxPool2DWithIdxGrad(
...
@@ -750,36 +833,46 @@ __global__ void KernelMaxPool2DWithIdxGrad(
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
T1
*
input_grad
)
{
const
int
padding_height
,
const
int
padding_width
,
bool
adaptive
,
T1
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_offset
=
index
%
input_width
;
int
offsetW
=
index
%
input_width
;
int
h_offset
=
(
index
/
input_width
)
%
input_height
;
int
offsetH
=
(
index
/
input_width
)
%
input_height
;
int
c_offset
=
(
index
/
input_width
/
input_height
)
%
channels
;
int
offsetC
=
(
index
/
input_width
/
input_height
)
%
channels
;
int
batch_idx
=
index
/
input_width
/
input_height
/
channels
;
int
batch_idx
=
index
/
input_width
/
input_height
/
channels
;
int
ph_start
=
int
phstart
,
phend
;
(
h_offset
+
padding_height
<
ksize_height
)
int
pwstart
,
pwend
;
if
(
adaptive
)
{
phstart
=
offsetH
*
output_height
/
input_height
;
phend
=
min
((
offsetH
+
1
)
*
output_height
/
input_height
+
1
,
output_height
);
pwstart
=
offsetW
*
output_width
/
input_width
;
pwend
=
min
((
offsetW
+
1
)
*
output_width
/
input_width
+
1
,
output_width
);
}
else
{
phstart
=
(
offsetH
+
padding_height
<
ksize_height
)
?
0
?
0
:
(
h_offset
+
padding_height
-
ksize_height
)
/
stride_height
+
1
;
:
(
offsetH
+
padding_height
-
ksize_height
)
/
stride_height
+
1
;
int
pw_
start
=
pw
start
=
(
w_offset
+
padding_width
<
ksize_width
)
(
offsetW
+
padding_width
<
ksize_width
)
?
0
?
0
:
(
w_offset
+
padding_width
-
ksize_width
)
/
stride_width
+
1
;
:
(
offsetW
+
padding_width
-
ksize_width
)
/
stride_width
+
1
;
int
ph_
end
=
ph
end
=
min
((
h_offset
+
padding_height
)
/
stride_height
+
1
,
output_height
);
min
((
offsetH
+
padding_height
)
/
stride_height
+
1
,
output_height
);
int
pw_end
=
pwend
=
min
((
offsetW
+
padding_width
)
/
stride_width
+
1
,
output_width
);
min
((
w_offset
+
padding_width
)
/
stride_width
+
1
,
output_width
);
}
T1
gradient
=
0
;
T1
gradient
=
0
;
int
input_current_featuremap_idx
=
h_offset
*
input_width
+
w_offset
;
int
input_current_featuremap_idx
=
offsetH
*
input_width
+
offsetW
;
int
output_idx
=
int
output_idx
=
(
batch_idx
*
channels
+
c_offset
)
*
output_height
*
output_width
;
(
batch_idx
*
channels
+
offsetC
)
*
output_height
*
output_width
;
mask_data
+=
output_idx
;
mask_data
+=
output_idx
;
output_grad
+=
output_idx
;
output_grad
+=
output_idx
;
for
(
int
ph
=
ph
_start
;
ph
<
ph_
end
;
++
ph
)
{
for
(
int
ph
=
ph
start
;
ph
<
ph
end
;
++
ph
)
{
for
(
int
pw
=
pw
_start
;
pw
<
pw_
end
;
++
pw
)
{
for
(
int
pw
=
pw
start
;
pw
<
pw
end
;
++
pw
)
{
if
(
mask_data
[
ph
*
output_width
+
pw
]
==
input_current_featuremap_idx
)
if
(
mask_data
[
ph
*
output_width
+
pw
]
==
input_current_featuremap_idx
)
gradient
+=
output_grad
[
ph
*
output_width
+
pw
];
gradient
+=
output_grad
[
ph
*
output_width
+
pw
];
}
}
...
@@ -799,8 +892,8 @@ class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
...
@@ -799,8 +892,8 @@ class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
output
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
mask
)
{
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
...
@@ -827,7 +920,8 @@ class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
...
@@ -827,7 +920,8 @@ class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
KernelMaxPool2dWithIdx
<
T1
,
T2
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
KernelMaxPool2dWithIdx
<
T1
,
T2
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
output_data
,
mask_data
);
stride_width
,
padding_height
,
padding_width
,
adaptive
,
output_data
,
mask_data
);
}
}
};
};
...
@@ -843,7 +937,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
...
@@ -843,7 +937,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
)
{
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
input_channels
=
input_grad
->
dims
()[
1
];
const
int
input_channels
=
input_grad
->
dims
()[
1
];
...
@@ -870,7 +964,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
...
@@ -870,7 +964,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
KernelMaxPool2DWithIdxGrad
<
T1
,
T2
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
KernelMaxPool2DWithIdxGrad
<
T1
,
T2
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
nthreads
,
output_grad_data
,
mask_data
,
input_channels
,
input_height
,
nthreads
,
output_grad_data
,
mask_data
,
input_channels
,
input_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
adaptive
,
input_grad_data
);
input_grad_data
);
}
}
};
};
...
@@ -892,7 +986,7 @@ __global__ void KernelMaxPool3DWithIdx(
...
@@ -892,7 +986,7 @@ __global__ void KernelMaxPool3DWithIdx(
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
T1
*
output_data
,
T2
*
mask_data
)
{
bool
adaptive
,
T1
*
output_data
,
T2
*
mask_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -902,15 +996,29 @@ __global__ void KernelMaxPool3DWithIdx(
...
@@ -902,15 +996,29 @@ __global__ void KernelMaxPool3DWithIdx(
int
batch_idx
=
int
batch_idx
=
index
/
output_width
/
output_height
/
output_depth
/
channels
;
index
/
output_width
/
output_height
/
output_depth
/
channels
;
int
dstart
=
pd
*
stride_depth
-
padding_depth
;
int
dstart
,
dend
;
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
hstart
,
hend
;
int
wstart
=
pw
*
stride_width
-
padding_width
;
int
wstart
,
wend
;
int
dend
=
min
(
dstart
+
ksize_depth
,
input_depth
);
if
(
adaptive
)
{
int
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
dstart
=
ADAPT_START_INDEX
(
pd
,
input_depth
,
output_depth
);
int
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
dend
=
ADAPT_END_INDEX
(
pd
,
input_depth
,
output_depth
);
hstart
=
ADAPT_START_INDEX
(
ph
,
input_height
,
output_height
);
hend
=
ADAPT_END_INDEX
(
ph
,
input_height
,
output_height
);
wstart
=
ADAPT_START_INDEX
(
pw
,
input_width
,
output_width
);
wend
=
ADAPT_END_INDEX
(
pw
,
input_width
,
output_width
);
}
else
{
dstart
=
pd
*
stride_depth
-
padding_depth
;
hstart
=
ph
*
stride_height
-
padding_height
;
wstart
=
pw
*
stride_width
-
padding_width
;
dend
=
min
(
dstart
+
ksize_depth
,
input_depth
);
hend
=
min
(
hstart
+
ksize_height
,
input_height
);
wend
=
min
(
wstart
+
ksize_width
,
input_width
);
dstart
=
max
(
dstart
,
0
);
dstart
=
max
(
dstart
,
0
);
hstart
=
max
(
hstart
,
0
);
hstart
=
max
(
hstart
,
0
);
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
}
T1
ele
=
-
FLT_MAX
;
T1
ele
=
-
FLT_MAX
;
int
max_index
=
-
1
;
int
max_index
=
-
1
;
...
@@ -940,46 +1048,56 @@ __global__ void KernelMaxPool3DWithIdxGrad(
...
@@ -940,46 +1048,56 @@ __global__ void KernelMaxPool3DWithIdxGrad(
const
int
output_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
output_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
T1
*
input_grad
)
{
const
int
padding_width
,
bool
adaptive
,
T1
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_offset
=
index
%
input_width
;
int
offsetW
=
index
%
input_width
;
int
h_offset
=
(
index
/
input_width
)
%
input_height
;
int
offsetH
=
(
index
/
input_width
)
%
input_height
;
int
d_offset
=
(
index
/
input_width
/
input_height
)
%
input_depth
;
int
offsetD
=
(
index
/
input_width
/
input_height
)
%
input_depth
;
int
c_offset
=
int
offsetC
=
(
index
/
input_width
/
input_height
/
input_depth
)
%
channels
;
(
index
/
input_width
/
input_height
/
input_depth
)
%
channels
;
int
batch_idx
=
index
/
input_width
/
input_height
/
input_depth
/
channels
;
int
batch_idx
=
index
/
input_width
/
input_height
/
input_depth
/
channels
;
int
pd_start
=
int
pdstart
,
pdend
;
(
d_offset
+
padding_depth
<
ksize_depth
)
int
phstart
,
phend
;
int
pwstart
,
pwend
;
if
(
adaptive
)
{
pdstart
=
offsetD
*
output_depth
/
input_depth
;
pdend
=
min
((
offsetD
+
1
)
*
output_depth
/
input_depth
+
1
,
output_depth
);
phstart
=
offsetH
*
output_height
/
input_height
;
phend
=
min
((
offsetH
+
1
)
*
output_height
/
input_height
+
1
,
output_height
);
pwstart
=
offsetW
*
output_width
/
input_width
;
pwend
=
min
((
offsetW
+
1
)
*
output_width
/
input_width
+
1
,
output_width
);
}
else
{
pdstart
=
(
offsetD
+
padding_depth
<
ksize_depth
)
?
0
?
0
:
(
d_offset
+
padding_depth
-
ksize_depth
)
/
stride_depth
+
1
;
:
(
offsetD
+
padding_depth
-
ksize_depth
)
/
stride_depth
+
1
;
int
ph_
start
=
ph
start
=
(
h_offset
+
padding_height
<
ksize_height
)
(
offsetH
+
padding_height
<
ksize_height
)
?
0
?
0
:
(
h_offset
+
padding_height
-
ksize_height
)
/
stride_height
+
1
;
:
(
offsetH
+
padding_height
-
ksize_height
)
/
stride_height
+
1
;
int
pw_
start
=
pw
start
=
(
w_offset
+
padding_width
<
ksize_width
)
(
offsetW
+
padding_width
<
ksize_width
)
?
0
?
0
:
(
w_offset
+
padding_width
-
ksize_width
)
/
stride_width
+
1
;
:
(
offsetW
+
padding_width
-
ksize_width
)
/
stride_width
+
1
;
int
pd_end
=
pdend
=
min
((
offsetD
+
padding_depth
)
/
stride_depth
+
1
,
output_depth
);
min
((
d_offset
+
padding_depth
)
/
stride_depth
+
1
,
output_depth
);
phend
=
int
ph_end
=
min
((
offsetH
+
padding_height
)
/
stride_height
+
1
,
output_height
);
min
((
h_offset
+
padding_height
)
/
stride_height
+
1
,
output_height
);
pwend
=
min
((
offsetW
+
padding_width
)
/
stride_width
+
1
,
output_width
);
int
pw_end
=
}
min
((
w_offset
+
padding_width
)
/
stride_width
+
1
,
output_width
);
T1
gradient
=
0
;
T1
gradient
=
0
;
int
input_current_feature_map_idx
=
int
input_current_feature_map_idx
=
(
d_offset
*
input_height
+
h_offset
)
*
input_width
+
w_offset
;
(
offsetD
*
input_height
+
offsetH
)
*
input_width
+
offsetW
;
int
output_idx
=
(
batch_idx
*
channels
+
c_offset
)
*
output_depth
*
int
output_idx
=
(
batch_idx
*
channels
+
offsetC
)
*
output_depth
*
output_height
*
output_width
;
output_height
*
output_width
;
mask
+=
output_idx
;
mask
+=
output_idx
;
output_grad
+=
output_idx
;
output_grad
+=
output_idx
;
for
(
int
pd
=
pd
_start
;
pd
<
pd_
end
;
++
pd
)
{
for
(
int
pd
=
pd
start
;
pd
<
pd
end
;
++
pd
)
{
for
(
int
ph
=
ph
_start
;
ph
<
ph_
end
;
++
ph
)
{
for
(
int
ph
=
ph
start
;
ph
<
ph
end
;
++
ph
)
{
for
(
int
pw
=
pw
_start
;
pw
<
pw_
end
;
++
pw
)
{
for
(
int
pw
=
pw
start
;
pw
<
pw
end
;
++
pw
)
{
if
(
mask
[(
pd
*
output_height
+
ph
)
*
output_width
+
pw
]
==
if
(
mask
[(
pd
*
output_height
+
ph
)
*
output_width
+
pw
]
==
input_current_feature_map_idx
)
input_current_feature_map_idx
)
gradient
+=
gradient
+=
...
@@ -1002,8 +1120,8 @@ class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
...
@@ -1002,8 +1120,8 @@ class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
output
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
mask
)
{
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_depth
=
input
.
dims
()[
2
];
const
int
input_depth
=
input
.
dims
()[
2
];
...
@@ -1037,7 +1155,8 @@ class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
...
@@ -1037,7 +1155,8 @@ class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
nthreads
,
input_data
,
input_channels
,
input_depth
,
input_height
,
nthreads
,
input_data
,
input_channels
,
input_depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
output_data
,
mask_data
);
padding_depth
,
padding_height
,
padding_width
,
adaptive
,
output_data
,
mask_data
);
}
}
};
};
...
@@ -1053,7 +1172,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
...
@@ -1053,7 +1172,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
)
{
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
batch_size
=
input_grad
->
dims
()[
0
];
const
int
input_channels
=
input_grad
->
dims
()[
1
];
const
int
input_channels
=
input_grad
->
dims
()[
1
];
...
@@ -1087,7 +1206,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
...
@@ -1087,7 +1206,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
nthreads
,
output_grad_data
,
mask_data
,
input_channels
,
input_depth
,
nthreads
,
output_grad_data
,
mask_data
,
input_channels
,
input_depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_width
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
adaptive
,
input_grad_data
);
input_grad_data
);
}
}
};
};
...
...
paddle/fluid/operators/math/pooling.h
浏览文件 @
266c6856
...
@@ -102,7 +102,7 @@ class Pool2dFunctor {
...
@@ -102,7 +102,7 @@ class Pool2dFunctor {
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
,
bool
exclusive
,
framework
::
Tensor
*
output
);
bool
exclusive
,
bool
adaptive
,
framework
::
Tensor
*
output
);
};
};
template
<
typename
DeviceContext
,
typename
PoolProcess
,
typename
T
>
template
<
typename
DeviceContext
,
typename
PoolProcess
,
typename
T
>
...
@@ -114,7 +114,7 @@ class Pool2dGradFunctor {
...
@@ -114,7 +114,7 @@ class Pool2dGradFunctor {
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
,
bool
exclusive
,
framework
::
Tensor
*
input_grad
);
bool
exclusive
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
);
};
};
template
<
typename
DeviceContext
,
class
T
>
template
<
typename
DeviceContext
,
class
T
>
...
@@ -136,7 +136,7 @@ class Pool3dFunctor {
...
@@ -136,7 +136,7 @@ class Pool3dFunctor {
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
,
bool
exclusive
,
framework
::
Tensor
*
output
);
bool
exclusive
,
bool
adaptive
,
framework
::
Tensor
*
output
);
};
};
template
<
typename
DeviceContext
,
typename
PoolProcess
,
typename
T
>
template
<
typename
DeviceContext
,
typename
PoolProcess
,
typename
T
>
...
@@ -148,7 +148,7 @@ class Pool3dGradFunctor {
...
@@ -148,7 +148,7 @@ class Pool3dGradFunctor {
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
,
const
std
::
vector
<
int
>&
paddings
,
PoolProcess
pool_compute
,
bool
exclusive
,
framework
::
Tensor
*
input_grad
);
bool
exclusive
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
);
};
};
template
<
typename
DeviceContext
,
class
T
>
template
<
typename
DeviceContext
,
class
T
>
...
@@ -176,8 +176,8 @@ class MaxPool2dWithIndexFunctor {
...
@@ -176,8 +176,8 @@ class MaxPool2dWithIndexFunctor {
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
output
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
mask
);
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
);
};
};
template
<
typename
DeviceContext
,
typename
T1
,
typename
T2
>
template
<
typename
DeviceContext
,
typename
T1
,
typename
T2
>
...
@@ -187,7 +187,7 @@ class MaxPool2dWithIndexGradFunctor {
...
@@ -187,7 +187,7 @@ class MaxPool2dWithIndexGradFunctor {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
);
framework
::
Tensor
*
input_grad
);
};
};
...
@@ -197,8 +197,8 @@ class MaxPool3dWithIndexFunctor {
...
@@ -197,8 +197,8 @@ class MaxPool3dWithIndexFunctor {
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
output
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
mask
);
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
);
};
};
template
<
typename
DeviceContext
,
typename
T1
,
typename
T2
>
template
<
typename
DeviceContext
,
typename
T1
,
typename
T2
>
...
@@ -208,7 +208,7 @@ class MaxPool3dWithIndexGradFunctor {
...
@@ -208,7 +208,7 @@ class MaxPool3dWithIndexGradFunctor {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
mask
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
bool
adaptive
,
framework
::
Tensor
*
input_grad
);
framework
::
Tensor
*
input_grad
);
};
};
...
...
paddle/fluid/operators/pool_op.cc
浏览文件 @
266c6856
...
@@ -52,6 +52,7 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -52,6 +52,7 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
bool
ceil_mode
=
ctx
->
Attrs
().
Get
<
bool
>
(
"ceil_mode"
);
bool
ceil_mode
=
ctx
->
Attrs
().
Get
<
bool
>
(
"ceil_mode"
);
bool
adaptive
=
ctx
->
Attrs
().
Get
<
bool
>
(
"adaptive"
);
PADDLE_ENFORCE
(
in_x_dims
.
size
()
==
4
||
in_x_dims
.
size
()
==
5
,
PADDLE_ENFORCE
(
in_x_dims
.
size
()
==
4
||
in_x_dims
.
size
()
==
5
,
"Pooling intput should be 4-D or 5-D tensor."
);
"Pooling intput should be 4-D or 5-D tensor."
);
...
@@ -72,9 +73,13 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -72,9 +73,13 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
"Paddings size and pooling size should be the same."
);
"Paddings size and pooling size should be the same."
);
std
::
vector
<
int64_t
>
output_shape
({
in_x_dims
[
0
],
in_x_dims
[
1
]});
std
::
vector
<
int64_t
>
output_shape
({
in_x_dims
[
0
],
in_x_dims
[
1
]});
if
(
adaptive
)
{
output_shape
.
insert
(
output_shape
.
end
(),
ksize
.
begin
(),
ksize
.
end
());
}
else
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
output_shape
.
push_back
(
PoolOutputSize
(
in_x_dims
[
i
+
2
],
ksize
[
i
],
output_shape
.
push_back
(
PoolOutputSize
(
paddings
[
i
],
strides
[
i
],
ceil_mode
));
in_x_dims
[
i
+
2
],
ksize
[
i
],
paddings
[
i
],
strides
[
i
],
ceil_mode
));
}
}
}
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
ShareLoD
(
"X"
,
"Out"
);
ctx
->
ShareLoD
(
"X"
,
"Out"
);
...
@@ -186,6 +191,14 @@ void Pool2dOpMaker::Make() {
...
@@ -186,6 +191,14 @@ void Pool2dOpMaker::Make() {
"averaging calculating, otherwise, include the zero-padding. Note, it "
"averaging calculating, otherwise, include the zero-padding. Note, it "
"is only used when pooling_type is avg. The defalut is True."
)
"is only used when pooling_type is avg. The defalut is True."
)
.
SetDefault
(
true
);
.
SetDefault
(
true
);
AddAttr
<
bool
>
(
"adaptive"
,
"(bool, default False) When true, will perform adaptive pooling instead, "
"output shape in H and W dimensions will be same as ksize, input data "
"will be divided into grids specify by ksize averagely and perform "
"pooling in each grid area to get output pooling value."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
AddAttr
<
bool
>
(
"use_cudnn"
,
"use_cudnn"
,
"(bool, default false) Only used in cudnn kernel, need install cudnn"
)
"(bool, default false) Only used in cudnn kernel, need install cudnn"
)
...
@@ -325,6 +338,13 @@ void Pool3dOpMaker::Make() {
...
@@ -325,6 +338,13 @@ void Pool3dOpMaker::Make() {
"averaging calculating, otherwise, include the zero-padding. Note, it "
"averaging calculating, otherwise, include the zero-padding. Note, it "
"is only used when pooling_type is avg. The defalut is True."
)
"is only used when pooling_type is avg. The defalut is True."
)
.
SetDefault
(
true
);
.
SetDefault
(
true
);
AddAttr
<
bool
>
(
"adaptive"
,
"(bool, default False) When true, will perform adaptive pooling instead, "
"output shape in H and W dimensions will be same as ksize, input data "
"will be divided into grids specify by ksize averagely and perform "
"pooling in each grid area to get output pooling value."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
AddAttr
<
bool
>
(
"use_cudnn"
,
"use_cudnn"
,
...
...
paddle/fluid/operators/pool_op.h
浏览文件 @
266c6856
...
@@ -70,6 +70,7 @@ class PoolKernel : public framework::OpKernel<T> {
...
@@ -70,6 +70,7 @@ class PoolKernel : public framework::OpKernel<T> {
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
bool
exclusive
=
context
.
Attr
<
bool
>
(
"exclusive"
);
bool
exclusive
=
context
.
Attr
<
bool
>
(
"exclusive"
);
bool
adaptive
=
context
.
Attr
<
bool
>
(
"adaptive"
);
if
(
context
.
Attr
<
bool
>
(
"global_pooling"
))
{
if
(
context
.
Attr
<
bool
>
(
"global_pooling"
))
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
paddings
[
i
]
=
0
;
paddings
[
i
]
=
0
;
...
@@ -85,7 +86,7 @@ class PoolKernel : public framework::OpKernel<T> {
...
@@ -85,7 +86,7 @@ class PoolKernel : public framework::OpKernel<T> {
pool2d_forward
;
pool2d_forward
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
pool2d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
,
pool2d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
,
true
,
out
);
true
,
false
,
out
);
}
else
if
(
pooling_type
==
"avg"
)
{
}
else
if
(
pooling_type
==
"avg"
)
{
paddle
::
operators
::
math
::
Pool2dFunctor
<
paddle
::
operators
::
math
::
Pool2dFunctor
<
...
@@ -93,7 +94,7 @@ class PoolKernel : public framework::OpKernel<T> {
...
@@ -93,7 +94,7 @@ class PoolKernel : public framework::OpKernel<T> {
pool2d_forward
;
pool2d_forward
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
pool2d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
,
pool2d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
,
exclusive
,
out
);
exclusive
,
adaptive
,
out
);
}
}
}
break
;
}
break
;
case
3
:
{
case
3
:
{
...
@@ -103,14 +104,14 @@ class PoolKernel : public framework::OpKernel<T> {
...
@@ -103,14 +104,14 @@ class PoolKernel : public framework::OpKernel<T> {
pool3d_forward
;
pool3d_forward
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
pool3d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
,
pool3d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
,
true
,
out
);
true
,
false
,
out
);
}
else
if
(
pooling_type
==
"avg"
)
{
}
else
if
(
pooling_type
==
"avg"
)
{
paddle
::
operators
::
math
::
Pool3dFunctor
<
paddle
::
operators
::
math
::
Pool3dFunctor
<
DeviceContext
,
paddle
::
operators
::
math
::
AvgPool
<
T
>
,
T
>
DeviceContext
,
paddle
::
operators
::
math
::
AvgPool
<
T
>
,
T
>
pool3d_forward
;
pool3d_forward
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
pool3d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
,
pool3d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
pool_process
,
exclusive
,
out
);
exclusive
,
adaptive
,
out
);
}
}
}
break
;
}
break
;
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
...
@@ -133,6 +134,7 @@ class PoolGradKernel : public framework::OpKernel<T> {
...
@@ -133,6 +134,7 @@ class PoolGradKernel : public framework::OpKernel<T> {
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
bool
exclusive
=
context
.
Attr
<
bool
>
(
"exclusive"
);
bool
exclusive
=
context
.
Attr
<
bool
>
(
"exclusive"
);
bool
adaptive
=
context
.
Attr
<
bool
>
(
"adaptive"
);
if
(
context
.
Attr
<
bool
>
(
"global_pooling"
))
{
if
(
context
.
Attr
<
bool
>
(
"global_pooling"
))
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
...
@@ -159,7 +161,8 @@ class PoolGradKernel : public framework::OpKernel<T> {
...
@@ -159,7 +161,8 @@ class PoolGradKernel : public framework::OpKernel<T> {
pool2d_backward
;
pool2d_backward
;
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
pool_process
;
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
pool_process
;
pool2d_backward
(
dev_ctx
,
*
in_x
,
*
out
,
*
out_grad
,
ksize
,
strides
,
pool2d_backward
(
dev_ctx
,
*
in_x
,
*
out
,
*
out_grad
,
ksize
,
strides
,
paddings
,
pool_process
,
exclusive
,
in_x_grad
);
paddings
,
pool_process
,
exclusive
,
adaptive
,
in_x_grad
);
}
}
}
break
;
}
break
;
case
3
:
{
case
3
:
{
...
@@ -174,7 +177,8 @@ class PoolGradKernel : public framework::OpKernel<T> {
...
@@ -174,7 +177,8 @@ class PoolGradKernel : public framework::OpKernel<T> {
pool3d_backward
;
pool3d_backward
;
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
pool_process
;
paddle
::
operators
::
math
::
AvgPoolGrad
<
T
>
pool_process
;
pool3d_backward
(
dev_ctx
,
*
in_x
,
*
out
,
*
out_grad
,
ksize
,
strides
,
pool3d_backward
(
dev_ctx
,
*
in_x
,
*
out
,
*
out_grad
,
ksize
,
strides
,
paddings
,
pool_process
,
exclusive
,
in_x_grad
);
paddings
,
pool_process
,
exclusive
,
adaptive
,
in_x_grad
);
}
}
}
break
;
}
break
;
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
...
...
paddle/fluid/operators/pool_with_index_op.cc
浏览文件 @
266c6856
...
@@ -40,6 +40,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
...
@@ -40,6 +40,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
std
::
vector
<
int
>
ksize
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"ksize"
);
std
::
vector
<
int
>
ksize
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"ksize"
);
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
bool
adaptive
=
ctx
->
Attrs
().
Get
<
bool
>
(
"adaptive"
);
PADDLE_ENFORCE
(
in_x_dims
.
size
()
==
4
||
in_x_dims
.
size
()
==
5
,
PADDLE_ENFORCE
(
in_x_dims
.
size
()
==
4
||
in_x_dims
.
size
()
==
5
,
"Pooling intput should be 4-D or 5-D tensor."
);
"Pooling intput should be 4-D or 5-D tensor."
);
...
@@ -60,10 +61,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
...
@@ -60,10 +61,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
"Paddings size and pooling size should be the same."
);
"Paddings size and pooling size should be the same."
);
std
::
vector
<
int64_t
>
output_shape
({
in_x_dims
[
0
],
in_x_dims
[
1
]});
std
::
vector
<
int64_t
>
output_shape
({
in_x_dims
[
0
],
in_x_dims
[
1
]});
if
(
adaptive
)
{
output_shape
.
insert
(
output_shape
.
end
(),
ksize
.
begin
(),
ksize
.
end
());
}
else
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
output_shape
.
push_back
(
MaxPoolOutputSize
(
in_x_dims
[
i
+
2
],
ksize
[
i
],
output_shape
.
push_back
(
MaxPoolOutputSize
(
in_x_dims
[
i
+
2
],
ksize
[
i
],
paddings
[
i
],
strides
[
i
]));
paddings
[
i
],
strides
[
i
]));
}
}
}
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Mask"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Mask"
,
framework
::
make_ddim
(
output_shape
));
}
}
...
@@ -133,6 +138,14 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -133,6 +138,14 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool, default:false) Whether to use the global pooling. "
"(bool, default:false) Whether to use the global pooling. "
"If global_pooling = true, ksize and paddings will be ignored."
)
"If global_pooling = true, ksize and paddings will be ignored."
)
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"adaptive"
,
"(bool, default False) When true, will perform adaptive pooling "
"instead, "
"output shape in H and W dimensions will be same as ksize, input data "
"will be divided into grids specify by ksize averagely and perform "
"pooling in each grid area to get output pooling value."
)
.
SetDefault
(
false
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int>, default {1, 1}), strides(height, "
"(vector<int>, default {1, 1}), strides(height, "
"width) of pooling operator."
)
"width) of pooling operator."
)
...
@@ -209,6 +222,14 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -209,6 +222,14 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool, default false) Whether to use the global pooling. "
"(bool, default false) Whether to use the global pooling. "
"If global_pooling = true, ksize and paddings will be ignored."
)
"If global_pooling = true, ksize and paddings will be ignored."
)
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"adaptive"
,
"(bool, default False) When true, will perform adaptive pooling "
"instead, "
"output shape in H and W dimensions will be same as ksize, input data "
"will be divided into grids specify by ksize averagely and perform "
"pooling in each grid area to get output pooling value."
)
.
SetDefault
(
false
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int>, default {1,1,1}), strides(depth, "
"(vector<int>, default {1,1,1}), strides(depth, "
"height, width) of pooling operator."
)
"height, width) of pooling operator."
)
...
...
paddle/fluid/operators/pool_with_index_op.h
浏览文件 @
266c6856
...
@@ -36,6 +36,7 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
...
@@ -36,6 +36,7 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
std
::
vector
<
int
>
ksize
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"ksize"
);
std
::
vector
<
int
>
ksize
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"ksize"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
bool
adaptive
=
context
.
Attr
<
bool
>
(
"adaptive"
);
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
if
(
context
.
Attr
<
bool
>
(
"global_pooling"
))
{
if
(
context
.
Attr
<
bool
>
(
"global_pooling"
))
{
...
@@ -50,13 +51,15 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
...
@@ -50,13 +51,15 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
paddle
::
operators
::
math
::
MaxPool2dWithIndexFunctor
<
DeviceContext
,
T1
,
paddle
::
operators
::
math
::
MaxPool2dWithIndexFunctor
<
DeviceContext
,
T1
,
T2
>
T2
>
pool2d_forward
;
pool2d_forward
;
pool2d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
out
,
mask
);
pool2d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
adaptive
,
out
,
mask
);
}
break
;
}
break
;
case
3
:
{
case
3
:
{
paddle
::
operators
::
math
::
MaxPool3dWithIndexFunctor
<
DeviceContext
,
T1
,
paddle
::
operators
::
math
::
MaxPool3dWithIndexFunctor
<
DeviceContext
,
T1
,
T2
>
T2
>
pool3d_forward
;
pool3d_forward
;
pool3d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
out
,
mask
);
pool3d_forward
(
dev_ctx
,
*
in_x
,
ksize
,
strides
,
paddings
,
adaptive
,
out
,
mask
);
}
break
;
}
break
;
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
}
}
...
@@ -75,6 +78,7 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
...
@@ -75,6 +78,7 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
std
::
vector
<
int
>
ksize
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"ksize"
);
std
::
vector
<
int
>
ksize
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"ksize"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
bool
adaptive
=
context
.
Attr
<
bool
>
(
"adaptive"
);
if
(
context
.
Attr
<
bool
>
(
"global_pooling"
))
{
if
(
context
.
Attr
<
bool
>
(
"global_pooling"
))
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
paddings
[
i
]
=
0
;
paddings
[
i
]
=
0
;
...
@@ -93,14 +97,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
...
@@ -93,14 +97,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
T1
,
T2
>
T1
,
T2
>
pool2d_backward
;
pool2d_backward
;
pool2d_backward
(
device_ctx
,
*
out_grad
,
*
mask
,
ksize
,
strides
,
pool2d_backward
(
device_ctx
,
*
out_grad
,
*
mask
,
ksize
,
strides
,
paddings
,
in_x_grad
);
paddings
,
adaptive
,
in_x_grad
);
}
break
;
}
break
;
case
3
:
{
case
3
:
{
paddle
::
operators
::
math
::
MaxPool3dWithIndexGradFunctor
<
DeviceContext
,
paddle
::
operators
::
math
::
MaxPool3dWithIndexGradFunctor
<
DeviceContext
,
T1
,
T2
>
T1
,
T2
>
pool3d_backward
;
pool3d_backward
;
pool3d_backward
(
device_ctx
,
*
out_grad
,
*
mask
,
ksize
,
strides
,
pool3d_backward
(
device_ctx
,
*
out_grad
,
*
mask
,
ksize
,
strides
,
paddings
,
in_x_grad
);
paddings
,
adaptive
,
in_x_grad
);
}
break
;
}
break
;
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
default:
{
PADDLE_THROW
(
"Pool op only supports 2D and 3D input."
);
}
}
}
...
...
paddle/fluid/operators/spp_op.h
浏览文件 @
266c6856
...
@@ -56,13 +56,13 @@ class SppKernel : public framework::OpKernel<T> {
...
@@ -56,13 +56,13 @@ class SppKernel : public framework::OpKernel<T> {
math
::
Pool2dFunctor
<
DeviceContext
,
math
::
MaxPool
<
T
>
,
T
>
pool_forward
;
math
::
Pool2dFunctor
<
DeviceContext
,
math
::
MaxPool
<
T
>
,
T
>
pool_forward
;
math
::
MaxPool
<
T
>
max_process
;
math
::
MaxPool
<
T
>
max_process
;
pool_forward
(
context
.
template
device_context
<
DeviceContext
>(),
*
in_x
,
pool_forward
(
context
.
template
device_context
<
DeviceContext
>(),
*
in_x
,
kernel_size
,
strides
,
paddings
,
max_process
,
true
,
kernel_size
,
strides
,
paddings
,
max_process
,
true
,
false
,
&
out_level
);
&
out_level
);
}
else
if
(
pooling_type
==
"avg"
)
{
}
else
if
(
pooling_type
==
"avg"
)
{
math
::
Pool2dFunctor
<
DeviceContext
,
math
::
AvgPool
<
T
>
,
T
>
pool_forward
;
math
::
Pool2dFunctor
<
DeviceContext
,
math
::
AvgPool
<
T
>
,
T
>
pool_forward
;
math
::
AvgPool
<
T
>
avg_process
;
math
::
AvgPool
<
T
>
avg_process
;
pool_forward
(
context
.
template
device_context
<
DeviceContext
>(),
*
in_x
,
pool_forward
(
context
.
template
device_context
<
DeviceContext
>(),
*
in_x
,
kernel_size
,
strides
,
paddings
,
avg_process
,
true
,
kernel_size
,
strides
,
paddings
,
avg_process
,
true
,
false
,
&
out_level
);
&
out_level
);
}
}
// flatten pooling output shape
// flatten pooling output shape
...
@@ -156,7 +156,7 @@ class SppGradKernel : public framework::OpKernel<T> {
...
@@ -156,7 +156,7 @@ class SppGradKernel : public framework::OpKernel<T> {
math
::
AvgPoolGrad
<
T
>
avg_process
;
math
::
AvgPoolGrad
<
T
>
avg_process
;
pool_backward
(
context
.
template
device_context
<
DeviceContext
>(),
*
in_x
,
pool_backward
(
context
.
template
device_context
<
DeviceContext
>(),
*
in_x
,
*&
out_level
,
*&
outgrad_level
,
kernel_size
,
strides
,
*&
out_level
,
*&
outgrad_level
,
kernel_size
,
strides
,
paddings
,
avg_process
,
true
,
in_x_grad
);
paddings
,
avg_process
,
true
,
false
,
in_x_grad
);
}
}
}
}
}
}
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
266c6856
...
@@ -52,6 +52,8 @@ __all__ = [
...
@@ -52,6 +52,8 @@ __all__ = [
'softmax'
,
'softmax'
,
'pool2d'
,
'pool2d'
,
'pool3d'
,
'pool3d'
,
'adaptive_pool2d'
,
'adaptive_pool3d'
,
'batch_norm'
,
'batch_norm'
,
'beam_search_decode'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'conv2d_transpose'
,
...
@@ -2499,6 +2501,190 @@ def pool3d(input,
...
@@ -2499,6 +2501,190 @@ def pool3d(input,
return
pool_out
return
pool_out
@
templatedoc
(
op_type
=
"pool2d"
)
def adaptive_pool2d(input,
                    pool_size,
                    pool_type="max",
                    require_index=False,
                    use_cudnn=True,
                    name=None):
    """
    ${comment}

    Adaptive pooling: the op is appended with ``adaptive=True`` and
    ``pool_size`` is passed as its ``ksize`` attribute, so the output height
    and width are equal to ``pool_size``.

    Args:
        input (Variable): The input tensor of pooling operator. The format of
                          input tensor is NCHW, where N is batch size, C is
                          the number of channels, H is the height of the
                          feature, and W is the width of the feature.
        pool_size (list|tuple): The output size of the pooling. It must be a
            list or tuple containing two integers,
            (pool_size_Height, pool_size_Width).
        pool_type: ${pooling_type_comment}
        require_index (bool): Request the index of the max pooling point.
            It cannot be set in average pooling type.
        use_cudnn (bool): ${use_cudnn_comment}
        name (str|None): A name for this layer(optional). If set None, the
                        layer will be named automatically.

    Returns:
        Variable: The pooling result.

    Raises:
        ValueError: 'pool_type' is not 'max' nor 'avg'.
        ValueError: invalid setting 'require_index' true when 'pool_type' is 'avg'.
        ValueError: 'pool_size' should be a list or tuple with length as 2.
        ValueError: 'use_cudnn' is not a bool value.

    Examples:
        .. code-block:: python

          data = fluid.layers.data(
              name='data', shape=[3, 32, 32], dtype='float32')
          pool_out = fluid.layers.adaptive_pool2d(
                            input=data,
                            pool_size=[3, 3],
                            pool_type='max')
    """
    if pool_type not in ["max", "avg"]:
        # Interpolate with % so the exception carries one formatted message
        # rather than a (template, value) argument pair.
        raise ValueError(
            "Unknown pool_type: '%s'. It can only be 'max' or 'avg'." %
            str(pool_type))

    if pool_type == "avg" and require_index:
        raise ValueError(
            "invalid setting 'require_index' true when 'pool_type' is 'avg'.")

    def _is_list_or_tuple_(data):
        return isinstance(data, (list, tuple))

    if not _is_list_or_tuple_(pool_size) or len(pool_size) != 2:
        raise ValueError(
            "'pool_size' should be a list or tuple with length as 2.")

    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False.")

    # Max pooling always goes through the with-index op, which also produces
    # a "Mask" output; average pooling uses the plain pool2d op.
    if pool_type == "max":
        l_type = 'max_pool2d_with_index'
    else:
        l_type = "pool2d"

    # locals() is forwarded as-is, so the set of local names defined above
    # this call is part of what LayerHelper receives; keep it stable.
    helper = LayerHelper(l_type, **locals())
    dtype = helper.input_dtype()
    pool_out = helper.create_variable_for_type_inference(dtype)

    outputs = {"Out": pool_out}
    if pool_type == "max":
        mask = helper.create_variable_for_type_inference(dtype)
        outputs["Mask"] = mask

    helper.append_op(
        type=l_type,
        inputs={"X": input},
        outputs=outputs,
        attrs={
            "pooling_type": pool_type,
            "ksize": pool_size,
            "use_cudnn": use_cudnn,
            "adaptive": True,
        })

    # NOTE(review): the mask is created for max pooling but never returned,
    # even when require_index is True. Returning it would change the return
    # shape for existing callers, so it is left as-is here -- confirm intent.
    return pool_out
@templatedoc(op_type="pool3d")
def adaptive_pool3d(input,
                    pool_size,
                    pool_type="max",
                    require_index=False,
                    use_cudnn=True,
                    name=None):
    """
    ${comment}

    Args:
        input (Variable): The input tensor of pooling operator. The format of
                          input tensor is NCDHW, where N is batch size, C is
                          the number of channels, D is the depth of the
                          feature, H is the height of the feature, and W is
                          the width of the feature.
        pool_size (list|tuple): The pool kernel size. It must be a list or
            tuple containing three integers, (Depth, Height, Width). The
            value is forwarded as the op's ``ksize`` attribute together with
            ``adaptive=True``.
        pool_type: ${pooling_type_comment}
        require_index (bool): If true, the index of max pooling point along with outputs.
            it cannot be set in average pooling type.
        use_cudnn (bool): ${use_cudnn_comment}
        name (str|None): A name for this layer(optional). If set None, the
                        layer will be named automatically.

    Returns:
        Variable: The pooling result.

    Raises:
        ValueError: 'pool_type' is not 'max' nor 'avg'.
        ValueError: 'use_cudnn' is not a bool value.
        ValueError: invalid setting 'require_index' true when 'pool_type' is 'avg'.
        ValueError: 'pool_size' should be a list or tuple with length as 3.

    Examples:
        .. code-block:: python

          data = fluid.layers.data(
              name='data', shape=[3, 32, 32, 32], dtype='float32')
          pool_out = fluid.layers.adaptive_pool3d(
                            input=data,
                            pool_size=[3, 3, 3],
                            pool_type='max',
                            require_index=True)
    """
    if pool_type not in ["max", "avg"]:
        # Format the message eagerly: handing the value to ValueError as a
        # second positional argument leaves the '%s' placeholder unfilled and
        # the exception prints as a tuple.
        raise ValueError(
            "Unknown pool_type: '%s'. It can only be 'max' or 'avg'." %
            str(pool_type))

    if pool_type == "avg" and require_index:
        raise ValueError(
            "invalid setting 'require_index' true when 'pool_type' is 'avg'.")

    def _is_list_or_tuple_(data):
        return isinstance(data, (list, tuple))

    if not _is_list_or_tuple_(pool_size) or len(pool_size) != 3:
        raise ValueError(
            "'pool_size' should be a list or tuple with length as 3.")

    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False.")

    # Max pooling always goes through the "with index" op, which has an extra
    # "Mask" output; average pooling uses the plain pool3d op.
    if pool_type == "max":
        l_type = 'max_pool3d_with_index'
    else:
        l_type = "pool3d"

    helper = LayerHelper(l_type, **locals())
    dtype = helper.input_dtype()
    pool_out = helper.create_variable_for_type_inference(dtype)

    outputs = {"Out": pool_out}
    if pool_type == "max":
        mask = helper.create_variable_for_type_inference(dtype)
        outputs["Mask"] = mask

    helper.append_op(
        type=l_type,
        inputs={"X": input},
        outputs=outputs,
        attrs={
            "pooling_type": pool_type,
            "ksize": pool_size,
            "use_cudnn": use_cudnn,
            "adaptive": True,
        })

    # NOTE(review): 'require_index' is only validated above; the mask variable
    # is created but never returned to the caller. Confirm whether callers are
    # expected to receive (pool_out, mask) when require_index is True.
    return pool_out
def
batch_norm
(
input
,
def
batch_norm
(
input
,
act
=
None
,
act
=
None
,
is_test
=
False
,
is_test
=
False
,
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
266c6856
...
@@ -233,6 +233,28 @@ class TestBook(unittest.TestCase):
...
@@ -233,6 +233,28 @@ class TestBook(unittest.TestCase):
pool_stride
=
[
1
,
2
],
pool_stride
=
[
1
,
2
],
pool_padding
=
(
2
,
1
)))
pool_padding
=
(
2
,
1
)))
def test_adaptive_pool2d(self):
    """Build adaptive_pool2d layers (max with index, then avg) in a fresh program."""
    main_program = Program()
    with program_guard(main_program):
        image = layers.data(name='x', shape=[3, 224, 224], dtype='float32')

        # Max pooling with the index output requested.
        max_pooled = layers.adaptive_pool2d(
            image, [3, 3], require_index=True)
        self.assertIsNotNone(max_pooled)

        # Average pooling (no index allowed).
        avg_pooled = layers.adaptive_pool2d(image, [3, 3], pool_type='avg')
        self.assertIsNotNone(avg_pooled)
def test_adaptive_pool3d(self):
    """Build adaptive_pool3d layers (max with index, then avg) in a fresh program."""
    main_program = Program()
    with program_guard(main_program):
        volume = layers.data(
            name='x', shape=[3, 244, 224, 224], dtype='float32')

        # Max pooling with the index output requested.
        max_pooled = layers.adaptive_pool3d(
            volume, [3, 3, 3], require_index=True)
        self.assertIsNotNone(max_pooled)

        # Average pooling (no index allowed).
        avg_pooled = layers.adaptive_pool3d(
            volume, [3, 3, 3], pool_type='avg')
        self.assertIsNotNone(avg_pooled)
def
test_lstm_unit
(
self
):
def
test_lstm_unit
(
self
):
program
=
Program
()
program
=
Program
()
with
program_guard
(
program
):
with
program_guard
(
program
):
...
...
python/paddle/fluid/tests/unittests/test_pool2d_op.py
浏览文件 @
266c6856
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
# limitations under the License.
# limitations under the License.
from
__future__
import
print_function
from
__future__
import
print_function
from
__future__
import
division
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
...
@@ -21,16 +22,28 @@ import paddle.fluid.core as core
...
@@ -21,16 +22,28 @@ import paddle.fluid.core as core
from
op_test
import
OpTest
from
op_test
import
OpTest
def adaptive_start_index(index, input_size, output_size):
    """Return floor(index * input_size / output_size) as a Python int.

    Used as the inclusive start of the input window for adaptive-pooling
    output position ``index``. Relies on true division.
    """
    scaled_position = index * input_size / output_size
    return int(np.floor(scaled_position))
def adaptive_end_index(index, input_size, output_size):
    """Return ceil((index + 1) * input_size / output_size) as a Python int.

    Used as the exclusive end of the input window for adaptive-pooling
    output position ``index``. Relies on true division.
    """
    scaled_position = (index + 1) * input_size / output_size
    return int(np.ceil(scaled_position))
def
max_pool2D_forward_naive
(
x
,
def
max_pool2D_forward_naive
(
x
,
ksize
,
ksize
,
strides
,
strides
,
paddings
,
paddings
,
global_pool
=
0
,
global_pool
=
0
,
ceil_mode
=
False
,
ceil_mode
=
False
,
exclusive
=
True
):
exclusive
=
True
,
adaptive
=
False
):
N
,
C
,
H
,
W
=
x
.
shape
N
,
C
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
==
1
:
ksize
=
[
H
,
W
]
ksize
=
[
H
,
W
]
if
adaptive
:
H_out
,
W_out
=
ksize
else
:
H_out
=
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
]
+
strides
[
0
]
-
1
H_out
=
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
]
+
strides
[
0
]
-
1
)
//
strides
[
0
]
+
1
if
ceil_mode
else
(
)
//
strides
[
0
]
+
1
if
ceil_mode
else
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
...
@@ -40,6 +53,12 @@ def max_pool2D_forward_naive(x,
...
@@ -40,6 +53,12 @@ def max_pool2D_forward_naive(x,
out
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
out
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
for
i
in
range
(
H_out
):
for
i
in
range
(
H_out
):
for
j
in
range
(
W_out
):
for
j
in
range
(
W_out
):
if
adaptive
:
r_start
=
adaptive_start_index
(
i
,
H
,
ksize
[
0
])
r_end
=
adaptive_end_index
(
i
,
H
,
ksize
[
0
])
c_start
=
adaptive_start_index
(
j
,
W
,
ksize
[
1
])
c_end
=
adaptive_end_index
(
j
,
W
,
ksize
[
1
])
else
:
r_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
r_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
r_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
r_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
c_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
c_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
...
@@ -56,10 +75,14 @@ def avg_pool2D_forward_naive(x,
...
@@ -56,10 +75,14 @@ def avg_pool2D_forward_naive(x,
paddings
,
paddings
,
global_pool
=
0
,
global_pool
=
0
,
ceil_mode
=
False
,
ceil_mode
=
False
,
exclusive
=
True
):
exclusive
=
True
,
adaptive
=
False
):
N
,
C
,
H
,
W
=
x
.
shape
N
,
C
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
==
1
:
ksize
=
[
H
,
W
]
ksize
=
[
H
,
W
]
if
adaptive
:
H_out
,
W_out
=
ksize
else
:
H_out
=
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
]
+
strides
[
0
]
-
1
H_out
=
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
]
+
strides
[
0
]
-
1
)
//
strides
[
0
]
+
1
if
ceil_mode
else
(
)
//
strides
[
0
]
+
1
if
ceil_mode
else
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
...
@@ -69,14 +92,20 @@ def avg_pool2D_forward_naive(x,
...
@@ -69,14 +92,20 @@ def avg_pool2D_forward_naive(x,
out
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
out
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
for
i
in
range
(
H_out
):
for
i
in
range
(
H_out
):
for
j
in
range
(
W_out
):
for
j
in
range
(
W_out
):
if
adaptive
:
r_start
=
adaptive_start_index
(
i
,
H
,
ksize
[
0
])
r_end
=
adaptive_end_index
(
i
,
H
,
ksize
[
0
])
c_start
=
adaptive_start_index
(
j
,
W
,
ksize
[
1
])
c_end
=
adaptive_end_index
(
j
,
W
,
ksize
[
1
])
else
:
r_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
r_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
r_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
r_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
c_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
c_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
c_end
=
np
.
min
((
j
*
strides
[
1
]
+
ksize
[
1
]
-
paddings
[
1
],
W
))
c_end
=
np
.
min
((
j
*
strides
[
1
]
+
ksize
[
1
]
-
paddings
[
1
],
W
))
x_masked
=
x
[:,
:,
r_start
:
r_end
,
c_start
:
c_end
]
x_masked
=
x
[:,
:,
r_start
:
r_end
,
c_start
:
c_end
]
field_size
=
((
r_end
-
r_start
)
*
(
c_end
-
c_start
))
if
exclusive
\
field_size
=
((
r_end
-
r_start
)
*
(
c_end
-
c_start
))
\
else
(
ksize
[
0
]
*
ksize
[
1
])
if
(
exclusive
or
adaptive
)
else
(
ksize
[
0
]
*
ksize
[
1
])
out
[:,
:,
i
,
j
]
=
np
.
sum
(
x_masked
,
axis
=
(
2
,
3
))
/
field_size
out
[:,
:,
i
,
j
]
=
np
.
sum
(
x_masked
,
axis
=
(
2
,
3
))
/
field_size
return
out
return
out
...
@@ -93,12 +122,13 @@ class TestPool2D_Op(OpTest):
...
@@ -93,12 +122,13 @@ class TestPool2D_Op(OpTest):
self
.
init_pool_type
()
self
.
init_pool_type
()
self
.
init_ceil_mode
()
self
.
init_ceil_mode
()
self
.
init_exclusive
()
self
.
init_exclusive
()
self
.
init_adaptive
()
if
self
.
global_pool
:
if
self
.
global_pool
:
self
.
paddings
=
[
0
for
_
in
range
(
len
(
self
.
paddings
))]
self
.
paddings
=
[
0
for
_
in
range
(
len
(
self
.
paddings
))]
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
self
.
dtype
)
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
self
.
dtype
)
output
=
self
.
pool2D_forward_naive
(
output
=
self
.
pool2D_forward_naive
(
input
,
self
.
ksize
,
self
.
strides
,
self
.
paddings
,
self
.
global_pool
,
input
,
self
.
ksize
,
self
.
strides
,
self
.
paddings
,
self
.
global_pool
,
self
.
ceil_mode
,
self
.
exclusive
).
astype
(
self
.
dtype
)
self
.
ceil_mode
,
self
.
exclusive
,
self
.
adaptive
).
astype
(
self
.
dtype
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
input
)}
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
input
)}
self
.
attrs
=
{
self
.
attrs
=
{
...
@@ -112,7 +142,8 @@ class TestPool2D_Op(OpTest):
...
@@ -112,7 +142,8 @@ class TestPool2D_Op(OpTest):
'ceil_mode'
:
self
.
ceil_mode
,
'ceil_mode'
:
self
.
ceil_mode
,
'data_format'
:
'data_format'
:
'AnyLayout'
,
# TODO(dzhwinter) : should be fix latter
'AnyLayout'
,
# TODO(dzhwinter) : should be fix latter
'exclusive'
:
self
.
exclusive
'exclusive'
:
self
.
exclusive
,
'adaptive'
:
self
.
adaptive
}
}
self
.
outputs
=
{
'Out'
:
output
}
self
.
outputs
=
{
'Out'
:
output
}
...
@@ -159,6 +190,9 @@ class TestPool2D_Op(OpTest):
...
@@ -159,6 +190,9 @@ class TestPool2D_Op(OpTest):
def
init_exclusive
(
self
):
def
init_exclusive
(
self
):
self
.
exclusive
=
True
self
.
exclusive
=
True
def init_adaptive(self):
    """Set the default for the ``adaptive`` flag; subclasses override to enable it."""
    self.adaptive = False
class
TestCase1
(
TestPool2D_Op
):
class
TestCase1
(
TestPool2D_Op
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
...
@@ -315,5 +349,10 @@ class TestCUDNNAvgInclude(TestCase2):
...
@@ -315,5 +349,10 @@ class TestCUDNNAvgInclude(TestCase2):
self
.
exclusive
=
False
self
.
exclusive
=
False
class TestAvgPoolAdaptive(TestCase1):
    """Re-run the TestCase1 configuration with the ``adaptive`` flag switched on."""

    def init_adaptive(self):
        # Override the base-class default so the op and the reference
        # implementation both take the adaptive path.
        self.adaptive = True
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_pool3d_op.py
浏览文件 @
266c6856
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
# limitations under the License.
# limitations under the License.
from
__future__
import
print_function
from
__future__
import
print_function
from
__future__
import
division
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
...
@@ -21,16 +22,28 @@ import paddle.fluid.core as core
...
@@ -21,16 +22,28 @@ import paddle.fluid.core as core
from
op_test
import
OpTest
from
op_test
import
OpTest
def adaptive_start_index(index, input_size, output_size):
    """Return floor(index * input_size / output_size) as a Python int.

    Used as the inclusive start of the input window for adaptive-pooling
    output position ``index``. Relies on true division.
    """
    scaled_position = index * input_size / output_size
    return int(np.floor(scaled_position))
def adaptive_end_index(index, input_size, output_size):
    """Return ceil((index + 1) * input_size / output_size) as a Python int.

    Used as the exclusive end of the input window for adaptive-pooling
    output position ``index``. Relies on true division.
    """
    scaled_position = (index + 1) * input_size / output_size
    return int(np.ceil(scaled_position))
def
max_pool3D_forward_naive
(
x
,
def
max_pool3D_forward_naive
(
x
,
ksize
,
ksize
,
strides
,
strides
,
paddings
,
paddings
,
global_pool
=
0
,
global_pool
=
0
,
ceil_mode
=
False
,
ceil_mode
=
False
,
exclusive
=
True
):
exclusive
=
True
,
adaptive
=
False
):
N
,
C
,
D
,
H
,
W
=
x
.
shape
N
,
C
,
D
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
==
1
:
ksize
=
[
D
,
H
,
W
]
ksize
=
[
D
,
H
,
W
]
if
adaptive
:
D_out
,
H_out
,
W_out
=
ksize
else
:
D_out
=
(
D
-
ksize
[
0
]
+
2
*
paddings
[
0
]
+
strides
[
0
]
-
1
D_out
=
(
D
-
ksize
[
0
]
+
2
*
paddings
[
0
]
+
strides
[
0
]
-
1
)
//
strides
[
0
]
+
1
if
ceil_mode
else
(
)
//
strides
[
0
]
+
1
if
ceil_mode
else
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
...
@@ -42,14 +55,26 @@ def max_pool3D_forward_naive(x,
...
@@ -42,14 +55,26 @@ def max_pool3D_forward_naive(x,
W
-
ksize
[
2
]
+
2
*
paddings
[
2
])
//
strides
[
2
]
+
1
W
-
ksize
[
2
]
+
2
*
paddings
[
2
])
//
strides
[
2
]
+
1
out
=
np
.
zeros
((
N
,
C
,
D_out
,
H_out
,
W_out
))
out
=
np
.
zeros
((
N
,
C
,
D_out
,
H_out
,
W_out
))
for
k
in
range
(
D_out
):
for
k
in
range
(
D_out
):
if
adaptive
:
d_start
=
adaptive_start_index
(
k
,
D
,
ksize
[
0
])
d_end
=
adaptive_end_index
(
k
,
D
,
ksize
[
0
])
else
:
d_start
=
np
.
max
((
k
*
strides
[
0
]
-
paddings
[
0
],
0
))
d_start
=
np
.
max
((
k
*
strides
[
0
]
-
paddings
[
0
],
0
))
d_end
=
np
.
min
((
k
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
D
))
d_end
=
np
.
min
((
k
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
D
))
for
i
in
range
(
H_out
):
for
i
in
range
(
H_out
):
h_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
if
adaptive
:
h_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
h_start
=
adaptive_start_index
(
i
,
H
,
ksize
[
1
])
h_end
=
adaptive_end_index
(
i
,
H
,
ksize
[
1
])
else
:
h_start
=
np
.
max
((
i
*
strides
[
1
]
-
paddings
[
1
],
0
))
h_end
=
np
.
min
((
i
*
strides
[
1
]
+
ksize
[
1
]
-
paddings
[
1
],
H
))
for
j
in
range
(
W_out
):
for
j
in
range
(
W_out
):
w_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
if
adaptive
:
w_end
=
np
.
min
((
j
*
strides
[
1
]
+
ksize
[
1
]
-
paddings
[
1
],
W
))
w_start
=
adaptive_start_index
(
j
,
W
,
ksize
[
2
])
w_end
=
adaptive_end_index
(
j
,
W
,
ksize
[
2
])
else
:
w_start
=
np
.
max
((
j
*
strides
[
2
]
-
paddings
[
2
],
0
))
w_end
=
np
.
min
((
j
*
strides
[
2
]
+
ksize
[
2
]
-
paddings
[
2
],
W
))
x_masked
=
x
[:,
:,
d_start
:
d_end
,
h_start
:
h_end
,
w_start
:
w_end
]
x_masked
=
x
[:,
:,
d_start
:
d_end
,
h_start
:
h_end
,
w_start
:
w_end
]
out
[:,
:,
k
,
i
,
j
]
=
np
.
max
(
x_masked
,
axis
=
(
2
,
3
,
4
))
out
[:,
:,
k
,
i
,
j
]
=
np
.
max
(
x_masked
,
axis
=
(
2
,
3
,
4
))
...
@@ -62,10 +87,14 @@ def avg_pool3D_forward_naive(x,
...
@@ -62,10 +87,14 @@ def avg_pool3D_forward_naive(x,
paddings
,
paddings
,
global_pool
=
0
,
global_pool
=
0
,
ceil_mode
=
False
,
ceil_mode
=
False
,
exclusive
=
True
):
exclusive
=
True
,
adaptive
=
False
):
N
,
C
,
D
,
H
,
W
=
x
.
shape
N
,
C
,
D
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
==
1
:
ksize
=
[
D
,
H
,
W
]
ksize
=
[
D
,
H
,
W
]
if
adaptive
:
D_out
,
H_out
,
W_out
=
ksize
else
:
D_out
=
(
D
-
ksize
[
0
]
+
2
*
paddings
[
0
]
+
strides
[
0
]
-
1
D_out
=
(
D
-
ksize
[
0
]
+
2
*
paddings
[
0
]
+
strides
[
0
]
-
1
)
//
strides
[
0
]
+
1
if
ceil_mode
else
(
)
//
strides
[
0
]
+
1
if
ceil_mode
else
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
...
@@ -77,18 +106,30 @@ def avg_pool3D_forward_naive(x,
...
@@ -77,18 +106,30 @@ def avg_pool3D_forward_naive(x,
W
-
ksize
[
2
]
+
2
*
paddings
[
2
])
//
strides
[
2
]
+
1
W
-
ksize
[
2
]
+
2
*
paddings
[
2
])
//
strides
[
2
]
+
1
out
=
np
.
zeros
((
N
,
C
,
D_out
,
H_out
,
W_out
))
out
=
np
.
zeros
((
N
,
C
,
D_out
,
H_out
,
W_out
))
for
k
in
range
(
D_out
):
for
k
in
range
(
D_out
):
if
adaptive
:
d_start
=
adaptive_start_index
(
k
,
D
,
ksize
[
0
])
d_end
=
adaptive_end_index
(
k
,
D
,
ksize
[
0
])
else
:
d_start
=
np
.
max
((
k
*
strides
[
0
]
-
paddings
[
0
],
0
))
d_start
=
np
.
max
((
k
*
strides
[
0
]
-
paddings
[
0
],
0
))
d_end
=
np
.
min
((
k
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
D
))
d_end
=
np
.
min
((
k
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
D
))
for
i
in
range
(
H_out
):
for
i
in
range
(
H_out
):
h_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
if
adaptive
:
h_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
h_start
=
adaptive_start_index
(
i
,
H
,
ksize
[
1
])
h_end
=
adaptive_end_index
(
i
,
H
,
ksize
[
1
])
else
:
h_start
=
np
.
max
((
i
*
strides
[
1
]
-
paddings
[
1
],
0
))
h_end
=
np
.
min
((
i
*
strides
[
1
]
+
ksize
[
1
]
-
paddings
[
1
],
H
))
for
j
in
range
(
W_out
):
for
j
in
range
(
W_out
):
w_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
if
adaptive
:
w_end
=
np
.
min
((
j
*
strides
[
1
]
+
ksize
[
1
]
-
paddings
[
1
],
W
))
w_start
=
adaptive_start_index
(
j
,
W
,
ksize
[
2
])
w_end
=
adaptive_end_index
(
j
,
W
,
ksize
[
2
])
else
:
w_start
=
np
.
max
((
j
*
strides
[
2
]
-
paddings
[
2
],
0
))
w_end
=
np
.
min
((
j
*
strides
[
2
]
+
ksize
[
2
]
-
paddings
[
2
],
W
))
x_masked
=
x
[:,
:,
d_start
:
d_end
,
h_start
:
h_end
,
w_start
:
w_end
]
x_masked
=
x
[:,
:,
d_start
:
d_end
,
h_start
:
h_end
,
w_start
:
w_end
]
field_size
=
(
d_end
-
d_start
)
*
(
h_end
-
h_start
)
*
(
w_end
-
w_start
)
\
field_size
=
(
d_end
-
d_start
)
*
(
h_end
-
h_start
)
*
(
w_end
-
w_start
)
\
if
exclusive
else
ksize
[
0
]
*
ksize
[
1
]
*
ksize
[
2
]
if
(
exclusive
or
adaptive
)
else
ksize
[
0
]
*
ksize
[
1
]
*
ksize
[
2
]
out
[:,
:,
k
,
i
,
j
]
=
np
.
sum
(
x_masked
,
axis
=
(
2
,
3
,
out
[:,
:,
k
,
i
,
j
]
=
np
.
sum
(
x_masked
,
axis
=
(
2
,
3
,
4
))
/
field_size
4
))
/
field_size
return
out
return
out
...
@@ -105,13 +146,14 @@ class TestPool3d_Op(OpTest):
...
@@ -105,13 +146,14 @@ class TestPool3d_Op(OpTest):
self
.
init_pool_type
()
self
.
init_pool_type
()
self
.
init_ceil_mode
()
self
.
init_ceil_mode
()
self
.
init_exclusive
()
self
.
init_exclusive
()
self
.
init_adaptive
()
if
self
.
global_pool
:
if
self
.
global_pool
:
self
.
paddings
=
[
0
for
_
in
range
(
len
(
self
.
paddings
))]
self
.
paddings
=
[
0
for
_
in
range
(
len
(
self
.
paddings
))]
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
self
.
dtype
)
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
self
.
dtype
)
output
=
self
.
pool3D_forward_naive
(
output
=
self
.
pool3D_forward_naive
(
input
,
self
.
ksize
,
self
.
strides
,
self
.
paddings
,
self
.
global_pool
,
input
,
self
.
ksize
,
self
.
strides
,
self
.
paddings
,
self
.
global_pool
,
self
.
ceil_mode
,
self
.
exclusive
).
astype
(
self
.
dtype
)
self
.
ceil_mode
,
self
.
exclusive
,
self
.
adaptive
).
astype
(
self
.
dtype
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
input
)}
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
input
)}
self
.
attrs
=
{
self
.
attrs
=
{
...
@@ -124,7 +166,8 @@ class TestPool3d_Op(OpTest):
...
@@ -124,7 +166,8 @@ class TestPool3d_Op(OpTest):
'ceil_mode'
:
self
.
ceil_mode
,
'ceil_mode'
:
self
.
ceil_mode
,
'data_format'
:
'data_format'
:
'AnyLayout'
,
# TODO(dzhwinter) : should be fix latter
'AnyLayout'
,
# TODO(dzhwinter) : should be fix latter
'exclusive'
:
self
.
exclusive
'exclusive'
:
self
.
exclusive
,
'adaptive'
:
self
.
adaptive
}
}
self
.
outputs
=
{
'Out'
:
output
}
self
.
outputs
=
{
'Out'
:
output
}
...
@@ -171,6 +214,9 @@ class TestPool3d_Op(OpTest):
...
@@ -171,6 +214,9 @@ class TestPool3d_Op(OpTest):
def
init_exclusive
(
self
):
def
init_exclusive
(
self
):
self
.
exclusive
=
True
self
.
exclusive
=
True
def init_adaptive(self):
    """Set the default for the ``adaptive`` flag; subclasses override to enable it."""
    self.adaptive = False
class
TestCase1
(
TestPool3d_Op
):
class
TestCase1
(
TestPool3d_Op
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
...
@@ -353,5 +399,10 @@ class TestCUDNNAvgInclude(TestCUDNNCase3):
...
@@ -353,5 +399,10 @@ class TestCUDNNAvgInclude(TestCUDNNCase3):
self
.
exclusive
=
False
self
.
exclusive
=
False
class TestAvgPoolAdaptive(TestCase1):
    """Re-run the TestCase1 configuration with the ``adaptive`` flag switched on."""

    def init_adaptive(self):
        # Override the base-class default so the op and the reference
        # implementation both take the adaptive path.
        self.adaptive = True
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_pool_max_op.py
浏览文件 @
266c6856
...
@@ -13,33 +13,62 @@
...
@@ -13,33 +13,62 @@
# limitations under the License.
# limitations under the License.
from
__future__
import
print_function
from
__future__
import
print_function
from
__future__
import
division
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
from
op_test
import
OpTest
from
op_test
import
OpTest
def
max_pool3D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
False
):
def adaptive_start_index(index, input_size, output_size):
    """Return floor(index * input_size / output_size) as a Python int.

    Used as the inclusive start of the input window for adaptive-pooling
    output position ``index``. Relies on true division.
    """
    scaled_position = index * input_size / output_size
    return int(np.floor(scaled_position))
def adaptive_end_index(index, input_size, output_size):
    """Return ceil((index + 1) * input_size / output_size) as a Python int.

    Used as the exclusive end of the input window for adaptive-pooling
    output position ``index``. Relies on true division.
    """
    scaled_position = (index + 1) * input_size / output_size
    return int(np.ceil(scaled_position))
def
max_pool3D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
False
,
adaptive
=
False
):
N
,
C
,
D
,
H
,
W
=
x
.
shape
N
,
C
,
D
,
H
,
W
=
x
.
shape
if
global_pool
:
if
global_pool
:
ksize
=
[
D
,
H
,
W
]
ksize
=
[
D
,
H
,
W
]
paddings
=
[
0
,
0
,
0
]
paddings
=
[
0
,
0
,
0
]
if
adaptive
:
D_out
,
H_out
,
W_out
=
ksize
else
:
D_out
=
(
D
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
D_out
=
(
D
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
H_out
=
(
H
-
ksize
[
1
]
+
2
*
paddings
[
1
])
//
strides
[
1
]
+
1
H_out
=
(
H
-
ksize
[
1
]
+
2
*
paddings
[
1
])
//
strides
[
1
]
+
1
W_out
=
(
W
-
ksize
[
2
]
+
2
*
paddings
[
2
])
//
strides
[
2
]
+
1
W_out
=
(
W
-
ksize
[
2
]
+
2
*
paddings
[
2
])
//
strides
[
2
]
+
1
out
=
np
.
zeros
((
N
,
C
,
D_out
,
H_out
,
W_out
))
out
=
np
.
zeros
((
N
,
C
,
D_out
,
H_out
,
W_out
))
mask
=
np
.
zeros
((
N
,
C
,
D_out
,
H_out
,
W_out
))
mask
=
np
.
zeros
((
N
,
C
,
D_out
,
H_out
,
W_out
))
for
k
in
range
(
D_out
):
for
k
in
range
(
D_out
):
if
adaptive
:
d_start
=
adaptive_start_index
(
k
,
D
,
ksize
[
0
])
d_end
=
adaptive_end_index
(
k
,
D
,
ksize
[
0
])
else
:
d_start
=
np
.
max
((
k
*
strides
[
0
]
-
paddings
[
0
],
0
))
d_start
=
np
.
max
((
k
*
strides
[
0
]
-
paddings
[
0
],
0
))
d_end
=
np
.
min
((
k
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
D
))
d_end
=
np
.
min
((
k
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
D
))
for
i
in
range
(
H_out
):
for
i
in
range
(
H_out
):
h_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
if
adaptive
:
h_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
h_start
=
adaptive_start_index
(
i
,
H
,
ksize
[
1
])
h_end
=
adaptive_end_index
(
i
,
H
,
ksize
[
1
])
else
:
h_start
=
np
.
max
((
i
*
strides
[
1
]
-
paddings
[
1
],
0
))
h_end
=
np
.
min
((
i
*
strides
[
1
]
+
ksize
[
1
]
-
paddings
[
1
],
H
))
for
j
in
range
(
W_out
):
for
j
in
range
(
W_out
):
w_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
if
adaptive
:
w_end
=
np
.
min
((
j
*
strides
[
1
]
+
ksize
[
1
]
-
paddings
[
1
],
W
))
w_start
=
adaptive_start_index
(
j
,
W
,
ksize
[
2
])
w_end
=
adaptive_end_index
(
j
,
W
,
ksize
[
2
])
else
:
w_start
=
np
.
max
((
j
*
strides
[
2
]
-
paddings
[
2
],
0
))
w_end
=
np
.
min
((
j
*
strides
[
2
]
+
ksize
[
2
]
-
paddings
[
2
],
W
))
x_masked
=
x
[:,
:,
d_start
:
d_end
,
h_start
:
h_end
,
w_start
:
w_end
]
x_masked
=
x
[:,
:,
d_start
:
d_end
,
h_start
:
h_end
,
w_start
:
w_end
]
out
[:,
:,
k
,
i
,
j
]
=
np
.
max
(
x_masked
,
axis
=
(
2
,
3
,
4
))
out
[:,
:,
k
,
i
,
j
]
=
np
.
max
(
x_masked
,
axis
=
(
2
,
3
,
4
))
...
@@ -58,19 +87,33 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=False):
...
@@ -58,19 +87,33 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=False):
return
out
,
mask
return
out
,
mask
def
max_pool2D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
False
):
def
max_pool2D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
False
,
adaptive
=
False
):
N
,
C
,
H
,
W
=
x
.
shape
N
,
C
,
H
,
W
=
x
.
shape
if
global_pool
:
if
global_pool
:
ksize
=
[
H
,
W
]
ksize
=
[
H
,
W
]
paddings
=
[
0
,
0
]
paddings
=
[
0
,
0
]
if
adaptive
:
H_out
,
W_out
=
ksize
else
:
H_out
=
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
H_out
=
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
//
strides
[
0
]
+
1
W_out
=
(
W
-
ksize
[
1
]
+
2
*
paddings
[
1
])
//
strides
[
1
]
+
1
W_out
=
(
W
-
ksize
[
1
]
+
2
*
paddings
[
1
])
//
strides
[
1
]
+
1
out
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
out
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
mask
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
mask
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
for
i
in
range
(
H_out
):
for
i
in
range
(
H_out
):
for
j
in
range
(
W_out
):
for
j
in
range
(
W_out
):
if
adaptive
:
r_start
=
adaptive_start_index
(
i
,
H
,
ksize
[
0
])
r_end
=
adaptive_end_index
(
i
,
H
,
ksize
[
0
])
c_start
=
adaptive_start_index
(
j
,
W
,
ksize
[
1
])
c_end
=
adaptive_end_index
(
j
,
W
,
ksize
[
1
])
else
:
r_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
r_start
=
np
.
max
((
i
*
strides
[
0
]
-
paddings
[
0
],
0
))
r_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
r_end
=
np
.
min
((
i
*
strides
[
0
]
+
ksize
[
0
]
-
paddings
[
0
],
H
))
c_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
c_start
=
np
.
max
((
j
*
strides
[
1
]
-
paddings
[
1
],
0
))
...
@@ -95,10 +138,12 @@ class TestMaxPoolWithIndex_Op(OpTest):
...
@@ -95,10 +138,12 @@ class TestMaxPoolWithIndex_Op(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_test_case
()
self
.
init_test_case
()
self
.
init_global
()
self
.
init_global
()
self
.
init_adaptive
()
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)
output
,
mask
=
self
.
pool_forward_naive
(
input
,
self
.
ksize
,
self
.
strides
,
output
,
mask
=
self
.
pool_forward_naive
(
input
,
self
.
ksize
,
self
.
strides
,
self
.
paddings
,
self
.
global_pool
)
self
.
paddings
,
self
.
global_pool
,
self
.
adaptive
)
output
=
output
.
astype
(
"float32"
)
output
=
output
.
astype
(
"float32"
)
mask
=
mask
.
astype
(
"int32"
)
mask
=
mask
.
astype
(
"int32"
)
...
@@ -107,6 +152,7 @@ class TestMaxPoolWithIndex_Op(OpTest):
...
@@ -107,6 +152,7 @@ class TestMaxPoolWithIndex_Op(OpTest):
'paddings'
:
self
.
paddings
,
'paddings'
:
self
.
paddings
,
'ksize'
:
self
.
ksize
,
'ksize'
:
self
.
ksize
,
'global_pooling'
:
self
.
global_pool
,
'global_pooling'
:
self
.
global_pool
,
'adaptive'
:
self
.
adaptive
,
}
}
self
.
inputs
=
{
'X'
:
input
}
self
.
inputs
=
{
'X'
:
input
}
...
@@ -129,6 +175,9 @@ class TestMaxPoolWithIndex_Op(OpTest):
...
@@ -129,6 +175,9 @@ class TestMaxPoolWithIndex_Op(OpTest):
def
init_global
(
self
):
def
init_global
(
self
):
self
.
global_pool
=
False
self
.
global_pool
=
False
def init_adaptive(self):
    """Set the default for the ``adaptive`` flag; subclasses override to enable it."""
    self.adaptive = False
class
TestCase1
(
TestMaxPoolWithIndex_Op
):
class
TestCase1
(
TestMaxPoolWithIndex_Op
):
def
init_global
(
self
):
def
init_global
(
self
):
...
@@ -190,5 +239,15 @@ class TestCase7(TestCase6):
...
@@ -190,5 +239,15 @@ class TestCase7(TestCase6):
self
.
global_pool
=
False
self
.
global_pool
=
False
class TestCastAdaptive2d(TestCase6):
    """Re-run the TestCase6 configuration with the ``adaptive`` flag switched on."""

    def init_adaptive(self):
        # Override the base-class default so the op and the reference
        # implementation both take the adaptive path.
        self.adaptive = True
class TestCastAdaptive3d(TestMaxPoolWithIndex_Op):
    """Re-run the base max-pool-with-index configuration with ``adaptive`` switched on."""

    def init_adaptive(self):
        # Override the base-class default so the op and the reference
        # implementation both take the adaptive path.
        self.adaptive = True
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录