Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
13ec6f99
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
13ec6f99
编写于
11月 20, 2017
作者:
W
wangmeng28
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'upstream/develop' into factorization_machine_layer
上级
6fed6f20
d5be1d4d
变更
33
隐藏空白更改
内联
并排
Showing
33 changed file
with
872 addition
and
392 deletion
+872
-392
benchmark/paddle/image/googlenet.py
benchmark/paddle/image/googlenet.py
+4
-1
benchmark/paddle/image/run_mkldnn.sh
benchmark/paddle/image/run_mkldnn.sh
+2
-1
paddle/gserver/activations/ActivationFunction.cpp
paddle/gserver/activations/ActivationFunction.cpp
+31
-0
paddle/operators/conv_cudnn_op.cc
paddle/operators/conv_cudnn_op.cc
+4
-3
paddle/operators/conv_cudnn_op.cu.cc
paddle/operators/conv_cudnn_op.cu.cc
+4
-2
paddle/operators/conv_transpose_cudnn_op.cc
paddle/operators/conv_transpose_cudnn_op.cc
+8
-4
paddle/operators/conv_transpose_cudnn_op.cu.cc
paddle/operators/conv_transpose_cudnn_op.cu.cc
+8
-4
paddle/operators/math/pooling.cc
paddle/operators/math/pooling.cc
+30
-30
paddle/operators/math/pooling.cu
paddle/operators/math/pooling.cu
+65
-65
paddle/operators/math/pooling.h
paddle/operators/math/pooling.h
+4
-4
paddle/operators/pool_cudnn_op.cc
paddle/operators/pool_cudnn_op.cc
+14
-2
paddle/operators/pool_cudnn_op.cu.cc
paddle/operators/pool_cudnn_op.cu.cc
+23
-4
paddle/operators/pool_op.cc
paddle/operators/pool_op.cc
+8
-4
paddle/operators/pool_op.cu.cc
paddle/operators/pool_op.cu.cc
+8
-4
paddle/operators/pool_with_index_op.cc
paddle/operators/pool_with_index_op.cc
+29
-9
paddle/operators/pool_with_index_op.cu.cc
paddle/operators/pool_with_index_op.cu.cc
+8
-4
paddle/operators/pool_with_index_op.h
paddle/operators/pool_with_index_op.h
+9
-9
paddle/operators/sequence_slice_op.cc
paddle/operators/sequence_slice_op.cc
+132
-0
paddle/operators/sequence_slice_op.cu
paddle/operators/sequence_slice_op.cu
+23
-0
paddle/operators/sequence_slice_op.h
paddle/operators/sequence_slice_op.h
+173
-0
paddle/platform/cudnn_helper.h
paddle/platform/cudnn_helper.h
+4
-2
paddle/platform/cudnn_helper_test.cc
paddle/platform/cudnn_helper_test.cc
+34
-0
python/paddle/trainer_config_helpers/activations.py
python/paddle/trainer_config_helpers/activations.py
+15
-2
python/paddle/v2/fluid/layers.py
python/paddle/v2/fluid/layers.py
+1
-1
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
...le/v2/fluid/tests/book/test_image_classification_train.py
+8
-3
python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
.../paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
+4
-4
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
...n/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+11
-4
python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
...dle/v2/fluid/tests/book/test_understand_sentiment_conv.py
+11
-7
python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
...luid/tests/book/test_understand_sentiment_dynamic_lstm.py
+11
-7
python/paddle/v2/fluid/tests/test_pool2d_op.py
python/paddle/v2/fluid/tests/test_pool2d_op.py
+26
-108
python/paddle/v2/fluid/tests/test_pool3d_op.py
python/paddle/v2/fluid/tests/test_pool3d_op.py
+78
-35
python/paddle/v2/fluid/tests/test_pool_max_op.py
python/paddle/v2/fluid/tests/test_pool_max_op.py
+35
-69
python/paddle/v2/fluid/tests/test_sequence_slice_op.py
python/paddle/v2/fluid/tests/test_sequence_slice_op.py
+47
-0
未找到文件。
benchmark/paddle/image/googlenet.py
浏览文件 @
13ec6f99
...
@@ -5,6 +5,7 @@ height = 224
...
@@ -5,6 +5,7 @@ height = 224
width
=
224
width
=
224
num_class
=
1000
num_class
=
1000
batch_size
=
get_config_arg
(
'batch_size'
,
int
,
128
)
batch_size
=
get_config_arg
(
'batch_size'
,
int
,
128
)
use_gpu
=
get_config_arg
(
'use_gpu'
,
bool
,
True
)
args
=
{
'height'
:
height
,
'width'
:
width
,
'color'
:
True
,
'num_class'
:
num_class
}
args
=
{
'height'
:
height
,
'width'
:
width
,
'color'
:
True
,
'num_class'
:
num_class
}
define_py_data_sources2
(
define_py_data_sources2
(
...
@@ -16,6 +17,8 @@ settings(
...
@@ -16,6 +17,8 @@ settings(
learning_method
=
MomentumOptimizer
(
0.9
),
learning_method
=
MomentumOptimizer
(
0.9
),
regularization
=
L2Regularization
(
0.0005
*
batch_size
))
regularization
=
L2Regularization
(
0.0005
*
batch_size
))
conv_projection
=
conv_projection
if
use_gpu
else
img_conv_layer
def
inception2
(
name
,
input
,
channels
,
\
def
inception2
(
name
,
input
,
channels
,
\
filter1
,
filter1
,
filter3R
,
filter3
,
filter3R
,
filter3
,
...
@@ -138,7 +141,7 @@ def inception(name, input, channels, \
...
@@ -138,7 +141,7 @@ def inception(name, input, channels, \
cat
=
concat_layer
(
cat
=
concat_layer
(
name
=
name
,
name
=
name
,
input
=
[
cov1
,
cov3
,
cov5
,
covprj
],
input
=
[
cov1
,
cov3
,
cov5
,
covprj
],
bias_attr
=
True
,
bias_attr
=
True
if
use_gpu
else
False
,
act
=
ReluActivation
())
act
=
ReluActivation
())
return
cat
return
cat
...
...
benchmark/paddle/image/run_mkldnn.sh
浏览文件 @
13ec6f99
...
@@ -40,6 +40,7 @@ fi
...
@@ -40,6 +40,7 @@ fi
for
use_mkldnn
in
True False
;
do
for
use_mkldnn
in
True False
;
do
for
batchsize
in
64 128 256
;
do
for
batchsize
in
64 128 256
;
do
train vgg 19
$batchsize
$use_mkldnn
train vgg 19
$batchsize
$use_mkldnn
train resnet 50
$batchsize
$use_mkldnn
train resnet 50
$batchsize
$use_mkldnn
train googlenet v1
$batchsize
$use_mkldnn
done
done
done
done
paddle/gserver/activations/ActivationFunction.cpp
浏览文件 @
13ec6f99
...
@@ -212,6 +212,37 @@ Error __must_check backward(Argument& act) {
...
@@ -212,6 +212,37 @@ Error __must_check backward(Argument& act) {
}
}
END_DEFINE_ACTIVATION
(
sequence_softmax
)
END_DEFINE_ACTIVATION
(
sequence_softmax
)
/*
* @brief SoftSign Activation.
* \f[
* f(z) = \frac{z}{1 + |z|}
* \f]
*/
BEGIN_DEFINE_ACTIVATION
(
softsign
)
private:
MatrixPtr
denominator_
;
Error
__must_check
forward
(
Argument
&
act
)
{
size_t
height
=
act
.
value
->
getHeight
();
size_t
width
=
act
.
value
->
getWidth
();
Matrix
::
resizeOrCreate
(
denominator_
,
height
,
width
,
false
,
useGpu
(
act
.
deviceId
));
denominator_
->
assign
(
*
act
.
value
);
denominator_
->
abs2
();
denominator_
->
add
(
1.
);
act
.
value
->
dotDiv
(
*
act
.
value
,
*
denominator_
);
return
Error
();
}
Error
__must_check
backward
(
Argument
&
act
)
{
denominator_
->
square2
();
denominator_
->
scalarDiv
(
*
denominator_
,
1.
);
act
.
grad
->
dotMul
(
*
act
.
grad
,
*
denominator_
);
return
Error
();
}
END_DEFINE_ACTIVATION
(
softsign
)
/**
/**
* @brief Relu Activation.
* @brief Relu Activation.
* forward. y = max(0, z)
* forward. y = max(0, z)
...
...
paddle/operators/conv_cudnn_op.cc
浏览文件 @
13ec6f99
...
@@ -40,7 +40,8 @@ REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
...
@@ -40,7 +40,8 @@ REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
ops
::
ConvOpGrad
);
ops
::
ConvOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv_cudnn
,
REGISTER_OP_CPU_KERNEL
(
conv_cudnn
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
conv_cudnn_grad
,
conv_cudnn_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
paddle/operators/conv_cudnn_op.cu.cc
浏览文件 @
13ec6f99
...
@@ -259,6 +259,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
...
@@ -259,6 +259,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
REGISTER_OP_GPU_KERNEL
(
conv_cudnn
,
paddle
::
operators
::
CudnnConvOpKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
conv_cudnn
,
paddle
::
operators
::
CudnnConvOpKernel
<
float
>
,
paddle
::
operators
::
CudnnConvOpKernel
<
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv_cudnn_grad
,
REGISTER_OP_GPU_KERNEL
(
conv_cudnn_grad
,
paddle
::
operators
::
CudnnConvGradOpKernel
<
float
>
);
paddle
::
operators
::
CudnnConvGradOpKernel
<
float
>
,
paddle
::
operators
::
CudnnConvGradOpKernel
<
double
>
);
paddle/operators/conv_transpose_cudnn_op.cc
浏览文件 @
13ec6f99
...
@@ -61,10 +61,12 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
...
@@ -61,10 +61,12 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_cudnn
,
conv2d_transpose_cudnn
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_cudnn_grad
,
conv2d_transpose_cudnn_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP
(
conv3d_transpose_cudnn
,
ops
::
ConvTransposeOp
,
REGISTER_OP
(
conv3d_transpose_cudnn
,
ops
::
ConvTransposeOp
,
ops
::
CudnnConv3DTransposeOpMaker
,
conv3d_transpose_cudnn_grad
,
ops
::
CudnnConv3DTransposeOpMaker
,
conv3d_transpose_cudnn_grad
,
...
@@ -72,7 +74,9 @@ REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp,
...
@@ -72,7 +74,9 @@ REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp,
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose_cudnn
,
conv3d_transpose_cudnn
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose_cudnn_grad
,
conv3d_transpose_cudnn_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
paddle/operators/conv_transpose_cudnn_op.cu.cc
浏览文件 @
13ec6f99
...
@@ -235,11 +235,15 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
...
@@ -235,11 +235,15 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose_cudnn
,
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose_cudnn
,
ops
::
CudnnConvTransposeOpKernel
<
float
>
);
ops
::
CudnnConvTransposeOpKernel
<
float
>
,
ops
::
CudnnConvTransposeOpKernel
<
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose_cudnn_grad
,
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose_cudnn_grad
,
ops
::
CudnnConvTransposeGradOpKernel
<
float
>
);
ops
::
CudnnConvTransposeGradOpKernel
<
float
>
,
ops
::
CudnnConvTransposeGradOpKernel
<
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose_cudnn
,
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose_cudnn
,
ops
::
CudnnConvTransposeOpKernel
<
float
>
);
ops
::
CudnnConvTransposeOpKernel
<
float
>
,
ops
::
CudnnConvTransposeOpKernel
<
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose_cudnn_grad
,
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose_cudnn_grad
,
ops
::
CudnnConvTransposeGradOpKernel
<
float
>
);
ops
::
CudnnConvTransposeGradOpKernel
<
float
>
,
ops
::
CudnnConvTransposeGradOpKernel
<
double
>
);
paddle/operators/math/pooling.cc
浏览文件 @
13ec6f99
...
@@ -498,8 +498,8 @@ template class Pool3dGradFunctor<
...
@@ -498,8 +498,8 @@ template class Pool3dGradFunctor<
* Ksize, strides, paddings are two elements. These two elements represent
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
* height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
class
MaxPool2dWithIndexFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool2dWithIndexFunctor
<
platform
::
CPUPlace
,
T
1
,
T2
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
...
@@ -520,9 +520,9 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
...
@@ -520,9 +520,9 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
const
int
input_stride
=
input_height
*
input_width
;
const
int
input_stride
=
input_height
*
input_width
;
const
int
output_stride
=
output_height
*
output_width
;
const
int
output_stride
=
output_height
*
output_width
;
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
1
*
input_data
=
input
.
data
<
T1
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
1
*
output_data
=
output
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
2
*
mask_data
=
mask
->
mutable_data
<
T2
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -535,7 +535,7 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
...
@@ -535,7 +535,7 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
wstart
=
std
::
max
(
wstart
,
0
);
T
ele
=
static_cast
<
T
>
(
-
FLT_MAX
);
T
1
ele
=
static_cast
<
T1
>
(
-
FLT_MAX
);
int
index
=
-
1
;
int
index
=
-
1
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
...
@@ -563,8 +563,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
...
@@ -563,8 +563,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
* Ksize, strides, paddings are two elements. These two elements represent
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
* height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
class
MaxPool2dWithIndexGradFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool2dWithIndexGradFunctor
<
platform
::
CPUPlace
,
T
1
,
T2
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
...
@@ -580,9 +580,9 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
...
@@ -580,9 +580,9 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
const
int
input_stride
=
input_height
*
input_width
;
const
int
input_stride
=
input_height
*
input_width
;
const
int
output_stride
=
output_height
*
output_width
;
const
int
output_stride
=
output_height
*
output_width
;
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
2
*
mask_data
=
mask
.
data
<
T2
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
1
*
output_grad_data
=
output_grad
.
data
<
T1
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
1
*
input_grad_data
=
input_grad
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -602,18 +602,18 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
...
@@ -602,18 +602,18 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
}
}
};
};
template
class
MaxPool2dWithIndexFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
MaxPool2dWithIndexFunctor
<
platform
::
CPUPlace
,
float
,
int
>;
template
class
MaxPool2dWithIndexGradFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
MaxPool2dWithIndexGradFunctor
<
platform
::
CPUPlace
,
float
,
int
>;
template
class
MaxPool2dWithIndexFunctor
<
platform
::
CPUPlace
,
double
>;
template
class
MaxPool2dWithIndexFunctor
<
platform
::
CPUPlace
,
double
,
int
>;
template
class
MaxPool2dWithIndexGradFunctor
<
platform
::
CPUPlace
,
double
>;
template
class
MaxPool2dWithIndexGradFunctor
<
platform
::
CPUPlace
,
double
,
int
>;
/*
/*
* All tensors are in NCDHW format.
* All tensors are in NCDHW format.
* Ksize, strides, paddings are three elements. These three elements represent
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
* depth, height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
class
MaxPool3dWithIndexFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool3dWithIndexFunctor
<
platform
::
CPUPlace
,
T
1
,
T2
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
...
@@ -639,9 +639,9 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
...
@@ -639,9 +639,9 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
const
int
input_stride
=
input_depth
*
input_height
*
input_width
;
const
int
input_stride
=
input_depth
*
input_height
*
input_width
;
const
int
output_stride
=
output_depth
*
output_height
*
output_width
;
const
int
output_stride
=
output_depth
*
output_height
*
output_width
;
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
1
*
input_data
=
input
.
data
<
T1
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
1
*
output_data
=
output
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
2
*
mask_data
=
mask
->
mutable_data
<
T2
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -659,7 +659,7 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
...
@@ -659,7 +659,7 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
wstart
=
std
::
max
(
wstart
,
0
);
wstart
=
std
::
max
(
wstart
,
0
);
int
output_idx
=
(
pd
*
output_height
+
ph
)
*
output_width
+
pw
;
int
output_idx
=
(
pd
*
output_height
+
ph
)
*
output_width
+
pw
;
T
ele
=
static_cast
<
T
>
(
-
FLT_MAX
);
T
1
ele
=
static_cast
<
T1
>
(
-
FLT_MAX
);
int
index
=
-
1
;
int
index
=
-
1
;
for
(
int
d
=
dstart
;
d
<
dend
;
++
d
)
{
for
(
int
d
=
dstart
;
d
<
dend
;
++
d
)
{
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
...
@@ -691,8 +691,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
...
@@ -691,8 +691,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
* Ksize, strides, paddings are three elements. These three elements represent
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
* depth, height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
class
MaxPool3dWithIndexGradFunctor
<
platform
::
CPUPlace
,
T
>
{
class
MaxPool3dWithIndexGradFunctor
<
platform
::
CPUPlace
,
T
1
,
T2
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
...
@@ -710,9 +710,9 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
...
@@ -710,9 +710,9 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
const
int
input_stride
=
input_depth
*
input_height
*
input_width
;
const
int
input_stride
=
input_depth
*
input_height
*
input_width
;
const
int
output_stride
=
output_depth
*
output_height
*
output_width
;
const
int
output_stride
=
output_depth
*
output_height
*
output_width
;
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
2
*
mask_data
=
mask
.
data
<
T2
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
1
*
output_grad_data
=
output_grad
.
data
<
T1
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
1
*
input_grad_data
=
input_grad
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
...
@@ -735,10 +735,10 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
...
@@ -735,10 +735,10 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
}
}
};
};
template
class
MaxPool3dWithIndexFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
MaxPool3dWithIndexFunctor
<
platform
::
CPUPlace
,
float
,
int
>;
template
class
MaxPool3dWithIndexGradFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
MaxPool3dWithIndexGradFunctor
<
platform
::
CPUPlace
,
float
,
int
>;
template
class
MaxPool3dWithIndexFunctor
<
platform
::
CPUPlace
,
double
>;
template
class
MaxPool3dWithIndexFunctor
<
platform
::
CPUPlace
,
double
,
int
>;
template
class
MaxPool3dWithIndexGradFunctor
<
platform
::
CPUPlace
,
double
>;
template
class
MaxPool3dWithIndexGradFunctor
<
platform
::
CPUPlace
,
double
,
int
>;
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/operators/math/pooling.cu
浏览文件 @
13ec6f99
...
@@ -658,13 +658,13 @@ template class Pool3dGradFunctor<
...
@@ -658,13 +658,13 @@ template class Pool3dGradFunctor<
template
class
Pool3dGradFunctor
<
template
class
Pool3dGradFunctor
<
platform
::
GPUPlace
,
paddle
::
operators
::
math
::
AvgPoolGrad
<
double
>,
double
>
;
platform
::
GPUPlace
,
paddle
::
operators
::
math
::
AvgPoolGrad
<
double
>,
double
>
;
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
__global__
void
KernelMaxPool2dWithIdx
(
__global__
void
KernelMaxPool2dWithIdx
(
const
int
nthreads
,
const
T
*
input_data
,
const
int
channels
,
const
int
nthreads
,
const
T
1
*
input_data
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
const
int
output_height
,
const
int
input_height
,
const
int
input_width
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
T
*
output_data
,
T
*
mask_data
)
{
const
int
padding_width
,
T
1
*
output_data
,
T2
*
mask_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -681,7 +681,7 @@ __global__ void KernelMaxPool2dWithIdx(
...
@@ -681,7 +681,7 @@ __global__ void KernelMaxPool2dWithIdx(
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
input_data
+=
(
batch_idx
*
channels
+
c
)
*
input_height
*
input_width
;
input_data
+=
(
batch_idx
*
channels
+
c
)
*
input_height
*
input_width
;
T
ele
=
-
FLT_MAX
;
T
1
ele
=
-
FLT_MAX
;
int
max_index
=
-
1
;
int
max_index
=
-
1
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
...
@@ -697,13 +697,13 @@ __global__ void KernelMaxPool2dWithIdx(
...
@@ -697,13 +697,13 @@ __global__ void KernelMaxPool2dWithIdx(
}
}
}
}
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
__global__
void
KernelMaxPool2DWithIdxGrad
(
__global__
void
KernelMaxPool2DWithIdxGrad
(
const
int
nthreads
,
const
T
*
output_grad
,
const
T
*
mask_data
,
const
int
nthreads
,
const
T
1
*
output_grad
,
const
T2
*
mask_data
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
ksize_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
T
*
input_grad
)
{
const
int
padding_height
,
const
int
padding_width
,
T
1
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_offset
=
index
%
input_width
;
int
w_offset
=
index
%
input_width
;
...
@@ -724,7 +724,7 @@ __global__ void KernelMaxPool2DWithIdxGrad(
...
@@ -724,7 +724,7 @@ __global__ void KernelMaxPool2DWithIdxGrad(
int
pw_end
=
int
pw_end
=
min
((
w_offset
+
padding_width
)
/
stride_width
+
1
,
output_width
);
min
((
w_offset
+
padding_width
)
/
stride_width
+
1
,
output_width
);
T
gradient
=
0
;
T
1
gradient
=
0
;
int
input_current_featuremap_idx
=
h_offset
*
input_width
+
w_offset
;
int
input_current_featuremap_idx
=
h_offset
*
input_width
+
w_offset
;
int
output_idx
=
int
output_idx
=
(
batch_idx
*
channels
+
c_offset
)
*
output_height
*
output_width
;
(
batch_idx
*
channels
+
c_offset
)
*
output_height
*
output_width
;
...
@@ -746,8 +746,8 @@ __global__ void KernelMaxPool2DWithIdxGrad(
...
@@ -746,8 +746,8 @@ __global__ void KernelMaxPool2DWithIdxGrad(
* Ksize, strides, paddings are two elements. These two elements represent
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
* height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
class
MaxPool2dWithIndexFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool2dWithIndexFunctor
<
platform
::
GPUPlace
,
T
1
,
T2
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
...
@@ -767,9 +767,9 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -767,9 +767,9 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
const
int
padding_height
=
paddings
[
0
];
const
int
padding_height
=
paddings
[
0
];
const
int
padding_width
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
1
];
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
1
*
input_data
=
input
.
data
<
T1
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
1
*
output_data
=
output
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
2
*
mask_data
=
mask
->
mutable_data
<
T2
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
...
@@ -777,9 +777,9 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -777,9 +777,9 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
dim3
grid
(
blocks
,
1
);
dim3
grid
(
blocks
,
1
);
KernelMaxPool2dWithIdx
<
KernelMaxPool2dWithIdx
<
T
><<<
grid
,
threads
,
0
,
T
1
,
T2
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
output_data
,
mask_data
);
stride_width
,
padding_height
,
padding_width
,
output_data
,
mask_data
);
...
@@ -791,8 +791,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -791,8 +791,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
* Ksize, strides, paddings are two elements. These two elements represent
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
* height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
class
MaxPool2dWithIndexGradFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool2dWithIndexGradFunctor
<
platform
::
GPUPlace
,
T
1
,
T2
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
...
@@ -812,9 +812,9 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -812,9 +812,9 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
const
int
padding_height
=
paddings
[
0
];
const
int
padding_height
=
paddings
[
0
];
const
int
padding_width
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
1
];
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
2
*
mask_data
=
mask
.
data
<
T2
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
1
*
output_grad_data
=
output_grad
.
data
<
T1
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
1
*
input_grad_data
=
input_grad
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
input_channels
*
input_height
*
input_width
;
int
nthreads
=
batch_size
*
input_channels
*
input_height
*
input_width
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
...
@@ -822,30 +822,30 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -822,30 +822,30 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
dim3
grid
(
blocks
,
1
);
dim3
grid
(
blocks
,
1
);
KernelMaxPool2DWithIdxGrad
<
KernelMaxPool2DWithIdxGrad
<
T
><<<
grid
,
threads
,
0
,
T
1
,
T2
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
nthreads
,
output_grad_data
,
mask_data
,
.
stream
()
>>>
(
input_channels
,
input_height
,
input_width
,
nthreads
,
output_grad_data
,
mask_data
,
input_channels
,
input_height
,
output_height
,
output_width
,
ksize_height
,
input_width
,
output_height
,
output_width
,
ksize_height
,
ksize_width
,
ksize_width
,
stride_height
,
stride
_width
,
stride_height
,
stride_width
,
padding_height
,
padding
_width
,
padding_height
,
padding_width
,
input_grad_data
);
input_grad_data
);
}
}
};
};
template
class
MaxPool2dWithIndexFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
MaxPool2dWithIndexFunctor
<
platform
::
GPUPlace
,
float
,
int
>;
template
class
MaxPool2dWithIndexGradFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
MaxPool2dWithIndexGradFunctor
<
platform
::
GPUPlace
,
float
,
int
>;
template
class
MaxPool2dWithIndexFunctor
<
platform
::
GPUPlace
,
double
>;
template
class
MaxPool2dWithIndexFunctor
<
platform
::
GPUPlace
,
double
,
int
>;
template
class
MaxPool2dWithIndexGradFunctor
<
platform
::
GPUPlace
,
double
>;
template
class
MaxPool2dWithIndexGradFunctor
<
platform
::
GPUPlace
,
double
,
int
>;
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
__global__
void
KernelMaxPool3DWithIdx
(
__global__
void
KernelMaxPool3DWithIdx
(
const
int
nthreads
,
const
T
*
input_data
,
const
int
channels
,
const
int
nthreads
,
const
T
1
*
input_data
,
const
int
channels
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_width
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_width
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_width
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
T
*
output_data
,
T
*
mask_data
)
{
T
1
*
output_data
,
T2
*
mask_data
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
pw
=
index
%
output_width
;
int
pw
=
index
%
output_width
;
...
@@ -865,7 +865,7 @@ __global__ void KernelMaxPool3DWithIdx(
...
@@ -865,7 +865,7 @@ __global__ void KernelMaxPool3DWithIdx(
hstart
=
max
(
hstart
,
0
);
hstart
=
max
(
hstart
,
0
);
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
T
ele
=
-
FLT_MAX
;
T
1
ele
=
-
FLT_MAX
;
int
max_index
=
-
1
;
int
max_index
=
-
1
;
input_data
+=
input_data
+=
(
batch_idx
*
channels
+
c
)
*
input_depth
*
input_height
*
input_width
;
(
batch_idx
*
channels
+
c
)
*
input_depth
*
input_height
*
input_width
;
...
@@ -885,15 +885,15 @@ __global__ void KernelMaxPool3DWithIdx(
...
@@ -885,15 +885,15 @@ __global__ void KernelMaxPool3DWithIdx(
}
}
}
}
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
__global__
void
KernelMaxPool3DWithIdxGrad
(
__global__
void
KernelMaxPool3DWithIdxGrad
(
const
int
nthreads
,
const
T
*
output_grad
,
const
T
*
mask
,
const
int
channels
,
const
int
nthreads
,
const
T
1
*
output_grad
,
const
T2
*
mask
,
const
int
input_depth
,
const
int
input_height
,
const
int
input_width
,
const
int
channels
,
const
int
input_depth
,
const
int
input_height
,
const
int
output_depth
,
const
int
output_height
,
const
int
output_width
,
const
int
input_width
,
const
int
output_depth
,
const
int
output_height
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
ksize_width
,
const
int
output_width
,
const
int
ksize_depth
,
const
int
ksize_height
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
stride_width
,
const
int
ksize_width
,
const
int
stride_depth
,
const
int
stride_height
,
const
int
padding_depth
,
const
int
padding_height
,
const
int
padding_width
,
const
int
stride_width
,
const
int
padding_depth
,
const
int
padding_height
,
T
*
input_grad
)
{
const
int
padding_width
,
T1
*
input_grad
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_offset
=
index
%
input_width
;
int
w_offset
=
index
%
input_width
;
...
@@ -922,7 +922,7 @@ __global__ void KernelMaxPool3DWithIdxGrad(
...
@@ -922,7 +922,7 @@ __global__ void KernelMaxPool3DWithIdxGrad(
int
pw_end
=
int
pw_end
=
min
((
w_offset
+
padding_width
)
/
stride_width
+
1
,
output_width
);
min
((
w_offset
+
padding_width
)
/
stride_width
+
1
,
output_width
);
T
gradient
=
0
;
T
1
gradient
=
0
;
int
input_current_feature_map_idx
=
int
input_current_feature_map_idx
=
(
d_offset
*
input_height
+
h_offset
)
*
input_width
+
w_offset
;
(
d_offset
*
input_height
+
h_offset
)
*
input_width
+
w_offset
;
int
output_idx
=
(
batch_idx
*
channels
+
c_offset
)
*
output_depth
*
int
output_idx
=
(
batch_idx
*
channels
+
c_offset
)
*
output_depth
*
...
@@ -949,8 +949,8 @@ __global__ void KernelMaxPool3DWithIdxGrad(
...
@@ -949,8 +949,8 @@ __global__ void KernelMaxPool3DWithIdxGrad(
* Ksize, strides, paddings are three elements. These three elements represent
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
* depth, height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
class
MaxPool3dWithIndexFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool3dWithIndexFunctor
<
platform
::
GPUPlace
,
T
1
,
T2
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
const
framework
::
Tensor
&
input
,
std
::
vector
<
int
>&
ksize
,
...
@@ -975,9 +975,9 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -975,9 +975,9 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
const
int
padding_height
=
paddings
[
1
];
const
int
padding_height
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
2
];
const
int
padding_width
=
paddings
[
2
];
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
1
*
input_data
=
input
.
data
<
T1
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
1
*
output_data
=
output
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
T
*
mask_data
=
mask
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
2
*
mask_data
=
mask
->
mutable_data
<
T2
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_depth
*
output_height
*
int
nthreads
=
batch_size
*
output_channels
*
output_depth
*
output_height
*
output_width
;
output_width
;
...
@@ -986,9 +986,9 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -986,9 +986,9 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
dim3
grid
(
blocks
,
1
);
dim3
grid
(
blocks
,
1
);
KernelMaxPool3DWithIdx
<
KernelMaxPool3DWithIdx
<
T
><<<
grid
,
threads
,
0
,
T
1
,
T2
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
input_data
,
input_channels
,
input_depth
,
input_height
,
nthreads
,
input_data
,
input_channels
,
input_depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
stride_width
,
...
@@ -1001,8 +1001,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
...
@@ -1001,8 +1001,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
* Ksize, strides, paddings are three elements. These three elements represent
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
* depth, height and width, respectively.
*/
*/
template
<
typename
T
>
template
<
typename
T
1
,
typename
T2
>
class
MaxPool3dWithIndexGradFunctor
<
platform
::
GPUPlace
,
T
>
{
class
MaxPool3dWithIndexGradFunctor
<
platform
::
GPUPlace
,
T
1
,
T2
>
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
...
@@ -1027,9 +1027,9 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -1027,9 +1027,9 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
const
int
padding_height
=
paddings
[
1
];
const
int
padding_height
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
2
];
const
int
padding_width
=
paddings
[
2
];
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
1
*
output_grad_data
=
output_grad
.
data
<
T1
>
();
const
T
*
mask_data
=
mask
.
data
<
T
>
();
const
T
2
*
mask_data
=
mask
.
data
<
T2
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
1
*
input_grad_data
=
input_grad
->
mutable_data
<
T1
>
(
context
.
GetPlace
());
int
nthreads
=
int
nthreads
=
batch_size
*
input_channels
*
input_depth
*
input_height
*
input_width
;
batch_size
*
input_channels
*
input_depth
*
input_height
*
input_width
;
...
@@ -1038,9 +1038,9 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -1038,9 +1038,9 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
dim3
grid
(
blocks
,
1
);
dim3
grid
(
blocks
,
1
);
KernelMaxPool3DWithIdxGrad
<
KernelMaxPool3DWithIdxGrad
<
T
><<<
grid
,
threads
,
0
,
T
1
,
T2
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
.
stream
()
>>>
(
nthreads
,
output_grad_data
,
mask_data
,
input_channels
,
input_depth
,
nthreads
,
output_grad_data
,
mask_data
,
input_channels
,
input_depth
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_width
,
input_height
,
input_width
,
output_depth
,
output_height
,
output_width
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
ksize_depth
,
ksize_height
,
ksize_width
,
stride_depth
,
stride_height
,
...
@@ -1049,10 +1049,10 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
...
@@ -1049,10 +1049,10 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
}
}
};
};
template
class
MaxPool3dWithIndexFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
MaxPool3dWithIndexFunctor
<
platform
::
GPUPlace
,
float
,
int
>;
template
class
MaxPool3dWithIndexGradFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
MaxPool3dWithIndexGradFunctor
<
platform
::
GPUPlace
,
float
,
int
>;
template
class
MaxPool3dWithIndexFunctor
<
platform
::
GPUPlace
,
double
>;
template
class
MaxPool3dWithIndexFunctor
<
platform
::
GPUPlace
,
double
,
int
>;
template
class
MaxPool3dWithIndexGradFunctor
<
platform
::
GPUPlace
,
double
>;
template
class
MaxPool3dWithIndexGradFunctor
<
platform
::
GPUPlace
,
double
,
int
>;
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
...
...
paddle/operators/math/pooling.h
浏览文件 @
13ec6f99
...
@@ -153,7 +153,7 @@ class MaxPool3dGradFunctor {
...
@@ -153,7 +153,7 @@ class MaxPool3dGradFunctor {
* In pool2d, all tensors are in NCHW format. In pool3d, all tensors are in
* In pool2d, all tensors are in NCHW format. In pool3d, all tensors are in
* NCDHW format.
* NCDHW format.
*/
*/
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
1
,
typename
T2
>
class
MaxPool2dWithIndexFunctor
{
class
MaxPool2dWithIndexFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
...
@@ -162,7 +162,7 @@ class MaxPool2dWithIndexFunctor {
...
@@ -162,7 +162,7 @@ class MaxPool2dWithIndexFunctor {
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
);
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
);
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
1
,
typename
T2
>
class
MaxPool2dWithIndexGradFunctor
{
class
MaxPool2dWithIndexGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
...
@@ -172,7 +172,7 @@ class MaxPool2dWithIndexGradFunctor {
...
@@ -172,7 +172,7 @@ class MaxPool2dWithIndexGradFunctor {
framework
::
Tensor
*
input_grad
);
framework
::
Tensor
*
input_grad
);
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
1
,
typename
T2
>
class
MaxPool3dWithIndexFunctor
{
class
MaxPool3dWithIndexFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
...
@@ -181,7 +181,7 @@ class MaxPool3dWithIndexFunctor {
...
@@ -181,7 +181,7 @@ class MaxPool3dWithIndexFunctor {
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
);
framework
::
Tensor
*
output
,
framework
::
Tensor
*
mask
);
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
1
,
typename
T2
>
class
MaxPool3dWithIndexGradFunctor
{
class
MaxPool3dWithIndexGradFunctor
{
public:
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
...
...
paddle/operators/pool_cudnn_op.cc
浏览文件 @
13ec6f99
...
@@ -20,6 +20,18 @@ REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
...
@@ -20,6 +20,18 @@ REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
ops
::
PoolOpGrad
);
ops
::
PoolOpGrad
);
REGISTER_OP_CPU_KERNEL
(
pool2d_cudnn
,
REGISTER_OP_CPU_KERNEL
(
pool2d_cudnn
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
pool2d_cudnn_grad
,
REGISTER_OP_CPU_KERNEL
(
pool2d_cudnn_grad
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
)
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
)
REGISTER_OP
(
pool3d_cudnn
,
ops
::
PoolOp
,
ops
::
Pool3dOpMaker
,
pool3d_cudnn_grad
,
ops
::
PoolOpGrad
);
REGISTER_OP_CPU_KERNEL
(
pool3d_cudnn
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
pool3d_cudnn_grad
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
)
paddle/operators/pool_cudnn_op.cu.cc
浏览文件 @
13ec6f99
...
@@ -52,7 +52,13 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
...
@@ -52,7 +52,13 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
ScopedTensorDescriptor
input_desc
;
ScopedTensorDescriptor
input_desc
;
ScopedTensorDescriptor
output_desc
;
ScopedTensorDescriptor
output_desc
;
ScopedPoolingDescriptor
pool_desc
;
ScopedPoolingDescriptor
pool_desc
;
DataLayout
layout
=
DataLayout
::
kNCHW
;
DataLayout
layout
;
if
(
strides
.
size
()
==
2U
)
{
layout
=
DataLayout
::
kNCHW
;
}
else
{
layout
=
DataLayout
::
kNCDHW
;
}
cudnnTensorDescriptor_t
cudnn_input_desc
=
input_desc
.
descriptor
<
T
>
(
cudnnTensorDescriptor_t
cudnn_input_desc
=
input_desc
.
descriptor
<
T
>
(
layout
,
framework
::
vectorize2int
(
input
->
dims
()));
layout
,
framework
::
vectorize2int
(
input
->
dims
()));
...
@@ -112,7 +118,13 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
...
@@ -112,7 +118,13 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
ScopedTensorDescriptor
input_desc
;
ScopedTensorDescriptor
input_desc
;
ScopedTensorDescriptor
output_desc
;
ScopedTensorDescriptor
output_desc
;
ScopedPoolingDescriptor
pool_desc
;
ScopedPoolingDescriptor
pool_desc
;
DataLayout
layout
=
DataLayout
::
kNCHW
;
DataLayout
layout
;
if
(
strides
.
size
()
==
2U
)
{
layout
=
DataLayout
::
kNCHW
;
}
else
{
layout
=
DataLayout
::
kNCDHW
;
}
cudnnTensorDescriptor_t
cudnn_input_desc
=
input_desc
.
descriptor
<
T
>
(
cudnnTensorDescriptor_t
cudnn_input_desc
=
input_desc
.
descriptor
<
T
>
(
layout
,
framework
::
vectorize2int
(
input
->
dims
()));
layout
,
framework
::
vectorize2int
(
input
->
dims
()));
...
@@ -150,5 +162,12 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
...
@@ -150,5 +162,12 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
pool2d_cudnn
,
ops
::
PoolCudnnOpKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
pool2d_cudnn
,
ops
::
PoolCudnnOpKernel
<
float
>
,
REGISTER_OP_GPU_KERNEL
(
pool2d_cudnn_grad
,
ops
::
PoolCudnnGradOpKernel
<
float
>
);
ops
::
PoolCudnnOpKernel
<
double
>
);
REGISTER_OP_GPU_KERNEL
(
pool2d_cudnn_grad
,
ops
::
PoolCudnnGradOpKernel
<
float
>
,
ops
::
PoolCudnnGradOpKernel
<
double
>
);
REGISTER_OP_GPU_KERNEL
(
pool3d_cudnn
,
ops
::
PoolCudnnOpKernel
<
float
>
,
ops
::
PoolCudnnOpKernel
<
double
>
);
REGISTER_OP_GPU_KERNEL
(
pool3d_cudnn_grad
,
ops
::
PoolCudnnGradOpKernel
<
float
>
,
ops
::
PoolCudnnGradOpKernel
<
double
>
);
paddle/operators/pool_op.cc
浏览文件 @
13ec6f99
...
@@ -217,14 +217,18 @@ REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad,
...
@@ -217,14 +217,18 @@ REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad,
ops
::
PoolOpGrad
);
ops
::
PoolOpGrad
);
REGISTER_OP_CPU_KERNEL
(
pool2d
,
REGISTER_OP_CPU_KERNEL
(
pool2d
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
pool2d_grad
,
REGISTER_OP_CPU_KERNEL
(
pool2d_grad
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
)
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
)
REGISTER_OP
(
pool3d
,
ops
::
PoolOp
,
ops
::
Pool3dOpMaker
,
pool3d_grad
,
REGISTER_OP
(
pool3d
,
ops
::
PoolOp
,
ops
::
Pool3dOpMaker
,
pool3d_grad
,
ops
::
PoolOpGrad
);
ops
::
PoolOpGrad
);
REGISTER_OP_CPU_KERNEL
(
pool3d
,
REGISTER_OP_CPU_KERNEL
(
pool3d
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
pool3d_grad
,
REGISTER_OP_CPU_KERNEL
(
pool3d_grad
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
paddle/operators/pool_op.cu.cc
浏览文件 @
13ec6f99
...
@@ -17,11 +17,15 @@ limitations under the License. */
...
@@ -17,11 +17,15 @@ limitations under the License. */
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
pool2d
,
REGISTER_OP_GPU_KERNEL
(
pool2d
,
ops
::
PoolKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
PoolKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
PoolKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
pool2d_grad
,
REGISTER_OP_GPU_KERNEL
(
pool2d_grad
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
PoolGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
pool3d
,
REGISTER_OP_GPU_KERNEL
(
pool3d
,
ops
::
PoolKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
PoolKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
PoolKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
pool3d_grad
,
REGISTER_OP_GPU_KERNEL
(
pool3d_grad
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
PoolGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
paddle/operators/pool_with_index_op.cc
浏览文件 @
13ec6f99
...
@@ -29,11 +29,11 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
...
@@ -29,11 +29,11 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"
X(Input
) of Pooling should not be null."
);
"
Input(X
) of Pooling should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Out
(Outp
ut) of Pooling should not be null."
);
"Out
put(O
ut) of Pooling should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Mask"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Mask"
),
"
Mask(Output
) of Pooling should not be null."
);
"
Output(Mask
) of Pooling should not be null."
);
auto
in_x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
in_x_dims
=
ctx
->
GetInputDim
(
"X"
);
...
@@ -67,6 +67,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
...
@@ -67,6 +67,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Mask"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Mask"
,
framework
::
make_ddim
(
output_shape
));
}
}
protected:
framework
::
OpKernelType
GetKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
ctx
.
device_context
());
}
};
};
class
MaxPoolWithIndexOpGrad
:
public
framework
::
OperatorWithKernel
{
class
MaxPoolWithIndexOpGrad
:
public
framework
::
OperatorWithKernel
{
...
@@ -80,6 +88,14 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
...
@@ -80,6 +88,14 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
"Input(X@GRAD) should not be null."
);
"Input(X@GRAD) should not be null."
);
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
}
}
protected:
framework
::
OpKernelType
GetKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
ctx
.
device_context
());
}
};
};
class
MaxPool2dWithIndexOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
MaxPool2dWithIndexOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
...
@@ -116,7 +132,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -116,7 +132,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
// TypedAttrChecker don't support vector type.)
// TypedAttrChecker don't support vector type.)
AddAttr
<
bool
>
(
AddAttr
<
bool
>
(
"global_pooling"
,
"global_pooling"
,
"(bool, default
false) Whether to use the global pooling. "
"(bool, default
:
false) Whether to use the global pooling. "
"If global_pooling = true, ksize and paddings will be ignored."
)
"If global_pooling = true, ksize and paddings will be ignored."
)
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
...
@@ -126,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -126,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
// TypedAttrChecker don't support vector type.)
// TypedAttrChecker don't support vector type.)
AddAttr
<
std
::
vector
<
int
>>
(
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings"
,
"(vector<int>, defalut
{0, 0}), paddings(height, width) of pooling "
"(vector<int>, defalut
:
{0, 0}), paddings(height, width) of pooling "
"operator. "
"operator. "
"If global_pooling = true, paddings and will be ignored."
)
"If global_pooling = true, paddings and will be ignored."
)
.
SetDefault
({
0
,
0
});
// TODO(Chengduo): Add checker. (Currently,
.
SetDefault
({
0
,
0
});
// TODO(Chengduo): Add checker. (Currently,
...
@@ -250,10 +266,12 @@ REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp,
...
@@ -250,10 +266,12 @@ REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp,
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
max_pool2d_with_index
,
max_pool2d_with_index
,
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
CPUPlace
,
float
,
int
>
,
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
CPUPlace
,
double
,
int
>
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
max_pool2d_with_index_grad
,
max_pool2d_with_index_grad
,
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
)
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
,
int
>
,
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
,
int
>
)
REGISTER_OP
(
max_pool3d_with_index
,
ops
::
MaxPoolWithIndexOp
,
REGISTER_OP
(
max_pool3d_with_index
,
ops
::
MaxPoolWithIndexOp
,
ops
::
MaxPool3dWithIndexOpMaker
,
max_pool3d_with_index_grad
,
ops
::
MaxPool3dWithIndexOpMaker
,
max_pool3d_with_index_grad
,
...
@@ -261,7 +279,9 @@ REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
...
@@ -261,7 +279,9 @@ REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
max_pool3d_with_index
,
max_pool3d_with_index
,
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
CPUPlace
,
float
,
int
>
,
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
CPUPlace
,
double
,
int
>
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
max_pool3d_with_index_grad
,
max_pool3d_with_index_grad
,
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
)
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
,
int
>
,
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
,
int
>
)
paddle/operators/pool_with_index_op.cu.cc
浏览文件 @
13ec6f99
...
@@ -18,14 +18,18 @@ namespace ops = paddle::operators;
...
@@ -18,14 +18,18 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
max_pool2d_with_index
,
max_pool2d_with_index
,
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
GPUPlace
,
float
,
int
>
,
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
GPUPlace
,
double
,
int
>
);
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
max_pool2d_with_index_grad
,
max_pool2d_with_index_grad
,
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
)
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
,
int
>
,
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
GPUPlace
,
double
,
int
>
)
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
max_pool3d_with_index
,
max_pool3d_with_index
,
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
GPUPlace
,
float
,
int
>
,
ops
::
MaxPoolWithIndexKernel
<
paddle
::
platform
::
GPUPlace
,
double
,
int
>
);
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
max_pool3d_with_index_grad
,
max_pool3d_with_index_grad
,
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
)
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
,
int
>
,
ops
::
MaxPoolWithIndexGradKernel
<
paddle
::
platform
::
GPUPlace
,
double
,
int
>
)
paddle/operators/pool_with_index_op.h
浏览文件 @
13ec6f99
...
@@ -24,8 +24,8 @@ namespace operators {
...
@@ -24,8 +24,8 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
1
,
typename
T2
>
class
MaxPoolWithIndexKernel
:
public
framework
::
OpKernel
<
T
>
{
class
MaxPoolWithIndexKernel
:
public
framework
::
OpKernel
<
T
1
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
Tensor
*
in_x
=
context
.
Input
<
Tensor
>
(
"X"
);
const
Tensor
*
in_x
=
context
.
Input
<
Tensor
>
(
"X"
);
...
@@ -44,13 +44,13 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
...
@@ -44,13 +44,13 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
switch
(
ksize
.
size
())
{
switch
(
ksize
.
size
())
{
case
2
:
{
case
2
:
{
paddle
::
operators
::
math
::
MaxPool2dWithIndexFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool2dWithIndexFunctor
<
Place
,
T
1
,
T2
>
pool2d_forward
;
pool2d_forward
;
pool2d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
pool2d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
paddings
,
out
,
mask
);
paddings
,
out
,
mask
);
}
break
;
}
break
;
case
3
:
{
case
3
:
{
paddle
::
operators
::
math
::
MaxPool3dWithIndexFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool3dWithIndexFunctor
<
Place
,
T
1
,
T2
>
pool3d_forward
;
pool3d_forward
;
pool3d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
pool3d_forward
(
context
.
device_context
(),
*
in_x
,
ksize
,
strides
,
paddings
,
out
,
mask
);
paddings
,
out
,
mask
);
...
@@ -60,8 +60,8 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
...
@@ -60,8 +60,8 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
}
}
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
1
,
typename
T2
>
class
MaxPoolWithIndexGradKernel
:
public
framework
::
OpKernel
<
T
>
{
class
MaxPoolWithIndexGradKernel
:
public
framework
::
OpKernel
<
T
1
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
Tensor
*
mask
=
context
.
Input
<
Tensor
>
(
"Mask"
);
const
Tensor
*
mask
=
context
.
Input
<
Tensor
>
(
"Mask"
);
...
@@ -80,19 +80,19 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
...
@@ -80,19 +80,19 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
}
}
if
(
in_x_grad
)
{
if
(
in_x_grad
)
{
in_x_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
in_x_grad
->
mutable_data
<
T
1
>
(
context
.
GetPlace
());
auto
&
device_ctx
=
context
.
device_context
();
auto
&
device_ctx
=
context
.
device_context
();
math
::
set_constant
(
device_ctx
,
in_x_grad
,
0
);
math
::
set_constant
(
device_ctx
,
in_x_grad
,
0
);
switch
(
ksize
.
size
())
{
switch
(
ksize
.
size
())
{
case
2
:
{
case
2
:
{
paddle
::
operators
::
math
::
MaxPool2dWithIndexGradFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool2dWithIndexGradFunctor
<
Place
,
T
1
,
T2
>
pool2d_backward
;
pool2d_backward
;
pool2d_backward
(
device_ctx
,
*
out_grad
,
*
mask
,
ksize
,
strides
,
pool2d_backward
(
device_ctx
,
*
out_grad
,
*
mask
,
ksize
,
strides
,
paddings
,
in_x_grad
);
paddings
,
in_x_grad
);
}
break
;
}
break
;
case
3
:
{
case
3
:
{
paddle
::
operators
::
math
::
MaxPool3dWithIndexGradFunctor
<
Place
,
T
>
paddle
::
operators
::
math
::
MaxPool3dWithIndexGradFunctor
<
Place
,
T
1
,
T2
>
pool3d_backward
;
pool3d_backward
;
pool3d_backward
(
device_ctx
,
*
out_grad
,
*
mask
,
ksize
,
strides
,
pool3d_backward
(
device_ctx
,
*
out_grad
,
*
mask
,
ksize
,
strides
,
paddings
,
in_x_grad
);
paddings
,
in_x_grad
);
...
...
paddle/operators/sequence_slice_op.cc
0 → 100755
浏览文件 @
13ec6f99
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_slice_op.h"
namespace
paddle
{
namespace
operators
{
// Forward operator for sequence_slice. It verifies that the X, Offset and
// Length inputs and the Out output are present, requires Offset and Length to
// be rank-2, and assigns Out a provisional shape.
class SequenceSliceOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Shape inference: presence checks first, then rank checks, then Out gets
  // the same dims as X.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SequenceSliceOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Offset"),
                   "Input(Offset) of SequenceSliceOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Length"),
                   "Input(Length) of SequenceSliceOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SequenceSliceOp should not be null.");

    auto x_shape = ctx->GetInputDim("X");
    auto offset_shape = ctx->GetInputDim("Offset");
    auto length_shape = ctx->GetInputDim("Length");

    PADDLE_ENFORCE_EQ(
        offset_shape.size(), 2UL,
        "Only support one level sequence now, The rank of offset must be 2.");
    PADDLE_ENFORCE_EQ(
        length_shape.size(), 2UL,
        "Only support one level sequence now, The rank of Length must be 2.");

    // The true output size depends on the runtime values of Offset and
    // Length, so Out starts with X's dims (an upper bound) and the kernel
    // resizes it once those values are known.
    ctx->SetOutputDim("Out", x_shape);
  }

 protected:
  // The kernel is selected by the element type of X and the device context
  // of the current execution.
  framework::OpKernelType GetKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto* x_tensor = ctx.Input<framework::LoDTensor>("X");
    auto x_data_type = framework::ToDataType(x_tensor->type());
    return framework::OpKernelType(x_data_type, ctx.device_context());
  }
};
// Gradient operator for sequence_slice. It requires the gradient of Out as
// input and the gradient of X as output, and gives the X gradient the dims
// of X.
class SequenceSliceGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Shape inference for the backward pass.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "The gradient of Out should not be null.");

    const auto x_grad_name = framework::GradVarName("X");
    PADDLE_ENFORCE(ctx->HasOutputs(x_grad_name),
                   "The gradient of X should not be null.");

    // d(X) mirrors the dims of X.
    auto x_shapes = ctx->GetInputsDim("X");
    ctx->SetOutputsDim(x_grad_name, x_shapes);
  }

 protected:
  // The kernel is selected by the element type of X and the device context
  // of the current execution.
  framework::OpKernelType GetKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto* x_tensor = ctx.Input<framework::LoDTensor>("X");
    auto x_data_type = framework::ToDataType(x_tensor->type());
    return framework::OpKernelType(x_data_type, ctx.device_context());
  }
};
// Declares the proto of the sequence_slice operator: three inputs (X, Offset,
// Length), one output (Out) and the user-facing operator documentation.
class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Forwards proto/op_checker to the base maker and registers the operator's
  // inputs, output and comment.
  SequenceSliceOpMaker(framework::OpProto* proto,
                       framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // The sequence batch to slice.
    AddInput("X",
             "(LoDTensor), "
             "the input of SequenceSliceOp.");
    // Per-sequence start position of the slice.
    AddInput("Offset",
             "(Tensor), "
             "a vector<int> to describe the offset of every input sequence for "
             "sub sequence item.");
    // Per-sequence number of items to keep.
    AddInput("Length",
             "(Tensor), "
             "a vector<int> to describe the length of every input sequence for "
             "sub sequence item.");
    AddOutput("Out", "(LoDTensor), the output of SequenceSliceOp.");
    // Operator documentation; the text below is emitted verbatim.
    AddComment(R"DOC(
Sequence slice operator
The operator crops a subsequence from given sequence with given start offset and subsequence length.
It only supports sequence (LoD Tensor with level number is 1).
- Case:
X = [[a1, a2;
b1, b2;
c1, c2]
[d1, d2;
e1, e2]]
LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2)
Offset = [[0], [1]]; Length = [[2], [1]]
Out = [[a1, a2;
b1, b2]
[e1, e2]]
LoD(Out) = {{0, 2, 3}}; Dims(Out) = (3, 2)
NOTE: The first dimension size of input, the size of offset and Length, should be equal. The offset start from 0.
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Register the forward operator with its proto maker, together with the
// gradient operator sequence_slice_grad.
REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker,
            sequence_slice_grad, ops::SequenceSliceGradOp);
// CPU kernels: only the float instantiation is registered here.
REGISTER_OP_CPU_KERNEL(
    sequence_slice,
    ops::SequenceSliceOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
    sequence_slice_grad,
    ops::SequenceSliceGradOpKernel<paddle::platform::CPUPlace, float>);
paddle/operators/sequence_slice_op.cu
0 → 100755
浏览文件 @
13ec6f99
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_slice_op.h"
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// GPU kernels: only the float instantiation is registered here, for both the
// forward and the gradient operator.
REGISTER_OP_GPU_KERNEL(
    sequence_slice,
    ops::SequenceSliceOpKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
    sequence_slice_grad,
    ops::SequenceSliceGradOpKernel<paddle::platform::GPUPlace, float>);
paddle/operators/sequence_slice_op.h
0 → 100755
浏览文件 @
13ec6f99
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/strided_memcpy.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
LoD
=
framework
::
LoD
;
// Builds the LoD of the sliced output: a copy of the input LoD whose level-0
// entries are replaced by the running sum of the requested slice lengths
// (entry 0 is 0, entry i + 1 is length_data[0] + ... + length_data[i]).
//
// in          - object exposing lod(); its level-0 size fixes the number of
//               sequences.
// offset_data - accepted for interface symmetry; not read by this function.
// length_data - one slice length per sequence.
template <typename T>
inline LoD SequenceSliceLoD(const T& in, const int64_t* offset_data,
                            const int64_t* length_data) {
  LoD result = in.lod();
  auto& level0 = result[0];
  const size_t seq_count = in.lod()[0].size() - 1;

  size_t running_total = 0;
  level0[0] = running_total;
  for (size_t seq = 0; seq < seq_count; ++seq) {
    running_total += length_data[seq];
    level0[seq + 1] = running_total;
  }
  return result;
}
// Forward kernel of sequence_slice.
//
// For every sequence i of the one-level LoD input X, copies the rows
//   [lod[0][i] + Offset[i], lod[0][i] + Offset[i] + Length[i])
// into Out, packs the slices back to back, and sets Out's LoD and first
// dimension accordingly.
//
// Inputs  : X (LoDTensor), Offset (Tensor, read as int64_t),
//           Length (Tensor, read as int64_t).
// Output  : Out (LoDTensor).
// Enforces: one LoD level; Offset/Length have one row per sequence;
//           Offset[i] >= 0; Length[i] > 0; each slice stays inside its
//           sequence.
template <typename Place, typename T>
class SequenceSliceOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<LoDTensor>("X");
    auto* offset = ctx.Input<Tensor>("Offset");
    auto* length = ctx.Input<Tensor>("Length");
    auto* out = ctx.Output<LoDTensor>("Out");

    auto lod = in->lod();
    // Check the level count before touching lod[0]; indexing an empty LoD
    // would be undefined behaviour.
    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
    auto n = lod[0].size() - 1;

    PADDLE_ENFORCE_EQ(
        n, static_cast<size_t>(length->dims()[0]),
        "The size of input-sequence and length-array should be the same");
    PADDLE_ENFORCE_EQ(
        n, static_cast<size_t>(offset->dims()[0]),
        "The size of input-sequence and offset-array should be the same");

    const int64_t* offset_data = offset->data<int64_t>();
    const int64_t* length_data = length->data<int64_t>();
    framework::Tensor offset_cpu;
    framework::Tensor length_cpu;

    // The values are dereferenced on the host below, so when running on GPU
    // they are first staged into CPU tensors. The staging tensors hold
    // int64_t (the type they are read back as), not the kernel's T.
    if (platform::is_gpu_place(ctx.GetPlace())) {
      offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
      offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
      offset_data = offset_cpu.data<int64_t>();

      length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
      length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
      length_data = length_cpu.data<int64_t>();
    }

    for (size_t i = 0; i < n; ++i) {
      // Offsets start from 0, so 0 itself must be accepted.
      PADDLE_ENFORCE_LE(0, offset_data[i],
                        "The offset[%d] must not be less than zero.", i);
      PADDLE_ENFORCE_LT(0, length_data[i],
                        "The length[%d] must greater than zero.", i);
      // A slice may end exactly at the end of its sequence (the operator's
      // documented example does: 3 + 1 + 1 == 5), hence <= rather than <.
      PADDLE_ENFORCE_LE(lod[0][i] + offset_data[i] + length_data[i],
                        lod[0][i + 1], "The target tensor's length overflow.");
    }

    out->mutable_data<T>(ctx.GetPlace());
    auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
    // The last LoD entry is the total number of rows kept.
    auto out_dims = in->dims();
    out_dims[0] = out_lod[0][out_lod[0].size() - 1];
    out->Resize(out_dims);
    out->set_lod(out_lod);

    auto in_stride = framework::stride(in->dims());
    auto out_stride = framework::stride(out->dims());

    // Element offset into Out at which the next slice is written.
    size_t out_offset = 0;
    for (size_t i = 0; i < n; ++i) {
      Tensor in_t = in->Slice(
          static_cast<int>(lod[0][i] + offset_data[i]),
          static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));

      StridedMemcpy<T>(ctx.device_context(), in_t.data<T>(), in_stride,
                       in_t.dims(), out_stride, out->data<T>() + out_offset);
      // Advance by the number of elements just copied (rows * row stride).
      out_offset += length_data[i] * in_stride[0];
    }
  }
};
// Backward kernel of sequence_slice.
//
// Zero-fills the gradient of X, then for every sequence i copies rows
// [out_lod[0][i], out_lod[0][i + 1]) of the Out gradient into rows
//   [lod[0][i] + Offset[i], lod[0][i] + Offset[i] + Length[i])
// of the X gradient. Rows outside the sliced ranges keep a zero gradient.
//
// Inputs : X (LoDTensor), Offset and Length (Tensor, read as int64_t),
//          gradient of Out (LoDTensor).
// Output : gradient of X (LoDTensor); nothing is written when it is null.
template <typename Place, typename T>
class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<LoDTensor>("X");
    auto* offset = ctx.Input<Tensor>("Offset");
    auto* length = ctx.Input<Tensor>("Length");
    auto* out_grad =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
    auto* x_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));

    const int64_t* offset_data = offset->data<int64_t>();
    const int64_t* length_data = length->data<int64_t>();
    framework::Tensor offset_cpu;
    framework::Tensor length_cpu;

    // The values are dereferenced on the host below, so when running on GPU
    // they are first staged into CPU tensors. The staging tensors hold
    // int64_t (the type they are read back as), not the kernel's T.
    if (platform::is_gpu_place(ctx.GetPlace())) {
      offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
      offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
      offset_data = offset_cpu.data<int64_t>();

      length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
      length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
      length_data = length_cpu.data<int64_t>();
    }

    auto lod = in->lod();
    auto out_lod = out_grad->lod();

    if (x_grad) {
      x_grad->mutable_data<T>(ctx.GetPlace());
      x_grad->set_lod(in->lod());
      // Start from an all-zero gradient; only the sliced rows are overwritten.
      math::SetConstant<Place, T> set_zero;
      set_zero(ctx.device_context(), x_grad, static_cast<T>(0));

      // x_grad's dims do not change inside the loop, so its stride is
      // computed once.
      auto x_grad_stride = framework::stride(x_grad->dims());

      for (size_t i = 0; i < out_lod[0].size() - 1; ++i) {
        Tensor out_grad_t =
            out_grad->Slice(static_cast<int>(out_lod[0][i]),
                            static_cast<int>(out_lod[0][i + 1]));
        auto out_grad_stride = framework::stride(out_grad_t.dims());

        Tensor x_grad_t = x_grad->Slice(
            static_cast<int>(lod[0][i] + offset_data[i]),
            static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));

        StridedMemcpy<T>(ctx.device_context(), out_grad_t.data<T>(),
                         out_grad_stride, out_grad_t.dims(), x_grad_stride,
                         x_grad_t.data<T>());
      }
    }
  }
};
}
// namespace operators
}
// namespace paddle
paddle/platform/cudnn_helper.h
浏览文件 @
13ec6f99
...
@@ -224,13 +224,15 @@ class ScopedConvolutionDescriptor {
...
@@ -224,13 +224,15 @@ class ScopedConvolutionDescriptor {
PADDLE_ENFORCE_EQ
(
pads
.
size
(),
strides
.
size
());
PADDLE_ENFORCE_EQ
(
pads
.
size
(),
strides
.
size
());
PADDLE_ENFORCE_EQ
(
pads
.
size
(),
dilations
.
size
());
PADDLE_ENFORCE_EQ
(
pads
.
size
(),
dilations
.
size
());
#if
CUDNN_VERSION < 6000
#if
!CUDNN_VERSION_MIN(6, 0, 0)
// cudnn v5 does not support dilation conv, the argument is called upscale
// cudnn v5 does not support dilation conv, the argument is called upscale
// instead of dilations and it is must be one.
// instead of dilations and it is must be one.
for
(
size_t
i
=
0
;
i
<
dilations
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
dilations
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
dilations
[
i
],
1
,
dilations
[
i
],
1
,
"Dilations conv is not supported in this cuDNN version"
);
"Dilations conv is not supported in this cuDNN version(%d.%d.%d)."
,
CUDNN_VERSION
/
1000
,
CUDNN_VERSION
%
1000
/
100
,
CUDNN_VERSION
%
100
);
}
}
#endif
#endif
...
...
paddle/platform/cudnn_helper_test.cc
浏览文件 @
13ec6f99
...
@@ -38,6 +38,26 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
...
@@ -38,6 +38,26 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
EXPECT_EQ
(
strides
[
2
],
6
);
EXPECT_EQ
(
strides
[
2
],
6
);
EXPECT_EQ
(
strides
[
1
],
36
);
EXPECT_EQ
(
strides
[
1
],
36
);
EXPECT_EQ
(
strides
[
0
],
144
);
EXPECT_EQ
(
strides
[
0
],
144
);
// test tensor5d: ScopedTensorDescriptor
ScopedTensorDescriptor
tensor5d_desc
;
std
::
vector
<
int
>
shape_5d
=
{
2
,
4
,
6
,
6
,
6
};
auto
desc_5d
=
tensor5d_desc
.
descriptor
<
float
>
(
DataLayout
::
kNCDHW
,
shape_5d
);
std
::
vector
<
int
>
dims_5d
(
5
);
std
::
vector
<
int
>
strides_5d
(
5
);
paddle
::
platform
::
dynload
::
cudnnGetTensorNdDescriptor
(
desc_5d
,
5
,
&
type
,
&
nd
,
dims_5d
.
data
(),
strides_5d
.
data
());
EXPECT_EQ
(
nd
,
5
);
for
(
size_t
i
=
0
;
i
<
dims_5d
.
size
();
++
i
)
{
EXPECT_EQ
(
dims_5d
[
i
],
shape_5d
[
i
]);
}
EXPECT_EQ
(
strides_5d
[
4
],
1
);
EXPECT_EQ
(
strides_5d
[
3
],
6
);
EXPECT_EQ
(
strides_5d
[
2
],
36
);
EXPECT_EQ
(
strides_5d
[
1
],
216
);
EXPECT_EQ
(
strides_5d
[
0
],
864
);
}
}
TEST
(
CudnnHelper
,
ScopedFilterDescriptor
)
{
TEST
(
CudnnHelper
,
ScopedFilterDescriptor
)
{
...
@@ -60,6 +80,20 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
...
@@ -60,6 +80,20 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
for
(
size_t
i
=
0
;
i
<
shape
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
shape
.
size
();
++
i
)
{
EXPECT_EQ
(
kernel
[
i
],
shape
[
i
]);
EXPECT_EQ
(
kernel
[
i
],
shape
[
i
]);
}
}
ScopedFilterDescriptor
filter_desc_4d
;
std
::
vector
<
int
>
shape_4d
=
{
2
,
3
,
3
,
3
};
auto
desc_4d
=
filter_desc
.
descriptor
<
float
>
(
DataLayout
::
kNCDHW
,
shape_4d
);
std
::
vector
<
int
>
kernel_4d
(
4
);
paddle
::
platform
::
dynload
::
cudnnGetFilterNdDescriptor
(
desc_4d
,
4
,
&
type
,
&
format
,
&
nd
,
kernel_4d
.
data
());
EXPECT_EQ
(
GetCudnnTensorFormat
(
DataLayout
::
kNCHW
),
format
);
EXPECT_EQ
(
nd
,
4
);
for
(
size_t
i
=
0
;
i
<
shape_4d
.
size
();
++
i
)
{
EXPECT_EQ
(
kernel_4d
[
i
],
shape_4d
[
i
]);
}
}
}
TEST
(
CudnnHelper
,
ScopedConvolutionDescriptor
)
{
TEST
(
CudnnHelper
,
ScopedConvolutionDescriptor
)
{
...
...
python/paddle/trainer_config_helpers/activations.py
浏览文件 @
13ec6f99
...
@@ -17,7 +17,8 @@ __all__ = [
...
@@ -17,7 +17,8 @@ __all__ = [
"IdentityActivation"
,
"LinearActivation"
,
'SequenceSoftmaxActivation'
,
"IdentityActivation"
,
"LinearActivation"
,
'SequenceSoftmaxActivation'
,
'ExpActivation'
,
"ReluActivation"
,
"BReluActivation"
,
"SoftReluActivation"
,
'ExpActivation'
,
"ReluActivation"
,
"BReluActivation"
,
"SoftReluActivation"
,
"STanhActivation"
,
"AbsActivation"
,
"SquareActivation"
,
"BaseActivation"
,
"STanhActivation"
,
"AbsActivation"
,
"SquareActivation"
,
"BaseActivation"
,
"LogActivation"
,
"SqrtActivation"
,
"ReciprocalActivation"
"LogActivation"
,
"SqrtActivation"
,
"ReciprocalActivation"
,
"SoftSignActivation"
]
]
...
@@ -243,8 +244,20 @@ class ReciprocalActivation(BaseActivation):
...
@@ -243,8 +244,20 @@ class ReciprocalActivation(BaseActivation):
Reciprocal Activation.
Reciprocal Activation.
.. math::
.. math::
f(z)
= 1/z
f(z)
=
\\
frac{1}{z}
"""
"""
def
__init__
(
self
):
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'reciprocal'
,
False
)
BaseActivation
.
__init__
(
self
,
'reciprocal'
,
False
)
class
SoftSignActivation
(
BaseActivation
):
"""
SoftSign Activation.
.. math::
f(z)=
\\
frac{z}{1 + |z|}
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'softsign'
,
False
)
python/paddle/v2/fluid/layers.py
浏览文件 @
13ec6f99
...
@@ -661,7 +661,7 @@ def conv2d(input,
...
@@ -661,7 +661,7 @@ def conv2d(input,
if
groups
is
None
:
if
groups
is
None
:
num_filter_channels
=
num_channels
num_filter_channels
=
num_channels
else
:
else
:
if
num_channels
%
groups
is
not
0
:
if
num_channels
%
groups
!=
0
:
raise
ValueError
(
"num_channels must be divisible by groups."
)
raise
ValueError
(
"num_channels must be divisible by groups."
)
num_filter_channels
=
num_channels
/
groups
num_filter_channels
=
num_channels
/
groups
...
...
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
浏览文件 @
13ec6f99
...
@@ -4,6 +4,7 @@ import paddle.v2.fluid.core as core
...
@@ -4,6 +4,7 @@ import paddle.v2.fluid.core as core
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.evaluator
as
evaluator
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.initializer
import
XavierInitializer
from
paddle.v2.fluid.initializer
import
XavierInitializer
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
...
@@ -103,12 +104,13 @@ net = vgg16_bn_drop(images)
...
@@ -103,12 +104,13 @@ net = vgg16_bn_drop(images)
predict
=
layers
.
fc
(
input
=
net
,
size
=
classdim
,
act
=
'softmax'
)
predict
=
layers
.
fc
(
input
=
net
,
size
=
classdim
,
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
# optimizer = SGDOptimizer(learning_rate=0.001)
# optimizer = SGDOptimizer(learning_rate=0.001)
optimizer
=
AdamOptimizer
(
learning_rate
=
0.001
)
optimizer
=
AdamOptimizer
(
learning_rate
=
0.001
)
opts
=
optimizer
.
minimize
(
avg_cost
)
opts
=
optimizer
.
minimize
(
avg_cost
)
accuracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
predict
,
label
=
label
)
BATCH_SIZE
=
128
BATCH_SIZE
=
128
PASS_NUM
=
1
PASS_NUM
=
1
...
@@ -124,6 +126,7 @@ exe.run(framework.default_startup_program())
...
@@ -124,6 +126,7 @@ exe.run(framework.default_startup_program())
for
pass_id
in
range
(
PASS_NUM
):
for
pass_id
in
range
(
PASS_NUM
):
batch_id
=
0
batch_id
=
0
accuracy
.
reset
(
exe
)
for
data
in
train_reader
():
for
data
in
train_reader
():
img_data
=
np
.
array
(
map
(
lambda
x
:
x
[
0
].
reshape
(
data_shape
),
img_data
=
np
.
array
(
map
(
lambda
x
:
x
[
0
].
reshape
(
data_shape
),
data
)).
astype
(
"float32"
)
data
)).
astype
(
"float32"
)
...
@@ -141,12 +144,14 @@ for pass_id in range(PASS_NUM):
...
@@ -141,12 +144,14 @@ for pass_id in range(PASS_NUM):
outs
=
exe
.
run
(
framework
.
default_main_program
(),
outs
=
exe
.
run
(
framework
.
default_main_program
(),
feed
=
{
"pixel"
:
tensor_img
,
feed
=
{
"pixel"
:
tensor_img
,
"label"
:
tensor_y
},
"label"
:
tensor_y
},
fetch_list
=
[
avg_cost
,
acc
uracy
])
fetch_list
=
[
avg_cost
,
acc
_out
])
loss
=
np
.
array
(
outs
[
0
])
loss
=
np
.
array
(
outs
[
0
])
acc
=
np
.
array
(
outs
[
1
])
acc
=
np
.
array
(
outs
[
1
])
pass_acc
=
accuracy
.
eval
(
exe
)
print
(
"pass_id:"
+
str
(
pass_id
)
+
" batch_id:"
+
str
(
batch_id
)
+
print
(
"pass_id:"
+
str
(
pass_id
)
+
" batch_id:"
+
str
(
batch_id
)
+
" loss:"
+
str
(
loss
)
+
" acc:"
+
str
(
acc
))
" loss:"
+
str
(
loss
)
+
" acc:"
+
str
(
acc
)
+
" pass_acc:"
+
str
(
pass_acc
))
batch_id
=
batch_id
+
1
batch_id
=
batch_id
+
1
if
batch_id
>
1
:
if
batch_id
>
1
:
...
...
python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
浏览文件 @
13ec6f99
...
@@ -46,7 +46,6 @@ exe = Executor(place)
...
@@ -46,7 +46,6 @@ exe = Executor(place)
exe
.
run
(
framework
.
default_startup_program
())
exe
.
run
(
framework
.
default_startup_program
())
for
pass_id
in
range
(
PASS_NUM
):
for
pass_id
in
range
(
PASS_NUM
):
count
=
0
accuracy
.
reset
(
exe
)
accuracy
.
reset
(
exe
)
for
data
in
train_reader
():
for
data
in
train_reader
():
img_data
=
np
.
array
(
map
(
lambda
x
:
x
[
0
].
reshape
([
1
,
28
,
28
]),
img_data
=
np
.
array
(
map
(
lambda
x
:
x
[
0
].
reshape
([
1
,
28
,
28
]),
...
@@ -66,13 +65,14 @@ for pass_id in range(PASS_NUM):
...
@@ -66,13 +65,14 @@ for pass_id in range(PASS_NUM):
loss
=
np
.
array
(
outs
[
0
])
loss
=
np
.
array
(
outs
[
0
])
acc
=
np
.
array
(
outs
[
1
])
acc
=
np
.
array
(
outs
[
1
])
pass_acc
=
accuracy
.
eval
(
exe
)
pass_acc
=
accuracy
.
eval
(
exe
)
print
"pass id : "
,
pass_id
,
pass_acc
print
(
"pass_id="
+
str
(
pass_id
)
+
" acc="
+
str
(
acc
)
+
" pass_acc="
+
str
(
pass_acc
))
# print loss, acc
# print loss, acc
if
loss
<
10.0
and
acc
>
0.9
:
if
loss
<
10.0
and
pass_
acc
>
0.9
:
# if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good.
# if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good.
exit
(
0
)
exit
(
0
)
pass_acc
=
accuracy
.
eval
(
exe
)
pass_acc
=
accuracy
.
eval
(
exe
)
print
"pass id : "
,
pass_id
,
pass_acc
print
(
"pass_id="
+
str
(
pass_id
)
+
" pass_acc="
+
str
(
pass_acc
))
exit
(
1
)
exit
(
1
)
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
浏览文件 @
13ec6f99
...
@@ -3,6 +3,7 @@ import paddle.v2 as paddle
...
@@ -3,6 +3,7 @@ import paddle.v2 as paddle
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.evaluator
as
evaluator
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.initializer
import
UniformInitializer
from
paddle.v2.fluid.initializer
import
UniformInitializer
from
paddle.v2.fluid.optimizer
import
MomentumOptimizer
from
paddle.v2.fluid.optimizer
import
MomentumOptimizer
...
@@ -30,11 +31,12 @@ label = layers.data(name='y', shape=[1], data_type='int64')
...
@@ -30,11 +31,12 @@ label = layers.data(name='y', shape=[1], data_type='int64')
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
optimizer
=
MomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
optimizer
=
MomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
opts
=
optimizer
.
minimize
(
avg_cost
)
opts
=
optimizer
.
minimize
(
avg_cost
)
accuracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
predict
,
label
=
label
)
train_reader
=
paddle
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
mnist
.
train
(),
buf_size
=
8192
),
paddle
.
dataset
.
mnist
.
train
(),
buf_size
=
8192
),
...
@@ -47,6 +49,7 @@ exe.run(framework.default_startup_program())
...
@@ -47,6 +49,7 @@ exe.run(framework.default_startup_program())
PASS_NUM
=
100
PASS_NUM
=
100
for
pass_id
in
range
(
PASS_NUM
):
for
pass_id
in
range
(
PASS_NUM
):
accuracy
.
reset
(
exe
)
for
data
in
train_reader
():
for
data
in
train_reader
():
x_data
=
np
.
array
(
map
(
lambda
x
:
x
[
0
],
data
)).
astype
(
"float32"
)
x_data
=
np
.
array
(
map
(
lambda
x
:
x
[
0
],
data
)).
astype
(
"float32"
)
y_data
=
np
.
array
(
map
(
lambda
x
:
x
[
1
],
data
)).
astype
(
"int64"
)
y_data
=
np
.
array
(
map
(
lambda
x
:
x
[
1
],
data
)).
astype
(
"int64"
)
...
@@ -61,9 +64,13 @@ for pass_id in range(PASS_NUM):
...
@@ -61,9 +64,13 @@ for pass_id in range(PASS_NUM):
outs
=
exe
.
run
(
framework
.
default_main_program
(),
outs
=
exe
.
run
(
framework
.
default_main_program
(),
feed
=
{
'x'
:
tensor_x
,
feed
=
{
'x'
:
tensor_x
,
'y'
:
tensor_y
},
'y'
:
tensor_y
},
fetch_list
=
[
avg_cost
,
acc
uracy
])
fetch_list
=
[
avg_cost
,
acc
_out
])
out
=
np
.
array
(
outs
[
0
])
out
=
np
.
array
(
outs
[
0
])
acc
=
np
.
array
(
outs
[
1
])
acc
=
np
.
array
(
outs
[
1
])
if
out
[
0
]
<
5.0
:
pass_acc
=
accuracy
.
eval
(
exe
)
exit
(
0
)
# if avg cost less than 5.0, we think our code is good.
if
pass_acc
>
0.7
:
exit
(
0
)
# print("pass_id=" + str(pass_id) + " auc=" +
# str(acc) + " pass_acc=" + str(pass_acc))
exit
(
1
)
exit
(
1
)
python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
浏览文件 @
13ec6f99
import
numpy
as
np
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.evaluator
as
evaluator
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.nets
as
nets
...
@@ -32,8 +33,8 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
...
@@ -32,8 +33,8 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
acc
uracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
prediction
,
label
=
label
)
return
avg_cost
,
acc
return
avg_cost
,
acc
uracy
,
acc_out
def
to_lodtensor
(
data
,
place
):
def
to_lodtensor
(
data
,
place
):
...
@@ -59,7 +60,8 @@ def main():
...
@@ -59,7 +60,8 @@ def main():
dict_dim
=
len
(
word_dict
)
dict_dim
=
len
(
word_dict
)
class_dim
=
2
class_dim
=
2
cost
,
acc
=
convolution_net
(
input_dim
=
dict_dim
,
class_dim
=
class_dim
)
cost
,
accuracy
,
acc_out
=
convolution_net
(
input_dim
=
dict_dim
,
class_dim
=
class_dim
)
train_data
=
paddle
.
batch
(
train_data
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
reader
.
shuffle
(
...
@@ -71,6 +73,7 @@ def main():
...
@@ -71,6 +73,7 @@ def main():
exe
.
run
(
framework
.
default_startup_program
())
exe
.
run
(
framework
.
default_startup_program
())
for
pass_id
in
xrange
(
PASS_NUM
):
for
pass_id
in
xrange
(
PASS_NUM
):
accuracy
.
reset
(
exe
)
for
data
in
train_data
():
for
data
in
train_data
():
tensor_words
=
to_lodtensor
(
map
(
lambda
x
:
x
[
0
],
data
),
place
)
tensor_words
=
to_lodtensor
(
map
(
lambda
x
:
x
[
0
],
data
),
place
)
...
@@ -83,12 +86,13 @@ def main():
...
@@ -83,12 +86,13 @@ def main():
outs
=
exe
.
run
(
framework
.
default_main_program
(),
outs
=
exe
.
run
(
framework
.
default_main_program
(),
feed
=
{
"words"
:
tensor_words
,
feed
=
{
"words"
:
tensor_words
,
"label"
:
tensor_label
},
"label"
:
tensor_label
},
fetch_list
=
[
cost
,
acc
])
fetch_list
=
[
cost
,
acc
_out
])
cost_val
=
np
.
array
(
outs
[
0
])
cost_val
=
np
.
array
(
outs
[
0
])
acc_val
=
np
.
array
(
outs
[
1
])
acc_val
=
np
.
array
(
outs
[
1
])
pass_acc
=
accuracy
.
eval
(
exe
)
print
(
"cost="
+
str
(
cost_val
)
+
" acc="
+
str
(
acc_val
))
print
(
"cost="
+
str
(
cost_val
)
+
" acc="
+
str
(
acc_val
)
+
if
cost_val
<
1.0
and
acc_val
>
0.7
:
" pass_acc="
+
str
(
pass_acc
))
if
cost_val
<
1.0
and
pass_acc
>
0.8
:
exit
(
0
)
exit
(
0
)
exit
(
1
)
exit
(
1
)
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
浏览文件 @
13ec6f99
import
numpy
as
np
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.evaluator
as
evaluator
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
...
@@ -41,8 +42,8 @@ def stacked_lstm_net(input_dim,
...
@@ -41,8 +42,8 @@ def stacked_lstm_net(input_dim,
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
acc
uracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
prediction
,
label
=
label
)
return
avg_cost
,
acc
return
avg_cost
,
acc
uracy
,
acc_out
def
to_lodtensor
(
data
,
place
):
def
to_lodtensor
(
data
,
place
):
...
@@ -69,7 +70,8 @@ def main():
...
@@ -69,7 +70,8 @@ def main():
dict_dim
=
len
(
word_dict
)
dict_dim
=
len
(
word_dict
)
class_dim
=
2
class_dim
=
2
cost
,
acc
=
stacked_lstm_net
(
input_dim
=
dict_dim
,
class_dim
=
class_dim
)
cost
,
accuracy
,
acc_out
=
stacked_lstm_net
(
input_dim
=
dict_dim
,
class_dim
=
class_dim
)
train_data
=
paddle
.
batch
(
train_data
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
reader
.
shuffle
(
...
@@ -81,6 +83,7 @@ def main():
...
@@ -81,6 +83,7 @@ def main():
exe
.
run
(
framework
.
default_startup_program
())
exe
.
run
(
framework
.
default_startup_program
())
for
pass_id
in
xrange
(
PASS_NUM
):
for
pass_id
in
xrange
(
PASS_NUM
):
accuracy
.
reset
(
exe
)
for
data
in
train_data
():
for
data
in
train_data
():
tensor_words
=
to_lodtensor
(
map
(
lambda
x
:
x
[
0
],
data
),
place
)
tensor_words
=
to_lodtensor
(
map
(
lambda
x
:
x
[
0
],
data
),
place
)
...
@@ -93,12 +96,13 @@ def main():
...
@@ -93,12 +96,13 @@ def main():
outs
=
exe
.
run
(
framework
.
default_main_program
(),
outs
=
exe
.
run
(
framework
.
default_main_program
(),
feed
=
{
"words"
:
tensor_words
,
feed
=
{
"words"
:
tensor_words
,
"label"
:
tensor_label
},
"label"
:
tensor_label
},
fetch_list
=
[
cost
,
acc
])
fetch_list
=
[
cost
,
acc
_out
])
cost_val
=
np
.
array
(
outs
[
0
])
cost_val
=
np
.
array
(
outs
[
0
])
acc_val
=
np
.
array
(
outs
[
1
])
acc_val
=
np
.
array
(
outs
[
1
])
pass_acc
=
accuracy
.
eval
(
exe
)
print
(
"cost="
+
str
(
cost_val
)
+
" acc="
+
str
(
acc_val
))
print
(
"cost="
+
str
(
cost_val
)
+
" acc="
+
str
(
acc_val
)
+
if
cost_val
<
1.0
and
acc_val
>
0.7
:
" pass_acc="
+
str
(
pass_acc
))
if
cost_val
<
1.0
and
acc_val
>
0.8
:
exit
(
0
)
exit
(
0
)
exit
(
1
)
exit
(
1
)
...
...
python/paddle/v2/fluid/tests/test_pool2d_op.py
浏览文件 @
13ec6f99
...
@@ -3,8 +3,7 @@ import numpy as np
...
@@ -3,8 +3,7 @@ import numpy as np
from
op_test
import
OpTest
from
op_test
import
OpTest
def
max_pool2D_forward_naive
(
x
,
ksize
,
strides
,
paddings
=
[
0
,
0
],
global_pool
=
0
):
def
max_pool2D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
0
):
N
,
C
,
H
,
W
=
x
.
shape
N
,
C
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
==
1
:
ksize
=
[
H
,
W
]
ksize
=
[
H
,
W
]
...
@@ -23,8 +22,7 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
...
@@ -23,8 +22,7 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
return
out
return
out
def
avg_pool2D_forward_naive
(
x
,
ksize
,
strides
,
paddings
=
[
0
,
0
],
global_pool
=
0
):
def
avg_pool2D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
0
):
N
,
C
,
H
,
W
=
x
.
shape
N
,
C
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
==
1
:
ksize
=
[
H
,
W
]
ksize
=
[
H
,
W
]
...
@@ -47,6 +45,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
...
@@ -47,6 +45,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
class
TestPool2d_Op
(
OpTest
):
class
TestPool2d_Op
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_test_case
()
self
.
init_test_case
()
self
.
init_global_pool
()
self
.
init_op_type
()
self
.
init_op_type
()
self
.
init_pool_type
()
self
.
init_pool_type
()
if
self
.
global_pool
:
if
self
.
global_pool
:
...
@@ -75,8 +74,6 @@ class TestPool2d_Op(OpTest):
...
@@ -75,8 +74,6 @@ class TestPool2d_Op(OpTest):
self
.
check_grad
(
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
self
.
check_grad
(
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
global_pool
=
True
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
]
self
.
shape
=
[
2
,
3
,
5
,
5
]
self
.
ksize
=
[
3
,
3
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
strides
=
[
1
,
1
]
...
@@ -87,12 +84,14 @@ class TestPool2d_Op(OpTest):
...
@@ -87,12 +84,14 @@ class TestPool2d_Op(OpTest):
def
init_pool_type
(
self
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
self
.
pool_type
=
"avg"
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
def
init_global_pool
(
self
):
self
.
global_pool
=
True
class
TestCase1
(
TestPool2d_Op
):
class
TestCase1
(
TestPool2d_Op
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
ksize
=
[
3
,
3
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
strides
=
[
1
,
1
]
...
@@ -103,12 +102,14 @@ class TestCase1(TestPool2d_Op):
...
@@ -103,12 +102,14 @@ class TestCase1(TestPool2d_Op):
def
init_pool_type
(
self
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
self
.
pool_type
=
"avg"
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
def
init_global_pool
(
self
):
self
.
global_pool
=
False
class
TestCase2
(
TestPool2d_Op
):
class
TestCase2
(
TestPool2d_Op
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
ksize
=
[
3
,
3
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
strides
=
[
1
,
1
]
...
@@ -119,152 +120,69 @@ class TestCase2(TestPool2d_Op):
...
@@ -119,152 +120,69 @@ class TestCase2(TestPool2d_Op):
def
init_pool_type
(
self
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
self
.
pool_type
=
"avg"
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
def
init_global_pool
(
self
):
self
.
global_pool
=
False
class
TestCase3
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
True
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
0
,
0
]
class
TestCase3
(
TestPool2d_Op
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d"
self
.
op_type
=
"pool2d"
def
init_pool_type
(
self
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool_type
=
"max"
class
TestCase4
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
0
,
0
]
class
TestCase4
(
TestCase1
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d"
self
.
op_type
=
"pool2d"
def
init_pool_type
(
self
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool_type
=
"max"
class
TestCase5
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
1
]
class
TestCase5
(
TestCase2
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d"
self
.
op_type
=
"pool2d"
def
init_pool_type
(
self
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool_type
=
"max"
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
#--------------------test pool2d_cudnn--------------------
#--------------------test pool2d_cudnn--------------------
class
TestCaseCudnn1
(
TestPool2d_Op
):
class
TestCudnnCase1
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
True
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
0
,
0
]
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
op_type
=
"pool2d_cudnn"
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
class
TestCaseCudnn2
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
0
,
0
]
class
TestCudnnCase2
(
TestCase1
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
op_type
=
"pool2d_cudnn"
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
class
TestCaseCudnn3
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
1
]
class
TestCudnnCase3
(
TestCase2
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
op_type
=
"pool2d_cudnn"
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
class
TestCaseCudnn4
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
True
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
0
,
0
]
class
TestCudnnCase4
(
TestCase3
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
op_type
=
"pool2d_cudnn"
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
class
TestCaseCudnn5
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
0
,
0
]
class
TestCudnnCase5
(
TestCase4
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
op_type
=
"pool2d_cudnn"
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
class
TestCaseCudnn6
(
TestPool2d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
1
]
class
TestCudnnCase6
(
TestCase5
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
op_type
=
"pool2d_cudnn"
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_pool3d_op.py
浏览文件 @
13ec6f99
...
@@ -3,8 +3,7 @@ import numpy as np
...
@@ -3,8 +3,7 @@ import numpy as np
from
op_test
import
OpTest
from
op_test
import
OpTest
def
max_pool3D_forward_naive
(
x
,
ksize
,
strides
,
paddings
=
[
0
,
0
],
global_pool
=
0
):
def
max_pool3D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
0
):
N
,
C
,
D
,
H
,
W
=
x
.
shape
N
,
C
,
D
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
==
1
:
ksize
=
[
D
,
H
,
W
]
ksize
=
[
D
,
H
,
W
]
...
@@ -27,8 +26,7 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
...
@@ -27,8 +26,7 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
return
out
return
out
def
avg_pool3D_forward_naive
(
x
,
ksize
,
strides
,
paddings
=
[
0
,
0
],
global_pool
=
0
):
def
avg_pool3D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
0
):
N
,
C
,
D
,
H
,
W
=
x
.
shape
N
,
C
,
D
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
==
1
:
ksize
=
[
D
,
H
,
W
]
ksize
=
[
D
,
H
,
W
]
...
@@ -55,6 +53,10 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
...
@@ -55,6 +53,10 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
class
TestPool3d_Op
(
OpTest
):
class
TestPool3d_Op
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_test_case
()
self
.
init_test_case
()
self
.
init_global_pool
()
self
.
init_op_type
()
self
.
init_pool_type
()
if
self
.
global_pool
:
if
self
.
global_pool
:
self
.
paddings
=
[
0
for
_
in
range
(
len
(
self
.
paddings
))]
self
.
paddings
=
[
0
for
_
in
range
(
len
(
self
.
paddings
))]
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)
...
@@ -81,74 +83,115 @@ class TestPool3d_Op(OpTest):
...
@@ -81,74 +83,115 @@ class TestPool3d_Op(OpTest):
self
.
check_grad
(
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
self
.
check_grad
(
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
global_pool
=
True
self
.
op_type
=
"pool3d"
self
.
pool_type
=
"avg"
self
.
pool3D_forward_naive
=
avg_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
,
5
]
self
.
shape
=
[
2
,
3
,
5
,
5
,
5
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
0
,
0
,
0
]
self
.
paddings
=
[
0
,
0
,
0
]
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d"
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
self
.
pool3D_forward_naive
=
avg_pool3D_forward_naive
def
init_global_pool
(
self
):
self
.
global_pool
=
True
class
TestCase1
(
TestPool3d_Op
):
class
TestCase1
(
TestPool3d_Op
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
op_type
=
"pool3d"
self
.
op_type
=
"pool3d"
self
.
pool_type
=
"avg"
self
.
pool3D_forward_naive
=
avg_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
0
,
0
,
0
]
self
.
paddings
=
[
0
,
0
,
0
]
def
init_op_type
(
self
):
class
TestCase2
(
TestPool3d_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
op_type
=
"pool3d"
self
.
op_type
=
"pool3d"
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
self
.
pool_type
=
"avg"
self
.
pool3D_forward_naive
=
avg_pool3D_forward_naive
self
.
pool3D_forward_naive
=
avg_pool3D_forward_naive
def
init_global_pool
(
self
):
self
.
global_pool
=
False
class
TestCase2
(
TestPool3d_Op
):
def
init_test_case
(
self
):
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
1
,
1
,
1
]
self
.
paddings
=
[
1
,
1
,
1
]
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d"
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
self
.
pool3D_forward_naive
=
avg_pool3D_forward_naive
def
init_global_pool
(
self
):
self
.
global_pool
=
False
class
TestCase3
(
TestPool3d_Op
):
class
TestCase3
(
TestPool3d_Op
):
def
init_test_case
(
self
):
def
init_op_type
(
self
):
self
.
global_pool
=
True
self
.
op_type
=
"pool3d"
self
.
op_type
=
"pool3d"
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool_type
=
"max"
self
.
pool3D_forward_naive
=
max_pool3D_forward_naive
self
.
pool3D_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
,
5
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
0
,
0
,
0
]
class
TestCase4
(
TestPool3d_Op
):
class
TestCase4
(
TestCase1
):
def
init_test_case
(
self
):
def
init_op_type
(
self
):
self
.
global_pool
=
False
self
.
op_type
=
"pool3d"
self
.
op_type
=
"pool3d"
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool_type
=
"max"
self
.
pool3D_forward_naive
=
max_pool3D_forward_naive
self
.
pool3D_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
0
,
0
,
0
]
class
TestCase5
(
TestPool3d_Op
):
class
TestCase5
(
TestCase2
):
def
init_test_case
(
self
):
def
init_op_type
(
self
):
self
.
global_pool
=
False
self
.
op_type
=
"pool3d"
self
.
op_type
=
"pool3d"
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool_type
=
"max"
self
.
pool3D_forward_naive
=
max_pool3D_forward_naive
self
.
pool3D_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
#--------------------test pool3d_cudnn--------------------
self
.
paddings
=
[
1
,
1
,
1
]
class
TestCudnnCase1
(
TestPool3d_Op
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
class
TestCudnnCase2
(
TestCase1
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
class
TestCudnnCase3
(
TestCase2
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
class
TestCudnnCase4
(
TestCase3
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
class
TestCudnnCase5
(
TestCase4
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
class
TestCudnnCase6
(
TestCase5
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/v2/fluid/tests/test_pool_max_op.py
浏览文件 @
13ec6f99
...
@@ -3,11 +3,13 @@ import numpy as np
...
@@ -3,11 +3,13 @@ import numpy as np
from
op_test
import
OpTest
from
op_test
import
OpTest
def
max_pool3D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
0
):
def
max_pool3D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
False
):
N
,
C
,
D
,
H
,
W
=
x
.
shape
N
,
C
,
D
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
:
ksize
=
[
D
,
H
,
W
]
ksize
=
[
D
,
H
,
W
]
paddings
=
[
0
,
0
,
0
]
D_out
=
(
D
-
ksize
[
0
]
+
2
*
paddings
[
0
])
/
strides
[
0
]
+
1
D_out
=
(
D
-
ksize
[
0
]
+
2
*
paddings
[
0
])
/
strides
[
0
]
+
1
H_out
=
(
H
-
ksize
[
1
]
+
2
*
paddings
[
1
])
/
strides
[
1
]
+
1
H_out
=
(
H
-
ksize
[
1
]
+
2
*
paddings
[
1
])
/
strides
[
1
]
+
1
W_out
=
(
W
-
ksize
[
2
]
+
2
*
paddings
[
2
])
/
strides
[
2
]
+
1
W_out
=
(
W
-
ksize
[
2
]
+
2
*
paddings
[
2
])
/
strides
[
2
]
+
1
...
@@ -40,11 +42,13 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0):
...
@@ -40,11 +42,13 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0):
return
out
,
mask
return
out
,
mask
def
max_pool2D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
0
):
def
max_pool2D_forward_naive
(
x
,
ksize
,
strides
,
paddings
,
global_pool
=
False
):
N
,
C
,
H
,
W
=
x
.
shape
N
,
C
,
H
,
W
=
x
.
shape
if
global_pool
==
1
:
if
global_pool
:
ksize
=
[
H
,
W
]
ksize
=
[
H
,
W
]
paddings
=
[
0
,
0
]
H_out
=
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
/
strides
[
0
]
+
1
H_out
=
(
H
-
ksize
[
0
]
+
2
*
paddings
[
0
])
/
strides
[
0
]
+
1
W_out
=
(
W
-
ksize
[
1
]
+
2
*
paddings
[
1
])
/
strides
[
1
]
+
1
W_out
=
(
W
-
ksize
[
1
]
+
2
*
paddings
[
1
])
/
strides
[
1
]
+
1
out
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
out
=
np
.
zeros
((
N
,
C
,
H_out
,
W_out
))
...
@@ -74,13 +78,13 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0):
...
@@ -74,13 +78,13 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0):
class
TestMaxPoolWithIndex_Op
(
OpTest
):
class
TestMaxPoolWithIndex_Op
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_test_case
()
self
.
init_test_case
()
if
self
.
global_pool
:
self
.
init_global
()
self
.
paddings
=
[
0
for
_
in
range
(
len
(
self
.
paddings
))]
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)
output
,
mask
=
self
.
pool_forward_naive
(
input
,
self
.
ksize
,
self
.
strides
,
output
,
mask
=
self
.
pool_forward_naive
(
input
,
self
.
ksize
,
self
.
strides
,
self
.
paddings
,
self
.
global_pool
)
self
.
paddings
,
self
.
global_pool
)
output
=
output
.
astype
(
"float32"
)
output
=
output
.
astype
(
"float32"
)
mask
=
mask
.
astype
(
"
floa
t32"
)
mask
=
mask
.
astype
(
"
in
t32"
)
self
.
attrs
=
{
self
.
attrs
=
{
'strides'
:
self
.
strides
,
'strides'
:
self
.
strides
,
...
@@ -99,41 +103,24 @@ class TestMaxPoolWithIndex_Op(OpTest):
...
@@ -99,41 +103,24 @@ class TestMaxPoolWithIndex_Op(OpTest):
# self.check_grad(set(['X']), ['Out'], max_relative_error=0.07)
# self.check_grad(set(['X']), ['Out'], max_relative_error=0.07)
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
global_pool
=
True
self
.
op_type
=
"max_pool3d_with_index"
self
.
index
=
"max_pool3d_with_index"
self
.
op_type
=
"%s"
%
self
.
index
self
.
pool_forward_naive
=
max_pool3D_forward_naive
self
.
pool_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
,
5
]
self
.
shape
=
[
2
,
3
,
5
,
5
,
5
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
1
,
1
,
1
]
self
.
paddings
=
[
1
,
1
,
1
]
def
init_global
(
self
):
self
.
global_pool
=
False
class
TestCase1
(
TestMaxPoolWithIndex_Op
):
class
TestCase1
(
TestMaxPoolWithIndex_Op
):
def
init_
test_case
(
self
):
def
init_
global
(
self
):
self
.
global_pool
=
True
self
.
global_pool
=
True
self
.
op_type
=
"max_pool3d_with_index"
self
.
pool_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
,
5
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
1
,
1
,
1
]
class
TestCase2
(
TestMaxPoolWithIndex_Op
):
class
TestCase2
(
TestMaxPoolWithIndex_Op
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
op_type
=
"max_pool3d_with_index"
self
.
pool_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
1
,
1
,
1
]
class
TestCase3
(
TestMaxPoolWithIndex_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
op_type
=
"max_pool3d_with_index"
self
.
op_type
=
"max_pool3d_with_index"
self
.
pool_forward_naive
=
max_pool3D_forward_naive
self
.
pool_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
self
.
shape
=
[
2
,
3
,
7
,
7
,
7
]
...
@@ -141,32 +128,18 @@ class TestCase3(TestMaxPoolWithIndex_Op):
...
@@ -141,32 +128,18 @@ class TestCase3(TestMaxPoolWithIndex_Op):
self
.
strides
=
[
2
,
2
,
2
]
self
.
strides
=
[
2
,
2
,
2
]
self
.
paddings
=
[
0
,
0
,
0
]
self
.
paddings
=
[
0
,
0
,
0
]
def
init_global
(
self
):
class
TestCase4
(
TestMaxPoolWithIndex_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
True
self
.
global_pool
=
True
self
.
op_type
=
"max_pool3d_with_index"
self
.
pool_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
,
5
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
1
,
1
,
1
]
self
.
paddings
=
[
1
,
1
,
1
]
class
TestCase5
(
TestMaxPoolWithIndex_Op
):
class
TestCase3
(
TestCase2
):
def
init_test_case
(
self
):
def
init_global
(
self
):
self
.
global_pool
=
True
self
.
global_pool
=
False
self
.
op_type
=
"max_pool3d_with_index"
self
.
pool_forward_naive
=
max_pool3D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
,
5
]
self
.
ksize
=
[
3
,
3
,
3
]
self
.
strides
=
[
2
,
2
,
2
]
self
.
paddings
=
[
0
,
0
,
0
]
class
TestCase6
(
TestMaxPoolWithIndex_Op
):
#----------------max_pool2d_with_index----------------
class
TestCase4
(
TestMaxPoolWithIndex_Op
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
global_pool
=
False
self
.
op_type
=
"max_pool2d_with_index"
self
.
op_type
=
"max_pool2d_with_index"
self
.
pool_forward_naive
=
max_pool2D_forward_naive
self
.
pool_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
shape
=
[
2
,
3
,
7
,
7
]
...
@@ -174,10 +147,17 @@ class TestCase6(TestMaxPoolWithIndex_Op):
...
@@ -174,10 +147,17 @@ class TestCase6(TestMaxPoolWithIndex_Op):
self
.
strides
=
[
1
,
1
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
1
]
self
.
paddings
=
[
1
,
1
]
def
init_global
(
self
):
self
.
global_pool
=
True
class
TestCase
7
(
TestMaxPoolWithIndex_Op
):
class
TestCase
5
(
TestCase4
):
def
init_
test_case
(
self
):
def
init_
global
(
self
):
self
.
global_pool
=
False
self
.
global_pool
=
False
class
TestCase6
(
TestMaxPoolWithIndex_Op
):
def
init_test_case
(
self
):
self
.
op_type
=
"max_pool2d_with_index"
self
.
op_type
=
"max_pool2d_with_index"
self
.
pool_forward_naive
=
max_pool2D_forward_naive
self
.
pool_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
7
,
7
]
self
.
shape
=
[
2
,
3
,
7
,
7
]
...
@@ -185,27 +165,13 @@ class TestCase7(TestMaxPoolWithIndex_Op):
...
@@ -185,27 +165,13 @@ class TestCase7(TestMaxPoolWithIndex_Op):
self
.
strides
=
[
2
,
2
]
self
.
strides
=
[
2
,
2
]
self
.
paddings
=
[
0
,
0
]
self
.
paddings
=
[
0
,
0
]
def
init_global
(
self
):
class
TestCase8
(
TestMaxPoolWithIndex_Op
):
def
init_test_case
(
self
):
self
.
global_pool
=
True
self
.
global_pool
=
True
self
.
op_type
=
"max_pool2d_with_index"
self
.
pool_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
1
]
class
TestCase9
(
TestMaxPoolWithIndex_Op
):
class
TestCase7
(
TestCase6
):
def
init_test_case
(
self
):
def
init_global
(
self
):
self
.
global_pool
=
True
self
.
global_pool
=
False
self
.
op_type
=
"max_pool2d_with_index"
self
.
pool_forward_naive
=
max_pool2D_forward_naive
self
.
shape
=
[
2
,
3
,
5
,
5
]
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
2
,
2
]
self
.
paddings
=
[
0
,
0
]
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/v2/fluid/tests/test_sequence_slice_op.py
0 → 100644
浏览文件 @
13ec6f99
import
unittest
import
numpy
as
np
import
sys
from
op_test
import
OpTest
class TestSequenceSliceOp(OpTest):
    """Test case for the ``sequence_slice`` operator.

    The fixture slices a window out of every sequence of a one-level LoD
    input and records the concatenated windows, together with their
    cumulative boundaries, as the expected output.
    """

    def set_data(self):
        """Populate ``self.inputs`` and ``self.outputs`` for the operator.

        The input is a random float32 array of shape ``self.x_dim`` paired
        with ``self.x_lod``. For sequence ``i`` the expected window starts at
        ``x_lod[0][i] + offset[i, 0]`` and spans ``length[i, 0]`` rows.
        """
        self.init_test_case()
        # Only a single LoD level is handled here.
        data = np.random.random(self.x_dim).astype('float32')
        seq_lod = self.x_lod
        offsets = np.array(self.offset).astype("int64")
        lengths = np.array(self.length).astype("int64")
        self.inputs = {'X': (data, seq_lod), 'Offset': offsets, 'Length': lengths}

        pieces = []
        # Running end positions of the slices inside the concatenated output.
        boundaries = [0]
        for idx in range(len(offsets)):
            begin = seq_lod[0][idx] + offsets[idx, 0]
            end = begin + lengths[idx, 0]
            piece = data[begin:end, :]
            pieces.append(piece)
            boundaries.append(boundaries[-1] + len(piece))

        expected = np.concatenate(pieces, axis=0)
        self.outputs = {'Out': (expected, [boundaries])}

    def init_test_case(self):
        """Define the input shape, LoD, and per-sequence offset/length."""
        self.x_dim = (100, 3, 2)
        self.x_lod = [[0, 20, 40, 60, 80, 100]]
        self.offset = [[1], [2], [3], [4], [5]]
        self.length = [[10], [8], [6], [4], [2]]

    def setUp(self):
        """Set the operator type, then build the test data."""
        self.op_type = "sequence_slice"
        self.set_data()

    def test_check_output(self):
        """Run the output check (``check_output`` is not defined in this class)."""
        self.check_output()

    def test_check_grad(self):
        """Run the gradient check of ``Out`` with respect to input ``X``."""
        self.check_grad(['X'], 'Out')
# Script entry point: run the unittest runner when this file is executed directly.
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录