Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
816e556b
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
816e556b
编写于
1月 15, 2018
作者:
L
Luo Tao
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into fluid_infer
上级
3cf23bec
8d253e49
变更
75
展开全部
隐藏空白更改
内联
并排
Showing
75 changed file
with
2136 addition
and
900 deletion
+2136
-900
README.md
README.md
+1
-0
doc/api/v2/fluid/layers.rst
doc/api/v2/fluid/layers.rst
+129
-0
doc/getstarted/build_and_install/build_from_source_cn.rst
doc/getstarted/build_and_install/build_from_source_cn.rst
+10
-0
doc/getstarted/build_and_install/build_from_source_en.rst
doc/getstarted/build_and_install/build_from_source_en.rst
+10
-0
doc/howto/dev/new_op_cn.md
doc/howto/dev/new_op_cn.md
+1
-1
paddle/framework/data_device_transform.cc
paddle/framework/data_device_transform.cc
+2
-3
paddle/framework/data_device_transform.h
paddle/framework/data_device_transform.h
+2
-1
paddle/framework/data_layout.h
paddle/framework/data_layout.h
+14
-5
paddle/framework/data_transform.cc
paddle/framework/data_transform.cc
+4
-6
paddle/framework/data_transform.h
paddle/framework/data_transform.h
+3
-3
paddle/framework/op_kernel_type.h
paddle/framework/op_kernel_type.h
+5
-0
paddle/framework/op_registry_test.cc
paddle/framework/op_registry_test.cc
+0
-18
paddle/framework/operator.cc
paddle/framework/operator.cc
+28
-72
paddle/framework/operator.h
paddle/framework/operator.h
+1
-25
paddle/operators/CMakeLists.txt
paddle/operators/CMakeLists.txt
+16
-3
paddle/operators/conv_cudnn_op.cc
paddle/operators/conv_cudnn_op.cc
+0
-74
paddle/operators/conv_cudnn_op.cu.cc
paddle/operators/conv_cudnn_op.cu.cc
+15
-16
paddle/operators/conv_op.cc
paddle/operators/conv_op.cc
+73
-0
paddle/operators/conv_op.h
paddle/operators/conv_op.h
+8
-0
paddle/operators/conv_transpose_cudnn_op.cc
paddle/operators/conv_transpose_cudnn_op.cc
+0
-78
paddle/operators/conv_transpose_cudnn_op.cu.cc
paddle/operators/conv_transpose_cudnn_op.cu.cc
+18
-18
paddle/operators/conv_transpose_op.cc
paddle/operators/conv_transpose_op.cc
+72
-0
paddle/operators/conv_transpose_op.h
paddle/operators/conv_transpose_op.h
+8
-0
paddle/operators/math/sequence2batch.cc
paddle/operators/math/sequence2batch.cc
+1
-0
paddle/operators/pool_cudnn_op.cc
paddle/operators/pool_cudnn_op.cc
+0
-39
paddle/operators/pool_cudnn_op.cu.cc
paddle/operators/pool_cudnn_op.cu.cc
+17
-12
paddle/operators/pool_cudnn_op.h
paddle/operators/pool_cudnn_op.h
+0
-19
paddle/operators/pool_op.cc
paddle/operators/pool_op.cc
+62
-3
paddle/operators/pool_op.h
paddle/operators/pool_op.h
+8
-0
paddle/operators/print_op.cc
paddle/operators/print_op.cc
+105
-28
paddle/operators/reorder_lod_tensor_by_rank_op.cc
paddle/operators/reorder_lod_tensor_by_rank_op.cc
+33
-11
paddle/operators/shrink_rnn_memory_op.cc
paddle/operators/shrink_rnn_memory_op.cc
+13
-10
paddle/operators/while_op.cc
paddle/operators/while_op.cc
+10
-6
paddle/platform/dynload/cudnn.cc
paddle/platform/dynload/cudnn.cc
+1
-1
paddle/platform/dynload/cudnn.h
paddle/platform/dynload/cudnn.h
+1
-1
paddle/platform/dynload/dynamic_loader.cc
paddle/platform/dynload/dynamic_loader.cc
+1
-1
paddle/platform/dynload/dynamic_loader.h
paddle/platform/dynload/dynamic_loader.h
+1
-1
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+1
-6
paddle/pybind/tensor_py.h
paddle/pybind/tensor_py.h
+18
-5
python/paddle/v2/fluid/__init__.py
python/paddle/v2/fluid/__init__.py
+18
-3
python/paddle/v2/fluid/backward.py
python/paddle/v2/fluid/backward.py
+4
-1
python/paddle/v2/fluid/clip.py
python/paddle/v2/fluid/clip.py
+15
-7
python/paddle/v2/fluid/default_scope_funcs.py
python/paddle/v2/fluid/default_scope_funcs.py
+15
-11
python/paddle/v2/fluid/distribute_transpiler.py
python/paddle/v2/fluid/distribute_transpiler.py
+404
-150
python/paddle/v2/fluid/distribute_transpiler_simple.py
python/paddle/v2/fluid/distribute_transpiler_simple.py
+242
-0
python/paddle/v2/fluid/distributed_spliter.py
python/paddle/v2/fluid/distributed_spliter.py
+35
-0
python/paddle/v2/fluid/evaluator.py
python/paddle/v2/fluid/evaluator.py
+19
-16
python/paddle/v2/fluid/framework.py
python/paddle/v2/fluid/framework.py
+12
-3
python/paddle/v2/fluid/initializer.py
python/paddle/v2/fluid/initializer.py
+6
-1
python/paddle/v2/fluid/io.py
python/paddle/v2/fluid/io.py
+9
-3
python/paddle/v2/fluid/layers/control_flow.py
python/paddle/v2/fluid/layers/control_flow.py
+47
-33
python/paddle/v2/fluid/layers/nn.py
python/paddle/v2/fluid/layers/nn.py
+34
-13
python/paddle/v2/fluid/layers/ops.py
python/paddle/v2/fluid/layers/ops.py
+28
-1
python/paddle/v2/fluid/layers/tensor.py
python/paddle/v2/fluid/layers/tensor.py
+26
-14
python/paddle/v2/fluid/memory_optimization_transpiler.py
python/paddle/v2/fluid/memory_optimization_transpiler.py
+4
-2
python/paddle/v2/fluid/nets.py
python/paddle/v2/fluid/nets.py
+4
-1
python/paddle/v2/fluid/registry.py
python/paddle/v2/fluid/registry.py
+10
-7
python/paddle/v2/fluid/regularizer.py
python/paddle/v2/fluid/regularizer.py
+5
-1
python/paddle/v2/fluid/tests/CMakeLists.txt
python/paddle/v2/fluid/tests/CMakeLists.txt
+1
-0
python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt
python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt
+5
-0
python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py
.../v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py
+0
-0
python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py
...tests/book_distribute/notest_dist_label_semantic_roles.py
+0
-0
python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
...le/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
+0
-0
python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py
.../book_distribute/notest_understand_sentiment_conv_dist.py
+0
-0
python/paddle/v2/fluid/tests/book_distribute/test_split_var.py
...n/paddle/v2/fluid/tests/book_distribute/test_split_var.py
+39
-0
python/paddle/v2/fluid/tests/op_test.py
python/paddle/v2/fluid/tests/op_test.py
+39
-25
python/paddle/v2/fluid/tests/test_clip.py
python/paddle/v2/fluid/tests/test_clip.py
+67
-0
python/paddle/v2/fluid/tests/test_conv2d_op.py
python/paddle/v2/fluid/tests/test_conv2d_op.py
+61
-32
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
+58
-22
python/paddle/v2/fluid/tests/test_conv3d_op.py
python/paddle/v2/fluid/tests/test_conv3d_op.py
+61
-24
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
+58
-22
python/paddle/v2/fluid/tests/test_parallel_op.py
python/paddle/v2/fluid/tests/test_parallel_op.py
+6
-2
python/paddle/v2/fluid/tests/test_pool2d_op.py
python/paddle/v2/fluid/tests/test_pool2d_op.py
+34
-15
python/paddle/v2/fluid/tests/test_pool3d_op.py
python/paddle/v2/fluid/tests/test_pool3d_op.py
+34
-15
python/paddle/v2/fluid/tests/test_print_op.py
python/paddle/v2/fluid/tests/test_print_op.py
+44
-10
未找到文件。
README.md
浏览文件 @
816e556b
...
@@ -37,6 +37,7 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
...
@@ -37,6 +37,7 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
- Optimized math operations through SSE/AVX intrinsics, BLAS libraries
- Optimized math operations through SSE/AVX intrinsics, BLAS libraries
(e.g. MKL, OpenBLAS, cuBLAS) or customized CPU/GPU kernels.
(e.g. MKL, OpenBLAS, cuBLAS) or customized CPU/GPU kernels.
- Optimized CNN networks through MKL-DNN library.
- Highly optimized recurrent networks which can handle **variable-length**
- Highly optimized recurrent networks which can handle **variable-length**
sequence without padding.
sequence without padding.
- Optimized local and distributed training for models with high dimensional
- Optimized local and distributed training for models with high dimensional
...
...
doc/api/v2/fluid/layers.rst
浏览文件 @
816e556b
...
@@ -358,3 +358,132 @@ reduce_min
...
@@ -358,3 +358,132 @@ reduce_min
.. autofunction:: paddle.v2.fluid.layers.reduce_min
.. autofunction:: paddle.v2.fluid.layers.reduce_min
:noindex:
:noindex:
logsigmoid
----------
.. autofunction:: paddle.v2.fluid.layers.logsigmoid
:noindex:
exp
---
.. autofunction:: paddle.v2.fluid.layers.exp
:noindex:
relu
----
.. autofunction:: paddle.v2.fluid.layers.relu
:noindex:
tanh
----
.. autofunction:: paddle.v2.fluid.layers.tanh
:noindex:
tanh_shrink
-----------
.. autofunction:: paddle.v2.fluid.layers.tanh_shrink
:noindex:
softshrink
----------
.. autofunction:: paddle.v2.fluid.layers.softshrink
:noindex:
sqrt
----
.. autofunction:: paddle.v2.fluid.layers.sqrt
:noindex:
abs
----
.. autofunction:: paddle.v2.fluid.layers.abs
:noindex:
ceil
----
.. autofunction:: paddle.v2.fluid.layers.ceil
:noindex:
floor
-----
.. autofunction:: paddle.v2.fluid.layers.floor
:noindex:
round
-----
.. autofunction:: paddle.v2.fluid.layers.round
:noindex:
reciprocal
----------
.. autofunction:: paddle.v2.fluid.layers.reciprocal
:noindex:
log
---
.. autofunction:: paddle.v2.fluid.layers.log
:noindex:
square
------
.. autofunction:: paddle.v2.fluid.layers.square
:noindex:
softplus
--------
.. autofunction:: paddle.v2.fluid.layers.softplus
:noindex:
softsign
---------
.. autofunction:: paddle.v2.fluid.layers.softsign
:noindex:
brelu
-----
.. autofunction:: paddle.v2.fluid.layers.brelu
:noindex:
leaky_relu
----------
.. autofunction:: paddle.v2.fluid.layers.leaky_relu
:noindex:
soft_relu
---------
.. autofunction:: paddle.v2.fluid.layers.soft_relu
:noindex:
elu
----
.. autofunction:: paddle.v2.fluid.layers.elu
:noindex:
relu6
-----
.. autofunction:: paddle.v2.fluid.layers.relu6
:noindex:
pow
----
.. autofunction:: paddle.v2.fluid.layers.pow
:noindex:
hard_shrink
-----------
.. autofunction:: paddle.v2.fluid.layers.hard_shrink
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.v2.fluid.layers.thresholded_relu
:noindex:
hard_sigmoid
-------------
.. autofunction:: paddle.v2.fluid.layers.hard_sigmoid
:noindex:
swish
------
.. autofunction:: paddle.v2.fluid.layers.swish
:noindex:
doc/getstarted/build_and_install/build_from_source_cn.rst
浏览文件 @
816e556b
...
@@ -32,6 +32,16 @@ PaddlePaddle主要使用 `CMake <https://cmake.org>`_ 以及GCC, G++作为编译
...
@@ -32,6 +32,16 @@ PaddlePaddle主要使用 `CMake <https://cmake.org>`_ 以及GCC, G++作为编译
pip install build/python/dist/*.whl
pip install build/python/dist/*.whl
如果机器中已经安装过PaddlePaddle,有两种方法:
.. code-block:: bash
1. 先卸载之前的版本,再重新安装
pip uninstall paddlepaddle
pip install build/python/dist/*.whl
2. 直接升级到更新的版本
pip install build/python/dist/*.whl -U
.. _run_test:
.. _run_test:
...
...
doc/getstarted/build_and_install/build_from_source_en.rst
浏览文件 @
816e556b
...
@@ -36,6 +36,16 @@ machine or copy it to the target machine.
...
@@ -36,6 +36,16 @@ machine or copy it to the target machine.
pip install build/python/dist/*.whl
pip install build/python/dist/*.whl
If the machine has installed PaddlePaddle before, there are two methods:
.. code-block:: bash
1. uninstall and reinstall
pip uninstall paddlepaddle
pip install build/python/dist/*.whl
2. upgrade directly
pip install build/python/dist/*.whl -U
.. _run_test:
.. _run_test:
...
...
doc/howto/dev/new_op_cn.md
浏览文件 @
816e556b
...
@@ -24,7 +24,7 @@
...
@@ -24,7 +24,7 @@
-
`framework::OperatorWithKernel`
:继承自OperatorBase,Op有计算函数,称作有Kernel。
-
`framework::OperatorWithKernel`
:继承自OperatorBase,Op有计算函数,称作有Kernel。
-
`class OpProtoAndCheckerMaker`
:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成
-
`class OpProtoAndCheckerMaker`
:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成
依据是否包含kernel,可以将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自
`Operator
Base`
,后者继承自
`OperatorWithKernel
`
。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下:
依据是否包含kernel,可以将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自
`Operator
WithKernel`
,后者继承自
`OperatorBase
`
。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下:
内容 | 定义位置
内容 | 定义位置
...
...
paddle/framework/data_device_transform.cc
浏览文件 @
816e556b
...
@@ -31,15 +31,14 @@ static const platform::DeviceContext* GetDeviceContext(
...
@@ -31,15 +31,14 @@ static const platform::DeviceContext* GetDeviceContext(
}
}
}
}
Tensor
*
DeviceTransform
(
const
Tensor
&
in
,
const
platform
::
Place
&
dst_place
)
{
void
DeviceTransform
(
const
Tensor
&
in
,
const
platform
::
Place
&
dst_place
,
Tensor
*
out
)
{
VLOG
(
3
)
<<
"DeviceTransform in, src_place "
<<
in
.
place
()
VLOG
(
3
)
<<
"DeviceTransform in, src_place "
<<
in
.
place
()
<<
" dst_place: "
<<
dst_place
;
<<
" dst_place: "
<<
dst_place
;
Tensor
*
out
=
new
Tensor
();
auto
*
dev_ctx
=
GetDeviceContext
(
in
.
place
(),
dst_place
);
auto
*
dev_ctx
=
GetDeviceContext
(
in
.
place
(),
dst_place
);
dev_ctx
->
Wait
();
dev_ctx
->
Wait
();
Copy
(
in
,
dst_place
,
*
dev_ctx
,
out
);
Copy
(
in
,
dst_place
,
*
dev_ctx
,
out
);
dev_ctx
->
Wait
();
dev_ctx
->
Wait
();
return
out
;
}
}
}
// namespace framework
}
// namespace framework
...
...
paddle/framework/data_device_transform.h
浏览文件 @
816e556b
...
@@ -21,7 +21,8 @@ limitations under the License. */
...
@@ -21,7 +21,8 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
Tensor
*
DeviceTransform
(
const
Tensor
&
in
,
const
platform
::
Place
&
dst_place
);
void
DeviceTransform
(
const
Tensor
&
in
,
const
platform
::
Place
&
dst_place
,
Tensor
*
out
);
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/framework/data_layout.h
浏览文件 @
816e556b
...
@@ -14,7 +14,9 @@ limitations under the License. */
...
@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#pragma once
#include <iostream>
#include <cctype>
#include <ostream>
#include "paddle/platform/enforce.h"
#include "paddle/platform/enforce.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -27,12 +29,19 @@ enum class DataLayout {
...
@@ -27,12 +29,19 @@ enum class DataLayout {
};
};
inline
DataLayout
StringToDataLayout
(
const
std
::
string
&
str
)
{
inline
DataLayout
StringToDataLayout
(
const
std
::
string
&
str
)
{
if
(
str
==
"NHWC"
||
str
==
"nhwc"
)
{
std
::
string
s
(
str
);
for
(
size_t
i
=
0
;
i
<
s
.
size
();
++
i
)
{
s
[
i
]
=
toupper
(
s
[
i
]);
}
if
(
s
==
"NHWC"
)
{
return
DataLayout
::
kNHWC
;
return
DataLayout
::
kNHWC
;
}
else
if
(
s
tr
==
"NCHW"
||
str
==
"nchw
"
)
{
}
else
if
(
s
==
"NCHW
"
)
{
return
DataLayout
::
kNCHW
;
return
DataLayout
::
kNCHW
;
}
else
if
(
s
==
"ANYLAYOUT"
)
{
return
DataLayout
::
kAnyLayout
;
}
else
{
}
else
{
PADDLE_THROW
(
"Unknown storage order string: %s"
,
s
tr
);
PADDLE_THROW
(
"Unknown storage order string: %s"
,
s
);
}
}
}
}
...
@@ -49,7 +58,7 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
...
@@ -49,7 +58,7 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
}
}
}
}
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
DataLayout
l
)
{
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
DataLayout
&
l
)
{
out
<<
DataLayoutToString
(
l
);
out
<<
DataLayoutToString
(
l
);
return
out
;
return
out
;
}
}
...
...
paddle/framework/data_transform.cc
浏览文件 @
816e556b
...
@@ -19,16 +19,14 @@ limitations under the License. */
...
@@ -19,16 +19,14 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
Tensor
*
DataTransform
(
const
OpKernelType
&
expected_kernel_type
,
void
DataTransform
(
const
OpKernelType
&
expected_kernel_type
,
const
OpKernelType
&
kernel_type_for_var
,
const
OpKernelType
&
kernel_type_for_var
,
const
Tensor
&
input_tensor
)
{
const
Tensor
&
input_tensor
,
Tensor
*
out
)
{
Tensor
*
out
=
nullptr
;
if
(
!
platform
::
is_same_place
(
kernel_type_for_var
.
place_
,
if
(
!
platform
::
is_same_place
(
kernel_type_for_var
.
place_
,
expected_kernel_type
.
place_
))
{
expected_kernel_type
.
place_
))
{
out
=
DeviceTransform
(
input_tensor
,
expected_kernel_type
.
place_
);
DeviceTransform
(
input_tensor
,
expected_kernel_type
.
place_
,
out
);
}
}
PADDLE_ENFORCE_NOT_NULL
(
out
,
"out should not be null"
);
PADDLE_ENFORCE_NOT_NULL
(
out
,
"out should not be null"
);
return
out
;
}
}
void
CopyVariableWithTensor
(
const
Variable
&
in_var
,
const
Tensor
&
tensor
,
void
CopyVariableWithTensor
(
const
Variable
&
in_var
,
const
Tensor
&
tensor
,
...
...
paddle/framework/data_transform.h
浏览文件 @
816e556b
...
@@ -30,9 +30,9 @@ limitations under the License. */
...
@@ -30,9 +30,9 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
Tensor
*
DataTransform
(
const
OpKernelType
&
expected_kernel_type
,
void
DataTransform
(
const
OpKernelType
&
expected_kernel_type
,
const
OpKernelType
&
kernel_type_for_var
,
const
OpKernelType
&
kernel_type_for_var
,
const
Tensor
&
input_tensor
);
const
Tensor
&
input_tensor
,
Tensor
*
out
);
void
CopyVariableWithTensor
(
const
Variable
&
in_var
,
const
Tensor
&
tensor
,
void
CopyVariableWithTensor
(
const
Variable
&
in_var
,
const
Tensor
&
tensor
,
Variable
&
out_var
);
Variable
&
out_var
);
...
...
paddle/framework/op_kernel_type.h
浏览文件 @
816e556b
...
@@ -85,5 +85,10 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
...
@@ -85,5 +85,10 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
return
stream
.
str
();
return
stream
.
str
();
}
}
inline
bool
TransFromNeeded
(
const
OpKernelType
&
l
,
const
OpKernelType
&
r
)
{
return
(
!
platform
::
places_are_same_class
(
l
.
place_
,
r
.
place_
))
||
(
l
.
data_type_
!=
r
.
data_type_
)
||
(
l
.
data_layout_
!=
r
.
data_layout_
);
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/framework/op_registry_test.cc
浏览文件 @
816e556b
...
@@ -368,24 +368,6 @@ TEST(OperatorRegistrar, OpWithMultiKernel) {
...
@@ -368,24 +368,6 @@ TEST(OperatorRegistrar, OpWithMultiKernel) {
// TODO(qiao) add priority back
// TODO(qiao) add priority back
// use all available kernels
// use all available kernels
paddle
::
framework
::
UseALL
();
op
->
Run
(
scope
,
cuda_place
);
op
->
Run
(
scope
,
cuda_place
);
EXPECT_EQ
(
op_test_value
,
-
10
);
EXPECT_EQ
(
op_test_value
,
-
10
);
// remove cuda kernels
paddle
::
framework
::
UseCPU
();
op
->
Run
(
scope
,
cpu_place
);
EXPECT_EQ
(
op_test_value
,
-
9
);
// add cuda kernels
paddle
::
framework
::
UseCUDA
();
op
->
Run
(
scope
,
cuda_place
);
EXPECT_EQ
(
op_test_value
,
-
10
);
// use cudnn kernel
paddle
::
framework
::
UseCUDNN
();
op
->
Run
(
scope
,
cuda_place
);
EXPECT_EQ
(
op_test_value
,
-
20
);
}
}
paddle/framework/operator.cc
浏览文件 @
816e556b
...
@@ -29,52 +29,12 @@ DEFINE_bool(op_sync, false,
...
@@ -29,52 +29,12 @@ DEFINE_bool(op_sync, false,
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
std
::
vector
<
std
::
tuple
<
platform
::
Place
,
LibraryType
>>
kKernelPriority
;
std
::
vector
<
std
::
tuple
<
platform
::
Place
,
LibraryType
>>
kKernelPriority
=
{
std
::
make_tuple
(
platform
::
CUDAPlace
(
0
),
LibraryType
::
kCUDNN
),
void
UseCPU
()
{
std
::
make_tuple
(
platform
::
CUDAPlace
(
0
),
LibraryType
::
kPlain
),
kKernelPriority
.
clear
();
std
::
make_tuple
(
platform
::
CPUPlace
(),
LibraryType
::
kMKLDNN
),
/*Plain CPU*/
std
::
make_tuple
(
platform
::
CPUPlace
(),
LibraryType
::
kPlain
),
auto
pair0
=
std
::
make_tuple
(
platform
::
CPUPlace
(),
LibraryType
::
kPlain
);
};
kKernelPriority
.
insert
(
kKernelPriority
.
begin
(),
pair0
);
}
void
UseMKLDNN
()
{
UseCPU
();
#if PADDLE_WITH_MKLML
{
/*MKLDNN Kernel*/
auto
pair0
=
std
::
make_tuple
(
platform
::
CPUPlace
(),
LibraryType
::
kMKLDNN
);
kKernelPriority
.
insert
(
kKernelPriority
.
begin
(),
pair0
);
}
#endif
}
void
UseCUDA
()
{
UseMKLDNN
();
#if PADDLE_WITH_CUDA
/*Plain GPU*/
auto
pair0
=
std
::
make_tuple
(
platform
::
CUDAPlace
(
0
),
LibraryType
::
kPlain
);
kKernelPriority
.
insert
(
kKernelPriority
.
begin
(),
pair0
);
#endif
}
void
UseCUDNN
()
{
UseCUDA
();
#if PADDLE_WITH_CUDA
if
(
platform
::
dynload
::
HasCUDNN
())
{
/*CUDNN Kernel*/
auto
pair0
=
std
::
make_tuple
(
platform
::
CUDAPlace
(
0
),
LibraryType
::
kCUDNN
);
kKernelPriority
.
insert
(
kKernelPriority
.
begin
(),
pair0
);
}
#endif
}
void
UseALL
()
{
UseCPU
();
UseMKLDNN
();
UseCUDA
();
UseCUDNN
();
}
static
DDim
GetDims
(
const
Scope
&
scope
,
const
std
::
string
&
name
)
{
static
DDim
GetDims
(
const
Scope
&
scope
,
const
std
::
string
&
name
)
{
Variable
*
var
=
scope
.
FindVar
(
name
);
Variable
*
var
=
scope
.
FindVar
(
name
);
...
@@ -271,36 +231,33 @@ static bool VarIsTensor(const Variable* var) {
...
@@ -271,36 +231,33 @@ static bool VarIsTensor(const Variable* var) {
return
var
->
IsType
<
LoDTensor
>
()
||
var
->
IsType
<
SelectedRows
>
();
return
var
->
IsType
<
LoDTensor
>
()
||
var
->
IsType
<
SelectedRows
>
();
}
}
static
const
Tensor
*
GetTensorFromVar
(
const
Variable
*
var
)
{
static
const
Tensor
*
GetTensorFromVar
(
Variable
*
var
)
{
const
Tensor
*
t
=
nullptr
;
if
(
var
->
IsType
<
LoDTensor
>
())
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
t
=
&
(
var
->
Get
<
LoDTensor
>
()
);
return
var
->
GetMutable
<
LoDTensor
>
(
);
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
t
=
&
(
var
->
Get
<
SelectedRows
>
().
value
()
);
return
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
(
);
}
else
{
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
->
Type
().
name
());
var
->
Type
().
name
());
}
}
return
t
;
}
}
static
Tensor
*
GetMutableTensorFromVar
(
Variable
*
var
)
{
static
Tensor
*
GetMutableTensorFromVar
(
Variable
*
var
)
{
Tensor
*
t
=
nullptr
;
if
(
var
->
IsType
<
LoDTensor
>
())
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
t
=
var
->
GetMutable
<
LoDTensor
>
();
return
var
->
GetMutable
<
LoDTensor
>
();
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
t
=
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
();
return
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
();
}
else
{
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
->
Type
().
name
());
var
->
Type
().
name
());
}
}
return
t
;
}
}
template
<
>
template
<
>
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
auto
*
var
=
InputVar
(
name
);
auto
*
var
=
InputVar
(
name
);
return
var
==
nullptr
?
nullptr
:
GetTensorFromVar
(
var
);
return
var
==
nullptr
?
nullptr
:
GetTensorFromVar
(
const_cast
<
Variable
*>
(
var
));
}
}
template
<
>
template
<
>
...
@@ -343,6 +300,7 @@ bool OpSupportGPU(const std::string& op_type) {
...
@@ -343,6 +300,7 @@ bool OpSupportGPU(const std::string& op_type) {
auto
it
=
all_kernels
.
find
(
op_type
);
auto
it
=
all_kernels
.
find
(
op_type
);
if
(
it
==
all_kernels
.
end
())
{
if
(
it
==
all_kernels
.
end
())
{
// All control operator must support GPU
// All control operator must support GPU
return
true
;
return
true
;
}
}
for
(
auto
&
kern_pair
:
it
->
second
)
{
for
(
auto
&
kern_pair
:
it
->
second
)
{
...
@@ -516,21 +474,17 @@ void OperatorWithKernel::Run(const Scope& scope,
...
@@ -516,21 +474,17 @@ void OperatorWithKernel::Run(const Scope& scope,
}
}
ExecutionContext
ctx
(
*
this
,
scope
,
*
dev_ctx
);
ExecutionContext
ctx
(
*
this
,
scope
,
*
dev_ctx
);
auto
expected_kernel_key
=
this
->
GetExpectedKernelType
(
ctx
);
OpKernelMap
&
kernels
=
kernels_iter
->
second
;
OpKernelMap
&
kernels
=
kernels_iter
->
second
;
for
(
auto
&
candidate
:
kKernelPriority
)
{
// TODO(dzhwinter) : kernel fallback mechanism will be added when all the
auto
candidate_key
=
// transform functions are ready.
OpKernelType
(
expected_kernel_key
.
data_type_
,
std
::
get
<
0
>
(
candidate
),
expected_kernel_key
.
data_layout_
,
std
::
get
<
1
>
(
candidate
));
if
((
candidate_key
==
expected_kernel_key
)
||
// for (auto& candidate : kKernelPriority) {
(
kernels
.
count
(
candidate_key
)))
{
// Do selection
expected_kernel_key
=
candidate_key
;
// }
break
;
}
auto
expected_kernel_key
=
this
->
GetExpectedKernelType
(
ctx
);
}
VLOG
(
3
)
<<
"expected_kernel_key:"
<<
expected_kernel_key
;
VLOG
(
3
)
<<
"expected_kernel_key:"
<<
expected_kernel_key
;
...
@@ -544,7 +498,7 @@ void OperatorWithKernel::Run(const Scope& scope,
...
@@ -544,7 +498,7 @@ void OperatorWithKernel::Run(const Scope& scope,
if
(
tensor_in
->
IsInitialized
())
{
if
(
tensor_in
->
IsInitialized
())
{
auto
kernel_type_for_var
=
this
->
GetKernelTypeForVar
(
auto
kernel_type_for_var
=
this
->
GetKernelTypeForVar
(
var_name_item
.
first
,
*
tensor_in
,
expected_kernel_key
);
var_name_item
.
first
,
*
tensor_in
,
expected_kernel_key
);
if
(
kernel_type_for_var
!=
expected_kernel_key
)
{
if
(
TransFromNeeded
(
kernel_type_for_var
,
expected_kernel_key
)
)
{
auto
out_var_names
=
OutputVars
(
true
);
auto
out_var_names
=
OutputVars
(
true
);
if
(
std
::
find
(
out_var_names
.
begin
(),
out_var_names
.
end
(),
if
(
std
::
find
(
out_var_names
.
begin
(),
out_var_names
.
end
(),
var_name
)
!=
out_var_names
.
end
())
{
var_name
)
!=
out_var_names
.
end
())
{
...
@@ -553,11 +507,13 @@ void OperatorWithKernel::Run(const Scope& scope,
...
@@ -553,11 +507,13 @@ void OperatorWithKernel::Run(const Scope& scope,
"does not support transform"
,
"does not support transform"
,
var_name
);
var_name
);
}
}
VLOG
(
3
)
<<
"need to do transform for var "
<<
var_name
;
VLOG
(
3
)
<<
"Transform Variable "
<<
var_name
<<
" from "
<<
kernel_type_for_var
<<
" to "
<<
expected_kernel_key
;
auto
*
trans_var
=
new_scope
.
Var
(
var_name
);
auto
*
trans_var
=
new_scope
.
Var
(
var_name
);
auto
*
out
=
DataTransform
(
expected_kernel_key
,
kernel_type_for_var
,
std
::
shared_ptr
<
Tensor
>
out
(
new
Tensor
);
*
tensor_in
);
DataTransform
(
expected_kernel_key
,
kernel_type_for_var
,
*
tensor_in
,
CopyVariableWithTensor
(
*
var
,
*
out
,
*
trans_var
);
out
.
get
());
CopyVariableWithTensor
(
*
var
,
*
(
out
.
get
()),
*
trans_var
);
}
}
}
}
}
}
...
...
paddle/framework/operator.h
浏览文件 @
816e556b
...
@@ -54,33 +54,9 @@ constexpr char kGradVarSuffix[] = "@GRAD";
...
@@ -54,33 +54,9 @@ constexpr char kGradVarSuffix[] = "@GRAD";
constexpr
char
kZeroVarSuffix
[]
=
"@ZERO"
;
constexpr
char
kZeroVarSuffix
[]
=
"@ZERO"
;
// define some kernel priority
// define some kernel priority
/* Define multiple kernel type fallback order*/
extern
std
::
vector
<
std
::
tuple
<
platform
::
Place
,
LibraryType
>>
kKernelPriority
;
extern
std
::
vector
<
std
::
tuple
<
platform
::
Place
,
LibraryType
>>
kKernelPriority
;
/**
* @brief Use cpu kernel only
*/
void
UseCPU
();
/**
* @brief Perfer MKLDNN kernel than Plain CPU kernel
*/
void
UseMKLDNN
();
/**
* @brief Perfer CUDA kernel than Plain CPU kernel
*/
void
UseCUDA
();
/**
* @brief Perfer cudnn kernel than Plain CUDA kernel
*/
void
UseCUDNN
();
/**
* @brief Use all available kernels
*/
void
UseALL
();
inline
std
::
string
GradVarName
(
const
std
::
string
&
var_name
)
{
inline
std
::
string
GradVarName
(
const
std
::
string
&
var_name
)
{
return
var_name
+
kGradVarSuffix
;
return
var_name
+
kGradVarSuffix
;
}
}
...
...
paddle/operators/CMakeLists.txt
浏览文件 @
816e556b
...
@@ -137,8 +137,6 @@ op_library(sum_op DEPS selected_rows_functor)
...
@@ -137,8 +137,6 @@ op_library(sum_op DEPS selected_rows_functor)
op_library
(
sgd_op DEPS selected_rows_functor
)
op_library
(
sgd_op DEPS selected_rows_functor
)
op_library
(
print_op DEPS lod_tensor
)
op_library
(
print_op DEPS lod_tensor
)
op_library
(
adagrad_op DEPS selected_rows_functor
)
op_library
(
adagrad_op DEPS selected_rows_functor
)
op_library
(
conv_op DEPS vol2col
)
op_library
(
pool_op DEPS pooling
)
op_library
(
maxout_op DEPS maxouting
)
op_library
(
maxout_op DEPS maxouting
)
op_library
(
unpool_op DEPS unpooling
)
op_library
(
unpool_op DEPS unpooling
)
op_library
(
pool_with_index_op DEPS pooling
)
op_library
(
pool_with_index_op DEPS pooling
)
...
@@ -149,12 +147,27 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
...
@@ -149,12 +147,27 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
op_library
(
sequence_conv_op DEPS context_project
)
op_library
(
sequence_conv_op DEPS context_project
)
op_library
(
sequence_pool_op DEPS sequence_pooling
)
op_library
(
sequence_pool_op DEPS sequence_pooling
)
op_library
(
lstm_op DEPS sequence2batch lstm_compute
)
op_library
(
lstm_op DEPS sequence2batch lstm_compute
)
op_library
(
conv_transpose_op DEPS vol2col
)
op_library
(
gru_op DEPS sequence2batch gru_compute
)
op_library
(
gru_op DEPS sequence2batch gru_compute
)
op_library
(
recurrent_op DEPS executor
)
op_library
(
recurrent_op DEPS executor
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding math_function
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding math_function
)
op_library
(
cos_sim_op DEPS cos_sim_functor
)
op_library
(
cos_sim_op DEPS cos_sim_functor
)
op_library
(
parallel_do_op DEPS executor
)
op_library
(
parallel_do_op DEPS executor
)
# Regist multiple Kernel to pybind
if
(
WITH_GPU
)
op_library
(
conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col
)
op_library
(
pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling
)
op_library
(
conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
conv_transpose_cudnn_op.cu.cc DEPS vol2col
)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL(conv2d, CUDNN);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL(pool2d, CUDNN);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL(conv2d_transpose, CUDNN);
\n
"
)
else
()
op_library
(
conv_op SRCS conv_op.cc DEPS vol2col
)
op_library
(
pool_op SRCS pool_op.cc DEPS pooling
)
op_library
(
conv_transpose_op SRCS conv_transpose_op.cc DEPS vol2col
)
endif
()
# FIXME(typhoonzero): save/load depends lodtensor serialization functions
# FIXME(typhoonzero): save/load depends lodtensor serialization functions
op_library
(
save_op DEPS lod_tensor
)
op_library
(
save_op DEPS lod_tensor
)
op_library
(
load_op DEPS lod_tensor
)
op_library
(
load_op DEPS lod_tensor
)
...
...
paddle/operators/conv_cudnn_op.cc
已删除
100644 → 0
浏览文件 @
3cf23bec
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv_op.h"
namespace
paddle
{
namespace
operators
{
class
CudnnConv2DOpMaker
:
public
Conv2DOpMaker
{
public:
CudnnConv2DOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
Conv2DOpMaker
(
proto
,
op_checker
)
{
AddAttr
<
int
>
(
"workspace_size_MB"
,
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully."
)
.
SetDefault
(
4096
);
}
};
class
CudnnConv3DOpMaker
:
public
Conv3DOpMaker
{
public:
CudnnConv3DOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
Conv3DOpMaker
(
proto
,
op_checker
)
{
AddAttr
<
int
>
(
"workspace_size_MB"
,
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully."
)
.
SetDefault
(
4096
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
conv2d_cudnn
,
ops
::
ConvOp
,
ops
::
CudnnConv2DOpMaker
,
conv2d_cudnn_grad
,
ops
::
ConvOpGrad
);
REGISTER_OP
(
conv3d_cudnn
,
ops
::
ConvOp
,
ops
::
CudnnConv3DOpMaker
,
conv3d_cudnn_grad
,
ops
::
ConvOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv2d_cudnn
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv2d_cudnn_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv3d_cudnn
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv3d_cudnn_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/operators/conv_cudnn_op.cu.cc
浏览文件 @
816e556b
...
@@ -32,7 +32,7 @@ static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES =
...
@@ -32,7 +32,7 @@ static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES =
static_cast
<
size_t
>
(
1024
)
*
1024
*
1024
;
static_cast
<
size_t
>
(
1024
)
*
1024
*
1024
;
template
<
typename
T
>
template
<
typename
T
>
class
C
udnn
ConvOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
C
UDNN
ConvOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
...
@@ -147,7 +147,7 @@ class CudnnConvOpKernel : public framework::OpKernel<T> {
...
@@ -147,7 +147,7 @@ class CudnnConvOpKernel : public framework::OpKernel<T> {
};
};
template
<
typename
T
>
template
<
typename
T
>
class
C
udnn
ConvGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
C
UDNN
ConvGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
...
@@ -315,17 +315,16 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
...
@@ -315,17 +315,16 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
// TODO(dzhwinter) : below register should be removed
REGISTER_OP_KERNEL
(
conv2d
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
REGISTER_OP_CUDA_KERNEL
(
conv2d_cudnn
,
paddle
::
operators
::
CUDNNConvOpKernel
<
float
>
,
paddle
::
operators
::
CudnnConvOpKernel
<
float
>
,
paddle
::
operators
::
CUDNNConvOpKernel
<
double
>
);
paddle
::
operators
::
CudnnConvOpKernel
<
double
>
);
REGISTER_OP_KERNEL
(
conv2d_grad
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
REGISTER_OP_CUDA_KERNEL
(
conv2d_cudnn_grad
,
paddle
::
operators
::
CUDNNConvGradOpKernel
<
float
>
,
paddle
::
operators
::
CudnnConvGradOpKernel
<
float
>
,
paddle
::
operators
::
CUDNNConvGradOpKernel
<
double
>
);
paddle
::
operators
::
CudnnConvGradOpKernel
<
double
>
);
REGISTER_OP_KERNEL
(
conv3d
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
REGISTER_OP_CUDA_KERNEL
(
conv3d_cudnn
,
paddle
::
operators
::
CUDNNConvOpKernel
<
float
>
,
paddle
::
operators
::
CudnnConvOpKernel
<
float
>
,
paddle
::
operators
::
CUDNNConvOpKernel
<
double
>
);
paddle
::
operators
::
CudnnConvOpKernel
<
double
>
);
REGISTER_OP_KERNEL
(
conv3d_grad
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
REGISTER_OP_CUDA_KERNEL
(
conv3d_cudnn_grad
,
paddle
::
operators
::
CUDNNConvGradOpKernel
<
float
>
,
paddle
::
operators
::
CudnnConvGradOpKernel
<
float
>
,
paddle
::
operators
::
CUDNNConvGradOpKernel
<
double
>
);
paddle
::
operators
::
CudnnConvGradOpKernel
<
double
>
);
paddle/operators/conv_op.cc
浏览文件 @
816e556b
...
@@ -67,6 +67,23 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -67,6 +67,23 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
ctx
->
ShareLoD
(
"Input"
,
"Output"
);
ctx
->
ShareLoD
(
"Input"
,
"Output"
);
}
}
framework
::
OpKernelType
ConvOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
bool
use_cudnn
=
ctx
.
Attr
<
bool
>
(
"use_cudnn"
);
framework
::
LibraryType
library_
;
if
(
use_cudnn
)
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
else
{
library_
=
framework
::
LibraryType
::
kPlain
;
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
Conv2DOpMaker
::
Conv2DOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
Conv2DOpMaker
::
Conv2DOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
AddInput
(
...
@@ -108,6 +125,26 @@ Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
...
@@ -108,6 +125,26 @@ Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
"dilations(h_dilation, w_dilation) of "
"dilations(h_dilation, w_dilation) of "
"convolution operator."
)
"convolution operator."
)
.
SetDefault
({
1
,
1
});
.
SetDefault
({
1
,
1
});
AddAttr
<
bool
>
(
"use_cudnn"
,
"(bool, default false) Only used in cudnn kernel, need install cudnn"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"data_format"
,
"(string, default NCHW) Only used in "
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
. "
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
// TODO(dzhwinter): need to registered layout transform function
AddAttr
<
int
>
(
"workspace_size_MB"
,
"Only used in cudnn kernel. Need set use_cudnn to true."
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully."
)
.
SetDefault
(
4096
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Convolution Operator.
Convolution Operator.
...
@@ -181,6 +218,25 @@ Conv3DOpMaker::Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
...
@@ -181,6 +218,25 @@ Conv3DOpMaker::Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
"dilations(d_dilation, h_dilation, w_dilation) of "
"dilations(d_dilation, h_dilation, w_dilation) of "
"convolution operator."
)
"convolution operator."
)
.
SetDefault
({
1
,
1
,
1
});
.
SetDefault
({
1
,
1
,
1
});
AddAttr
<
bool
>
(
"use_cudnn"
,
"(bool, default false) Only used in cudnn kernel, need install cudnn"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"data_format"
,
"(string, default NCHW) Only used in "
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
. "
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
// TODO(dzhwinter): need to registered layout transform function
AddAttr
<
int
>
(
"workspace_size_MB"
,
"Only used in cudnn kernel. workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully."
)
.
SetDefault
(
4096
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Convolution3D Operator.
Convolution3D Operator.
...
@@ -224,6 +280,23 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -224,6 +280,23 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
}
}
}
}
framework
::
OpKernelType
ConvOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
bool
use_cudnn
=
ctx
.
Attr
<
bool
>
(
"use_cudnn"
);
framework
::
LibraryType
library_
;
if
(
use_cudnn
)
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
else
{
library_
=
framework
::
LibraryType
::
kPlain
;
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
...
paddle/operators/conv_op.h
浏览文件 @
816e556b
...
@@ -62,12 +62,20 @@ class ConvOp : public framework::OperatorWithKernel {
...
@@ -62,12 +62,20 @@ class ConvOp : public framework::OperatorWithKernel {
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
};
class
ConvOpGrad
:
public
framework
::
OperatorWithKernel
{
class
ConvOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
...
...
paddle/operators/conv_transpose_cudnn_op.cc
已删除
100644 → 0
浏览文件 @
3cf23bec
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv_transpose_op.h"
namespace
paddle
{
namespace
operators
{
class
CudnnConv2DTransposeOpMaker
:
public
Conv2DTransposeOpMaker
{
public:
CudnnConv2DTransposeOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
Conv2DTransposeOpMaker
(
proto
,
op_checker
)
{
AddAttr
<
int
>
(
"workspace_size_MB"
,
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted."
)
.
SetDefault
(
4096
);
}
};
class
CudnnConv3DTransposeOpMaker
:
public
Conv3DTransposeOpMaker
{
public:
CudnnConv3DTransposeOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
Conv3DTransposeOpMaker
(
proto
,
op_checker
)
{
AddAttr
<
int
>
(
"workspace_size_MB"
,
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted."
)
.
SetDefault
(
4096
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
conv2d_transpose_cudnn
,
ops
::
ConvTransposeOp
,
ops
::
CudnnConv2DTransposeOpMaker
,
conv2d_transpose_cudnn_grad
,
ops
::
ConvTransposeOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_cudnn
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_cudnn_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP
(
conv3d_transpose_cudnn
,
ops
::
ConvTransposeOp
,
ops
::
CudnnConv3DTransposeOpMaker
,
conv3d_transpose_cudnn_grad
,
ops
::
ConvTransposeOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose_cudnn
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose_cudnn_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/operators/conv_transpose_cudnn_op.cu.cc
浏览文件 @
816e556b
...
@@ -28,10 +28,10 @@ using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
...
@@ -28,10 +28,10 @@ using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
using
ScopedConvolutionDescriptor
=
platform
::
ScopedConvolutionDescriptor
;
using
ScopedConvolutionDescriptor
=
platform
::
ScopedConvolutionDescriptor
;
using
DataLayout
=
platform
::
DataLayout
;
using
DataLayout
=
platform
::
DataLayout
;
static
constexpr
size_t
kConvC
udnn
WorkspaceLimitBytes
=
1024
*
1024
*
1024
;
static
constexpr
size_t
kConvC
UDNN
WorkspaceLimitBytes
=
1024
*
1024
*
1024
;
template
<
typename
T
>
template
<
typename
T
>
class
C
udnn
ConvTransposeOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
C
UDNN
ConvTransposeOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
...
@@ -77,7 +77,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
...
@@ -77,7 +77,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
// ------------------- cudnn conv workspace ---------------------
// ------------------- cudnn conv workspace ---------------------
void
*
cudnn_workspace
=
nullptr
;
void
*
cudnn_workspace
=
nullptr
;
size_t
workspace_size_in_bytes
;
// final workspace to allocate.
size_t
workspace_size_in_bytes
;
// final workspace to allocate.
size_t
workspace_size_limit
=
kConvC
udnn
WorkspaceLimitBytes
;
size_t
workspace_size_limit
=
kConvC
UDNN
WorkspaceLimitBytes
;
if
(
user_workspace_size
>
0
)
{
if
(
user_workspace_size
>
0
)
{
workspace_size_limit
=
user_workspace_size
*
1024
*
1024
;
workspace_size_limit
=
user_workspace_size
*
1024
*
1024
;
}
}
...
@@ -116,7 +116,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
...
@@ -116,7 +116,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
};
};
template
<
typename
T
>
template
<
typename
T
>
class
C
udnn
ConvTransposeGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
C
UDNN
ConvTransposeGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
...
@@ -161,7 +161,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
...
@@ -161,7 +161,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
cudnnConvolutionBwdFilterAlgo_t
filter_algo
;
cudnnConvolutionBwdFilterAlgo_t
filter_algo
;
size_t
bwd_filter_ws_size
,
fwd_ws_size
;
size_t
bwd_filter_ws_size
,
fwd_ws_size
;
size_t
workspace_size_in_bytes
=
0
;
size_t
workspace_size_in_bytes
=
0
;
size_t
workspace_size_limit
=
kConvC
udnn
WorkspaceLimitBytes
;
size_t
workspace_size_limit
=
kConvC
UDNN
WorkspaceLimitBytes
;
if
(
user_workspace_size
>
0
)
{
if
(
user_workspace_size
>
0
)
{
workspace_size_limit
=
user_workspace_size
*
1024
*
1024
;
workspace_size_limit
=
user_workspace_size
*
1024
*
1024
;
}
}
...
@@ -236,16 +236,16 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
...
@@ -236,16 +236,16 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_
CUDA_KERNEL
(
conv2d_transpose_cudnn
,
REGISTER_OP_
KERNEL
(
conv2d_transpose
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
Cudnn
ConvTransposeOpKernel
<
float
>
,
ops
::
CUDNN
ConvTransposeOpKernel
<
float
>
,
ops
::
Cudnn
ConvTransposeOpKernel
<
double
>
);
ops
::
CUDNN
ConvTransposeOpKernel
<
double
>
);
REGISTER_OP_
CUDA_KERNEL
(
conv2d_transpose_cudnn_grad
,
REGISTER_OP_
KERNEL
(
conv2d_transpose_grad
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
Cudnn
ConvTransposeGradOpKernel
<
float
>
,
ops
::
CUDNN
ConvTransposeGradOpKernel
<
float
>
,
ops
::
Cudnn
ConvTransposeGradOpKernel
<
double
>
);
ops
::
CUDNN
ConvTransposeGradOpKernel
<
double
>
);
REGISTER_OP_
CUDA_KERNEL
(
conv3d_transpose_cudnn
,
REGISTER_OP_
KERNEL
(
conv3d_transpose
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
Cudnn
ConvTransposeOpKernel
<
float
>
,
ops
::
CUDNN
ConvTransposeOpKernel
<
float
>
,
ops
::
Cudnn
ConvTransposeOpKernel
<
double
>
);
ops
::
CUDNN
ConvTransposeOpKernel
<
double
>
);
REGISTER_OP_
CUDA_KERNEL
(
conv3d_transpose_cudnn_grad
,
REGISTER_OP_
KERNEL
(
conv3d_transpose_grad
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
Cudnn
ConvTransposeGradOpKernel
<
float
>
,
ops
::
CUDNN
ConvTransposeGradOpKernel
<
float
>
,
ops
::
Cudnn
ConvTransposeGradOpKernel
<
double
>
);
ops
::
CUDNN
ConvTransposeGradOpKernel
<
double
>
);
paddle/operators/conv_transpose_op.cc
浏览文件 @
816e556b
...
@@ -58,6 +58,23 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -58,6 +58,23 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
}
}
framework
::
OpKernelType
ConvTransposeOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
bool
use_cudnn
=
ctx
.
Attr
<
bool
>
(
"use_cudnn"
);
framework
::
LibraryType
library_
;
if
(
use_cudnn
)
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
else
{
library_
=
framework
::
LibraryType
::
kPlain
;
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
Conv2DTransposeOpMaker
::
Conv2DTransposeOpMaker
(
OpProto
*
proto
,
Conv2DTransposeOpMaker
::
Conv2DTransposeOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
...
@@ -94,6 +111,25 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
...
@@ -94,6 +111,25 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
"(vector<int> default:{0, 0}), the paddings(h_pad, w_pad) of convolution "
"(vector<int> default:{0, 0}), the paddings(h_pad, w_pad) of convolution "
"transpose operator."
)
"transpose operator."
)
.
SetDefault
({
0
,
0
});
.
SetDefault
({
0
,
0
});
AddAttr
<
bool
>
(
"use_cudnn"
,
"(bool, default false) Only used in cudnn kernel, need install cudnn"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"data_format"
,
"(string, default NCHW) Only used in "
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
. "
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
// TODO(dzhwinter): need to registered layout transform function
AddAttr
<
int
>
(
"workspace_size_MB"
,
"Used in cudnn kernel only. workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted."
)
.
SetDefault
(
4096
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Convolution2D Transpose Operator.
Convolution2D Transpose Operator.
...
@@ -163,6 +199,25 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
...
@@ -163,6 +199,25 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
"(vector<int> default:{0, 0, 0}), paddings(d_pad, "
"(vector<int> default:{0, 0, 0}), paddings(d_pad, "
"h_pad, w_pad) of convolution transpose operator."
)
"h_pad, w_pad) of convolution transpose operator."
)
.
SetDefault
({
0
,
0
,
0
});
.
SetDefault
({
0
,
0
,
0
});
AddAttr
<
bool
>
(
"use_cudnn"
,
"(bool, default false) Only used in cudnn kernel, need install cudnn"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"data_format"
,
"(string, default NCHW) Only used in "
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
. "
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
// TODO(dzhwinter): need to registered layout transform function
AddAttr
<
int
>
(
"workspace_size_MB"
,
"Used in cudnn kernel only. workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted."
)
.
SetDefault
(
4096
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Convolution3D Transpose Operator.
Convolution3D Transpose Operator.
...
@@ -205,6 +260,23 @@ void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -205,6 +260,23 @@ void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const {
}
}
}
}
framework
::
OpKernelType
ConvTransposeOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
bool
use_cudnn
=
ctx
.
Attr
<
bool
>
(
"use_cudnn"
);
framework
::
LibraryType
library_
;
if
(
use_cudnn
)
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
else
{
library_
=
framework
::
LibraryType
::
kPlain
;
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
...
paddle/operators/conv_transpose_op.h
浏览文件 @
816e556b
...
@@ -42,12 +42,20 @@ class ConvTransposeOp : public framework::OperatorWithKernel {
...
@@ -42,12 +42,20 @@ class ConvTransposeOp : public framework::OperatorWithKernel {
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
};
class
ConvTransposeOpGrad
:
public
framework
::
OperatorWithKernel
{
class
ConvTransposeOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
...
...
paddle/operators/math/sequence2batch.cc
浏览文件 @
816e556b
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/operators/math/sequence2batch.h"
#include "paddle/operators/math/sequence2batch.h"
#include "paddle/operators/math/math_function.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
...
paddle/operators/pool_cudnn_op.cc
已删除
100644 → 0
浏览文件 @
3cf23bec
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/pool_cudnn_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
pool2d_cudnn
,
ops
::
PoolOp
,
ops
::
Pool2dOpMaker
,
pool2d_cudnn_grad
,
ops
::
PoolOpGrad
);
REGISTER_OP_CPU_KERNEL
(
pool2d_cudnn
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
pool2d_cudnn_grad
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
)
REGISTER_OP
(
pool3d_cudnn
,
ops
::
PoolOp
,
ops
::
Pool3dOpMaker
,
pool3d_cudnn_grad
,
ops
::
PoolOpGrad
);
REGISTER_OP_CPU_KERNEL
(
pool3d_cudnn
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
PoolKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
pool3d_cudnn_grad
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
PoolGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
)
paddle/operators/pool_cudnn_op.cu.cc
浏览文件 @
816e556b
...
@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/operators/pool_cudnn_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/pool_op.h"
#include "paddle/platform/cudnn_helper.h"
#include "paddle/platform/cudnn_helper.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -25,7 +26,7 @@ using DataLayout = platform::DataLayout;
...
@@ -25,7 +26,7 @@ using DataLayout = platform::DataLayout;
using
PoolingMode
=
platform
::
PoolingMode
;
using
PoolingMode
=
platform
::
PoolingMode
;
template
<
typename
T
>
template
<
typename
T
>
class
PoolC
udnn
OpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
PoolC
UDNN
OpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
...
@@ -86,7 +87,7 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
...
@@ -86,7 +87,7 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
};
};
template
<
typename
T
>
template
<
typename
T
>
class
PoolC
udnn
GradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
PoolC
UDNN
GradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
...
@@ -162,12 +163,16 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
...
@@ -162,12 +163,16 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
pool2d_cudnn
,
ops
::
PoolCudnnOpKernel
<
float
>
,
REGISTER_OP_KERNEL
(
pool2d
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
PoolCudnnOpKernel
<
double
>
);
ops
::
PoolCUDNNOpKernel
<
float
>
,
REGISTER_OP_CUDA_KERNEL
(
pool2d_cudnn_grad
,
ops
::
PoolCudnnGradOpKernel
<
float
>
,
ops
::
PoolCUDNNOpKernel
<
double
>
);
ops
::
PoolCudnnGradOpKernel
<
double
>
);
REGISTER_OP_KERNEL
(
pool2d_grad
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
PoolCUDNNGradOpKernel
<
float
>
,
REGISTER_OP_CUDA_KERNEL
(
pool3d_cudnn
,
ops
::
PoolCudnnOpKernel
<
float
>
,
ops
::
PoolCUDNNGradOpKernel
<
double
>
);
ops
::
PoolCudnnOpKernel
<
double
>
);
REGISTER_OP_CUDA_KERNEL
(
pool3d_cudnn_grad
,
ops
::
PoolCudnnGradOpKernel
<
float
>
,
REGISTER_OP_KERNEL
(
pool3d
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
PoolCudnnGradOpKernel
<
double
>
);
ops
::
PoolCUDNNOpKernel
<
float
>
,
ops
::
PoolCUDNNOpKernel
<
double
>
);
REGISTER_OP_KERNEL
(
pool3d_grad
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
PoolCUDNNGradOpKernel
<
float
>
,
ops
::
PoolCUDNNGradOpKernel
<
double
>
);
paddle/operators/pool_cudnn_op.h
已删除
100644 → 0
浏览文件 @
3cf23bec
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/pool_op.h"
namespace
paddle
{
namespace
operators
{}
// namespace operators
}
// namespace paddle
paddle/operators/pool_op.cc
浏览文件 @
816e556b
...
@@ -61,6 +61,23 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const {
...
@@ -61,6 +61,23 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const {
ctx
->
ShareLoD
(
"X"
,
"Out"
);
ctx
->
ShareLoD
(
"X"
,
"Out"
);
}
}
framework
::
OpKernelType
PoolOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
bool
use_cudnn
=
ctx
.
Attr
<
bool
>
(
"use_cudnn"
);
framework
::
LibraryType
library_
;
if
(
use_cudnn
)
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
else
{
library_
=
framework
::
LibraryType
::
kPlain
;
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
void
PoolOpGrad
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
void
PoolOpGrad
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) must not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) must not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)),
...
@@ -68,6 +85,23 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
...
@@ -68,6 +85,23 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
}
}
framework
::
OpKernelType
PoolOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
bool
use_cudnn
=
ctx
.
Attr
<
bool
>
(
"use_cudnn"
);
framework
::
LibraryType
library_
;
if
(
use_cudnn
)
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
else
{
library_
=
framework
::
LibraryType
::
kPlain
;
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
Pool2dOpMaker
::
Pool2dOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
Pool2dOpMaker
::
Pool2dOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
AddInput
(
...
@@ -101,15 +135,27 @@ Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
...
@@ -101,15 +135,27 @@ Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int>, default {1, 1}), strides(height, "
"(vector<int>, default {1, 1}), strides(height, "
"width) of pooling operator."
)
"width) of pooling operator."
)
.
SetDefault
({
1
,
1
});
// TODO(Chengduo): Add checker. (Currently,
.
SetDefault
({
1
,
1
});
// TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
// TypedAttrChecker don't support vector type.)
AddAttr
<
std
::
vector
<
int
>>
(
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"paddings"
,
"(vector<int>, default {0,0}), paddings(height, width) of pooling "
"(vector<int>, default {0,0}), paddings(height, width) of pooling "
"operator."
"operator."
"If global_pooling = true, paddings and ksize will be ignored."
)
"If global_pooling = true, paddings and ksize will be ignored."
)
.
SetDefault
({
0
,
0
});
// TODO(Chengduo): Add checker. (Currently,
.
SetDefault
({
0
,
0
});
// TypedAttrChecker don't support vector type.)
AddAttr
<
bool
>
(
"use_cudnn"
,
"(bool, default false) Only used in cudnn kernel, need install cudnn"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"data_format"
,
"(string, default NCHW) Only used in "
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
. "
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
// TODO(dzhwinter): need to registered layout transform function
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Pool2d Operator.
Pool2d Operator.
...
@@ -182,6 +228,19 @@ Pool3dOpMaker::Pool3dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
...
@@ -182,6 +228,19 @@ Pool3dOpMaker::Pool3dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
.
SetDefault
({
0
,
0
,
0
});
// TODO(Chengduo): Add checker. (Currently,
.
SetDefault
({
0
,
0
,
0
});
// TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
// TypedAttrChecker don't support vector type.)
AddAttr
<
bool
>
(
"use_cudnn"
,
"(bool, default false) Only used in cudnn kernel, need install cudnn"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"data_format"
,
"(string, default NCHW) Only used in "
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
. "
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
// TODO(dzhwinter): need to registered layout transform function
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Pool3d Operator.
Pool3d Operator.
...
...
paddle/operators/pool_op.h
浏览文件 @
816e556b
...
@@ -29,6 +29,10 @@ class PoolOp : public framework::OperatorWithKernel {
...
@@ -29,6 +29,10 @@ class PoolOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
};
class
PoolOpGrad
:
public
framework
::
OperatorWithKernel
{
class
PoolOpGrad
:
public
framework
::
OperatorWithKernel
{
...
@@ -36,6 +40,10 @@ class PoolOpGrad : public framework::OperatorWithKernel {
...
@@ -36,6 +40,10 @@ class PoolOpGrad : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
};
class
Pool2dOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
Pool2dOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
...
...
paddle/operators/print_op.cc
浏览文件 @
816e556b
...
@@ -16,12 +16,17 @@
...
@@ -16,12 +16,17 @@
#include <ctime>
#include <ctime>
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/variable.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
#define CLOG std::cout
#define CLOG std::cout
const
std
::
string
kForward
=
"FORWARD"
;
const
std
::
string
kBackward
=
"BACKWARD"
;
const
std
::
string
kBoth
=
"BOTH"
;
struct
Formater
{
struct
Formater
{
std
::
string
message
;
std
::
string
message
;
std
::
string
name
;
std
::
string
name
;
...
@@ -122,40 +127,77 @@ class TensorPrintOp : public framework::OperatorBase {
...
@@ -122,40 +127,77 @@ class TensorPrintOp : public framework::OperatorBase {
TensorPrintOp
(
const
TensorPrintOp
&
o
)
TensorPrintOp
(
const
TensorPrintOp
&
o
)
:
framework
::
OperatorBase
(
:
framework
::
OperatorBase
(
static_cast
<
const
framework
::
OperatorBase
&>
(
o
))
{
static_cast
<
const
framework
::
OperatorBase
&>
(
o
))
{
PADDLE_THROW
(
"Not implemented"
);
PADDLE_THROW
(
"Not implemented
.
"
);
}
}
void
Run
(
const
framework
::
Scope
&
scope
,
void
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
const
platform
::
Place
&
place
)
const
override
{
// Only run the `first_n` times.
const
framework
::
Variable
*
in_var_ptr
=
nullptr
;
std
::
string
phase
=
kForward
;
std
::
string
printed_var_name
=
""
;
auto
&
inputs
=
Inputs
();
if
(
inputs
.
find
(
"In"
)
!=
inputs
.
end
()
&&
!
Inputs
(
"In"
).
empty
())
{
in_var_ptr
=
scope
.
FindVar
(
Input
(
"In"
));
printed_var_name
=
Inputs
(
"In"
).
front
();
}
else
if
(
inputs
.
find
(
"In@GRAD"
)
!=
inputs
.
end
()
&&
!
Inputs
(
"In@GRAD"
).
empty
())
{
in_var_ptr
=
scope
.
FindVar
(
Input
(
"In@GRAD"
));
printed_var_name
=
Inputs
(
"In@GRAD"
).
front
();
phase
=
kBackward
;
}
else
{
PADDLE_THROW
(
"Unknown phase, should be forward or backward."
);
}
PADDLE_ENFORCE_NOT_NULL
(
in_var_ptr
);
auto
&
in_tensor
=
in_var_ptr
->
Get
<
framework
::
LoDTensor
>
();
auto
*
out_var_ptr
=
scope
.
FindVar
(
Output
(
"Out"
));
auto
&
out_tensor
=
*
out_var_ptr
->
GetMutable
<
framework
::
LoDTensor
>
();
// Just copy data from input tensor to output tensor
// output tensor share same memory with input tensor
out_tensor
.
ShareDataWith
(
in_tensor
);
out_tensor
.
set_lod
(
in_tensor
.
lod
());
std
::
string
print_phase
=
Attr
<
std
::
string
>
(
"print_phase"
);
if
(
print_phase
!=
phase
&&
print_phase
!=
kBoth
)
{
return
;
}
int
first_n
=
Attr
<
int
>
(
"first_n"
);
int
first_n
=
Attr
<
int
>
(
"first_n"
);
if
(
first_n
>
0
&&
++
times_
>
first_n
)
return
;
if
(
first_n
>
0
&&
++
times_
>
first_n
)
return
;
PADDLE_ENFORCE
(
!
Inputs
(
"input"
).
empty
(),
"input should be set"
);
framework
::
LoDTensor
printed_tensor
;
auto
*
input_var
=
scope
.
FindVar
(
Input
(
"input"
));
printed_tensor
.
set_lod
(
in_tensor
.
lod
());
PADDLE_ENFORCE_NOT_NULL
(
input_var
);
printed_tensor
.
Resize
(
in_tensor
.
dims
());
auto
&
tensor
=
input_var
->
Get
<
framework
::
LoDTensor
>
();
// TODO(ChunweiYan) support GPU
if
(
platform
::
is_cpu_place
(
in_tensor
.
place
()))
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
tensor
.
place
()));
printed_tensor
.
ShareDataWith
(
in_tensor
);
}
else
{
// copy data to cpu to print
platform
::
CPUPlace
place
;
framework
::
Copy
(
in_tensor
,
place
,
&
printed_tensor
);
}
Formater
formater
;
Formater
formater
;
if
(
Attr
<
bool
>
(
"print_tensor_name"
))
{
if
(
Attr
<
bool
>
(
"print_tensor_name"
))
{
formater
.
name
=
Inputs
(
"input"
).
front
()
;
formater
.
name
=
printed_var_name
;
}
}
if
(
Attr
<
bool
>
(
"print_tensor_type"
))
{
if
(
Attr
<
bool
>
(
"print_tensor_type"
))
{
formater
.
dtype
=
tensor
.
type
();
formater
.
dtype
=
printed_
tensor
.
type
();
}
}
if
(
Attr
<
bool
>
(
"print_tensor_shape"
))
{
if
(
Attr
<
bool
>
(
"print_tensor_shape"
))
{
formater
.
dims
.
assign
(
tensor
.
dims
()[
0
],
auto
&
dims
=
printed_tensor
.
dims
();
tensor
.
dims
()[
tensor
.
dims
().
size
()
-
1
]);
formater
.
dims
.
resize
(
dims
.
size
());
for
(
int
i
=
0
;
i
<
dims
.
size
();
++
i
)
formater
.
dims
[
i
]
=
dims
[
i
];
}
}
if
(
Attr
<
bool
>
(
"print_tensor_lod"
))
{
if
(
Attr
<
bool
>
(
"print_tensor_lod"
))
{
formater
.
lod
=
tensor
.
lod
();
formater
.
lod
=
printed_
tensor
.
lod
();
}
}
formater
.
summarize
=
Attr
<
int
>
(
"summarize"
);
formater
.
summarize
=
Attr
<
int
>
(
"summarize"
);
formater
.
data
=
(
void
*
)
tensor
.
data
<
void
>
();
formater
.
data
=
(
void
*
)
printed_
tensor
.
data
<
void
>
();
formater
(
tensor
.
numel
());
formater
(
printed_
tensor
.
numel
());
}
}
private:
private:
...
@@ -166,27 +208,46 @@ class PrintOpProtoAndCheckMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -166,27 +208,46 @@ class PrintOpProtoAndCheckMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
PrintOpProtoAndCheckMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
PrintOpProtoAndCheckMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"
input"
,
"the tensor that will
be displayed."
);
AddInput
(
"
In"
,
"Input tensor to
be displayed."
);
AddAttr
<
int
>
(
"first_n"
,
"Only log `first_n` number of times."
);
AddAttr
<
int
>
(
"first_n"
,
"Only log `first_n` number of times."
);
AddAttr
<
std
::
string
>
(
"message"
,
"A string message to print as a prefix."
);
AddAttr
<
std
::
string
>
(
"message"
,
"A string message to print as a prefix."
);
AddAttr
<
int
>
(
"summarize"
,
"
Print this number of elements in the tensor
."
);
AddAttr
<
int
>
(
"summarize"
,
"
Number of elements printed
."
);
AddAttr
<
bool
>
(
"print_tensor_name"
,
"Whether to print the tensor name."
);
AddAttr
<
bool
>
(
"print_tensor_name"
,
"Whether to print the tensor name."
);
AddAttr
<
bool
>
(
"print_tensor_type"
,
"Whether to print the tensor's dtype."
);
AddAttr
<
bool
>
(
"print_tensor_type"
,
"Whether to print the tensor's dtype."
);
AddAttr
<
bool
>
(
"print_tensor_shape"
,
"Whether to print the tensor's shape."
);
AddAttr
<
bool
>
(
"print_tensor_shape"
,
"Whether to print the tensor's shape."
);
AddAttr
<
bool
>
(
"print_tensor_lod"
,
"Whether to print the tensor's lod."
);
AddAttr
<
bool
>
(
"print_tensor_lod"
,
"Whether to print the tensor's lod."
);
AddAttr
<
std
::
string
>
(
"print_phase"
,
"(string, default 'BOTH') Which phase to display including 'FORWARD' "
"'BACKWARD' and 'BOTH'."
)
.
SetDefault
(
kBoth
)
.
InEnum
({
kForward
,
kBackward
,
kBoth
});
AddOutput
(
"Out"
,
"Output tensor with same data as input tensor."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Creates a print op that will print when a tensor is accessed.
Creates a print op that will print when a tensor is accessed.
Wraps the tensor passed in so that whenever that a tensor is accessed,
Wraps the tensor passed in so that whenever that a tensor is accessed,
the message `message` is printed, along with the current value of the
the message `message` is printed, along with the current value of the
tensor `t`.)DOC"
);
tensor `t`.)DOC"
);
}
}
};
};
class
InferShape
:
public
framework
::
InferShapeBase
{
class
InferShape
Forward
:
public
framework
::
InferShapeBase
{
public:
public:
void
operator
()(
framework
::
InferShapeContext
*
context
)
const
override
{
void
operator
()(
framework
::
InferShapeContext
*
context
)
const
override
{
PADDLE_ENFORCE
(
context
->
HasInput
(
"input"
),
"input should be set"
);
PADDLE_ENFORCE
(
context
->
HasInput
(
"In"
),
"Input(In) should not be null."
);
context
->
ShareLoD
(
"In"
,
/*->*/
"Out"
);
context
->
SetOutputDim
(
"Out"
,
context
->
GetInputDim
(
"In"
));
}
};
class
InferShapeBackward
:
public
framework
::
InferShapeBase
{
public:
void
operator
()(
framework
::
InferShapeContext
*
context
)
const
override
{
PADDLE_ENFORCE
(
context
->
HasInput
(
"In@GRAD"
),
"Input(In@GRAD) should not be null."
);
context
->
ShareLoD
(
"In@GRAD"
,
/*->*/
"Out"
);
context
->
SetOutputDim
(
"Out"
,
context
->
GetInputDim
(
"In@GRAD"
));
}
}
};
};
...
@@ -196,11 +257,27 @@ class InferVarType : public framework::VarTypeInference {
...
@@ -196,11 +257,27 @@ class InferVarType : public framework::VarTypeInference {
framework
::
BlockDesc
*
block
)
const
override
{}
framework
::
BlockDesc
*
block
)
const
override
{}
};
};
class
PrintOpProtoAndCheckGradOpMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
op_desc_ptr
=
new
framework
::
OpDesc
();
op_desc_ptr
->
SetType
(
"print_grad"
);
op_desc_ptr
->
SetInput
(
"In@GRAD"
,
OutputGrad
(
"Out"
));
op_desc_ptr
->
SetOutput
(
"Out"
,
InputGrad
(
"In"
));
op_desc_ptr
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
op_desc_ptr
);
}
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
REGISTER_OPERATOR
(
print
,
paddle
::
operators
::
TensorPrintOp
,
namespace
ops
=
paddle
::
operators
;
paddle
::
operators
::
PrintOpProtoAndCheckMaker
,
paddle
::
operators
::
InferShape
,
REGISTER_OPERATOR
(
print
,
ops
::
TensorPrintOp
,
ops
::
PrintOpProtoAndCheckMaker
,
paddle
::
operators
::
InferVarType
,
ops
::
PrintOpProtoAndCheckGradOpMaker
,
ops
::
InferShapeForward
,
paddle
::
framework
::
EmptyGradOpMaker
);
ops
::
InferVarType
);
REGISTER_OPERATOR
(
print_grad
,
ops
::
TensorPrintOp
,
ops
::
InferShapeBackward
);
paddle/operators/reorder_lod_tensor_by_rank_op.cc
浏览文件 @
816e556b
...
@@ -26,22 +26,44 @@ class ReorderLoDTensorByRankTableOpProtoMaker
...
@@ -26,22 +26,44 @@ class ReorderLoDTensorByRankTableOpProtoMaker
ReorderLoDTensorByRankTableOpProtoMaker
(
OpProto
*
proto
,
ReorderLoDTensorByRankTableOpProtoMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"(LoDTensor) the input lod tensor need to be reordered."
);
AddInput
(
"X"
,
"(LoDTensor), the input lod tensor to be reordered according to "
"Input(RankTable)."
);
AddInput
(
"RankTable"
,
AddInput
(
"RankTable"
,
"(LoDRankTable) the rank table that input need follow"
);
"(LoDRankTable), the rank table according to which Input(X) is "
AddOutput
(
"Out"
,
"(LoDTensor) reordered lod tensor"
);
"reordered."
);
AddComment
(
R"DOC(ReorderLoDTensorByRankTable
AddOutput
(
"Out"
,
"(LoDTensor), the reordered lod tensor."
);
AddComment
(
R"DOC(ReorderLoDTensorByRankTable operator.
Reorder the input X by the rank of `RankTable`. If `RankTable` is ordered by
Input(X) is a batch of sequences. Input(RankTable) stores new orders of the
index [3, 0, 2, 1]. Input X will reorder its sequence, the third sequence of
input sequence batch. The reorder_lod_tensor_by_rank operator reorders the
X will be the first sequence of Output.
Input(X) according to the information provided by Input(RankTable).
NOTE: The RankTable does not need to be calculated by X.
For example:
For example:
The X = [Seq0, Seq1, Seq2, Seq3]. The indices of RankTable are [3, 0, 2, 1].
The Out = [Seq3, Seq0, Seq2, Seq1] with correct LoD information.
If the indices stored in the Input(RankTable) are [3, 0, 2, 1], the
Input(X) will be reordered that the fourth sequence in Input(X) will become the
first one, and then followed by the original first, third, and the second one.
This is:
X = [Seq0, Seq1, Seq2, Seq3]. The indices in RankTable are [3, 0, 2, 1].
Out = [Seq3, Seq0, Seq2, Seq1] with a new LoD information.
If the LoD information of Input(X) is empty, this means Input(X) is not sequence
data. This is also identical to a batch of sequences where each sequence has a
fixed length 1. In this case, the reorder_lod_tensor_by_rank operator reorders
each slice of Input(X) along the first axis according to Input(RankTable).
This is:
X = [Slice0, Slice1, Slice2, Slice3] and its LoD information is empty. The
indices in RankTable are [3, 0, 2, 1].
Out = [Slice3, Slice0, Slice2, Slice1] with no LoD information is appended.
NOTE: This operator sorts Input(X) according to a given LoDRankTable which does
not need to be calculated according to Input(X). It can be calculated according
to another different sequence, and then this operator sorts Input(X) according
to the given LoDRankTable.
)DOC"
);
)DOC"
);
}
}
};
};
...
...
paddle/operators/shrink_rnn_memory_op.cc
浏览文件 @
816e556b
...
@@ -45,7 +45,7 @@ class ShrinkRNNMemoryOp : public ArrayOp {
...
@@ -45,7 +45,7 @@ class ShrinkRNNMemoryOp : public ArrayOp {
rank_items
.
begin
();
rank_items
.
begin
();
auto
*
out_var
=
scope
.
FindVar
(
Output
(
"Out"
));
auto
*
out_var
=
scope
.
FindVar
(
Output
(
"Out"
));
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Output
Out must be set
"
);
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Output
(Out) must be set.
"
);
auto
&
out_tensor
=
*
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
&
out_tensor
=
*
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
size_t
height
=
dst_num_rows
;
size_t
height
=
dst_num_rows
;
...
@@ -76,15 +76,17 @@ class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -76,15 +76,17 @@ class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor) The step index. The RNN step memory 'X' will be "
"(LoDTensor) The step index. The RNN step memory 'X' will be "
"shrinked to match the size of the input of the index'th step."
);
"shrinked to match the size of the input of the index'th step."
);
AddOutput
(
"Out"
,
"(LoDTensor) The shrinked RNN step memory."
);
AddOutput
(
"Out"
,
"(LoDTensor) The shrinked RNN step memory."
);
AddComment
(
AddComment
(
R"DOC(
R"DOC(
This operator is used to shrink output batch of memory defined in dynamic RNN.
In dynamic RNN, we are able to handle sequences of different lengths.
Because of the multiple lengths, the size of each step input can be
Dynamic RNN is able to handle variable-length sequences, in which, sequences in
different, which may lead to a mismatching between the input of
a mini-batch are sorted by their lengths first. After that, the longest sequence
the current step and the memory generated by the previous one. This
becomes the first one in the sorted batch, followed by the second longest, the
operator shrinks memory according to the size of the next step input,
third longest, and so on. Dynamic RNN then slices a batch input timestep by
to make sure that they can match each other.
timestep from the sorted input. Once any sequence in the input batch reaches its
)DOC"
);
end, memory defined in dynamicRNN has to shrink its outputs to adapt to the input
batch size for the next time step.
)DOC"
);
}
}
};
};
...
@@ -136,6 +138,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
...
@@ -136,6 +138,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
math
::
set_constant
(
dev_ctx
,
&
rest_tensor
,
0.0
f
);
math
::
set_constant
(
dev_ctx
,
&
rest_tensor
,
0.0
f
);
}
}
}
}
dx_tensor
.
set_lod
(
x_tensor
.
lod
());
}
}
};
};
...
...
paddle/operators/while_op.cc
浏览文件 @
816e556b
...
@@ -121,8 +121,8 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -121,8 +121,8 @@ class WhileGradOp : public framework::OperatorBase {
for
(
size_t
i
=
0
;
i
<
outside_og_names
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
outside_og_names
.
size
();
++
i
)
{
auto
outside_og_name
=
outside_og_names
[
i
];
auto
outside_og_name
=
outside_og_names
[
i
];
auto
inside_og_name
=
inside_og_names
[
i
];
auto
inside_og_name
=
inside_og_names
[
i
];
VLOG
(
10
)
<<
"Linking outside "
<<
outside_og_name
<<
" --> inside "
VLOG
(
8
)
<<
"Linking outside "
<<
outside_og_name
<<
" --> inside "
<<
inside_og_name
;
<<
inside_og_name
;
auto
&
og_outside
=
auto
&
og_outside
=
detail
::
Ref
(
scope
.
FindVar
(
outside_og_name
),
detail
::
Ref
(
scope
.
FindVar
(
outside_og_name
),
"Cannot find Outside Gradient %s"
,
outside_og_name
);
"Cannot find Outside Gradient %s"
,
outside_og_name
);
...
@@ -141,11 +141,11 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -141,11 +141,11 @@ class WhileGradOp : public framework::OperatorBase {
auto
&
outside_array
=
og_outside
.
Get
<
framework
::
LoDTensorArray
>
();
auto
&
outside_array
=
og_outside
.
Get
<
framework
::
LoDTensorArray
>
();
auto
&
inside_array
=
auto
&
inside_array
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensorArray
>
());
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensorArray
>
());
VLOG
(
10
)
<<
outside_og_name
<<
" size = "
<<
outside_array
.
size
();
VLOG
(
8
)
<<
outside_og_name
<<
" size = "
<<
outside_array
.
size
();
inside_array
.
resize
(
outside_array
.
size
());
inside_array
.
resize
(
outside_array
.
size
());
for
(
size_t
j
=
0
;
j
<
inside_array
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
inside_array
.
size
();
++
j
)
{
VLOG
(
10
)
<<
j
<<
" "
<<
outside_array
[
j
].
numel
();
VLOG
(
8
)
<<
j
<<
" "
<<
outside_array
[
j
].
numel
();
if
(
outside_array
[
j
].
numel
()
!=
0
)
{
if
(
outside_array
[
j
].
numel
()
!=
0
)
{
inside_array
[
j
].
set_lod
(
outside_array
[
j
].
lod
());
inside_array
[
j
].
set_lod
(
outside_array
[
j
].
lod
());
inside_array
[
j
].
ShareDataWith
(
outside_array
[
j
]);
inside_array
[
j
].
ShareDataWith
(
outside_array
[
j
]);
...
@@ -187,10 +187,14 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -187,10 +187,14 @@ class WhileGradOp : public framework::OperatorBase {
attrs
[
"shape"
]
=
framework
::
vectorize2int
(
inside_tensor
.
dims
());
attrs
[
"shape"
]
=
framework
::
vectorize2int
(
inside_tensor
.
dims
());
attrs
[
"value"
]
=
0.0
f
;
attrs
[
"value"
]
=
0.0
f
;
auto
var_name
=
pg_names
[
param_id
];
auto
zero_op
=
framework
::
OpRegistry
::
CreateOp
(
auto
zero_op
=
framework
::
OpRegistry
::
CreateOp
(
"fill_constant"
,
framework
::
VariableNameMap
{},
"fill_constant"
,
framework
::
VariableNameMap
{},
{{
"Out"
,
{
pg_names
[
param_id
]
}}},
attrs
);
{{
"Out"
,
{
var_name
}}},
attrs
);
zero_op
->
Run
(
scope
,
dev_place
);
zero_op
->
Run
(
scope
,
dev_place
);
scope
.
FindVar
(
var_name
)
->
GetMutable
<
framework
::
LoDTensor
>
()
->
set_lod
(
inside_tensor
.
lod
());
}
}
}
}
...
@@ -231,7 +235,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
...
@@ -231,7 +235,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
auto
igs
=
InputGrad
(
kX
,
/*do not drop empty gradient*/
false
);
auto
igs
=
InputGrad
(
kX
,
/*do not drop empty gradient*/
false
);
for
(
auto
&
each_ig
:
igs
)
{
for
(
auto
&
each_ig
:
igs
)
{
if
(
inner_op_outputs
.
find
(
each_ig
)
==
inner_op_outputs
.
end
())
{
if
(
inner_op_outputs
.
find
(
each_ig
)
==
inner_op_outputs
.
end
())
{
VLOG
(
10
)
<<
"Ignore "
<<
each_ig
;
VLOG
(
8
)
<<
"Ignore "
<<
each_ig
;
each_ig
=
framework
::
kEmptyVarName
;
each_ig
=
framework
::
kEmptyVarName
;
}
}
}
}
...
...
paddle/platform/dynload/cudnn.cc
浏览文件 @
816e556b
...
@@ -44,7 +44,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
...
@@ -44,7 +44,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
#ifdef PADDLE_USE_DSO
#ifdef PADDLE_USE_DSO
bool
HasCUDNN
()
{
bool
HasCUDNN
()
{
std
::
call_once
(
cudnn_dso_flag
,
GetC
udnn
DsoHandle
,
&
cudnn_dso_handle
);
std
::
call_once
(
cudnn_dso_flag
,
GetC
UDNN
DsoHandle
,
&
cudnn_dso_handle
);
return
cudnn_dso_handle
!=
nullptr
;
return
cudnn_dso_handle
!=
nullptr
;
}
}
...
...
paddle/platform/dynload/cudnn.h
浏览文件 @
816e556b
...
@@ -36,7 +36,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
...
@@ -36,7 +36,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
auto operator()(Args... args) -> decltype(__name(args...)) { \
auto operator()(Args... args) -> decltype(__name(args...)) { \
using cudnn_func = decltype(__name(args...)) (*)(Args...); \
using cudnn_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(cudnn_dso_flag, \
std::call_once(cudnn_dso_flag, \
paddle::platform::dynload::GetC
udnn
DsoHandle, \
paddle::platform::dynload::GetC
UDNN
DsoHandle, \
&cudnn_dso_handle); \
&cudnn_dso_handle); \
EnforceCUDNNLoaded(#__name); \
EnforceCUDNNLoaded(#__name); \
void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
...
...
paddle/platform/dynload/dynamic_loader.cc
浏览文件 @
816e556b
...
@@ -134,7 +134,7 @@ void GetCublasDsoHandle(void** dso_handle) {
...
@@ -134,7 +134,7 @@ void GetCublasDsoHandle(void** dso_handle) {
#endif
#endif
}
}
void
GetC
udnn
DsoHandle
(
void
**
dso_handle
)
{
void
GetC
UDNN
DsoHandle
(
void
**
dso_handle
)
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.dylib"
,
dso_handle
,
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.dylib"
,
dso_handle
,
false
);
false
);
...
...
paddle/platform/dynload/dynamic_loader.h
浏览文件 @
816e556b
...
@@ -32,7 +32,7 @@ void GetCublasDsoHandle(void** dso_handle);
...
@@ -32,7 +32,7 @@ void GetCublasDsoHandle(void** dso_handle);
* @param **dso_handle dso handler
* @param **dso_handle dso handler
*
*
*/
*/
void
GetC
udnn
DsoHandle
(
void
**
dso_handle
);
void
GetC
UDNN
DsoHandle
(
void
**
dso_handle
);
/**
/**
* @brief load the DSO of CURAND
* @brief load the DSO of CURAND
...
...
paddle/pybind/pybind.cc
浏览文件 @
816e556b
...
@@ -430,13 +430,8 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -430,13 +430,8 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"init_glog"
,
framework
::
InitGLOG
);
m
.
def
(
"init_glog"
,
framework
::
InitGLOG
);
m
.
def
(
"init_devices"
,
&
framework
::
InitDevices
);
m
.
def
(
"init_devices"
,
&
framework
::
InitDevices
);
m
.
def
(
"use_cpu"
,
framework
::
UseCPU
);
m
.
def
(
"use_mkldnn"
,
framework
::
UseMKLDNN
);
m
.
def
(
"use_cuda"
,
framework
::
UseCUDA
);
m
.
def
(
"use_cudnn"
,
framework
::
UseCUDNN
);
m
.
def
(
"use_all"
,
framework
::
UseALL
);
m
.
def
(
"is_compile_gpu"
,
IsCompileGPU
);
m
.
def
(
"is_compile_gpu"
,
IsCompileGPU
);
m
.
def
(
"set_feed_variable"
,
framework
::
SetFeedVariable
);
m
.
def
(
"set_feed_variable"
,
framework
::
SetFeedVariable
);
m
.
def
(
"get_fetch_variable"
,
framework
::
GetFetchVariable
);
m
.
def
(
"get_fetch_variable"
,
framework
::
GetFetchVariable
);
...
...
paddle/pybind/tensor_py.h
浏览文件 @
816e556b
...
@@ -14,7 +14,7 @@ limitations under the License. */
...
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <string>
#include <string>
#include "paddle/framework/tensor.h"
#include "paddle/framework/
lod_
tensor.h"
#include "paddle/memory/memcpy.h"
#include "paddle/memory/memcpy.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/device_context.h"
#include "pybind11/numpy.h"
#include "pybind11/numpy.h"
...
@@ -97,14 +97,27 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
...
@@ -97,14 +97,27 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
template
<
typename
T
>
template
<
typename
T
>
T
TensorGetElement
(
framework
::
Tensor
&
self
,
size_t
offset
)
{
T
TensorGetElement
(
framework
::
Tensor
&
self
,
size_t
offset
)
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
self
.
place
()));
if
(
platform
::
is_cpu_place
(
self
.
place
()))
{
return
self
.
data
<
T
>
()[
offset
];
return
self
.
data
<
T
>
()[
offset
];
}
else
{
std
::
shared_ptr
<
framework
::
Tensor
>
dst
(
new
framework
::
Tensor
);
framework
::
Copy
(
self
,
platform
::
CPUPlace
(),
dst
.
get
());
return
dst
->
data
<
T
>
()[
offset
];
}
}
}
// TODO(dzhwinter) : fix the redundent Tensor allocate and free
template
<
typename
T
>
template
<
typename
T
>
void
TensorSetElement
(
framework
::
Tensor
&
self
,
size_t
offset
,
T
elem
)
{
void
TensorSetElement
(
framework
::
Tensor
&
self
,
size_t
offset
,
T
elem
)
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
self
.
place
()));
if
(
platform
::
is_gpu_place
(
self
.
place
()))
{
self
.
data
<
T
>
()[
offset
]
=
elem
;
std
::
shared_ptr
<
framework
::
Tensor
>
dst
(
new
framework
::
Tensor
);
framework
::
Copy
(
self
,
platform
::
CPUPlace
(),
dst
.
get
());
dst
->
data
<
T
>
()[
offset
]
=
elem
;
framework
::
Copy
(
*
dst
.
get
(),
self
.
place
(),
&
self
);
}
else
if
(
platform
::
is_cpu_place
(
self
.
place
()))
{
self
.
data
<
T
>
()[
offset
]
=
elem
;
}
}
}
template
<
typename
T
>
template
<
typename
T
>
...
...
python/paddle/v2/fluid/__init__.py
浏览文件 @
816e556b
...
@@ -18,14 +18,29 @@ from param_attr import ParamAttr
...
@@ -18,14 +18,29 @@ from param_attr import ParamAttr
from
data_feeder
import
DataFeeder
from
data_feeder
import
DataFeeder
from
core
import
LoDTensor
,
CPUPlace
,
CUDAPlace
from
core
import
LoDTensor
,
CPUPlace
,
CUDAPlace
from
distribute_transpiler
import
DistributeTranspiler
from
distribute_transpiler
import
DistributeTranspiler
from
distribute_transpiler_simple
import
SimpleDistributeTranspiler
import
clip
import
clip
from
memory_optimization_transpiler
import
memory_optimize
from
memory_optimization_transpiler
import
memory_optimize
Tensor
=
LoDTensor
Tensor
=
LoDTensor
__all__
=
framework
.
__all__
+
executor
.
__all__
+
[
__all__
=
framework
.
__all__
+
executor
.
__all__
+
[
'io'
,
'initializer'
,
'layers'
,
'nets'
,
'optimizer'
,
'backward'
,
'io'
,
'regularizer'
,
'LoDTensor'
,
'CPUPlace'
,
'CUDAPlace'
,
'Tensor'
,
'ParamAttr'
'initializer'
,
'DataFeeder'
,
'clip'
,
'DistributeTranspiler'
,
'memory_optimize'
'layers'
,
'nets'
,
'optimizer'
,
'backward'
,
'regularizer'
,
'LoDTensor'
,
'CPUPlace'
,
'CUDAPlace'
,
'Tensor'
,
'ParamAttr'
'DataFeeder'
,
'clip'
,
'SimpleDistributeTranspiler'
,
'DistributeTranspiler'
,
'memory_optimize'
,
]
]
...
...
python/paddle/v2/fluid/backward.py
浏览文件 @
816e556b
...
@@ -3,7 +3,10 @@ from . import core
...
@@ -3,7 +3,10 @@ from . import core
import
collections
import
collections
import
copy
import
copy
__all__
=
[
'append_backward'
,
'calc_gradient'
]
__all__
=
[
'append_backward'
,
'calc_gradient'
,
]
def
_rename_arg_
(
op_descs
,
old_name
,
new_name
,
begin_idx
=
None
,
end_idx
=
None
):
def
_rename_arg_
(
op_descs
,
old_name
,
new_name
,
begin_idx
=
None
,
end_idx
=
None
):
...
...
python/paddle/v2/fluid/clip.py
浏览文件 @
816e556b
...
@@ -3,7 +3,10 @@ import layers
...
@@ -3,7 +3,10 @@ import layers
from
.
import
core
from
.
import
core
__all__
=
[
__all__
=
[
'GradientClipByValue'
,
'append_gradient_clip_ops'
,
'error_clip_callback'
'GradientClipByValue'
,
'ErrorClipByValue'
,
'append_gradient_clip_ops'
,
'error_clip_callback'
,
]
]
...
@@ -23,12 +26,12 @@ class ErrorClipByValue(BaseErrorClipAttr):
...
@@ -23,12 +26,12 @@ class ErrorClipByValue(BaseErrorClipAttr):
self
.
min
=
min
self
.
min
=
min
def
append_clip_op
(
self
,
block
,
grad_name
):
def
append_clip_op
(
self
,
block
,
grad_name
):
block
.
append_op
(
clip_op_desc
=
block
.
desc
.
append_op
()
type
=
"clip"
,
clip_op_desc
.
set_type
(
"clip"
)
inputs
=
{
"X"
:
grad_name
},
clip_op_desc
.
set_input
(
"X"
,
[
grad_name
])
outputs
=
{
"Out"
:
grad_name
},
clip_op_desc
.
set_output
(
"Out"
,
[
grad_name
])
attrs
=
{
"min"
:
self
.
min
,
clip_op_desc
.
set_attr
(
"min"
,
self
.
min
)
"max"
:
self
.
max
}
)
clip_op_desc
.
set_attr
(
"max"
,
self
.
max
)
def
error_clip_callback
(
block
,
context
):
def
error_clip_callback
(
block
,
context
):
...
@@ -39,6 +42,11 @@ def error_clip_callback(block, context):
...
@@ -39,6 +42,11 @@ def error_clip_callback(block, context):
op_desc
.
output_arg_names
()):
op_desc
.
output_arg_names
()):
fwd_var
=
block
.
var_recursive
(
grad_to_var
[
grad_n
])
fwd_var
=
block
.
var_recursive
(
grad_to_var
[
grad_n
])
error_clip
=
getattr
(
fwd_var
,
"error_clip"
,
None
)
error_clip
=
getattr
(
fwd_var
,
"error_clip"
,
None
)
if
not
(
error_clip
is
None
or
isinstance
(
error_clip
,
BaseErrorClipAttr
)):
raise
TypeError
(
"Variable's error_clip should be an instance of BaseErrorClipAttr or None."
)
if
error_clip
is
not
None
:
if
error_clip
is
not
None
:
error_clip
.
append_clip_op
(
block
,
grad_n
)
error_clip
.
append_clip_op
(
block
,
grad_n
)
...
...
python/paddle/v2/fluid/default_scope_funcs.py
浏览文件 @
816e556b
"""
"""
Default scope function.
Default scope function.
`Paddle` manages Scope as programming language's scope. It just a
`Paddle` manages Scope as programming language's scope. It just a
thread-local stack of Scope. Top of that stack is current scope, the bottom
thread-local stack of Scope. Top of that stack is current scope, the bottom
of that stack is all scopes' parent.
of that stack is all scopes' parent.
Invoking `var/find_var` can `new/find` variable in current scope.
Invoking `var/find_var` can `new/find` variable in current scope.
Invoking `enter_local_scope/leave_local_scope` can create or destroy local
Invoking `enter_local_scope/leave_local_scope` can create or destroy local
scope.
scope.
A `scoped_function` will take a `function` as input. That function will be
A `scoped_function` will take a `function` as input. That function will be
invoked in a new local scope.
invoked in a new local scope.
"""
"""
import
paddle.v2.fluid.core
import
paddle.v2.fluid.core
...
@@ -19,8 +19,12 @@ import threading
...
@@ -19,8 +19,12 @@ import threading
__tl_scope__
=
threading
.
local
()
__tl_scope__
=
threading
.
local
()
__all__
=
[
__all__
=
[
'get_cur_scope'
,
'enter_local_scope'
,
'leave_local_scope'
,
'var'
,
'get_cur_scope'
,
'find_var'
,
'scoped_function'
'enter_local_scope'
,
'leave_local_scope'
,
'var'
,
'find_var'
,
'scoped_function'
,
]
]
...
@@ -71,7 +75,7 @@ def find_var(name):
...
@@ -71,7 +75,7 @@ def find_var(name):
def
scoped_function
(
func
):
def
scoped_function
(
func
):
"""
"""
invoke `func` in new scope.
invoke `func` in new scope.
:param func: a callable function that will be run in new scope.
:param func: a callable function that will be run in new scope.
:type func: callable
:type func: callable
"""
"""
...
...
python/paddle/v2/fluid/distribute_transpiler.py
浏览文件 @
816e556b
此差异已折叠。
点击以展开。
python/paddle/v2/fluid/distribute_transpiler_simple.py
0 → 100644
浏览文件 @
816e556b
import
framework
from
framework
import
Program
,
default_main_program
,
Parameter
,
Variable
import
optimizer
from
layer_helper
import
LayerHelper
def
hash_name_to_server
(
params_grads
,
pserver_endpoints
):
"""
:param param_grads:
:return: a map of pserver endpoint ->
params -> [param list]
grads -> [grad list]
"""
def
_hash_param
(
param_name
,
total
):
return
hash
(
param_name
)
%
total
param_grad_map
=
dict
()
for
param
,
grad
in
params_grads
:
if
param
.
trainable
is
True
and
grad
is
not
None
:
server_id
=
_hash_param
(
param
.
name
,
len
(
pserver_endpoints
))
server_for_param
=
pserver_endpoints
[
server_id
]
if
not
param_grad_map
.
has_key
(
server_for_param
):
param_grad_map
[
server_for_param
]
=
{
"params"
:
[],
"grads"
:
[]}
param_grad_map
[
server_for_param
][
"params"
].
append
(
param
)
param_grad_map
[
server_for_param
][
"grads"
].
append
(
grad
)
return
param_grad_map
def
round_robin
(
params_grads
,
pserver_endpoints
):
assert
(
len
(
params_grads
)
>
len
(
pserver_endpoints
))
param_grad_map
=
dict
()
pserver_idx
=
0
for
param
,
grad
in
params_grads
:
if
param
.
trainable
is
True
:
server_for_param
=
pserver_endpoints
[
pserver_idx
]
if
not
param_grad_map
.
has_key
(
server_for_param
):
param_grad_map
[
server_for_param
]
=
{
"params"
:
[],
"grads"
:
[]}
param_grad_map
[
server_for_param
][
"params"
].
append
(
param
)
param_grad_map
[
server_for_param
][
"grads"
].
append
(
grad
)
pserver_idx
+=
1
if
pserver_idx
>=
len
(
pserver_endpoints
):
pserver_idx
=
0
return
param_grad_map
class
SimpleDistributeTranspiler
:
def
transpile
(
self
,
optimize_ops
,
params_grads
,
program
=
None
,
pservers
=
"127.0.0.1:6174"
,
trainers
=
1
,
split_method
=
round_robin
):
"""
Transpile the program to a distributed data-parallelism programs.
The main_program will be transform to use a remote parameter server
to do parameter optimization. And the optimization graph will be put
in to a parameter server program.
Use different methods to split trainable varialbles to different
parameter servers.
Example to run:
exe = fluid.Executor(place)
t = fluid.DistributeTranspiler()
t.transpile(optimize_ops, params_grads, pservers="127.0.0.1:6174", trainers=1)
pserver_endpoint = os.getenv("PSERVER")
if pserver_endpoint:
pserver_prog = t.get_pserver_program(pserver_endpoint, optimize_ops)
exe.run(fluid.default_startup_program())
exe.run(pserver_prog)
else:
feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM):
...
:param optimize_ops: op list of optimization, should be the
return value of Optimizer.minimize
:type optimize_ops: list
:param program: program to optimize, default default_main_program
:param pservers: parameter server endpoints like "m1:6174,m2:6174"
:type pservers: string
:return: return a list of programs
"""
if
program
is
None
:
program
=
default_main_program
()
self
.
program
=
program
self
.
trainers
=
trainers
self
.
optimize_ops
=
optimize_ops
self
.
_optimize_distributed
(
optimize_ops
,
program
,
params_grads
,
pservers
=
pservers
,
trainers
=
trainers
,
split_method
=
split_method
)
def
_clone_param
(
self
,
block
,
v
):
assert
isinstance
(
v
,
Parameter
)
new_p
=
Parameter
(
block
=
block
,
shape
=
v
.
shape
,
dtype
=
v
.
dtype
,
type
=
v
.
type
,
lod_level
=
v
.
lod_level
,
stop_gradient
=
v
.
stop_gradient
,
trainable
=
v
.
trainable
,
optimize_attr
=
v
.
optimize_attr
,
regularizer
=
v
.
regularizer
,
name
=
v
.
name
)
block
.
vars
[
new_p
.
name
]
=
new_p
def
_clone_var
(
self
,
block
,
var
):
assert
isinstance
(
var
,
Variable
)
return
block
.
create_var
(
name
=
var
.
name
,
shape
=
var
.
shape
,
dtype
=
var
.
dtype
,
type
=
var
.
type
,
lod_level
=
var
.
lod_level
,
persistable
=
var
.
persistable
)
def
_optimize_distributed
(
self
,
optimize_ops
,
program
,
params_and_grads
,
**
kwargs
):
if
kwargs
.
has_key
(
"split_method"
):
split_method
=
kwargs
[
"split_method"
]
else
:
split_method
=
round_robin
assert
(
callable
(
split_method
))
pserver_endpoints
=
kwargs
[
"pservers"
].
split
(
","
)
self
.
param_grad_map
=
split_method
(
params_and_grads
,
pserver_endpoints
)
send_op_ordered_inputs
=
[]
send_op_ordered_outputs
=
[]
epmap
=
[]
for
ep
,
v
in
self
.
param_grad_map
.
iteritems
():
send_op_ordered_inputs
.
extend
(
v
[
"grads"
])
send_op_ordered_outputs
.
extend
(
v
[
"params"
])
for
i
in
v
[
"grads"
]:
epmap
.
append
(
ep
)
send_op
=
program
.
global_block
().
append_op
(
type
=
"send"
,
inputs
=
{
"X"
:
send_op_ordered_inputs
},
# inputs is a list of tensors to be send
outputs
=
{
"Out"
:
send_op_ordered_outputs
},
attrs
=
{
"endpoints"
:
pserver_endpoints
,
"epmap"
:
epmap
})
def
get_trainer_program
(
self
):
# remove optimize ops and add a send op to main_program
self
.
program
.
global_block
().
delete_ops
(
self
.
optimize_ops
)
return
self
.
program
def
_create_var_for_trainers
(
self
,
block
,
var
,
trainers
):
var_list
=
[]
for
i
in
xrange
(
trainers
):
var_each
=
block
.
create_var
(
name
=
"%s.trainer_%d"
%
(
var
.
name
,
i
),
psersistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
var_list
.
append
(
var_each
)
return
var_list
def
get_pserver_program
(
self
,
endpoint
,
optimize_ops
):
pserver_program
=
Program
()
for
v
in
self
.
param_grad_map
[
endpoint
][
"params"
]:
self
.
_clone_param
(
pserver_program
.
global_block
(),
v
)
optimize_sub_program
=
Program
()
grad_var_names
=
[
var
.
name
for
var
in
self
.
param_grad_map
[
endpoint
][
"grads"
]
]
for
opt_op
in
optimize_ops
:
for
_
,
var
in
opt_op
.
inputs
.
iteritems
():
# NOTE: append operators to merge gradients from multiple
# trainers. If trainers == 1, this is not needed.
if
self
.
trainers
>
1
and
var
.
name
in
grad_var_names
:
vars2merge
=
self
.
_create_var_for_trainers
(
optimize_sub_program
.
global_block
(),
var
,
self
.
trainers
)
merged_var
=
optimize_sub_program
.
global_block
().
create_var
(
name
=
var
.
name
,
persistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
optimize_sub_program
.
global_block
().
append_op
(
type
=
"sum"
,
inputs
=
{
"X"
:
vars2merge
},
outputs
=
{
"Out"
:
merged_var
})
optimize_sub_program
.
global_block
().
append_op
(
type
=
"scale"
,
inputs
=
{
"X"
:
merged_var
},
outputs
=
{
"Out"
:
merged_var
},
attrs
=
{
"scale"
:
1.0
/
float
(
self
.
trainers
)})
else
:
optimize_sub_program
.
global_block
().
create_var
(
name
=
var
.
name
,
persistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
if
opt_op
.
inputs
.
has_key
(
"Grad"
):
if
opt_op
.
inputs
[
"Grad"
].
name
in
grad_var_names
:
optimize_sub_program
.
global_block
().
append_op
(
type
=
opt_op
.
type
,
inputs
=
opt_op
.
inputs
,
outputs
=
opt_op
.
outputs
,
attrs
=
opt_op
.
attrs
)
else
:
optimize_sub_program
.
global_block
().
append_op
(
type
=
opt_op
.
type
,
inputs
=
opt_op
.
inputs
,
outputs
=
opt_op
.
outputs
,
attrs
=
opt_op
.
attrs
)
pserver_program
.
global_block
().
append_op
(
type
=
"recv"
,
inputs
=
{
"RX"
:
self
.
param_grad_map
[
endpoint
][
"grads"
]},
# grads to recv
outputs
=
{},
attrs
=
{
"OptimizeProgram"
:
optimize_sub_program
.
desc
,
"endpoint"
:
endpoint
,
"ParamList"
:
[
p
.
name
for
p
in
self
.
param_grad_map
[
endpoint
][
"params"
]],
"GradList"
:
[
p
.
name
for
p
in
self
.
param_grad_map
[
endpoint
][
"grads"
]],
"Trainers"
:
self
.
trainers
})
pserver_program
.
sync_with_cpp
()
return
pserver_program
python/paddle/v2/fluid/distributed_spliter.py
0 → 100644
浏览文件 @
816e556b
def
hash_name
(
varlist
,
pserver_endpoints
):
"""
hash variable names to several endpoints.
:param varlist: a list of Variables
:return: a map of pserver endpoint -> varname
"""
def
_hash_block
(
block_str
,
total
):
return
hash
(
block_str
)
%
total
eplist
=
[]
for
var
in
varlist
:
server_id
=
_hash_block
(
var
.
name
(),
len
(
pserver_endpoints
))
server_for_param
=
pserver_endpoints
[
server_id
]
eplist
.
append
(
server_for_param
)
return
eplist
def
round_robin
(
varlist
,
pserver_endpoints
):
"""
distribute variables to several endpoints.
"""
assert
(
len
(
varlist
)
>
len
(
pserver_endpoints
))
eplist
=
[]
pserver_idx
=
0
for
var
in
varlist
:
server_for_param
=
pserver_endpoints
[
pserver_idx
]
eplist
.
append
(
server_for_param
)
pserver_idx
+=
1
if
pserver_idx
>=
len
(
pserver_endpoints
):
pserver_idx
=
0
return
eplist
python/paddle/v2/fluid/evaluator.py
浏览文件 @
816e556b
...
@@ -4,7 +4,10 @@ import layers
...
@@ -4,7 +4,10 @@ import layers
from
framework
import
Program
,
unique_name
,
Variable
,
program_guard
from
framework
import
Program
,
unique_name
,
Variable
,
program_guard
from
layer_helper
import
LayerHelper
from
layer_helper
import
LayerHelper
__all__
=
[
'Accuracy'
,
'ChunkEvaluator'
]
__all__
=
[
'Accuracy'
,
'ChunkEvaluator'
,
]
def
_clone_var_
(
block
,
var
):
def
_clone_var_
(
block
,
var
):
...
@@ -21,19 +24,19 @@ def _clone_var_(block, var):
...
@@ -21,19 +24,19 @@ def _clone_var_(block, var):
class
Evaluator
(
object
):
class
Evaluator
(
object
):
"""
"""
Base Class for all evaluators
Base Class for all evaluators
Args:
Args:
name(str): The name of evaluator. such as, "accuracy". Used for generate
name(str): The name of evaluator. such as, "accuracy". Used for generate
temporary variable name.
temporary variable name.
main_program(Program, optional): The evaluator should be added to this
main_program(Program, optional): The evaluator should be added to this
main_program. Default default_main_program()
main_program. Default default_main_program()
startup_program(Program, optional):The parameter should be added to this
startup_program(Program, optional):The parameter should be added to this
startup_program. Default default_startup_program()
startup_program. Default default_startup_program()
Attributes:
Attributes:
states(list): The list of state variables. states will be reset to zero
states(list): The list of state variables. states will be reset to zero
when `reset` is invoked.
when `reset` is invoked.
metrics(list): The list of metrics variables. They will be calculate
metrics(list): The list of metrics variables. They will be calculate
every mini-batch
every mini-batch
"""
"""
...
@@ -66,14 +69,14 @@ class Evaluator(object):
...
@@ -66,14 +69,14 @@ class Evaluator(object):
def
create_state
(
self
,
suffix
,
dtype
,
shape
):
def
create_state
(
self
,
suffix
,
dtype
,
shape
):
"""
"""
Create state variable.
Create state variable.
NOTE: It is not a public API.
NOTE: It is not a public API.
Args:
Args:
suffix(str): the state suffix.
suffix(str): the state suffix.
dtype(str|core.DataType): the state data type
dtype(str|core.DataType): the state data type
shape(tuple|list): the shape of state
shape(tuple|list): the shape of state
Returns: State variable
Returns: State variable
...
@@ -127,8 +130,8 @@ class Accuracy(Evaluator):
...
@@ -127,8 +130,8 @@ class Accuracy(Evaluator):
class
ChunkEvaluator
(
Evaluator
):
class
ChunkEvaluator
(
Evaluator
):
"""
"""
Accumulate counter numbers output by chunk_eval from mini-batches and
Accumulate counter numbers output by chunk_eval from mini-batches and
compute the precision recall and F1-score using the accumulated counter
compute the precision recall and F1-score using the accumulated counter
numbers.
numbers.
"""
"""
...
...
python/paddle/v2/fluid/framework.py
浏览文件 @
816e556b
...
@@ -7,9 +7,15 @@ import proto.framework_pb2 as framework_pb2
...
@@ -7,9 +7,15 @@ import proto.framework_pb2 as framework_pb2
from
.
import
core
from
.
import
core
__all__
=
[
__all__
=
[
'Block'
,
'Variable'
,
'Program'
,
'Operator'
,
'default_startup_program'
,
'Block'
,
'default_main_program'
,
'program_guard'
,
'switch_startup_program'
,
'Variable'
,
'switch_main_program'
'Program'
,
'Operator'
,
'default_startup_program'
,
'default_main_program'
,
'program_guard'
,
'switch_startup_program'
,
'switch_main_program'
,
]
]
EMPTY_VAR_NAME
=
core
.
kEmptyVarName
()
EMPTY_VAR_NAME
=
core
.
kEmptyVarName
()
...
@@ -274,6 +280,9 @@ class Variable(object):
...
@@ -274,6 +280,9 @@ class Variable(object):
uid
=
core
.
unique_integer
(
prefix
)
# unique during whole process.
uid
=
core
.
unique_integer
(
prefix
)
# unique during whole process.
return
"_"
.
join
([
prefix
,
str
(
uid
)])
return
"_"
.
join
([
prefix
,
str
(
uid
)])
def
set_error_clip
(
self
,
error_clip
):
self
.
error_clip
=
error_clip
def
get_all_op_protos
():
def
get_all_op_protos
():
"""
"""
...
...
python/paddle/v2/fluid/initializer.py
浏览文件 @
816e556b
import
framework
import
framework
import
numpy
as
np
import
numpy
as
np
__all__
=
[
'Constant'
,
'Uniform'
,
'Normal'
,
'Xavier'
]
__all__
=
[
'Constant'
,
'Uniform'
,
'Normal'
,
'Xavier'
,
]
class
Initializer
(
object
):
class
Initializer
(
object
):
...
...
python/paddle/v2/fluid/io.py
浏览文件 @
816e556b
...
@@ -4,9 +4,15 @@ import cPickle as pickle
...
@@ -4,9 +4,15 @@ import cPickle as pickle
from
paddle.v2.fluid.framework
import
Program
,
Parameter
,
default_main_program
,
Variable
from
paddle.v2.fluid.framework
import
Program
,
Parameter
,
default_main_program
,
Variable
__all__
=
[
__all__
=
[
'save_vars'
,
'save_params'
,
'save_persistables'
,
'load_vars'
,
'load_params'
,
'save_vars'
,
'load_persistables'
,
"save_inference_model"
,
"load_inference_model"
,
'save_params'
,
"get_inference_program"
'save_persistables'
,
'load_vars'
,
'load_params'
,
'load_persistables'
,
'save_inference_model'
,
'load_inference_model'
,
'get_inference_program'
,
]
]
...
...
python/paddle/v2/fluid/layers/control_flow.py
浏览文件 @
816e556b
...
@@ -117,7 +117,8 @@ def Print(input,
...
@@ -117,7 +117,8 @@ def Print(input,
print_tensor_name
=
True
,
print_tensor_name
=
True
,
print_tensor_type
=
True
,
print_tensor_type
=
True
,
print_tensor_shape
=
True
,
print_tensor_shape
=
True
,
print_tensor_lod
=
True
):
print_tensor_lod
=
True
,
print_phase
=
'both'
):
'''
'''
**Print operator**
**Print operator**
...
@@ -128,18 +129,21 @@ def Print(input,
...
@@ -128,18 +129,21 @@ def Print(input,
tensor `t`.
tensor `t`.
Args:
Args:
input(Variable): A Tensor to print.
input (Variable): A Tensor to print.
summarize(int): Print this number of elements in the tensor, will print all
summarize (int): Print this number of elements in the tensor, will print
if left negative.
all if left is negative.
message(str): A string message to print as a prefix.
message (str): A string message to print as a prefix.
first_n(int): Only log `first_n` number of times.
first_n (int): Only log `first_n` number of times.
print_tensor_name(bool): Print the tensor name.
print_tensor_name (bool): Print the tensor name.
print_tensor_type(bool): Print the tensor type.
print_tensor_type (bool): Print the tensor type.
print_tensor_shape(bool): Print the tensor shape.
print_tensor_shape (bool): Print the tensor shape.
print_tensor_lod(bool): Print the tensor lod.
print_tensor_lod (bool): Print the tensor lod.
print_phase (bool): Which phase to displace, including 'forward',
'backward' and 'both'. If set to 'backward' or 'both', will
print the gradients of input tensor.
Returns:
Returns:
None
Variable: Output tensor, same data with input tensor.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
...
@@ -149,10 +153,10 @@ def Print(input,
...
@@ -149,10 +153,10 @@ def Print(input,
message="The content of some_layer: ")
message="The content of some_layer: ")
'''
'''
helper
=
LayerHelper
(
'print'
,
**
locals
())
helper
=
LayerHelper
(
'print'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
()
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'print'
,
type
=
'print'
,
inputs
=
{
'
input
'
:
input
},
inputs
=
{
'
In
'
:
input
},
attrs
=
{
attrs
=
{
'first_n'
:
first_n
,
'first_n'
:
first_n
,
'summarize'
:
summarize
,
'summarize'
:
summarize
,
...
@@ -161,7 +165,9 @@ def Print(input,
...
@@ -161,7 +165,9 @@ def Print(input,
'print_tensor_type'
:
print_tensor_type
,
'print_tensor_type'
:
print_tensor_type
,
'print_tensor_shape'
:
print_tensor_shape
,
'print_tensor_shape'
:
print_tensor_shape
,
'print_tensor_lod'
:
print_tensor_lod
,
'print_tensor_lod'
:
print_tensor_lod
,
})
'print_phase'
:
print_phase
.
upper
()
},
outputs
=
{
'Out'
:
out
})
return
out
return
out
...
@@ -742,11 +748,10 @@ def topk(input, k):
...
@@ -742,11 +748,10 @@ def topk(input, k):
def
lod_tensor_to_array
(
x
,
table
):
def
lod_tensor_to_array
(
x
,
table
):
"""This function performs the operation that converts an LOD_Tensor to
""" Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY.
an array.
Args:
Args:
x (Variable|list): The
tensor that needs to be converted to an
array.
x (Variable|list): The
LOD tensor to be converted to a LOD tensor
array.
table (ParamAttr|list): The variable that stores the level of lod
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
which is ordered by sequence length in
descending order.
descending order.
...
@@ -776,11 +781,10 @@ def lod_tensor_to_array(x, table):
...
@@ -776,11 +781,10 @@ def lod_tensor_to_array(x, table):
def
array_to_lod_tensor
(
x
,
table
):
def
array_to_lod_tensor
(
x
,
table
):
"""This function performs the operations that converts an array to
"""Convert a LoD_Tensor_Aarry to an LoDTensor.
an LOD_Tensor.
Args:
Args:
x (Variable|list): The
array that needs
to be converted to a tensor.
x (Variable|list): The
lod tensor array
to be converted to a tensor.
table (ParamAttr|list): The variable that stores the level of lod
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
which is ordered by sequence length in
descending order.
descending order.
...
@@ -808,7 +812,8 @@ def array_to_lod_tensor(x, table):
...
@@ -808,7 +812,8 @@ def array_to_lod_tensor(x, table):
def
increment
(
x
,
value
=
1.0
,
in_place
=
True
):
def
increment
(
x
,
value
=
1.0
,
in_place
=
True
):
"""This function performs an operation that increments each value in the
"""
This function performs an operation that increments each value in the
input :math:`x` by an amount: :math:`value` as mentioned in the input
input :math:`x` by an amount: :math:`value` as mentioned in the input
parameter. This operation is performed in-place by default.
parameter. This operation is performed in-place by default.
...
@@ -841,17 +846,24 @@ def increment(x, value=1.0, in_place=True):
...
@@ -841,17 +846,24 @@ def increment(x, value=1.0, in_place=True):
def
array_write
(
x
,
i
,
array
=
None
):
def
array_write
(
x
,
i
,
array
=
None
):
"""This function performs the operation to write the data out as an
"""
LOD_TENSOR_ARRAY.
This function writes the given input variable to the specified position
indicating by the arrary index to an output LOD_TENSOR_ARRAY. If the
output LOD_TENSOR_ARRAY is not given(None), a new one will be created and
returned.
Args:
Args:
x (Variable|list): The input tensor from which the data will be read.
x (Variable|list): The input tensor from which the data will be read.
i (Variable|list): The subscript index in tensor array, that points the
i (Variable|list): The index of the output LOD_TENSOR_ARRAY, pointing to
place from which data will be read.
the position to which the input tensor will be
array (Variable|list): The data can be read into this variable if
written.
this is assigned.
array (Variable|list): The output LOD_TENSOR_ARRAY to which the input
tensor will be written. If this parameter is
NONE, a new LOD_TENSOR_ARRAY will be created and
returned.
Returns:
Returns:
Variable: The
tensor type variable that has the data written to it
.
Variable: The
output LOD_TENSOR_ARRAY where the input tensor is written
.
Examples:
Examples:
.. code-block::python
.. code-block::python
...
@@ -1214,7 +1226,8 @@ class DynamicRNN(object):
...
@@ -1214,7 +1226,8 @@ class DynamicRNN(object):
self
.
lod_rank_table
=
None
self
.
lod_rank_table
=
None
self
.
max_seq_len
=
None
self
.
max_seq_len
=
None
self
.
step_idx
=
None
self
.
step_idx
=
None
self
.
zero_idx
=
fill_constant
(
shape
=
[
1
],
value
=
0
,
dtype
=
'int64'
)
self
.
zero_idx
=
fill_constant
(
shape
=
[
1
],
value
=
0
,
dtype
=
'int64'
,
force_cpu
=
True
)
self
.
mem_dict
=
dict
()
self
.
mem_dict
=
dict
()
self
.
output_array
=
[]
self
.
output_array
=
[]
self
.
outputs
=
[]
self
.
outputs
=
[]
...
@@ -1228,7 +1241,7 @@ class DynamicRNN(object):
...
@@ -1228,7 +1241,7 @@ class DynamicRNN(object):
self
.
_assert_in_rnn_block_
(
"step_input"
)
self
.
_assert_in_rnn_block_
(
"step_input"
)
if
not
isinstance
(
x
,
Variable
):
if
not
isinstance
(
x
,
Variable
):
raise
TypeError
(
raise
TypeError
(
"step_input() can only take a Variable as its input"
)
"step_input() can only take a Variable as its input
.
"
)
parent_block
=
self
.
_parent_block_
()
parent_block
=
self
.
_parent_block_
()
if
self
.
lod_rank_table
is
None
:
if
self
.
lod_rank_table
is
None
:
self
.
lod_rank_table
=
parent_block
.
create_var
(
self
.
lod_rank_table
=
parent_block
.
create_var
(
...
@@ -1269,7 +1282,8 @@ class DynamicRNN(object):
...
@@ -1269,7 +1282,8 @@ class DynamicRNN(object):
def
block
(
self
):
def
block
(
self
):
if
self
.
status
!=
DynamicRNN
.
BEFORE_RNN
:
if
self
.
status
!=
DynamicRNN
.
BEFORE_RNN
:
raise
ValueError
(
"rnn.block() can only be invoke once"
)
raise
ValueError
(
"rnn.block() can only be invoke once"
)
self
.
step_idx
=
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
0
)
self
.
step_idx
=
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
0
,
force_cpu
=
True
)
self
.
step_idx
.
stop_gradient
=
False
self
.
step_idx
.
stop_gradient
=
False
self
.
status
=
DynamicRNN
.
IN_RNN
self
.
status
=
DynamicRNN
.
IN_RNN
with
self
.
while_op
.
block
():
with
self
.
while_op
.
block
():
...
@@ -1289,8 +1303,8 @@ class DynamicRNN(object):
...
@@ -1289,8 +1303,8 @@ class DynamicRNN(object):
def
__call__
(
self
,
*
args
,
**
kwargs
):
def
__call__
(
self
,
*
args
,
**
kwargs
):
if
self
.
status
!=
DynamicRNN
.
AFTER_RNN
:
if
self
.
status
!=
DynamicRNN
.
AFTER_RNN
:
raise
ValueError
(
raise
ValueError
(
(
"Output of the dynamic RNN can only be visited "
"Dynamic RNN outputs can only be retrieved after rnn block"
)
"outside the rnn block."
)
)
if
len
(
self
.
outputs
)
==
1
:
if
len
(
self
.
outputs
)
==
1
:
return
self
.
outputs
[
0
]
return
self
.
outputs
[
0
]
else
:
else
:
...
...
python/paddle/v2/fluid/layers/nn.py
浏览文件 @
816e556b
...
@@ -9,12 +9,33 @@ from ..param_attr import ParamAttr
...
@@ -9,12 +9,33 @@ from ..param_attr import ParamAttr
from
tensor
import
concat
from
tensor
import
concat
__all__
=
[
__all__
=
[
'fc'
,
'embedding'
,
'dynamic_lstm'
,
'gru_unit'
,
'linear_chain_crf'
,
'fc'
,
'crf_decoding'
,
'cos_sim'
,
'cross_entropy'
,
'square_error_cost'
,
'accuracy'
,
'embedding'
,
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'sequence_pool'
,
'pool2d'
,
'dynamic_lstm'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'sequence_expand'
,
'gru_unit'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'linear_chain_crf'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
'crf_decoding'
,
'cos_sim'
,
'cross_entropy'
,
'square_error_cost'
,
'accuracy'
,
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'sequence_pool'
,
'pool2d'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'sequence_expand'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
]
]
...
@@ -248,13 +269,13 @@ def gru_unit(input,
...
@@ -248,13 +269,13 @@ def gru_unit(input,
h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1})
h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1})
The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms
The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms
of the equation above, the :math:`z_t` is split into 3 parts -
of the equation above, the :math:`z_t` is split into 3 parts -
:math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to
:math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to
implement a full GRU unit operator for an input, a fully
implement a full GRU unit operator for an input, a fully
connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`.
connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`.
The terms :math:`u_t` and :math:`r_t` represent the update and reset gates
The terms :math:`u_t` and :math:`r_t` represent the update and reset gates
of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is
of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is
an intermediate candidate hidden output, which is denoted by :math:`m_t`.
an intermediate candidate hidden output, which is denoted by :math:`m_t`.
This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})`
This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})`
and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.
and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.
...
@@ -276,7 +297,7 @@ def gru_unit(input,
...
@@ -276,7 +297,7 @@ def gru_unit(input,
.. code-block:: python
.. code-block:: python
# assuming we have x_t_data and prev_hidden of size=10
# assuming we have x_t_data and prev_hidden of size=10
x_t = fluid.layers.fc(input=x_t_data, size=30)
x_t = fluid.layers.fc(input=x_t_data, size=30)
hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t,
hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t,
hidden = prev_hidden)
hidden = prev_hidden)
...
@@ -754,7 +775,7 @@ def conv2d(input,
...
@@ -754,7 +775,7 @@ def conv2d(input,
pre_bias
=
helper
.
create_tmp_variable
(
dtype
)
pre_bias
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'conv2d
_cudnn
'
,
type
=
'conv2d'
,
inputs
=
{
inputs
=
{
'Input'
:
input
,
'Input'
:
input
,
'Filter'
:
filter_param
,
'Filter'
:
filter_param
,
...
...
python/paddle/v2/fluid/layers/ops.py
浏览文件 @
816e556b
from
..registry
import
register_layer
from
..registry
import
register_layer
__activations__
=
[
__activations__
=
[
'abs'
,
'tanh'
,
'sigmoid'
,
'relu'
,
'sqrt'
,
'ceil'
,
'floor'
,
'log'
,
'round'
'sigmoid'
,
'logsigmoid'
,
'exp'
,
'relu'
,
'tanh'
,
'tanh_shrink'
,
'softshrink'
,
'sqrt'
,
'abs'
,
'ceil'
,
'floor'
,
'round'
,
'reciprocal'
,
'log'
,
'square'
,
'softplus'
,
'softsign'
,
'brelu'
,
'leaky_relu'
,
'soft_relu'
,
'elu'
,
'relu6'
,
'pow'
,
'stanh'
,
'hard_shrink'
,
'thresholded_relu'
,
'hard_sigmoid'
,
'swish'
,
]
]
__all__
=
[
__all__
=
[
...
...
python/paddle/v2/fluid/layers/tensor.py
浏览文件 @
816e556b
...
@@ -6,8 +6,16 @@ from ..core import DataType
...
@@ -6,8 +6,16 @@ from ..core import DataType
import
numpy
import
numpy
__all__
=
[
__all__
=
[
'create_tensor'
,
'create_parameter'
,
'cast'
,
'concat'
,
'sums'
,
'assign'
,
'create_tensor'
,
'fill_constant_batch_size_like'
,
'fill_constant'
,
'ones'
,
'zeros'
'create_parameter'
,
'cast'
,
'concat'
,
'sums'
,
'assign'
,
'fill_constant_batch_size_like'
,
'fill_constant'
,
'ones'
,
'zeros'
,
]
]
...
@@ -172,29 +180,30 @@ def assign(input, output):
...
@@ -172,29 +180,30 @@ def assign(input, output):
return
output
return
output
def
fill_constant
(
shape
,
dtype
,
value
,
out
=
None
):
def
fill_constant
(
shape
,
dtype
,
value
,
force_cpu
=
False
,
out
=
None
):
"""
"""
**fill_constant**
**fill_constant**
This function creates a tensor
of specified *shape*
and
This function creates a tensor
with specified `shape` and `dtype`,
and
*dtype*, and initializes this with a constant supplied in *value*
.
initializes it with a constant specifed by `value`
.
It also sets *stop_gradient*
to True.
The attribute `stop_gradient` of the created tensor is set
to True.
Args:
Args:
shape(tuple|list|None): Shape of
output tensor
shape(tuple|list|None): Shape of
the output tensor.
dtype(np.dtype|core.DataType|str): Data type of
output tensor
dtype(np.dtype|core.DataType|str): Data type of
the output tensor.
value(float):
Constant value to initialize the output tensor
value(float):
The constant value used to initialize the output tensor.
out(Variable):
Output Variable to initialize
out(Variable):
The output tensor.
Returns:
Returns:
Variable: The tensor variable storing the output
Variable: The tensor variable storing the output
.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64')
data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64')
"""
"""
helper
=
LayerHelper
(
"fill_constant"
,
**
locals
())
helper
=
LayerHelper
(
"fill_constant"
,
**
locals
())
if
out
is
None
:
if
out
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
dtype
)
out
=
helper
.
create_tmp_variable
(
dtype
=
dtype
)
...
@@ -202,9 +211,12 @@ def fill_constant(shape, dtype, value, out=None):
...
@@ -202,9 +211,12 @@ def fill_constant(shape, dtype, value, out=None):
type
=
'fill_constant'
,
type
=
'fill_constant'
,
inputs
=
{},
inputs
=
{},
outputs
=
{
'Out'
:
[
out
]},
outputs
=
{
'Out'
:
[
out
]},
attrs
=
{
'shape'
:
shape
,
attrs
=
{
'dtype'
:
out
.
dtype
,
'shape'
:
shape
,
'value'
:
float
(
value
)})
'dtype'
:
out
.
dtype
,
'value'
:
float
(
value
),
'force_cpu'
:
force_cpu
})
out
.
stop_gradient
=
True
out
.
stop_gradient
=
True
return
out
return
out
...
...
python/paddle/v2/fluid/memory_optimization_transpiler.py
浏览文件 @
816e556b
...
@@ -121,8 +121,10 @@ class ControlFlowGraph(object):
...
@@ -121,8 +121,10 @@ class ControlFlowGraph(object):
# and dtype_to_size[cache_dtype]
# and dtype_to_size[cache_dtype]
if
x_dtype
==
cache_dtype
:
if
x_dtype
==
cache_dtype
:
print
(
print
(
"Hit Cache !!!! cache pool index is %d, var name is %s, cached var name is %s, var shape is %s "
(
"Hit Cache !!!! cache pool index "
%
"is %d, var name is %s, "
"cached var name is %s, "
"var shape is %s "
)
%
(
index
,
x
,
cache_var
,
str
(
cache_shape
)))
(
index
,
x
,
cache_var
,
str
(
cache_shape
)))
self
.
pool
.
pop
(
index
)
self
.
pool
.
pop
(
index
)
_rename_arg_
(
_rename_arg_
(
...
...
python/paddle/v2/fluid/nets.py
浏览文件 @
816e556b
import
layers
import
layers
__all__
=
[
"simple_img_conv_pool"
,
"sequence_conv_pool"
]
__all__
=
[
"simple_img_conv_pool"
,
"sequence_conv_pool"
,
]
def
simple_img_conv_pool
(
input
,
def
simple_img_conv_pool
(
input
,
...
...
python/paddle/v2/fluid/registry.py
浏览文件 @
816e556b
...
@@ -8,7 +8,11 @@ import proto.framework_pb2 as framework_pb2
...
@@ -8,7 +8,11 @@ import proto.framework_pb2 as framework_pb2
from
framework
import
OpProtoHolder
,
Variable
,
Program
,
Operator
from
framework
import
OpProtoHolder
,
Variable
,
Program
,
Operator
from
paddle.v2.fluid.layer_helper
import
LayerHelper
,
unique_name
from
paddle.v2.fluid.layer_helper
import
LayerHelper
,
unique_name
__all__
=
[
'deprecated'
,
'register_layer'
,
'autodoc'
]
__all__
=
[
'deprecated'
,
'register_layer'
,
'autodoc'
,
]
def
_convert_
(
name
):
def
_convert_
(
name
):
...
@@ -80,11 +84,10 @@ def _generate_doc_string_(op_proto):
...
@@ -80,11 +84,10 @@ def _generate_doc_string_(op_proto):
def
register_layer
(
op_type
):
def
register_layer
(
op_type
):
"""
"""Register the Python layer for an Operator.
Register an Python layer for an Operator
Args:
Args:
op_type: The name of the operator to be created
op_type: The name of the operator to be created
.
This function takes in the operator type (sigmoid, mean , average etc) and
This function takes in the operator type (sigmoid, mean , average etc) and
creates the operator functionality.
creates the operator functionality.
...
@@ -98,16 +101,16 @@ def register_layer(op_type):
...
@@ -98,16 +101,16 @@ def register_layer(op_type):
if
len
(
not_intermediate_outputs
)
!=
1
:
if
len
(
not_intermediate_outputs
)
!=
1
:
raise
ValueError
(
"Only one non intermediate output operator can be"
,
raise
ValueError
(
"Only one non intermediate output operator can be"
,
"automatically generated"
)
"automatically generated
.
"
)
if
not_intermediate_outputs
[
0
].
duplicable
:
if
not_intermediate_outputs
[
0
].
duplicable
:
raise
ValueError
(
raise
ValueError
(
"Only non duplicable op can be automatically generated"
)
"Only non duplicable op can be automatically generated
.
"
)
for
output
in
intermediate_outputs
:
for
output
in
intermediate_outputs
:
if
output
.
duplicable
:
if
output
.
duplicable
:
raise
ValueError
(
"The op can be automatically generated only when "
,
raise
ValueError
(
"The op can be automatically generated only when "
,
"all intermediate ops are not duplicable"
)
"all intermediate ops are not duplicable
.
"
)
o_name
=
not_intermediate_outputs
[
0
].
name
o_name
=
not_intermediate_outputs
[
0
].
name
intermediate_output_names
=
[
output
.
name
for
output
in
intermediate_outputs
]
intermediate_output_names
=
[
output
.
name
for
output
in
intermediate_outputs
]
...
...
python/paddle/v2/fluid/regularizer.py
浏览文件 @
816e556b
import
framework
import
framework
__all__
=
[
'append_regularization_ops'
,
'L1Decay'
,
'L2Decay'
]
__all__
=
[
'append_regularization_ops'
,
'L1Decay'
,
'L2Decay'
,
]
def
append_regularization_ops
(
parameters_and_grads
,
regularization
=
None
):
def
append_regularization_ops
(
parameters_and_grads
,
regularization
=
None
):
...
...
python/paddle/v2/fluid/tests/CMakeLists.txt
浏览文件 @
816e556b
...
@@ -5,3 +5,4 @@ foreach(src ${TEST_OPS})
...
@@ -5,3 +5,4 @@ foreach(src ${TEST_OPS})
endforeach
()
endforeach
()
add_subdirectory
(
book
)
add_subdirectory
(
book
)
add_subdirectory
(
book_distribute
)
python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt
0 → 100644
浏览文件 @
816e556b
# Collect every test_*.py in this directory (paths kept relative to the
# source dir so they can double as target names).
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
# Strip the .py suffix: each remaining entry is used as a test target name.
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# Register one python test target per discovered file.
foreach(src ${TEST_OPS})
  py_test(${src} SRCS ${src}.py)
endforeach()
python/paddle/v2/fluid/tests/book_distribute/test_dist_fit_a_line.py
→ python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py
浏览文件 @
816e556b
文件已移动 (file renamed)
python/paddle/v2/fluid/tests/book_distribute/test_dist_label_semantic_roles.py
→ python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py
浏览文件 @
816e556b
文件已移动 (file renamed)
python/paddle/v2/fluid/tests/book_distribute/test_dist_word2vec.py
→ python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
浏览文件 @
816e556b
文件已移动 (file renamed)
python/paddle/v2/fluid/tests/book_distribute/test_understand_sentiment_conv_dist.py
→ python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py
浏览文件 @
816e556b
文件已移动 (file renamed)
python/paddle/v2/fluid/tests/book_distribute/test_split_var.py
0 → 100644
浏览文件 @
816e556b
import
math
import
unittest
from
paddle.v2.fluid.distribute_transpiler
import
split_dense_variable
import
paddle.v2.fluid
as
fluid
import
paddle.v2.fluid.core
as
core
import
random
class TestSplitVar(unittest.TestCase):
    """Check split_dense_variable's partitioning of variables over 10 servers.

    Each variable's elements are split into blocks; the test verifies the
    reported block sizes against hand-computed expectations.
    """

    def test_check_output(self):
        # Split the shapes below across 10 parameter servers.
        shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10]]
        expected_sizes = [
            [15],
            [1024],
            [2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 784],
            [2040, 2040, 2040, 2040],
            [1150, 1150, 1150, 1150, 1150, 1150, 1100],
        ]

        program = fluid.Program()
        var_list = []
        for shape in shapes:
            # Persistable variables with random names stand in for real
            # parameters; only the shape matters for the split.
            var = program.global_block().create_var(
                name=str(random.randint(10000, 99999)),
                persistable=True,
                # dtype=core.VarDesc.VarType.LOD_TENSOR,
                shape=shape)
            var_list.append(var)

        blocks = split_dense_variable(var_list, 10)

        # Flatten the per-variable expectations into one ordered list that
        # parallels the order of the returned blocks.
        all_sizes = [size for group in expected_sizes for size in group]
        for i, block_str in enumerate(blocks):
            # Each block descriptor has the form "varname:block_id:size".
            varname, block_id, size = block_str.split(":")
            self.assertEqual(int(size), all_sizes[i])
# Allow running this test module directly from the command line.
if __name__ == '__main__':
    unittest.main()
python/paddle/v2/fluid/tests/op_test.py
浏览文件 @
816e556b
...
@@ -31,7 +31,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
...
@@ -31,7 +31,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
kwargs
[
in_name
]
=
[]
kwargs
[
in_name
]
=
[]
if
in_dup
:
if
in_dup
:
sub_in
=
inputs
[
in_name
]
sub_in
=
inputs
[
in_name
]
for
sub_in_name
,
_
in
sub_in
:
for
item
in
sub_in
:
sub_in_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
in_name
,
sub_in_name
)
__create_var__
(
in_name
,
sub_in_name
)
else
:
else
:
__create_var__
(
in_name
,
in_name
)
__create_var__
(
in_name
,
in_name
)
...
@@ -41,7 +42,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
...
@@ -41,7 +42,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
kwargs
[
out_name
]
=
[]
kwargs
[
out_name
]
=
[]
if
out_dup
:
if
out_dup
:
sub_out
=
outputs
[
out_name
]
sub_out
=
outputs
[
out_name
]
for
sub_out_name
,
_
in
sub_out
:
for
item
in
sub_out
:
sub_out_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
out_name
,
sub_out_name
)
__create_var__
(
out_name
,
sub_out_name
)
else
:
else
:
__create_var__
(
out_name
,
out_name
)
__create_var__
(
out_name
,
out_name
)
...
@@ -71,13 +73,15 @@ def set_input(scope, op, inputs, place):
...
@@ -71,13 +73,15 @@ def set_input(scope, op, inputs, place):
if
in_name
in
inputs
:
if
in_name
in
inputs
:
if
in_dup
:
if
in_dup
:
sub_in
=
inputs
[
in_name
]
sub_in
=
inputs
[
in_name
]
for
sub_in_name
,
sub_in_val
in
sub_in
:
for
item
in
sub_in
:
sub_in_name
,
sub_in_val
=
item
[
0
],
item
[
1
]
__set_input__
(
sub_in_name
,
sub_in_val
)
__set_input__
(
sub_in_name
,
sub_in_val
)
else
:
else
:
__set_input__
(
in_name
,
inputs
[
in_name
])
__set_input__
(
in_name
,
inputs
[
in_name
])
def
get_numeric_gradient
(
scope
,
def
get_numeric_gradient
(
place
,
scope
,
op
,
op
,
inputs
,
inputs
,
input_to_check
,
input_to_check
,
...
@@ -85,7 +89,7 @@ def get_numeric_gradient(scope,
...
@@ -85,7 +89,7 @@ def get_numeric_gradient(scope,
delta
=
0.005
,
delta
=
0.005
,
in_place
=
False
):
in_place
=
False
):
# FIXME: change this method by compile time concepts
# FIXME: change this method by compile time concepts
set_input
(
scope
,
op
,
inputs
,
core
.
CPUPlace
()
)
set_input
(
scope
,
op
,
inputs
,
place
)
def
product
(
dim
):
def
product
(
dim
):
return
reduce
(
lambda
a
,
b
:
a
*
b
,
dim
,
1
)
return
reduce
(
lambda
a
,
b
:
a
*
b
,
dim
,
1
)
...
@@ -93,7 +97,7 @@ def get_numeric_gradient(scope,
...
@@ -93,7 +97,7 @@ def get_numeric_gradient(scope,
def
get_output
():
def
get_output
():
sum
=
[]
sum
=
[]
for
output_name
in
output_names
:
for
output_name
in
output_names
:
op
.
run
(
scope
,
core
.
CPUPlace
()
)
op
.
run
(
scope
,
place
)
sum
.
append
(
sum
.
append
(
np
.
array
(
scope
.
find_var
(
output_name
).
get_tensor
()).
mean
())
np
.
array
(
scope
.
find_var
(
output_name
).
get_tensor
()).
mean
())
return
np
.
array
(
sum
).
mean
()
return
np
.
array
(
sum
).
mean
()
...
@@ -127,7 +131,7 @@ def get_numeric_gradient(scope,
...
@@ -127,7 +131,7 @@ def get_numeric_gradient(scope,
# we use a for loop to compute the gradient of every element.
# we use a for loop to compute the gradient of every element.
for
i
in
xrange
(
tensor_size
):
for
i
in
xrange
(
tensor_size
):
if
in_place
:
if
in_place
:
set_input
(
scope
,
op
,
inputs
,
core
.
CPUPlace
()
)
set_input
(
scope
,
op
,
inputs
,
place
)
# get one input element throw it's index i.
# get one input element throw it's index i.
origin
=
__get_elem__
(
tensor_to_check
,
i
)
origin
=
__get_elem__
(
tensor_to_check
,
i
)
...
@@ -137,7 +141,7 @@ def get_numeric_gradient(scope,
...
@@ -137,7 +141,7 @@ def get_numeric_gradient(scope,
y_pos
=
get_output
()
y_pos
=
get_output
()
if
in_place
:
if
in_place
:
set_input
(
scope
,
op
,
inputs
,
core
.
CPUPlace
()
)
set_input
(
scope
,
op
,
inputs
,
place
)
x_neg
=
origin
-
delta
x_neg
=
origin
-
delta
__set_elem__
(
tensor_to_check
,
i
,
x_neg
)
__set_elem__
(
tensor_to_check
,
i
,
x_neg
)
...
@@ -283,7 +287,8 @@ class OpTest(unittest.TestCase):
...
@@ -283,7 +287,8 @@ class OpTest(unittest.TestCase):
if
not
isinstance
(
sub_out
,
list
):
if
not
isinstance
(
sub_out
,
list
):
raise
AssertionError
(
"sub_out type %s is not list"
,
raise
AssertionError
(
"sub_out type %s is not list"
,
type
(
sub_out
))
type
(
sub_out
))
for
sub_out_name
,
expect
in
sub_out
:
for
item
in
sub_out
:
sub_out_name
,
expect
=
item
[
0
],
item
[
1
]
idx
=
find_actual
(
sub_out_name
,
fetch_list
)
idx
=
find_actual
(
sub_out_name
,
fetch_list
)
actual
=
outs
[
idx
]
actual
=
outs
[
idx
]
actual_t
=
np
.
array
(
actual
)
actual_t
=
np
.
array
(
actual
)
...
@@ -347,6 +352,24 @@ class OpTest(unittest.TestCase):
...
@@ -347,6 +352,24 @@ class OpTest(unittest.TestCase):
in_place
=
False
,
in_place
=
False
,
max_relative_error
=
0.005
,
max_relative_error
=
0.005
,
user_defined_grads
=
None
):
user_defined_grads
=
None
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compile_gpu
()
and
core
.
op_support_gpu
(
self
.
op_type
):
places
.
append
(
core
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
check_grad_with_place
(
place
,
inputs_to_check
,
output_names
,
no_grad_set
,
numeric_grad_delta
,
in_place
,
max_relative_error
,
user_defined_grads
)
def
check_grad_with_place
(
self
,
place
,
inputs_to_check
,
output_names
,
no_grad_set
=
None
,
numeric_grad_delta
=
0.005
,
in_place
=
False
,
max_relative_error
=
0.005
,
user_defined_grads
=
None
):
self
.
scope
=
core
.
Scope
()
self
.
scope
=
core
.
Scope
()
op_inputs
=
self
.
inputs
if
hasattr
(
self
,
"inputs"
)
else
dict
()
op_inputs
=
self
.
inputs
if
hasattr
(
self
,
"inputs"
)
else
dict
()
op_outputs
=
self
.
outputs
if
hasattr
(
self
,
"outputs"
)
else
dict
()
op_outputs
=
self
.
outputs
if
hasattr
(
self
,
"outputs"
)
else
dict
()
...
@@ -362,6 +385,7 @@ class OpTest(unittest.TestCase):
...
@@ -362,6 +385,7 @@ class OpTest(unittest.TestCase):
numeric_grads
=
user_defined_grads
or
[
numeric_grads
=
user_defined_grads
or
[
get_numeric_gradient
(
get_numeric_gradient
(
place
,
self
.
scope
,
self
.
scope
,
self
.
op
,
self
.
op
,
self
.
inputs
,
self
.
inputs
,
...
@@ -370,22 +394,12 @@ class OpTest(unittest.TestCase):
...
@@ -370,22 +394,12 @@ class OpTest(unittest.TestCase):
delta
=
numeric_grad_delta
,
delta
=
numeric_grad_delta
,
in_place
=
in_place
)
for
input_to_check
in
inputs_to_check
in_place
=
in_place
)
for
input_to_check
in
inputs_to_check
]
]
cpu_place
=
core
.
CPUPlace
()
analytic_grads
=
self
.
_get_gradient
(
inputs_to_check
,
place
,
cpu_analytic_grads
=
self
.
_get_gradient
(
inputs_to_check
,
cpu_place
,
output_names
,
no_grad_set
)
output_names
,
no_grad_set
)
self
.
__assert_is_close
(
numeric_grads
,
analytic_grads
,
inputs_to_check
,
self
.
__assert_is_close
(
numeric_grads
,
cpu_analytic_grads
,
max_relative_error
,
inputs_to_check
,
max_relative_error
,
"Gradient Check On %s"
%
str
(
place
))
"Gradient Check On %s"
%
str
(
cpu_place
))
if
core
.
is_compile_gpu
()
and
self
.
op
.
support_gpu
():
gpu_place
=
core
.
CUDAPlace
(
0
)
gpu_analytic_grads
=
self
.
_get_gradient
(
inputs_to_check
,
gpu_place
,
output_names
,
no_grad_set
)
self
.
__assert_is_close
(
numeric_grads
,
gpu_analytic_grads
,
inputs_to_check
,
max_relative_error
,
"Gradient Check On %s"
%
str
(
gpu_place
))
@
staticmethod
@
staticmethod
def
_create_var_descs_
(
block
,
var_dict
):
def
_create_var_descs_
(
block
,
var_dict
):
...
...
python/paddle/v2/fluid/tests/test_clip.py
0 → 100644
浏览文件 @
816e556b
from
__future__
import
print_function
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid
as
fluid
BATCH_SIZE
=
128
CLIP_MAX
=
2e-6
CLIP_MIN
=
-
1e-6
prog
=
fluid
.
framework
.
Program
()
with
fluid
.
program_guard
(
main_program
=
prog
):
image
=
fluid
.
layers
.
data
(
name
=
'x'
,
shape
=
[
784
],
dtype
=
'float32'
)
hidden1
=
fluid
.
layers
.
fc
(
input
=
image
,
size
=
128
,
act
=
'relu'
)
hidden2
=
fluid
.
layers
.
fc
(
input
=
hidden1
,
size
=
64
,
act
=
'relu'
)
predict
=
fluid
.
layers
.
fc
(
input
=
hidden2
,
size
=
10
,
act
=
'softmax'
)
label
=
fluid
.
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
dtype
=
'int64'
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
prog_clip
=
prog
.
clone
()
prog_clip
.
block
(
0
).
var
(
hidden1
.
name
).
set_error_clip
(
fluid
.
clip
.
ErrorClipByValue
(
max
=
CLIP_MAX
,
min
=
CLIP_MIN
))
avg_cost_clip
=
prog_clip
.
block
(
0
).
var
(
avg_cost
.
name
)
fluid
.
backward
.
append_backward
(
loss
=
avg_cost
)
fluid
.
backward
.
append_backward
(
loss
=
avg_cost_clip
,
callback
=
fluid
.
clip
.
error_clip_callback
)
hidden1_grad
=
prog
.
block
(
0
).
var
(
hidden1
.
name
+
"@GRAD"
)
hidden1_grad_clip
=
prog_clip
.
block
(
0
).
var
(
hidden1
.
name
+
"@GRAD"
)
hidden2_grad
=
prog
.
block
(
0
).
var
(
hidden2
.
name
+
"@GRAD"
)
hidden2_grad_clip
=
prog_clip
.
block
(
0
).
var
(
hidden2
.
name
+
"@GRAD"
)
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
mnist
.
train
(),
buf_size
=
8192
),
batch_size
=
BATCH_SIZE
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
image
,
label
],
place
=
place
)
exe
.
run
(
fluid
.
default_startup_program
())
count
=
0
for
data
in
train_reader
():
count
+=
1
if
count
>
5
:
break
out1
,
out2
=
exe
.
run
(
prog
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
hidden1_grad
,
hidden2_grad
])
out1_clip
,
out2_clip
=
exe
.
run
(
prog_clip
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
hidden1_grad_clip
,
hidden2_grad_clip
])
if
not
((
out1
.
clip
(
min
=
CLIP_MIN
,
max
=
CLIP_MAX
)
==
out1_clip
).
all
()
and
(
out2
==
out2_clip
).
all
()):
exit
(
1
)
exit
(
0
)
python/paddle/v2/fluid/tests/test_conv2d_op.py
浏览文件 @
816e556b
...
@@ -49,7 +49,7 @@ def conv2d_forward_naive(input, filter, group, conv_param):
...
@@ -49,7 +49,7 @@ def conv2d_forward_naive(input, filter, group, conv_param):
class
TestConv2dOp
(
OpTest
):
class
TestConv2dOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
core
.
use_cuda
()
self
.
use_cudnn
=
False
self
.
init_op_type
()
self
.
init_op_type
()
self
.
init_group
()
self
.
init_group
()
self
.
init_dilation
()
self
.
init_dilation
()
...
@@ -70,30 +70,59 @@ class TestConv2dOp(OpTest):
...
@@ -70,30 +70,59 @@ class TestConv2dOp(OpTest):
'strides'
:
self
.
stride
,
'strides'
:
self
.
stride
,
'paddings'
:
self
.
pad
,
'paddings'
:
self
.
pad
,
'groups'
:
self
.
groups
,
'groups'
:
self
.
groups
,
'dilations'
:
self
.
dilations
'dilations'
:
self
.
dilations
,
'use_cudnn'
:
self
.
use_cudnn
}
}
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
else
:
self
.
check_output
()
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
else
:
self
.
check_grad
(
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
def
test_check_grad_no_filter
(
self
):
def
test_check_grad_no_filter
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
[
'Input'
],
place
=
core
.
CUDAPlace
(
0
)
'Output'
,
self
.
check_grad_with_place
(
max_relative_error
=
0.02
,
place
,
[
'Input'
],
no_grad_set
=
set
([
'Filter'
]))
'Output'
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Filter'
]))
else
:
self
.
check_grad
(
[
'Input'
],
'Output'
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Filter'
]))
def
test_check_grad_no_input
(
self
):
def
test_check_grad_no_input
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
[
'Filter'
],
place
=
core
.
CUDAPlace
(
0
)
'Output'
,
self
.
check_grad_with_place
(
max_relative_error
=
0.02
,
place
,
[
'Filter'
],
no_grad_set
=
set
([
'Input'
]))
'Output'
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Input'
]))
else
:
self
.
check_grad
(
[
'Filter'
],
'Output'
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Input'
]))
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
]
self
.
pad
=
[
0
,
0
]
...
@@ -167,39 +196,39 @@ class TestWithDilation(TestConv2dOp):
...
@@ -167,39 +196,39 @@ class TestWithDilation(TestConv2dOp):
self
.
groups
=
3
self
.
groups
=
3
#----------------Conv2dC
udnn
----------------
#----------------Conv2dC
UDNN
----------------
class
TestC
udnn
(
TestConv2dOp
):
class
TestC
UDNN
(
TestConv2dOp
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
core
.
use_cudnn
()
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d
_cudnn
"
self
.
op_type
=
"conv2d"
class
TestC
udnn
WithPad
(
TestWithPad
):
class
TestC
UDNN
WithPad
(
TestWithPad
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
core
.
use_cudnn
()
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d
_cudnn
"
self
.
op_type
=
"conv2d"
class
TestC
udnn
WithStride
(
TestWithStride
):
class
TestC
UDNN
WithStride
(
TestWithStride
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
core
.
use_cudnn
()
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d
_cudnn
"
self
.
op_type
=
"conv2d"
class
TestC
udnn
WithGroup
(
TestWithGroup
):
class
TestC
UDNN
WithGroup
(
TestWithGroup
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
core
.
use_cudnn
()
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d
_cudnn
"
self
.
op_type
=
"conv2d"
class
TestC
udnn
With1x1
(
TestWith1x1
):
class
TestC
UDNN
With1x1
(
TestWith1x1
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
core
.
use_cudnn
()
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d
_cudnn
"
self
.
op_type
=
"conv2d"
# cudnn v5 does not support dilation conv.
# cudnn v5 does not support dilation conv.
# class TestC
udnn
WithDilation(TestWithDilation):
# class TestC
UDNN
WithDilation(TestWithDilation):
# def init_op_type(self):
# def init_op_type(self):
# self.op_type = "conv_cudnn"
# self.op_type = "conv_cudnn"
...
...
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
浏览文件 @
816e556b
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
import
paddle.v2.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
...
@@ -37,6 +39,7 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
...
@@ -37,6 +39,7 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
class
TestConv2dTransposeOp
(
OpTest
):
class
TestConv2dTransposeOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
# init as conv transpose
# init as conv transpose
self
.
use_cudnn
=
False
self
.
init_op_type
()
self
.
init_op_type
()
self
.
init_test_case
()
self
.
init_test_case
()
...
@@ -47,7 +50,9 @@ class TestConv2dTransposeOp(OpTest):
...
@@ -47,7 +50,9 @@ class TestConv2dTransposeOp(OpTest):
self
.
attrs
=
{
self
.
attrs
=
{
'strides'
:
self
.
stride
,
'strides'
:
self
.
stride
,
'paddings'
:
self
.
pad
,
'paddings'
:
self
.
pad
,
'dilations'
:
self
.
dilations
'dilations'
:
self
.
dilations
,
'use_cudnn'
:
self
.
use_cudnn
,
'data_format'
:
'AnyLayout'
# TODO(dzhwinter) : should be fix latter
}
}
output
=
conv2dtranspose_forward_naive
(
input_
,
filter_
,
output
=
conv2dtranspose_forward_naive
(
input_
,
filter_
,
...
@@ -56,25 +61,53 @@ class TestConv2dTransposeOp(OpTest):
...
@@ -56,25 +61,53 @@ class TestConv2dTransposeOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
else
:
self
.
check_output
()
def
test_check_grad_no_input
(
self
):
def
test_check_grad_no_input
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
[
'Filter'
],
place
=
core
.
CUDAPlace
(
0
)
'Output'
,
self
.
check_grad_with_place
(
max_relative_error
=
0.02
,
place
,
[
'Filter'
],
no_grad_set
=
set
([
'Input'
]))
'Output'
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Input'
]))
else
:
self
.
check_grad
(
[
'Filter'
],
'Output'
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Input'
]))
def
test_check_grad_no_filter
(
self
):
def
test_check_grad_no_filter
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
[
'Input'
],
place
=
core
.
CUDAPlace
(
0
)
'Output'
,
self
.
check_grad_with_place
(
max_relative_error
=
0.02
,
place
,
[
'Input'
],
no_grad_set
=
set
([
'Filter'
]))
'Output'
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Filter'
]))
else
:
self
.
check_grad
(
[
'Input'
],
'Output'
,
max_relative_error
=
0.02
,
no_grad_set
=
set
([
'Filter'
]))
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
else
:
self
.
check_grad
(
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
]
self
.
pad
=
[
0
,
0
]
...
@@ -119,12 +152,13 @@ class TestWithDilation(TestConv2dTransposeOp):
...
@@ -119,12 +152,13 @@ class TestWithDilation(TestConv2dTransposeOp):
# ------------ test_cudnn ------------
# ------------ test_cudnn ------------
class
TestC
udnn
(
TestConv2dTransposeOp
):
class
TestC
UDNN
(
TestConv2dTransposeOp
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d_transpose_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d_transpose"
class
TestC
udnn
WithPad
(
TestWithPad
):
class
TestC
UDNN
WithPad
(
TestWithPad
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
...
@@ -134,10 +168,11 @@ class TestCudnnWithPad(TestWithPad):
...
@@ -134,10 +168,11 @@ class TestCudnnWithPad(TestWithPad):
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d_transpose_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d_transpose"
class
TestC
udnn
WithStride
(
TestWithStride
):
class
TestC
UDNN
WithStride
(
TestWithStride
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
stride
=
[
2
,
2
]
...
@@ -147,11 +182,12 @@ class TestCudnnWithStride(TestWithStride):
...
@@ -147,11 +182,12 @@ class TestCudnnWithStride(TestWithStride):
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d_transpose_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv2d_transpose"
# #cudnn v5 does not support dilation conv.
# #cudnn v5 does not support dilation conv.
# class TestC
udnn
WithDilation(TestWithDilation):
# class TestC
UDNN
WithDilation(TestWithDilation):
# def init_test_case(self):
# def init_test_case(self):
# self.pad = [1, 1]
# self.pad = [1, 1]
# self.stride = [2, 2]
# self.stride = [2, 2]
...
@@ -161,7 +197,7 @@ class TestCudnnWithStride(TestWithStride):
...
@@ -161,7 +197,7 @@ class TestCudnnWithStride(TestWithStride):
# self.filter_size = [f_c, 6, 3, 3]
# self.filter_size = [f_c, 6, 3, 3]
#
#
# def init_op_type(self):
# def init_op_type(self):
# self.op_type = "conv2d_transpose
_cudnn
"
# self.op_type = "conv2d_transpose"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_conv3d_op.py
浏览文件 @
816e556b
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
import
paddle.v2.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
...
@@ -54,6 +56,7 @@ def conv3d_forward_naive(input, filter, group, conv_param):
...
@@ -54,6 +56,7 @@ def conv3d_forward_naive(input, filter, group, conv_param):
class
TestConv3dOp
(
OpTest
):
class
TestConv3dOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
use_cudnn
=
False
self
.
init_group
()
self
.
init_group
()
self
.
init_op_type
()
self
.
init_op_type
()
self
.
init_dilation
()
self
.
init_dilation
()
...
@@ -62,7 +65,9 @@ class TestConv3dOp(OpTest):
...
@@ -62,7 +65,9 @@ class TestConv3dOp(OpTest):
conv3d_param
=
{
conv3d_param
=
{
'stride'
:
self
.
stride
,
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
,
'pad'
:
self
.
pad
,
'dilations'
:
self
.
dilations
'dilations'
:
self
.
dilations
,
'use_cudnn'
:
self
.
use_cudnn
,
'data_format'
:
'AnyLayout'
# TODO(dzhwinter) : should be fix latter
}
}
input
=
np
.
random
.
random
(
self
.
input_size
).
astype
(
"float32"
)
input
=
np
.
random
.
random
(
self
.
input_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
filter
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
...
@@ -79,25 +84,53 @@ class TestConv3dOp(OpTest):
...
@@ -79,25 +84,53 @@ class TestConv3dOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
else
:
self
.
check_output
()
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.03
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.03
)
else
:
self
.
check_grad
(
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.03
)
def
test_check_grad_no_filter
(
self
):
def
test_check_grad_no_filter
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
[
'Input'
],
place
=
core
.
CUDAPlace
(
0
)
'Output'
,
self
.
check_grad_with_place
(
max_relative_error
=
0.03
,
place
,
[
'Input'
],
no_grad_set
=
set
([
'Filter'
]))
'Output'
,
max_relative_error
=
0.03
,
no_grad_set
=
set
([
'Filter'
]))
else
:
self
.
check_grad
(
[
'Input'
],
'Output'
,
max_relative_error
=
0.03
,
no_grad_set
=
set
([
'Filter'
]))
def
test_check_grad_no_input
(
self
):
def
test_check_grad_no_input
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
[
'Filter'
],
place
=
core
.
CUDAPlace
(
0
)
'Output'
,
self
.
check_grad_with_place
(
max_relative_error
=
0.03
,
place
,
[
'Filter'
],
no_grad_set
=
set
([
'Input'
]))
'Output'
,
max_relative_error
=
0.03
,
no_grad_set
=
set
([
'Input'
]))
else
:
self
.
check_grad
(
[
'Filter'
],
'Output'
,
max_relative_error
=
0.03
,
no_grad_set
=
set
([
'Input'
]))
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
,
0
]
self
.
pad
=
[
0
,
0
,
0
]
...
@@ -169,31 +202,35 @@ class TestWithDilation(TestConv3dOp):
...
@@ -169,31 +202,35 @@ class TestWithDilation(TestConv3dOp):
self
.
groups
=
3
self
.
groups
=
3
class
TestC
udnn
(
TestConv3dOp
):
class
TestC
UDNN
(
TestConv3dOp
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv3d"
class
TestWithGroup1C
udnn
(
TestWithGroup1
):
class
TestWithGroup1C
UDNN
(
TestWithGroup1
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv3d"
class
TestWithGroup2C
udnn
(
TestWithGroup2
):
class
TestWithGroup2C
UDNN
(
TestWithGroup2
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv3d"
class
TestWith1x1C
udnn
(
TestWith1x1
):
class
TestWith1x1C
UDNN
(
TestWith1x1
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv3d"
# FIXME(typhoonzero): find a way to determine if
# FIXME(typhoonzero): find a way to determine if
# using cudnn > 6 in python
# using cudnn > 6 in python
# class TestWithDilationC
udnn
(TestWithDilation):
# class TestWithDilationC
UDNN
(TestWithDilation):
# def init_op_type(self):
# def init_op_type(self):
# self.op_type = "conv3d
_cudnn
"
# self.op_type = "conv3d"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
浏览文件 @
816e556b
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
import
paddle.v2.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
...
@@ -44,6 +46,7 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs):
...
@@ -44,6 +46,7 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs):
class
TestConv3dTransposeOp
(
OpTest
):
class
TestConv3dTransposeOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
# init as conv transpose
# init as conv transpose
self
.
use_cudnn
=
False
self
.
init_op_type
()
self
.
init_op_type
()
self
.
init_test_case
()
self
.
init_test_case
()
...
@@ -54,7 +57,9 @@ class TestConv3dTransposeOp(OpTest):
...
@@ -54,7 +57,9 @@ class TestConv3dTransposeOp(OpTest):
self
.
attrs
=
{
self
.
attrs
=
{
'strides'
:
self
.
stride
,
'strides'
:
self
.
stride
,
'paddings'
:
self
.
pad
,
'paddings'
:
self
.
pad
,
'dilations'
:
self
.
dilations
'dilations'
:
self
.
dilations
,
'use_cudnn'
:
self
.
use_cudnn
,
'data_format'
:
'AnyLayout'
# TODO(dzhwinter) : should be fix latter
}
}
output
=
conv3dtranspose_forward_naive
(
input_
,
filter_
,
output
=
conv3dtranspose_forward_naive
(
input_
,
filter_
,
...
@@ -63,25 +68,53 @@ class TestConv3dTransposeOp(OpTest):
...
@@ -63,25 +68,53 @@ class TestConv3dTransposeOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
else
:
self
.
check_output
()
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.03
)
else
:
self
.
check_grad
(
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.03
)
def
test_check_grad_no_filter
(
self
):
def
test_check_grad_no_filter
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
[
'Input'
],
place
=
core
.
CUDAPlace
(
0
)
'Output'
,
self
.
check_grad_with_place
(
max_relative_error
=
0.02
,
place
,
[
'Input'
],
no_grad_set
=
set
([
'Filter'
]))
'Output'
,
max_relative_error
=
0.03
,
no_grad_set
=
set
([
'Filter'
]))
else
:
self
.
check_grad
(
[
'Input'
],
'Output'
,
max_relative_error
=
0.03
,
no_grad_set
=
set
([
'Filter'
]))
def
test_check_grad_no_input
(
self
):
def
test_check_grad_no_input
(
self
):
self
.
check_grad
(
if
self
.
use_cudnn
:
[
'Filter'
],
place
=
core
.
CUDAPlace
(
0
)
'Output'
,
self
.
check_grad_with_place
(
max_relative_error
=
0.02
,
place
,
[
'Filter'
],
no_grad_set
=
set
([
'Input'
]))
'Output'
,
max_relative_error
=
0.03
,
no_grad_set
=
set
([
'Input'
]))
else
:
self
.
check_grad
(
[
'Filter'
],
'Output'
,
max_relative_error
=
0.03
,
no_grad_set
=
set
([
'Input'
]))
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
0
,
0
,
0
]
self
.
pad
=
[
0
,
0
,
0
]
...
@@ -126,12 +159,13 @@ class TestWithDilation(TestConv3dTransposeOp):
...
@@ -126,12 +159,13 @@ class TestWithDilation(TestConv3dTransposeOp):
# ------------ test_cudnn ------------
# ------------ test_cudnn ------------
class
TestC
udnn
(
TestConv3dTransposeOp
):
class
TestC
UDNN
(
TestConv3dTransposeOp
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d_transpose_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv3d_transpose"
class
TestC
udnn
WithPad
(
TestWithPad
):
class
TestC
UDNN
WithPad
(
TestWithPad
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
,
1
]
self
.
pad
=
[
1
,
1
,
1
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
stride
=
[
1
,
1
,
1
]
...
@@ -141,10 +175,11 @@ class TestCudnnWithPad(TestWithPad):
...
@@ -141,10 +175,11 @@ class TestCudnnWithPad(TestWithPad):
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d_transpose_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv3d_transpose"
class
TestC
udnn
WithStride
(
TestWithStride
):
class
TestC
UDNN
WithStride
(
TestWithStride
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
,
1
]
self
.
pad
=
[
1
,
1
,
1
]
self
.
stride
=
[
2
,
2
,
2
]
self
.
stride
=
[
2
,
2
,
2
]
...
@@ -154,11 +189,12 @@ class TestCudnnWithStride(TestWithStride):
...
@@ -154,11 +189,12 @@ class TestCudnnWithStride(TestWithStride):
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d_transpose_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"conv3d_transpose"
# #cudnn v5 does not support dilation conv.
# #cudnn v5 does not support dilation conv.
# class TestC
udnn
WithDilation(TestWithDilation):
# class TestC
UDNN
WithDilation(TestWithDilation):
# def init_test_case(self):
# def init_test_case(self):
# self.pad = [1, 1, 1]
# self.pad = [1, 1, 1]
# self.stride = [2, 2, 2]
# self.stride = [2, 2, 2]
...
@@ -168,7 +204,7 @@ class TestCudnnWithStride(TestWithStride):
...
@@ -168,7 +204,7 @@ class TestCudnnWithStride(TestWithStride):
# self.filter_size = [f_c, 6, 3, 3, 3]
# self.filter_size = [f_c, 6, 3, 3, 3]
#
#
# def init_op_type(self):
# def init_op_type(self):
# self.op_type = "conv3d_transpose
_cudnn
"
# self.op_type = "conv3d_transpose"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_parallel_op.py
浏览文件 @
816e556b
import
unittest
import
unittest
import
paddle.v2.fluid
as
fluid
import
paddle.v2.fluid
as
fluid
import
numpy
import
numpy
import
sys
# TODO(dzhwinter): get places op check need to be enhanced.
sys
.
exit
(
0
)
class
BaseParallelForTest
(
unittest
.
TestCase
):
class
BaseParallelForTest
(
unittest
.
TestCase
):
...
@@ -13,13 +17,13 @@ class BaseParallelForTest(unittest.TestCase):
...
@@ -13,13 +17,13 @@ class BaseParallelForTest(unittest.TestCase):
returns the data layers, and the second yield returns the loss.
returns the data layers, and the second yield returns the loss.
The modified data variables will be sent back during the first
The modified data variables will be sent back during the first
yield.
yield.
feed(dict): The executor feeding dictionary.
feed(dict): The executor feeding dictionary.
fetch(list|basestr): The fetch name lists.
fetch(list|basestr): The fetch name lists.
Returns:
Returns:
None
None
Raises:
Raises:
AssertionError when the computation of cpu, parallel.for in cpu,
AssertionError when the computation of cpu, parallel.for in cpu,
gpu, parallel.for in gpu are different.
gpu, parallel.for in gpu are different.
...
...
python/paddle/v2/fluid/tests/test_pool2d_op.py
浏览文件 @
816e556b
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
import
paddle.v2.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
...
@@ -44,6 +46,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0):
...
@@ -44,6 +46,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0):
class
TestPool2d_Op
(
OpTest
):
class
TestPool2d_Op
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
use_cudnn
=
False
self
.
init_test_case
()
self
.
init_test_case
()
self
.
init_global_pool
()
self
.
init_global_pool
()
self
.
init_op_type
()
self
.
init_op_type
()
...
@@ -62,15 +65,25 @@ class TestPool2d_Op(OpTest):
...
@@ -62,15 +65,25 @@ class TestPool2d_Op(OpTest):
'ksize'
:
self
.
ksize
,
'ksize'
:
self
.
ksize
,
'pooling_type'
:
self
.
pool_type
,
'pooling_type'
:
self
.
pool_type
,
'global_pooling'
:
self
.
global_pool
,
'global_pooling'
:
self
.
global_pool
,
'use_cudnn'
:
self
.
use_cudnn
,
'data_format'
:
'AnyLayout'
# TODO(dzhwinter) : should be fix latter
}
}
self
.
outputs
=
{
'Out'
:
output
.
astype
(
'float32'
)}
self
.
outputs
=
{
'Out'
:
output
.
astype
(
'float32'
)}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
else
:
self
.
check_output
()
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
if
self
.
pool_type
!=
"max"
:
if
self
.
use_cudnn
and
self
.
pool_type
!=
"max"
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
elif
self
.
pool_type
!=
"max"
:
self
.
check_grad
(
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
self
.
check_grad
(
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
def
init_test_case
(
self
):
def
init_test_case
(
self
):
...
@@ -153,35 +166,41 @@ class TestCase5(TestCase2):
...
@@ -153,35 +166,41 @@ class TestCase5(TestCase2):
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
#--------------------test pool2d
_cudnn
--------------------
#--------------------test pool2d--------------------
class
TestC
udnn
Case1
(
TestPool2d_Op
):
class
TestC
UDNN
Case1
(
TestPool2d_Op
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool2d"
class
TestC
udnn
Case2
(
TestCase1
):
class
TestC
UDNN
Case2
(
TestCase1
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool2d"
class
TestC
udnn
Case3
(
TestCase2
):
class
TestC
UDNN
Case3
(
TestCase2
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool2d"
class
TestC
udnn
Case4
(
TestCase3
):
class
TestC
UDNN
Case4
(
TestCase3
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool2d"
class
TestC
udnn
Case5
(
TestCase4
):
class
TestC
UDNN
Case5
(
TestCase4
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool2d"
class
TestC
udnn
Case6
(
TestCase5
):
class
TestC
UDNN
Case6
(
TestCase5
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool2d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool2d"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/v2/fluid/tests/test_pool3d_op.py
浏览文件 @
816e556b
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
import
paddle.v2.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
...
@@ -52,6 +54,7 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0):
...
@@ -52,6 +54,7 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0):
class
TestPool3d_Op
(
OpTest
):
class
TestPool3d_Op
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
use_cudnn
=
False
self
.
init_test_case
()
self
.
init_test_case
()
self
.
init_global_pool
()
self
.
init_global_pool
()
self
.
init_op_type
()
self
.
init_op_type
()
...
@@ -71,15 +74,25 @@ class TestPool3d_Op(OpTest):
...
@@ -71,15 +74,25 @@ class TestPool3d_Op(OpTest):
'ksize'
:
self
.
ksize
,
'ksize'
:
self
.
ksize
,
'pooling_type'
:
self
.
pool_type
,
'pooling_type'
:
self
.
pool_type
,
'global_pooling'
:
self
.
global_pool
,
'global_pooling'
:
self
.
global_pool
,
'use_cudnn'
:
self
.
use_cudnn
,
'data_format'
:
'AnyLayout'
# TODO(dzhwinter) : should be fix latter
}
}
self
.
outputs
=
{
'Out'
:
output
.
astype
(
'float32'
)}
self
.
outputs
=
{
'Out'
:
output
.
astype
(
'float32'
)}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
if
self
.
use_cudnn
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
else
:
self
.
check_output
()
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
if
self
.
pool_type
!=
"max"
:
if
self
.
use_cudnn
and
self
.
pool_type
!=
"max"
:
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
elif
self
.
pool_type
!=
"max"
:
self
.
check_grad
(
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
self
.
check_grad
(
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
def
init_test_case
(
self
):
def
init_test_case
(
self
):
...
@@ -163,35 +176,41 @@ class TestCase5(TestCase2):
...
@@ -163,35 +176,41 @@ class TestCase5(TestCase2):
self
.
pool3D_forward_naive
=
max_pool3D_forward_naive
self
.
pool3D_forward_naive
=
max_pool3D_forward_naive
#--------------------test pool3d
_cudnn
--------------------
#--------------------test pool3d--------------------
class
TestC
udnn
Case1
(
TestPool3d_Op
):
class
TestC
UDNN
Case1
(
TestPool3d_Op
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool3d"
class
TestC
udnn
Case2
(
TestCase1
):
class
TestC
UDNN
Case2
(
TestCase1
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool3d"
class
TestC
udnn
Case3
(
TestCase2
):
class
TestC
UDNN
Case3
(
TestCase2
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool3d"
class
TestC
udnn
Case4
(
TestCase3
):
class
TestC
UDNN
Case4
(
TestCase3
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool3d"
class
TestC
udnn
Case5
(
TestCase4
):
class
TestC
UDNN
Case5
(
TestCase4
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool3d"
class
TestC
udnn
Case6
(
TestCase5
):
class
TestC
UDNN
Case6
(
TestCase5
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"pool3d_cudnn"
self
.
use_cudnn
=
True
self
.
op_type
=
"pool3d"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/v2/fluid/tests/test_print_op.py
浏览文件 @
816e556b
import
unittest
import
unittest
import
numpy
as
np
from
paddle.v2.fluid.executor
import
Executor
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.layers
as
pd
from
paddle.v2.fluid.executor
import
Executor
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.backward
import
append_backward
from
paddle.v2.fluid.framework
import
switch_main_program
from
paddle.v2.fluid.framework
import
Program
import
numpy
as
np
class
TestPrintOpCPU
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
place
=
core
.
CPUPlace
()
self
.
x_tensor
=
core
.
LoDTensor
()
tensor_np
=
np
.
random
.
random
(
size
=
(
2
,
3
)).
astype
(
'float32'
)
self
.
x_tensor
.
set
(
tensor_np
,
self
.
place
)
self
.
x_tensor
.
set_lod
([[
0
,
1
,
1
]])
def
build_network
(
self
,
only_forward
,
**
kargs
):
x
=
layers
.
data
(
'x'
,
shape
=
[
3
],
dtype
=
'float32'
,
lod_level
=
1
)
x
.
stop_gradient
=
False
printed
=
layers
.
Print
(
input
=
x
,
**
kargs
)
if
only_forward
:
return
printed
loss
=
layers
.
mean
(
x
=
printed
)
append_backward
(
loss
=
loss
)
return
loss
class
TestSumOp
(
unittest
.
TestCase
):
def
test_forward
(
self
):
def
test_tensor
(
self
):
switch_main_program
(
Program
())
i
=
pd
.
zeros
(
shape
=
[
2
,
10
],
dtype
=
'float32'
)
printed
=
self
.
build_network
(
True
,
print_phase
=
'forward'
)
exe
=
Executor
(
self
.
place
)
outs
=
exe
.
run
(
feed
=
{
'x'
:
self
.
x_tensor
},
fetch_list
=
[
printed
],
return_numpy
=
False
)
pd
.
Print
(
i
,
message
=
"I am a message"
,
summarize
=
10
)
def
test_backward
(
self
):
switch_main_program
(
Program
())
loss
=
self
.
build_network
(
False
,
print_phase
=
'backward'
)
exe
=
Executor
(
self
.
place
)
outs
=
exe
.
run
(
feed
=
{
'x'
:
self
.
x_tensor
},
fetch_list
=
[
loss
],
return_numpy
=
False
)
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
exe
.
run
()
class
TestPrintOpGPU
(
TestPrintOpCPU
):
def
setUp
(
self
):
self
.
place
=
core
.
CUDAPlace
(
0
)
self
.
x_tensor
=
core
.
LoDTensor
()
tensor_np
=
np
.
random
.
random
(
size
=
(
2
,
3
)).
astype
(
'float32'
)
self
.
x_tensor
.
set
(
tensor_np
,
self
.
place
)
self
.
x_tensor
.
set_lod
([[
0
,
1
,
1
]])
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录