Commit e1a46bba
Authored on Jun 25, 2018 by fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into some_small_fixes

Parents: bc9d19c7, 6d6996af

Showing 47 changed files with 986 additions and 277 deletions (+986 / -277)
Dockerfile  +2 -1
cmake/external/mkldnn.cmake  +1 -1
doc/v2/faq/build_and_install/index_cn.rst  +9 -0
paddle/fluid/framework/details/broadcast_op_handle.cc  +6 -5
paddle/fluid/framework/details/multi_devices_graph_builder.cc  +2 -3
paddle/fluid/framework/details/multi_devices_graph_builder.h  +1 -1
paddle/fluid/framework/details/op_handle_base.cc  +9 -4
paddle/fluid/framework/framework.proto  +2 -0
paddle/fluid/framework/lod_tensor.cc  +7 -3
paddle/fluid/framework/lod_tensor_test.cc  +15 -1
paddle/fluid/framework/op_desc.cc  +13 -0
paddle/fluid/framework/op_desc.h  +2 -0
paddle/fluid/framework/parallel_executor.cc  +1 -1
paddle/fluid/framework/type_defs.h  +2 -1
paddle/fluid/operators/distributed/grpc_client.cc  +13 -2
paddle/fluid/operators/distributed/grpc_client.h  +5 -1
paddle/fluid/operators/distributed/grpc_server.cc  +20 -8
paddle/fluid/operators/distributed/variable_response.cc  +4 -0
paddle/fluid/operators/listen_and_serv_op.cc  +18 -15
paddle/fluid/operators/listen_and_serv_op.h  +1 -1
paddle/fluid/operators/parallel_do_op.cc  +1 -1
paddle/fluid/operators/recurrent_op.cc  +2 -1
paddle/fluid/operators/send_recv_op_test.cc  +4 -1
paddle/fluid/operators/softmax_mkldnn_op.cc  +167 -50
paddle/fluid/operators/softmax_op.cc  +18 -4
paddle/fluid/operators/sum_mkldnn_op.cc  +240 -0
paddle/fluid/operators/sum_op.cc  +26 -6
paddle/fluid/operators/while_op.cc  +2 -2
paddle/fluid/platform/dynload/CMakeLists.txt  +7 -2
paddle/fluid/platform/enforce.h  +5 -2
paddle/fluid/platform/mkldnn_helper.h  +138 -0
paddle/fluid/pybind/protobuf.cc  +3 -1
paddle/fluid/pybind/pybind.cc  +0 -5
paddle/scripts/paddle_build.sh  +10 -10
python/paddle/fluid/backward.py  +6 -5
python/paddle/fluid/framework.py  +18 -6
python/paddle/fluid/layers/io.py  +3 -3
python/paddle/fluid/layers/nn.py  +75 -72
python/paddle/fluid/layers/tensor.py  +17 -13
python/paddle/fluid/tests/unittests/CMakeLists.txt  +2 -3
python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py  +2 -2
python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py  +26 -0
python/paddle/fluid/tests/unittests/test_sum_op.py  +6 -0
python/paddle/fluid/transpiler/distribute_transpiler.py  +21 -13
python/paddle/reader/decorator.py  +2 -2
python/paddle/v2/dataset/cifar.py  +18 -9
python/paddle/v2/dataset/flowers.py  +34 -16
Dockerfile
@@ -76,7 +76,8 @@ RUN easy_install -U pip && \
     pip install sphinx-rtd-theme==0.1.9 recommonmark
 RUN pip install pre-commit 'ipython==5.3.0' && \
-    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0'
+    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip install opencv-python
 #For docstring checker
 RUN pip install pylint pytest astroid isort
...
cmake/external/mkldnn.cmake
@@ -54,7 +54,7 @@ ExternalProject_Add(
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DEPENDS             ${MKLDNN_DEPENDS}
     GIT_REPOSITORY      "https://github.com/01org/mkl-dnn.git"
-    GIT_TAG             "db3424ad44901513c03a1ea31ccaacdf633fbe9f"
+    GIT_TAG             "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51"
     PREFIX              ${MKLDNN_SOURCES_DIR}
     UPDATE_COMMAND      ""
     CMAKE_ARGS          -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
...
doc/v2/faq/build_and_install/index_cn.rst
@@ -213,3 +213,12 @@ virtualenv is itself a Python package and can be installed with pip:
 Save and close the file.
 This way, the Python environment named 'paddle' is activated automatically every time a terminal is opened.
+
+10. PaddlePaddle installed via pip cannot find :code:`libmkldnn.so` or :code:`libmklml_intel.so` on :code:`import paddle.fluid`
+------------------------------------------------------------------------------------------
+
+This happens because importing :code:`paddle.fluid` needs to load :code:`libmkldnn.so` and :code:`libmklml_intel.so`,
+but the system cannot locate them. A pip install of PaddlePaddle normally copies :code:`libmkldnn.so` and :code:`libmklml_intel.so`
+into :code:`/usr/local/lib`, so the fix is to add that path to the :code:`LD_LIBRARY_PATH` environment variable,
+i.e. :code:`export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH`.
+**Note**: if PaddlePaddle was installed inside a virtual environment, :code:`libmkldnn.so` and :code:`libmklml_intel.so` may not be under :code:`/usr/local/lib`.
\ No newline at end of file
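A minimal shell sketch of the workaround this new FAQ entry describes (assuming, as the entry does, that pip placed the libraries under /usr/local/lib; inside a virtualenv the directory may differ):

    # Confirm where pip placed the MKL-DNN / MKLML runtime libraries (path assumed from the FAQ entry).
    ls /usr/local/lib | grep -E 'libmkldnn|libmklml'
    # Let the dynamic loader search that directory, then retry the import.
    export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
    python -c "import paddle.fluid"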
paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -73,6 +73,9 @@ void BroadcastOpHandle::RunImpl() {
     int root_id = boost::get<platform::CUDAPlace>(in_tensor.place()).device;
     std::vector<std::function<void()>> broadcast_calls;
+    int type = platform::ToNCCLDataType(in_tensor.type());
+    size_t numel = static_cast<size_t>(in_tensor.numel());
+
     for (auto out_var_handle : out_var_handles) {
       Variable *out_var = var_scopes.at(out_var_handle->scope_idx_)
                               ->FindVar(out_var_handle->name_);
@@ -87,13 +90,11 @@ void BroadcastOpHandle::RunImpl() {
         send_recv_buffer = const_cast<void *>(in_tensor.data<void>());
         out_handle = out_var_handle;
       } else {
         send_recv_buffer =
-            VariableVisitor::GetMutableTensor(out_var)
-                .Resize(in_tensor.dims())
-                .mutable_data(out_var_handle->place_);
+            VariableVisitor::GetMutableTensor(out_var).mutable_data(
+                out_var_handle->place_);
       }
-      int type = platform::ToNCCLDataType(in_tensor.type());
-      size_t numel = static_cast<size_t>(in_tensor.numel());
       broadcast_calls.emplace_back(
           [send_recv_buffer, numel, type, root_id, &nccl_ctx] {
             PADDLE_ENFORCE(platform::dynload::ncclBcast(
...
paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -351,7 +351,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(SSAGraph *result,
       auto &prev_grad = vars.back();
       op_handle->AddInput(prev_grad.get());
-      auto var = new VarHandle(vars.size() - 1, i, og, p);
+      auto var = new VarHandle(vars.size(), i, og, p);
       vars.emplace_back(var);
       op_handle->AddOutput(var);
     }
@@ -447,8 +447,7 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(SSAGraph *result,
     op_handle->AddInput(prev_grad.get());
   }
   auto &vars = result->vars_[dst_dev_id][og];
-  auto var =
-      new VarHandle(vars.size() - 1, dst_dev_id, og, places_[dst_dev_id]);
+  auto var = new VarHandle(vars.size(), dst_dev_id, og, places_[dst_dev_id]);
   vars.emplace_back(var);
   op_handle->AddOutput(var);
   return var;
...
paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -47,7 +47,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
 #endif
   std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
-  int GetVarDeviceID(const std::string &varname) const;
+  int GetVarDeviceID(const std::string &varname) const override;
  private:
   void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op,
...
paddle/fluid/framework/details/op_handle_base.cc
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/details/op_handle_base.h"
+#include <map>
 namespace paddle {
 namespace framework {
@@ -122,11 +122,16 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) {
 #ifdef PADDLE_WITH_CUDA
   if (!events_.empty()) {  // Use event
     std::function<void()> method = callback;
+    // NOTE(zcd): device context must be ordered here because RecordEvent
+    // will use a mutex to ensure the safe of multi-threads.
+    std::map<platform::DeviceContext *, platform::Place> ordered_ctxes;
     for (auto &p : dev_ctxes_) {
+      ordered_ctxes.emplace(p.second, p.first);
+    }
+    for (auto &p : ordered_ctxes) {
       method = [method, p, this]() {
-        static_cast<platform::CUDADeviceContext *>(p.second)->RecordEvent(
-            events_.at(boost::get<platform::CUDAPlace>(p.first).device),
+        static_cast<platform::CUDADeviceContext *>(p.first)->RecordEvent(
+            events_.at(boost::get<platform::CUDAPlace>(p.second).device),
             method);
       };
     }
...
paddle/fluid/framework/framework.proto
@@ -27,6 +27,7 @@ enum AttrType {
   BOOLEANS = 7;
   BLOCK = 8;
   LONG = 9;
+  BLOCKS = 10;
 }
 // OpDesc describes an instance of a C++ framework::OperatorBase
@@ -46,6 +47,7 @@ message OpDesc {
     repeated bool bools = 11;
     optional int32 block_idx = 12;
     optional int64 l = 13;
+    repeated int32 blocks_idx = 14;
   };
 message Var {
...
paddle/fluid/framework/lod_tensor.cc
@@ -51,8 +51,6 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) {
 }
 std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
-  PADDLE_ENFORCE(t.type().hash_code() == typeid(float).hash_code());
-
   if (!platform::is_cpu_place(t.place())) {
     LoDTensor tt;
     framework::TensorCopy(t, platform::CPUPlace(), &tt);
@@ -70,7 +68,13 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
   // only print first ten elements
   int64_t size = t.numel() < 10 ? t.numel() : 10;
   for (int64_t i = 0; i < size; ++i) {
-    os << t.data<float>()[i] << " ";
+    if (t.type().hash_code() == typeid(float).hash_code()) {
+      os << t.data<float>()[i] << " ";
+    } else if (t.type().hash_code() == typeid(int64_t).hash_code()) {
+      os << t.data<int64_t>()[i] << " ";
+    } else {
+      PADDLE_THROW("LoDTensor data type not in [float, int64_t]");
+    }
   }
   return os;
...
paddle/fluid/framework/lod_tensor_test.cc
@@ -26,6 +26,20 @@
 namespace paddle {
 namespace framework {
+TEST(LoD, PrintLoDTensor) {
+  LoDTensor tensor1;
+  tensor1.mutable_data<float>(platform::CPUPlace());
+  tensor1.data<float>()[0] = 0.2;
+  tensor1.data<float>()[1] = 0.5;
+  LOG(INFO) << tensor1;
+
+  LoDTensor tensor2;
+  tensor2.mutable_data<int64_t>(platform::CPUPlace());
+  tensor2.data<int64_t>()[0] = 1;
+  tensor2.data<int64_t>()[1] = 2;
+  LOG(INFO) << tensor2;
+}
+
 TEST(LoD, data) {
   LoD lod{{0, 1, 2}};
   lod.push_back({0, 2, 4, 5});
@@ -37,7 +51,7 @@ TEST(LoD, data) {
   }
 }
-TEST(LoDExpand, test) {
+TEST(LoD, ExpandLoD) {
   LoD lod{{0, 2}};
   LoDTensor tensor;
   tensor.set_lod(lod);
...
paddle/fluid/framework/op_desc.cc
@@ -211,6 +211,12 @@ void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) {
   need_update_ = true;
 }
+void OpDesc::SetBlocksAttr(const std::string &name,
+                           std::vector<BlockDesc *> blocks) {
+  this->attrs_[name] = blocks;
+  need_update_ = true;
+}
+
 void OpDesc::SetAttrMap(
     const std::unordered_map<std::string, Attribute> &attr_map) {
   attrs_ = attr_map;
@@ -305,6 +311,13 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
   void operator()(const std::vector<bool> &v) const {
     VectorToRepeated(v, attr_->mutable_bools());
   }
+  void operator()(const std::vector<BlockDesc *> &v) const {
+    std::vector<int> blocks_idx;
+    for (auto blk : v) {
+      blocks_idx.push_back(blk->ID());
+    }
+    VectorToRepeated(blocks_idx, attr_->mutable_blocks_idx());
+  }
   void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); }
   void operator()(int64_t v) const { attr_->set_l(v); }
   void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
...
paddle/fluid/framework/op_desc.h
@@ -77,6 +77,8 @@ class OpDesc {
   void SetBlockAttr(const std::string &name, BlockDesc *block);
+  void SetBlocksAttr(const std::string &name, std::vector<BlockDesc *> blocks);
+
   Attribute GetAttr(const std::string &name) const;
   Attribute GetNullableAttr(const std::string &name) const;
...
paddle/fluid/framework/parallel_executor.cc
@@ -121,7 +121,7 @@ ParallelExecutor::ParallelExecutor(
 #endif
   }
-  builder_ = std::move(builder_factory.Create());
+  builder_ = builder_factory.Create();
   member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
       exec_strategy, member_->local_scopes_, places,
       builder_->Build(main_program)));
...
paddle/fluid/framework/type_defs.h
@@ -35,7 +35,8 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>;
 using Attribute =
     boost::variant<boost::blank, int, float, std::string, std::vector<int>,
                    std::vector<float>, std::vector<std::string>, bool,
-                   std::vector<bool>, BlockDesc*, int64_t>;
+                   std::vector<bool>, BlockDesc*, int64_t,
+                   std::vector<BlockDesc*>>;
 using AttributeMap = std::unordered_map<std::string, Attribute>;
...
paddle/fluid/operators/distributed/grpc_client.cc
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <limits>
+#include "glog/logging.h"  // For VLOG
 #include "paddle/fluid/framework/threadpool.h"
 #include "paddle/fluid/operators/distributed/request_handler.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -75,6 +76,9 @@ bool GRPCClient::AsyncSendVar(const std::string& ep,
   var_h.scope = p_scope;
   var_h.name = var_name_val;
   var_h.ctx = p_ctx;
+  var_h.method = "Send";
+
+  VLOG(3) << var_h.String() << " begin";
   // stub context
   SendProcessor* s = new SendProcessor(ch);
@@ -129,6 +133,9 @@ bool GRPCClient::AsyncGetVar(const std::string& ep,
   var_h.scope = p_scope;
   var_h.name = var_name_val;
   var_h.ctx = p_ctx;
+  var_h.method = "Get";
+
+  VLOG(3) << var_h.String() << " begin";
   // stub context
   GetProcessor* s = new GetProcessor(ch);
@@ -172,6 +179,9 @@ bool GRPCClient::AsyncPrefetchVar(const std::string& ep,
   var_h.scope = p_scope;
   var_h.name = out_var_name_val;
   var_h.ctx = p_ctx;
+  var_h.method = "Prefetch";
+
+  VLOG(3) << var_h.String() << " begin";
   // stub context
   GetProcessor* s = new GetProcessor(ch);
@@ -243,10 +253,11 @@ void GRPCClient::Proceed() {
     GPR_ASSERT(ok);
     PADDLE_ENFORCE(c);
     if (c->status_.ok()) {
+      VLOG(3) << c->var_h_.String() << " process";
       c->Process();
     } else {
-      LOG(FATAL) << "var: " << c->var_h_.String()
-                 << " grpc error:" << c->status_.error_message();
+      LOG(FATAL) << c->var_h_.String() << " meets grpc error:"
+                 << c->status_.error_message();
     }
     delete c;
     {
...
paddle/fluid/operators/distributed/grpc_client.h
@@ -47,14 +47,18 @@ namespace operators {
 namespace distributed {
 struct VarHandle {
+  // RPC endpoint.
   std::string ep;
   const platform::DeviceContext* ctx;
   const framework::Scope* scope;
+  // Variable name.
   std::string name;
+  // RPC method name.
+  std::string method;
   std::string String() const {
     std::ostringstream s;
-    s << "name:[" << name << "] ep:[" << ep << "]";
+    s << method << " name:[" << name << "], ep:[" << ep << "]";
     return s.str();
   }
 };
...
paddle/fluid/operators/distributed/grpc_server.cc
@@ -41,6 +41,19 @@ class RequestBase {
   virtual ~RequestBase() {}
   virtual void Process() = 0;
+  std::string Status2String(const std::string& method) {
+    std::string status = "Process";
+    if (status_ == FINISH) {
+      status = "Finish";
+    }
+
+    std::ostringstream s;
+    s << method << " name:[" << GetReqName() << "]"
+      << ", ep:[" << ctx_.peer() << "]"
+      << " " << status << " using req_id:" << req_id_;
+    return s.str();
+  }
+
   CallStatus Status() const {
     std::lock_guard<std::mutex> l(status_mu_);
     return status_;
@@ -272,7 +285,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
                                           int req_id) {
   std::unique_lock<std::mutex> lock(cq_mutex_);
   if (is_shut_down_) {
-    VLOG(3) << "shutdown, do not TryToRegisterNewSendOne";
+    LOG(WARNING) << "shutdown, do not TryToRegisterNewSendOne";
     return;
   }
@@ -306,14 +319,14 @@ void AsyncGRPCServer::HandleRequest(
   bool ok = false;
   while (true) {
-    VLOG(3) << "HandleRequest " << rpc_name << " wait next";
+    VLOG(4) << "HandleRequest " << rpc_name << " wait next";
     if (!cq->Next(&tag, &ok)) {
       LOG(INFO) << "CompletionQueue " << rpc_name << " shutdown!";
       break;
     }
     int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
-    VLOG(3) << "HandleRequest " << rpc_name << ", req_id:" << req_id
+    VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id
             << " get next";
     auto& reqs = rpc_reqs_[rpc_name];
@@ -324,22 +337,21 @@ void AsyncGRPCServer::HandleRequest(
       base = reqs[req_id];
     }
+    VLOG(3) << base->Status2String(rpc_name);
+
     // reference:
     // https://github.com/tensorflow/tensorflow/issues/5596
     // https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM
     // https://groups.google.com/forum/#!topic/grpc-io/ywATt88Ef_I
     if (!ok) {
       LOG(WARNING) << "completion queue:" << rpc_name
-                   << " recv no regular event:argument name["
-                   << base->GetReqName() << "]";
+                   << " recv no regular event"
+                   << " context:" << base->Status2String(rpc_name);
       TryToRegisterNewOne(rpc_name, req_id);
       delete base;
       continue;
     }
-    VLOG(3) << "queue id:" << rpc_name << ", req_id:" << req_id
-            << ", status:" << base->Status();
     switch (base->Status()) {
       case PROCESS: {
         base->Process();
...
paddle/fluid/operators/distributed/variable_response.cc
@@ -76,6 +76,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input,
       if (total_written + size_to_write > length) {
         size_to_write = length - total_written;
       }
+      // This log is useful to see how long a internal block size is of rpc.
+      VLOG(7) << "copy " << size_to_write << " data to CUDAPlace";
       memory::Copy(boost::get<platform::CUDAPlace>(place),
                    reinterpret_cast<void*>(p), cpu, data, size_to_write,
                    gpu_dev_ctx.stream());
@@ -103,6 +105,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input,
     }
     // TODO(gongwb): can we avoid copy?
     platform::CPUPlace cpu;
+    // This log is useful to see how long a internal block size is of rpc.
+    VLOG(7) << "copy " << size_to_write << " data to CPUPlace";
     memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
     p += size_to_write;
...
paddle/fluid/operators/listen_and_serv_op.cc
@@ -101,17 +101,16 @@ void ListenAndServOp::RunSyncLoop(
     framework::Scope *recv_scope,
     const std::vector<int> &prefetch_block_id_list) const {
   size_t num_blocks = program->Size();
+  auto optimize_blocks =
+      Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
   PADDLE_ENFORCE_GE(num_blocks, 2,
                     "server program should have at least 2 blocks");
-  std::vector<int> optimize_block_id_list;
-  for (int blkid = 1; blkid < num_blocks; ++blkid) {
-    if (std::find(prefetch_block_id_list.begin(), prefetch_block_id_list.end(),
-                  blkid) == prefetch_block_id_list.end()) {
-      optimize_block_id_list.push_back(blkid);
-    }
+  std::vector<int> optimize_blocks_idx;
+  for (auto blk : optimize_blocks) {
+    optimize_blocks_idx.push_back(blk->ID());
   }
-  auto optimize_prepared = executor->Prepare(*program, optimize_block_id_list);
+  auto optimize_prepared = executor->Prepare(*program, optimize_blocks_idx);
   // Insert placeholder for block0 which holds current op itself.
   optimize_prepared.insert(
       optimize_prepared.begin(),
@@ -134,14 +133,14 @@ void ListenAndServOp::RunSyncLoop(
     // and this will still work.
     // The optimize blocks which have the same parent ID would run parallel
     // TODO(Yancey1989): need to use ParallelExecutor for future
-    int32_t last_parent_blkid = program->Block(1).Parent();
+    int32_t last_parent_blkid = optimize_blocks[0]->Parent();
     std::vector<size_t> parallel_blkids;
-    parallel_blkids.push_back(1);
+    parallel_blkids.push_back(optimize_blocks[0]->ID());
     double ts = GetTimestamp();
-    for (size_t i = 1; i < optimize_block_id_list.size(); ++i) {
+    for (size_t i = 1; i < optimize_blocks.size(); ++i) {
       // skip the first optimize block because it is already in the
       // parallel_blkids.
-      int blkid = optimize_block_id_list[i];
+      int blkid = optimize_blocks[i]->ID();
       if (program->Block(blkid).Parent() != last_parent_blkid) {
         ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared,
                               program, recv_scope);
@@ -261,8 +260,11 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
   rpc_service_->RegisterRPC(distributed::kRequestPrefetch,
                             request_prefetch_handler_.get());
-  auto *optimize_block = Attr<framework::BlockDesc *>(kOptimizeBlock);
-  auto *program = optimize_block->Program();
+  auto optimize_blocks =
+      Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
+  PADDLE_ENFORCE(optimize_blocks.size() >= 1,
+                 "optimize blocks should be 1 at least on the pserver side.");
+  auto *program = optimize_blocks[0]->Program();
   framework::Executor executor(dev_place);
   // prepare for prefetch
@@ -339,8 +341,9 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
                   "a map from grad name to it's optimize block id")
         .SetDefault({});
     AddAttr<bool>("sync_mode", "if works at sync_mode or not").SetDefault(true);
-    AddAttr<framework::BlockDesc *>(kOptimizeBlock,
-                                    "BlockID to run on server side.");
+    AddAttr<std::vector<framework::BlockDesc *>>(
+        kOptimizeBlocks, "Optimize blocks to run on server side.")
+        .SetDefault({});
     AddAttr<std::vector<std::string>>(kPrefetchVarNameToBlockId,
                                       "prefetch blocks to run on server side.")
         .SetDefault({});
...
paddle/fluid/operators/listen_and_serv_op.h
@@ -30,7 +30,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-constexpr char kOptimizeBlock[] = "OptimizeBlock";
+constexpr char kOptimizeBlocks[] = "optimize_blocks";
 constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id";
 void RunServer(std::shared_ptr<distributed::RPCServer> service);
...
paddle/fluid/operators/parallel_do_op.cc
@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
         auto sum_op = framework::OpRegistry::CreateOp(
             "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
-            framework::AttributeMap{});
+            framework::AttributeMap{{"use_mkldnn", {false}}});
         VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
         sum_op->Run(*sub_scopes[0], places[0]);
         WaitOnPlace(places[0]);
...
paddle/fluid/operators/recurrent_op.cc
@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
       auto sum_op = framework::OpRegistry::CreateOp(
           "sum", {{"X", {pg_names[param_id], new_inside_name}}},
-          {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
+          {{"Out", {pg_names[param_id]}}},
+          framework::AttributeMap{{"use_mkldnn", {false}}});
       sum_op->Run(cur_scope, place);
       cur_scope.Rename(new_inside_name, inside_grad_name);
...
paddle/fluid/operators/send_recv_op_test.cc
@@ -129,7 +129,10 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
   // sub program run in listen_and_serv_op, for simple test we use sum
   f::ProgramDesc program;
   const auto &root_block = program.Block(0);
+  std::vector<framework::BlockDesc *> optimize_blocks;
   auto *optimize_block = program.AppendBlock(root_block);
+  optimize_blocks.push_back(optimize_block);
+
   auto *prefetch_block = program.AppendBlock(root_block);
   // X for server side tensors, RX for received tensors, must be of same shape.
   AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block,
@@ -139,7 +142,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
   attrs.insert({"Fanin", 1});
   attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
   attrs.insert({"GradList", std::vector<std::string>({"x1"})});
-  attrs.insert({"OptimizeBlock", optimize_block});
+  attrs.insert({"optimize_blocks", optimize_blocks});
   attrs.insert({"PrefetchBlock", prefetch_block});
   attrs.insert({"grad_to_block_id", std::vector<std::string>({""})});
   attrs.insert({"sync_mode", true});
...
paddle/fluid/operators/softmax_mkldnn_op.cc
@@ -27,8 +27,81 @@ using paddle::platform::MKLDNNMemDesc;
 using mkldnn::memory;  // Note: paddle has also "memory" namespace
 using mkldnn::primitive;
 using mkldnn::softmax_forward;
+using mkldnn::softmax_backward;
 using mkldnn::prop_kind;
 using mkldnn::stream;
+using platform::to_void_cast;
+
+class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
+ public:
+  SoftmaxMKLDNNHandler(
+      std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd,
+      const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
+      const std::string& base_key)
+      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
+        softmax_pd_(softmax_pd) {}
+
+  SoftmaxMKLDNNHandler(
+      std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd,
+      std::shared_ptr<mkldnn::softmax_backward::primitive_desc> softmax_bwd_pd,
+      const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
+      const std::string& base_key)
+      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
+        softmax_pd_(softmax_pd),
+        softmax_bwd_pd_(softmax_bwd_pd) {
+    // If we are in Grad operatgor then update a key with BWD suffix to
+    // distinguish from FWD memory primitives
+    key_ += "-BWD";
+  }
+
+  std::shared_ptr<mkldnn::softmax_forward> AcquireSoftmax(
+      std::shared_ptr<mkldnn::memory> dst_memory_p,
+      std::shared_ptr<mkldnn::memory> src_memory_p) {
+    /*Generate key*/
+    auto prim_key = key_ + "@softmax_p";
+
+    auto softmax_p = std::static_pointer_cast<mkldnn::softmax_forward>(
+        dev_ctx_.GetBlob(prim_key));
+    PADDLE_ENFORCE((softmax_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find softmax primitive in device context");
+    if (softmax_p == nullptr) {
+      softmax_p = std::make_shared<mkldnn::softmax_forward>(
+          *(softmax_pd_.get()),
+          *(static_cast<mkldnn::memory*>(src_memory_p.get())),
+          *(static_cast<mkldnn::memory*>(dst_memory_p.get())));
+      dev_ctx_.SetBlob(prim_key, softmax_p);
+    } else {
+      is_reusing_ = true;
+    }
+    return softmax_p;
+  }
+
+  std::shared_ptr<mkldnn::softmax_backward> AcquireSoftmaxBackward(
+      std::shared_ptr<mkldnn::memory> dst_memory_p,
+      std::shared_ptr<mkldnn::memory> diff_dst_memory_p,
+      std::shared_ptr<mkldnn::memory> diff_src_memory_p) {
+    auto prim_key = key_ + "@softmax_bwd_p";
+    auto softmax_bwd_p = std::static_pointer_cast<mkldnn::softmax_backward>(
+        dev_ctx_.GetBlob(prim_key));
+    PADDLE_ENFORCE((softmax_bwd_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find softmax backward primitive in device context");
+    if (softmax_bwd_p == nullptr) {
+      softmax_bwd_p = std::make_shared<mkldnn::softmax_backward>(
+          *softmax_bwd_pd_, *(dst_memory_p.get()), *(diff_dst_memory_p.get()),
+          *(diff_src_memory_p.get()));
+      dev_ctx_.SetBlob(prim_key, softmax_bwd_p);
+    } else {
+      is_reusing_ = true;
+    }
+    return softmax_bwd_p;
+  }
+
+ private:
+  std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd_;
+  std::shared_ptr<mkldnn::softmax_backward::primitive_desc> softmax_bwd_pd_;
+};
+
 template <typename T>
 class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
@@ -54,56 +127,27 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     // Same memory descriptor to be used for input and output
     memory::dims softmax_tz = {src_tz[0], src_tz[1]};
     // Generate keys for storing/retriving primitives for this operator
-    // TODO(jczaja): Each MKLDNN operator may have diffrent hashing function
-    auto gethash = [](memory::dims& operand_dims) {
-      return std::string(std::to_string(operand_dims[0]) + "-" +
-                         std::to_string(operand_dims[1]));
-    };
-    const std::string key = gethash(softmax_tz);
-    const std::string key_softmax_p = key + "@softmax_p";
-    const std::string key_softmax_src_mem_p = key + "@softmax_src_mem_p";
-    const std::string key_softmax_dst_mem_p = key + "@softmax_dst_mem_p";
-
-    std::shared_ptr<void> softmax_p = dev_ctx.GetBlob(key_softmax_p);
-    if (softmax_p == nullptr) {
-      // Currently only NC data format is supported
-      auto softmax_md =
-          MKLDNNMemDesc({softmax_tz}, memory::f32, memory::format::nc);
-      // Normalization is made after innermost dimension eg. C out of NC
-      auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring,
-                                                softmax_md, 1 /*dim: C*/);
-      // create memory primitives
-      auto softmax_src_memory_p = std::make_shared<memory>(
-          memory::primitive_desc{softmax_md, mkldnn_engine},
-          static_cast<void*>(const_cast<T*>(input_data)));
-      dev_ctx.SetBlob(key_softmax_src_mem_p, softmax_src_memory_p);
-      auto softmax_dst_memory_p = std::make_shared<memory>(
-          memory::primitive_desc{softmax_md, mkldnn_engine},
-          static_cast<void*>(output_data));
-      dev_ctx.SetBlob(key_softmax_dst_mem_p, softmax_dst_memory_p);
-      auto softmax_forward_pd =
-          std::make_shared<softmax_forward::primitive_desc>(softmax_desc,
-                                                            mkldnn_engine);
-      softmax_p = std::make_shared<softmax_forward>(
-          *(softmax_forward_pd.get()),
-          *(static_cast<memory*>(softmax_src_memory_p.get())),
-          *(static_cast<memory*>(softmax_dst_memory_p.get())));
-      dev_ctx.SetBlob(key_softmax_p, softmax_p);
-    } else {
-      // Primitives already exist
-      auto src_memory_p = std::static_pointer_cast<memory>(
-          dev_ctx.GetBlob(key_softmax_src_mem_p));
-      PADDLE_ENFORCE(src_memory_p != nullptr,
-                     "Fail to find softmax src mem_p in device context");
-      auto dst_memory_p = std::static_pointer_cast<memory>(
-          dev_ctx.GetBlob(key_softmax_dst_mem_p));
-      PADDLE_ENFORCE(dst_memory_p != nullptr,
-                     "Fail to find softmax dst mem_p in device context");
-      src_memory_p->set_data_handle(
-          reinterpret_cast<void*>(const_cast<T*>(input_data)));
-      dst_memory_p->set_data_handle(output_data);
-    }
+    const std::string key =
+        platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Output("Out"));
+    const std::string key_softmax_pd = key + "@softmax_pd";
+
+    // Currently only NC data format is supported
+    auto softmax_md = MKLDNNMemDesc(
+        {softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
+    // Normalization is made after innermost dimension eg. C out of NC
+    auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring,
+                                              softmax_md, 1 /*dim: C*/);
+    auto softmax_pd = std::make_shared<mkldnn::softmax_forward::primitive_desc>(
+        softmax_desc, mkldnn_engine);
+    dev_ctx.SetBlob(key_softmax_pd, softmax_pd);
+
+    SoftmaxMKLDNNHandler handler(softmax_pd, dev_ctx, mkldnn_engine, key);
+    auto softmax_src_memory_p =
+        handler.AcquireSrcMemory(softmax_md, to_void_cast<T>(input_data));
+    auto softmax_dst_memory_p =
+        handler.AcquireDstMemory(softmax_md, to_void_cast<T>(output_data));
+    auto softmax_p =
+        handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p);
     std::vector<primitive> pipeline{
         *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
@@ -120,6 +164,77 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
   }
 };
+template <typename T>
+class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
+ public:
+  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
+                   "It must use CPUPlace.");
+    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
+    auto mkldnn_engine = dev_ctx.GetEngine();
+    const Tensor* output = ctx.Input<Tensor>("Out");
+    const T* dst_data = output->data<T>();
+
+    auto* dout = ctx.template Input<Tensor>(framework::GradVarName("Out"));
+    const auto* diff_dst_ptr = dout->template data<T>();
+
+    auto* dx =
+        ctx.template Output<framework::Tensor>(framework::GradVarName("X"));
+    T* diff_src_ptr = dx->template mutable_data<T>(ctx.GetPlace());
+
+    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
+    std::vector<int> src_tz(dst_tz);
+    PADDLE_ENFORCE(output->dims().size() == 2UL,
+                   "The input of softmax op must be a 2D matrix.");
+    // MKL-DNN does support softmax over selected axis. Having 2D Tensor,
+    // we will make normalization after final eg. axis: 1
+    PADDLE_ENFORCE(((src_tz[0] == dst_tz[0]) && (src_tz[1] == dst_tz[1])),
+                   "Softmax input and output dimensions should match");
+    // Same memory descriptor to be used for input and output
+    memory::dims softmax_tz = {src_tz[0], src_tz[1]};
+    // Currently only supports NC data format
+    // retrieve eltwise primitive desc from device context
+    const std::string key =
+        platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Input("Out"));
+    const std::string key_softmax_pd = key + "@softmax_pd";
+
+    auto softmax_pd =
+        std::static_pointer_cast<mkldnn::softmax_forward::primitive_desc>(
+            dev_ctx.GetBlob(key_softmax_pd));
+    PADDLE_ENFORCE(softmax_pd != nullptr,
+                   "Fail to find softmax_pd in device context");
+
+    // TODO(jczaja): Add layouts support when there is a need to do so
+    // Two dimensional softmax does support NC format
+    auto data_softmax_md = MKLDNNMemDesc(
+        {softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
+    auto diff_softmax_md = MKLDNNMemDesc(
+        {softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
+    // Normalization is made after innermost dimension eg. C out of NC
+    auto softmax_bwd_desc =
+        softmax_backward::desc(diff_softmax_md, data_softmax_md, 1 /* dim: C*/);
+    auto softmax_bwd_pd =
+        std::make_shared<mkldnn::softmax_backward::primitive_desc>(
+            softmax_bwd_desc, mkldnn_engine, *softmax_pd);
+
+    SoftmaxMKLDNNHandler handler(softmax_pd, softmax_bwd_pd, dev_ctx,
+                                 mkldnn_engine, key);
+    auto dst_memory_p =
+        handler.AcquireDstMemory(data_softmax_md, to_void_cast<T>(dst_data));
+    auto diff_dst_memory_p = handler.AcquireDiffDstMemory(
+        diff_softmax_md, to_void_cast<T>(diff_dst_ptr));
+    auto diff_src_memory_p = handler.AcquireDiffSrcMemory(
+        diff_softmax_md, to_void_cast<T>(diff_src_ptr));
+
+    // Get primitve from device context
+    auto softmax_bwd_p = handler.AcquireSoftmaxBackward(
+        dst_memory_p, diff_dst_memory_p, diff_src_memory_p);
+    std::vector<primitive> pipeline{*softmax_bwd_p};
+    stream(stream::kind::eager).submit(pipeline).wait();
+  }
+};
 }  // namespace operators
 }  // namespace paddle
@@ -127,3 +242,5 @@ namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(softmax, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::SoftmaxMKLDNNKernel<float>);
+REGISTER_OP_KERNEL(softmax_grad, MKLDNN, ::paddle::platform::CPUPlace,
+                   ops::SoftmaxMKLDNNGradKernel<float>);
paddle/fluid/operators/softmax_op.cc
@@ -145,16 +145,30 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
       const framework::ExecutionContext& ctx) const override {
     // choose cudnn kernel if the runtime supported.
     framework::LibraryType library_{framework::LibraryType::kPlain};
+    std::string data_format = ctx.Attr<std::string>("data_format");
+    framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
 #ifdef PADDLE_WITH_CUDA
     if (platform::CanCUDNNBeUsed(ctx)) {
       library_ = framework::LibraryType::kCUDNN;
     }
 #endif
-    std::string data_format = ctx.Attr<std::string>("data_format");
-    return framework::OpKernelType(
-        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
-        framework::StringToDataLayout(data_format), library_);
+#ifdef PADDLE_WITH_MKLDNN
+    if (library_ == framework::LibraryType::kPlain &&
+        platform::CanMKLDNNBeUsed(ctx)) {
+      library_ = framework::LibraryType::kMKLDNN;
+      layout_ = framework::DataLayout::kMKLDNN;
+    }
+#endif
+    auto input_data_type =
+        framework::ToDataType(ctx.Input<Tensor>("X")->type());
+    if (input_data_type == framework::proto::VarType::FP16) {
+      PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+                     "float16 can only be used on GPU place");
+    }
+    return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_,
+                                   library_);
   }
 };
...
paddle/fluid/operators/sum_mkldnn_op.cc  (new file, 0 → 100644)
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*Licensed under the Apache License, Version 2.0(the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "mkldnn.hpp"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/fluid/operators/sum_op.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
+
+namespace paddle {
+namespace operators {
+
+using paddle::framework::Tensor;
+using paddle::platform::MKLDNNDeviceContext;
+using paddle::platform::CPUDeviceContext;
+using framework::DataLayout;
+using mkldnn::memory;
+using mkldnn::primitive;
+using mkldnn::stream;
+using mkldnn::sum;
+using mkldnn::reorder;
+using platform::to_void_cast;
+
+template <typename T>
+class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
+ public:
+  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
+                   "It must use CPUPlace.");
+    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
+    const auto& mkldnn_engine = dev_ctx.GetEngine();
+    auto in_vars = ctx.MultiInputVar("X");
+
+    const int N = in_vars.size();
+    auto out_var = ctx.OutputVar("Out");
+    bool in_place = out_var == in_vars[0];
+
+    if (out_var->IsType<framework::LoDTensor>()) {
+      LoDTensor* output = ctx.Output<LoDTensor>("Out");
+      T* output_data = output->mutable_data<T>(ctx.GetPlace());
+
+      std::vector<int> dst_tz = framework::vectorize2int(output->dims());
+      auto src_tz = dst_tz;
+      memory::format output_format{memory::format::format_undef};
+      std::vector<float> scales;
+      std::vector<memory::primitive_desc> srcs_mpd;
+      std::vector<mkldnn::memory> srcs_mem;
+
+      PADDLE_ENFORCE(in_vars[0]->IsType<LoDTensor>(),
+                     "Input[0] must be LoDTensors");
+      auto& input0 = in_vars[0]->Get<LoDTensor>();
+      PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN &&
+                         input0.format() != memory::format::format_undef,
+                     "Wrong layout/format for inputs[0]");
+
+      memory::format input_format = input0.format();
+
+      if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
+                                 input_format == memory::format::nhwc)) {
+        input_format = memory::format::x;
+      }
+      if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
+                                 input_format == memory::format::nhwc)) {
+        input_format = memory::format::nc;
+      }
+
+      for (int i = in_place ? 1 : 0; i < N; i++) {
+        PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
+                       "all inputs must be all LoDTensors");
+        auto& input = in_vars[i]->Get<LoDTensor>();
+        PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN &&
+                           input.format() != memory::format::format_undef,
+                       "Wrong layout/format for inputs");
+
+        if (input.numel() == 0) {
+          continue;
+        }
+
+        const T* input_data = input.data<T>();
+
+        auto src_md =
+            memory::desc(src_tz, memory::data_type::f32, input_format);
+        auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine);
+        auto src_mem = memory(src_mpd, to_void_cast(input_data));
+        srcs_mpd.push_back(src_mpd);
+        srcs_mem.push_back(src_mem);
+        scales.push_back(1.0);
+      }
+
+      auto dst_md =
+          memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
+
+      auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
+
+      std::shared_ptr<memory> dst_mem;
+      if (in_place) {
+        dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
+      } else {
+        dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
+      }
+      std::vector<mkldnn::primitive::at> inputs;
+      for (size_t i = 0; i < srcs_mem.size(); ++i) {
+        inputs.push_back(srcs_mem[i]);
+      }
+
+      auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem);
+      output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd);
+
+      primitive reorder_prim;
+      std::shared_ptr<memory> target_mem;
+      if (in_place) {
+        output_format = input_format;
+        target_mem.reset(new memory(
+            {{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine},
+            output_data));
+        reorder_prim = reorder(*dst_mem, *target_mem);
+      }
+
+      std::vector<primitive> pipeline;
+      pipeline.push_back(sum_prim);
+      if (in_place) pipeline.push_back(reorder_prim);
+      stream(stream::kind::eager).submit(pipeline).wait();
+
+      output->set_layout(DataLayout::kMKLDNN);
+      output->set_format(output_format);
+    } else if (out_var->IsType<framework::SelectedRows>()) {
+      // TODO(@mozga-intel) Add MKLDNN SelectedRows support
+      std::unique_ptr<framework::SelectedRows> in0;
+      if (in_place) {
+        // If is in_place, we store the input[0] to in0
+        auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
+        auto& rows = in_sel0.rows();
+        in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
+        in0->mutable_value()->ShareDataWith(in_sel0.value());
+      }
+
+      auto get_selected_row = [&](size_t i) -> const SelectedRows& {
+        if (i == 0 && in0) {
+          return *in0.get();
+        } else {
+          return in_vars[i]->Get<SelectedRows>();
+        }
+      };
+
+      auto* out = ctx.Output<SelectedRows>("Out");
+      out->mutable_rows()->clear();
+      auto* out_value = out->mutable_value();
+
+      // Runtime InferShape
+      size_t first_dim = 0;
+      for (int i = 0; i < N; i++) {
+        auto& sel_row = get_selected_row(i);
+        first_dim += sel_row.rows().size();
+      }
+      auto in_dim =
+          framework::vectorize(get_selected_row(N - 1).value().dims());
+      in_dim[0] = static_cast<int64_t>(first_dim);
+
+      out_value->Resize(framework::make_ddim(in_dim));
+
+      // if all the input sparse vars are empty, no need to
+      // merge these vars.
+      if (first_dim == 0UL) {
+        return;
+      }
+      out_value->mutable_data<T>(ctx.GetPlace());
+
+      math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
+
+      int64_t offset = 0;
+      for (int i = 0; i < N; i++) {
+        auto& sel_row = get_selected_row(i);
+        if (sel_row.rows().size() == 0) {
+          continue;
+        }
+        PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
+        functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
+                offset, out);
+        offset += sel_row.value().numel();
+      }
+    } else if (out_var->IsType<framework::LoDTensorArray>()) {
+      // TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
+      auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
+      for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
+        PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
+                       "Only support all inputs are TensorArray");
+        auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();
+
+        for (size_t i = 0; i < in_array.size(); ++i) {
+          if (in_array[i].numel() != 0) {
+            if (i >= out_array.size()) {
+              out_array.resize(i + 1);
+            }
+            if (out_array[i].numel() == 0) {
+              framework::TensorCopy(in_array[i], in_array[i].place(),
+                                    ctx.device_context(), &out_array[i]);
+              out_array[i].set_lod(in_array[i].lod());
+            } else {
+              PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
=
EigenVector
<
T
>::
Flatten
(
in_array
[
i
]);
auto
result
=
EigenVector
<
T
>::
Flatten
(
out_array
[
i
]);
result
.
device
(
*
ctx
.
template
device_context
<
MKLDNNDeviceContext
>()
.
eigen_device
())
=
result
+
in
;
}
}
}
}
}
else
{
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
out_var
->
Type
().
name
());
}
}
};
}
// namespace operators
}
// namespace paddle
REGISTER_OP_KERNEL
(
sum
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
paddle
::
operators
::
SumMKLDNNOpKernel
<
float
>
);
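For reference, here is a minimal Python-side sketch (not part of this commit, and assuming the 2018 Fluid Python API) of how the new MKLDNN sum kernel can be exercised: build a sum op by hand with the new use_mkldnn attribute set to True on a CPU place. Names such as sum_out are invented for illustration.

# Hypothetical usage sketch: route a sum op to SumMKLDNNOpKernel by setting
# the new use_mkldnn attribute (requires a CPU build of Paddle with MKLDNN).
import numpy as np
import paddle.fluid as fluid

x0 = fluid.layers.data(name='x0', shape=[3, 4], dtype='float32', append_batch_size=False)
x1 = fluid.layers.data(name='x1', shape=[3, 4], dtype='float32', append_batch_size=False)

block = fluid.default_main_program().global_block()
out = block.create_var(name='sum_out', dtype='float32', shape=[3, 4])
block.append_op(type='sum',
                inputs={'X': [x0, x1]},
                outputs={'Out': out},
                attrs={'use_mkldnn': True})

exe = fluid.Executor(fluid.CPUPlace())
feed = {'x0': np.random.rand(3, 4).astype('float32'),
        'x1': np.random.rand(3, 4).astype('float32')}
result, = exe.run(fluid.default_main_program(), feed=feed, fetch_list=[out])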
paddle/fluid/operators/sum_op.cc

The CPU sum operator now declares a use_mkldnn attribute and dispatches to the MKLDNN kernel when it can be used.

@@ -18,6 +18,10 @@ limitations under the License. */
 #include "paddle/fluid/framework/var_type_inference.h"
 #include "paddle/fluid/operators/detail/safe_ref.h"
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#endif

 namespace paddle {
 namespace operators {
 using framework::Tensor;

@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel {
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     auto x_vars = ctx.MultiInputVar("X");
+    framework::LibraryType library{framework::LibraryType::kPlain};
+    framework::DataLayout layout{framework::DataLayout::kAnyLayout};
+
+#ifdef PADDLE_WITH_MKLDNN
+    if (library == framework::LibraryType::kPlain &&
+        platform::CanMKLDNNBeUsed(ctx)) {
+      library = framework::LibraryType::kMKLDNN;
+      layout = framework::DataLayout::kMKLDNN;
+    }
+#endif
+
     if (x_vars[0]->IsType<framework::LoDTensor>()) {
       int dtype = -1;
       for (auto& x_var : x_vars) {

@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel {
           "Sum operator should have at least one tensor");

       return framework::OpKernelType(
-          static_cast<framework::proto::VarType::Type>(dtype),
-          ctx.device_context());
+          static_cast<framework::proto::VarType::Type>(dtype), ctx.GetPlace(),
+          layout, library);
     } else if (x_vars[0]->IsType<framework::SelectedRows>()) {
       for (auto& var : x_vars) {
         auto& value = var->Get<framework::SelectedRows>().value();
         if (value.IsInitialized()) {
           return framework::OpKernelType(framework::ToDataType(value.type()),
-                                         ctx.device_context());
+                                         ctx.device_context(), layout, library);
         }
       }
       // if input sparse vars are not initialized, use an default kernel type.
       return framework::OpKernelType(framework::proto::VarType::FP32,
-                                     ctx.device_context());
+                                     ctx.device_context(), layout, library);
     } else if (x_vars[0]->IsType<framework::LoDTensorArray>()) {
       for (auto& x_var : x_vars) {
         auto& array = x_var->Get<framework::LoDTensorArray>();
         for (auto& each : array) {
           if (each.numel() != 0) {
             return framework::OpKernelType(framework::ToDataType(each.type()),
-                                           ctx.device_context());
+                                           ctx.device_context(), layout,
+                                           library);
           }
         }
       }

@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
         .AsDuplicable();
     AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X");
+    AddAttr<bool>("use_mkldnn",
+                  "(bool, default false) Only used in mkldnn kernel")
+        .SetDefault(false);
     AddComment(R"DOC(
 Sum operator.

@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
                       framework::BlockDesc *block) const override {
     auto &inputs = op_desc.Input("X");
     auto var_type = framework::proto::VarType::SELECTED_ROWS;
     for (auto &name : op_desc.Input("X")) {
       VLOG(10) << name << " "
                << block->FindRecursiveOrCreateVar(name).GetType();

@@ -206,6 +225,7 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker,
                   ops::SumOpVarTypeInference);
 REGISTER_OP_CPU_KERNEL(
     sum, ops::SumKernel<paddle::platform::CPUDeviceContext, float>,
     ops::SumKernel<paddle::platform::CPUDeviceContext, double>,
paddle/fluid/operators/while_op.cc

The sum op created inside the while-gradient path now passes an explicit use_mkldnn attribute.

@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
               ->set_lod(inside_tensor.lod());
         }
       }
       auto new_inside_name = cur_scope.Rename(inside_grad_name);
       auto sum_op = framework::OpRegistry::CreateOp(
           "sum", {{"X", {pg_names[param_id], new_inside_name}}},
-          {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
+          {{"Out", {pg_names[param_id]}}},
+          framework::AttributeMap{{"use_mkldnn", {false}}});
       sum_op->Run(cur_scope, dev_place);
       cur_scope.Rename(new_inside_name, inside_grad_name);
     }
paddle/fluid/platform/dynload/CMakeLists.txt

NCCL is no longer compiled into the CUDA dynload sources on macOS.

 cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
-list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc)
+list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc)
+
+# There is no macOS version of NCCL.
+if (NOT APPLE)
+  list(APPEND CUDA_SRCS nccl.cc)
+endif()
+
 if (TENSORRT_FOUND)
   list(APPEND CUDA_SRCS tensorrt.cc)
 endif()

 configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
 if (CUPTI_FOUND)
   list(APPEND CUDA_SRCS cupti.cc)
paddle/fluid/platform/enforce.h

The NCCL header and the ncclResult_t overload of throw_on_error are now guarded out on macOS.

@@ -44,8 +44,10 @@ limitations under the License. */
 #include "paddle/fluid/platform/dynload/cublas.h"
 #include "paddle/fluid/platform/dynload/cudnn.h"
 #include "paddle/fluid/platform/dynload/curand.h"
+#ifndef __APPLE__
 #include "paddle/fluid/platform/dynload/nccl.h"
+#endif  // __APPLE__
 #endif  // PADDLE_WITH_CUDA

 namespace paddle {
 namespace platform {

@@ -174,6 +176,7 @@
     throw std::runtime_error(err + string::Sprintf(args...));
 }

+#ifndef __APPLE__
 template <typename... Args>
 inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
     ncclResult_t stat, const Args&... args) {

@@ -184,7 +187,7 @@
         string::Sprintf(args...));
   }
 }
+#endif  // __APPLE__
 #endif  // PADDLE_WITH_CUDA

 template <typename T>
paddle/fluid/platform/mkldnn_helper.h

This change adds a GetMKLDNNFormat overload for sum primitive descriptors and a new MKLDNNHandler class that caches MKL-DNN memory/reorder primitives in the device context.

@@ -99,5 +99,143 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) {
       memory.get_primitive_desc().desc().data.format);
 }

+inline mkldnn::memory::format GetMKLDNNFormat(
+    const mkldnn::sum::primitive_desc& memory) {
+  return static_cast<mkldnn::memory::format>(
+      memory.dst_primitive_desc().desc().data.format);
+}
+
+class MKLDNNHandler {
+ public:
+  MKLDNNHandler(const MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
+                const std::string& base_key)
+      : dev_ctx_(dev_ctx),
+        engine_(engine),
+        key_(base_key),
+        is_reusing_(false) {}
+
+  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
+      const mkldnn::memory::desc& md, void* ptr) {
+    return this->AcquireMemory(md, ptr, "@user_src_mem_p");
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireWeightsMemory(
+      const mkldnn::memory::desc& md, void* ptr) {
+    return this->AcquireMemory(md, ptr, "@user_weights_mem_p");
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireDstMemory(
+      const mkldnn::memory::desc& md, void* ptr) {
+    return this->AcquireMemory(md, ptr, "@user_dst_mem_p");
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(
+      const mkldnn::memory::desc& md, void* ptr) {
+    return this->AcquireMemory(md, ptr, "@user_diff_dst_mem_p");
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireDiffSrcMemory(
+      const mkldnn::memory::desc& md, void* ptr) {
+    return this->AcquireMemory(md, ptr, "@user_diff_src_mem_p");
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
+      mkldnn::memory::primitive_desc mdp, void* ptr,
+      const std::string& suffix) {
+    auto local_key = key_ + suffix;
+    auto mem_p =
+        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
+    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find mem primitive in device context");
+    if (mem_p == nullptr) {
+      mem_p = std::make_shared<mkldnn::memory>(mdp, ptr);
+      dev_ctx_.SetBlob(local_key, mem_p);
+    } else {
+      mem_p->set_data_handle(ptr);
+      // Mark that reusing happenned. All primitives from operator instance
+      // should be reused or none of them. So we check consistency
+      is_reusing_ = true;
+    }
+    return mem_p;
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireMemory(const mkldnn::memory::desc& md,
+                                                void* ptr,
+                                                const std::string& suffix) {
+    /*Generate key*/
+    auto local_key = key_ + suffix;
+    auto mem_p =
+        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
+    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find mem primitive in device context");
+    if (mem_p == nullptr) {
+      mem_p = std::make_shared<mkldnn::memory>(
+          mkldnn::memory::primitive_desc{md, engine_}, ptr);
+      dev_ctx_.SetBlob(local_key, mem_p);
+    } else {
+      mem_p->set_data_handle(ptr);
+      // Mark that reusing happenned. All primitives from operator instance
+      // should be reused or none of them. So we check consistency
+      is_reusing_ = true;
+    }
+    return mem_p;
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireMemory(
+      mkldnn::memory::primitive_desc& mpd,
+      mkldnn::memory::primitive_desc& user_mpd,
+      const std::shared_ptr<mkldnn::memory> user_memory_p,
+      const std::string& suffix,
+      std::vector<mkldnn::primitive>& pipeline) {
+    // create reorder primitive if the input format is not the preferred one
+    auto local_key = key_ + suffix;
+    auto key_reorder_p = key_ + suffix + "reorder_p";
+
+    auto target_memory_p =
+        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
+    PADDLE_ENFORCE((target_memory_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find mem primitive in device context");
+    if (target_memory_p == nullptr) {
+      target_memory_p = user_memory_p;
+      std::shared_ptr<mkldnn::primitive> reorder_p;
+      if (mpd != user_mpd) {
+        target_memory_p = std::make_shared<mkldnn::memory>(mpd);
+
+        auto reorder_p =
+            std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p);
+        dev_ctx_.SetBlob(key_reorder_p, reorder_p);
+        pipeline.push_back(*reorder_p);
+      }
+      dev_ctx_.SetBlob(local_key, target_memory_p);
+    } else {
+      // Make reorder if needed
+      auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
+          dev_ctx_.GetBlob(key_reorder_p));
+      if (reorder_p != nullptr) {
+        pipeline.push_back(*reorder_p);
+      }
+      is_reusing_ = true;
+    }
+    return target_memory_p;
+  }
+
+  static std::string GetHash(mkldnn::memory::dims& operand_dims,
+                             const std::string& suffix) {
+    auto dims2str = [](const mkldnn::memory::dims& operand_dims) {
+      std::string dstr = "";
+      for (size_t i = 0; i < operand_dims.size(); ++i) {
+        dstr += std::to_string(operand_dims[i]) + "-";
+      }
+      return dstr;
+    };
+    return dims2str(operand_dims) + suffix;
+  };
+
+ protected:
+  const MKLDNNDeviceContext& dev_ctx_;
+  mkldnn::engine engine_;
+  std::string key_;
+  bool is_reusing_;
+};
+
 }  // namespace platform
 }  // namespace paddle
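The handler caches memory primitives in the device context under a string key built from the operand dims plus a suffix, so later iterations rebind the data handle instead of recreating primitives. A toy, framework-free Python sketch of that caching idea (conceptual only, not the actual C++ API; all names below are invented):

# Conceptual sketch of MKLDNNHandler's blob caching: a per-device-context map
# from "<dims>-<suffix>" keys to already-created memory objects.
class FakeDeviceContext(object):
    def __init__(self):
        self._blobs = {}
    def get_blob(self, key):
        return self._blobs.get(key)
    def set_blob(self, key, value):
        self._blobs[key] = value

def get_hash(operand_dims, suffix):
    # mirrors MKLDNNHandler::GetHash: "8-3-224-224-" + suffix
    return '-'.join(str(d) for d in operand_dims) + '-' + suffix

def acquire_memory(dev_ctx, base_key, make_memory, data_ptr):
    mem = dev_ctx.get_blob(base_key)
    if mem is None:
        mem = make_memory(data_ptr)   # first iteration: create and cache
        dev_ctx.set_blob(base_key, mem)
    else:
        mem['data'] = data_ptr        # later iterations: rebind the data handle
    return mem

dev_ctx = FakeDeviceContext()
key = get_hash([8, 3, 224, 224], '@user_src_mem_p')
m1 = acquire_memory(dev_ctx, key, lambda p: {'data': p}, 'ptr0')
m2 = acquire_memory(dev_ctx, key, lambda p: {'data': p}, 'ptr1')
assert m1 is m2   # the cached object is reused, only the handle changes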
paddle/fluid/pybind/protobuf.cc

The Python bindings now expose the BLOCKS attribute type and OpDesc::SetBlocksAttr.

@@ -268,7 +268,8 @@ void BindOpDesc(pybind11::module *m) {
       .value("STRINGS", pd::proto::AttrType::STRINGS)
       .value("BOOL", pd::proto::AttrType::BOOLEAN)
       .value("BOOLS", pd::proto::AttrType::BOOLEANS)
-      .value("BLOCK", pd::proto::AttrType::BLOCK);
+      .value("BLOCK", pd::proto::AttrType::BLOCK)
+      .value("BLOCKS", pd::proto::AttrType::BLOCKS);

   pybind11::class_<pd::OpDesc> op_desc(*m, "OpDesc", "");
   op_desc

@@ -293,6 +294,7 @@ void BindOpDesc(pybind11::module *m) {
       .def("set_attr", &pd::OpDesc::SetAttr)
       .def("attr", &pd::OpDesc::GetAttr)
       .def("set_block_attr", &pd::OpDesc::SetBlockAttr)
+      .def("set_blocks_attr", &pd::OpDesc::SetBlocksAttr)
       .def("set_serialized_attr",
            [](pd::OpDesc &self, const std::string &name,
               const pybind11::bytes &seriralized) {
paddle/fluid/pybind/pybind.cc

The deprecation warnings previously emitted by LoDTensor.set_lod and LoDTensor.lod are removed.

@@ -167,9 +167,6 @@ PYBIND11_PLUGIN(core) {
       .def("set_lod",
            [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
              // the input lod is offset-based level-of-detail info
-             LOG(WARNING)
-                 << "set_lod is deprecated and will be removed by 9.2018, "
-                    "please switch to set_recursive_sequence_lengths.";
              LoD new_lod;
              new_lod.reserve(lod.size());
              std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));

@@ -196,8 +193,6 @@ PYBIND11_PLUGIN(core) {
       .def("lod",
            [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
              // output the offset-based lod info
-             LOG(WARNING) << "lod is deprecated and will be removed by 9.2018, "
-                             "please switch to recursive_sequence_lengths.";
              LoD lod = self.lod();
              std::vector<std::vector<size_t>> new_lod;
              new_lod.reserve(lod.size());
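For context, the removed warnings pointed users at the length-based API. A short sketch (assuming the 2018 fluid Python bindings) contrasting the two ways of attaching sequence information to a LoDTensor:

# Offset-based vs length-based sequence info; both describe two sequences
# covering rows [0, 2) and [2, 6) of a 6-row tensor.
import numpy as np
import paddle.fluid as fluid

t = fluid.LoDTensor()
t.set(np.arange(6).reshape(6, 1).astype('float32'), fluid.CPUPlace())
t.set_lod([[0, 2, 6]])                        # offset-based form
t.set_recursive_sequence_lengths([[2, 4]])    # equivalent length-based form
print(t.recursive_sequence_lengths())         # [[2, 4]]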
paddle/scripts/paddle_build.sh

WITH_ANAKIN becomes overridable from the environment instead of being hard-coded to ON.

@@ -133,7 +133,7 @@ EOF
         -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
         -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
         -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-        -DWITH_ANAKIN=ON
+        -DWITH_ANAKIN=${WITH_ANAKIN:-ON}
 }

 function abort(){
python/paddle/fluid/backward.py

The sum op descs inserted when merging repeated gradient outputs now carry an explicit use_mkldnn attribute.

@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs):
     for idx, op_desc in enumerate(op_descs):
         for var_name in op_desc.input_arg_names():
             if len(renamed_vars[var_name]) > 1:
                 pending_sum_ops.append(
-                    (_create_op_desc_("sum", {"X": renamed_vars[var_name]},
-                                      {"Out": [var_name]}, {}), idx))
+                    (_create_op_desc_("sum", {"X": renamed_vars[var_name]},
+                                      {"Out": [var_name]},
+                                      {"use_mkldnn": False}), idx))
                 renamed_vars[var_name] = [var_name]
         for var_name in op_desc.output_arg_names():
             if var_name == core.empty_var_name(

@@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs):
             renamed_vars[var_name].append(new_name)

     for var_name, inputs in renamed_vars.iteritems():
         if len(inputs) > 1:
-            pending_sum_ops.append((_create_op_desc_(
-                "sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs)))
+            pending_sum_ops.append(
+                (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]},
+                                  {"use_mkldnn": False}), len(op_descs)))
     # sum_op descs are sorted according to their insert position
     for p in reversed(pending_sum_ops):
         op_descs.insert(p[1], p[0])
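A conceptual sketch of what those inserted descs look like, using plain dicts in place of real op descs (names of the gradient variables are only illustrative):

# backward() merges duplicated gradients of one variable with a single sum op,
# which now explicitly sets use_mkldnn to False.
def make_merge_sum_desc(grad_names, merged_name):
    return {"type": "sum",
            "inputs": {"X": list(grad_names)},
            "outputs": {"Out": [merged_name]},
            "attrs": {"use_mkldnn": False}}

print(make_merge_sum_desc(["w@GRAD@RENAME@0", "w@GRAD@RENAME@1"], "w@GRAD"))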
python/paddle/fluid/framework.py

Operator learns to serialize a list of Blocks through the new set_blocks_attr binding, and the Program.clone docstring gains a usage example.

@@ -558,15 +558,20 @@ class Operator(object):
                 if (attr_name not in self.attrs) or (
                         self.attrs[attr_name] is None):
                     continue
-                if isinstance(self.attrs[attr_name], Block):
+                attr_val = self.attrs[attr_name]
+                if isinstance(attr_val, Block):
                     self.desc.set_block_attr(attr_name,
                                              self.attrs[attr_name].desc)
-                elif isinstance(self.attrs[attr_name], core.BlockDesc) or \
-                        isinstance(self.attrs[attr_name], core.ProgramDesc):
+                elif isinstance(attr_val, list) and attr_val and \
+                        all(isinstance(v, Block) for v in attr_val):
+                    self.desc.set_blocks_attr(attr_name,
+                                              [v.desc for v in attr_val])
+                elif isinstance(attr_val, core.BlockDesc) or \
+                        isinstance(attr_val, core.ProgramDesc):
                     self.desc.set_serialized_attr(
-                        attr_name, self.attrs[attr_name].serialize_to_string())
+                        attr_name, attr_val.serialize_to_string())
                 else:
-                    self.desc.set_attr(attr_name, self.attrs[attr_name])
+                    self.desc.set_attr(attr_name, attr_val)
         self.desc.check_attrs()
         if self.has_kernel(type):
             self.desc.infer_var_type(self.block.desc)

@@ -715,6 +720,9 @@ class Operator(object):
         self.attrs[name] = val
         if isinstance(val, Block):
             self.desc.set_block_attr(name, val.desc)
+        elif isinstance(val, list) and val and all(
+                isinstance(v, Block) for v in val):
+            self.desc.set_blocks_attr(name, [v.desc for v in val])
         elif isinstance(val, core.BlockDesc) or \
                 isinstance(val, core.ProgramDesc):
             self.desc.set_serialized_attr(name, val.serialize_to_string())

@@ -1387,7 +1395,11 @@ class Program(object):
         * Set for_test to True when we want to clone the program for testing.

         Notes: This API DOES NOT prune any operator. Use
-        :code:`clone(for_test=True)` before backward and optimization please.
+        :code:`clone(for_test=True)` before backward and optimization please. e.g.
+
+        >>> test_program = fluid.default_main_program().clone(for_test=True)
+        >>> optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
+        >>> optimizer.minimize()

         Args:
             for_test(bool): True if change the :code:`is_test` attribute of
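A toy sketch of the attribute dispatch Operator now performs: a single Block, a non-empty list of Blocks (the new case), a serializable desc, or a plain attribute. The block_type/desc_types parameters stand in for Block and core.BlockDesc/core.ProgramDesc; this is a simplification, not the framework's own helper.

def dispatch_attr(desc, name, val, block_type, desc_types):
    if isinstance(val, block_type):
        desc.set_block_attr(name, val.desc)
    elif isinstance(val, list) and val and all(
            isinstance(v, block_type) for v in val):
        desc.set_blocks_attr(name, [v.desc for v in val])   # new code path
    elif isinstance(val, desc_types):
        desc.set_serialized_attr(name, val.serialize_to_string())
    else:
        desc.set_attr(name, val)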
python/paddle/fluid/layers/io.py

The ListenAndServ layer switches from the single OptimizeBlock/PrefetchBlock attributes to the new optimize_blocks list.

@@ -186,7 +186,6 @@ class ListenAndServ(object):
         main_program = self.helper.main_program
         current_block = main_program.current_block()
         parent_block = self.parent_block()
-        empty_block = Program().global_block()

         parent_block.append_op(
             type='listen_and_serv',

@@ -195,8 +194,9 @@ class ListenAndServ(object):
             attrs={
                 'endpoint': self.endpoint,
                 'Fanin': self.fan_in,
-                'OptimizeBlock': current_block,
-                'PrefetchBlock': empty_block,
+                'optimize_blocks': [
+                    current_block
+                ],  # did not support multiple optimize blocks in layers
                 'sync_mode': True,  # did not support async now in layers
                 'grad_to_block_id': [""]
             })
python/paddle/fluid/layers/nn.py

Two changes: fc forwards its use_mkldnn flag into the sum op it appends, and the log/relu layers are made consistent by renaming their argument (together with the docstrings, helper.input_dtype call, and the appended op's inputs).

@@ -199,7 +199,10 @@ def fc(input,
     else:
         pre_bias = helper.create_tmp_variable(dtype)
         helper.append_op(
-            type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
+            type="sum",
+            inputs={"X": mul_results},
+            outputs={"Out": pre_bias},
+            attrs={"use_mkldnn": use_mkldnn})
     # add bias
     pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims)
     # add activation

@@ -4910,16 +4913,16 @@ def random_crop(x, shape, seed=None):
     return out

-def log(x):
+def log(input):
     """
     Calculates the natural log of the given input tensor, element-wise.

     .. math::

-        Out = \\ln(x)
+        Out = \\ln(input)

     Args:
-        x (Variable): Input tensor.
+        input (Variable): Input tensor.

     Returns:
         Variable: The natural log of the input tensor computed element-wise.

@@ -4928,27 +4931,27 @@ def log(x):
     .. code-block:: python

-        output = fluid.layers.log(x)
+        output = fluid.layers.log(input)
     """
     helper = LayerHelper('log', **locals())
-    dtype = helper.input_dtype(input_param_name='x')
+    dtype = helper.input_dtype()
     out = helper.create_tmp_variable(dtype)
-    helper.append_op(type="log", inputs={"X": x}, outputs={"Out": out})
+    helper.append_op(type="log", inputs={"X": input}, outputs={"Out": out})
     return out

-def relu(x):
+def relu(input):
     """
     Relu takes one input data (Tensor) and produces one output data (Tensor)
-    where the rectified linear function, y = max(0, x), is applied to
+    where the rectified linear function, y = max(0, input), is applied to
     the tensor elementwise.

     .. math::

-        Out = \\max(0, x)
+        Out = \\max(0, input)

     Args:
-        x (Variable): The input tensor.
+        input (Variable): The input tensor.

     Returns:
         Variable: The output tensor with the same shape as input.

@@ -4957,12 +4960,12 @@ def relu(x):
     .. code-block:: python

-        output = fluid.layers.relu(x)
+        output = fluid.layers.relu(input)
     """
     helper = LayerHelper('relu', **locals())
-    dtype = helper.input_dtype(input_param_name='x')
+    dtype = helper.input_dtype()
     out = helper.create_tmp_variable(dtype)
-    helper.append_op(type="relu", inputs={"X": x}, outputs={"Out": out})
+    helper.append_op(type="relu", inputs={"X": input}, outputs={"Out": out})
     return out
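A hedged sketch of the fc change (assuming fc already exposes a use_mkldnn keyword, as the body change implies): with a list input, fc appends one mul op per input followed by a sum op, and that sum op now inherits fc's use_mkldnn setting.

import paddle.fluid as fluid

a = fluid.layers.data(name='a', shape=[16], dtype='float32')
b = fluid.layers.data(name='b', shape=[16], dtype='float32')
# Two inputs -> two mul ops plus one sum op; the sum op carries use_mkldnn.
out = fluid.layers.fc(input=[a, b], size=8, use_mkldnn=False)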
python/paddle/fluid/layers/tensor.py

layers.sums now tags its underlying sum op with an explicit use_mkldnn attribute.

@@ -230,7 +230,11 @@ def sums(input, out=None):
     helper = LayerHelper('sum', **locals())
     if out is None:
         out = helper.create_tmp_variable(dtype=helper.input_dtype())
-    helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out})
+    helper.append_op(
+        type='sum',
+        inputs={'X': input},
+        outputs={'Out': out},
+        attrs={'use_mkldnn': False})
     return out
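A minimal usage sketch of layers.sums for reference; the appended sum op now always carries use_mkldnn=False.

import paddle.fluid as fluid

a = fluid.layers.data(name='a', shape=[2, 3], dtype='float32', append_batch_size=False)
b = fluid.layers.data(name='b', shape=[2, 3], dtype='float32', append_batch_size=False)
s = fluid.layers.sums(input=[a, b])   # element-wise a + b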
python/paddle/fluid/tests/unittests/CMakeLists.txt

test_listen_and_serv_op is re-enabled and given a 20-second timeout instead of being removed from the test list.

@@ -43,8 +43,6 @@ list(REMOVE_ITEM TEST_OPS test_warpctc_op)
 list(REMOVE_ITEM TEST_OPS test_dist_train)
 list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf)
 list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed)
-# TODO(wuyi): this test hungs on CI, will add it back later
-list(REMOVE_ITEM TEST_OPS test_listen_and_serv_op)
 foreach(TEST_OP ${TEST_OPS})
     py_test_modules(${TEST_OP} MODULES ${TEST_OP})
 endforeach(TEST_OP)

@@ -52,3 +50,4 @@
 py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
 py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
 py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
+set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py

The test stops killing the pserver processes outright and sends catchable signals instead.

@@ -94,7 +94,7 @@ class TestListenAndServOp(OpTest):
         self._wait_ps_ready(p1.pid)

         # raise SIGTERM to pserver
-        os.kill(p1.pid, signal.SIGKILL)
+        os.kill(p1.pid, signal.SIGINT)
         p1.join()

         # run pserver on CPU in async mode

@@ -102,7 +102,7 @@ class TestListenAndServOp(OpTest):
         self._wait_ps_ready(p2.pid)

         # raise SIGTERM to pserver
-        os.kill(p2.pid, signal.SIGKILL)
+        os.kill(p2.pid, signal.SIGTERM)
         p2.join()
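A minimal stand-alone sketch of the graceful-shutdown pattern the test now uses on POSIX systems: send an interruptible signal and wait for the child, instead of SIGKILL.

import multiprocessing
import os
import signal
import time

def serve():
    try:
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:      # SIGINT arrives as KeyboardInterrupt
        pass                       # child gets a chance to clean up

if __name__ == '__main__':
    p = multiprocessing.Process(target=serve)
    p.start()
    time.sleep(0.5)
    os.kill(p.pid, signal.SIGINT)  # was SIGKILL before this change
    p.join()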
python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py (new file, mode 100644)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from test_sum_op import TestSumOp


class TestMKLDNN(TestSumOp):
    def init_kernel_type(self):
        self.use_mkldnn = True


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_sum_op.py

The base sum test gains a use_mkldnn flag and an init_kernel_type hook that subclasses (such as the new MKLDNN test) override.

@@ -20,12 +20,15 @@ from op_test import OpTest
 class TestSumOp(OpTest):
     def setUp(self):
         self.op_type = "sum"
+        self.use_mkldnn = False
+        self.init_kernel_type()
         x0 = np.random.random((3, 4)).astype('float32')
         x1 = np.random.random((3, 4)).astype('float32')
         x2 = np.random.random((3, 4)).astype('float32')
         self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
         y = x0 + x1 + x2
         self.outputs = {'Out': y}
+        self.attrs = {'use_mkldnn': self.use_mkldnn}

     def test_check_output(self):
         self.check_output()

@@ -33,6 +36,9 @@ class TestSumOp(OpTest):
     def test_check_grad(self):
         self.check_grad(['x0'], 'Out')

+    def init_kernel_type(self):
+        pass
+
 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/transpiler/distribute_transpiler.py

The transpiler records all pserver-side optimize blocks in a list and passes them through the new optimize_blocks attribute, the clone helpers return the op they append so sub-block cloning recurses into the clone, and the grad-merging sum ops set use_mkldnn explicitly.

@@ -396,7 +396,7 @@ class DistributeTranspiler(object):
                 return varname
             return ""

-        def __clone_lr_op_sub_block__(op, program, new_block):
+        def __clone_lr_op_sub_block__(op, program, lr_block):
             if not op.has_attr('sub_block'):
                 return

@@ -405,36 +405,41 @@ class DistributeTranspiler(object):
             assert isinstance(origin_block, Block)
             # we put the new sub block to new block to follow the block
             # hierarchy of the original blocks
-            new_sub_block = program.create_block(new_block.idx)
+            new_sub_block = program.create_block(lr_block.idx)

             # clone vars
             for var in origin_block.vars:
                 new_sub_block.clone_variable(var)

             # clone ops
-            for op in origin_block.ops:
-                self._clone_lr_op(program, new_sub_block, op)
+            for origin_op in origin_block.ops:
+                cloned_op = self._clone_lr_op(program, new_sub_block, origin_op)
                 # clone sub_block of op
-                __clone_lr_op_sub_block__(op, program, new_sub_block)
+                __clone_lr_op_sub_block__(cloned_op, program, new_sub_block)

             # reset the block of op
             op.set_attr('sub_block', new_sub_block)

         # append lr decay ops to the child block if exists
         lr_ops = self._get_lr_ops()
+        # record optimize blocks and we can run them on pserver parallel
+        optimize_blocks = []
         if len(lr_ops) > 0:
             lr_decay_block = pserver_program.create_block(
                 pserver_program.num_blocks - 1)
+            optimize_blocks.append(lr_decay_block)
             for _, op in enumerate(lr_ops):
-                self._append_pserver_non_opt_ops(lr_decay_block, op)
+                cloned_op = self._append_pserver_non_opt_ops(lr_decay_block, op)
                 # append sub blocks to pserver_program in lr_decay_op
-                __clone_lr_op_sub_block__(op, pserver_program, lr_decay_block)
+                __clone_lr_op_sub_block__(cloned_op, pserver_program,
+                                          lr_decay_block)

         # append op to the current block
         grad_to_block_id = []
         pre_block_idx = pserver_program.num_blocks - 1
         for idx, opt_op in enumerate(opt_op_on_pserver):
             per_opt_block = pserver_program.create_block(pre_block_idx)
+            optimize_blocks.append(per_opt_block)
             # append grad merging ops before clip and weight decay
             for _, op in enumerate(self.optimize_ops):
                 # find the origin @GRAD var before clipping

@@ -453,6 +458,7 @@ class DistributeTranspiler(object):
         if global_ops:
             opt_state_block = pserver_program.create_block(
                 pserver_program.num_blocks - 1)
+            optimize_blocks.append(opt_state_block)
             for glb_op in global_ops:
                 __append_optimize_op__(glb_op, opt_state_block,
                                        grad_to_block_id, None)

@@ -474,11 +480,11 @@ class DistributeTranspiler(object):
             assert len(prefetch_var_name_to_block_id) == 0

         attrs = {
-            "OptimizeBlock": pserver_program.block(1),
+            "optimize_blocks": optimize_blocks,
             "endpoint": endpoint,
             "Fanin": self.trainer_num,
             "sync_mode": self.sync_mode,
-            "grad_to_block_id": grad_to_block_id
+            "grad_to_block_id": grad_to_block_id,
         }
         if len(prefetch_var_name_to_block_id) > 0:
             attrs['prefetch_var_name_to_block_id'] \

@@ -872,7 +878,8 @@ class DistributeTranspiler(object):
             table_opt_block.append_op(
                 type="sum",
                 inputs={"X": pserver_side_table_grad_list},
-                outputs={"Out": [grad_var]})
+                outputs={"Out": [grad_var]},
+                attrs={"use_mkldnn": False})
         else:
             # in async_mode, for table gradient, it also need to be splited to each parameter server
             origin_grad_name = grad_var.name

@@ -1104,7 +1111,8 @@ class DistributeTranspiler(object):
             optimize_block.append_op(
                 type="sum",
                 inputs={"X": vars2merge},
-                outputs={"Out": merged_var})
+                outputs={"Out": merged_var},
+                attrs={"use_mkldnn": False})
             # TODO(panyx0718): What if it's SELECTED_ROWS.
             if not merged_var.type == core.VarDesc.VarType.SELECTED_ROWS:
                 optimize_block.append_op(

@@ -1209,7 +1217,7 @@ class DistributeTranspiler(object):
             if var not in program.global_block().vars:
                 block.clone_variable(var)

-        block.append_op(
+        return block.append_op(
             type=op.type, inputs=inputs, outputs=outputs, attrs=op.attrs)

     def _append_pserver_non_opt_ops(self, optimize_block, opt_op):

@@ -1247,7 +1255,7 @@ class DistributeTranspiler(object):
         elif not program.global_block().vars.has_key(var.name):
             program.global_block().clone_variable(var)

-        optimize_block.append_op(
+        return optimize_block.append_op(
             type=opt_op.type,
             inputs=inputs,
             outputs=outputs,
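One structural change worth calling out: the clone helpers now return the op they append, so the caller can descend into the sub-blocks of the cloned op rather than the original. A toy, framework-free sketch of that pattern (all class and method names below are hypothetical):

# Toy illustration: append_op returns the newly created op so recursion can
# attach cloned sub-blocks to the clone, not to the source op.
class Op(object):
    def __init__(self, op_type, sub_block=None):
        self.type = op_type
        self.sub_block = sub_block

class Block(object):
    def __init__(self):
        self.ops = []
    def append_op(self, op_type):
        op = Op(op_type)
        self.ops.append(op)
        return op                     # returning the op is the key change

def clone_with_sub_blocks(src_op, dst_block):
    cloned = dst_block.append_op(src_op.type)
    if src_op.sub_block is not None:
        cloned.sub_block = Block()
        for child in src_op.sub_block.ops:
            clone_with_sub_blocks(child, cloned.sub_block)
    return cloned

src = Op('while', sub_block=Block())
src.sub_block.append_op('increment')
cloned = clone_with_sub_blocks(src, Block())
assert cloned.sub_block.ops[0].type == 'increment'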
python/paddle/reader/decorator.py
python/paddle/v2/dataset/cifar.py

The CIFAR reader creators gain a cycle flag that makes the reader loop over the dataset indefinitely.

@@ -43,7 +43,7 @@ CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz'
 CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'


-def reader_creator(filename, sub_name):
+def reader_creator(filename, sub_name, cycle=False):
     def read_batch(batch):
         data = batch['data']
         labels = batch.get('labels', batch.get('fine_labels', None))

@@ -56,10 +56,13 @@ def reader_creator(filename, sub_name):
             names = (each_item.name for each_item in f
                      if sub_name in each_item.name)

+            while True:
                 for name in names:
                     batch = cPickle.load(f.extractfile(name))
                     for item in read_batch(batch):
                         yield item
+                if not cycle:
+                    break

     return reader

@@ -94,34 +97,40 @@ def test100():
         'test')


-def train10():
+def train10(cycle=False):
     """
     CIFAR-10 training set creator.

     It returns a reader creator, each sample in the reader is image pixels in
     [0, 1] and label in [0, 9].

+    :param cycle: whether to cycle through the dataset
+    :type cycle: bool
     :return: Training reader creator
     :rtype: callable
     """
     return reader_creator(
         paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
-        'data_batch')
+        'data_batch',
+        cycle=cycle)


-def test10():
+def test10(cycle=False):
     """
     CIFAR-10 test set creator.

     It returns a reader creator, each sample in the reader is image pixels in
     [0, 1] and label in [0, 9].

+    :param cycle: whether to cycle through the dataset
+    :type cycle: bool
     :return: Test reader creator.
     :rtype: callable
     """
     return reader_creator(
         paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
-        'test_batch')
+        'test_batch',
+        cycle=cycle)


 def fetch():
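A short usage sketch of the new cycle flag (it downloads CIFAR-10 on first use): with cycle=True the reader restarts from the first batch instead of stopping after one pass, which is convenient for open-ended training loops.

import itertools
import paddle.v2.dataset.cifar as cifar

reader = cifar.train10(cycle=True)
# Take a fixed number of samples from the endless stream.
first_ten = list(itertools.islice(reader(), 10))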
python/paddle/v2/dataset/flowers.py

The flowers reader creators gain the same cycle flag.

@@ -76,7 +76,8 @@ def reader_creator(data_file,
                    dataset_name,
                    mapper,
                    buffered_size=1024,
-                   use_xmap=True):
+                   use_xmap=True,
+                   cycle=False):
     '''
     1. read images from tar file and
     merge images into batch files in 102flowers.tgz_batch/

@@ -96,6 +97,8 @@ def reader_creator(data_file,
     :type mapper: callable
     :param buffered_size: the size of buffer used to process images
     :type buffered_size: int
+    :param cycle: whether to cycle through the dataset
+    :type cycle: bool
     :return: data reader
     :rtype: callable
     '''

@@ -108,6 +111,7 @@ def reader_creator(data_file,
     file_list = batch_images_from_tar(data_file, dataset_name, img2label)

     def reader():
+        while True:
             for file in open(file_list):
                 file = file.strip()
                 batch = None

@@ -117,6 +121,8 @@ def reader_creator(data_file,
                 labels = batch['label']
                 for sample, label in itertools.izip(data, batch['label']):
                     yield sample, int(label) - 1
+            if not cycle:
+                break

     if use_xmap:
         cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))

@@ -125,7 +131,7 @@ def reader_creator(data_file,
     return map_readers(mapper, reader)


-def train(mapper=train_mapper, buffered_size=1024, use_xmap=True):
+def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False):
     '''
     Create flowers training set reader.
     It returns a reader, each sample in the reader is

@@ -138,17 +144,23 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True):
     :type mapper: callable
     :param buffered_size: the size of buffer used to process images
     :type buffered_size: int
+    :param cycle: whether to cycle through the dataset
+    :type cycle: bool
     :return: train data reader
     :rtype: callable
     '''
     return reader_creator(
         download(DATA_URL, 'flowers', DATA_MD5),
         download(LABEL_URL, 'flowers', LABEL_MD5),
-        download(SETID_URL, 'flowers', SETID_MD5), TRAIN_FLAG, mapper,
-        buffered_size, use_xmap)
+        download(SETID_URL, 'flowers', SETID_MD5),
+        TRAIN_FLAG,
+        mapper,
+        buffered_size,
+        use_xmap,
+        cycle=cycle)


-def test(mapper=test_mapper, buffered_size=1024, use_xmap=True):
+def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False):
     '''
     Create flowers test set reader.
     It returns a reader, each sample in the reader is

@@ -161,14 +173,20 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True):
     :type mapper: callable
     :param buffered_size: the size of buffer used to process images
     :type buffered_size: int
+    :param cycle: whether to cycle through the dataset
+    :type cycle: bool
     :return: test data reader
     :rtype: callable
     '''
     return reader_creator(
         download(DATA_URL, 'flowers', DATA_MD5),
         download(LABEL_URL, 'flowers', LABEL_MD5),
-        download(SETID_URL, 'flowers', SETID_MD5), TEST_FLAG, mapper,
-        buffered_size, use_xmap)
+        download(SETID_URL, 'flowers', SETID_MD5),
+        TEST_FLAG,
+        mapper,
+        buffered_size,
+        use_xmap,
+        cycle=cycle)


 def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True):