Commit 7834b4a4
Authored Aug 07, 2018 by fengjiayi

    Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_op_tensor_support

Parents: 7c55e08c, 3300a532

Showing 21 changed files with 375 additions and 236 deletions (+375 / -236)

Changed files:

  .travis.yml  +0 -9
  cmake/generic.cmake  +4 -1
  paddle/fluid/framework/operator.cc  +2 -3
  paddle/fluid/inference/api/CMakeLists.txt  +5 -4
  paddle/fluid/operators/feed_op.cc  +0 -1
  paddle/fluid/operators/fetch_barrier_op.cc  +0 -6
  paddle/fluid/operators/fetch_op.cc  +0 -3
  paddle/fluid/operators/load_op.cc  +0 -3
  paddle/fluid/operators/recv_op.cc  +0 -2
  paddle/fluid/operators/send_barrier_op.cc  +0 -5
  paddle/fluid/operators/send_op.cc  +0 -3
  paddle/fluid/platform/profiler.cc  +6 -0
  paddle/scripts/paddle_build.sh  +19 -0
  paddle/scripts/paddle_docker_build.sh  +3 -0
  python/paddle/fluid/backward.py  +0 -2
  python/paddle/fluid/framework.py  +20 -1
  python/paddle/fluid/layers/nn.py  +4 -0
  python/paddle/fluid/tests/unittests/test_dist_transpiler.py  +17 -8
  python/paddle/fluid/tests/unittests/test_initializer.py  +153 -116
  python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py  +11 -14
  python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py  +131 -55

.travis.yml

@@ -27,15 +27,6 @@ script:
     # 43min timeout
     paddle/scripts/paddle_docker_build.sh ${JOB}
     if [ $? -eq 0 ] || [ $? -eq 142 ]; then true; else exit 1; fi;
-  - |
-    if [[ "$JOB" != "doc" ]]; then exit 0; fi;
-    # For document only
-    if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
-    if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi;
-    export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
-    export DOCS_DIR=`pwd`
-    cd ..
-    curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc/
 notifications:
   email:
     on_success: change

cmake/generic.cmake

@@ -265,6 +265,7 @@ function(cc_test TARGET_NAME)
     if (${cc_test_SERIAL})
         set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
         set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
+        set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
     endif()
   endif()
 endfunction(cc_test)

@@ -330,6 +331,7 @@ function(nv_test TARGET_NAME)
     if (nv_test_SERIAL)
         set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
         set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
+        set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
     endif()
   endif()
 endfunction(nv_test)

@@ -577,7 +579,8 @@ function(py_test TARGET_NAME)
     set(multiValueArgs SRCS DEPS ARGS ENVS)
     cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_test(NAME ${TARGET_NAME}
-             COMMAND env FLAGS_init_allocated_mem=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
+             COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
+             PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
              ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
   endif()

paddle/fluid/framework/operator.cc

@@ -136,6 +136,8 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
     platform::SetDeviceId(dev_id);
 #endif
   }
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  platform::RecordEvent record_event(Type(), pool.Get(place));
   RunImpl(scope, place);
   VLOG(10) << "+ " << DebugStringEx(&scope);
 }

@@ -639,9 +641,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   auto* dev_ctx = pool.Get(place);
-  // For profiling, don't move out of this function because that will result
-  // in the failure of multi-GPU profiling.
-  platform::RecordEvent record_event(Type(), dev_ctx);
   // check if op[type] has kernel registered.
   auto& all_op_kernels = AllOpKernels();
   auto kernels_iter = all_op_kernels.find(type_);

paddle/fluid/inference/api/CMakeLists.txt

@@ -74,9 +74,10 @@ if (WITH_ANAKIN) # only needed in CI
   target_link_libraries(inference_anakin_api anakin anakin_saber_common)
   target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
   if (WITH_TESTING)
-    cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
-      ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
-      DEPS inference_anakin_api_shared)
-    target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
+    # this test is unstable, disable it first.
+    #cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
+      #ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
+      #DEPS inference_anakin_api_shared)
+    #target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
   endif(WITH_TESTING)
 endif()

paddle/fluid/operators/feed_op.cc

@@ -31,7 +31,6 @@ class FeedOp : public framework::OperatorBase {
                const platform::Place &place) const override {
     // get device context from pool
     auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place);
-    platform::RecordEvent record_event(Type(), dev_ctx);
     auto feed_var_name = Input("X");
     auto *feed_var = scope.FindVar(feed_var_name);

paddle/fluid/operators/fetch_barrier_op.cc

@@ -36,12 +36,6 @@ class FetchBarrierOp : public framework::OperatorBase {
   void RunImpl(const framework::Scope& scope,
                const platform::Place& place) const override {
     std::vector<std::string> eps = Attr<std::vector<std::string>>("endpoints");
-
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    auto& ctx = *pool.Get(place);
-    // For profiling
-    platform::RecordEvent record_event(Type(), &ctx);
-
     distributed::RPCClient* rpc_client =
         distributed::RPCClient::GetInstance<RPCCLIENT_T>();

paddle/fluid/operators/fetch_op.cc

@@ -30,9 +30,6 @@ class FetchOp : public framework::OperatorBase {
  private:
   void RunImpl(const framework::Scope &scope,
                const platform::Place &place) const override {
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-    platform::RecordEvent record_event(Type(), pool.Get(place));
-
     auto fetch_var_name = Input("X");
     auto *fetch_var = scope.FindVar(fetch_var_name);
     PADDLE_ENFORCE(fetch_var != nullptr,

paddle/fluid/operators/load_op.cc

@@ -31,9 +31,6 @@ class LoadOp : public framework::OperatorBase {
  private:
   void RunImpl(const framework::Scope &scope,
                const platform::Place &place) const override {
-    auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place);
-    platform::RecordEvent record_event(Type(), dev_ctx);
-
     // FIXME(yuyang18): We save variable to local file now, but we should change
     // it to save an output stream.
     auto filename = Attr<std::string>("file_path");

paddle/fluid/operators/recv_op.cc

@@ -40,8 +40,6 @@ class RecvOp : public framework::OperatorBase {
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
     auto& ctx = *pool.Get(place);
-    // For profiling
-    platform::RecordEvent record_event(Type(), &ctx);
     distributed::RPCClient* rpc_client =
         distributed::RPCClient::GetInstance<RPCCLIENT_T>();

paddle/fluid/operators/send_barrier_op.cc

@@ -39,11 +39,6 @@ class SendBarrierOp : public framework::OperatorBase {
     std::vector<std::string> eps = Attr<std::vector<std::string>>("endpoints");
     bool sync_mode = Attr<bool>("sync_mode");
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    auto& ctx = *pool.Get(place);
-    // For profiling
-    platform::RecordEvent record_event(Type(), &ctx);
-
     distributed::RPCClient* rpc_client =
         distributed::RPCClient::GetInstance<RPCCLIENT_T>();

paddle/fluid/operators/send_op.cc

@@ -42,9 +42,6 @@ class SendOp : public framework::OperatorBase {
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
     auto& ctx = *pool.Get(place);
-    // For profiling
-    platform::RecordEvent record_event(Type(), &ctx);
-
     distributed::RPCClient* rpc_client =
         distributed::RPCClient::GetInstance<RPCCLIENT_T>();

paddle/fluid/platform/profiler.cc

@@ -110,6 +110,8 @@ Event::Event(EventType type, std::string name, uint32_t thread_id,
   has_cuda_ = dev_ctx ? platform::is_gpu_place(dev_ctx->GetPlace()) : false;
   if (has_cuda_) {
     auto* cuda_dev_ctx = static_cast<const CUDADeviceContext*>(dev_ctx);
+    PADDLE_ENFORCE(cudaSetDevice(
+        boost::get<platform::CUDAPlace>(cuda_dev_ctx->GetPlace()).device));
     PADDLE_ENFORCE(cudaGetDevice(&device_));
     PADDLE_ENFORCE(cudaEventCreate(&event_));
     auto stream = cuda_dev_ctx->stream();

@@ -176,6 +178,7 @@ void PopEvent(const std::string& name, const DeviceContext* dev_ctx) {
 RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx)
     : is_enabled_(false), start_ns_(PosixInNsec()) {
+  std::lock_guard<std::mutex> l(profiler_mu);
   if (g_state == ProfilerState::kDisabled) return;
   is_enabled_ = true;
   dev_ctx_ = dev_ctx;

@@ -186,6 +189,7 @@ RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx)
 }
 
 RecordEvent::~RecordEvent() {
+  std::lock_guard<std::mutex> l(profiler_mu);
   if (g_state == ProfilerState::kDisabled || !is_enabled_) return;
   DeviceTracer* tracer = GetDeviceTracer();
   if (tracer) {

@@ -198,6 +202,7 @@ RecordEvent::~RecordEvent() {
 RecordBlock::RecordBlock(int block_id)
     : is_enabled_(false), start_ns_(PosixInNsec()) {
+  std::lock_guard<std::mutex> l(profiler_mu);
   if (g_state == ProfilerState::kDisabled) return;
   is_enabled_ = true;
   SetCurBlock(block_id);

@@ -205,6 +210,7 @@ RecordBlock::RecordBlock(int block_id)
 }
 
 RecordBlock::~RecordBlock() {
+  std::lock_guard<std::mutex> l(profiler_mu);
   if (g_state == ProfilerState::kDisabled || !is_enabled_) return;
   DeviceTracer* tracer = GetDeviceTracer();
   if (tracer) {

paddle/scripts/paddle_build.sh

@@ -419,6 +419,25 @@ EOF
     linkchecker doc/v2/en/html/index.html
     linkchecker doc/v2/cn/html/index.html
     linkchecker doc/v2/api/en/html/index.html
+
+    if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
+
+    # Deploy to the the content server if its a "develop" or "release/version" branch
+    # The "develop_doc" branch is reserved to test full deploy process without impacting the real content.
+    if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
+        PPO_SCRIPT_BRANCH=develop
+    elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
+        PPO_SCRIPT_BRANCH=master
+    else
+        # Early exit, this branch doesn't require documentation build
+        return 0;
+    fi
+    # Fetch the paddlepaddle.org deploy_docs.sh from the appopriate branch
+    export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
+    export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
+    cd ..
+    curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
+    cd -
 }
 
 function gen_html() {

paddle/scripts/paddle_docker_build.sh

@@ -52,6 +52,9 @@ EOL
     ${DOCKER_CMD} run -it \
         ${DOCKER_ENV} \
         -e SCRIPT_NAME=$0 \
+        -e CONTENT_DEC_PASSWD=$CONTENT_DEC_PASSWD \
+        -e TRAVIS_BRANCH=$TRAVIS_BRANCH \
+        -e TRAVIS_PULL_REQUEST=$TRAVIS_PULL_REQUEST \
         -v $PADDLE_ROOT:/paddle \
         -v ${HOME}/.ccache:/root/.ccache \
         -w /paddle \

python/paddle/fluid/backward.py

@@ -572,8 +572,6 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
     program.current_block_idx = current_block_idx
     program._sync_with_cpp()
 
-    # FIXME(zcd): prevent loss.grad optimized by mem_opt.
-    loss.block.var(_append_grad_suffix_(loss.name)).persistable = True
-
     if parameter_list is not None:
         parameters = parameter_list

python/paddle/fluid/framework.py

@@ -1038,7 +1038,26 @@ class Block(object):
         global_block = self.program.global_block()
         param = Parameter(global_block, *args, **kwargs)
         if 'initializer' in kwargs:
-            kwargs['initializer'](param, self)
+
+            def _is_inited_by(block, var):
+                init_ops = []
+                for op in block.ops:
+                    if var.name in op.output_arg_names:
+                        init_ops.append(op)
+                return init_ops
+
+            initializer = kwargs['initializer']
+            init_ops = _is_inited_by(global_block, param)
+            init_ops_len = len(init_ops)
+            if init_ops_len > 1:
+                raise RuntimeError("param " + param.name +
+                                   " is inited by multiple init ops " + str(
+                                       init_ops))
+            elif init_ops_len == 1:
+                #TODO already inited, do nothing, should log a warning
+                pass
+            else:
+                initializer(param, self)
         return param
 
     def append_op(self, *args, **kwargs):

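Note on the framework.py hunk above: parameter creation now checks whether the startup block already contains an op that writes the new parameter, and only runs the supplied initializer when no such op exists (raising if it finds more than one). A minimal standalone sketch of that guard, using hypothetical names rather than the real Paddle classes:

    class FakeOp(object):
        """Stand-in for a framework op; only the output names matter here."""

        def __init__(self, output_arg_names):
            self.output_arg_names = output_arg_names


    def find_init_ops(ops, var_name):
        # Ops whose outputs include var_name are the ones that initialize it.
        return [op for op in ops if var_name in op.output_arg_names]


    def maybe_initialize(ops, param_name, run_initializer):
        init_ops = find_init_ops(ops, param_name)
        if len(init_ops) > 1:
            raise RuntimeError("param " + param_name + " is inited by multiple init ops")
        elif len(init_ops) == 1:
            pass  # already initialized once; the hunk leaves a TODO to log a warning
        else:
            run_initializer(param_name)


    ops = [FakeOp(["fc_w"])]
    maybe_initialize(ops, "fc_w", lambda name: None)                        # skipped, already inited
    maybe_initialize(ops, "fc_b", lambda name: ops.append(FakeOp([name])))  # runs the initializer
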
python/paddle/fluid/layers/nn.py

@@ -949,6 +949,10 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
     helper = LayerHelper('dropout', **locals())
     out = helper.create_tmp_variable(dtype=x.dtype)
     mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True)
+
+    if (seed is None or seed == 0) and helper.main_program.random_seed != 0:
+        seed = helper.main_program.random_seed
+
     helper.append_op(
         type='dropout',
         inputs={'X': [x]},

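Note on the layers/nn.py hunk above: dropout now inherits the program-level random_seed when no per-op seed is given, which makes the op reproducible whenever a program-wide seed is set. A small self-contained sketch of that fallback rule (plain Python, not the Paddle API):

    def resolve_dropout_seed(op_seed, program_seed):
        # An unset or zero per-op seed falls back to the program-level seed;
        # zero still means "no fixed seed" for both.
        if (op_seed is None or op_seed == 0) and program_seed != 0:
            return program_seed
        return op_seed


    assert resolve_dropout_seed(None, 123) == 123  # inherit the program seed
    assert resolve_dropout_seed(7, 123) == 7       # an explicit per-op seed wins
    assert resolve_dropout_seed(None, 0) is None   # nothing to inherit
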
python/paddle/fluid/tests/unittests/test_dist_transpiler.py

@@ -73,9 +73,18 @@ class TranspilerTest(unittest.TestCase):
         return self.transpiler
 
+    def transpiler_test_impl(self):
+        pass
+
+    def test_transpiler(self):
+        main = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(main, startup):
+            self.transpiler_test_impl()
+
 
 class TestBasicModel(TranspilerTest):
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         pserver2, startup2 = self.get_pserver(self.pserver2_ep)

@@ -123,7 +132,7 @@ class TestBasicModel(TranspilerTest):
 
 class TestBasicModelWithLargeBlockSize(TranspilerTest):
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         config = fluid.DistributeTranspilerConfig()
         config.min_block_size = 1048576

@@ -148,7 +157,7 @@ class TestBasicModelWithLargeBlockSize(TranspilerTest):
                          ["sum", "scale", "sgd"])
         # confirm startup program
         self.assertEqual([op.type for op in startup.global_block().ops],
-                         ["fill_constant", "fill_constant", "fill_constant"])
+                         ["fill_constant", "fill_constant"])
         # the variable #fc_w will be split into two blocks
         fc_w_var = startup2.global_block().var("fc_w")
         self.assertEqual(fc_w_var.shape, (1000L, 1000L))

@@ -177,7 +186,7 @@ class TestNoSliceVar(TranspilerTest):
     def setUp(self):
         super(TestNoSliceVar, self).setUp()
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         config = fluid.DistributeTranspilerConfig()
         config.slice_var_up = False

@@ -212,7 +221,7 @@ class TestLRDecay(TranspilerTest):
         sgd_optimizer.minimize(avg_cost)
         return
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         trainer = self.get_trainer()

@@ -242,7 +251,7 @@ class TestLRDecayConditional(TranspilerTest):
         sgd_optimizer.minimize(avg_cost)
         return
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         trainer = self.get_trainer()

@@ -291,7 +300,7 @@ class TestL2Decay(TranspilerTest):
         sgd_optimizer.minimize(avg_cost)
         return
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         trainer = self.get_trainer()

@@ -326,7 +335,7 @@ class TestL2DecayWithPiecewise(TranspilerTest):
         sgd_optimizer.minimize(avg_cost)
         return
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         trainer = self.get_trainer()

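Note on the test_dist_transpiler.py refactor above: the base class now owns a single test_transpiler entry point that builds fresh programs under program_guard and delegates to transpiler_test_impl, so each subclass only overrides that hook. The same template-method shape in plain unittest terms (hypothetical classes, for illustration only):

    import unittest


    class TranspilerCaseBase(unittest.TestCase):
        def transpiler_test_impl(self):
            pass  # subclasses put their real assertions here

        def test_transpiler(self):
            # Single shared entry point: build the per-test setup here,
            # then delegate to the subclass hook.
            self.transpiler_test_impl()


    class ExampleCase(TranspilerCaseBase):
        def transpiler_test_impl(self):
            self.assertTrue(True)


    if __name__ == "__main__":
        unittest.main()
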
python/paddle/fluid/tests/unittests/test_initializer.py

@@ -27,6 +27,7 @@ class TestConstantInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -43,6 +44,7 @@ class TestConstantInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -61,6 +63,7 @@ class TestUniformInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -80,17 +83,18 @@ class TestUniformInitializer(unittest.TestCase):
         program = framework.Program()
         program.random_seed = 123
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
-            lod_level=0,
-            name="param",
-            initializer=initializer.UniformInitializer())
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
-            lod_level=0,
-            name="param",
-            initializer=initializer.UniformInitializer(seed=456))
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
+                lod_level=0,
+                name="param1",
+                initializer=initializer.UniformInitializer())
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
+                lod_level=0,
+                name="param2",
+                initializer=initializer.UniformInitializer(seed=456))
         init_op = block.ops[1]
         self.assertEqual(init_op.attr("seed"), 123)

@@ -102,6 +106,7 @@ class TestUniformInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -115,6 +120,25 @@ class TestUniformInitializer(unittest.TestCase):
         self.assertAlmostEqual(init_op.attr('max'), 3.1, delta=DELTA)
         self.assertEqual(init_op.attr('seed'), 123)
 
+    def test_uniform_initializer_two_op(self):
+        """Test uniform initializer with supplied attributes
+        """
+        program = framework.Program()
+        block = program.global_block()
+        for i in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
+                lod_level=0,
+                name="param",
+                initializer=initializer.UniformInitializer(-4.2, float(i), 123))
+        self.assertEqual(len(block.ops), 1)
+        init_op0 = block.ops[0]
+        self.assertEqual(init_op0.type, 'uniform_random')
+        self.assertAlmostEqual(init_op0.attr('min'), -4.2, delta=DELTA)
+        self.assertAlmostEqual(init_op0.attr('max'), 0.0, delta=DELTA)
+        self.assertEqual(init_op0.attr('seed'), 123)
+
 
 class TestNormalInitializer(unittest.TestCase):
     def test_normal_initializer_default_value(self):

@@ -122,6 +146,7 @@ class TestNormalInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -140,6 +165,7 @@ class TestNormalInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -161,6 +187,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        param = block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -181,6 +208,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        param = block.create_parameter(
-            dtype="float32",
-            shape=[5, 10, 15, 20],
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10, 15, 20],
...

@@ -203,6 +231,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        param = block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -223,6 +252,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        param = block.create_parameter(
-            dtype="float32",
-            shape=[5, 10, 15, 20],
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10, 15, 20],
...

@@ -244,6 +274,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -267,6 +298,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        param = block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -287,6 +319,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        param = block.create_parameter(
-            dtype="float32",
-            shape=[5, 10, 15, 20],
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10, 15, 20],
...

@@ -308,6 +341,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        param = block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -328,6 +362,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        param = block.create_parameter(
-            dtype="float32",
-            shape=[5, 10, 15, 20],
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10, 15, 20],
...

@@ -348,6 +383,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[5, 10],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
...

@@ -370,6 +406,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
-        block.create_parameter(
-            dtype="float32",
-            shape=[8, 1, 3, 3],
+        for _ in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[8, 1, 3, 3],
...

python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py

@@ -98,16 +98,13 @@ class TestMNIST(TestParallelExecutorBase):
         fluid.recordio_writer.convert_reader_to_recordio_file(
             MNIST_RECORDIO_FILE, reader, feeder)
 
-    def _init_data(self, random=True):
+    def _init_data(self):
         np.random.seed(5)
-        if random:
-            img = np.random.random(size=[32, 784]).astype(np.float32)
-        else:
-            img = np.ones(shape=[32, 784], dtype='float32')
+        img = np.random.random(size=[32, 784]).astype(np.float32)
         label = np.ones(shape=[32, 1], dtype='int64')
         return img, label
 
-    def _compare_reduce_and_allreduce(self, model, use_cuda, random_data=True):
+    def _compare_reduce_and_allreduce(self, model, use_cuda):
         if use_cuda and not core.is_compiled_with_cuda():
             return
 
         self.check_network_convergence(
...

@@ -115,7 +112,7 @@ class TestMNIST(TestParallelExecutorBase):
         self.check_network_convergence(
             model, use_cuda=use_cuda, allow_op_delay=True, use_reduce=True)
 
-        img, label = self._init_data(random_data)
+        img, label = self._init_data()
 
         all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
             model,
...

@@ -166,27 +163,27 @@ class TestMNIST(TestParallelExecutorBase):
         if use_cuda and not core.is_compiled_with_cuda():
             return
 
-        img, label = self._init_data(random=False)
+        img, label = self._init_data()
 
         single_first_loss, single_last_loss = self.check_network_convergence(
             method=simple_fc_net,
-            seed=1000,
+            seed=1,
             feed_dict={"image": img,
                        "label": label},
             use_cuda=use_cuda,
             use_parallel_executor=False)
         parallel_first_loss, parallel_last_loss = self.check_network_convergence(
             method=simple_fc_net,
-            seed=1000,
+            seed=1,
             feed_dict={"image": img,
                        "label": label},
             use_cuda=use_cuda,
             use_parallel_executor=True)
 
-        for p_f in parallel_first_loss:
-            self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6)
-        for p_l in parallel_last_loss:
-            self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6)
+        self.assertAlmostEquals(
+            np.mean(parallel_first_loss), single_first_loss, delta=1e-6)
+        self.assertAlmostEquals(
+            np.mean(parallel_last_loss), single_last_loss, delta=1e-6)
 
     def test_simple_fc_parallel_accuracy(self):
         self.check_simple_fc_parallel_accuracy(True)

python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py

@@ -21,6 +21,19 @@ from parallel_executor_test_base import TestParallelExecutorBase
 import unittest
 import math
 import os
+import numpy as np
+
+# FIXME(zcd): If the neural net has dropout_op, the output of ParallelExecutor
+# and Executor is different. Because, for ParallelExecutor, the dropout_op of
+# the neural net will be copied N copies(N is the number of device). This will
+# lead to the random numbers generated by ParallelExecutor and Executor are different.
+# So, if we compare the loss of ParallelExecutor and Executor, we should remove the
+# dropout_op.
+remove_dropout = False
+
+# FIXME(zcd): If the neural net has batch_norm, the output of ParallelExecutor
+# and Executor is different.
+remove_bn = False
 
 
 def squeeze_excitation(input, num_channels, reduction_ratio):

@@ -53,7 +66,8 @@ def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
         groups=groups,
         act=None,
         bias_attr=False)
-    return fluid.layers.batch_norm(input=conv, act=act, momentum=0.1)
+    return conv if remove_bn else fluid.layers.batch_norm(
+        input=conv, act=act, momentum=0.1)
 
 
 def shortcut(input, ch_out, stride):

@@ -92,13 +106,14 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
     return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
 
 
-def SE_ResNeXt50Small(batch_size=2, use_feed=False):
-    assert not use_feed, "SE_ResNeXt doesn't support feed yet"
-
-    img = fluid.layers.fill_constant(
-        shape=[batch_size, 3, 224, 224], dtype='float32', value=0.0)
-    label = fluid.layers.fill_constant(
-        shape=[batch_size, 1], dtype='int64', value=0.0)
+batch_size = 12
+img_shape = [3, 224, 224]
+
+
+def SE_ResNeXt50Small(use_feed):
+    img = fluid.layers.data(name='image', shape=img_shape, dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
 
     conv = conv_bn_layer(
         input=img, num_filters=16, filter_size=3, stride=2, act='relu')

@@ -127,7 +142,8 @@ def SE_ResNeXt50Small(batch_size=2, use_feed=False):
     reshape = fluid.layers.reshape(
         x=conv, shape=[-1, shape[1], shape[2] * shape[3]])
     pool = fluid.layers.reduce_mean(input=reshape, dim=2)
-    dropout = fluid.layers.dropout(x=pool, dropout_prob=0.2)
+    dropout = pool if remove_dropout else fluid.layers.dropout(
+        x=pool, dropout_prob=0.2, seed=1)
     # Classifier layer:
     prediction = fluid.layers.fc(input=dropout, size=1000, act='softmax')
     loss = fluid.layers.cross_entropy(input=prediction, label=label)

@@ -135,18 +151,7 @@ def SE_ResNeXt50Small(batch_size=2, use_feed=False):
     return loss
 
 
-class TestResnet(TestParallelExecutorBase):
-    def check_resnet_convergence_with_learning_rate_decay(self,
-                                                          use_cuda=True,
-                                                          use_reduce=False,
-                                                          iter=20):
-        if use_cuda and not core.is_compiled_with_cuda():
-            return
-
-        os.environ['CPU_NUM'] = str(4)
-
-        def _cosine_decay(learning_rate, step_each_epoch, epochs=120):
-            """
-            Applies cosine decay to the learning rate.
-            lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
+def cosine_decay(learning_rate, step_each_epoch, epochs=120):
+    """
+    Applies cosine decay to the learning rate.
+    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)

@@ -159,51 +164,122 @@ class TestResnet(TestParallelExecutorBase):
-                         (ops.cos(epoch * (math.pi / epochs)) + 1) / 2
-            return decayed_lr
-
-        def _optimizer(learning_rate=0.01):
-            optimizer = fluid.optimizer.Momentum(
-                learning_rate=_cosine_decay(
-                    learning_rate=learning_rate, step_each_epoch=2, epochs=1),
-                momentum=0.9,
-                regularization=fluid.regularizer.L2Decay(1e-4))
-            return optimizer
-
-        import functools
-
-        batch_size = 2
-        single_first_loss, single_last_loss = self.check_network_convergence(
-            functools.partial(
-                SE_ResNeXt50Small, batch_size=batch_size),
-            iter=iter,
-            batch_size=batch_size,
-            use_cuda=use_cuda,
-            use_reduce=use_reduce,
-            optimizer=_optimizer,
-            use_parallel_executor=False)
-        parallel_first_loss, parallel_last_loss = self.check_network_convergence(
-            functools.partial(
-                SE_ResNeXt50Small, batch_size=batch_size),
-            iter=iter,
-            batch_size=batch_size,
-            use_cuda=use_cuda,
-            use_reduce=use_reduce,
-            optimizer=_optimizer)
-
-        for p_f in parallel_first_loss:
-            self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6)
-        for p_l in parallel_last_loss:
-            self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6)
-
-    def test_seresnext_with_learning_rate_decay(self):
-        self.check_resnet_convergence_with_learning_rate_decay(True, False)
-        self.check_resnet_convergence_with_learning_rate_decay(
-            False, False, iter=5)
-
-    def test_seresnext_with_new_strategy_with_learning_rate_decay(self):
-        self.check_resnet_convergence_with_learning_rate_decay(True, True)
-        self.check_resnet_convergence_with_learning_rate_decay(
-            False, True, iter=5)
+                     (ops.cos(epoch * (math.pi / epochs)) + 1) / 2
+    return decayed_lr
+
+
+def optimizer(learning_rate=0.01):
+    optimizer = fluid.optimizer.Momentum(
+        learning_rate=cosine_decay(
+            learning_rate=learning_rate, step_each_epoch=2, epochs=1),
+        momentum=0.9,
+        regularization=fluid.regularizer.L2Decay(1e-4))
+    return optimizer
+
+
+class TestResnet(TestParallelExecutorBase):
+    @classmethod
+    def setUpClass(cls):
+        os.environ['CPU_NUM'] = str(4)
+        global remove_dropout
+        global remove_bn
+        remove_dropout = False
+        remove_bn = False
+
+    def _init_data(self, batch_size=2, random=True):
+        np.random.seed(5)
+        if random:
+            img = np.random.random(
+                size=[batch_size] + img_shape).astype(np.float32)
+        else:
+            img = np.ones(shape=[batch_size] + img_shape, dtype='float32')
+        label = [np.random.randint(0, 999) for _ in range(batch_size)]
+        label = np.array(label).astype(np.int64).reshape(-1, 1)
+        return img, label
+
+    def _compare_reduce_and_allreduce(self, model, use_cuda, iter=20,
+                                      delta2=1e-4):
+        if use_cuda and not core.is_compiled_with_cuda():
+            return
+
+        global remove_bn
+        remove_bn = True
+
+        img, label = self._init_data(batch_size=batch_size)
+        all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
+            model,
+            feed_dict={"image": img,
+                       "label": label},
+            iter=iter,
+            batch_size=batch_size,
+            use_cuda=use_cuda,
+            use_reduce=False,
+            optimizer=optimizer)
+        reduce_first_loss, reduce_last_loss = self.check_network_convergence(
+            model,
+            feed_dict={"image": img,
+                       "label": label},
+            iter=iter,
+            batch_size=batch_size,
+            use_cuda=use_cuda,
+            use_reduce=True,
+            optimizer=optimizer)
+
+        for loss in zip(all_reduce_first_loss, reduce_first_loss):
+            self.assertAlmostEquals(loss[0], loss[1], delta=1e-6)
+        for loss in zip(all_reduce_last_loss, reduce_last_loss):
+            self.assertAlmostEquals(loss[0], loss[1], delta=delta2)
+
+    def _check_resnet_convergence(self,
+                                  model,
+                                  use_cuda=True,
+                                  use_reduce=False,
+                                  iter=20,
+                                  delta2=1e-6):
+        if use_cuda and not core.is_compiled_with_cuda():
+            return
+
+        global remove_dropout
+        global remove_bn
+        remove_dropout = True
+        remove_bn = True
+
+        img, label = self._init_data(batch_size=batch_size)
+        single_first_loss, single_last_loss = self.check_network_convergence(
+            model,
+            feed_dict={"image": img,
+                       "label": label},
+            iter=iter,
+            batch_size=batch_size,
+            use_cuda=use_cuda,
+            use_reduce=use_reduce,
+            optimizer=optimizer,
+            use_parallel_executor=False)
+        parallel_first_loss, parallel_last_loss = self.check_network_convergence(
+            model,
+            feed_dict={"image": img,
+                       "label": label},
+            iter=iter,
+            batch_size=batch_size,
+            use_cuda=use_cuda,
+            use_reduce=use_reduce,
+            optimizer=optimizer)
+
+        self.assertAlmostEquals(
+            np.mean(parallel_first_loss), single_first_loss[0], delta=1e-6)
+        self.assertAlmostEquals(
+            np.mean(parallel_last_loss), single_last_loss[0], delta=delta2)
+
+    def test_seresnext_with_learning_rate_decay(self):
+        self._check_resnet_convergence(model=SE_ResNeXt50Small, use_cuda=True)
+        self._check_resnet_convergence(
+            model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3)
+
+    def test_seresnext_with_new_strategy(self):
+        # self._compare_reduce_and_allreduce(
+        #     model=SE_ResNeXt50Small, use_cuda=True)
+        self._compare_reduce_and_allreduce(
+            model=SE_ResNeXt50Small, use_cuda=False, iter=5, delta2=1e-2)
 
 
 if __name__ == '__main__':