Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4978db2c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4978db2c
编写于
6月 18, 2019
作者:
C
chengduo
提交者:
GitHub
6月 18, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove nccl dep when the number of GPU is 1 (#18158)
* remove nccl dep when the number of GPU is 1 test=develop
上级
25ab23be
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
8 additions
and
5 deletions
+8
-5
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+3
-3
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+3
-1
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+1
-1
python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py
...n/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py
+1
-0
未找到文件。
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
4978db2c
...
@@ -369,8 +369,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
...
@@ -369,8 +369,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
"Execution which can get better performance,"
"Execution which can get better performance,"
<<
"you can force it off by env FLAGS_enable_parallel_graph=0"
;
<<
"you can force it off by env FLAGS_enable_parallel_graph=0"
;
if
(
member_
->
use_cuda_
)
{
if
(
member_
->
use_cuda_
&&
member_
->
nranks_
>
1
)
{
// Bcast Parameters to all GPUs
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
member_
->
InitOrGetNCCLCommunicator
(
scope
,
build_strategy
);
member_
->
InitOrGetNCCLCommunicator
(
scope
,
build_strategy
);
...
@@ -405,10 +404,11 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
...
@@ -405,10 +404,11 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
}
}
return
false
;
return
false
;
};
};
// Bcast Parameters to all GPUs
if
(
need_broadcast
())
{
if
(
need_broadcast
())
{
BCastParamsToDevices
(
bcast_vars
,
build_strategy
.
trainer_id_
);
BCastParamsToDevices
(
bcast_vars
,
build_strategy
.
trainer_id_
);
}
}
// Startup Program has been run. All local scopes have correct parameters.
// Startup Program has been run. All local scopes have correct parameters.
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
4978db2c
...
@@ -316,7 +316,9 @@ CUDADeviceContext::~CUDADeviceContext() {
...
@@ -316,7 +316,9 @@ CUDADeviceContext::~CUDADeviceContext() {
eigen_device_
.
reset
();
eigen_device_
.
reset
();
PADDLE_ENFORCE
(
cudaStreamDestroy
(
stream_
));
PADDLE_ENFORCE
(
cudaStreamDestroy
(
stream_
));
#if !defined(_WIN32)
#if !defined(_WIN32)
PADDLE_ENFORCE
(
dynload
::
ncclCommDestroy
(
nccl_comm_
));
if
(
nccl_comm_
)
{
PADDLE_ENFORCE
(
dynload
::
ncclCommDestroy
(
nccl_comm_
));
}
#endif
#endif
}
}
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
4978db2c
...
@@ -223,5 +223,5 @@ if(WITH_DISTRIBUTE)
...
@@ -223,5 +223,5 @@ if(WITH_DISTRIBUTE)
endif
()
endif
()
set_tests_properties
(
test_recordio_reader test_parallel_executor_test_while_train test_parallel_executor_mnist
set_tests_properties
(
test_recordio_reader test_parallel_executor_test_while_train test_parallel_executor_mnist
test_parallel_executor_seresnext test_parallel_executor_crf
test_parallel_executor_seresnext test_parallel_executor_crf
test_sync_batch_norm_op
PROPERTIES LABELS
"RUN_TYPE=DIST"
)
PROPERTIES LABELS
"RUN_TYPE=DIST"
)
python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py
浏览文件 @
4978db2c
...
@@ -98,6 +98,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
...
@@ -98,6 +98,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
#####################################################################
#####################################################################
# Multi-GPUs, self.N / core.get_cuda_device_count() per GPU
# Multi-GPUs, self.N / core.get_cuda_device_count() per GPU
assert
core
.
get_cuda_device_count
()
>
1
main
,
startup
,
outs
=
self
.
build_program
(
place
,
layout
,
seed
,
True
,
main
,
startup
,
outs
=
self
.
build_program
(
place
,
layout
,
seed
,
True
,
only_forward
)
only_forward
)
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录