Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
3067114f
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3067114f
编写于
2月 11, 2018
作者:
Y
Yang Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
clean up
上级
cd9e660d
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
3 addition
and
39 deletion
+3
-39
paddle/fluid/operators/nccl_op.cu.cc
paddle/fluid/operators/nccl_op.cu.cc
+0
-8
paddle/fluid/operators/parallel_do_op.cc
paddle/fluid/operators/parallel_do_op.cc
+0
-5
python/paddle/v2/fluid/backward.py
python/paddle/v2/fluid/backward.py
+0
-25
python/paddle/v2/fluid/tests/test_parallel_op.py
python/paddle/v2/fluid/tests/test_parallel_op.py
+3
-1
未找到文件。
paddle/fluid/operators/nccl_op.cu.cc
浏览文件 @
3067114f
...
...
@@ -47,11 +47,8 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
auto
ins
=
ctx
.
MultiInput
<
LoDTensor
>
(
"X"
);
auto
outs
=
ctx
.
MultiOutput
<
LoDTensor
>
(
"Out"
);
LOG
(
INFO
)
<<
"------------------"
;
std
::
string
reduction
=
ctx
.
Attr
<
std
::
string
>
(
"reduction"
);
LOG
(
INFO
)
<<
"------------------"
;
ncclRedOp_t
reduction_op_
=
ncclSum
;
LOG
(
INFO
)
<<
"------------------"
;
if
(
reduction
==
"ncclMin"
)
{
reduction_op_
=
ncclMin
;
...
...
@@ -65,19 +62,14 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
PADDLE_THROW
(
"Invalid reduction. default ncclSum."
);
}
LOG
(
INFO
)
<<
"------------------"
;
auto
*
comm
=
ctx
.
Input
<
Communicator
>
(
"Communicator"
);
LOG
(
INFO
)
<<
"------------------"
;
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
LOG
(
INFO
)
<<
"------------------"
;
// device id
int
gpu_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
()).
GetDeviceId
();
LOG
(
INFO
)
<<
"------------------"
;
int
idx
=
comm
->
GetCommId
(
gpu_id
);
LOG
(
INFO
)
<<
"------------------"
;
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
++
i
)
{
VLOG
(
1
)
<<
"gpu : "
<<
" invoke allreduce. send "
<<
ins
[
i
]
->
numel
()
<<
" recv "
...
...
paddle/fluid/operators/parallel_do_op.cc
浏览文件 @
3067114f
...
...
@@ -151,7 +151,6 @@ class ParallelDoOp : public framework::OperatorBase {
}
WaitOnPlaces
(
places
);
// PADDLE_ENFORCE_EQ(places.size(), sub_scopes.size());
std
::
vector
<
std
::
future
<
void
>>
workers
;
workers
.
reserve
(
places
.
size
());
for
(
size_t
place_idx
=
0
;
place_idx
<
sub_scopes
.
size
();
++
place_idx
)
{
...
...
@@ -219,21 +218,18 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto
&
sub_scopes
=
scope
.
FindVar
(
Input
(
kParallelScopes
))
->
Get
<
std
::
vector
<
framework
::
Scope
*>>
();
auto
&
places
=
scope
.
FindVar
(
Input
(
kPlaces
))
->
Get
<
platform
::
PlaceList
>
();
// PADDLE_ENFORCE_EQ(places.size(), sub_scopes.size());
// feed output@grad
SplitTensorAndMoveTensorToScopes
(
scope
,
const_cast
<
std
::
vector
<
framework
::
Scope
*>
*>
(
&
sub_scopes
),
places
,
Inputs
(
framework
::
GradVarName
(
kOutputs
)));
WaitOnPlaces
(
places
);
LOG
(
INFO
)
<<
"places "
<<
places
.
size
();
// exe run
std
::
vector
<
std
::
future
<
void
>>
workers
;
for
(
size_t
i
=
0
;
i
<
sub_scopes
.
size
();
++
i
)
{
auto
&
place
=
places
[
i
];
auto
*
cur_scope
=
sub_scopes
[
i
];
LOG
(
INFO
)
<<
place
;
// execute
workers
.
emplace_back
(
framework
::
Async
([
program
,
cur_scope
,
place
,
block
]
{
...
...
@@ -242,7 +238,6 @@ class ParallelDoGradOp : public framework::OperatorBase {
false
/*create_local_scope*/
);
}));
}
LOG
(
INFO
)
<<
"places "
<<
places
.
size
();
for
(
auto
&
worker
:
workers
)
{
worker
.
wait
();
}
...
...
python/paddle/v2/fluid/backward.py
浏览文件 @
3067114f
...
...
@@ -230,44 +230,19 @@ def _callback_lookup_(op):
def
__call__
(
self
,
block
,
context
):
if
not
self
.
has_inserted_nccl_init
:
# global_block = block.program.global_block()
# op_desc = global_block.desc.append_op()
# var_desc = global_block.desc.var('nccl_com__do_not_change_')
# var_desc.set_type(core.VarDesc.VarType.NCCL_COM)
# self.nccl_com = global_block.create_var(
# name='nccl_com', type=core.VarDesc.VarType.NCCL_COM)
# framework.Operator(
# global_block,
# type='ncclInit',
# desc=op_desc,
# inputs={},
# outputs={'Communicator': [self.nccl_com]})
op_desc
=
_create_op_desc_
(
"ncclInit"
,
{
"parallel_scopes"
:
self
.
parallel_scopes_name
},
{
"Communicator"
:
[
'nccl_com__do_not_change_'
]},
{})
# block.desc.append_op().copy_from(op_desc)
print
(
serialize_op_decs
(
op_desc
))
block
.
program
.
global_block
().
desc
.
append_op
().
copy_from
(
op_desc
)
self
.
has_inserted_nccl_init
=
True
current_op_desc
=
context
[
"__current_op_desc__"
]
# print(serialize_op_decs(context))
for
o_param
in
current_op_desc
.
output_names
():
for
o_argu
in
current_op_desc
.
output
(
o_param
):
if
o_argu
in
self
.
param_grad_names
:
# # print("reduce", o_argu)
# op_desc = block.desc.append_op()
# op_desc.set_type("ncclAllReduce")
# op_desc.set_input("X", [o_argu])
#
# # FIXME(tonyyang-svail):
# # Looks like nccl_com has been changed to nccl_com_0
# op_desc.set_input("Communicator", ['nccl_com_0'])
# out_var = block.create_var()
# op_desc.set_output("Out", [out_var.name])
# op_desc.set_attr("reduction", "ncclSum")
allreduce_out_name
=
o_argu
+
"__nccl_all_reduce__"
op_desc
=
_create_op_desc_
(
"ncclAllReduce"
,
{
...
...
python/paddle/v2/fluid/tests/test_parallel_op.py
浏览文件 @
3067114f
...
...
@@ -175,7 +175,9 @@ class ParallelOpTest(BaseParallelForTest):
def
test_simple_fc
(
self
):
self
.
run_test
(
callback
=
self
.
__network__
,
feed
=
{
'img'
:
numpy
.
random
.
random
(
size
=
(
8
,
784
)).
astype
(
'float32'
)},
feed
=
{
'img'
:
numpy
.
random
.
random
(
size
=
(
51
,
784
)).
astype
(
'float32'
)
},
fetch
=
[
'fc1.w@GRAD'
])
def
test_fc_with_tiny_data
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录