Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
af2f5fc8
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
You need to sign in or sign up before continuing.
提交
af2f5fc8
编写于
11月 26, 2018
作者:
Q
Qiao Longfei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some bugs
上级
5d5e0656
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
31 addition
and
24 deletion
+31
-24
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+1
-1
paddle/fluid/operators/distributed/parameter_prefetch.cc
paddle/fluid/operators/distributed/parameter_prefetch.cc
+28
-22
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+2
-1
未找到文件。
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
af2f5fc8
...
@@ -862,7 +862,7 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
...
@@ -862,7 +862,7 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
if
(
node
->
Op
()
->
Type
()
==
"fetch_barrier"
)
{
if
(
node
->
Op
()
->
Type
()
==
"fetch_barrier"
)
{
outvar_dev_id
=
outvar_dev_id
=
GetVarDeviceID
(
*
result
,
output
->
Name
(),
*
sharded_var_device
);
GetVarDeviceID
(
*
result
,
output
->
Name
(),
*
sharded_var_device
);
PADDLE_ENFORCE_NE
(
outvar_dev_id
,
-
1
);
PADDLE_ENFORCE_NE
(
outvar_dev_id
,
-
1
,
"output name %s"
,
output
->
Name
()
);
}
}
p
=
places_
[
outvar_dev_id
];
p
=
places_
[
outvar_dev_id
];
ir
::
Node
*
new_node
=
nullptr
;
ir
::
Node
*
new_node
=
nullptr
;
...
...
paddle/fluid/operators/distributed/parameter_prefetch.cc
浏览文件 @
af2f5fc8
...
@@ -100,7 +100,7 @@ inline void SplitIdsIntoMultipleVarsBySection(
...
@@ -100,7 +100,7 @@ inline void SplitIdsIntoMultipleVarsBySection(
}
}
}
}
inline
void
MergeMultipleVarsIntoOnBySection
(
inline
void
MergeMultipleVarsIntoOn
e
BySection
(
const
std
::
string
&
id_name
,
const
std
::
string
&
out_name
,
const
std
::
string
&
id_name
,
const
std
::
string
&
out_name
,
const
std
::
vector
<
std
::
string
>&
out_var_names
,
const
std
::
vector
<
std
::
string
>&
out_var_names
,
const
std
::
vector
<
int64_t
>&
height_section
,
const
std
::
vector
<
int64_t
>&
height_section
,
...
@@ -125,25 +125,30 @@ inline void MergeMultipleVarsIntoOnBySection(
...
@@ -125,25 +125,30 @@ inline void MergeMultipleVarsIntoOnBySection(
for
(
size_t
section_idx
=
0
;
section_idx
<
out_var_names
.
size
();
for
(
size_t
section_idx
=
0
;
section_idx
<
out_var_names
.
size
();
++
section_idx
)
{
++
section_idx
)
{
auto
&
ids_in_this_section
=
splited_ids
[
section_idx
];
auto
&
ids_in_this_section
=
splited_ids
[
section_idx
];
auto
&
prefetch_out_var
=
if
(
!
ids_in_this_section
.
empty
())
{
scope
->
Var
(
out_var_names
[
section_idx
])
->
Get
<
framework
::
LoDTensor
>
();
auto
&
prefetch_out_var
=
const
auto
*
out_var_data
=
prefetch_out_var
.
data
<
float
>
();
scope
->
Var
(
out_var_names
[
section_idx
])
->
Get
<
framework
::
LoDTensor
>
();
auto
&
dims
=
prefetch_out_var
.
dims
();
const
auto
*
out_var_data
=
prefetch_out_var
.
data
<
float
>
();
auto
&
dims
=
prefetch_out_var
.
dims
();
PADDLE_ENFORCE_EQ
(
dims
.
size
(),
2
,
""
);
PADDLE_ENFORCE_EQ
(
ids_in_this_section
.
size
(),
dims
[
0
]);
PADDLE_ENFORCE_EQ
(
dims
.
size
(),
2
,
""
);
PADDLE_ENFORCE_EQ
(
ids_in_this_section
.
size
(),
dims
[
0
]);
auto
row_numel
=
dims
[
1
];
auto
row_numel
=
dims
[
1
];
for
(
size_t
i
=
0
;
i
<
dims
[
0
];
++
i
)
{
auto
id
=
ids_in_this_section
[
i
];
for
(
size_t
i
=
0
;
i
<
dims
[
0
];
++
i
)
{
auto
origin_id
=
id
+
abs_sections
[
section_idx
];
auto
id
=
ids_in_this_section
[
i
];
auto
&
offsets
=
id_to_offset
[
origin_id
];
auto
origin_id
=
id
+
abs_sections
[
section_idx
];
for
(
auto
&
offset
:
offsets
)
{
auto
&
offsets
=
id_to_offset
[
origin_id
];
// should support GPU tensor
for
(
auto
&
offset
:
offsets
)
{
memory
::
Copy
(
cpu_place
,
out_tensor_data
+
offset
*
row_numel
,
cpu_place
,
// should support GPU tensor
out_var_data
+
i
*
row_numel
,
sizeof
(
float
)
*
row_numel
);
memory
::
Copy
(
cpu_place
,
out_tensor_data
+
offset
*
row_numel
,
cpu_place
,
out_var_data
+
i
*
row_numel
,
sizeof
(
float
)
*
row_numel
);
}
}
}
}
else
{
VLOG
(
30
)
<<
"ids in this section is empty"
;
}
}
}
}
}
}
...
@@ -190,13 +195,14 @@ void prefetch(const std::string& id_name, const std::string& out_name,
...
@@ -190,13 +195,14 @@ void prefetch(const std::string& id_name, const std::string& out_name,
VLOG
(
30
)
<<
"don't send no-initialied variable: "
<<
out_var_names
[
i
];
VLOG
(
30
)
<<
"don't send no-initialied variable: "
<<
out_var_names
[
i
];
}
}
}
}
for
(
size_t
i
=
0
;
i
<
rets
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
rets
.
size
();
i
++
)
{
PADDLE_ENFORCE
(
rets
[
i
]
->
Wait
(),
"internal error in RPCClient"
);
PADDLE_ENFORCE
(
rets
[
i
]
->
Wait
(),
"internal error in RPCClient"
);
}
}
MergeMultipleVarsIntoOnBySection
(
id_name
,
out_name
,
out_var_names
,
MergeMultipleVarsIntoOn
e
BySection
(
id_name
,
out_name
,
out_var_names
,
height_sections
,
splited_ids
,
context
,
height_sections
,
splited_ids
,
context
,
&
local_scope
);
&
local_scope
);
context
.
scope
().
DeleteScope
(
&
local_scope
);
context
.
scope
().
DeleteScope
(
&
local_scope
);
}
}
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
af2f5fc8
...
@@ -444,7 +444,7 @@ class DistributeTranspiler(object):
...
@@ -444,7 +444,7 @@ class DistributeTranspiler(object):
# connect deps to send op in async mode
# connect deps to send op in async mode
recv_dep_in
=
self
.
grad_name_to_send_dummy_out
[
recv_dep_in
=
self
.
grad_name_to_send_dummy_out
[
self
.
param_name_to_grad_name
[
param_varname
]]
self
.
param_name_to_grad_name
[
param_varname
]]
all_recv_outputs
.
extend
(
splited_var
)
# get recv op_role_var, if not splited, the grad should have .trainer suffix
# get recv op_role_var, if not splited, the grad should have .trainer suffix
# if splited, grad should be the original grad var name. ParallelExecutor
# if splited, grad should be the original grad var name. ParallelExecutor
# will use op_role_var to get expected device place to run this op.
# will use op_role_var to get expected device place to run this op.
...
@@ -460,6 +460,7 @@ class DistributeTranspiler(object):
...
@@ -460,6 +460,7 @@ class DistributeTranspiler(object):
self
.
_update_remote_sparse_update_op
(
param_varname
,
self
.
_update_remote_sparse_update_op
(
param_varname
,
height_sections
,
eps
)
height_sections
,
eps
)
else
:
else
:
all_recv_outputs
.
extend
(
splited_var
)
program
.
global_block
().
append_op
(
program
.
global_block
().
append_op
(
type
=
"recv"
,
type
=
"recv"
,
inputs
=
{
"X"
:
[
recv_dep_in
]},
inputs
=
{
"X"
:
[
recv_dep_in
]},
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录