Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
8dec4ad7
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8dec4ad7
编写于
3月 21, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use int not Place for vars
上级
31815010
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
21 addition
and
25 deletion
+21
-25
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+21
-25
未找到文件。
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
8dec4ad7
...
@@ -28,6 +28,7 @@ limitations under the License. */
...
@@ -28,6 +28,7 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
using
details
::
ComputationOpHandle
;
using
details
::
DummyVarHandle
;
using
details
::
DummyVarHandle
;
using
details
::
FetchOpHandle
;
using
details
::
FetchOpHandle
;
using
details
::
NCCLAllReduceOpHandle
;
using
details
::
NCCLAllReduceOpHandle
;
...
@@ -35,7 +36,6 @@ using details::OpHandleBase;
...
@@ -35,7 +36,6 @@ using details::OpHandleBase;
using
details
::
ScaleLossGradOpHandle
;
using
details
::
ScaleLossGradOpHandle
;
using
details
::
VarHandle
;
using
details
::
VarHandle
;
using
details
::
VarHandleBase
;
using
details
::
VarHandleBase
;
using
details
::
ComputationOpHandle
;
class
ParallelExecutorPrivate
{
class
ParallelExecutorPrivate
{
public:
public:
...
@@ -43,7 +43,9 @@ class ParallelExecutorPrivate {
...
@@ -43,7 +43,9 @@ class ParallelExecutorPrivate {
const
std
::
vector
<
platform
::
Place
>
&
places
)
const
std
::
vector
<
platform
::
Place
>
&
places
)
:
places_
(
places
),
:
places_
(
places
),
fetch_dev_ctxs_
(
places
),
fetch_dev_ctxs_
(
places
),
pool_
(
num_threads
<=
1
?
nullptr
:
new
ThreadPool
(
num_threads
))
{}
pool_
(
num_threads
<=
1
?
nullptr
:
new
ThreadPool
(
num_threads
))
{
vars_
.
resize
(
places
.
size
());
}
std
::
vector
<
platform
::
Place
>
places_
;
std
::
vector
<
platform
::
Place
>
places_
;
platform
::
DeviceContextPool
fetch_dev_ctxs_
;
platform
::
DeviceContextPool
fetch_dev_ctxs_
;
...
@@ -52,12 +54,7 @@ class ParallelExecutorPrivate {
...
@@ -52,12 +54,7 @@ class ParallelExecutorPrivate {
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
platform
::
Place
main_place_
;
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
std
::
map
<
int
,
VarHandle
>>>
vars_
;
std
::
unordered_map
<
platform
::
Place
,
std
::
unordered_map
<
std
::
string
,
std
::
map
<
int
,
VarHandle
>>
,
platform
::
PlaceHash
>
vars_
;
std
::
unordered_set
<
std
::
unique_ptr
<
VarHandleBase
>>
dep_vars_
;
std
::
unordered_set
<
std
::
unique_ptr
<
VarHandleBase
>>
dep_vars_
;
...
@@ -69,8 +66,8 @@ class ParallelExecutorPrivate {
...
@@ -69,8 +66,8 @@ class ParallelExecutorPrivate {
std
::
unique_ptr
<
platform
::
EnforceNotMet
>
exception_
;
std
::
unique_ptr
<
platform
::
EnforceNotMet
>
exception_
;
VarHandle
*
GetVarHandle
(
const
std
::
string
&
each_var_name
,
VarHandle
*
GetVarHandle
(
const
std
::
string
&
each_var_name
,
const
platform
::
Place
&
place
)
{
const
platform
::
Place
&
place
,
size_t
place_offset
)
{
auto
&
var_holders
=
vars_
[
place
];
auto
&
var_holders
=
vars_
[
place
_offset
];
auto
&
var_holder
=
var_holders
[
each_var_name
];
auto
&
var_holder
=
var_holders
[
each_var_name
];
VarHandle
*
var
=
nullptr
;
VarHandle
*
var
=
nullptr
;
if
(
var_holder
.
empty
())
{
if
(
var_holder
.
empty
())
{
...
@@ -118,8 +115,8 @@ class ParallelExecutorPrivate {
...
@@ -118,8 +115,8 @@ class ParallelExecutorPrivate {
}
}
void
GenerateVar
(
OpHandleBase
*
op_handle
,
const
std
::
string
&
each_var_name
,
void
GenerateVar
(
OpHandleBase
*
op_handle
,
const
std
::
string
&
each_var_name
,
const
platform
::
Place
&
place
)
{
const
platform
::
Place
&
place
,
size_t
place_offset
)
{
auto
&
vars
=
vars_
[
place
][
each_var_name
];
auto
&
vars
=
vars_
[
place
_offset
][
each_var_name
];
size_t
version
=
vars
.
size
();
size_t
version
=
vars
.
size
();
auto
&
var
=
vars
[
version
];
auto
&
var
=
vars
[
version
];
var
.
version_
=
version
;
var
.
version_
=
version
;
...
@@ -144,11 +141,10 @@ ParallelExecutor::ParallelExecutor(
...
@@ -144,11 +141,10 @@ ParallelExecutor::ParallelExecutor(
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
member_
->
local_scopes_
.
push_back
(
&
scope
->
NewScope
());
member_
->
local_scopes_
.
push_back
(
&
scope
->
NewScope
());
}
}
member_
->
main_place_
=
places
[
0
];
// Bcast Parameters to all GPUs
// Bcast Parameters to all GPUs
BuildNCCLCommunicator
();
BuildNCCLCommunicator
();
if
(
platform
::
is_gpu_place
(
member_
->
main_place_
)
&&
if
(
platform
::
is_gpu_place
(
places
[
0
]
)
&&
member_
->
local_scopes_
.
size
()
!=
1
)
{
// Is CUDA
member_
->
local_scopes_
.
size
()
!=
1
)
{
// Is CUDA
BCastParamsToGPUs
(
startup_program
);
BCastParamsToGPUs
(
startup_program
);
}
}
...
@@ -201,13 +197,13 @@ void ParallelExecutor::ConstructDependencyGraph(
...
@@ -201,13 +197,13 @@ void ParallelExecutor::ConstructDependencyGraph(
auto
var_names
=
op
->
InputArgumentNames
();
auto
var_names
=
op
->
InputArgumentNames
();
for
(
auto
&
each_var_name
:
var_names
)
{
for
(
auto
&
each_var_name
:
var_names
)
{
VarHandle
*
var
=
member_
->
GetVarHandle
(
each_var_name
,
p
);
VarHandle
*
var
=
member_
->
GetVarHandle
(
each_var_name
,
p
,
i
);
op_handle
->
AddInput
(
var
);
op_handle
->
AddInput
(
var
);
}
}
var_names
=
op
->
OutputArgumentNames
();
var_names
=
op
->
OutputArgumentNames
();
for
(
auto
&
each_var_name
:
var_names
)
{
for
(
auto
&
each_var_name
:
var_names
)
{
member_
->
GenerateVar
(
op_handle
,
each_var_name
,
p
);
member_
->
GenerateVar
(
op_handle
,
each_var_name
,
p
,
i
);
}
}
if
(
is_forwarding
)
{
if
(
is_forwarding
)
{
...
@@ -224,7 +220,7 @@ void ParallelExecutor::ConstructDependencyGraph(
...
@@ -224,7 +220,7 @@ void ParallelExecutor::ConstructDependencyGraph(
// loss->pending_ops_.emplace_back(op_handle);
// loss->pending_ops_.emplace_back(op_handle);
// op_handle->inputs_.emplace_back(loss);
// op_handle->inputs_.emplace_back(loss);
member_
->
GenerateVar
(
op_handle
,
loss_var_name
+
"@GRAD"
,
p
);
member_
->
GenerateVar
(
op_handle
,
loss_var_name
+
"@GRAD"
,
p
,
i
);
change_forward
=
true
;
change_forward
=
true
;
}
}
}
}
...
@@ -245,7 +241,7 @@ void ParallelExecutor::ConstructDependencyGraph(
...
@@ -245,7 +241,7 @@ void ParallelExecutor::ConstructDependencyGraph(
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
auto
&
p
=
member_
->
places_
[
i
];
auto
&
p
=
member_
->
places_
[
i
];
auto
&
vars
=
member_
->
vars_
[
p
][
og
];
auto
&
vars
=
member_
->
vars_
[
i
][
og
];
if
(
vars
.
empty
())
{
// This device has no data. continue.
if
(
vars
.
empty
())
{
// This device has no data. continue.
continue
;
continue
;
...
@@ -280,8 +276,8 @@ void ParallelExecutor::ConstructDependencyGraph(
...
@@ -280,8 +276,8 @@ void ParallelExecutor::ConstructDependencyGraph(
* https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR)
* https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR)
*/
*/
void
ParallelExecutor
::
PolishGraphToSupportDataHazards
()
const
{
void
ParallelExecutor
::
PolishGraphToSupportDataHazards
()
const
{
for
(
auto
&
place_pair
:
member_
->
vars_
)
{
for
(
auto
&
var_map
:
member_
->
vars_
)
{
for
(
auto
&
name_pair
:
place_pair
.
second
)
{
for
(
auto
&
name_pair
:
var_map
)
{
if
(
name_pair
.
second
.
size
()
<=
1
)
{
if
(
name_pair
.
second
.
size
()
<=
1
)
{
return
;
return
;
}
}
...
@@ -369,8 +365,8 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
...
@@ -369,8 +365,8 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
std
::
unordered_map
<
OpHandleBase
*
,
size_t
>
pending_ops
;
std
::
unordered_map
<
OpHandleBase
*
,
size_t
>
pending_ops
;
std
::
vector
<
DummyVarHandle
>
dummy_vars
;
std
::
vector
<
DummyVarHandle
>
dummy_vars
;
for
(
auto
&
place_pair
:
member_
->
vars_
)
{
for
(
auto
&
var_map
:
member_
->
vars_
)
{
for
(
auto
&
name_pair
:
place_pair
.
second
)
{
for
(
auto
&
name_pair
:
var_map
)
{
for
(
auto
&
version_pair
:
name_pair
.
second
)
{
for
(
auto
&
version_pair
:
name_pair
.
second
)
{
pending_vars
[
&
version_pair
.
second
]
=
pending_vars
[
&
version_pair
.
second
]
=
version_pair
.
second
.
generated_op_
==
nullptr
;
version_pair
.
second
.
generated_op_
==
nullptr
;
...
@@ -395,9 +391,9 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
...
@@ -395,9 +391,9 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
VarHandleBase
*>>
fetched_vars
;
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
VarHandleBase
*>>
fetched_vars
;
for
(
auto
&
fetch_var_name
:
fetch_tensors
)
{
for
(
auto
&
fetch_var_name
:
fetch_tensors
)
{
for
(
auto
&
pair
:
member_
->
vars_
)
{
for
(
auto
&
var_map
:
member_
->
vars_
)
{
auto
it
=
pair
.
second
.
find
(
fetch_var_name
);
auto
it
=
var_map
.
find
(
fetch_var_name
);
if
(
it
!=
pair
.
second
.
end
())
{
if
(
it
!=
var_map
.
end
())
{
fetched_vars
[
fetch_var_name
].
push_back
(
&
it
->
second
.
rbegin
()
->
second
);
fetched_vars
[
fetch_var_name
].
push_back
(
&
it
->
second
.
rbegin
()
->
second
);
}
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录