机器未来 / Paddle (forked from PaddlePaddle / Paddle, in sync with the fork's source project)
Commit 8496b2e4
Authored on Jan 05, 2018 by Yang Yu

Refine parallel_do

Parent: 60e27d11
Showing 3 changed files with 25 additions and 22 deletions (+25 -22).

paddle/framework/lod_tensor.cc        +4  -4
paddle/operators/parallel_do_op.cc    +20 -18
python/paddle/v2/fluid/backward.py    +1  -0
paddle/framework/lod_tensor.cc
@@ -270,10 +270,10 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
                     "Batch size should be divided by places size");
   std::vector<LoDTensor> lods;
-  for (int place_idx = 0; place_idx < places.size(); ++place_idx) {
-    int begin = place_idx * dims()[0] / places.size();
-    int end = (place_idx + 1) * dims()[0] / places.size();
-    auto src = Slice(begin, end);
+  for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
+    size_t begin = place_idx * dims()[0] / places.size();
+    size_t end = (place_idx + 1) * dims()[0] / places.size();
+    auto src = Slice(static_cast<int>(begin), static_cast<int>(end));
     LoDTensor dst;
     dst.Resize(src.dims());
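Editor's note on the hunk above: the loop index and the begin/end offsets switch from int to size_t so they match places.size(), and the narrowing to int now happens only at the Slice call, hence the explicit static_cast. A minimal standalone sketch of the same even-split arithmetic, assuming nothing from Paddle (the EvenSplit helper and the batch/place counts are illustrative only):

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

// Split a batch of `rows` rows evenly across `places` devices, returning
// [begin, end) row ranges as int pairs, the way a Slice-style API taking
// int arguments would expect them.
std::vector<std::pair<int, int>> EvenSplit(std::size_t rows, std::size_t places) {
  std::vector<std::pair<int, int>> ranges;
  ranges.reserve(places);
  for (std::size_t place_idx = 0; place_idx < places; ++place_idx) {
    std::size_t begin = place_idx * rows / places;
    std::size_t end = (place_idx + 1) * rows / places;
    // Arithmetic stays in size_t; narrowing happens only at the API boundary,
    // mirroring the static_cast<int> added in the diff above.
    ranges.emplace_back(static_cast<int>(begin), static_cast<int>(end));
  }
  return ranges;
}

int main() {
  for (auto &r : EvenSplit(8, 4)) {
    std::cout << "[" << r.first << ", " << r.second << ")\n";  // [0, 2) [2, 4) ...
  }
  return 0;
}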
paddle/operators/parallel_do_op.cc
@@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

 #include <thread>
 #include <vector>

 #include "paddle/framework/executor.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/threadpool.h"

 namespace paddle {
 namespace operators {

-constexpr char kInputs[] = "inputs";
-constexpr char kParameters[] = "parameters";
-constexpr char kPlaces[] = "places";
+static constexpr char kInputs[] = "inputs";
+static constexpr char kParameters[] = "parameters";
+static constexpr char kPlaces[] = "places";

-constexpr char kOutputs[] = "outputs";
-constexpr char kParallelScopes[] = "parallel_scopes";
+static constexpr char kOutputs[] = "outputs";
+static constexpr char kParallelScopes[] = "parallel_scopes";

-constexpr char kParallelBlock[] = "sub_block";
+static constexpr char kParallelBlock[] = "sub_block";

 // using ParallelScopeVar = std::vector<framework::Scope *>;
 using LoDTensor = framework::LoDTensor;
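A side note on the static constexpr change above: at namespace scope a constexpr array is already const and therefore defaults to internal linkage in C++11/14, so the added static mainly spells out that these attribute-name strings are private to the translation unit. A tiny standalone sketch of the same pattern, with identifiers that are illustrative rather than Paddle's:

#include <cstdio>

// File-local attribute-name constants in the style of the diff above; the
// explicit `static` documents that they never leave this translation unit.
static constexpr char kIllustrativeInputs[] = "inputs";
static constexpr char kIllustrativeOutputs[] = "outputs";

int main() {
  std::printf("%s -> %s\n", kIllustrativeInputs, kIllustrativeOutputs);
  return 0;
}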
@@ -85,7 +85,8 @@ class ParallelDoOp : public framework::OperatorBase {
     SplitTensorAndMoveTensorToScopes(scope, sub_scopes, places, Inputs(kInputs));

-    std::vector<std::thread> workers;
+    std::vector<std::future<void>> workers;
+    workers.reserve(places.size());
     for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
       VLOG(3) << "Run " << place_idx;
@@ -93,26 +94,27 @@ class ParallelDoOp : public framework::OperatorBase {
       auto *cur_scope = sub_scopes[place_idx];

       // copy parameter
-      if (dev_ctx.GetPlace() != place) {
+      // some version of boost lacks != for boost::variant
+      if (!(dev_ctx.GetPlace() == place)) {
         PADDLE_THROW("Not Implemented");
       }

       // execute
-      workers.push_back(std::thread([program, cur_scope, place, block] {
-        auto executor = framework::Executor(place);
+      workers.emplace_back(framework::Async([program, cur_scope, place, block] {
+        framework::Executor executor(place);
         executor.Run(*program, cur_scope, block->ID(),
                      false /*create_local_scope*/);
       }));
     }
     for (auto &worker : workers) {
-      worker.join();
+      worker.wait();
     }

     // merge output
     for (auto &o_name : Outputs(kOutputs)) {
       std::vector<const framework::LoDTensor *> lod_tensors;
       lod_tensors.reserve(sub_scopes.size());
       for (auto *sub_scope : sub_scopes) {
-        lod_tensors.push_back(&sub_scope->FindVar(o_name)->Get<LoDTensor>());
+        lod_tensors.emplace_back(&sub_scope->FindVar(o_name)->Get<LoDTensor>());
       }

       auto *lod_tensor_to_be_merged =
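The !(a == b) rewrite in the hunk above works around a boost::variant build in which operator!= is unavailable, as the added comment says. A standalone sketch of the same idiom, using a hypothetical place type (not boost, not Paddle) that only defines operator==:

#include <iostream>
#include <string>

// Hypothetical place descriptor that, like the boost::variant in question,
// provides operator== but no operator!=.
struct FakePlace {
  std::string device;
};

bool operator==(const FakePlace &a, const FakePlace &b) {
  return a.device == b.device;
}

int main() {
  FakePlace ctx_place{"CPU"};
  FakePlace target{"GPU:0"};
  // `ctx_place != target` would not compile here; negating operator== works.
  if (!(ctx_place == target)) {
    std::cout << "places differ, would throw Not Implemented\n";
  }
  return 0;
}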
@@ -177,7 +179,7 @@ class ParallelDoGradOp : public OperatorBase {
     }

     // exe run
-    std::vector<std::thread> workers;
+    std::vector<std::future<void>> workers;
     for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
       VLOG(3) << "Run " << place_idx;
@@ -185,14 +187,14 @@ class ParallelDoGradOp : public OperatorBase {
       auto *cur_scope = sub_scopes[place_idx];

       // execute
-      workers.push_back(std::thread([program, cur_scope, place, block] {
-        auto executor = framework::Executor(place);
+      workers.emplace_back(framework::Async([program, cur_scope, place, block] {
+        framework::Executor executor(place);
         executor.Run(*program, cur_scope, block->ID(),
                      false /*create_local_scope*/);
       }));
     }
     for (auto &worker : workers) {
-      worker.join();
+      worker.wait();
     }

     // merge grad
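Both operators above make the same refactor: the per-place work items move from std::vector<std::thread> joined with join() to std::vector<std::future<void>> waited on with wait(), with the closures submitted through framework::Async (presumably provided by the paddle/framework/threadpool.h include shown at the top of the file). A minimal standalone sketch of that pattern, using std::async as a stand-in for framework::Async; the worker count and the DoWork body are illustrative only:

#include <cstddef>
#include <cstdio>
#include <future>
#include <vector>

// Stand-in for the per-place body each worker runs.
void DoWork(std::size_t place_idx) { std::printf("run place %zu\n", place_idx); }

int main() {
  const std::size_t num_places = 4;

  // Before: std::vector<std::thread> plus thread.join().
  // After:  futures returned by an async launcher plus future.wait().
  std::vector<std::future<void>> workers;
  workers.reserve(num_places);
  for (std::size_t place_idx = 0; place_idx < num_places; ++place_idx) {
    workers.emplace_back(
        std::async(std::launch::async, [place_idx] { DoWork(place_idx); }));
  }
  for (auto &worker : workers) {
    worker.wait();  // block until every place has finished, as join() did
  }
  return 0;
}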
python/paddle/v2/fluid/backward.py
@@ -205,6 +205,7 @@ def _append_backward_ops_(target,
         # Getting op's corresponding grad_op
         grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
             op.desc, no_grad_dict[block.idx], grad_sub_block_list)
         grad_op_descs.extend(grad_op_desc)
         grad_to_var.update(op_grad_to_var)