Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
1866d2db
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1866d2db
编写于
1月 24, 2019
作者:
Q
Qiao Longfei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
parameter send support selected_rows
上级
ca5d96bb
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
77 addition
and
8 deletion
+77
-8
paddle/fluid/operators/distributed/parameter_send.cc
paddle/fluid/operators/distributed/parameter_send.cc
+76
-8
paddle/fluid/operators/distributed/parameter_send.h
paddle/fluid/operators/distributed/parameter_send.h
+1
-0
未找到文件。
paddle/fluid/operators/distributed/parameter_send.cc
浏览文件 @
1866d2db
...
@@ -47,6 +47,15 @@ static size_t GetSectionIndex(int64_t id,
...
@@ -47,6 +47,15 @@ static size_t GetSectionIndex(int64_t id,
return
abs_sections
.
size
()
-
1
;
return
abs_sections
.
size
()
-
1
;
}
}
// Returns the index of the section that `row` falls into.
//
// `abs_sections` is scanned from index 1: the first entry that is strictly
// greater than `row` ends the search and the index of the entry before it is
// returned. If no entry is greater than `row`, the last index
// (abs_sections.size() - 1) is returned; for an empty vector that is -1.
// NOTE(review): the entries appear to be the absolute start offsets of each
// section in ascending order (callers subtract abs_sections[i] from a row id
// to make it section-relative) -- confirm against ToAbsoluteSection.
//
// `row` is taken as int64_t rather than int: it is compared against int64_t
// offsets, and a narrower parameter would silently truncate large row ids
// and could map them to the wrong section. Existing callers passing an int
// are unaffected.
static int FindOutIdx(int64_t row, const std::vector<int64_t>& abs_sections) {
  for (size_t i = 1; i < abs_sections.size(); ++i) {
    if (row < abs_sections[i]) {
      return static_cast<int>(i - 1);
    }
  }
  // Cast before subtracting so the empty case yields -1 without relying on
  // unsigned wrap-around followed by an implicit narrowing conversion.
  return static_cast<int>(abs_sections.size()) - 1;
}
static
std
::
vector
<
int64_t
>
ToAbsoluteSection
(
static
std
::
vector
<
int64_t
>
ToAbsoluteSection
(
const
std
::
vector
<
int
>&
height_sections
)
{
const
std
::
vector
<
int
>&
height_sections
)
{
std
::
vector
<
int64_t
>
abs_sections
;
std
::
vector
<
int64_t
>
abs_sections
;
...
@@ -97,21 +106,22 @@ static void SplitIdsIntoMultipleVarsBySection(
...
@@ -97,21 +106,22 @@ static void SplitIdsIntoMultipleVarsBySection(
}
}
}
}
template
<
typename
T
>
void
send
(
const
std
::
string
&
var_name
,
void
send
(
const
std
::
string
&
var_name
,
const
std
::
vector
<
std
::
string
>&
send_varnames
,
const
std
::
vector
<
std
::
string
>&
send_varnames
,
const
std
::
vector
<
std
::
string
>&
epmap
,
const
std
::
vector
<
std
::
string
>&
epmap
,
const
std
::
vector
<
int
>&
height_sections
,
const
std
::
vector
<
int
>&
height_sections
,
const
framework
::
ExecutionContext
&
c
ontext
,
const
framework
::
ExecutionContext
&
c
tx
,
const
framework
::
Scope
&
scope
,
const
framework
::
Scope
&
scope
,
bool
sync
)
{
bool
sync
)
{
framework
::
Scope
*
local_scope
=
scope
.
NewTmpScope
();
framework
::
Scope
*
local_scope
=
scope
.
NewTmpScope
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
cpu_ctx
=
*
pool
.
Get
(
platform
::
CPUPlace
());
auto
&
cpu_ctx
=
*
pool
.
Get
(
platform
::
CPUPlace
());
auto
&
actual_ctx
=
*
pool
.
Get
(
c
ontext
.
GetPlace
());
auto
&
actual_ctx
=
*
pool
.
Get
(
c
tx
.
GetPlace
());
distributed
::
RPCClient
*
rpc_client
=
distributed
::
RPCClient
*
rpc_client
=
distributed
::
RPCClient
::
GetInstance
<
RPCCLIENT_T
>
(
distributed
::
RPCClient
::
GetInstance
<
RPCCLIENT_T
>
(
c
ontext
.
Attr
<
int
>
(
"trainer_id"
));
c
tx
.
Attr
<
int
>
(
"trainer_id"
));
auto
*
send_var
=
scope
.
FindVar
(
var_name
);
auto
*
send_var
=
scope
.
FindVar
(
var_name
);
size_t
out_num
=
send_varnames
.
size
();
size_t
out_num
=
send_varnames
.
size
();
...
@@ -122,7 +132,7 @@ void send(const std::string& var_name,
...
@@ -122,7 +132,7 @@ void send(const std::string& var_name,
outs_dims
.
reserve
(
out_num
);
outs_dims
.
reserve
(
out_num
);
// infer output shape
// infer output shape
int
num
=
c
ontext
.
Attr
<
int
>
(
"num"
);
int
num
=
c
tx
.
Attr
<
int
>
(
"num"
);
if
(
num
>
0
)
{
if
(
num
>
0
)
{
int64_t
in_axis_dim
=
send_tensor_dims
[
0
];
int64_t
in_axis_dim
=
send_tensor_dims
[
0
];
PADDLE_ENFORCE_EQ
(
in_axis_dim
%
num
,
0
,
PADDLE_ENFORCE_EQ
(
in_axis_dim
%
num
,
0
,
...
@@ -153,13 +163,71 @@ void send(const std::string& var_name,
...
@@ -153,13 +163,71 @@ void send(const std::string& var_name,
*
out
=
send_tensor
.
Slice
(
row_offset
,
row_offset
+
outs_dims
[
i
][
0
]);
*
out
=
send_tensor
.
Slice
(
row_offset
,
row_offset
+
outs_dims
[
i
][
0
]);
row_offset
+=
outs_dims
[
i
][
0
];
row_offset
+=
outs_dims
[
i
][
0
];
}
}
}
else
if
(
send_var
->
IsType
<
framework
::
LoDTensor
>
())
{
}
else
if
(
send_var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
&
send_slr
=
send_var
->
Get
<
framework
::
SelectedRows
>
();
auto
abs_sections
=
ToAbsoluteSection
(
height_sections
);
auto
send_rows
=
send_slr
.
rows
();
std
::
vector
<
std
::
vector
<
int
>>
outs_rows_idx
;
std
::
vector
<
std
::
vector
<
int
>>
outs_dense_idx
;
outs_rows_idx
.
resize
(
out_num
);
outs_dense_idx
.
resize
(
out_num
);
auto
row_numel
=
send_slr
.
value
().
numel
()
/
send_slr
.
value
().
dims
()[
0
];
auto
src
=
send_slr
.
value
().
data
<
T
>
();
// create output var in local scope
// create output var in local scope
std
::
vector
<
framework
::
SelectedRows
*>
outs
;
for
(
auto
&
name
:
send_varnames
)
{
for
(
auto
&
name
:
send_varnames
)
{
local_scope
->
Var
(
name
)
->
GetMutable
<
framework
::
SelectedRows
>
();
auto
*
out
=
local_scope
->
Var
(
name
)
->
GetMutable
<
framework
::
SelectedRows
>
();
outs
.
push_back
(
out
);
}
// split rows index into output sparse vars
for
(
size_t
i
=
0
;
i
<
send_rows
.
size
();
++
i
)
{
int
out_idx
=
FindOutIdx
(
send_rows
[
i
],
abs_sections
);
outs_rows_idx
[
out_idx
].
push_back
(
send_rows
[
i
]);
outs_dense_idx
[
out_idx
].
push_back
(
i
);
}
}
auto
place
=
ctx
.
GetPlace
();
for
(
size_t
i
=
0
;
i
<
outs_rows_idx
.
size
();
++
i
)
{
auto
rows_idx
=
outs_rows_idx
[
i
];
outs
[
i
]
->
set_height
(
height_sections
[
i
]);
auto
dims
=
send_slr
.
GetCompleteDims
();
dims
[
0
]
=
rows_idx
.
size
();
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
dims
,
send_slr
.
place
());
outs
[
i
]
->
mutable_rows
()
->
clear
();
if
(
rows_idx
.
size
()
>
0
)
{
for
(
auto
idx
:
rows_idx
)
{
outs
[
i
]
->
mutable_rows
()
->
push_back
(
idx
-
abs_sections
[
i
]);
}
auto
dst
=
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
for
(
size_t
j
=
0
;
j
<
rows_idx
.
size
();
j
++
)
{
if
(
platform
::
is_cpu_place
(
place
))
{
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
+
j
*
row_numel
,
platform
::
CPUPlace
(),
src
+
outs_dense_idx
[
i
][
j
]
*
row_numel
,
sizeof
(
T
)
*
row_numel
);
}
else
{
#ifdef PADDLE_WITH_CUDA
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
memory
::
Copy
(
platform
::
CUDAPlace
(),
dst
+
j
*
row_numel
,
platform
::
CUDAPlace
(),
src
+
outs_dense_idx
[
i
][
j
]
*
row_numel
,
sizeof
(
T
)
*
row_numel
,
stream
);
#else
PADDLE_THROW
(
"Paddle is not compiled with GPU"
);
#endif
}
}
}
PADDLE_ENFORCE_EQ
(
rows_idx
.
size
(),
outs
[
i
]
->
rows
().
size
(),
"rows should has the same size with tensor dim 0"
);
}
}
else
{
}
else
{
PADDLE_THROW
(
"unsupported var type"
);
PADDLE_THROW
(
"unsupported var type
to send!
"
);
}
}
std
::
vector
<
distributed
::
VarHandlePtr
>
rets
;
std
::
vector
<
distributed
::
VarHandlePtr
>
rets
;
...
...
paddle/fluid/operators/distributed/parameter_send.h
浏览文件 @
1866d2db
...
@@ -23,6 +23,7 @@ namespace paddle {
...
@@ -23,6 +23,7 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
namespace
distributed
{
namespace
distributed
{
template
<
typename
T
>
void
send
(
const
std
::
string
&
var_name
,
void
send
(
const
std
::
string
&
var_name
,
const
std
::
vector
<
std
::
string
>&
send_varnames
,
const
std
::
vector
<
std
::
string
>&
send_varnames
,
const
std
::
vector
<
std
::
string
>&
epmap
,
const
std
::
vector
<
std
::
string
>&
epmap
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录