Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
18615626
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
18615626
编写于
1月 22, 2018
作者:
武
武毅
提交者:
GitHub
1月 22, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7715 from Yancey1989/split_selected_rows_to_multi_pserver
[WIP] Split SelectedRows to multiple pservers
上级
85671b8a
d0a93936
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
101 addition
and
63 deletion
+101
-63
paddle/operators/recv_op.cc
paddle/operators/recv_op.cc
+1
-1
paddle/operators/split_selected_rows_op.cc
paddle/operators/split_selected_rows_op.cc
+7
-14
paddle/operators/split_selected_rows_op.h
paddle/operators/split_selected_rows_op.h
+49
-19
python/paddle/v2/fluid/distribute_transpiler.py
python/paddle/v2/fluid/distribute_transpiler.py
+23
-9
python/paddle/v2/fluid/tests/test_split_selected_rows_op.py
python/paddle/v2/fluid/tests/test_split_selected_rows_op.py
+21
-20
未找到文件。
paddle/operators/recv_op.cc
浏览文件 @
18615626
...
@@ -103,7 +103,7 @@ class RecvOp : public framework::OperatorBase {
...
@@ -103,7 +103,7 @@ class RecvOp : public framework::OperatorBase {
// TODO(typhoonzero): change this to a while_op for every cluster-batch.
// TODO(typhoonzero): change this to a while_op for every cluster-batch.
bool
exit_flag
=
false
;
bool
exit_flag
=
false
;
int64
_t
barrier_size
=
param_count
*
fan_in
;
size
_t
barrier_size
=
param_count
*
fan_in
;
while
(
!
exit_flag
)
{
while
(
!
exit_flag
)
{
// Get from multiple trainers, we don't care about the order in which
// Get from multiple trainers, we don't care about the order in which
// the gradients arrives, just add suffix 0~n and merge the gradient.
// the gradients arrives, just add suffix 0~n and merge the gradient.
...
...
paddle/operators/split_selected_rows_op.cc
浏览文件 @
18615626
...
@@ -23,8 +23,6 @@ class SplitSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -23,8 +23,6 @@ class SplitSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"The input SelectedRows."
);
AddInput
(
"X"
,
"The input SelectedRows."
);
AddOutput
(
"Out"
,
"The outputs of input SelectedRows."
).
AsDuplicable
();
AddOutput
(
"Out"
,
"The outputs of input SelectedRows."
).
AsDuplicable
();
AddAttr
<
std
::
vector
<
int
>>
(
"rows_sections"
,
"Rows section for output."
)
.
SetDefault
(
std
::
vector
<
int
>
({}));
AddAttr
<
std
::
vector
<
int
>>
(
"height_sections"
,
AddAttr
<
std
::
vector
<
int
>>
(
"height_sections"
,
"Height for each output SelectedRows."
)
"Height for each output SelectedRows."
)
.
SetDefault
(
std
::
vector
<
int
>
({}));
.
SetDefault
(
std
::
vector
<
int
>
({}));
...
@@ -35,16 +33,16 @@ height_sections is only needed when need to split the dims of the original tenso
...
@@ -35,16 +33,16 @@ height_sections is only needed when need to split the dims of the original tenso
Example:
Example:
Input:
Input:
X.rows = {
0,
7, 5}
X.rows = {7, 5}
X.height = 12
X.height = 12
Attr:
Attr:
rows_sections = {1, 2}
height_sections = {4, 8}
height_sections = {}
Out:
Out:
out0.rows = {0}
out0.rows = {}
out0.height = 12
out0.height = 4
out1.rows = {7, 5}
out2.height = 12
out1.rows = {5, 7}
out2.height = 8
)DOC"
);
)DOC"
);
}
}
...
@@ -61,11 +59,6 @@ class SplitSelectedRowsOp : public framework::OperatorWithKernel {
...
@@ -61,11 +59,6 @@ class SplitSelectedRowsOp : public framework::OperatorWithKernel {
std
::
vector
<
int
>
height_sections
=
std
::
vector
<
int
>
height_sections
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"height_sections"
);
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"height_sections"
);
std
::
vector
<
int
>
rows_sections
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"rows_sections"
);
PADDLE_ENFORCE_EQ
(
rows_sections
.
size
(),
ctx
->
Outputs
(
"Out"
).
size
(),
"The size of rows section should be the same with Outputs size."
);
int64_t
n
=
ctx
->
Outputs
(
"Out"
).
size
();
int64_t
n
=
ctx
->
Outputs
(
"Out"
).
size
();
std
::
vector
<
framework
::
DDim
>
outs_dims
;
std
::
vector
<
framework
::
DDim
>
outs_dims
;
...
...
paddle/operators/split_selected_rows_op.h
浏览文件 @
18615626
...
@@ -16,40 +16,70 @@ limitations under the License. */
...
@@ -16,40 +16,70 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/selected_rows_functor.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
// Finds which section of `height_sections` a row index falls into.
//
// The sections are treated as consecutive half-open ranges laid end to end
// starting at 0: section k covers [start_k, start_k + height_sections[k]).
//
// Returns the zero-based position of the first section containing `row`,
// or -1 when no section contains it (including when `row` is negative or
// `height_sections` is empty). Callers must handle the -1 result before
// using the return value as an index.
static int FindOutIdx(int row, const std::vector<int>& height_sections) {
  int section_begin = 0;
  int section_idx = 0;
  for (const int section_height : height_sections) {
    const int section_end = section_begin + section_height;
    if (section_begin <= row && row < section_end) {
      return section_idx;
    }
    // Advance to the start of the next section.
    section_begin = section_end;
    ++section_idx;
  }
  return -1;
}
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
SplitSelectedRowsOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
SplitSelectedRowsOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
x
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"X"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
SelectedRows
>
(
"Out"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
SelectedRows
>
(
"Out"
);
auto
rows_sections
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"rows_sections"
);
auto
height_sections
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"height_sections"
);
auto
height_sections
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"height_sections"
);
int64_t
n
=
outs
.
size
();
auto
x_rows
=
x
->
rows
();
int
offset
=
0
;
std
::
vector
<
std
::
vector
<
int
>>
outs_rows_idx
;
outs_rows_idx
.
resize
(
outs
.
size
());
for
(
int64_t
i
=
0
;
i
<
n
;
++
i
)
{
auto
row_numel
=
x
->
value
().
numel
()
/
x
->
value
().
dims
()[
0
];
framework
::
Vector
<
int64_t
>
out_rows
;
auto
src
=
x
->
value
().
data
<
T
>
();
for
(
int64_t
j
=
0
;
j
<
rows_sections
[
i
];
++
j
)
{
out_rows
.
push_back
(
x
->
rows
()[
offset
+
j
]);
for
(
size_t
i
=
0
;
i
<
x_rows
.
size
();
++
i
)
{
}
int
out_idx
=
FindOutIdx
(
x_rows
[
i
],
height_sections
);
outs_rows_idx
[
out_idx
].
push_back
(
i
);
}
auto
place
=
ctx
.
GetPlace
();
auto
&
out
=
outs
[
i
];
for
(
size_t
i
=
0
;
i
<
outs_rows_idx
.
size
();
++
i
)
{
auto
x_dims
=
x
->
GetCompleteDims
();
auto
rows_idx
=
outs_rows_idx
[
i
];
x_dims
[
0
]
=
rows_sections
[
i
];
if
(
rows_idx
.
size
()
>
0
)
{
out
->
mutable_value
()
->
mutable_data
<
T
>
(
x_dims
,
ctx
.
GetPlace
());
auto
dims
=
x
->
GetCompleteDims
();
framework
::
Copy
(
x
->
value
().
Slice
(
offset
,
rows_sections
[
i
]
+
offset
),
dims
[
0
]
=
rows_idx
.
size
();
x
->
place
(),
ctx
.
device_context
(),
out
->
mutable_value
());
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
dims
,
x
->
place
());
outs
[
i
]
->
set_rows
(
out_rows
);
for
(
auto
idx
:
rows_idx
)
{
if
(
height_sections
.
size
())
{
outs
[
i
]
->
mutable_rows
()
->
push_back
(
x_rows
[
idx
]);
outs
[
i
]
->
set_height
(
height_sections
[
i
]);
}
auto
dst
=
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
for
(
size_t
j
=
0
;
j
<
rows_idx
.
size
();
j
++
)
{
if
(
platform
::
is_cpu_place
(
place
))
{
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
+
j
*
row_numel
,
platform
::
CPUPlace
(),
src
+
rows_idx
[
j
]
*
row_numel
,
sizeof
(
T
)
*
row_numel
);
}
else
{
#ifdef PADDLE_WITH_CUDA
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
memory
::
Copy
(
platform
::
CUDAPlace
(),
dst
+
j
*
row_numel
,
platform
::
CUDAPlace
(),
src
+
rows_idx
[
j
]
*
row_numel
,
sizeof
(
T
)
*
row_numel
,
stream
);
#else
PADDLE_THROW
(
"Paddle is not compiled with GPU"
);
#endif
}
}
}
}
offset
+=
rows_sections
[
i
];
}
}
}
}
};
};
...
...
python/paddle/v2/fluid/distribute_transpiler.py
浏览文件 @
18615626
...
@@ -19,6 +19,7 @@ import optimizer
...
@@ -19,6 +19,7 @@ import optimizer
from
layer_helper
import
LayerHelper
from
layer_helper
import
LayerHelper
from
distributed_spliter
import
*
from
distributed_spliter
import
*
import
math
import
math
from
.
import
core
class
VarBlock
:
class
VarBlock
:
...
@@ -217,15 +218,28 @@ class DistributeTranspiler:
...
@@ -217,15 +218,28 @@ class DistributeTranspiler:
if
len
(
splited_vars
)
<=
1
:
if
len
(
splited_vars
)
<=
1
:
continue
continue
orig_var
=
program
.
global_block
().
vars
[
varname
]
orig_var
=
program
.
global_block
().
vars
[
varname
]
sections
=
[]
if
orig_var
==
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
:
for
v
in
splited_vars
:
height_sections
=
[]
sections
.
append
(
v
.
shape
[
0
])
for
v
in
splited_vars
:
program
.
global_block
().
append_op
(
height_sections
.
append
(
v
.
shape
[
0
])
type
=
"split"
,
program
.
global_block
().
append_op
(
inputs
=
{
"X"
:
orig_var
},
type
=
"split_selected_rows"
,
outputs
=
{
"Out"
:
splited_vars
},
inputs
=
{
"X"
:
orig_var
},
attrs
=
{
"sections"
:
sections
}
# assume split evenly
outputs
=
{
"Out"
:
splited_vars
},
)
attrs
=
{
"height_sections"
:
height_sections
})
elif
orig_var
==
core
.
VarDesc
.
VarType
.
LOD_TENSOR
:
sections
=
[]
for
v
in
splited_vars
:
sections
.
append
(
v
.
shape
[
0
])
program
.
global_block
().
append_op
(
type
=
"split"
,
inputs
=
{
"X"
:
orig_var
},
outputs
=
{
"Out"
:
splited_vars
},
attrs
=
{
"sections"
:
sections
}
# assume split evenly
)
else
:
AssertionError
(
"Variable type should be in set "
"[LOD_TENSOR, SELECTED_ROWS]"
)
return
var_mapping
return
var_mapping
def
get_trainer_program
(
self
):
def
get_trainer_program
(
self
):
...
...
python/paddle/v2/fluid/tests/test_split_selected_rows_op.py
浏览文件 @
18615626
...
@@ -35,8 +35,8 @@ class TestSpliteSelectedRows(unittest.TestCase):
...
@@ -35,8 +35,8 @@ class TestSpliteSelectedRows(unittest.TestCase):
def
check_with_place
(
self
,
place
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
rows
=
[
0
,
5
,
7
,
4
]
rows
=
[
0
,
5
,
7
,
4
,
20
]
height
=
1
0
height
=
2
0
row_numel
=
2
row_numel
=
2
# initialize input variable X
# initialize input variable X
...
@@ -46,38 +46,41 @@ class TestSpliteSelectedRows(unittest.TestCase):
...
@@ -46,38 +46,41 @@ class TestSpliteSelectedRows(unittest.TestCase):
np_array
=
np
.
ones
((
len
(
rows
),
row_numel
)).
astype
(
"float32"
)
np_array
=
np
.
ones
((
len
(
rows
),
row_numel
)).
astype
(
"float32"
)
np_array
[
0
,
0
]
=
2.0
np_array
[
0
,
0
]
=
2.0
np_array
[
2
,
1
]
=
4.0
np_array
[
2
,
1
]
=
4.0
np_array
[
4
,
1
]
=
8.0
x_tensor
=
x
.
get_tensor
()
x_tensor
=
x
.
get_tensor
()
x_tensor
.
set
(
np_array
,
place
)
x_tensor
.
set
(
np_array
,
place
)
rows_sections
=
[
2
,
2
]
height_sections
=
[
5
,
5
,
5
,
5
,
3
]
height_sections
=
[]
# initialize output variables [out0, out1]
# initialize output variables [out0, out1]
out0
=
scope
.
var
(
'out0'
).
get_selected_rows
()
outs_name
=
[
"out%d"
%
i
for
i
in
xrange
(
len
(
height_sections
))]
out1
=
scope
.
var
(
'out1'
).
get_selected_rows
()
outs
=
[
scope
.
var
(
var_name
).
get_selected_rows
()
for
var_name
in
outs_name
]
# expected output selected rows
# expected output selected rows
expected_out0_rows
=
[
0
,
5
]
expected_out0_rows
=
[
0
,
4
]
expected_out1_rows
=
[
7
,
4
]
expected_out1_rows
=
[
5
,
7
]
expected_
height
=
height
expected_
out4_rows
=
[
20
]
op
=
Operator
(
op
=
Operator
(
"split_selected_rows"
,
"split_selected_rows"
,
X
=
"X"
,
X
=
"X"
,
Out
=
[
"out0"
,
"out1"
],
Out
=
outs_name
,
rows_sections
=
rows_sections
,
height_sections
=
height_sections
)
height_sections
=
height_sections
)
op
.
run
(
scope
,
place
)
op
.
run
(
scope
,
place
)
self
.
assertEqual
(
out0
.
rows
(),
expected_out0_rows
)
self
.
assertEqual
(
outs
[
0
].
rows
(),
expected_out0_rows
)
self
.
assertEqual
(
out1
.
rows
(),
expected_out1_rows
)
self
.
assertEqual
(
outs
[
1
].
rows
(),
expected_out1_rows
)
self
.
assertEqual
(
outs
[
4
].
rows
(),
expected_out4_rows
)
self
.
assertEqual
(
out
0
.
height
(),
expected_height
)
self
.
assertEqual
(
out
s
[
0
].
height
(),
height_sections
[
0
]
)
self
.
assertEqual
(
out
1
.
height
(),
expected_height
)
self
.
assertEqual
(
out
s
[
4
].
height
(),
height_sections
[
4
]
)
self
.
assertAlmostEqual
(
2.0
,
np
.
array
(
out0
.
get_tensor
())[
0
,
0
])
self
.
assertAlmostEqual
(
2.0
,
np
.
array
(
outs
[
0
].
get_tensor
())[
0
,
0
])
self
.
assertAlmostEqual
(
4.0
,
np
.
array
(
out1
.
get_tensor
())[
0
,
1
])
self
.
assertAlmostEqual
(
4.0
,
np
.
array
(
outs
[
1
].
get_tensor
())[
1
,
1
])
self
.
assertAlmostEqual
(
8.0
,
np
.
array
(
outs
[
4
].
get_tensor
())[
0
,
1
])
def
check_grad_with_place
(
self
,
place
):
def
check_grad_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
...
@@ -85,8 +88,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
...
@@ -85,8 +88,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
row_numel
=
2
row_numel
=
2
# attr
# attr
rows_sections
=
[
2
,
2
]
height_sections
=
[
5
,
5
]
height_sections
=
[]
# initialize input variable X
# initialize input variable X
out0_grad
=
scope
.
var
(
"out0@GRAD"
).
get_selected_rows
()
out0_grad
=
scope
.
var
(
"out0@GRAD"
).
get_selected_rows
()
...
@@ -113,7 +115,6 @@ class TestSpliteSelectedRows(unittest.TestCase):
...
@@ -113,7 +115,6 @@ class TestSpliteSelectedRows(unittest.TestCase):
"sum"
,
"sum"
,
X
=
[
"out0@GRAD"
,
"out1@GRAD"
],
X
=
[
"out0@GRAD"
,
"out1@GRAD"
],
Out
=
"X@GRAD"
,
Out
=
"X@GRAD"
,
rows_sections
=
rows_sections
,
height_sections
=
height_sections
)
height_sections
=
height_sections
)
grad_op
.
run
(
scope
,
place
)
grad_op
.
run
(
scope
,
place
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录