Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
84680379
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
84680379
编写于
3月 09, 2018
作者:
Y
Yancey
提交者:
GitHub
3月 09, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix sparse update memory error for distributed training (#8837)
Fix sparse update memory error for distributed training
上级
124b7501
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
72 addition
and
27 deletion
+72
-27
paddle/fluid/operators/send_op.cc
paddle/fluid/operators/send_op.cc
+4
-4
paddle/fluid/operators/sgd_op.cc
paddle/fluid/operators/sgd_op.cc
+8
-0
paddle/fluid/operators/sgd_op.h
paddle/fluid/operators/sgd_op.h
+9
-1
paddle/fluid/operators/split_selected_rows_op.h
paddle/fluid/operators/split_selected_rows_op.h
+31
-14
paddle/fluid/operators/sum_op.cc
paddle/fluid/operators/sum_op.cc
+10
-4
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+7
-1
python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py
...ddle/fluid/tests/unittests/test_split_selected_rows_op.py
+3
-3
未找到文件。
paddle/fluid/operators/send_op.cc
浏览文件 @
84680379
...
...
@@ -24,15 +24,15 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
static
bool
IsVariableInitialize
d
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
varname
)
{
static
bool
NeedSen
d
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
varname
)
{
auto
*
var
=
scope
.
FindVar
(
varname
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
"Can not find variable '%s' in the send side."
,
varname
);
if
(
var
->
IsType
<
framework
::
LoDTensor
>
())
{
return
var
->
Get
<
framework
::
LoDTensor
>
().
IsInitialized
();
}
else
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
return
var
->
Get
<
framework
::
SelectedRows
>
().
value
().
IsInitialized
()
;
return
var
->
Get
<
framework
::
SelectedRows
>
().
rows
().
size
()
>
0UL
;
}
else
{
PADDLE_THROW
(
"Variable type in send side should be in "
...
...
@@ -67,7 +67,7 @@ class SendOp : public framework::OperatorBase {
detail
::
RPCClient
*
rpc_client
=
client_var
->
GetMutable
<
detail
::
RPCClient
>
();
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
i
++
)
{
if
(
IsVariableInitialize
d
(
scope
,
ins
[
i
]))
{
if
(
NeedSen
d
(
scope
,
ins
[
i
]))
{
VLOG
(
3
)
<<
"sending "
<<
ins
[
i
]
<<
" to "
<<
epmap
[
i
];
rpc_client
->
AsyncSendVariable
(
epmap
[
i
],
ctx
,
scope
,
ins
[
i
]);
}
else
{
...
...
paddle/fluid/operators/sgd_op.cc
浏览文件 @
84680379
...
...
@@ -39,6 +39,14 @@ class SGDOp : public framework::OperatorWithKernel {
// and run time.
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
}
protected:
// Chooses the kernel for this op: the data type is taken from the "Param"
// input (read as a LoDTensor) and the place from the execution context.
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const override {
  const auto* param_tensor = ctx.Input<framework::LoDTensor>("Param");
  const auto param_dtype = framework::ToDataType(param_tensor->type());
  return framework::OpKernelType(param_dtype, ctx.GetPlace());
}
};
class
SGDOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
...
...
paddle/fluid/operators/sgd_op.h
浏览文件 @
84680379
...
...
@@ -47,6 +47,12 @@ class SGDOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param
,
param_out
);
auto
*
grad
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"Grad"
);
// for distributed training, a sparse var may be empty,
// just skip updating.
if
(
grad
->
rows
().
size
()
==
0
)
{
return
;
}
auto
in_height
=
grad
->
height
();
auto
out_dims
=
param_out
->
dims
();
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
]);
...
...
@@ -60,13 +66,15 @@ class SGDOpKernel : public framework::OpKernel<T> {
auto
*
in_data
=
in_value
.
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
auto
*
lr
=
learning_rate
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
in_rows
.
size
();
i
++
)
{
PADDLE_ENFORCE
(
in_rows
[
i
]
<
in_height
,
"Input rows index should less than height"
);
for
(
int64_t
j
=
0
;
j
<
in_row_numel
;
j
++
)
{
out_data
[
in_rows
[
i
]
*
in_row_numel
+
j
]
-=
lr
[
0
]
*
in_data
[
i
*
in_row_numel
+
j
];
}
}
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
}
...
...
paddle/fluid/operators/split_selected_rows_op.h
浏览文件 @
84680379
...
...
@@ -21,15 +21,24 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
static
int
FindOutIdx
(
int
row
,
const
std
::
vector
<
int
>&
height_sections
)
{
int
offset
=
0
;
for
(
size_t
i
=
0
;
i
<
height_sections
.
size
();
++
i
)
{
if
(
row
>=
offset
&&
row
<
(
offset
+
height_sections
[
i
]))
{
return
i
;
static
int
FindOutIdx
(
int
row
,
const
std
::
vector
<
int
>&
abs_sections
)
{
for
(
size_t
i
=
1
;
i
<
abs_sections
.
size
();
++
i
)
{
if
(
row
<
abs_sections
[
i
])
{
return
i
-
1
;
}
offset
+=
height_sections
[
i
];
}
return
-
1
;
return
abs_sections
.
size
()
-
1
;
}
// Converts a list of section heights into the absolute starting offset of
// each section, i.e. an exclusive prefix sum:
//   height_sections = {5, 3, 2}  ->  {0, 5, 8}
//
// The result has the same length as `height_sections`. An empty input yields
// an empty result; the vector is zero-filled on construction instead of
// writing element 0 unconditionally, which was out of bounds for empty input.
static std::vector<int> ToAbsoluteSection(
    const std::vector<int>& height_sections) {
  std::vector<int> abs_sections(height_sections.size(), 0);
  for (size_t i = 1; i < height_sections.size(); ++i) {
    // Start of section i = start of section i-1 plus that section's height.
    abs_sections[i] = height_sections[i - 1] + abs_sections[i - 1];
  }
  return abs_sections;
}
template
<
typename
DeviceContext
,
typename
T
>
...
...
@@ -40,16 +49,23 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
auto
outs
=
ctx
.
MultiOutput
<
framework
::
SelectedRows
>
(
"Out"
);
auto
height_sections
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"height_sections"
);
auto
abs_sections
=
ToAbsoluteSection
(
height_sections
);
auto
x_rows
=
x
->
rows
();
std
::
vector
<
std
::
vector
<
int
>>
outs_rows_idx
;
std
::
vector
<
std
::
vector
<
int
>>
outs_dense_idx
;
outs_rows_idx
.
resize
(
outs
.
size
());
outs_dense_idx
.
resize
(
outs
.
size
());
auto
row_numel
=
x
->
value
().
numel
()
/
x
->
value
().
dims
()[
0
];
auto
src
=
x
->
value
().
data
<
T
>
();
// split rows index into output sparse vars
for
(
size_t
i
=
0
;
i
<
x_rows
.
size
();
++
i
)
{
int
out_idx
=
FindOutIdx
(
x_rows
[
i
],
height_sections
);
outs_rows_idx
[
out_idx
].
push_back
(
i
);
int
out_idx
=
FindOutIdx
(
x_rows
[
i
],
abs_sections
);
outs_rows_idx
[
out_idx
].
push_back
(
x_rows
[
i
]);
outs_dense_idx
[
out_idx
].
push_back
(
i
);
}
auto
place
=
ctx
.
GetPlace
();
...
...
@@ -61,19 +77,20 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
dims
[
0
]
=
rows_idx
.
size
();
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
dims
,
x
->
place
());
for
(
auto
idx
:
rows_idx
)
{
outs
[
i
]
->
mutable_rows
()
->
push_back
(
x_rows
[
idx
]);
outs
[
i
]
->
mutable_rows
()
->
push_back
(
idx
-
abs_sections
[
i
]);
}
auto
dst
=
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
for
(
size_t
j
=
0
;
j
<
rows_idx
.
size
();
j
++
)
{
if
(
platform
::
is_cpu_place
(
place
))
{
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
+
j
*
row_numel
,
platform
::
CPUPlace
(),
src
+
rows_idx
[
j
]
*
row_numel
,
sizeof
(
T
)
*
row_numel
);
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
+
j
*
row_numel
,
platform
::
CPUPlace
()
,
src
+
outs_dense_idx
[
i
][
j
]
*
row_numel
,
sizeof
(
T
)
*
row_numel
);
}
else
{
#ifdef PADDLE_WITH_CUDA
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
memory
::
Copy
(
platform
::
CUDAPlace
(),
dst
+
j
*
row_numel
,
platform
::
CUDAPlace
(),
src
+
rows_idx
[
j
]
*
row_numel
,
platform
::
CUDAPlace
(),
src
+
outs_dense_idx
[
i
][
j
]
*
row_numel
,
sizeof
(
T
)
*
row_numel
,
stream
);
#else
PADDLE_THROW
(
"Paddle is not compiled with GPU"
);
...
...
paddle/fluid/operators/sum_op.cc
浏览文件 @
84680379
...
...
@@ -76,10 +76,16 @@ class SumOp : public framework::OperatorWithKernel {
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
dtype
),
ctx
.
device_context
());
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
SelectedRows
>
())
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
x_vars
[
0
]
->
Get
<
framework
::
SelectedRows
>
().
value
().
type
()),
ctx
.
device_context
());
for
(
auto
&
var
:
x_vars
)
{
auto
&
value
=
var
->
Get
<
framework
::
SelectedRows
>
().
value
();
if
(
value
.
IsInitialized
())
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
value
.
type
()),
ctx
.
device_context
());
}
}
// if input sparse vars are not initialized, use a default kernel type.
return
framework
::
OpKernelType
(
framework
::
proto
::
VarType
::
FP32
,
ctx
.
device_context
());
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensorArray
>
())
{
for
(
auto
&
x_var
:
x_vars
)
{
auto
&
array
=
x_var
->
Get
<
framework
::
LoDTensorArray
>
();
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
84680379
...
...
@@ -109,6 +109,12 @@ class SumKernel : public framework::OpKernel<T> {
in_dim
[
0
]
=
static_cast
<
int64_t
>
(
first_dim
);
out_value
->
Resize
(
framework
::
make_ddim
(
in_dim
));
// if all the input sparse vars are empty, no need to
// merge these vars.
if
(
first_dim
==
0UL
)
{
return
;
}
out_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
math
::
SelectedRowsAddTo
<
DeviceContext
,
T
>
functor
;
...
...
@@ -116,7 +122,7 @@ class SumKernel : public framework::OpKernel<T> {
int64_t
offset
=
0
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
auto
&
sel_row
=
get_selected_row
(
i
);
if
(
!
sel_row
.
value
().
IsInitialized
()
||
sel_row
.
rows
().
size
()
==
0
)
{
if
(
sel_row
.
rows
().
size
()
==
0
)
{
continue
;
}
PADDLE_ENFORCE_EQ
(
out
->
height
(),
sel_row
.
height
());
...
...
python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py
浏览文件 @
84680379
...
...
@@ -60,8 +60,8 @@ class TestSpliteSelectedRows(unittest.TestCase):
# expected output selected rows
expected_out0_rows
=
[
0
,
4
]
expected_out1_rows
=
[
5
,
7
]
expected_out4_rows
=
[
2
0
]
expected_out1_rows
=
[
0
,
2
]
expected_out4_rows
=
[
0
]
op
=
Operator
(
"split_selected_rows"
,
...
...
@@ -101,7 +101,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
out0_grad_tensor
.
set
(
np_array
,
place
)
out1_grad
=
scope
.
var
(
"out1@GRAD"
).
get_selected_rows
()
rows1
=
[
7
,
5
]
rows1
=
[
2
,
0
]
out1_grad
.
set_rows
(
rows1
)
out1_grad
.
set_height
(
height
)
out1_grad_tensor
=
out1_grad
.
get_tensor
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录