Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
bcc0d416
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bcc0d416
编写于
4月 15, 2019
作者:
乔
乔龙飞 Qiao Longfei
提交者:
GitHub
4月 15, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16822 from jacquesqiao/optimize-merge-add
Optimize merge add
上级
82cff5ec
faae1b41
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
122 addition
and
26 deletion
+122
-26
paddle/fluid/operators/math/selected_rows_functor.cc
paddle/fluid/operators/math/selected_rows_functor.cc
+56
-26
paddle/fluid/operators/math/selected_rows_functor_test.cc
paddle/fluid/operators/math/selected_rows_functor_test.cc
+66
-0
未找到文件。
paddle/fluid/operators/math/selected_rows_functor.cc
浏览文件 @
bcc0d416
...
...
@@ -296,6 +296,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
auto
input_height
=
has_value_input
->
height
();
framework
::
SelectedRows
&
out
=
*
output
;
std
::
set
<
int64_t
>
merged_row_set
;
size_t
row_num
=
0
;
for
(
auto
*
input
:
inputs
)
{
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
...
...
@@ -305,42 +306,71 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
"dimension except for the first one"
);
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
(),
"all input should have same height"
);
row_num
+=
input
->
rows
().
size
();
merged_row_set
.
insert
(
input
->
rows
().
begin
(),
input
->
rows
().
end
());
}
std
::
vector
<
int64_t
>
merge_rows
(
merged_row_set
.
begin
(),
merged_row_set
.
end
());
if
(
sorted_result
)
{
std
::
sort
(
merge_rows
.
begin
(),
merge_rows
.
end
());
}
std
::
unordered_map
<
int64_t
,
size_t
>
rows_to_id
;
for
(
size_t
i
=
0
;
i
<
merge_rows
.
size
();
++
i
)
{
rows_to_id
[
merge_rows
[
i
]]
=
i
;
}
out
.
set_rows
(
merge_rows
);
out
.
set_height
(
input_height
);
out
.
mutable_value
()
->
mutable_data
<
T
>
(
framework
::
make_ddim
(
{
static_cast
<
int64_t
>
(
merge
_rows
.
size
()),
input_width
}),
{
static_cast
<
int64_t
>
(
merge
d_row_set
.
size
()),
input_width
}),
context
.
GetPlace
());
auto
*
out_data
=
out
.
mutable_value
()
->
data
<
T
>
();
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
T
>
constant_functor
;
constant_functor
(
context
,
out
.
mutable_value
(),
0.0
);
if
(
merged_row_set
.
size
()
==
row_num
&&
!
sorted_result
)
{
// no duplicated ids, just concat the result together
std
::
vector
<
int64_t
>
merge_rows
;
merge_rows
.
reserve
(
row_num
);
// concat rows
for
(
auto
*
in
:
inputs
)
{
merge_rows
.
insert
(
merge_rows
.
end
(),
in
->
rows
().
begin
(),
in
->
rows
().
end
());
}
out
.
set_rows
(
merge_rows
);
auto
in_place
=
inputs
[
0
]
->
place
();
auto
out_place
=
out
.
place
();
int64_t
copied_numel
=
0
;
for
(
auto
*
in
:
inputs
)
{
auto
*
in_data
=
in
->
value
().
data
<
T
>
();
auto
in_numel
=
in
->
value
().
numel
();
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
out_place
),
out_data
+
copied_numel
,
boost
::
get
<
platform
::
CPUPlace
>
(
in_place
),
in_data
,
in_numel
*
sizeof
(
T
));
copied_numel
+=
in_numel
;
}
}
else
{
std
::
vector
<
int64_t
>
merge_rows
(
merged_row_set
.
begin
(),
merged_row_set
.
end
());
auto
*
out_data
=
out
.
mutable_value
()
->
data
<
T
>
();
if
(
sorted_result
)
{
std
::
sort
(
merge_rows
.
begin
(),
merge_rows
.
end
());
}
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
for
(
auto
*
input
:
inputs
)
{
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
out
.
set_rows
(
merge_rows
);
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
T
>
constant_functor
;
constant_functor
(
context
,
out
.
mutable_value
(),
0.0
);
std
::
unordered_map
<
int64_t
,
size_t
>
rows_to_id
;
for
(
size_t
i
=
0
;
i
<
merge_rows
.
size
();
++
i
)
{
rows_to_id
[
merge_rows
[
i
]]
=
i
;
}
auto
*
input_data
=
input
->
value
().
data
<
T
>
();
auto
&
input_rows
=
input
->
rows
();
for
(
size_t
i
=
0
;
i
<
input_rows
.
size
();
i
++
)
{
size_t
out_i
=
rows_to_id
[
input_rows
[
i
]];
elementwise_add_to
<
platform
::
CPUDeviceContext
,
T
>
(
context
,
&
blas
,
static_cast
<
size_t
>
(
input_width
),
&
input_data
[
i
*
input_width
],
&
out_data
[
out_i
*
input_width
]);
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
for
(
auto
*
input
:
inputs
)
{
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
}
auto
*
input_data
=
input
->
value
().
data
<
T
>
();
auto
&
input_rows
=
input
->
rows
();
for
(
size_t
i
=
0
;
i
<
input_rows
.
size
();
i
++
)
{
size_t
out_i
=
rows_to_id
[
input_rows
[
i
]];
elementwise_add_to
<
platform
::
CPUDeviceContext
,
T
>
(
context
,
&
blas
,
static_cast
<
size_t
>
(
input_width
),
&
input_data
[
i
*
input_width
],
&
out_data
[
out_i
*
input_width
]);
}
}
}
}
...
...
paddle/fluid/operators/math/selected_rows_functor_test.cc
浏览文件 @
bcc0d416
...
...
@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include <memory>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/operators/math/math_function.h"
TEST
(
selected_rows_functor
,
cpu_add
)
{
...
...
@@ -360,6 +363,69 @@ TEST(selected_rows_functor, cpu_merge_add_multi) {
}
}
// Exercises the CPU MergeAdd functor with two inputs whose row ids do not
// overlap ({1,3,5,7,9} and {0,2,4,6,8}). The test expects the output rows to
// be the two row lists in input order and each output row to carry the
// constant its source input was filled with.
TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
  using paddle::framework::SelectedRows;
  using paddle::platform::CPUDeviceContext;

  paddle::platform::CPUPlace cpu_place;
  CPUDeviceContext ctx(cpu_place);
  paddle::operators::math::SetConstant<CPUDeviceContext, float> set_const;

  int64_t height = 10;
  int64_t row_numel = 8;

  // Builds a SelectedRows holding `ids`, allocates a
  // (ids.size() x row_numel) float value tensor on the CPU place and fills
  // it with `fill`.
  auto make_filled = [&](const std::vector<int64_t>& ids,
                         float fill) -> std::unique_ptr<SelectedRows> {
    std::unique_ptr<SelectedRows> holder{new SelectedRows(ids, height)};
    auto* value_tensor = holder->mutable_value();
    value_tensor->mutable_data<float>(
        paddle::framework::make_ddim(
            {static_cast<int64_t>(ids.size()), row_numel}),
        cpu_place);
    set_const(ctx, value_tensor, fill);
    return holder;
  };

  std::vector<int64_t> odd_ids{1, 3, 5, 7, 9};
  std::unique_ptr<SelectedRows> first_input = make_filled(odd_ids, 1.0);

  std::vector<int64_t> even_ids{0, 2, 4, 6, 8};
  std::unique_ptr<SelectedRows> second_input = make_filled(even_ids, 2.0);

  std::unique_ptr<SelectedRows> output{new SelectedRows()};
  output->set_height(height);

  std::vector<const SelectedRows*> inputs;
  inputs.push_back(first_input.get());
  inputs.push_back(second_input.get());

  paddle::operators::math::scatter::MergeAdd<CPUDeviceContext, float>
      merge_add_functor;
  merge_add_functor(ctx, inputs, output.get());

  EXPECT_EQ(output->height(), height);
  EXPECT_EQ(output->value().dims(),
            paddle::framework::make_ddim({10, row_numel}));

  // Expected row order: first input's ids followed by the second input's.
  std::vector<int64_t> ret_rows{1, 3, 5, 7, 9, 0, 2, 4, 6, 8};
  EXPECT_EQ(output->rows(), ret_rows);

  auto* out_data = output->value().data<float>();
  const size_t width = static_cast<size_t>(row_numel);
  for (size_t row = 0; row < ret_rows.size(); ++row) {
    // The first five output rows come from the input filled with 1.0, the
    // remaining ones from the input filled with 2.0.
    float data_value = (row < 5) ? 1.0 : 2.0;
    for (size_t col = 0; col < width; ++col) {
      EXPECT_EQ(out_data[row * width + col], data_value);
    }
  }
}
TEST
(
selected_rows_functor
,
cpu_sum_to
)
{
paddle
::
platform
::
CPUPlace
cpu_place
;
paddle
::
platform
::
CPUDeviceContext
ctx
(
cpu_place
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录