Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
e37c9e67
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e37c9e67
编写于
10月 15, 2018
作者:
Q
Qiyang Min
提交者:
GitHub
10月 15, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13828 from velconia/accelerate_selected_rows_functor
Accelerate SelectedRows Functors:
上级
2562eb92
3f6ec900
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
342 addition
and
9 deletion
+342
-9
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+3
-3
paddle/fluid/operators/math/selected_rows_functor.cc
paddle/fluid/operators/math/selected_rows_functor.cc
+52
-6
paddle/fluid/operators/math/selected_rows_functor.h
paddle/fluid/operators/math/selected_rows_functor.h
+116
-0
paddle/fluid/operators/math/selected_rows_functor_test.cc
paddle/fluid/operators/math/selected_rows_functor_test.cc
+171
-0
未找到文件。
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
e37c9e67
...
@@ -3,8 +3,8 @@ add_subdirectory(detail)
...
@@ -3,8 +3,8 @@ add_subdirectory(detail)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
function
(
math_library TARGET
)
function
(
math_library TARGET
)
# math_library is a function to create math library.
# math_library is a function to create math library.
# The interface is the same as cc_library.
# The interface is the same as cc_library.
# But it handles splitting GPU/CPU code and links some common libraries.
# But it handles splitting GPU/CPU code and links some common libraries.
set
(
cc_srcs
)
set
(
cc_srcs
)
set
(
cu_srcs
)
set
(
cu_srcs
)
...
@@ -53,7 +53,7 @@ cc_library(blas SRCS blas.cc DEPS cblas framework_proto device_context)
...
@@ -53,7 +53,7 @@ cc_library(blas SRCS blas.cc DEPS cblas framework_proto device_context)
math_library
(
math_function DEPS blas
)
math_library
(
math_function DEPS blas
)
math_library
(
maxouting
)
math_library
(
maxouting
)
math_library
(
pooling
)
math_library
(
pooling
)
math_library
(
selected_rows_functor DEPS selected_rows math_function
)
math_library
(
selected_rows_functor DEPS selected_rows math_function
blas
)
math_library
(
sequence2batch
)
math_library
(
sequence2batch
)
math_library
(
sequence_padding
)
math_library
(
sequence_padding
)
math_library
(
sequence_pooling DEPS math_function
)
math_library
(
sequence_pooling DEPS math_function
)
...
...
paddle/fluid/operators/math/selected_rows_functor.cc
浏览文件 @
e37c9e67
...
@@ -15,7 +15,6 @@ limitations under the License. */
...
@@ -15,7 +15,6 @@ limitations under the License. */
#include <set>
#include <set>
#include <vector>
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -150,6 +149,45 @@ template struct SelectedRowsAddTo<platform::CPUDeviceContext, double>;
...
@@ -150,6 +149,45 @@ template struct SelectedRowsAddTo<platform::CPUDeviceContext, double>;
template
struct
SelectedRowsAddTo
<
platform
::
CPUDeviceContext
,
int
>;
template
struct
SelectedRowsAddTo
<
platform
::
CPUDeviceContext
,
int
>;
template
struct
SelectedRowsAddTo
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
struct
SelectedRowsAddTo
<
platform
::
CPUDeviceContext
,
int64_t
>;
// CPU specialization of SelectedRowsSumTo.
//
// Gathers every SelectedRows in `input1` into `input2`:
//   * the row ids of all inputs are concatenated, in order, and installed as
//     the rows of `input2` (replacing whatever rows it had);
//   * the value buffer of each input is copied with BLAS VCOPY into the value
//     buffer of `input2`.
//
// Parameters:
//   context         CPU device context used to obtain the BLAS handle.
//   input1          SelectedRows to gather; each must have the same height as
//                   `input2` (enforced).
//   input2_offsets  One entry per element of `input1` (enforced). Entry i is
//                   ADDED to a running element offset before input i is
//                   copied, i.e. the entries accumulate rather than being
//                   absolute positions.
//                   NOTE(review): for two inputs {0, numel(input1[0])} both
//                   readings agree; confirm the intended meaning for three or
//                   more inputs.
//   input2          Destination. Its value buffer is obtained with data<T>(),
//                   so it is presumably expected to be allocated by the
//                   caller beforehand -- TODO confirm.
template <typename T>
struct SelectedRowsSumTo<platform::CPUDeviceContext, T> {
  void operator()(const platform::CPUDeviceContext& context,
                  const std::vector<framework::SelectedRows*>& input1,
                  const std::vector<int64_t>& input2_offsets,
                  framework::SelectedRows* input2) {
    // One offset is consumed per input in the copy loop below; without this
    // check a shorter offsets vector would be indexed out of bounds.
    PADDLE_ENFORCE_EQ(input1.size(), input2_offsets.size());

    // Ensure all selected rows have the same height as the destination, and
    // count the total number of row ids so the concatenation allocates once.
    size_t size = 0u;
    for (const framework::SelectedRows* in : input1) {
      const auto& in_rows = in->rows();
      size += in_rows.end() - in_rows.begin();
      PADDLE_ENFORCE_EQ(in->height(), input2->height());
    }

    // Concatenate the row ids of every input, preserving input order.
    std::vector<int64_t> in2_rows;
    in2_rows.reserve(size);
    for (const framework::SelectedRows* in : input1) {
      const framework::Vector<int64_t>& in_rows = in->rows();
      in2_rows.insert(in2_rows.end(), in_rows.begin(), in_rows.end());
    }
    input2->set_rows(in2_rows);

    auto* in2_value = input2->mutable_value();
    auto* in2_data = in2_value->data<T>();
    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(context);

    // Copy each input's values into the destination buffer. `offset` is a
    // running total: every entry of input2_offsets is added to it in turn.
    size_t offset = 0u;
    for (size_t i = 0u; i != input1.size(); ++i) {
      auto& in_value = input1[i]->value();
      const auto* in_data = in_value.data<T>();
      offset += input2_offsets[i];
      blas.VCOPY(in_value.numel(), in_data, in2_data + offset);
    }
  }
};

// Explicit instantiations for the element types used with the CPU functor.
template struct SelectedRowsSumTo<platform::CPUDeviceContext, float>;
template struct SelectedRowsSumTo<platform::CPUDeviceContext, double>;
template
<
typename
T
>
template
<
typename
T
>
struct
SelectedRowsAddToTensor
<
platform
::
CPUDeviceContext
,
T
>
{
struct
SelectedRowsAddToTensor
<
platform
::
CPUDeviceContext
,
T
>
{
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
...
@@ -208,8 +246,18 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
...
@@ -208,8 +246,18 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
framework
::
SelectedRows
*
output
)
{
framework
::
SelectedRows
*
output
)
{
framework
::
SelectedRows
&
out
=
*
output
;
framework
::
SelectedRows
&
out
=
*
output
;
auto
input_rows
=
input
.
rows
();
auto
input_rows
=
input
.
rows
();
std
::
set
<
int64_t
>
row_set
(
input_rows
.
begin
(),
input_rows
.
end
());
std
::
vector
<
int64_t
>
merge_rows
;
std
::
vector
<
int64_t
>
merge_rows
(
row_set
.
begin
(),
row_set
.
end
());
merge_rows
.
reserve
(
input_rows
.
size
());
std
::
unordered_map
<
int64_t
,
size_t
>
rows_pos_map
;
rows_pos_map
.
reserve
(
input_rows
.
size
());
size_t
idx
=
0u
;
for
(
std
::
vector
<
int64_t
>::
iterator
iter
=
input_rows
.
begin
();
iter
!=
input_rows
.
end
();
++
iter
)
{
if
(
rows_pos_map
.
find
(
*
iter
)
==
rows_pos_map
.
end
())
{
rows_pos_map
[
*
iter
]
=
idx
++
;
merge_rows
.
emplace_back
(
*
iter
);
}
}
auto
input_width
=
input
.
value
().
dims
()[
1
];
auto
input_width
=
input
.
value
().
dims
()[
1
];
out
.
set_rows
(
merge_rows
);
out
.
set_rows
(
merge_rows
);
...
@@ -226,7 +274,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
...
@@ -226,7 +274,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
auto
*
input_data
=
input
.
value
().
data
<
T
>
();
auto
*
input_data
=
input
.
value
().
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
input_rows
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
input_rows
.
size
();
i
++
)
{
size_t
out_i
=
FindPos
(
merge_rows
,
input_rows
[
i
])
;
size_t
out_i
=
rows_pos_map
[
input_rows
[
i
]]
;
for
(
int64_t
j
=
0
;
j
<
input_width
;
j
++
)
{
for
(
int64_t
j
=
0
;
j
<
input_width
;
j
++
)
{
out_data
[
out_i
*
input_width
+
j
]
+=
input_data
[
i
*
input_width
+
j
];
out_data
[
out_i
*
input_width
+
j
]
+=
input_data
[
i
*
input_width
+
j
];
}
}
...
@@ -234,8 +282,6 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
...
@@ -234,8 +282,6 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
}
}
};
};
template
struct
MergeAdd
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
MergeAdd
<
platform
::
CPUDeviceContext
,
double
>;
template
struct
MergeAdd
<
platform
::
CPUDeviceContext
,
int
>;
template
struct
MergeAdd
<
platform
::
CPUDeviceContext
,
int
>;
template
struct
MergeAdd
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
struct
MergeAdd
<
platform
::
CPUDeviceContext
,
int64_t
>;
...
...
paddle/fluid/operators/math/selected_rows_functor.h
浏览文件 @
e37c9e67
...
@@ -12,8 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#define INLINE_FOR2(sizei, sizej) \
#define INLINE_FOR2(sizei, sizej) \
...
@@ -49,6 +54,15 @@ struct SelectedRowsAddTo {
...
@@ -49,6 +54,15 @@ struct SelectedRowsAddTo {
const
int64_t
input2_offset
,
framework
::
SelectedRows
*
input2
);
const
int64_t
input2_offset
,
framework
::
SelectedRows
*
input2
);
};
};
// Gathers all SelectedRows in `input1` into `input2`.
//
// In the CPU specialization the row ids of the inputs are concatenated into
// the rows of `input2` and each input's values are copied into the value
// buffer of `input2`; `input2_offsets` supplies one element offset per input,
// which that specialization accumulates while copying.
template <typename DeviceContext, typename T>
struct SelectedRowsSumTo {
  void operator()(const DeviceContext& context,
                  const std::vector<framework::SelectedRows*>& input1,
                  const std::vector<int64_t>& input2_offsets,
                  framework::SelectedRows* input2);
};
// input2 = input1 + input2
// input2 = input1 + input2
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
struct
SelectedRowsAddToTensor
{
struct
SelectedRowsAddToTensor
{
...
@@ -70,6 +84,108 @@ struct MergeAdd {
...
@@ -70,6 +84,108 @@ struct MergeAdd {
framework
::
SelectedRows
*
output
);
framework
::
SelectedRows
*
output
);
};
};
template
<
>
struct
MergeAdd
<
platform
::
CPUDeviceContext
,
float
>
{
framework
::
SelectedRows
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
SelectedRows
&
input
)
{
framework
::
SelectedRows
out
;
(
*
this
)(
context
,
input
,
&
out
);
return
out
;
}
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
SelectedRows
&
input
,
framework
::
SelectedRows
*
output
)
{
framework
::
SelectedRows
&
out
=
*
output
;
auto
input_rows
=
input
.
rows
();
std
::
vector
<
int64_t
>
merge_rows
;
merge_rows
.
reserve
(
input_rows
.
size
());
std
::
unordered_map
<
int64_t
,
size_t
>
rows_pos_map
;
rows_pos_map
.
reserve
(
input_rows
.
size
());
size_t
idx
=
0u
;
for
(
std
::
vector
<
int64_t
>::
iterator
iter
=
input_rows
.
begin
();
iter
!=
input_rows
.
end
();
++
iter
)
{
if
(
rows_pos_map
.
find
(
*
iter
)
==
rows_pos_map
.
end
())
{
rows_pos_map
[
*
iter
]
=
idx
++
;
merge_rows
.
emplace_back
(
*
iter
);
}
}
auto
input_width
=
input
.
value
().
dims
()[
1
];
out
.
set_rows
(
merge_rows
);
out
.
set_height
(
input
.
height
());
out
.
mutable_value
()
->
mutable_data
<
float
>
(
framework
::
make_ddim
(
{
static_cast
<
int64_t
>
(
merge_rows
.
size
()),
input_width
}),
context
.
GetPlace
());
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
float
>
constant_functor
;
constant_functor
(
context
,
out
.
mutable_value
(),
0.0
);
auto
*
out_data
=
out
.
mutable_value
()
->
data
<
float
>
();
auto
*
input_data
=
input
.
value
().
data
<
float
>
();
auto
blas
=
GetBlas
<
platform
::
CPUDeviceContext
,
float
>
(
context
);
for
(
size_t
i
=
0
;
i
<
input_rows
.
size
();
i
++
)
{
size_t
out_i
=
rows_pos_map
[
input_rows
[
i
]];
float
*
y
=
out_data
+
out_i
*
input_width
;
const
float
*
x
=
input_data
+
i
*
input_width
;
blas
.
AXPY
(
input_width
,
1.
,
x
,
y
);
}
}
};
template
<
>
struct
MergeAdd
<
platform
::
CPUDeviceContext
,
double
>
{
framework
::
SelectedRows
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
SelectedRows
&
input
)
{
framework
::
SelectedRows
out
;
(
*
this
)(
context
,
input
,
&
out
);
return
out
;
}
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
SelectedRows
&
input
,
framework
::
SelectedRows
*
output
)
{
framework
::
SelectedRows
&
out
=
*
output
;
auto
input_rows
=
input
.
rows
();
std
::
vector
<
int64_t
>
merge_rows
;
merge_rows
.
reserve
(
input_rows
.
size
());
std
::
unordered_map
<
int64_t
,
size_t
>
rows_pos_map
;
rows_pos_map
.
reserve
(
input_rows
.
size
());
size_t
idx
=
0u
;
for
(
std
::
vector
<
int64_t
>::
iterator
iter
=
input_rows
.
begin
();
iter
!=
input_rows
.
end
();
++
iter
)
{
if
(
rows_pos_map
.
find
(
*
iter
)
==
rows_pos_map
.
end
())
{
rows_pos_map
[
*
iter
]
=
idx
++
;
merge_rows
.
emplace_back
(
*
iter
);
}
}
auto
input_width
=
input
.
value
().
dims
()[
1
];
out
.
set_rows
(
merge_rows
);
out
.
set_height
(
input
.
height
());
out
.
mutable_value
()
->
mutable_data
<
double
>
(
framework
::
make_ddim
(
{
static_cast
<
int64_t
>
(
merge_rows
.
size
()),
input_width
}),
context
.
GetPlace
());
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
double
>
constant_functor
;
constant_functor
(
context
,
out
.
mutable_value
(),
0.0
);
auto
*
out_data
=
out
.
mutable_value
()
->
data
<
double
>
();
auto
*
input_data
=
input
.
value
().
data
<
double
>
();
auto
blas
=
GetBlas
<
platform
::
CPUDeviceContext
,
double
>
(
context
);
for
(
size_t
i
=
0
;
i
<
input_rows
.
size
();
i
++
)
{
size_t
out_i
=
rows_pos_map
[
input_rows
[
i
]];
double
*
y
=
out_data
+
out_i
*
input_width
;
const
double
*
x
=
input_data
+
i
*
input_width
;
blas
.
AXPY
(
input_width
,
1.
,
x
,
y
);
}
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
struct
Add
{
struct
Add
{
framework
::
SelectedRows
operator
()(
const
DeviceContext
&
context
,
framework
::
SelectedRows
operator
()(
const
DeviceContext
&
context
,
...
...
paddle/fluid/operators/math/selected_rows_functor_test.cc
浏览文件 @
e37c9e67
...
@@ -219,3 +219,174 @@ TEST(selected_rows_functor, cpu_add_to) {
...
@@ -219,3 +219,174 @@ TEST(selected_rows_functor, cpu_add_to) {
// row9: 2.0 + 3.0
// row9: 2.0 + 3.0
EXPECT_EQ
(
tensor1_data
[
9
*
row_numel
+
6
],
5.0
);
EXPECT_EQ
(
tensor1_data
[
9
*
row_numel
+
6
],
5.0
);
}
}
// CPU MergeAdd with float values: an input whose row id 4 appears twice must
// come out with rows {0, 4, 7}, the duplicated row holding the sum.
TEST(selected_rows_functor, cpu_merge_add_float) {
  namespace framework = paddle::framework;
  namespace platform = paddle::platform;
  namespace math = paddle::operators::math;

  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  math::SetConstant<platform::CPUDeviceContext, float> fill;

  int64_t height = 10;
  int64_t row_numel = 10;

  // Input: four rows, all filled with 1.0.
  std::vector<int64_t> rows{0, 4, 4, 7};
  framework::SelectedRows input(rows, height);
  auto* input_value = input.mutable_value();
  input_value->mutable_data<float>(
      framework::make_ddim({static_cast<int64_t>(rows.size()), row_numel}),
      place);
  fill(dev_ctx, input_value, 1.0);

  framework::SelectedRows merged;
  math::scatter::MergeAdd<platform::CPUDeviceContext, float> merge_add;
  merge_add(dev_ctx, input, &merged);

  EXPECT_EQ(merged.height(), height);

  // Distinct row ids, in order of first appearance.
  const auto& merged_rows = merged.rows();
  const int64_t expected_rows[] = {0, 4, 7};
  for (size_t i = 0; i < 3; ++i) {
    EXPECT_EQ(merged_rows[i], expected_rows[i]);
  }

  // First element of each merged row; row 4 received two contributions.
  const float* merged_data = merged.value().data<float>();
  const float expected_values[] = {1.0f, 2.0f, 1.0f};
  for (size_t i = 0; i < 3; ++i) {
    EXPECT_EQ(merged_data[i * row_numel], expected_values[i]);
  }
}
// CPU MergeAdd with int values: same scenario as the float case, exercising
// the integer instantiation of the functor.
TEST(selected_rows_functor, cpu_merge_add_int) {
  namespace framework = paddle::framework;
  namespace platform = paddle::platform;
  namespace math = paddle::operators::math;

  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  math::SetConstant<platform::CPUDeviceContext, int> fill;

  int64_t height = 10;
  int64_t row_numel = 10;

  // Input: four rows, all filled with 1; row id 4 is duplicated.
  std::vector<int64_t> rows{0, 4, 4, 7};
  framework::SelectedRows input(rows, height);
  auto* input_value = input.mutable_value();
  input_value->mutable_data<int>(
      framework::make_ddim({static_cast<int64_t>(rows.size()), row_numel}),
      place);
  fill(dev_ctx, input_value, 1);

  framework::SelectedRows merged;
  math::scatter::MergeAdd<platform::CPUDeviceContext, int> merge_add;
  merge_add(dev_ctx, input, &merged);

  EXPECT_EQ(merged.height(), height);

  // Distinct row ids, in order of first appearance.
  const auto& merged_rows = merged.rows();
  const int64_t expected_rows[] = {0, 4, 7};
  for (size_t i = 0; i < 3; ++i) {
    EXPECT_EQ(merged_rows[i], expected_rows[i]);
  }

  // First element of each merged row; row 4 received two contributions.
  const int* merged_data = merged.value().data<int>();
  const int expected_values[] = {1, 2, 1};
  for (size_t i = 0; i < 3; ++i) {
    EXPECT_EQ(merged_data[i * row_numel], expected_values[i]);
  }
}
// CPU SelectedRowsSumTo: two SelectedRows (filled with 1.0 and 2.0) are
// gathered into one output, which is then scattered into a dense tensor
// (filled with 3.0) with SelectedRowsAddToTensor to check the combined sums.
TEST(selected_rows_functor, cpu_sum_to) {
  paddle::platform::CPUPlace cpu_place;
  paddle::platform::CPUDeviceContext ctx(cpu_place);
  paddle::operators::math::SetConstant<paddle::platform::CPUDeviceContext,
                                       float>
      functor;
  int64_t height = 10;
  int64_t row_numel = 10;

  // First input: rows {0, 4, 7}, every element 1.0.
  std::vector<int64_t> rows1{0, 4, 7};
  std::unique_ptr<paddle::framework::SelectedRows> selected_rows1{
      new paddle::framework::SelectedRows(rows1, height)};
  auto* in1_value = selected_rows1->mutable_value();
  in1_value->mutable_data<float>(
      paddle::framework::make_ddim(
          {static_cast<int64_t>(rows1.size()), row_numel}),
      cpu_place);
  functor(ctx, in1_value, 1.0);

  // Second input: rows {0, 5, 7, 9}, every element 2.0.
  std::vector<int64_t> rows2{0, 5, 7, 9};
  std::unique_ptr<paddle::framework::SelectedRows> selected_rows2{
      new paddle::framework::SelectedRows(rows2, height)};
  auto* in2_value = selected_rows2->mutable_value();
  in2_value->mutable_data<float>(
      paddle::framework::make_ddim(
          {static_cast<int64_t>(rows2.size()), row_numel}),
      cpu_place);
  functor(ctx, in2_value, 2.0);

  std::unique_ptr<paddle::framework::SelectedRows> output{
      new paddle::framework::SelectedRows()};
  output->set_height(height);
  auto* out_value = output->mutable_value();

  // Simply concat the two SelectedRows: the output needs one row per input
  // row. The shape is derived from the inputs so it cannot drift from them.
  out_value->mutable_data<float>(
      paddle::framework::make_ddim(
          {static_cast<int64_t>(rows1.size() + rows2.size()), row_numel}),
      cpu_place);

  paddle::operators::math::SelectedRowsSumTo<paddle::platform::CPUDeviceContext,
                                             float>
      sum_to_functor;
  // The second input is written right after the elements of the first.
  sum_to_functor(ctx,
                 std::vector<paddle::framework::SelectedRows*>(
                     {selected_rows1.get(), selected_rows2.get()}),
                 std::vector<int64_t>({0, in1_value->numel()}), output.get());

  auto out_height = output->height();
  EXPECT_EQ(out_height, height);

  auto& out_rows = output->rows();
  // input1 rows
  EXPECT_EQ(out_rows[0], 0);
  EXPECT_EQ(out_rows[1], 4);
  EXPECT_EQ(out_rows[2], 7);
  // input2 rows
  EXPECT_EQ(out_rows[3], 0);
  EXPECT_EQ(out_rows[4], 5);
  EXPECT_EQ(out_rows[5], 7);
  EXPECT_EQ(out_rows[6], 9);

  auto* out_data = output->value().data<float>();
  // input1 value
  EXPECT_EQ(out_data[0 * row_numel + 0], 1.0);
  EXPECT_EQ(out_data[0 * row_numel + 8], 1.0);
  EXPECT_EQ(out_data[1 * row_numel + 1], 1.0);
  EXPECT_EQ(out_data[2 * row_numel + 6], 1.0);
  // input2 value
  EXPECT_EQ(out_data[3 * row_numel + 3], 2.0);
  EXPECT_EQ(out_data[3 * row_numel + 8], 2.0);
  EXPECT_EQ(out_data[4 * row_numel + 4], 2.0);
  EXPECT_EQ(out_data[5 * row_numel + 7], 2.0);
  EXPECT_EQ(out_data[6 * row_numel + 9], 2.0);

  // Scatter the gathered rows into a dense tensor pre-filled with 3.0.
  std::unique_ptr<paddle::framework::Tensor> tensor1{
      new paddle::framework::Tensor()};
  tensor1->mutable_data<float>(
      paddle::framework::make_ddim({height, row_numel}), cpu_place);
  functor(ctx, tensor1.get(), 3.0);

  paddle::operators::math::SelectedRowsAddToTensor<
      paddle::platform::CPUDeviceContext, float>
      add_to_tensor_functor;
  add_to_tensor_functor(ctx, *output, tensor1.get());

  auto* tensor1_data = tensor1->data<float>();
  // row0: 1.0 + 2.0 + 3.0
  EXPECT_EQ(tensor1_data[0 * row_numel + 0], 6.0);
  // row1: 3.0
  EXPECT_EQ(tensor1_data[1 * row_numel + 1], 3.0);
  // row4 : 1.0 + 3.0
  EXPECT_EQ(tensor1_data[4 * row_numel + 6], 4.0);
  // row5: 2.0 + 3.0
  EXPECT_EQ(tensor1_data[5 * row_numel + 7], 5.0);
  // row6: 3.0
  EXPECT_EQ(tensor1_data[6 * row_numel + 1], 3.0);
  // row7: 1.0 + 2.0 + 3.0
  EXPECT_EQ(tensor1_data[7 * row_numel + 3], 6.0);
  // row9: 2.0 + 3.0
  EXPECT_EQ(tensor1_data[9 * row_numel + 6], 5.0);
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录