Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b6c6f4f9
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b6c6f4f9
编写于
9月 23, 2022
作者:
Y
YuanRisheng
提交者:
GitHub
9月 23, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
move selected_rows_functor (#46373)
上级
4dd5bf79
变更
35
隐藏空白更改
内联
并排
Showing
35 changed file
with
331 addition
and
359 deletion
+331
-359
paddle/fluid/distributed/ps/service/communicator/communicator.h
.../fluid/distributed/ps/service/communicator/communicator.h
+3
-4
paddle/fluid/imperative/gradient_accumulator.cc
paddle/fluid/imperative/gradient_accumulator.cc
+25
-28
paddle/fluid/operators/clip_by_norm_op.h
paddle/fluid/operators/clip_by_norm_op.h
+1
-1
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+0
-14
paddle/fluid/operators/math/selected_rows_functor_test.cc
paddle/fluid/operators/math/selected_rows_functor_test.cc
+12
-19
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
+6
-9
paddle/fluid/operators/optimizers/adagrad_op.cc
paddle/fluid/operators/optimizers/adagrad_op.cc
+1
-1
paddle/fluid/operators/optimizers/adam_op_functor.h
paddle/fluid/operators/optimizers/adam_op_functor.h
+2
-2
paddle/fluid/operators/optimizers/ftrl_op.h
paddle/fluid/operators/optimizers/ftrl_op.h
+2
-2
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+0
-1
paddle/phi/kernels/cpu/adagrad_kernel.cc
paddle/phi/kernels/cpu/adagrad_kernel.cc
+3
-4
paddle/phi/kernels/cpu/add_n_kernel.cc
paddle/phi/kernels/cpu/add_n_kernel.cc
+1
-1
paddle/phi/kernels/funcs/CMakeLists.txt
paddle/phi/kernels/funcs/CMakeLists.txt
+14
-0
paddle/phi/kernels/funcs/lamb_functors.h
paddle/phi/kernels/funcs/lamb_functors.h
+2
-2
paddle/phi/kernels/funcs/selected_rows_functor.cc
paddle/phi/kernels/funcs/selected_rows_functor.cc
+130
-132
paddle/phi/kernels/funcs/selected_rows_functor.cu
paddle/phi/kernels/funcs/selected_rows_functor.cu
+92
-94
paddle/phi/kernels/funcs/selected_rows_functor.h
paddle/phi/kernels/funcs/selected_rows_functor.h
+10
-13
paddle/phi/kernels/gpu/adagrad_kernel.cu
paddle/phi/kernels/gpu/adagrad_kernel.cu
+3
-4
paddle/phi/kernels/gpu/adam_kernel.cu
paddle/phi/kernels/gpu/adam_kernel.cu
+0
-1
paddle/phi/kernels/gpu/adamw_kernel.cu
paddle/phi/kernels/gpu/adamw_kernel.cu
+1
-1
paddle/phi/kernels/impl/adagrad_kernel_impl.h
paddle/phi/kernels/impl/adagrad_kernel_impl.h
+0
-1
paddle/phi/kernels/impl/add_n_kernel_impl.h
paddle/phi/kernels/impl/add_n_kernel_impl.h
+1
-1
paddle/phi/kernels/impl/clip_kernel_impl.h
paddle/phi/kernels/impl/clip_kernel_impl.h
+0
-1
paddle/phi/kernels/impl/momentum_kernel_impl.h
paddle/phi/kernels/impl/momentum_kernel_impl.h
+2
-2
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/clip_kernel.h
paddle/phi/kernels/selected_rows/clip_kernel.h
+0
-1
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
+2
-2
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
+2
-2
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
+2
-2
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
...phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
...e/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
+2
-2
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
+2
-2
未找到文件。
paddle/fluid/distributed/ps/service/communicator/communicator.h
浏览文件 @
b6c6f4f9
...
...
@@ -37,13 +37,13 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
distributed
{
...
...
@@ -212,11 +212,10 @@ inline void MergeVars(const std::string &var_name,
}
phi
::
CPUContext
dev_ctx
;
if
(
merge_add
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_add
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_add
;
merge_add
(
dev_ctx
,
inputs
,
out_slr
);
}
else
{
paddle
::
operators
::
math
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
T
>
merge_average
;
phi
::
funcs
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
T
>
merge_average
;
merge_average
(
dev_ctx
,
inputs
,
out_slr
);
}
...
...
paddle/fluid/imperative/gradient_accumulator.cc
浏览文件 @
b6c6f4f9
...
...
@@ -22,7 +22,6 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/device_context.h"
...
...
@@ -30,6 +29,7 @@
#include "paddle/fluid/platform/profiler.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#ifdef PADDLE_WITH_XPU
#include "xpu/refactor/math.h"
#endif
...
...
@@ -354,15 +354,14 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
framework
::
TransToProtoVarType
(
src_selected_rows
.
value
().
dtype
());
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
paddle::operators::math::SelectedRowsAddToTensor<dev_ctx_type, cpp_type> \
functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
dst_tensor); \
return; \
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
phi::funcs::SelectedRowsAddToTensor<dev_ctx_type, cpp_type> functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
dst_tensor); \
return; \
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
@@ -406,15 +405,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
dst_tensor
->
Resize
(
src_tensor
.
dims
());
dst_tensor
->
mutable_data
(
place
,
src_tensor
.
dtype
());
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::operators::math::SelectedRowsAddTensor<dev_ctx_type, cpp_type> \
functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
src_tensor, \
dst_tensor); \
return; \
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
phi::funcs::SelectedRowsAddTensor<dev_ctx_type, cpp_type> functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
src_tensor, \
dst_tensor); \
return; \
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
@@ -469,15 +467,14 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
phi
::
SelectedRows
*
dst_selected_rows
=
GetEmptyInnerTensor
<
phi
::
SelectedRows
>
(
dst_var
.
get
());
#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
paddle::operators::math::scatter::MergeAdd<dev_ctx_type, cpp_type> \
merge_add; \
merge_add(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
dst_selected_rows); \
return dst_var; \
#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
phi::funcs::scatter::MergeAdd<dev_ctx_type, cpp_type> merge_add; \
merge_add(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
dst_selected_rows); \
return dst_var; \
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
paddle/fluid/operators/clip_by_norm_op.h
浏览文件 @
b6c6f4f9
...
...
@@ -17,8 +17,8 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
b6c6f4f9
...
...
@@ -31,20 +31,6 @@ math_library(sampler DEPS generator)
# math_library(math_function DEPS blas dense_tensor tensor)
math_library
(
maxouting
)
if
(
WITH_MKLDNN
)
math_library
(
selected_rows_functor
DEPS
selected_rows_utils
math_function
blas
mkldnn_axpy_handler
mixed_vector
)
else
()
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function
blas mixed_vector
)
endif
()
math_library
(
sequence_padding
)
math_library
(
sequence_pooling DEPS math_function jit_kernel_helper
)
math_library
(
sequence_scale
)
...
...
paddle/fluid/operators/math/selected_rows_functor_test.cc
浏览文件 @
b6c6f4f9
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
#include "gtest/gtest.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
...
@@ -48,7 +48,7 @@ TEST(selected_rows_functor, cpu_add) {
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
p
addle
::
operators
::
math
::
SelectedRowsAdd
<
phi
::
CPUContext
,
float
>
add_functor
;
p
hi
::
funcs
::
SelectedRowsAdd
<
phi
::
CPUContext
,
float
>
add_functor
;
add_functor
(
ctx
,
*
selected_rows1
,
*
selected_rows2
,
output
.
get
());
auto
out_height
=
output
->
height
();
...
...
@@ -88,8 +88,7 @@ TEST(selected_rows_functor, cpu_add) {
new
paddle
::
framework
::
Tensor
()};
tensor2
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTensor
<
phi
::
CPUContext
,
float
>
add_tensor_functor
;
phi
::
funcs
::
SelectedRowsAddTensor
<
phi
::
CPUContext
,
float
>
add_tensor_functor
;
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
auto
*
tensor2_data
=
tensor2
->
data
<
float
>
();
...
...
@@ -141,8 +140,7 @@ TEST(selected_rows_functor, cpu_add_to) {
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTo
<
phi
::
CPUContext
,
float
>
add_to_functor
;
phi
::
funcs
::
SelectedRowsAddTo
<
phi
::
CPUContext
,
float
>
add_to_functor
;
add_to_functor
(
ctx
,
*
selected_rows1
,
0
,
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows2
,
in1_value
->
numel
(),
output
.
get
());
...
...
@@ -179,7 +177,7 @@ TEST(selected_rows_functor, cpu_add_to) {
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
...
...
@@ -216,7 +214,7 @@ TEST(selected_rows_functor, cpu_merge_average_float) {
cpu_place
);
functor
(
ctx
,
in_value
,
1.0
);
p
addle
::
operators
::
math
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
float
>
merge_average_functor
;
phi
::
SelectedRows
output
=
merge_average_functor
(
ctx
,
*
selected_rows
);
...
...
@@ -253,8 +251,7 @@ TEST(selected_rows_functor, cpu_merge_add_float) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
merge_add_functor
(
ctx
,
*
selected_rows
,
output
.
get
());
auto
out_height
=
output
->
height
();
...
...
@@ -290,8 +287,7 @@ TEST(selected_rows_functor, cpu_merge_add_int) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
int
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
int
>
merge_add_functor
;
merge_add_functor
(
ctx
,
*
selected_rows
,
output
.
get
());
auto
out_height
=
output
->
height
();
...
...
@@ -337,8 +333,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
...
...
@@ -387,8 +382,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
...
...
@@ -444,8 +438,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
auto
*
out_value
=
output
->
mutable_value
();
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsSumTo
<
phi
::
CPUContext
,
float
>
sum_to_functor
;
phi
::
funcs
::
SelectedRowsSumTo
<
phi
::
CPUContext
,
float
>
sum_to_functor
;
sum_to_functor
(
ctx
,
std
::
vector
<
phi
::
SelectedRows
*>
(
{
selected_rows1
.
get
(),
selected_rows2
.
get
()}),
...
...
@@ -479,7 +472,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
new
paddle
::
framework
::
Tensor
()};
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
auto
*
tensor1_data
=
tensor1
->
data
<
float
>
();
...
...
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
浏览文件 @
b6c6f4f9
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
#include "gtest/gtest.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
...
@@ -61,7 +61,7 @@ TEST(selected_rows_functor, gpu_add) {
// simply concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
gpu_place
);
p
addle
::
operators
::
math
::
SelectedRowsAdd
<
phi
::
GPUContext
,
float
>
add_functor
;
p
hi
::
funcs
::
SelectedRowsAdd
<
phi
::
GPUContext
,
float
>
add_functor
;
add_functor
(
ctx
,
*
selected_rows1
,
*
selected_rows2
,
output
.
get
());
auto
out_height
=
output
->
height
();
...
...
@@ -105,8 +105,7 @@ TEST(selected_rows_functor, gpu_add) {
new
paddle
::
framework
::
Tensor
()};
tensor2
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
gpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>
add_tensor_functor
;
phi
::
funcs
::
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>
add_tensor_functor
;
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
paddle
::
framework
::
Tensor
tensor2_cpu
;
...
...
@@ -164,8 +163,7 @@ TEST(selected_rows_functor, gpu_add_to) {
// simply concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
gpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTo
<
phi
::
GPUContext
,
float
>
add_to_functor
;
phi
::
funcs
::
SelectedRowsAddTo
<
phi
::
GPUContext
,
float
>
add_to_functor
;
add_to_functor
(
ctx
,
*
selected_rows1
,
0
,
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows2
,
in1_value
->
numel
(),
output
.
get
());
...
...
@@ -206,7 +204,7 @@ TEST(selected_rows_functor, gpu_add_to) {
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
gpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
...
...
@@ -261,8 +259,7 @@ TEST(selected_rows_functor, gpu_merge_add) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
float
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
float
>
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
...
...
paddle/fluid/operators/optimizers/adagrad_op.cc
浏览文件 @
b6c6f4f9
...
...
@@ -17,10 +17,10 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/multiary.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/optimizers/adam_op_functor.h
浏览文件 @
b6c6f4f9
...
...
@@ -16,12 +16,12 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
namespace
scatter
=
p
addle
::
operators
::
math
::
scatter
;
namespace
scatter
=
p
hi
::
funcs
::
scatter
;
static
inline
float
GetAttrFromTensor
(
const
framework
::
Tensor
*
tensor
)
{
const
float
*
tensor_data
=
tensor
->
data
<
float
>
();
...
...
paddle/fluid/operators/optimizers/ftrl_op.h
浏览文件 @
b6c6f4f9
...
...
@@ -15,8 +15,8 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -193,7 +193,7 @@ class FTRLOpKernel : public framework::OpKernel<T> {
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
math
::
scatter
::
MergeAdd
<
DeviceContext
,
T
>
merge_func
;
phi
::
funcs
::
scatter
::
MergeAdd
<
DeviceContext
,
T
>
merge_func
;
merge_func
(
ctx
.
template
device_context
<
DeviceContext
>(),
*
grad
,
merged_grad
);
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,6 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
...
...
paddle/phi/kernels/cpu/adagrad_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -14,10 +14,10 @@
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
namespace
phi
{
...
...
@@ -38,7 +38,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
DenseTensor
*
param
)
{
// 1. g_m.rows = set(g.rows)
auto
grad_width
=
grad
.
value
().
dims
()[
1
];
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_func
;
auto
grad_merge
=
merge_func
(
context
,
grad
);
auto
&
merge_rows
=
grad_merge
.
rows
();
auto
*
grad_merge_data
=
grad_merge
.
mutable_value
()
->
template
data
<
T
>();
...
...
@@ -47,8 +47,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
auto
grad_square
=
SquareSelectedRows
<
phi
::
CPUContext
,
T
>
(
context
,
grad_merge
);
paddle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
functor
;
phi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
functor
;
functor
(
context
,
grad_square
,
moment
);
// 3. update parameter
...
...
paddle/phi/kernels/cpu/add_n_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -53,7 +53,7 @@ void AddNKernel(const Context& dev_ctx,
}
}
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
Context
,
T
>
functor
;
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
Context
,
T
>
functor
;
// If in_place, just skip the first tensor
for
(
size_t
i
=
start
;
i
<
in_num
;
i
++
)
{
if
(
DenseTensor
::
classof
(
x
[
i
]))
{
...
...
paddle/phi/kernels/funcs/CMakeLists.txt
浏览文件 @
b6c6f4f9
...
...
@@ -38,3 +38,17 @@ else()
math_library
(
fft DEPS dense_tensor pocketfft
)
endif
()
endif
()
if
(
WITH_MKLDNN
)
math_library
(
selected_rows_functor
DEPS
selected_rows_utils
math_function
blas
mkldnn_axpy_handler
mixed_vector
)
else
()
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function
blas mixed_vector
)
endif
()
paddle/phi/kernels/funcs/lamb_functors.h
浏览文件 @
b6c6f4f9
...
...
@@ -19,19 +19,19 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/memory/buffer.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/squared_l2_norm.h"
#include "paddle/phi/kernels/funcs/tensor_to_string.h"
namespace
phi
{
namespace
scatter
=
p
addle
::
operators
::
math
::
scatter
;
namespace
scatter
=
p
hi
::
funcs
::
scatter
;
template
<
typename
T
,
bool
IsMultiPrecision
>
struct
LambMomentREGUpdateFunctor
{
...
...
paddle/
fluid/operators/math
/selected_rows_functor.cc
→
paddle/
phi/kernels/funcs
/selected_rows_functor.cc
浏览文件 @
b6c6f4f9
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
...
...
@@ -21,9 +21,8 @@ limitations under the License. */
#include "paddle/fluid/operators/mkldnn/axpy_handler.h"
#endif
namespace
paddle
{
namespace
operators
{
namespace
math
{
namespace
phi
{
namespace
funcs
{
template
<
typename
T
>
struct
SelectedRowsAdd
<
phi
::
CPUContext
,
T
>
{
void
operator
()(
const
phi
::
CPUContext
&
context
,
...
...
@@ -34,11 +33,11 @@ struct SelectedRowsAdd<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
.
height
(),
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
.
height
()));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
.
height
()));
output
->
set_height
(
in1_height
);
auto
&
in1_rows
=
input1
.
rows
();
...
...
@@ -59,7 +58,7 @@ struct SelectedRowsAdd<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in2_value
.
numel
()
/
in2_rows
.
size
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -67,42 +66,42 @@ struct SelectedRowsAdd<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
out_value
->
numel
()
/
out_rows
.
size
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and oupput width must be equal."
"But received input width = [%d], output width = [%d]"
,
in1_row_numel
,
out_value
->
numel
()
/
out_rows
.
size
()));
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_cpu_place
(
in1_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the CPU place."
));
auto
in2_place
=
input2
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
in2_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_cpu_place
(
in2_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the CPU place."
));
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
out_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_cpu_place
(
out_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the CPU place."
));
auto
*
out_data
=
out_value
->
data
<
T
>
();
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
memory
::
Copy
(
out_place
,
out_data
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
));
paddle
::
memory
::
Copy
(
out_place
,
out_data
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
));
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
memory
::
Copy
(
out_place
,
out_data
+
in1_value
.
numel
(),
in2_place
,
in2_data
,
in2_value
.
numel
()
*
sizeof
(
T
));
paddle
::
memory
::
Copy
(
out_place
,
out_data
+
in1_value
.
numel
(),
in2_place
,
in2_data
,
in2_value
.
numel
()
*
sizeof
(
T
));
}
};
...
...
@@ -113,23 +112,23 @@ template <typename T>
struct
SelectedRowsAddTensor
<
phi
::
CPUContext
,
T
>
{
void
operator
()(
const
phi
::
CPUContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
framework
::
Tensor
&
input2
,
framework
::
Tensor
*
output
)
{
const
phi
::
Dense
Tensor
&
input2
,
phi
::
Dense
Tensor
*
output
)
{
auto
in1_height
=
input1
.
height
();
const
auto
&
in2_dims
=
input2
.
dims
();
const
auto
&
out_dims
=
output
->
dims
();
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
PADDLE_ENFORCE_EQ
(
in1_height
,
out_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and output height must be equal."
"But received input height = [%d], output height = [%d]"
,
in1_height
,
...
...
@@ -142,7 +141,7 @@ struct SelectedRowsAddTensor<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
.
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -150,7 +149,7 @@ struct SelectedRowsAddTensor<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
output
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and output width must be equal."
"But received input width = [%d], output width = [%d]"
,
in1_row_numel
,
...
...
@@ -169,8 +168,8 @@ struct SelectedRowsAddTensor<phi::CPUContext, T> {
}
}
auto
out_eigen
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
in2_eigen
=
framework
::
EigenVector
<
T
>::
Flatten
(
input2
);
auto
out_eigen
=
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
in2_eigen
=
EigenVector
<
T
>::
Flatten
(
input2
);
out_eigen
.
device
(
*
context
.
eigen_device
())
=
out_eigen
+
in2_eigen
;
}
};
...
...
@@ -188,11 +187,11 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
(),
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
->
height
()));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
->
height
()));
auto
&
in1_rows
=
input1
.
rows
();
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
...
...
@@ -205,23 +204,23 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
mixv_in2_rows
.
Extend
(
in1_rows
.
begin
(),
in1_rows
.
end
());
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_cpu_place
(
in1_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the CPU place."
));
auto
in2_place
=
input2
->
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
in2_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_cpu_place
(
in2_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the CPU place."
));
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in2_data
=
in2_value
->
data
<
T
>
();
memory
::
Copy
(
in2_place
,
in2_data
+
input2_offset
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
));
paddle
::
memory
::
Copy
(
in2_place
,
in2_data
+
input2_offset
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
));
}
};
...
...
@@ -244,7 +243,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
auto
in1_height
=
(
*
iter
)
->
height
();
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = [%d], second "
"input height = [%d]"
,
...
...
@@ -255,7 +254,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
std
::
vector
<
int64_t
>
in2_rows
;
in2_rows
.
reserve
(
in2_rows
.
size
()
+
size
);
for
(
auto
iter
=
input1
.
begin
();
iter
!=
input1
.
end
();
++
iter
)
{
const
framework
::
Vector
<
int64_t
>&
in_rows
=
(
*
iter
)
->
rows
();
const
paddle
::
framework
::
Vector
<
int64_t
>&
in_rows
=
(
*
iter
)
->
rows
();
in2_rows
.
insert
(
in2_rows
.
end
(),
in_rows
.
begin
(),
in_rows
.
end
());
}
input2
->
set_rows
(
in2_rows
);
...
...
@@ -280,7 +279,7 @@ template <typename T>
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
{
void
operator
()(
const
phi
::
CPUContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
phi
::
Dense
Tensor
*
input2
)
{
if
(
UNLIKELY
(
input1
.
rows
().
size
()
==
0
))
{
LOG
(
WARNING
)
<<
"input selected rows is empty!"
;
return
;
...
...
@@ -290,11 +289,11 @@ struct SelectedRowsAddToTensor<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
auto
&
in1_value
=
input1
.
value
();
auto
&
in1_rows
=
input1
.
rows
();
...
...
@@ -303,7 +302,7 @@ struct SelectedRowsAddToTensor<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -325,7 +324,7 @@ template struct SelectedRowsAddToTensor<phi::CPUContext, float>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
double
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
int
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
int64_t
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
p
latform
::
bfloat16
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
p
hi
::
dtype
::
bfloat16
>;
// This is a separated namespace for manipulate SelectedRows typed
// data. Like merge duplicated rows, adding two SelectedRows etc.
//
...
...
@@ -355,7 +354,7 @@ typename std::enable_if<std::is_integral<T>::value>::type elementwise_add_to(
}
template
<
typename
T
,
typename
DeviceContext
>
typename
std
::
enable_if
<
std
::
is_same
<
T
,
p
latform
::
bfloat16
>::
value
>::
type
typename
std
::
enable_if
<
std
::
is_same
<
T
,
p
hi
::
dtype
::
bfloat16
>::
value
>::
type
add_sparse_inputs
(
const
std
::
vector
<
const
phi
::
SelectedRows
*>&
inputs
,
const
std
::
unordered_map
<
int64_t
,
size_t
>&
rows_to_id
,
int64_t
input_width
,
...
...
@@ -372,7 +371,7 @@ add_sparse_inputs(const std::vector<const phi::SelectedRows*>& inputs,
auto
&
input_rows
=
input
->
rows
();
#ifdef PADDLE_WITH_MKLDNN
OneDNNAXPYHandler
<
T
>
axpy_handler
(
input_width
,
T
(
1.
f
));
paddle
::
operators
::
OneDNNAXPYHandler
<
T
>
axpy_handler
(
input_width
,
T
(
1.
f
));
for
(
size_t
i
=
0
;
i
<
input_rows
.
size
();
i
++
)
{
size_t
out_i
=
rows_to_id
.
at
(
input_rows
[
i
]);
axpy_handler
(
&
input_data
[
i
*
input_width
],
...
...
@@ -391,7 +390,7 @@ add_sparse_inputs(const std::vector<const phi::SelectedRows*>& inputs,
}
template
<
typename
T
,
typename
DeviceContext
>
typename
std
::
enable_if
<!
std
::
is_same
<
T
,
p
latform
::
bfloat16
>::
value
>::
type
typename
std
::
enable_if
<!
std
::
is_same
<
T
,
p
hi
::
dtype
::
bfloat16
>::
value
>::
type
add_sparse_inputs
(
const
std
::
vector
<
const
phi
::
SelectedRows
*>&
inputs
,
const
std
::
unordered_map
<
int64_t
,
size_t
>&
rows_to_id
,
int64_t
input_width
,
...
...
@@ -463,15 +462,15 @@ struct MergeAddImpl {
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
}
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
]
,
platform
::
errors
::
InvalidArgument
(
"All inputs should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
()
,
platform
::
errors
::
InvalidArgument
(
"All inputs should have same height."
));
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
],
phi
::
errors
::
InvalidArgument
(
"All inputs should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
(),
phi
::
errors
::
InvalidArgument
(
"All inputs should have same height."
));
row_num
+=
input
->
rows
().
size
();
merged_row_set
.
insert
(
input
->
rows
().
begin
(),
input
->
rows
().
end
());
}
...
...
@@ -499,11 +498,11 @@ struct MergeAddImpl {
for
(
auto
*
in
:
inputs
)
{
auto
*
in_data
=
in
->
value
().
data
<
T
>
();
auto
in_numel
=
in
->
rows
().
size
()
*
input_width
;
memory
::
Copy
(
out_place
,
out_data
+
copied_numel
,
in_place
,
in_data
,
in_numel
*
sizeof
(
T
));
paddle
::
memory
::
Copy
(
out_place
,
out_data
+
copied_numel
,
in_place
,
in_data
,
in_numel
*
sizeof
(
T
));
copied_numel
+=
in_numel
;
}
}
else
{
...
...
@@ -563,9 +562,9 @@ TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(float)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
double
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
int
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
int64_t
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
p
latform
::
bfloat16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
p
latform
::
complex
<
float
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
p
latform
::
complex
<
double
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
p
hi
::
dtype
::
bfloat16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
p
hi
::
dtype
::
complex
<
float
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
p
hi
::
dtype
::
complex
<
double
>
)
#ifdef PADDLE_WITH_XPU
template
<
typename
T
>
...
...
@@ -582,7 +581,7 @@ struct MergeAdd<phi::XPUContext, T> {
const
phi
::
SelectedRows
&
input
,
phi
::
SelectedRows
*
output
,
const
bool
sorted_result
=
false
)
{
framework
::
Vector
<
int64_t
>
input_rows
(
input
.
rows
());
paddle
::
framework
::
Vector
<
int64_t
>
input_rows
(
input
.
rows
());
if
(
input_rows
.
size
()
==
0
)
{
return
;
}
...
...
@@ -612,16 +611,16 @@ struct MergeAdd<phi::XPUContext, T> {
xpu
::
ctx_guard
RAII_GUARD
(
context
.
x_context
());
int64_t
*
x_rows_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
int64_t
>
(
xm
);
int64_t
*
y_rows_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
int64_t
>
(
ym
);
memory
::
Copy
(
context
.
GetPlace
(),
y_rows_data
,
platform
::
CPUPlace
(),
merge_rows
.
data
(),
ym
*
sizeof
(
int64_t
));
memory
::
Copy
(
context
.
GetPlace
(),
x_rows_data
,
platform
::
CPUPlace
(),
input_rows
.
data
(),
xm
*
sizeof
(
int64_t
));
paddle
::
memory
::
Copy
(
context
.
GetPlace
(),
y_rows_data
,
phi
::
CPUPlace
(),
merge_rows
.
data
(),
ym
*
sizeof
(
int64_t
));
paddle
::
memory
::
Copy
(
context
.
GetPlace
(),
x_rows_data
,
phi
::
CPUPlace
(),
input_rows
.
data
(),
xm
*
sizeof
(
int64_t
));
int
r
=
xpu
::
merge_dup_rows
<
T
,
int64_t
>
(
context
.
x_context
(),
x_data
,
y_data
,
...
...
@@ -661,15 +660,15 @@ struct MergeAdd<phi::XPUContext, T> {
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
}
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
]
,
platform
::
errors
::
InvalidArgument
(
"All inputs should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
()
,
platform
::
errors
::
InvalidArgument
(
"All inputs should have same height."
));
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
],
phi
::
errors
::
InvalidArgument
(
"All inputs should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
(),
phi
::
errors
::
InvalidArgument
(
"All inputs should have same height."
));
row_num
+=
input
->
rows
().
size
();
merged_row_set
.
insert
(
input
->
rows
().
begin
(),
input
->
rows
().
end
());
}
...
...
@@ -709,16 +708,16 @@ struct MergeAdd<phi::XPUContext, T> {
xpu
::
ctx_guard
RAII_GUARD
(
context
.
x_context
());
int64_t
*
x_rows_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
int64_t
>
(
xm
);
int64_t
*
y_rows_data
=
RAII_GUARD
.
alloc_l3_or_gm
<
int64_t
>
(
ym
);
memory
::
Copy
(
context
.
GetPlace
(),
y_rows_data
,
platform
::
CPUPlace
(),
merge_rows
.
data
(),
ym
*
sizeof
(
int64_t
));
memory
::
Copy
(
context
.
GetPlace
(),
x_rows_data
,
platform
::
CPUPlace
(),
input_rows
.
data
(),
xm
*
sizeof
(
int64_t
));
paddle
::
memory
::
Copy
(
context
.
GetPlace
(),
y_rows_data
,
phi
::
CPUPlace
(),
merge_rows
.
data
(),
ym
*
sizeof
(
int64_t
));
paddle
::
memory
::
Copy
(
context
.
GetPlace
(),
x_rows_data
,
phi
::
CPUPlace
(),
input_rows
.
data
(),
xm
*
sizeof
(
int64_t
));
int
r
=
xpu
::
merge_dup_rows
<
T
,
int64_t
>
(
context
.
x_context
(),
x_data
,
y_data
,
...
...
@@ -777,15 +776,15 @@ struct MergeAverage<phi::CPUContext, T> {
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
}
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
]
,
platform
::
errors
::
InvalidArgument
(
"All inputs should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
()
,
platform
::
errors
::
InvalidArgument
(
"All input should have same height."
));
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
],
phi
::
errors
::
InvalidArgument
(
"All inputs should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
(),
phi
::
errors
::
InvalidArgument
(
"All input should have same height."
));
row_num
+=
input
->
rows
().
size
();
merged_row_set
.
insert
(
input
->
rows
().
begin
(),
input
->
rows
().
end
());
}
...
...
@@ -851,17 +850,17 @@ struct UpdateToTensor<phi::CPUContext, T> {
void
operator
()(
const
phi
::
CPUContext
&
context
,
const
ScatterOps
&
op
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
phi
::
Dense
Tensor
*
input2
)
{
auto
in1_height
=
input1
.
height
();
const
auto
&
in2_dims
=
input2
->
dims
();
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
auto
&
in1_value
=
input1
.
value
();
auto
&
in1_rows
=
input1
.
rows
();
...
...
@@ -870,7 +869,7 @@ struct UpdateToTensor<phi::CPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -923,6 +922,5 @@ struct UpdateToTensor<phi::CPUContext, T> {
};
}
// namespace scatter
}
// namespace math
}
// namespace operators
}
// namespace paddle
}
// namespace funcs
}
// namespace phi
paddle/
fluid/operators/math
/selected_rows_functor.cu
→
paddle/
phi/kernels/funcs
/selected_rows_functor.cu
浏览文件 @
b6c6f4f9
...
...
@@ -15,15 +15,14 @@ limitations under the License. */
#include <set>
#include <vector>
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
namespace
phi
{
namespace
funcs
{
template
<
typename
T
>
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
...
...
@@ -34,14 +33,14 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
.
height
(),
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
.
height
()));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
.
height
()));
output
->
set_height
(
in1_height
);
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
());
paddle
::
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
());
auto
&
in2_rows
=
input2
.
rows
();
std
::
vector
<
int64_t
>
out_rows
;
out_rows
.
reserve
(
in1_rows
.
size
()
+
in2_rows
.
size
());
...
...
@@ -59,7 +58,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in2_value
.
numel
()
/
in2_rows
.
size
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -67,7 +66,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
out_value
->
numel
()
/
out_rows
.
size
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and oupput width must be equal."
"But received input width = [%d], output width = [%d]"
,
in1_row_numel
,
...
...
@@ -77,35 +76,35 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
auto
in2_place
=
input2
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in2_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in2_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
out_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
out_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
memory
::
Copy
(
out_place
,
out_data
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
paddle
::
memory
::
Copy
(
out_place
,
out_data
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
memory
::
Copy
(
out_place
,
out_data
+
in1_value
.
numel
(),
in2_place
,
in2_data
,
in2_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
paddle
::
memory
::
Copy
(
out_place
,
out_data
+
in1_value
.
numel
(),
in2_place
,
in2_data
,
in2_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
}
};
...
...
@@ -137,15 +136,15 @@ template <typename T>
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
framework
::
Tensor
&
input2
,
framework
::
Tensor
*
output
)
{
const
phi
::
Dense
Tensor
&
input2
,
phi
::
Dense
Tensor
*
output
)
{
auto
in1_height
=
input1
.
height
();
auto
in2_dims
=
input2
.
dims
();
auto
out_dims
=
output
->
dims
();
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = [%d], first input height = [%d]"
,
in1_height
,
...
...
@@ -153,7 +152,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
out_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and output height must be equal."
"But received input height = [%d], output height = [%d]"
,
in1_height
,
...
...
@@ -166,7 +165,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
.
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -174,7 +173,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
output
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and output width must be equal."
"But received input width = [%d], output width = [%d]"
,
in1_row_numel
,
...
...
@@ -198,16 +197,16 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
out_data
,
in1_row_numel
);
auto
out_eigen
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
in2_eigen
=
framework
::
EigenVector
<
T
>::
Flatten
(
input2
);
auto
out_eigen
=
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
in2_eigen
=
EigenVector
<
T
>::
Flatten
(
input2
);
out_eigen
.
device
(
*
context
.
eigen_device
())
=
out_eigen
+
in2_eigen
;
}
};
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
template
<
typename
T
>
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
T
>
{
...
...
@@ -219,11 +218,11 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
(),
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
->
height
()));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
->
height
()));
auto
&
in1_rows
=
input1
.
rows
();
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
...
...
@@ -238,24 +237,24 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
}
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
auto
in2_place
=
input2
->
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in2_data
=
in2_value
->
data
<
T
>
();
memory
::
Copy
(
in2_place
,
in2_data
+
input2_offset
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
paddle
::
memory
::
Copy
(
in2_place
,
in2_data
+
input2_offset
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
}
};
...
...
@@ -263,7 +262,7 @@ template struct SelectedRowsAddTo<phi::GPUContext, float>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
int
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
int64_t
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
namespace
{
template
<
typename
T
,
int
block_size
>
...
...
@@ -289,17 +288,17 @@ template <typename T>
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
phi
::
Dense
Tensor
*
input2
)
{
auto
in1_height
=
input1
.
height
();
auto
in2_dims
=
input2
->
dims
();
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
auto
&
in1_value
=
input1
.
value
();
auto
&
in1_rows
=
input1
.
rows
();
...
...
@@ -308,7 +307,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -333,7 +332,7 @@ template struct SelectedRowsAddToTensor<phi::GPUContext, float>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
int
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
int64_t
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
namespace
scatter
{
...
...
@@ -379,7 +378,7 @@ struct MergeAddImpl {
const
phi
::
SelectedRows
&
input
,
phi
::
SelectedRows
*
output
,
const
bool
sorted_result
=
false
)
{
framework
::
Vector
<
int64_t
>
input_rows
(
input
.
rows
());
paddle
::
framework
::
Vector
<
int64_t
>
input_rows
(
input
.
rows
());
if
(
input_rows
.
size
()
==
0
)
{
return
;
}
...
...
@@ -387,7 +386,7 @@ struct MergeAddImpl {
phi
::
SelectedRows
&
out
=
*
output
;
std
::
set
<
int64_t
>
row_set
(
input_rows
.
begin
(),
input_rows
.
end
());
std
::
vector
<
int64_t
>
merge_rows_cpu
(
row_set
.
begin
(),
row_set
.
end
());
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
auto
input_width
=
input
.
value
().
dims
()[
1
];
...
...
@@ -446,20 +445,20 @@ struct MergeAddImpl {
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
}
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
]
,
platform
::
errors
::
InvalidArgument
(
"All input should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
()
,
platform
::
errors
::
InvalidArgument
(
"All input should have same height."
));
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
],
phi
::
errors
::
InvalidArgument
(
"All input should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
(),
phi
::
errors
::
InvalidArgument
(
"All input should have same height."
));
merged_row_set
.
insert
(
input
->
rows
().
begin
(),
input
->
rows
().
end
());
}
std
::
vector
<
int64_t
>
merge_rows_cpu
(
merged_row_set
.
begin
(),
merged_row_set
.
end
());
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
out
.
set_rows
(
merge_rows
);
out
.
set_height
(
input_height
);
...
...
@@ -530,10 +529,10 @@ TEMPLATE_SPECIALIZED_FOR_MERGEADD(float)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
double
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
int
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
int64_t
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
float16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
bfloat16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
complex
<
float
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
complex
<
double
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
float16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
bfloat16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
complex
<
float
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
complex
<
double
>
)
template
<
typename
T
,
int
block_size
>
__global__
void
UpdateToTensorKernel
(
const
T
*
selected_rows
,
...
...
@@ -591,7 +590,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
ScatterOps
&
op
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
Dense
Tensor
*
input2
)
{
// NOTE: Use SelectedRowsAddToTensor for better performance
// no additional MergeAdd called.
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
...
...
@@ -602,11 +601,11 @@ struct UpdateToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
auto
&
in1_value
=
merged_in1
.
value
();
auto
&
in1_rows
=
merged_in1
.
rows
();
...
...
@@ -615,7 +614,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -624,14 +623,13 @@ struct UpdateToTensor<phi::GPUContext, T> {
auto
*
in1_data
=
in1_value
.
template
data
<
T
>();
auto
*
in2_data
=
input2
->
data
<
T
>
();
dim3
threads
(
platform
::
PADDLE_CUDA_NUM_THREADS
,
1
);
dim3
threads
(
p
addle
::
p
latform
::
PADDLE_CUDA_NUM_THREADS
,
1
);
dim3
grid
(
in1_rows
.
size
(),
1
);
UpdateToTensorKernel
<
T
,
platform
::
PADDLE_CUDA_NUM_THREADS
>
UpdateToTensorKernel
<
T
,
p
addle
::
p
latform
::
PADDLE_CUDA_NUM_THREADS
>
<<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
in1_data
,
in1_rows
.
cuda_data
(),
op
,
in2_data
,
in1_row_numel
);
}
};
}
// namespace scatter
}
// namespace math
}
// namespace operators
}
// namespace paddle
}
// namespace funcs
}
// namespace phi
paddle/
fluid/operators/math
/selected_rows_functor.h
→
paddle/
phi/kernels/funcs
/selected_rows_functor.h
浏览文件 @
b6c6f4f9
...
...
@@ -16,19 +16,17 @@ limitations under the License. */
#include <map>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#define INLINE_FOR2(sizei, sizej) \
for (int64_t i = 0; i < sizei; i++) \
for (int64_t j = 0; j < sizej; j++)
namespace
paddle
{
namespace
operators
{
namespace
math
{
namespace
phi
{
namespace
funcs
{
// SelectedRows + SelectedRows will simplely concat value and rows.
// The real computation happens in dealing with LoDTensor.
...
...
@@ -44,8 +42,8 @@ template <typename DeviceContext, typename T>
struct
SelectedRowsAddTensor
{
void
operator
()(
const
DeviceContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
framework
::
Tensor
&
input2
,
framework
::
Tensor
*
output
);
const
phi
::
Dense
Tensor
&
input2
,
phi
::
Dense
Tensor
*
output
);
};
// input2 = input1 + input2
...
...
@@ -73,7 +71,7 @@ template <typename DeviceContext, typename T>
struct
SelectedRowsAddToTensor
{
void
operator
()(
const
DeviceContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
);
phi
::
Dense
Tensor
*
input2
);
};
namespace
scatter
{
...
...
@@ -115,10 +113,9 @@ struct UpdateToTensor {
void
operator
()(
const
DeviceContext
&
context
,
const
ScatterOps
&
op
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
);
phi
::
Dense
Tensor
*
input2
);
};
}
// namespace scatter
}
// namespace math
}
// namespace operators
}
// namespace paddle
}
// namespace funcs
}
// namespace phi
paddle/phi/kernels/gpu/adagrad_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -14,11 +14,11 @@
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
namespace
phi
{
...
...
@@ -85,7 +85,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
DenseTensor
*
param
)
{
// 1. g_m.rows = set(g.rows)
auto
grad_width
=
grad
.
value
().
dims
()[
1
];
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
auto
grad_merge
=
merge_func
(
context
,
grad
);
auto
*
grad_merge_data
=
grad_merge
.
mutable_value
()
->
template
data
<
T
>();
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
grad_merge
.
rows
());
...
...
@@ -93,8 +93,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
auto
grad_square
=
SquareSelectedRows
<
phi
::
GPUContext
,
T
>
(
context
,
grad_merge
);
paddle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
functor
;
phi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
functor
;
functor
(
context
,
grad_square
,
moment
);
// 3. update parameter
...
...
paddle/phi/kernels/gpu/adam_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
...
...
paddle/phi/kernels/gpu/adamw_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
...
...
@@ -27,6 +26,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
template
<
typename
T
,
typename
MT
>
...
...
paddle/phi/kernels/impl/adagrad_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,7 +14,6 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
...
paddle/phi/kernels/impl/add_n_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -21,7 +21,7 @@
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
...
...
paddle/phi/kernels/impl/clip_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,7 +14,6 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/kernel_registry.h"
...
...
paddle/phi/kernels/impl/momentum_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,12 +14,12 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/momentum_kernel.h"
namespace
phi
{
...
...
@@ -547,7 +547,7 @@ void MomentumSparseImpl(const Context& ctx,
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
ctx
,
grad
,
merged_grad
);
auto
*
grad_merge_rows
=
merged_grad
->
mutable_rows
();
...
...
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -16,10 +16,10 @@
#include <math.h>
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/rmsprop_kernel.h"
namespace
phi
{
...
...
@@ -304,7 +304,7 @@ void RmspropSparseKernel(const Context &ctx,
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
ctx
,
grad
,
merged_grad
);
funcs
::
ForRange
<
Context
>
for_range
(
ctx
,
limit
);
...
...
paddle/phi/kernels/selected_rows/clip_kernel.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,7 +14,6 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
...
...
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -16,11 +16,11 @@
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -118,7 +118,7 @@ void AdamDenseParamSparseGradKernel(
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
...
...
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -15,7 +15,6 @@
#include "paddle/phi/kernels/selected_rows/adam_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
...
...
@@ -23,6 +22,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -191,7 +191,7 @@ void AdamDenseParamSparseGradKernel(
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
...
...
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
...
...
@@ -27,6 +26,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -214,7 +214,7 @@ void AdamwDenseParamSparseGradKernel(
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
...
...
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -16,10 +16,10 @@
#include "paddle/phi/kernels/selected_rows/add_n_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -73,7 +73,7 @@ void AddNKernel(const Context &dev_ctx,
}
}
if
(
has_data
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_add
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_add
;
merge_add
(
dev_ctx
,
inputs
,
out
);
out
->
SyncIndex
();
...
...
paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,11 +14,11 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/clip_by_norm_kernel.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
namespace
phi
{
...
...
@@ -30,7 +30,7 @@ void ClipByNormKernel(const Context& dev_ctx,
float
max_norm
,
SelectedRows
*
out
)
{
phi
::
SelectedRows
merged_input
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
&
merged_input
);
auto
input
=
&
(
merged_input
.
value
());
out
->
set_rows
(
merged_input
.
rows
());
...
...
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,11 +14,11 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/selected_rows/clip_kernel.h"
namespace
phi
{
...
...
@@ -45,7 +45,7 @@ void ClipSparseKernel(const Context& dev_ctx,
out
,
errors
::
InvalidArgument
(
"Inplace clip is not allowed "
"when x is SelectedRows"
));
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
out
);
auto
*
out_tensor
=
out
->
mutable_value
();
auto
*
out_data
=
out_tensor
->
data
<
T
>
();
...
...
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/funcs/lamb_functors.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -212,7 +212,7 @@ void ComputeRowImpl(const Context& dev_ctx,
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
...
...
paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -18,7 +18,7 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -27,7 +27,7 @@ template <typename T, typename Context>
void
MergeSelectedRowsKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
out
);
}
...
...
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -181,7 +181,7 @@ void AdamDenseParamSparseGradKernel(
if
(
is_strict_sorted
)
{
grad_merge_ptr
=
&
grad
;
}
else
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
float
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
float
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
xpu_wait
(
dev_ctx
.
x_context
()
->
xpu_stream
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录