Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
b6c6f4f9
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b6c6f4f9
编写于
9月 23, 2022
作者:
Y
YuanRisheng
提交者:
GitHub
9月 23, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
move selected_rows_functor (#46373)
上级
4dd5bf79
变更
35
展开全部
隐藏空白更改
内联
并排
Showing
35 changed file
with
331 addition
and
359 deletion
+331
-359
paddle/fluid/distributed/ps/service/communicator/communicator.h
.../fluid/distributed/ps/service/communicator/communicator.h
+3
-4
paddle/fluid/imperative/gradient_accumulator.cc
paddle/fluid/imperative/gradient_accumulator.cc
+25
-28
paddle/fluid/operators/clip_by_norm_op.h
paddle/fluid/operators/clip_by_norm_op.h
+1
-1
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+0
-14
paddle/fluid/operators/math/selected_rows_functor_test.cc
paddle/fluid/operators/math/selected_rows_functor_test.cc
+12
-19
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
+6
-9
paddle/fluid/operators/optimizers/adagrad_op.cc
paddle/fluid/operators/optimizers/adagrad_op.cc
+1
-1
paddle/fluid/operators/optimizers/adam_op_functor.h
paddle/fluid/operators/optimizers/adam_op_functor.h
+2
-2
paddle/fluid/operators/optimizers/ftrl_op.h
paddle/fluid/operators/optimizers/ftrl_op.h
+2
-2
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+0
-1
paddle/phi/kernels/cpu/adagrad_kernel.cc
paddle/phi/kernels/cpu/adagrad_kernel.cc
+3
-4
paddle/phi/kernels/cpu/add_n_kernel.cc
paddle/phi/kernels/cpu/add_n_kernel.cc
+1
-1
paddle/phi/kernels/funcs/CMakeLists.txt
paddle/phi/kernels/funcs/CMakeLists.txt
+14
-0
paddle/phi/kernels/funcs/lamb_functors.h
paddle/phi/kernels/funcs/lamb_functors.h
+2
-2
paddle/phi/kernels/funcs/selected_rows_functor.cc
paddle/phi/kernels/funcs/selected_rows_functor.cc
+130
-132
paddle/phi/kernels/funcs/selected_rows_functor.cu
paddle/phi/kernels/funcs/selected_rows_functor.cu
+92
-94
paddle/phi/kernels/funcs/selected_rows_functor.h
paddle/phi/kernels/funcs/selected_rows_functor.h
+10
-13
paddle/phi/kernels/gpu/adagrad_kernel.cu
paddle/phi/kernels/gpu/adagrad_kernel.cu
+3
-4
paddle/phi/kernels/gpu/adam_kernel.cu
paddle/phi/kernels/gpu/adam_kernel.cu
+0
-1
paddle/phi/kernels/gpu/adamw_kernel.cu
paddle/phi/kernels/gpu/adamw_kernel.cu
+1
-1
paddle/phi/kernels/impl/adagrad_kernel_impl.h
paddle/phi/kernels/impl/adagrad_kernel_impl.h
+0
-1
paddle/phi/kernels/impl/add_n_kernel_impl.h
paddle/phi/kernels/impl/add_n_kernel_impl.h
+1
-1
paddle/phi/kernels/impl/clip_kernel_impl.h
paddle/phi/kernels/impl/clip_kernel_impl.h
+0
-1
paddle/phi/kernels/impl/momentum_kernel_impl.h
paddle/phi/kernels/impl/momentum_kernel_impl.h
+2
-2
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/clip_kernel.h
paddle/phi/kernels/selected_rows/clip_kernel.h
+0
-1
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
+2
-2
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
+2
-2
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
+2
-2
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
...phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
...e/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
+2
-2
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
+2
-2
未找到文件。
paddle/fluid/distributed/ps/service/communicator/communicator.h
浏览文件 @
b6c6f4f9
...
...
@@ -37,13 +37,13 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
distributed
{
...
...
@@ -212,11 +212,10 @@ inline void MergeVars(const std::string &var_name,
}
phi
::
CPUContext
dev_ctx
;
if
(
merge_add
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_add
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_add
;
merge_add
(
dev_ctx
,
inputs
,
out_slr
);
}
else
{
paddle
::
operators
::
math
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
T
>
merge_average
;
phi
::
funcs
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
T
>
merge_average
;
merge_average
(
dev_ctx
,
inputs
,
out_slr
);
}
...
...
paddle/fluid/imperative/gradient_accumulator.cc
浏览文件 @
b6c6f4f9
...
...
@@ -22,7 +22,6 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/device_context.h"
...
...
@@ -30,6 +29,7 @@
#include "paddle/fluid/platform/profiler.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#ifdef PADDLE_WITH_XPU
#include "xpu/refactor/math.h"
#endif
...
...
@@ -354,15 +354,14 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
framework
::
TransToProtoVarType
(
src_selected_rows
.
value
().
dtype
());
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
paddle::operators::math::SelectedRowsAddToTensor<dev_ctx_type, cpp_type> \
functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
dst_tensor); \
return; \
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
phi::funcs::SelectedRowsAddToTensor<dev_ctx_type, cpp_type> functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
dst_tensor); \
return; \
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
@@ -406,15 +405,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
dst_tensor
->
Resize
(
src_tensor
.
dims
());
dst_tensor
->
mutable_data
(
place
,
src_tensor
.
dtype
());
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::operators::math::SelectedRowsAddTensor<dev_ctx_type, cpp_type> \
functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
src_tensor, \
dst_tensor); \
return; \
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
phi::funcs::SelectedRowsAddTensor<dev_ctx_type, cpp_type> functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
src_tensor, \
dst_tensor); \
return; \
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
@@ -469,15 +467,14 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
phi
::
SelectedRows
*
dst_selected_rows
=
GetEmptyInnerTensor
<
phi
::
SelectedRows
>
(
dst_var
.
get
());
#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
paddle::operators::math::scatter::MergeAdd<dev_ctx_type, cpp_type> \
merge_add; \
merge_add(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
dst_selected_rows); \
return dst_var; \
#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
phi::funcs::scatter::MergeAdd<dev_ctx_type, cpp_type> merge_add; \
merge_add(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
dst_selected_rows); \
return dst_var; \
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
paddle/fluid/operators/clip_by_norm_op.h
浏览文件 @
b6c6f4f9
...
...
@@ -17,8 +17,8 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
b6c6f4f9
...
...
@@ -31,20 +31,6 @@ math_library(sampler DEPS generator)
# math_library(math_function DEPS blas dense_tensor tensor)
math_library
(
maxouting
)
if
(
WITH_MKLDNN
)
math_library
(
selected_rows_functor
DEPS
selected_rows_utils
math_function
blas
mkldnn_axpy_handler
mixed_vector
)
else
()
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function
blas mixed_vector
)
endif
()
math_library
(
sequence_padding
)
math_library
(
sequence_pooling DEPS math_function jit_kernel_helper
)
math_library
(
sequence_scale
)
...
...
paddle/fluid/operators/math/selected_rows_functor_test.cc
浏览文件 @
b6c6f4f9
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
#include "gtest/gtest.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
...
@@ -48,7 +48,7 @@ TEST(selected_rows_functor, cpu_add) {
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
p
addle
::
operators
::
math
::
SelectedRowsAdd
<
phi
::
CPUContext
,
float
>
add_functor
;
p
hi
::
funcs
::
SelectedRowsAdd
<
phi
::
CPUContext
,
float
>
add_functor
;
add_functor
(
ctx
,
*
selected_rows1
,
*
selected_rows2
,
output
.
get
());
auto
out_height
=
output
->
height
();
...
...
@@ -88,8 +88,7 @@ TEST(selected_rows_functor, cpu_add) {
new
paddle
::
framework
::
Tensor
()};
tensor2
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTensor
<
phi
::
CPUContext
,
float
>
add_tensor_functor
;
phi
::
funcs
::
SelectedRowsAddTensor
<
phi
::
CPUContext
,
float
>
add_tensor_functor
;
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
auto
*
tensor2_data
=
tensor2
->
data
<
float
>
();
...
...
@@ -141,8 +140,7 @@ TEST(selected_rows_functor, cpu_add_to) {
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTo
<
phi
::
CPUContext
,
float
>
add_to_functor
;
phi
::
funcs
::
SelectedRowsAddTo
<
phi
::
CPUContext
,
float
>
add_to_functor
;
add_to_functor
(
ctx
,
*
selected_rows1
,
0
,
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows2
,
in1_value
->
numel
(),
output
.
get
());
...
...
@@ -179,7 +177,7 @@ TEST(selected_rows_functor, cpu_add_to) {
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
...
...
@@ -216,7 +214,7 @@ TEST(selected_rows_functor, cpu_merge_average_float) {
cpu_place
);
functor
(
ctx
,
in_value
,
1.0
);
p
addle
::
operators
::
math
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
float
>
merge_average_functor
;
phi
::
SelectedRows
output
=
merge_average_functor
(
ctx
,
*
selected_rows
);
...
...
@@ -253,8 +251,7 @@ TEST(selected_rows_functor, cpu_merge_add_float) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
merge_add_functor
(
ctx
,
*
selected_rows
,
output
.
get
());
auto
out_height
=
output
->
height
();
...
...
@@ -290,8 +287,7 @@ TEST(selected_rows_functor, cpu_merge_add_int) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
int
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
int
>
merge_add_functor
;
merge_add_functor
(
ctx
,
*
selected_rows
,
output
.
get
());
auto
out_height
=
output
->
height
();
...
...
@@ -337,8 +333,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
...
...
@@ -387,8 +382,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
...
...
@@ -444,8 +438,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
auto
*
out_value
=
output
->
mutable_value
();
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsSumTo
<
phi
::
CPUContext
,
float
>
sum_to_functor
;
phi
::
funcs
::
SelectedRowsSumTo
<
phi
::
CPUContext
,
float
>
sum_to_functor
;
sum_to_functor
(
ctx
,
std
::
vector
<
phi
::
SelectedRows
*>
(
{
selected_rows1
.
get
(),
selected_rows2
.
get
()}),
...
...
@@ -479,7 +472,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
new
paddle
::
framework
::
Tensor
()};
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
auto
*
tensor1_data
=
tensor1
->
data
<
float
>
();
...
...
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
浏览文件 @
b6c6f4f9
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
#include "gtest/gtest.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
...
@@ -61,7 +61,7 @@ TEST(selected_rows_functor, gpu_add) {
// simply concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
gpu_place
);
p
addle
::
operators
::
math
::
SelectedRowsAdd
<
phi
::
GPUContext
,
float
>
add_functor
;
p
hi
::
funcs
::
SelectedRowsAdd
<
phi
::
GPUContext
,
float
>
add_functor
;
add_functor
(
ctx
,
*
selected_rows1
,
*
selected_rows2
,
output
.
get
());
auto
out_height
=
output
->
height
();
...
...
@@ -105,8 +105,7 @@ TEST(selected_rows_functor, gpu_add) {
new
paddle
::
framework
::
Tensor
()};
tensor2
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
gpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>
add_tensor_functor
;
phi
::
funcs
::
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>
add_tensor_functor
;
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
paddle
::
framework
::
Tensor
tensor2_cpu
;
...
...
@@ -164,8 +163,7 @@ TEST(selected_rows_functor, gpu_add_to) {
// simply concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
gpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTo
<
phi
::
GPUContext
,
float
>
add_to_functor
;
phi
::
funcs
::
SelectedRowsAddTo
<
phi
::
GPUContext
,
float
>
add_to_functor
;
add_to_functor
(
ctx
,
*
selected_rows1
,
0
,
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows2
,
in1_value
->
numel
(),
output
.
get
());
...
...
@@ -206,7 +204,7 @@ TEST(selected_rows_functor, gpu_add_to) {
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
gpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
...
...
@@ -261,8 +259,7 @@ TEST(selected_rows_functor, gpu_merge_add) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
float
>
merge_add_functor
;
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
float
>
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
...
...
paddle/fluid/operators/optimizers/adagrad_op.cc
浏览文件 @
b6c6f4f9
...
...
@@ -17,10 +17,10 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/multiary.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/optimizers/adam_op_functor.h
浏览文件 @
b6c6f4f9
...
...
@@ -16,12 +16,12 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
namespace
scatter
=
p
addle
::
operators
::
math
::
scatter
;
namespace
scatter
=
p
hi
::
funcs
::
scatter
;
static
inline
float
GetAttrFromTensor
(
const
framework
::
Tensor
*
tensor
)
{
const
float
*
tensor_data
=
tensor
->
data
<
float
>
();
...
...
paddle/fluid/operators/optimizers/ftrl_op.h
浏览文件 @
b6c6f4f9
...
...
@@ -15,8 +15,8 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -193,7 +193,7 @@ class FTRLOpKernel : public framework::OpKernel<T> {
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
math
::
scatter
::
MergeAdd
<
DeviceContext
,
T
>
merge_func
;
phi
::
funcs
::
scatter
::
MergeAdd
<
DeviceContext
,
T
>
merge_func
;
merge_func
(
ctx
.
template
device_context
<
DeviceContext
>(),
*
grad
,
merged_grad
);
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,6 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
...
...
paddle/phi/kernels/cpu/adagrad_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -14,10 +14,10 @@
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
namespace
phi
{
...
...
@@ -38,7 +38,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
DenseTensor
*
param
)
{
// 1. g_m.rows = set(g.rows)
auto
grad_width
=
grad
.
value
().
dims
()[
1
];
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_func
;
auto
grad_merge
=
merge_func
(
context
,
grad
);
auto
&
merge_rows
=
grad_merge
.
rows
();
auto
*
grad_merge_data
=
grad_merge
.
mutable_value
()
->
template
data
<
T
>();
...
...
@@ -47,8 +47,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
auto
grad_square
=
SquareSelectedRows
<
phi
::
CPUContext
,
T
>
(
context
,
grad_merge
);
paddle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
functor
;
phi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
functor
;
functor
(
context
,
grad_square
,
moment
);
// 3. update parameter
...
...
paddle/phi/kernels/cpu/add_n_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -53,7 +53,7 @@ void AddNKernel(const Context& dev_ctx,
}
}
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
Context
,
T
>
functor
;
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
Context
,
T
>
functor
;
// If in_place, just skip the first tensor
for
(
size_t
i
=
start
;
i
<
in_num
;
i
++
)
{
if
(
DenseTensor
::
classof
(
x
[
i
]))
{
...
...
paddle/phi/kernels/funcs/CMakeLists.txt
浏览文件 @
b6c6f4f9
...
...
@@ -38,3 +38,17 @@ else()
math_library
(
fft DEPS dense_tensor pocketfft
)
endif
()
endif
()
if
(
WITH_MKLDNN
)
math_library
(
selected_rows_functor
DEPS
selected_rows_utils
math_function
blas
mkldnn_axpy_handler
mixed_vector
)
else
()
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function
blas mixed_vector
)
endif
()
paddle/phi/kernels/funcs/lamb_functors.h
浏览文件 @
b6c6f4f9
...
...
@@ -19,19 +19,19 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/memory/buffer.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/squared_l2_norm.h"
#include "paddle/phi/kernels/funcs/tensor_to_string.h"
namespace
phi
{
namespace
scatter
=
p
addle
::
operators
::
math
::
scatter
;
namespace
scatter
=
p
hi
::
funcs
::
scatter
;
template
<
typename
T
,
bool
IsMultiPrecision
>
struct
LambMomentREGUpdateFunctor
{
...
...
paddle/
fluid/operators/math
/selected_rows_functor.cc
→
paddle/
phi/kernels/funcs
/selected_rows_functor.cc
浏览文件 @
b6c6f4f9
此差异已折叠。
点击以展开。
paddle/
fluid/operators/math
/selected_rows_functor.cu
→
paddle/
phi/kernels/funcs
/selected_rows_functor.cu
浏览文件 @
b6c6f4f9
...
...
@@ -15,15 +15,14 @@ limitations under the License. */
#include <set>
#include <vector>
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
namespace
phi
{
namespace
funcs
{
template
<
typename
T
>
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
...
...
@@ -34,14 +33,14 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
.
height
(),
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
.
height
()));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
.
height
()));
output
->
set_height
(
in1_height
);
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
());
paddle
::
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
());
auto
&
in2_rows
=
input2
.
rows
();
std
::
vector
<
int64_t
>
out_rows
;
out_rows
.
reserve
(
in1_rows
.
size
()
+
in2_rows
.
size
());
...
...
@@ -59,7 +58,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in2_value
.
numel
()
/
in2_rows
.
size
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -67,7 +66,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
out_value
->
numel
()
/
out_rows
.
size
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and oupput width must be equal."
"But received input width = [%d], output width = [%d]"
,
in1_row_numel
,
...
...
@@ -77,35 +76,35 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
auto
in2_place
=
input2
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in2_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in2_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
out_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
out_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
memory
::
Copy
(
out_place
,
out_data
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
paddle
::
memory
::
Copy
(
out_place
,
out_data
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
memory
::
Copy
(
out_place
,
out_data
+
in1_value
.
numel
(),
in2_place
,
in2_data
,
in2_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
paddle
::
memory
::
Copy
(
out_place
,
out_data
+
in1_value
.
numel
(),
in2_place
,
in2_data
,
in2_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
}
};
...
...
@@ -137,15 +136,15 @@ template <typename T>
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
framework
::
Tensor
&
input2
,
framework
::
Tensor
*
output
)
{
const
phi
::
Dense
Tensor
&
input2
,
phi
::
Dense
Tensor
*
output
)
{
auto
in1_height
=
input1
.
height
();
auto
in2_dims
=
input2
.
dims
();
auto
out_dims
=
output
->
dims
();
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = [%d], first input height = [%d]"
,
in1_height
,
...
...
@@ -153,7 +152,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
out_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and output height must be equal."
"But received input height = [%d], output height = [%d]"
,
in1_height
,
...
...
@@ -166,7 +165,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
.
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -174,7 +173,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
output
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and output width must be equal."
"But received input width = [%d], output width = [%d]"
,
in1_row_numel
,
...
...
@@ -198,16 +197,16 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
out_data
,
in1_row_numel
);
auto
out_eigen
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
in2_eigen
=
framework
::
EigenVector
<
T
>::
Flatten
(
input2
);
auto
out_eigen
=
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
in2_eigen
=
EigenVector
<
T
>::
Flatten
(
input2
);
out_eigen
.
device
(
*
context
.
eigen_device
())
=
out_eigen
+
in2_eigen
;
}
};
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
template
<
typename
T
>
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
T
>
{
...
...
@@ -219,11 +218,11 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
(),
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
->
height
()));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
input2
->
height
()));
auto
&
in1_rows
=
input1
.
rows
();
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
...
...
@@ -238,24 +237,24 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
}
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
auto
in2_place
=
input2
->
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in2_data
=
in2_value
->
data
<
T
>
();
memory
::
Copy
(
in2_place
,
in2_data
+
input2_offset
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
paddle
::
memory
::
Copy
(
in2_place
,
in2_data
+
input2_offset
,
in1_place
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
}
};
...
...
@@ -263,7 +262,7 @@ template struct SelectedRowsAddTo<phi::GPUContext, float>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
int
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
int64_t
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
namespace
{
template
<
typename
T
,
int
block_size
>
...
...
@@ -289,17 +288,17 @@ template <typename T>
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
phi
::
Dense
Tensor
*
input2
)
{
auto
in1_height
=
input1
.
height
();
auto
in2_dims
=
input2
->
dims
();
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
auto
&
in1_value
=
input1
.
value
();
auto
&
in1_rows
=
input1
.
rows
();
...
...
@@ -308,7 +307,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -333,7 +332,7 @@ template struct SelectedRowsAddToTensor<phi::GPUContext, float>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
int
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
int64_t
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
namespace
scatter
{
...
...
@@ -379,7 +378,7 @@ struct MergeAddImpl {
const
phi
::
SelectedRows
&
input
,
phi
::
SelectedRows
*
output
,
const
bool
sorted_result
=
false
)
{
framework
::
Vector
<
int64_t
>
input_rows
(
input
.
rows
());
paddle
::
framework
::
Vector
<
int64_t
>
input_rows
(
input
.
rows
());
if
(
input_rows
.
size
()
==
0
)
{
return
;
}
...
...
@@ -387,7 +386,7 @@ struct MergeAddImpl {
phi
::
SelectedRows
&
out
=
*
output
;
std
::
set
<
int64_t
>
row_set
(
input_rows
.
begin
(),
input_rows
.
end
());
std
::
vector
<
int64_t
>
merge_rows_cpu
(
row_set
.
begin
(),
row_set
.
end
());
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
auto
input_width
=
input
.
value
().
dims
()[
1
];
...
...
@@ -446,20 +445,20 @@ struct MergeAddImpl {
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
}
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
]
,
platform
::
errors
::
InvalidArgument
(
"All input should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
()
,
platform
::
errors
::
InvalidArgument
(
"All input should have same height."
));
PADDLE_ENFORCE_EQ
(
input_width
,
input
->
value
().
dims
()[
1
],
phi
::
errors
::
InvalidArgument
(
"All input should have same "
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
input
->
height
(),
phi
::
errors
::
InvalidArgument
(
"All input should have same height."
));
merged_row_set
.
insert
(
input
->
rows
().
begin
(),
input
->
rows
().
end
());
}
std
::
vector
<
int64_t
>
merge_rows_cpu
(
merged_row_set
.
begin
(),
merged_row_set
.
end
());
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
out
.
set_rows
(
merge_rows
);
out
.
set_height
(
input_height
);
...
...
@@ -530,10 +529,10 @@ TEMPLATE_SPECIALIZED_FOR_MERGEADD(float)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
double
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
int
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
int64_t
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
float16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
bfloat16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
complex
<
float
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
complex
<
double
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
float16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
bfloat16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
complex
<
float
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
complex
<
double
>
)
template
<
typename
T
,
int
block_size
>
__global__
void
UpdateToTensorKernel
(
const
T
*
selected_rows
,
...
...
@@ -591,7 +590,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
ScatterOps
&
op
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
Dense
Tensor
*
input2
)
{
// NOTE: Use SelectedRowsAddToTensor for better performance
// no additional MergeAdd called.
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
...
...
@@ -602,11 +601,11 @@ struct UpdateToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"[%d], second input height = [%d]"
,
in1_height
,
in2_dims
[
0
]));
auto
&
in1_value
=
merged_in1
.
value
();
auto
&
in1_rows
=
merged_in1
.
rows
();
...
...
@@ -615,7 +614,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
input2
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
...
...
@@ -624,14 +623,13 @@ struct UpdateToTensor<phi::GPUContext, T> {
auto
*
in1_data
=
in1_value
.
template
data
<
T
>();
auto
*
in2_data
=
input2
->
data
<
T
>
();
dim3
threads
(
platform
::
PADDLE_CUDA_NUM_THREADS
,
1
);
dim3
threads
(
p
addle
::
p
latform
::
PADDLE_CUDA_NUM_THREADS
,
1
);
dim3
grid
(
in1_rows
.
size
(),
1
);
UpdateToTensorKernel
<
T
,
platform
::
PADDLE_CUDA_NUM_THREADS
>
UpdateToTensorKernel
<
T
,
p
addle
::
p
latform
::
PADDLE_CUDA_NUM_THREADS
>
<<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
in1_data
,
in1_rows
.
cuda_data
(),
op
,
in2_data
,
in1_row_numel
);
}
};
}
// namespace scatter
}
// namespace math
}
// namespace operators
}
// namespace paddle
}
// namespace funcs
}
// namespace phi
paddle/
fluid/operators/math
/selected_rows_functor.h
→
paddle/
phi/kernels/funcs
/selected_rows_functor.h
浏览文件 @
b6c6f4f9
...
...
@@ -16,19 +16,17 @@ limitations under the License. */
#include <map>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#define INLINE_FOR2(sizei, sizej) \
for (int64_t i = 0; i < sizei; i++) \
for (int64_t j = 0; j < sizej; j++)
namespace
paddle
{
namespace
operators
{
namespace
math
{
namespace
phi
{
namespace
funcs
{
// SelectedRows + SelectedRows will simplely concat value and rows.
// The real computation happens in dealing with LoDTensor.
...
...
@@ -44,8 +42,8 @@ template <typename DeviceContext, typename T>
struct
SelectedRowsAddTensor
{
void
operator
()(
const
DeviceContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
framework
::
Tensor
&
input2
,
framework
::
Tensor
*
output
);
const
phi
::
Dense
Tensor
&
input2
,
phi
::
Dense
Tensor
*
output
);
};
// input2 = input1 + input2
...
...
@@ -73,7 +71,7 @@ template <typename DeviceContext, typename T>
struct
SelectedRowsAddToTensor
{
void
operator
()(
const
DeviceContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
);
phi
::
Dense
Tensor
*
input2
);
};
namespace
scatter
{
...
...
@@ -115,10 +113,9 @@ struct UpdateToTensor {
void
operator
()(
const
DeviceContext
&
context
,
const
ScatterOps
&
op
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
);
phi
::
Dense
Tensor
*
input2
);
};
}
// namespace scatter
}
// namespace math
}
// namespace operators
}
// namespace paddle
}
// namespace funcs
}
// namespace phi
paddle/phi/kernels/gpu/adagrad_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -14,11 +14,11 @@
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
namespace
phi
{
...
...
@@ -85,7 +85,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
DenseTensor
*
param
)
{
// 1. g_m.rows = set(g.rows)
auto
grad_width
=
grad
.
value
().
dims
()[
1
];
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
auto
grad_merge
=
merge_func
(
context
,
grad
);
auto
*
grad_merge_data
=
grad_merge
.
mutable_value
()
->
template
data
<
T
>();
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
grad_merge
.
rows
());
...
...
@@ -93,8 +93,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
auto
grad_square
=
SquareSelectedRows
<
phi
::
GPUContext
,
T
>
(
context
,
grad_merge
);
paddle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
functor
;
phi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
functor
;
functor
(
context
,
grad_square
,
moment
);
// 3. update parameter
...
...
paddle/phi/kernels/gpu/adam_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
...
...
paddle/phi/kernels/gpu/adamw_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
...
...
@@ -27,6 +26,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
template
<
typename
T
,
typename
MT
>
...
...
paddle/phi/kernels/impl/adagrad_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,7 +14,6 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
...
paddle/phi/kernels/impl/add_n_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -21,7 +21,7 @@
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
...
...
paddle/phi/kernels/impl/clip_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,7 +14,6 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/kernel_registry.h"
...
...
paddle/phi/kernels/impl/momentum_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,12 +14,12 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/momentum_kernel.h"
namespace
phi
{
...
...
@@ -547,7 +547,7 @@ void MomentumSparseImpl(const Context& ctx,
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
ctx
,
grad
,
merged_grad
);
auto
*
grad_merge_rows
=
merged_grad
->
mutable_rows
();
...
...
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -16,10 +16,10 @@
#include <math.h>
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/rmsprop_kernel.h"
namespace
phi
{
...
...
@@ -304,7 +304,7 @@ void RmspropSparseKernel(const Context &ctx,
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
ctx
,
grad
,
merged_grad
);
funcs
::
ForRange
<
Context
>
for_range
(
ctx
,
limit
);
...
...
paddle/phi/kernels/selected_rows/clip_kernel.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,7 +14,6 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
...
...
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -16,11 +16,11 @@
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -118,7 +118,7 @@ void AdamDenseParamSparseGradKernel(
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
...
...
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -15,7 +15,6 @@
#include "paddle/phi/kernels/selected_rows/adam_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
...
...
@@ -23,6 +22,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -191,7 +191,7 @@ void AdamDenseParamSparseGradKernel(
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
...
...
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
...
...
@@ -27,6 +26,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -214,7 +214,7 @@ void AdamwDenseParamSparseGradKernel(
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
...
...
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -16,10 +16,10 @@
#include "paddle/phi/kernels/selected_rows/add_n_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -73,7 +73,7 @@ void AddNKernel(const Context &dev_ctx,
}
}
if
(
has_data
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_add
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_add
;
merge_add
(
dev_ctx
,
inputs
,
out
);
out
->
SyncIndex
();
...
...
paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,11 +14,11 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/clip_by_norm_kernel.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
namespace
phi
{
...
...
@@ -30,7 +30,7 @@ void ClipByNormKernel(const Context& dev_ctx,
float
max_norm
,
SelectedRows
*
out
)
{
phi
::
SelectedRows
merged_input
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
&
merged_input
);
auto
input
=
&
(
merged_input
.
value
());
out
->
set_rows
(
merged_input
.
rows
());
...
...
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -14,11 +14,11 @@
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/selected_rows/clip_kernel.h"
namespace
phi
{
...
...
@@ -45,7 +45,7 @@ void ClipSparseKernel(const Context& dev_ctx,
out
,
errors
::
InvalidArgument
(
"Inplace clip is not allowed "
"when x is SelectedRows"
));
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
out
);
auto
*
out_tensor
=
out
->
mutable_value
();
auto
*
out_data
=
out_tensor
->
data
<
T
>
();
...
...
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
浏览文件 @
b6c6f4f9
...
...
@@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/funcs/lamb_functors.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -212,7 +212,7 @@ void ComputeRowImpl(const Context& dev_ctx,
}
else
{
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
}
...
...
paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -18,7 +18,7 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -27,7 +27,7 @@ template <typename T, typename Context>
void
MergeSelectedRowsKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
out
);
}
...
...
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
浏览文件 @
b6c6f4f9
...
...
@@ -19,7 +19,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
namespace
sr
{
...
...
@@ -181,7 +181,7 @@ void AdamDenseParamSparseGradKernel(
if
(
is_strict_sorted
)
{
grad_merge_ptr
=
&
grad
;
}
else
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
float
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
float
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
xpu_wait
(
dev_ctx
.
x_context
()
->
xpu_stream
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录